From f77c7b96014471f2c39823ed9fcb42967e7d9ede Mon Sep 17 00:00:00 2001 From: Linus Heckemann Date: Sat, 21 Feb 2026 12:48:06 +0100 Subject: [PATCH 0001/1561] selftests/net: add test for IP-in-IPv6 tunneling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 81c734dae203 ("ip6_tunnel: use skb_vlan_inet_prepare() in __ip6_tnl_rcv()") was fine in and of itself, but its backport to 6.12 (and 6.6) broke IPv4-in-IPv6 tunneling, see [1]. This adds a self-test for basic IPv4-in-IPv6 and IPv6-in-IPv6 functionality. [1]: https://lore.kernel.org/all/CAA2RiuSnH_2xc+-W6EnFEG00XjS-dszMq61JEvRjcGS31CBw=g@mail.gmail.com/ Signed-off-by: Linus Heckemann Reviewed-by: Hangbin Liu Reviewed-by: Ricardo B. Marlière Tested-by: Ricardo B. Marlière Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260221114806.1231666-1-git@sphalerite.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/ip6_tunnel.sh | 44 +++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100755 tools/testing/selftests/net/ip6_tunnel.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index afdea6d95bde..e97c90886f34 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -44,6 +44,7 @@ TEST_PROGS := \ io_uring_zerocopy_tx.sh \ ioam6.sh \ ip6_gre_headroom.sh \ + ip6_tunnel.sh \ ip_defrag.sh \ ip_local_port_range.sh \ ipv6_flowlabel.sh \ diff --git a/tools/testing/selftests/net/ip6_tunnel.sh b/tools/testing/selftests/net/ip6_tunnel.sh new file mode 100755 index 000000000000..fe081a521819 --- /dev/null +++ b/tools/testing/selftests/net/ip6_tunnel.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# Test that IPv4-over-IPv6 tunneling works. + +source lib.sh +set -e + +setup_prepare() { + ip link add transport1 type veth peer name transport2 + + setup_ns ns1 + ip link set transport1 netns $ns1 + ip -n $ns1 address add 2001:db8::1/64 dev transport1 nodad + ip -n $ns1 address add 2001:db8::3/64 dev transport1 nodad + ip -n $ns1 link set transport1 up + ip -n $ns1 link add link transport1 name tunnel4 type ip6tnl mode ipip6 local 2001:db8::1 remote 2001:db8::2 + ip -n $ns1 address add 172.0.0.1/32 peer 172.0.0.2/32 dev tunnel4 + ip -n $ns1 link set tunnel4 up + ip -n $ns1 link add link transport1 name tunnel6 type ip6tnl mode ip6ip6 local 2001:db8::3 remote 2001:db8::4 + ip -n $ns1 address add 2001:db8:6::1/64 dev tunnel6 + ip -n $ns1 link set tunnel6 up + + setup_ns ns2 + ip link set transport2 netns $ns2 + ip -n $ns2 address add 2001:db8::2/64 dev transport2 nodad + ip -n $ns2 address add 2001:db8::4/64 dev transport2 nodad + ip -n $ns2 link set transport2 up + ip -n $ns2 link add link transport2 name tunnel4 type ip6tnl mode ipip6 local 2001:db8::2 remote 2001:db8::1 + ip -n $ns2 address add 172.0.0.2/32 peer 172.0.0.1/32 dev tunnel4 + ip -n $ns2 link set tunnel4 up + ip -n $ns2 link add link transport2 name tunnel6 type ip6tnl mode ip6ip6 local 2001:db8::4 remote 2001:db8::3 + ip -n $ns2 address add 2001:db8:6::2/64 dev tunnel6 + ip -n $ns2 link set tunnel6 up +} + +cleanup() { + cleanup_all_ns + # in case the namespaces haven't been set up yet + ip link delete transport1 &>/dev/null || true +} + +trap cleanup EXIT +setup_prepare +ip netns exec $ns1 ping -q -W1 -c1 172.0.0.2 >/dev/null +ip netns exec $ns1 ping -q -W1 -c1 2001:db8:6::2 >/dev/null From 8ac768bb198ccdb1b6dde4bb3616a115642de65c Mon Sep 17 00:00:00 2001 From: Naveen Anandhan Date: Sun, 22 Feb 2026 15:25:36 +0530 Subject: [PATCH 0002/1561] selftests: tc-testing: preserve list order when removing duplicates Using set() removes duplicates but breaks ordering. Test output should be deterministic, so replace with dict.fromkeys(). Signed-off-by: Naveen Anandhan Link: https://patch.msgid.link/20260222095536.17371-1-mr.navi8680@gmail.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/tc-testing/tdc_helper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/tc-testing/tdc_helper.py b/tools/testing/selftests/tc-testing/tdc_helper.py index 0440d252c4c5..e06f03c0fb5d 100644 --- a/tools/testing/selftests/tc-testing/tdc_helper.py +++ b/tools/testing/selftests/tc-testing/tdc_helper.py @@ -16,9 +16,9 @@ def get_categorized_testlist(alltests, ucat): def get_unique_item(lst): - """ For a list, return a list of the unique items in the list. """ + """Return unique items while preserving original order.""" if len(lst) > 1: - return list(set(lst)) + return list(dict.fromkeys(lst)) else: return lst From e5a80f5e67ce95c41615471d89ae7412b298cd85 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Mon, 23 Feb 2026 08:41:34 +0100 Subject: [PATCH 0003/1561] net: l2tp_eth: Replace deprecated strcpy with strscpy in l2tp_eth_create strcpy() has been deprecated [1] because it performs no bounds checking on the destination buffer, which can lead to buffer overflows. Replace it with the safer strscpy(). Use the two-argument version of strscpy() to copy 'cfg->ifname'. Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#strcpy [1] Signed-off-by: Thorsten Blum Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260223074137.321862-1-thorsten.blum@linux.dev Signed-off-by: Paolo Abeni --- net/l2tp/l2tp_eth.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index cf0b66f4fb29..a4956ef9574c 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -235,10 +236,10 @@ static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel, int rc; if (cfg->ifname) { - strscpy(name, cfg->ifname, IFNAMSIZ); + strscpy(name, cfg->ifname); name_assign_type = NET_NAME_USER; } else { - strcpy(name, L2TP_ETH_DEV_NAME); + strscpy(name, L2TP_ETH_DEV_NAME); name_assign_type = NET_NAME_ENUM; } From 8636385b9f0175220318bc23f6926b3fd013b131 Mon Sep 17 00:00:00 2001 From: Nicolai Buchwitz Date: Mon, 23 Feb 2026 09:54:42 +0100 Subject: [PATCH 0004/1561] net: microchip: lan743x: add ethtool nway_reset support Wire phylink_ethtool_nway_reset() as the .nway_reset ethtool operation, allowing userspace to restart PHY autonegotiation via 'ethtool -r'. Signed-off-by: Nicolai Buchwitz Reviewed-by: Russel King (Oracle) Link: https://patch.msgid.link/20260223085442.42852-1-nb@tipi-net.de Signed-off-by: Paolo Abeni --- drivers/net/ethernet/microchip/lan743x_ethtool.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/microchip/lan743x_ethtool.c b/drivers/net/ethernet/microchip/lan743x_ethtool.c index 8a3c1ecc7866..9195419ecee0 100644 --- a/drivers/net/ethernet/microchip/lan743x_ethtool.c +++ b/drivers/net/ethernet/microchip/lan743x_ethtool.c @@ -1079,6 +1079,13 @@ static int lan743x_ethtool_set_eee(struct net_device *netdev, return phylink_ethtool_set_eee(adapter->phylink, eee); } +static int lan743x_ethtool_nway_reset(struct net_device *netdev) +{ + struct lan743x_adapter *adapter = netdev_priv(netdev); + + return phylink_ethtool_nway_reset(adapter->phylink); +} + static int lan743x_ethtool_set_link_ksettings(struct net_device *netdev, const struct ethtool_link_ksettings *cmd) @@ -1369,6 +1376,7 @@ const struct ethtool_ops lan743x_ethtool_ops = { .set_rxfh = lan743x_ethtool_set_rxfh, .get_rxfh_fields = lan743x_ethtool_get_rxfh_fields, .get_ts_info = lan743x_ethtool_get_ts_info, + .nway_reset = lan743x_ethtool_nway_reset, .get_eee = lan743x_ethtool_get_eee, .set_eee = lan743x_ethtool_set_eee, .get_link_ksettings = lan743x_ethtool_get_link_ksettings, From 2550def53bbf2323894265e0e64363998bf9e5c3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 23 Feb 2026 09:27:15 +0000 Subject: [PATCH 0005/1561] net: __lock_sock() can be static After commit 6511882cdd82 ("mptcp: allocate fwd memory separately on the rx and tx path") __lock_sock() can be static again. Make sure __lock_sock() is not inlined, so that lock_sock_nested() no longer needs a stack canary. Add a noinline attribute on lock_sock_nested() so that calls to lock_sock() from net/core/sock.c are not inlined, none of them are fast path to deserve that: - sockopt_lock_sock() - sock_set_reuseport() - sock_set_reuseaddr() - sock_set_mark() - sock_set_keepalive() - sock_no_linger() - sock_bindtoindex() - sk_wait_data() - sock_set_rcvbuf() $ scripts/bloat-o-meter -t vmlinux.old vmlinux add/remove: 0/0 grow/shrink: 0/3 up/down: 0/-312 (-312) Function old new delta __lock_sock 192 188 -4 __lock_sock_fast 239 86 -153 lock_sock_nested 227 72 -155 Total: Before=24888707, After=24888395, chg -0.00% Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260223092716.3673939-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 1 - net/core/sock.c | 8 ++++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 66b56288c1d3..55b61e4b0d83 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1709,7 +1709,6 @@ static inline void lock_sock(struct sock *sk) lock_sock_nested(sk, 0); } -void __lock_sock(struct sock *sk); void __release_sock(struct sock *sk); void release_sock(struct sock *sk); diff --git a/net/core/sock.c b/net/core/sock.c index 693e6d80f501..cfb2a6209946 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3175,7 +3175,7 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag) } EXPORT_SYMBOL(sk_page_frag_refill); -void __lock_sock(struct sock *sk) +static void __lock_sock(struct sock *sk) __releases(&sk->sk_lock.slock) __acquires(&sk->sk_lock.slock) { @@ -3774,14 +3774,14 @@ void sock_init_data(struct socket *sock, struct sock *sk) } EXPORT_SYMBOL(sock_init_data); -void lock_sock_nested(struct sock *sk, int subclass) +void noinline lock_sock_nested(struct sock *sk, int subclass) { /* The sk_lock has mutex_lock() semantics here. */ mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_); might_sleep(); spin_lock_bh(&sk->sk_lock.slock); - if (sock_owned_by_user_nocheck(sk)) + if (unlikely(sock_owned_by_user_nocheck(sk))) __lock_sock(sk); sk->sk_lock.owned = 1; spin_unlock_bh(&sk->sk_lock.slock); @@ -3810,7 +3810,7 @@ bool __lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock) might_sleep(); spin_lock_bh(&sk->sk_lock.slock); - if (!sock_owned_by_user_nocheck(sk)) { + if (likely(!sock_owned_by_user_nocheck(sk))) { /* * Fast path return with bottom halves disabled and * sock::sk_lock.slock held. From f033335937d6f72a13bb38d82422eef30da31972 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 23 Feb 2026 09:34:45 +0000 Subject: [PATCH 0006/1561] udp: move udp6_csum_init() back to net/ipv6/udp.c This function has a single caller in net/ipv6/udp.c. Move it there so that the compiler can decide to (auto)inline it if he prefers to. IBT glue is removed anyway. With clang, we can see it was able to inline it and also inlined one other helper at the same time. UDPLITE removal will also help. $ scripts/bloat-o-meter -t vmlinux.old vmlinux.new add/remove: 0/2 grow/shrink: 1/0 up/down: 840/-785 (55) Function old new delta __udp6_lib_rcv 1247 2087 +840 __pfx_udp6_csum_init 16 - -16 udp6_csum_init 769 - -769 Total: Before=25074399, After=25074454, chg +0.00% Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260223093445.3696368-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/ip6_checksum.h | 2 -- net/ipv6/ip6_checksum.c | 47 -------------------------------------- net/ipv6/udp.c | 46 +++++++++++++++++++++++++++++++++++++ 3 files changed, 46 insertions(+), 49 deletions(-) diff --git a/include/net/ip6_checksum.h b/include/net/ip6_checksum.h index c8a96b888277..6677b3cc3972 100644 --- a/include/net/ip6_checksum.h +++ b/include/net/ip6_checksum.h @@ -82,6 +82,4 @@ static inline __sum16 udp_v6_check(int len, void udp6_set_csum(bool nocheck, struct sk_buff *skb, const struct in6_addr *saddr, const struct in6_addr *daddr, int len); - -int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto); #endif diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c index 377717045f8f..8bb68a0cdfd6 100644 --- a/net/ipv6/ip6_checksum.c +++ b/net/ipv6/ip6_checksum.c @@ -62,53 +62,6 @@ __sum16 csum_ipv6_magic(const struct in6_addr *saddr, EXPORT_SYMBOL(csum_ipv6_magic); #endif -int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto) -{ - int err; - - UDP_SKB_CB(skb)->partial_cov = 0; - UDP_SKB_CB(skb)->cscov = skb->len; - - if (proto == IPPROTO_UDPLITE) { - err = udplite_checksum_init(skb, uh); - if (err) - return err; - - if (UDP_SKB_CB(skb)->partial_cov) { - skb->csum = ip6_compute_pseudo(skb, proto); - return 0; - } - } - - /* To support RFC 6936 (allow zero checksum in UDP/IPV6 for tunnels) - * we accept a checksum of zero here. When we find the socket - * for the UDP packet we'll check if that socket allows zero checksum - * for IPv6 (set by socket option). - * - * Note, we are only interested in != 0 or == 0, thus the - * force to int. - */ - err = (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, - ip6_compute_pseudo); - if (err) - return err; - - if (skb->ip_summed == CHECKSUM_COMPLETE && !skb->csum_valid) { - /* If SW calculated the value, we know it's bad */ - if (skb->csum_complete_sw) - return 1; - - /* HW says the value is bad. Let's validate that. - * skb->csum is no longer the full packet checksum, - * so don't treat is as such. - */ - skb_checksum_complete_unset(skb); - } - - return 0; -} -EXPORT_SYMBOL(udp6_csum_init); - /* Function to set UDP checksum for an IPv6 UDP packet. This is intended * for the simple case like when setting the checksum for a UDP tunnel. */ diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 010b909275dd..48f73401adf4 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1069,6 +1069,52 @@ static int udp6_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb, return 0; } +static int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto) +{ + int err; + + UDP_SKB_CB(skb)->partial_cov = 0; + UDP_SKB_CB(skb)->cscov = skb->len; + + if (proto == IPPROTO_UDPLITE) { + err = udplite_checksum_init(skb, uh); + if (err) + return err; + + if (UDP_SKB_CB(skb)->partial_cov) { + skb->csum = ip6_compute_pseudo(skb, proto); + return 0; + } + } + + /* To support RFC 6936 (allow zero checksum in UDP/IPV6 for tunnels) + * we accept a checksum of zero here. When we find the socket + * for the UDP packet we'll check if that socket allows zero checksum + * for IPv6 (set by socket option). + * + * Note, we are only interested in != 0 or == 0, thus the + * force to int. + */ + err = (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, + ip6_compute_pseudo); + if (err) + return err; + + if (skb->ip_summed == CHECKSUM_COMPLETE && !skb->csum_valid) { + /* If SW calculated the value, we know it's bad */ + if (skb->csum_complete_sw) + return 1; + + /* HW says the value is bad. Let's validate that. + * skb->csum is no longer the full packet checksum, + * so don't treat is as such. + */ + skb_checksum_complete_unset(skb); + } + + return 0; +} + int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, int proto) { From bd5e5e1d41d316c47dd9001104b62033992daf6e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 23 Feb 2026 10:07:27 +0000 Subject: [PATCH 0007/1561] tcp: inline __tcp_v4_send_check() Inline __tcp_v4_send_check(), like __tcp_v6_send_check(). Move tcp_v4_send_check() to tcp_output.c close to its fast path caller (__tcp_transmit_skb()). Note __tcp_v4_send_check() is still out-of-line for tcp4_gso_segment() because it is called in an unlikely() section. $ scripts/bloat-o-meter -t vmlinux.0 vmlinux.1 add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-9 (-9) Function old new delta __tcp_v4_send_check 130 121 -9 Total: Before=25143100, After=25143091, chg -0.00% Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260223100729.3761597-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 10 +++++++++- net/ipv4/tcp_ipv4.c | 18 ------------------ net/ipv4/tcp_output.c | 10 +++++++++- 3 files changed, 18 insertions(+), 20 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 40e72b9cb85f..3c84433f3d57 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2382,7 +2382,15 @@ void tcp_gro_complete(struct sk_buff *skb); static inline void tcp_gro_complete(struct sk_buff *skb) { } #endif -void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr); +static inline void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, + __be32 daddr) +{ + struct tcphdr *th = tcp_hdr(skb); + + th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0); + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct tcphdr, check); +} static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 6264fc0b2be5..da708aff0623 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -661,24 +661,6 @@ out: return 0; } -void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr) -{ - struct tcphdr *th = tcp_hdr(skb); - - th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0); - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct tcphdr, check); -} - -/* This routine computes an IPv4 TCP checksum. */ -void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) -{ - const struct inet_sock *inet = inet_sk(sk); - - __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr); -} -EXPORT_IPV6_MOD(tcp_v4_send_check); - #define REPLY_OPTIONS_LEN (MAX_TCP_OPTION_SPACE / sizeof(__be32)) static bool tcp_v4_ao_sign_reset(const struct sock *sk, struct sk_buff *skb, diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 326b58ff1118..29056d6fc787 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1496,7 +1496,15 @@ static void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb) INDIRECT_CALLABLE_DECLARE(int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)); INDIRECT_CALLABLE_DECLARE(int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)); -INDIRECT_CALLABLE_DECLARE(void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)); + +/* This routine computes an IPv4 TCP checksum. */ +void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) +{ + const struct inet_sock *inet = inet_sk(sk); + + __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr); +} +EXPORT_IPV6_MOD(tcp_v4_send_check); /* This routine actually transmits TCP packets queued in by * tcp_do_sendmsg(). This is used by both the initial From 255688652b8c439eb35c68ec5cdac4aa63737d35 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 23 Feb 2026 10:07:28 +0000 Subject: [PATCH 0008/1561] tcp: move tcp_v6_send_check() to tcp_output.c Move tcp_v6_send_check() so that __tcp_transmit_skb() can inline it. $ scripts/bloat-o-meter -t vmlinux.1 vmlinux.2 add/remove: 0/0 grow/shrink: 1/0 up/down: 105/0 (105) Function old new delta __tcp_transmit_skb 3321 3426 +105 Total: Before=25143091, After=25143196, chg +0.00% Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260223100729.3761597-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 3 ++- net/ipv4/tcp_output.c | 19 ++++++++++++++++--- net/ipv6/tcp_ipv6.c | 5 ----- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 3c84433f3d57..feaddce9d805 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1132,7 +1132,8 @@ static inline int tcp_v6_sdif(const struct sk_buff *skb) extern const struct inet_connection_sock_af_ops ipv6_specific; -INDIRECT_CALLABLE_DECLARE(void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)); +void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb); + INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *skb)); void tcp_v6_early_demux(struct sk_buff *skb); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 29056d6fc787..fdddb16630a5 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1506,6 +1506,16 @@ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) } EXPORT_IPV6_MOD(tcp_v4_send_check); +#if IS_ENABLED(CONFIG_IPV6) +#include + +void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb) +{ + __tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr); +} +EXPORT_IPV6_MOD(tcp_v6_send_check); +#endif + /* This routine actually transmits TCP packets queued in by * tcp_do_sendmsg(). This is used by both the initial * transmission and possible later retransmissions. @@ -1667,9 +1677,12 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, /* BPF prog is the last one writing header option */ bpf_skops_write_hdr_opt(sk, skb, NULL, NULL, 0, &opts); - INDIRECT_CALL_INET(icsk->icsk_af_ops->send_check, - tcp_v6_send_check, tcp_v4_send_check, - sk, skb); +#if IS_ENABLED(CONFIG_IPV6) + if (likely(icsk->icsk_af_ops->send_check == tcp_v6_send_check)) + tcp_v6_send_check(sk, skb); + else +#endif + tcp_v4_send_check(sk, skb); if (likely(tcb->tcp_flags & TCPHDR_ACK)) tcp_event_ack_sent(sk, rcv_nxt); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index d10487b4e5bf..306ca0585b4a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2015,11 +2015,6 @@ static struct timewait_sock_ops tcp6_timewait_sock_ops = { .twsk_obj_size = sizeof(struct tcp6_timewait_sock), }; -INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb) -{ - __tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr); -} - const struct inet_connection_sock_af_ops ipv6_specific = { .queue_xmit = inet6_csk_xmit, .send_check = tcp_v6_send_check, From fcd3d039fab693df3d41ac9bcb12fb4e8ddd69fe Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 23 Feb 2026 10:07:29 +0000 Subject: [PATCH 0009/1561] tcp: make tcp_v{4,6}_send_check() static tcp_v{4,6}_send_check() are only called from tcp_output.c and should be made static so that the compiler does not need to put an out of line copy of them. Remove (struct inet_connection_sock_af_ops) send_check field and use instead @net_header_len. Move @net_header_len close to @queue_xmit for data locality as both are used in TCP tx fast path. $ scripts/bloat-o-meter -t vmlinux.2 vmlinux.3 add/remove: 0/2 grow/shrink: 0/3 up/down: 0/-172 (-172) Function old new delta __tcp_transmit_skb 3426 3423 -3 tcp_v4_send_check 136 132 -4 mptcp_subflow_init 777 763 -14 __pfx_tcp_v6_send_check 16 - -16 tcp_v6_send_check 135 - -135 Total: Before=25143196, After=25143024, chg -0.00% Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260223100729.3761597-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/inet_connection_sock.h | 3 +-- include/net/tcp.h | 3 --- net/ipv4/tcp_ipv4.c | 1 - net/ipv4/tcp_output.c | 8 +++----- net/ipv6/tcp_ipv6.c | 2 -- net/mptcp/subflow.c | 1 - net/tls/tls_device_fallback.c | 3 --- 7 files changed, 4 insertions(+), 17 deletions(-) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index ecb362025c4e..bbc9355871c7 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -34,7 +34,7 @@ struct tcp_congestion_ops; */ struct inet_connection_sock_af_ops { int (*queue_xmit)(struct sock *sk, struct sk_buff *skb, struct flowi *fl); - void (*send_check)(struct sock *sk, struct sk_buff *skb); + u16 net_header_len; int (*rebuild_header)(struct sock *sk); void (*sk_rx_dst_set)(struct sock *sk, const struct sk_buff *skb); int (*conn_request)(struct sock *sk, struct sk_buff *skb); @@ -43,7 +43,6 @@ struct inet_connection_sock_af_ops { struct dst_entry *dst, struct request_sock *req_unhash, bool *own_req); - u16 net_header_len; int (*setsockopt)(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); int (*getsockopt)(struct sock *sk, int level, int optname, diff --git a/include/net/tcp.h b/include/net/tcp.h index feaddce9d805..dfcd38089f11 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -531,7 +531,6 @@ u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops, * TCP v4 functions exported for the inet6 API */ -void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb); void tcp_v4_mtu_reduced(struct sock *sk); void tcp_req_err(struct sock *sk, u32 seq, bool abort); void tcp_ld_RTO_revert(struct sock *sk, u32 seq); @@ -1132,8 +1131,6 @@ static inline int tcp_v6_sdif(const struct sk_buff *skb) extern const struct inet_connection_sock_af_ops ipv6_specific; -void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb); - INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *skb)); void tcp_v6_early_demux(struct sk_buff *skb); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index da708aff0623..bd613e401d48 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2405,7 +2405,6 @@ EXPORT_IPV6_MOD(inet_sk_rx_dst_set); const struct inet_connection_sock_af_ops ipv4_specific = { .queue_xmit = ip_queue_xmit, - .send_check = tcp_v4_send_check, .rebuild_header = inet_sk_rebuild_header, .sk_rx_dst_set = inet_sk_rx_dst_set, .conn_request = tcp_v4_conn_request, diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index fdddb16630a5..1ef419c66a0e 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1498,22 +1498,20 @@ INDIRECT_CALLABLE_DECLARE(int ip_queue_xmit(struct sock *sk, struct sk_buff *skb INDIRECT_CALLABLE_DECLARE(int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)); /* This routine computes an IPv4 TCP checksum. */ -void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) +static void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) { const struct inet_sock *inet = inet_sk(sk); __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr); } -EXPORT_IPV6_MOD(tcp_v4_send_check); #if IS_ENABLED(CONFIG_IPV6) #include -void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb) +static void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb) { __tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr); } -EXPORT_IPV6_MOD(tcp_v6_send_check); #endif /* This routine actually transmits TCP packets queued in by @@ -1678,7 +1676,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, bpf_skops_write_hdr_opt(sk, skb, NULL, NULL, 0, &opts); #if IS_ENABLED(CONFIG_IPV6) - if (likely(icsk->icsk_af_ops->send_check == tcp_v6_send_check)) + if (likely(icsk->icsk_af_ops->net_header_len == sizeof(struct ipv6hdr))) tcp_v6_send_check(sk, skb); else #endif diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 306ca0585b4a..f17da56b449e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2017,7 +2017,6 @@ static struct timewait_sock_ops tcp6_timewait_sock_ops = { const struct inet_connection_sock_af_ops ipv6_specific = { .queue_xmit = inet6_csk_xmit, - .send_check = tcp_v6_send_check, .rebuild_header = inet6_sk_rebuild_header, .sk_rx_dst_set = inet6_sk_rx_dst_set, .conn_request = tcp_v6_conn_request, @@ -2049,7 +2048,6 @@ static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = { */ static const struct inet_connection_sock_af_ops ipv6_mapped = { .queue_xmit = ip_queue_xmit, - .send_check = tcp_v4_send_check, .rebuild_header = inet_sk_rebuild_header, .sk_rx_dst_set = inet_sk_rx_dst_set, .conn_request = tcp_v6_conn_request, diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index f66129f1e649..dd79c5b37a6b 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -2190,7 +2190,6 @@ void __init mptcp_subflow_init(void) subflow_v6m_specific = subflow_v6_specific; subflow_v6m_specific.queue_xmit = ipv4_specific.queue_xmit; - subflow_v6m_specific.send_check = ipv4_specific.send_check; subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len; subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced; subflow_v6m_specific.rebuild_header = subflow_rebuild_header; diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c index 03d508a45aae..de7d86bdd7ec 100644 --- a/net/tls/tls_device_fallback.c +++ b/net/tls/tls_device_fallback.c @@ -149,9 +149,6 @@ static int tls_enc_records(struct aead_request *aead_req, return rc; } -/* Can't use icsk->icsk_af_ops->send_check here because the ip addresses - * might have been changed by NAT. - */ static void update_chksum(struct sk_buff *skb, int headln) { struct tcphdr *th = tcp_hdr(skb); From 3197cce4d48c1e5f159f8f24a4e3b9bd83385c14 Mon Sep 17 00:00:00 2001 From: Gabriel Goller Date: Mon, 23 Feb 2026 11:12:54 +0100 Subject: [PATCH 0010/1561] docs: net: document neigh gc_stale_time sysctl Add missing documentation for a neighbor table garbage collector sysctl parameter in ip-sysctl.rst: neigh/default/gc_stale_time: controls how long an unused neighbor entry is kept before becoming eligible for garbage collection (default: 60 seconds) Signed-off-by: Gabriel Goller Link: https://patch.msgid.link/20260223101257.47563-1-g.goller@proxmox.com Signed-off-by: Jakub Kicinski --- Documentation/networking/ip-sysctl.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 6921d8594b84..9c90333530fa 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -202,6 +202,17 @@ neigh/default/gc_thresh3 - INTEGER Default: 1024 +neigh/default/gc_stale_time - INTEGER + Determines how long a neighbor entry can remain unused before it is + considered stale and eligible for garbage collection. Entries that have + not been used for longer than this time will be removed by the garbage + collector, unless they have active references, are marked as PERMANENT, + or carry the NTF_EXT_LEARNED or NTF_EXT_VALIDATED flag. Stale entries + are only removed by the periodic GC when there are at least gc_thresh1 + neighbors in the table. + + Default: 60 seconds + neigh/default/unres_qlen_bytes - INTEGER The maximum number of bytes which may be used by packets queued for each unresolved address by other network layers. From 472e079f8c9013057fe57e7ec1eb16fe695fc2d3 Mon Sep 17 00:00:00 2001 From: Satish Kharat Date: Mon, 23 Feb 2026 02:27:20 -0800 Subject: [PATCH 0011/1561] net:ethernet:enic: add VIC subsystem ids Add VIC subsystem id for 12xx, 13xx, 14xx and 15xxx series Signed-off-by: Satish Kharat Link: https://patch.msgid.link/20260223-enic-cscwi36355-v2-1-63488194a974@cisco.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cisco/enic/enic.h | 39 ++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h index 301b3f3114af..6959e85ab516 100644 --- a/drivers/net/ethernet/cisco/enic/enic.h +++ b/drivers/net/ethernet/cisco/enic/enic.h @@ -22,6 +22,45 @@ #define DRV_NAME "enic" #define DRV_DESCRIPTION "Cisco VIC Ethernet NIC Driver" +#define PCI_SUBDEV_ID_CISCO_VIC_1225 0x085 +#define PCI_SUBDEV_ID_CISCO_VIC_1225T 0x0CE +#define PCI_SUBDEV_ID_CISCO_VIC_1227 0x12E +#define PCI_SUBDEV_ID_CISCO_VIC_1227T 0x139 +#define PCI_SUBDEV_ID_CISCO_VIC_1240 0x084 +#define PCI_SUBDEV_ID_CISCO_VIC_1280 0x04F +#define PCI_SUBDEV_ID_CISCO_VIC_1285 0x0CD + +#define PCI_SUBDEV_ID_CISCO_VIC_1340 0x12C +#define PCI_SUBDEV_ID_CISCO_VIC_1380 0x137 +#define PCI_SUBDEV_ID_CISCO_VIC_1385 0x14D +#define PCI_SUBDEV_ID_CISCO_VIC_1387 0x15D + +#define PCI_SUBDEV_ID_CISCO_VIC_1440 0x0215 +#define PCI_SUBDEV_ID_CISCO_VIC_1455 0x0217 +#define PCI_SUBDEV_ID_CISCO_VIC_1457 0x0218 +#define PCI_SUBDEV_ID_CISCO_VIC_1467 0x02AF +#define PCI_SUBDEV_ID_CISCO_VIC_1477 0x2B0 +#define PCI_SUBDEV_ID_CISCO_VIC_1480 0x0216 +#define PCI_SUBDEV_ID_CISCO_VIC_1485 0x0219 +#define PCI_SUBDEV_ID_CISCO_VIC_1487 0x021A +#define PCI_SUBDEV_ID_CISCO_VIC_1495 0x024A +#define PCI_SUBDEV_ID_CISCO_VIC_1497 0x024B +#define PCI_SUBDEV_ID_CISCO_VIC_14425 0x02CF +#define PCI_SUBDEV_ID_CISCO_VIC_14825 0x02D0 + +#define PCI_SUBDEV_ID_CISCO_VIC_15230 0x02DF +#define PCI_SUBDEV_ID_CISCO_VIC_15231 0x02DB +#define PCI_SUBDEV_ID_CISCO_VIC_15235 0x02E4 +#define PCI_SUBDEV_ID_CISCO_VIC_15237 0x02F3 +#define PCI_SUBDEV_ID_CISCO_VIC_15238 0x02E8 +#define PCI_SUBDEV_ID_CISCO_VIC_15411 0x02DC +#define PCI_SUBDEV_ID_CISCO_VIC_15412 0x02E2 +#define PCI_SUBDEV_ID_CISCO_VIC_15420 0x02DE +#define PCI_SUBDEV_ID_CISCO_VIC_15422 0x02E1 +#define PCI_SUBDEV_ID_CISCO_VIC_15425 0x02F2 +#define PCI_SUBDEV_ID_CISCO_VIC_15427 0x02E0 +#define PCI_SUBDEV_ID_CISCO_VIC_15428 0x02DD + #define ENIC_BARS_MAX 6 #define ENIC_WQ_MAX 256 From 426f1f5b8752d68e729fb65a00795a0f919b47d1 Mon Sep 17 00:00:00 2001 From: Satish Kharat Date: Mon, 23 Feb 2026 02:27:21 -0800 Subject: [PATCH 0012/1561] net:ethernet:enic: map ethtool link modes by VIC type Report supported media types based on the VIC subsystem ID so ethtool reflects the hardware capabilities. Signed-off-by: Satish Kharat Link: https://patch.msgid.link/20260223-enic-cscwi36355-v2-2-63488194a974@cisco.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/cisco/enic/enic_ethtool.c | 155 +++++++++++++++++- 1 file changed, 148 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c index 471613899ec0..61541fb8db7b 100644 --- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c +++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c @@ -126,19 +126,160 @@ static void enic_intr_coal_set_rx(struct enic *enic, u32 timer) } } +static void enic_get_supp_adv_media_type(struct net_device *netdev, + struct ethtool_link_ksettings *ecmd) +{ + struct enic *enic = netdev_priv(netdev); + struct ethtool_link_settings *base = &ecmd->base; + u16 sub_dev_id = 0; + + base->port = PORT_OTHER; + + if (enic->pdev) + sub_dev_id = enic->pdev->subsystem_device; + + switch (sub_dev_id) { + case PCI_SUBDEV_ID_CISCO_VIC_1225: + case PCI_SUBDEV_ID_CISCO_VIC_1227: + ethtool_link_ksettings_add_link_mode(ecmd, supported, + 10000baseSR_Full); + ethtool_link_ksettings_add_link_mode(ecmd, supported, FIBRE); + base->port = PORT_FIBRE; + break; + case PCI_SUBDEV_ID_CISCO_VIC_1285: + ethtool_link_ksettings_add_link_mode(ecmd, supported, + 40000baseSR4_Full); + ethtool_link_ksettings_add_link_mode(ecmd, supported, FIBRE); + base->port = PORT_FIBRE; + break; + case PCI_SUBDEV_ID_CISCO_VIC_1225T: + case PCI_SUBDEV_ID_CISCO_VIC_1227T: + ethtool_link_ksettings_add_link_mode(ecmd, supported, + 10000baseT_Full); + ethtool_link_ksettings_add_link_mode(ecmd, supported, FIBRE); + base->port = PORT_FIBRE; + break; + case PCI_SUBDEV_ID_CISCO_VIC_1385: + case PCI_SUBDEV_ID_CISCO_VIC_1387: + ethtool_link_ksettings_add_link_mode(ecmd, supported, + 10000baseSR_Full); + ethtool_link_ksettings_add_link_mode(ecmd, supported, + 10000baseLR_Full); + ethtool_link_ksettings_add_link_mode(ecmd, supported, + 40000baseSR4_Full); + ethtool_link_ksettings_add_link_mode(ecmd, supported, + 40000baseLR4_Full); + ethtool_link_ksettings_add_link_mode(ecmd, supported, FIBRE); + base->port = PORT_FIBRE; + break; + case PCI_SUBDEV_ID_CISCO_VIC_1477: + case PCI_SUBDEV_ID_CISCO_VIC_1485: + case PCI_SUBDEV_ID_CISCO_VIC_1487: + case PCI_SUBDEV_ID_CISCO_VIC_1495: + case PCI_SUBDEV_ID_CISCO_VIC_1497: + ethtool_link_ksettings_add_link_mode(ecmd, supported, + 40000baseCR4_Full); + ethtool_link_ksettings_add_link_mode(ecmd, supported, + 40000baseSR4_Full); + ethtool_link_ksettings_add_link_mode(ecmd, supported, + 40000baseLR4_Full); + ethtool_link_ksettings_add_link_mode(ecmd, supported, + 100000baseSR4_Full); + ethtool_link_ksettings_add_link_mode(ecmd, supported, + 100000baseCR4_Full); + ethtool_link_ksettings_add_link_mode(ecmd, supported, FIBRE); + base->port = PORT_FIBRE; + break; + case PCI_SUBDEV_ID_CISCO_VIC_15235: + case PCI_SUBDEV_ID_CISCO_VIC_15237: + case PCI_SUBDEV_ID_CISCO_VIC_15238: + ethtool_link_ksettings_add_link_mode(ecmd, supported, + 40000baseCR4_Full); + ethtool_link_ksettings_add_link_mode(ecmd, supported, + 40000baseSR4_Full); + ethtool_link_ksettings_add_link_mode(ecmd, supported, + 40000baseLR4_Full); + ethtool_link_ksettings_add_link_mode(ecmd, supported, + 100000baseSR4_Full); + ethtool_link_ksettings_add_link_mode(ecmd, supported, + 100000baseCR4_Full); + ethtool_link_ksettings_add_link_mode(ecmd, supported, + 200000baseSR4_Full); + ethtool_link_ksettings_add_link_mode(ecmd, supported, + 200000baseDR4_Full); + ethtool_link_ksettings_add_link_mode(ecmd, supported, + 200000baseLR4_ER4_FR4_Full); + ethtool_link_ksettings_add_link_mode(ecmd, supported, FIBRE); + base->port = PORT_FIBRE; + break; + case PCI_SUBDEV_ID_CISCO_VIC_1455: + case PCI_SUBDEV_ID_CISCO_VIC_1457: + case PCI_SUBDEV_ID_CISCO_VIC_1467: + ethtool_link_ksettings_add_link_mode(ecmd, supported, + 10000baseT_Full); + ethtool_link_ksettings_add_link_mode(ecmd, supported, + 25000baseSR_Full); + ethtool_link_ksettings_add_link_mode(ecmd, supported, FIBRE); + base->port = PORT_FIBRE; + break; + case PCI_SUBDEV_ID_CISCO_VIC_15428: + case PCI_SUBDEV_ID_CISCO_VIC_15427: + case PCI_SUBDEV_ID_CISCO_VIC_15425: + ethtool_link_ksettings_add_link_mode(ecmd, supported, + 10000baseT_Full); + ethtool_link_ksettings_add_link_mode(ecmd, supported, + 25000baseSR_Full); + ethtool_link_ksettings_add_link_mode(ecmd, supported, + 50000baseSR_Full); + ethtool_link_ksettings_add_link_mode(ecmd, supported, FIBRE); + base->port = PORT_FIBRE; + break; + /* Do not mention port type as FIBRE for blade VICs */ + case PCI_SUBDEV_ID_CISCO_VIC_1240: + case PCI_SUBDEV_ID_CISCO_VIC_1280: + ethtool_link_ksettings_add_link_mode(ecmd, supported, + 10000baseKR_Full); + break; + case PCI_SUBDEV_ID_CISCO_VIC_1340: + case PCI_SUBDEV_ID_CISCO_VIC_1380: + case PCI_SUBDEV_ID_CISCO_VIC_1440: /* 10G/40G KR */ + case PCI_SUBDEV_ID_CISCO_VIC_1480: /* 10G/40G KR */ + ethtool_link_ksettings_add_link_mode(ecmd, supported, + 10000baseKR_Full); + ethtool_link_ksettings_add_link_mode(ecmd, supported, + 40000baseKR4_Full); + break; + case PCI_SUBDEV_ID_CISCO_VIC_14425: /* 25G KR */ + case PCI_SUBDEV_ID_CISCO_VIC_14825: /* 25G KR */ + case PCI_SUBDEV_ID_CISCO_VIC_15420: /* 25G KR */ + case PCI_SUBDEV_ID_CISCO_VIC_15422: /* 25G KR */ + ethtool_link_ksettings_add_link_mode(ecmd, supported, + 25000baseKR_Full); + break; + case PCI_SUBDEV_ID_CISCO_VIC_15411: /* 10G KR */ + case PCI_SUBDEV_ID_CISCO_VIC_15412: /* 10G KR */ + ethtool_link_ksettings_add_link_mode(ecmd, supported, + 10000baseKR_Full); + break; + case PCI_SUBDEV_ID_CISCO_VIC_15231: /* 25G/100G/200G KR */ + case PCI_SUBDEV_ID_CISCO_VIC_15230: /* 25G/100G/200G KR */ + ethtool_link_ksettings_add_link_mode(ecmd, supported, + 25000baseKR_Full); + ethtool_link_ksettings_add_link_mode(ecmd, supported, + 100000baseKR_Full); + ethtool_link_ksettings_add_link_mode(ecmd, supported, + 200000baseKR4_Full); + break; + } +} + static int enic_get_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *ecmd) { struct enic *enic = netdev_priv(netdev); struct ethtool_link_settings *base = &ecmd->base; - ethtool_link_ksettings_add_link_mode(ecmd, supported, - 10000baseT_Full); - ethtool_link_ksettings_add_link_mode(ecmd, supported, FIBRE); - ethtool_link_ksettings_add_link_mode(ecmd, advertising, - 10000baseT_Full); - ethtool_link_ksettings_add_link_mode(ecmd, advertising, FIBRE); - base->port = PORT_FIBRE; + enic_get_supp_adv_media_type(netdev, ecmd); if (netif_carrier_ok(netdev)) { base->speed = vnic_dev_port_speed(enic->vdev); From ebfc2be12ec599e3504ad2c1eaa9fc5892c94c9d Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 23 Feb 2026 09:34:15 +0000 Subject: [PATCH 0013/1561] net: stmmac: qcom-ethqos: rename "por" members to "rgmii_por" Rename the "por" and "num_por" members to indicate that they are for RGMII mode only as ethqos_configure_rgmii() is the only place that the values are programmed into the registers. Reviewed-by: Mohd Ayaan Anwar Tested-by: Mohd Ayaan Anwar Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vuSKV-0000000ASbg-28JK@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../stmicro/stmmac/dwmac-qcom-ethqos.c | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index af8204c0e188..690bd5c7e1a6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -87,8 +87,8 @@ struct ethqos_emac_por { }; struct ethqos_emac_driver_data { - const struct ethqos_emac_por *por; - unsigned int num_por; + const struct ethqos_emac_por *rgmii_por; + unsigned int num_rgmii_por; bool rgmii_config_loopback_en; bool has_emac_ge_3; const char *link_clk_name; @@ -108,8 +108,8 @@ struct qcom_ethqos { int serdes_speed; phy_interface_t phy_mode; - const struct ethqos_emac_por *por; - unsigned int num_por; + const struct ethqos_emac_por *rgmii_por; + unsigned int num_rgmii_por; bool rgmii_config_loopback_en; bool has_emac_ge_3; bool needs_sgmii_loopback; @@ -218,8 +218,8 @@ static const struct ethqos_emac_por emac_v2_3_0_por[] = { }; static const struct ethqos_emac_driver_data emac_v2_3_0_data = { - .por = emac_v2_3_0_por, - .num_por = ARRAY_SIZE(emac_v2_3_0_por), + .rgmii_por = emac_v2_3_0_por, + .num_rgmii_por = ARRAY_SIZE(emac_v2_3_0_por), .rgmii_config_loopback_en = true, .has_emac_ge_3 = false, }; @@ -234,8 +234,8 @@ static const struct ethqos_emac_por emac_v2_1_0_por[] = { }; static const struct ethqos_emac_driver_data emac_v2_1_0_data = { - .por = emac_v2_1_0_por, - .num_por = ARRAY_SIZE(emac_v2_1_0_por), + .rgmii_por = emac_v2_1_0_por, + .num_rgmii_por = ARRAY_SIZE(emac_v2_1_0_por), .rgmii_config_loopback_en = false, .has_emac_ge_3 = false, }; @@ -250,8 +250,8 @@ static const struct ethqos_emac_por emac_v3_0_0_por[] = { }; static const struct ethqos_emac_driver_data emac_v3_0_0_data = { - .por = emac_v3_0_0_por, - .num_por = ARRAY_SIZE(emac_v3_0_0_por), + .rgmii_por = emac_v3_0_0_por, + .num_rgmii_por = ARRAY_SIZE(emac_v3_0_0_por), .rgmii_config_loopback_en = false, .has_emac_ge_3 = true, .dwmac4_addrs = { @@ -282,8 +282,8 @@ static const struct ethqos_emac_por emac_v4_0_0_por[] = { }; static const struct ethqos_emac_driver_data emac_v4_0_0_data = { - .por = emac_v4_0_0_por, - .num_por = ARRAY_SIZE(emac_v4_0_0_por), + .rgmii_por = emac_v4_0_0_por, + .num_rgmii_por = ARRAY_SIZE(emac_v4_0_0_por), .rgmii_config_loopback_en = false, .has_emac_ge_3 = true, .link_clk_name = "phyaux", @@ -522,9 +522,9 @@ static int ethqos_configure_rgmii(struct qcom_ethqos *ethqos, int speed) u32 val; /* Reset to POR values and enable clk */ - for (i = 0; i < ethqos->num_por; i++) - rgmii_writel(ethqos, ethqos->por[i].value, - ethqos->por[i].offset); + for (i = 0; i < ethqos->num_rgmii_por; i++) + rgmii_writel(ethqos, ethqos->rgmii_por[i].value, + ethqos->rgmii_por[i].offset); ethqos_set_func_clk_en(ethqos); /* Initialize the DLL first */ @@ -780,8 +780,8 @@ static int qcom_ethqos_probe(struct platform_device *pdev) "Failed to map rgmii resource\n"); data = of_device_get_match_data(dev); - ethqos->por = data->por; - ethqos->num_por = data->num_por; + ethqos->rgmii_por = data->rgmii_por; + ethqos->num_rgmii_por = data->num_rgmii_por; ethqos->rgmii_config_loopback_en = data->rgmii_config_loopback_en; ethqos->has_emac_ge_3 = data->has_emac_ge_3; ethqos->needs_sgmii_loopback = data->needs_sgmii_loopback; From e6f43a41ba6206b691c8e9a301f0b4528b8f2278 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 23 Feb 2026 09:34:20 +0000 Subject: [PATCH 0014/1561] net: stmmac: qcom-ethqos: remove register field value obfuscations Convert the register field values to something more human readable. For example, using (BIT(29) | BIT(27)) to update a register field that consists of bits 29:27 is an obfuscated way of writing decimal 5 for this field. The comment above needs to explain that this value is 5. Worse still is BIT(12) | GENMASK(9, 8), which is used to hide the decimal value 19 for the bitfield 16:8. Fix these, and a few others by using FIELD_PREP(). While it means we have bare numeric constants, this is more preferable than having the obfuscation. Reviewed-by: Mohd Ayaan Anwar Tested-by: Mohd Ayaan Anwar Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vuSKa-0000000ASbo-2zQg@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../stmicro/stmmac/dwmac-qcom-ethqos.c | 23 ++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 690bd5c7e1a6..50b95fd19f9d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -361,10 +361,12 @@ static int ethqos_dll_configure(struct qcom_ethqos *ethqos) SDCC_HC_REG_DLL_CONFIG2); rgmii_updatel(ethqos, SDCC_DLL_CONFIG2_MCLK_FREQ_CALC, - 0x1A << 10, SDCC_HC_REG_DLL_CONFIG2); + FIELD_PREP(SDCC_DLL_CONFIG2_MCLK_FREQ_CALC, 26), + SDCC_HC_REG_DLL_CONFIG2); rgmii_updatel(ethqos, SDCC_DLL_CONFIG2_DDR_TRAFFIC_INIT_SEL, - BIT(2), SDCC_HC_REG_DLL_CONFIG2); + FIELD_PREP(SDCC_DLL_CONFIG2_DDR_TRAFFIC_INIT_SEL, + 1), SDCC_HC_REG_DLL_CONFIG2); rgmii_setmask(ethqos, SDCC_DLL_CONFIG2_DDR_TRAFFIC_INIT_SW, SDCC_HC_REG_DLL_CONFIG2); @@ -425,11 +427,13 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) if (ethqos->has_emac_ge_3) { /* 0.9 ns */ rgmii_updatel(ethqos, SDCC_DDR_CONFIG_PRG_RCLK_DLY, - 115, SDCC_HC_REG_DDR_CONFIG); + FIELD_PREP(SDCC_DDR_CONFIG_PRG_RCLK_DLY, + 115), SDCC_HC_REG_DDR_CONFIG); } else { /* 1.8 ns */ rgmii_updatel(ethqos, SDCC_DDR_CONFIG_PRG_RCLK_DLY, - 57, SDCC_HC_REG_DDR_CONFIG); + FIELD_PREP(SDCC_DDR_CONFIG_PRG_RCLK_DLY, + 57), SDCC_HC_REG_DDR_CONFIG); } rgmii_setmask(ethqos, SDCC_DDR_CONFIG_PRG_DLY_EN, SDCC_HC_REG_DDR_CONFIG); @@ -451,7 +455,8 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) rgmii_updatel(ethqos, RGMII_CONFIG2_TX_CLK_PHASE_SHIFT_EN, phase_shift, RGMII_IO_MACRO_CONFIG2); rgmii_updatel(ethqos, RGMII_CONFIG_MAX_SPD_PRG_2, - BIT(6), RGMII_IO_MACRO_CONFIG); + FIELD_PREP(RGMII_CONFIG_MAX_SPD_PRG_2, 1), + RGMII_IO_MACRO_CONFIG); rgmii_clrmask(ethqos, RGMII_CONFIG2_RSVD_CONFIG15, RGMII_IO_MACRO_CONFIG2); @@ -464,7 +469,8 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) /* Write 0x5 to PRG_RCLK_DLY_CODE */ rgmii_updatel(ethqos, SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_CODE, - (BIT(29) | BIT(27)), SDCC_HC_REG_DDR_CONFIG); + FIELD_PREP(SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_CODE, + 5), SDCC_HC_REG_DDR_CONFIG); rgmii_setmask(ethqos, SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY, SDCC_HC_REG_DDR_CONFIG); rgmii_setmask(ethqos, SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_EN, @@ -487,7 +493,7 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) rgmii_updatel(ethqos, RGMII_CONFIG2_TX_CLK_PHASE_SHIFT_EN, phase_shift, RGMII_IO_MACRO_CONFIG2); rgmii_updatel(ethqos, RGMII_CONFIG_MAX_SPD_PRG_9, - BIT(12) | GENMASK(9, 8), + FIELD_PREP(RGMII_CONFIG_MAX_SPD_PRG_9, 19), RGMII_IO_MACRO_CONFIG); rgmii_clrmask(ethqos, RGMII_CONFIG2_RSVD_CONFIG15, RGMII_IO_MACRO_CONFIG2); @@ -499,7 +505,8 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) RGMII_IO_MACRO_CONFIG2); /* Write 0x5 to PRG_RCLK_DLY_CODE */ rgmii_updatel(ethqos, SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_CODE, - (BIT(29) | BIT(27)), SDCC_HC_REG_DDR_CONFIG); + FIELD_PREP(SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_CODE, + 5), SDCC_HC_REG_DDR_CONFIG); rgmii_setmask(ethqos, SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY, SDCC_HC_REG_DDR_CONFIG); rgmii_setmask(ethqos, SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_EN, From 649a00c3926fd3a52fc59bc52769ddb94a746bf8 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 23 Feb 2026 09:34:26 +0000 Subject: [PATCH 0015/1561] net: stmmac: qcom-ethqos: change ethqos_configure*() to return void The ethqos_configure*() family of functions always return zero, and the return value is never checked. Change the int return type to void. Reviewed-by: Mohd Ayaan Anwar Tested-by: Mohd Ayaan Anwar Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vuSKg-0000000ASbv-0iWL@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 50b95fd19f9d..168f0fed68c0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -100,7 +100,7 @@ struct ethqos_emac_driver_data { struct qcom_ethqos { struct platform_device *pdev; void __iomem *rgmii_base; - int (*configure_func)(struct qcom_ethqos *ethqos, int speed); + void (*configure_func)(struct qcom_ethqos *ethqos, int speed); unsigned int link_clk_rate; struct clk *link_clk; @@ -522,7 +522,7 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) return 0; } -static int ethqos_configure_rgmii(struct qcom_ethqos *ethqos, int speed) +static void ethqos_configure_rgmii(struct qcom_ethqos *ethqos, int speed) { struct device *dev = ðqos->pdev->dev; unsigned int i; @@ -587,8 +587,6 @@ static int ethqos_configure_rgmii(struct qcom_ethqos *ethqos, int speed) ethqos_dll_configure(ethqos); ethqos_rgmii_macro_init(ethqos, speed); - - return 0; } static void ethqos_set_serdes_speed(struct qcom_ethqos *ethqos, int speed) @@ -607,7 +605,7 @@ static void ethqos_pcs_set_inband(struct stmmac_priv *priv, bool enable) /* On interface toggle MAC registers gets reset. * Configure MAC block for SGMII on ethernet phy link up */ -static int ethqos_configure_sgmii(struct qcom_ethqos *ethqos, int speed) +static void ethqos_configure_sgmii(struct qcom_ethqos *ethqos, int speed) { struct net_device *dev = platform_get_drvdata(ethqos->pdev); struct stmmac_priv *priv = netdev_priv(dev); @@ -638,11 +636,9 @@ static int ethqos_configure_sgmii(struct qcom_ethqos *ethqos, int speed) ethqos_pcs_set_inband(priv, true); break; } - - return 0; } -static int ethqos_configure(struct qcom_ethqos *ethqos, int speed) +static void ethqos_configure(struct qcom_ethqos *ethqos, int speed) { return ethqos->configure_func(ethqos, speed); } From 3baa791f19be6a4701047fa23058e84fe4ccc1c4 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 23 Feb 2026 09:34:31 +0000 Subject: [PATCH 0016/1561] net: stmmac: qcom-ethqos: move qcom_ethqos_set_sgmii_loopback() up ethqos_set_func_clk_en() configures both SGMII loopback and the RGMII functional clock setting. qcom_ethqos_set_sgmii_loopback() is only called from within ethqos_set_func_clk_en(), and checks for PHY_INTERFACE_MODE_2500BASEX. Move qcom_ethqos_set_sgmii_loopback() to the callers of ethqos_set_func_clk_en() except for ethqos_configure_rgmii() where we know that ethqos->phy_mode will not be PHY_INTERFACE_MODE_2500BASEX. Reviewed-by: Mohd Ayaan Anwar Tested-by: Mohd Ayaan Anwar Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vuSKl-0000000ASc1-18ka@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 168f0fed68c0..964eca46a653 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -204,7 +204,6 @@ qcom_ethqos_set_sgmii_loopback(struct qcom_ethqos *ethqos, bool enable) static void ethqos_set_func_clk_en(struct qcom_ethqos *ethqos) { - qcom_ethqos_set_sgmii_loopback(ethqos, true); rgmii_setmask(ethqos, RGMII_CONFIG_FUNC_CLK_EN, RGMII_IO_MACRO_CONFIG); } @@ -532,6 +531,7 @@ static void ethqos_configure_rgmii(struct qcom_ethqos *ethqos, int speed) for (i = 0; i < ethqos->num_rgmii_por; i++) rgmii_writel(ethqos, ethqos->rgmii_por[i].value, ethqos->rgmii_por[i].offset); + ethqos_set_func_clk_en(ethqos); /* Initialize the DLL first */ @@ -701,6 +701,7 @@ static int ethqos_clks_config(void *priv, bool enabled) * cycled. The actual configuration will be adjusted once * ethqos_fix_mac_speed() is invoked. */ + qcom_ethqos_set_sgmii_loopback(ethqos, true); ethqos_set_func_clk_en(ethqos); } else { clk_disable_unprepare(ethqos->link_clk); @@ -809,6 +810,8 @@ static int qcom_ethqos_probe(struct platform_device *pdev) ethqos->serdes_speed = SPEED_1000; ethqos_update_link_clk(ethqos, SPEED_1000); + + qcom_ethqos_set_sgmii_loopback(ethqos, true); ethqos_set_func_clk_en(ethqos); plat_dat->bsp_priv = ethqos; From 834c72ca306cf4c8dc84b39447fee316c9f868ab Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 23 Feb 2026 09:34:36 +0000 Subject: [PATCH 0017/1561] net: stmmac: qcom-ethqos: move loopback disable to .mac_finish() Loopback is enabled to allow the dwmac soft reset to succeed. This is enabled when clocks are enabled in ethqos_clks_config(), which happens at driver probe and runtime PM resume - e.g. when the network device is administratively brought up. Currently, the loopback is disabled when the link comes up (via .mac_link_up() calling this driver's .fix_mac_speed().) Move the qcom_ethqos_set_sgmii_loopback() call which disables loopback from ethqos_fix_mac_speed() into ethqos' SerDes specific .mac_finish() method so that loopback is disabled a little earlier after reset has completed, and dwmac setup has completed. Reviewed-by: Mohd Ayaan Anwar Tested-by: Mohd Ayaan Anwar Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vuSKq-0000000AScA-1Wh3@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 964eca46a653..bd5d3bf90400 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -647,7 +647,6 @@ static void ethqos_fix_mac_speed(void *priv, int speed, unsigned int mode) { struct qcom_ethqos *ethqos = priv; - qcom_ethqos_set_sgmii_loopback(ethqos, false); ethqos_update_link_clk(ethqos, speed); ethqos_configure(ethqos, speed); } @@ -684,6 +683,17 @@ static void qcom_ethqos_serdes_powerdown(struct net_device *ndev, void *priv) phy_exit(ethqos->serdes_phy); } +static int ethqos_mac_finish_serdes(struct net_device *ndev, void *priv, + unsigned int mode, + phy_interface_t interface) +{ + struct qcom_ethqos *ethqos = priv; + + qcom_ethqos_set_sgmii_loopback(ethqos, false); + + return 0; +} + static int ethqos_clks_config(void *priv, bool enabled) { struct qcom_ethqos *ethqos = priv; @@ -770,6 +780,7 @@ static int qcom_ethqos_probe(struct platform_device *pdev) case PHY_INTERFACE_MODE_2500BASEX: case PHY_INTERFACE_MODE_SGMII: ethqos->configure_func = ethqos_configure_sgmii; + plat_dat->mac_finish = ethqos_mac_finish_serdes; break; default: dev_err(dev, "Unsupported phy mode %s\n", From cd0aa651535092afd5d776bfe94e4fdf750f89c3 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 23 Feb 2026 09:34:41 +0000 Subject: [PATCH 0018/1561] net: stmmac: pass interface mode into fix_mac_speed() method Pass the current interface mode reported by phylink into the fix_mac_speed() method. This will be used by qcom-ethqos for its "SGMII" configuration. Reviewed-by: Maxime Chevallier Tested-by: Mohd Ayaan Anwar Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vuSKv-0000000AScG-1zv6@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c | 3 ++- drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c | 11 +++++++---- drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c | 3 ++- .../net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 3 ++- drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c | 3 ++- drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c | 11 +++++++---- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 3 ++- include/linux/stmmac.h | 3 ++- 8 files changed, 26 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c index d043bad4a862..0495437d3a6e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c @@ -112,7 +112,8 @@ static int dwc_qos_probe(struct platform_device *pdev, #define AUTO_CAL_STATUS 0x880c #define AUTO_CAL_STATUS_ACTIVE BIT(31) -static void tegra_eqos_fix_speed(void *bsp_priv, int speed, unsigned int mode) +static void tegra_eqos_fix_speed(void *bsp_priv, phy_interface_t interface, + int speed, unsigned int mode) { struct tegra_eqos *eqos = bsp_priv; bool needs_calibration = false; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c index c4e85197629d..9f5a15b81f8a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c @@ -48,7 +48,8 @@ struct imx_dwmac_ops { int (*fix_soc_reset)(struct stmmac_priv *priv); int (*set_intf_mode)(struct imx_priv_data *dwmac, u8 phy_intf_sel); - void (*fix_mac_speed)(void *priv, int speed, unsigned int mode); + void (*fix_mac_speed)(void *priv, phy_interface_t interface, + int speed, unsigned int mode); }; struct imx_priv_data { @@ -160,7 +161,8 @@ static int imx_dwmac_set_clk_tx_rate(void *bsp_priv, struct clk *clk_tx_i, return stmmac_set_clk_tx_rate(bsp_priv, clk_tx_i, interface, speed); } -static void imx_dwmac_fix_speed(void *priv, int speed, unsigned int mode) +static void imx_dwmac_fix_speed(void *priv, phy_interface_t interface, + int speed, unsigned int mode) { struct plat_stmmacenet_data *plat_dat; struct imx_priv_data *dwmac = priv; @@ -185,13 +187,14 @@ static void imx_dwmac_fix_speed(void *priv, int speed, unsigned int mode) dev_err(dwmac->dev, "failed to set tx rate %lu\n", rate); } -static void imx93_dwmac_fix_speed(void *priv, int speed, unsigned int mode) +static void imx93_dwmac_fix_speed(void *priv, phy_interface_t interface, + int speed, unsigned int mode) { struct imx_priv_data *dwmac = priv; unsigned int iface; int ctrl, old_ctrl; - imx_dwmac_fix_speed(priv, speed, mode); + imx_dwmac_fix_speed(priv, interface, speed, mode); if (!dwmac || mode != MLO_AN_FIXED) return; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c index 815213223583..9c51c96223ad 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -143,7 +143,8 @@ static struct stmmac_pci_info loongson_gmac_pci_info = { .setup = loongson_gmac_data, }; -static void loongson_gnet_fix_speed(void *priv, int speed, unsigned int mode) +static void loongson_gnet_fix_speed(void *priv, phy_interface_t interface, + int speed, unsigned int mode) { struct loongson_data *ld = (struct loongson_data *)priv; struct net_device *ndev = dev_get_drvdata(ld->dev); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index bd5d3bf90400..9b29516a5a7c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -643,7 +643,8 @@ static void ethqos_configure(struct qcom_ethqos *ethqos, int speed) return ethqos->configure_func(ethqos, speed); } -static void ethqos_fix_mac_speed(void *priv, int speed, unsigned int mode) +static void ethqos_fix_mac_speed(void *priv, phy_interface_t interface, + int speed, unsigned int mode) { struct qcom_ethqos *ethqos = priv; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index 4c8991f3b38d..c6b99814d391 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -72,7 +72,8 @@ struct socfpga_dwmac { const struct socfpga_dwmac_ops *ops; }; -static void socfpga_dwmac_fix_mac_speed(void *bsp_priv, int speed, +static void socfpga_dwmac_fix_mac_speed(void *bsp_priv, + phy_interface_t interface, int speed, unsigned int mode) { struct socfpga_dwmac *dwmac = (struct socfpga_dwmac *)bsp_priv; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c index f50547b67fbc..6ebbf95d158f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c @@ -91,11 +91,13 @@ struct sti_dwmac { struct regmap *regmap; bool gmac_en; int speed; - void (*fix_retime_src)(void *priv, int speed, unsigned int mode); + void (*fix_retime_src)(void *priv, phy_interface_t interface, + int speed, unsigned int mode); }; struct sti_dwmac_of_data { - void (*fix_retime_src)(void *priv, int speed, unsigned int mode); + void (*fix_retime_src)(void *priv, phy_interface_t interface, + int speed, unsigned int mode); }; enum { @@ -114,7 +116,8 @@ static u32 stih4xx_tx_retime_val[] = { | STIH4XX_ETH_SEL_INTERNAL_NOTEXT_PHYCLK, }; -static void stih4xx_fix_retime_src(void *priv, int spd, unsigned int mode) +static void stih4xx_fix_retime_src(void *priv, phy_interface_t interface, + int spd, unsigned int mode) { struct sti_dwmac *dwmac = priv; u32 src = dwmac->tx_retime_src; @@ -170,7 +173,7 @@ static int sti_set_phy_intf_sel(void *bsp_priv, u8 phy_intf_sel) val = (dwmac->interface == PHY_INTERFACE_MODE_REVMII) ? 0 : ENMII; regmap_update_bits(regmap, reg, ENMII_MASK, val); - dwmac->fix_retime_src(dwmac, dwmac->speed, 0); + dwmac->fix_retime_src(dwmac, dwmac->interface, dwmac->speed, 0); return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 82375d34ad57..d7c730179a7f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1071,7 +1071,8 @@ static void stmmac_mac_link_up(struct phylink_config *config, } if (priv->plat->fix_mac_speed) - priv->plat->fix_mac_speed(priv->plat->bsp_priv, speed, mode); + priv->plat->fix_mac_speed(priv->plat->bsp_priv, interface, + speed, mode); if (!duplex) ctrl &= ~priv->hw->link.duplex; diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 32352a216567..b96ae9dadfab 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -256,7 +256,8 @@ struct plat_stmmacenet_data { int (*set_phy_intf_sel)(void *priv, u8 phy_intf_sel); int (*set_clk_tx_rate)(void *priv, struct clk *clk_tx_i, phy_interface_t interface, int speed); - void (*fix_mac_speed)(void *priv, int speed, unsigned int mode); + void (*fix_mac_speed)(void *priv, phy_interface_t interface, + int speed, unsigned int mode); int (*fix_soc_reset)(struct stmmac_priv *priv); int (*serdes_powerup)(struct net_device *ndev, void *priv); void (*serdes_powerdown)(struct net_device *ndev, void *priv); From b560938163db801d5ba27fa55a1aa68ccaf1ef2e Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 23 Feb 2026 09:34:46 +0000 Subject: [PATCH 0019/1561] net: stmmac: qcom-ethqos: pass phy interface mode to configs Pass the current phylink phy interface mode to the RGMII and "SGMII" configuration functions. Reviewed-by: Mohd Ayaan Anwar Tested-by: Mohd Ayaan Anwar Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vuSL0-0000000AScM-2TN0@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 9b29516a5a7c..e2dd334ffd31 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -100,7 +100,8 @@ struct ethqos_emac_driver_data { struct qcom_ethqos { struct platform_device *pdev; void __iomem *rgmii_base; - void (*configure_func)(struct qcom_ethqos *ethqos, int speed); + void (*configure_func)(struct qcom_ethqos *ethqos, + phy_interface_t interface, int speed); unsigned int link_clk_rate; struct clk *link_clk; @@ -521,7 +522,8 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) return 0; } -static void ethqos_configure_rgmii(struct qcom_ethqos *ethqos, int speed) +static void ethqos_configure_rgmii(struct qcom_ethqos *ethqos, + phy_interface_t interface, int speed) { struct device *dev = ðqos->pdev->dev; unsigned int i; @@ -605,7 +607,8 @@ static void ethqos_pcs_set_inband(struct stmmac_priv *priv, bool enable) /* On interface toggle MAC registers gets reset. * Configure MAC block for SGMII on ethernet phy link up */ -static void ethqos_configure_sgmii(struct qcom_ethqos *ethqos, int speed) +static void ethqos_configure_sgmii(struct qcom_ethqos *ethqos, + phy_interface_t interface, int speed) { struct net_device *dev = platform_get_drvdata(ethqos->pdev); struct stmmac_priv *priv = netdev_priv(dev); @@ -638,9 +641,10 @@ static void ethqos_configure_sgmii(struct qcom_ethqos *ethqos, int speed) } } -static void ethqos_configure(struct qcom_ethqos *ethqos, int speed) +static void ethqos_configure(struct qcom_ethqos *ethqos, + phy_interface_t interface, int speed) { - return ethqos->configure_func(ethqos, speed); + return ethqos->configure_func(ethqos, interface, speed); } static void ethqos_fix_mac_speed(void *priv, phy_interface_t interface, @@ -649,7 +653,7 @@ static void ethqos_fix_mac_speed(void *priv, phy_interface_t interface, struct qcom_ethqos *ethqos = priv; ethqos_update_link_clk(ethqos, speed); - ethqos_configure(ethqos, speed); + ethqos_configure(ethqos, interface, speed); } static int qcom_ethqos_serdes_powerup(struct net_device *ndev, void *priv) From b8ab32315e22c0ef2c466d4f5ef4de0b5852e735 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 23 Feb 2026 09:34:51 +0000 Subject: [PATCH 0020/1561] net: stmmac: qcom-ethqos: use phy interface mode for inband qcom-ethqos currently forces inband to be enabled for the Cisco SGMII speeds (1G, 100M and 10M) but not for 2500BASE-X (2.5G). Rather than using the speed to determine the forced inband state, use phylink's PHY interface mode which will switch between SGMII for the 10M, 100M and 1G speeds, and 2500BASE-X for 2.5G. Reviewed-by: Mohd Ayaan Anwar Tested-by: Mohd Ayaan Anwar Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vuSL5-0000000AScX-2wuM@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../stmicro/stmmac/dwmac-qcom-ethqos.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index e2dd334ffd31..cd6d1660bdb2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -615,30 +615,26 @@ static void ethqos_configure_sgmii(struct qcom_ethqos *ethqos, switch (speed) { case SPEED_2500: - rgmii_setmask(ethqos, RGMII_CONFIG2_RGMII_CLK_SEL_CFG, - RGMII_IO_MACRO_CONFIG2); - ethqos_set_serdes_speed(ethqos, SPEED_2500); - ethqos_pcs_set_inband(priv, false); - break; case SPEED_1000: rgmii_setmask(ethqos, RGMII_CONFIG2_RGMII_CLK_SEL_CFG, RGMII_IO_MACRO_CONFIG2); - ethqos_set_serdes_speed(ethqos, SPEED_1000); - ethqos_pcs_set_inband(priv, true); break; case SPEED_100: - ethqos_set_serdes_speed(ethqos, SPEED_1000); - ethqos_pcs_set_inband(priv, true); break; case SPEED_10: rgmii_updatel(ethqos, RGMII_CONFIG_SGMII_CLK_DVDR, FIELD_PREP(RGMII_CONFIG_SGMII_CLK_DVDR, SGMII_10M_RX_CLK_DVDR), RGMII_IO_MACRO_CONFIG); - ethqos_set_serdes_speed(ethqos, SPEED_1000); - ethqos_pcs_set_inband(priv, true); break; } + + if (interface == PHY_INTERFACE_MODE_SGMII) + ethqos_set_serdes_speed(ethqos, SPEED_1000); + else + ethqos_set_serdes_speed(ethqos, SPEED_2500); + + ethqos_pcs_set_inband(priv, interface == PHY_INTERFACE_MODE_SGMII); } static void ethqos_configure(struct qcom_ethqos *ethqos, From fb42f19e671f831bab406d795664f98e1242d478 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 23 Feb 2026 09:34:56 +0000 Subject: [PATCH 0021/1561] net: stmmac: qcom-ethqos: move SerDes speed configuration Move the SerDes speed configuration to phylink's .mac_finish() stage so that the SerDes is appropriately configured for the interface mode prior to the link coming up. Reviewed-by: Mohd Ayaan Anwar Tested-by: Mohd Ayaan Anwar Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vuSLA-0000000AScc-3RFf@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index cd6d1660bdb2..153f6abc77cd 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -629,11 +629,6 @@ static void ethqos_configure_sgmii(struct qcom_ethqos *ethqos, break; } - if (interface == PHY_INTERFACE_MODE_SGMII) - ethqos_set_serdes_speed(ethqos, SPEED_1000); - else - ethqos_set_serdes_speed(ethqos, SPEED_2500); - ethqos_pcs_set_inband(priv, interface == PHY_INTERFACE_MODE_SGMII); } @@ -692,6 +687,11 @@ static int ethqos_mac_finish_serdes(struct net_device *ndev, void *priv, qcom_ethqos_set_sgmii_loopback(ethqos, false); + if (interface == PHY_INTERFACE_MODE_SGMII) + ethqos_set_serdes_speed(ethqos, SPEED_1000); + else if (interface == PHY_INTERFACE_MODE_2500BASEX) + ethqos_set_serdes_speed(ethqos, SPEED_2500); + return 0; } From 9192320a65b7b1496fa9d39f7207d6d1fca48b06 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 23 Feb 2026 09:35:01 +0000 Subject: [PATCH 0022/1561] net: stmmac: qcom-ethqos: convert to set_clk_tx_rate() method Set the RGMII link clock using the set_clk_tx_rate() method rather than coding it into the .fix_mac_speed() method. This simplifies ethqos's ethqos_fix_mac_speed(). Tested-by: Mohd Ayaan Anwar Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vuSLF-0000000ASci-42kh@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../stmicro/stmmac/dwmac-qcom-ethqos.c | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 153f6abc77cd..ad5b5d950fff 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -103,7 +103,6 @@ struct qcom_ethqos { void (*configure_func)(struct qcom_ethqos *ethqos, phy_interface_t interface, int speed); - unsigned int link_clk_rate; struct clk *link_clk; struct phy *serdes_phy; int serdes_speed; @@ -175,19 +174,20 @@ static void rgmii_dump(void *priv) rgmii_readl(ethqos, EMAC_SYSTEM_LOW_POWER_DEBUG)); } -static void -ethqos_update_link_clk(struct qcom_ethqos *ethqos, int speed) +static int ethqos_set_clk_tx_rate(void *bsp_priv, struct clk *clk_tx_i, + phy_interface_t interface, int speed) { + struct qcom_ethqos *ethqos = bsp_priv; long rate; - if (!phy_interface_mode_is_rgmii(ethqos->phy_mode)) - return; + if (!phy_interface_mode_is_rgmii(interface)) + return 0; rate = rgmii_clock(speed); - if (rate > 0) - ethqos->link_clk_rate = rate * 2; + if (rate < 0) + return rate; - clk_set_rate(ethqos->link_clk, ethqos->link_clk_rate); + return clk_set_rate(ethqos->link_clk, rate * 2); } static void @@ -643,7 +643,6 @@ static void ethqos_fix_mac_speed(void *priv, phy_interface_t interface, { struct qcom_ethqos *ethqos = priv; - ethqos_update_link_clk(ethqos, speed); ethqos_configure(ethqos, interface, speed); } @@ -821,12 +820,14 @@ static int qcom_ethqos_probe(struct platform_device *pdev) "Failed to get serdes phy\n"); ethqos->serdes_speed = SPEED_1000; - ethqos_update_link_clk(ethqos, SPEED_1000); + ethqos_set_clk_tx_rate(ethqos, NULL, plat_dat->phy_interface, + SPEED_1000); qcom_ethqos_set_sgmii_loopback(ethqos, true); ethqos_set_func_clk_en(ethqos); plat_dat->bsp_priv = ethqos; + plat_dat->set_clk_tx_rate = ethqos_set_clk_tx_rate; plat_dat->fix_mac_speed = ethqos_fix_mac_speed; plat_dat->dump_debug_regs = rgmii_dump; plat_dat->ptp_clk_freq_config = ethqos_ptp_clk_freq_config; From fca59a2dd0b8ce55ad090707ec425d6d37b8b932 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 23 Feb 2026 11:35:01 +0000 Subject: [PATCH 0023/1561] tcp: reduce calls to tcp_schedule_loss_probe() For RPC workloads, we alternate tcp_schedule_loss_probe() calls from output path and from input path, with tp->packets_out value oscillating between !zero and zero, leading to poor branch prediction. Move tp->packets_out check from tcp_schedule_loss_probe() to tcp_set_xmit_timer(). We avoid one call to tcp_schedule_loss_probe() from tcp_ack() path for typical RPC workloads, while improving branch prediction. Signed-off-by: Eric Dumazet Reviewed-by: Neal Cardwell Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260223113501.4070245-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_output.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e7b41abb82aa..6c3f1d031444 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3552,7 +3552,7 @@ void tcp_rearm_rto(struct sock *sk) /* Try to schedule a loss probe; if that doesn't work, then schedule an RTO. */ static void tcp_set_xmit_timer(struct sock *sk) { - if (!tcp_schedule_loss_probe(sk, true)) + if (!tcp_sk(sk)->packets_out || !tcp_schedule_loss_probe(sk, true)) tcp_rearm_rto(sk); } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 1ef419c66a0e..46bd48cf776a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3135,7 +3135,7 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto) * not in loss recovery, that are either limited by cwnd or application. */ if ((early_retrans != 3 && early_retrans != 4) || - !tp->packets_out || !tcp_is_sack(tp) || + !tcp_is_sack(tp) || (icsk->icsk_ca_state != TCP_CA_Open && icsk->icsk_ca_state != TCP_CA_CWR)) return false; From 539a6cf0844da56c32513b86305a7327760f9932 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 23 Feb 2026 15:30:47 +0000 Subject: [PATCH 0024/1561] tcp: move inet6_csk_update_pmtu() to tcp_ipv6.c This function is only called from tcp_v6_mtu_reduced() and can be (auto)inlined by the compiler. Note that inet6_csk_route_socket() is no longer (auto)inlined, which is a good thing as it is slow path. $ scripts/bloat-o-meter -t vmlinux.0 vmlinux.1 add/remove: 0/2 grow/shrink: 2/0 up/down: 93/-129 (-36) Function old new delta tcp_v6_mtu_reduced 139 228 +89 inet6_csk_route_socket 486 490 +4 __pfx_inet6_csk_update_pmtu 16 - -16 inet6_csk_update_pmtu 113 - -113 Total: Before=25076512, After=25076476, chg -0.00% Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260223153047.886683-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/inet6_connection_sock.h | 4 +++- net/ipv6/inet6_connection_sock.c | 19 ++----------------- net/ipv6/tcp_ipv6.c | 15 +++++++++++++++ 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h index ece8dabd209a..b814e1acc512 100644 --- a/include/net/inet6_connection_sock.h +++ b/include/net/inet6_connection_sock.h @@ -18,6 +18,9 @@ struct sk_buff; struct sock; struct sockaddr; +struct dst_entry *inet6_csk_route_socket(struct sock *sk, + struct flowi6 *fl6); + struct dst_entry *inet6_csk_route_req(const struct sock *sk, struct dst_entry *dst, struct flowi6 *fl6, @@ -25,5 +28,4 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk, int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl); -struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu); #endif /* _INET6_CONNECTION_SOCK_H */ diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 11fc2f7de2fe..37534e116899 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -56,8 +56,8 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk, return dst; } -static struct dst_entry *inet6_csk_route_socket(struct sock *sk, - struct flowi6 *fl6) +struct dst_entry *inet6_csk_route_socket(struct sock *sk, + struct flowi6 *fl6) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -118,18 +118,3 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused return res; } EXPORT_SYMBOL_GPL(inet6_csk_xmit); - -struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu) -{ - struct flowi6 *fl6 = &inet_sk(sk)->cork.fl.u.ip6; - struct dst_entry *dst; - - dst = inet6_csk_route_socket(sk, fl6); - - if (IS_ERR(dst)) - return NULL; - dst->ops->update_pmtu(dst, sk, NULL, mtu, true); - - dst = inet6_csk_route_socket(sk, fl6); - return IS_ERR(dst) ? NULL : dst; -} diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f17da56b449e..07e07afb1ff1 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -349,6 +349,21 @@ failure: return err; } +static struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu) +{ + struct flowi6 *fl6 = &inet_sk(sk)->cork.fl.u.ip6; + struct dst_entry *dst; + + dst = inet6_csk_route_socket(sk, fl6); + + if (IS_ERR(dst)) + return NULL; + dst->ops->update_pmtu(dst, sk, NULL, mtu, true); + + dst = inet6_csk_route_socket(sk, fl6); + return IS_ERR(dst) ? NULL : dst; +} + static void tcp_v6_mtu_reduced(struct sock *sk) { struct dst_entry *dst; From 64db5933c7adcdc4dd8f5ef6506cc998ecbe63ac Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 23 Feb 2026 16:17:42 +0000 Subject: [PATCH 0025/1561] icmp: increase net.ipv4.icmp_msgs_{per_sec,burst} These sysctls were added in 4cdf507d5452 ("icmp: add a global rate limitation") and their default values might be too small. Some network tools send probes to closed UDP ports from many hosts to estimate proportion of packet drops on a particular target. This patch sets both sysctls to 10000. Note the per-peer rate-limit (as described in RFC 4443 2.4 (f)) intent is still enforced. This also increases security, see b38e7819cae9 ("icmp: randomize the global rate limiter") for reference. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260223161742.929830-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- Documentation/networking/ip-sysctl.rst | 6 +++--- net/ipv4/icmp.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 9c90333530fa..d1eeb5323af0 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1758,14 +1758,14 @@ icmp_msgs_per_sec - INTEGER controlled by this limit. For security reasons, the precise count of messages per second is randomized. - Default: 1000 + Default: 10000 icmp_msgs_burst - INTEGER icmp_msgs_per_sec controls number of ICMP packets sent per second, - while icmp_msgs_burst controls the burst size of these packets. + while icmp_msgs_burst controls the token bucket size. For security reasons, the precise burst size is randomized. - Default: 50 + Default: 10000 icmp_ratemask - INTEGER Mask made of ICMP types for which rates are being limited. diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index a62b4c4033cc..1cf9e391aa0c 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1727,8 +1727,8 @@ static int __net_init icmp_sk_init(struct net *net) net->ipv4.sysctl_icmp_ratemask = 0x1818; net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr = 0; net->ipv4.sysctl_icmp_errors_extension_mask = 0; - net->ipv4.sysctl_icmp_msgs_per_sec = 1000; - net->ipv4.sysctl_icmp_msgs_burst = 50; + net->ipv4.sysctl_icmp_msgs_per_sec = 10000; + net->ipv4.sysctl_icmp_msgs_burst = 10000; return 0; } From fc1f97929ada9e923c3b0c70a999469eeb0b9f94 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 23 Feb 2026 23:07:18 +0000 Subject: [PATCH 0026/1561] bonding: Optimise is_netpoll_tx_blocked(). MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bond_start_xmit() spends some cycles in is_netpoll_tx_blocked(): if (unlikely(is_netpoll_tx_blocked(dev))) return NETDEV_TX_BUSY; because of the "pushf;pop reg" sequence (aka irqs_disabled()). Let's swap the conditions in is_netpoll_tx_blocked() and convert netpoll_block_tx to a static key. Before: 1.23 │ mov %gs:0x28,%rax 1.24 │ mov %rax,0x18(%rsp) 29.45 │ pushfq 0.50 │ pop %rax 0.47 │ test $0x200,%eax │ ↓ je 1b4 0.49 │ 32: lea 0x980(%rsi),%rbx After: 0.72 │ mov %gs:0x28,%rax 0.81 │ mov %rax,0x18(%rsp) 0.82 │ nop 2.77 │ 2a: lea 0x980(%rsi),%rbx Suggested-by: Eric Dumazet Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260223230749.2376145-1-kuniyu@google.com Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_main.c | 4 ++-- include/net/bonding.h | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 55a960da42b5..2f2dddcf9d30 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -206,7 +206,7 @@ MODULE_PARM_DESC(lp_interval, "The number of seconds between instances where " /*----------------------------- Global variables ----------------------------*/ #ifdef CONFIG_NET_POLL_CONTROLLER -atomic_t netpoll_block_tx = ATOMIC_INIT(0); +DEFINE_STATIC_KEY_FALSE(netpoll_block_tx); #endif unsigned int bond_net_id __read_mostly; @@ -6589,7 +6589,7 @@ static void __exit bonding_exit(void) #ifdef CONFIG_NET_POLL_CONTROLLER /* Make sure we don't have an imbalance on our netpoll blocking */ - WARN_ON(atomic_read(&netpoll_block_tx)); + WARN_ON(static_branch_unlikely(&netpoll_block_tx)); #endif } diff --git a/include/net/bonding.h b/include/net/bonding.h index 4ad5521e7731..7f8fda2fdfcb 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -91,22 +91,22 @@ NETIF_F_GSO_ESP) #ifdef CONFIG_NET_POLL_CONTROLLER -extern atomic_t netpoll_block_tx; +DECLARE_STATIC_KEY_FALSE(netpoll_block_tx); static inline void block_netpoll_tx(void) { - atomic_inc(&netpoll_block_tx); + static_branch_inc(&netpoll_block_tx); } static inline void unblock_netpoll_tx(void) { - atomic_dec(&netpoll_block_tx); + static_branch_dec(&netpoll_block_tx); } static inline int is_netpoll_tx_blocked(struct net_device *dev) { - if (unlikely(netpoll_tx_running(dev))) - return atomic_read(&netpoll_block_tx); + if (static_branch_unlikely(&netpoll_block_tx)) + return netpoll_tx_running(dev); return 0; } #else From 6c32b07650676ab4c112ff8e9d345b6770ae2be5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 23 Feb 2026 12:37:01 -0800 Subject: [PATCH 0027/1561] eth: bnxt: rename ring_err_stats -> ring_drv_stats We recently added GRO stats to bnxt, which are maintained by the driver. Having "err" in the name of the struct for ring stats no longer makes sense (as pointed out by Michael, see Link). Rename them to "drv" stats, as these are all maintained by the driver (even if partially based on info from descriptors). Michael suggested calling these misc, happy to go back to that. IMHO "drv" is a bit more meaningful that "misc". Pure rename using sed, no functional changes. Link: https://lore.kernel.org/CACKFLimgibJ0qkM1AacZVh8MKKy-pE_AAc4KPKZ7GUqebmXW9A@mail.gmail.com Reviewed-by: Michael Chan Link: https://patch.msgid.link/20260223203702.4137801-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 18 ++++++++--------- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 8 ++++---- .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 20 +++++++++---------- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index fb45e1dd1dd7..ba010bdc1e71 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -13252,7 +13252,7 @@ static void __bnxt_close_nic(struct bnxt *bp, bool irq_re_init, /* Save ring stats before shutdown */ if (bp->bnapi && irq_re_init) { bnxt_get_ring_stats(bp, &bp->net_stats_prev); - bnxt_get_ring_err_stats(bp, &bp->ring_err_stats_prev); + bnxt_get_ring_drv_stats(bp, &bp->ring_drv_stats_prev); } if (irq_re_init) { bnxt_free_irq(bp); @@ -13497,8 +13497,8 @@ bnxt_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) clear_bit(BNXT_STATE_READ_STATS, &bp->state); } -static void bnxt_get_one_ring_err_stats(struct bnxt *bp, - struct bnxt_total_ring_err_stats *stats, +static void bnxt_get_one_ring_drv_stats(struct bnxt *bp, + struct bnxt_total_ring_drv_stats *stats, struct bnxt_cp_ring_info *cpr) { struct bnxt_sw_stats *sw_stats = cpr->sw_stats; @@ -13519,13 +13519,13 @@ static void bnxt_get_one_ring_err_stats(struct bnxt *bp, stats->total_missed_irqs += sw_stats->cmn.missed_irqs; } -void bnxt_get_ring_err_stats(struct bnxt *bp, - struct bnxt_total_ring_err_stats *stats) +void bnxt_get_ring_drv_stats(struct bnxt *bp, + struct bnxt_total_ring_drv_stats *stats) { int i; for (i = 0; i < bp->cp_nr_rings; i++) - bnxt_get_one_ring_err_stats(bp, stats, &bp->bnapi[i]->cp_ring); + bnxt_get_one_ring_drv_stats(bp, stats, &bp->bnapi[i]->cp_ring); } static bool bnxt_mc_list_updated(struct bnxt *bp, u32 *rx_mask) @@ -15970,9 +15970,9 @@ static void bnxt_get_base_stats(struct net_device *dev, rx->packets = bp->net_stats_prev.rx_packets; rx->bytes = bp->net_stats_prev.rx_bytes; - rx->alloc_fail = bp->ring_err_stats_prev.rx_total_oom_discards; - rx->hw_gro_packets = bp->ring_err_stats_prev.rx_total_hw_gro_packets; - rx->hw_gro_wire_packets = bp->ring_err_stats_prev.rx_total_hw_gro_wire_packets; + rx->alloc_fail = bp->ring_drv_stats_prev.rx_total_oom_discards; + rx->hw_gro_packets = bp->ring_drv_stats_prev.rx_total_hw_gro_packets; + rx->hw_gro_wire_packets = bp->ring_drv_stats_prev.rx_total_hw_gro_wire_packets; tx->packets = bp->net_stats_prev.tx_packets; tx->bytes = bp->net_stats_prev.tx_bytes; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 9a41b9e0423c..90fa3e93c8d6 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1147,7 +1147,7 @@ struct bnxt_sw_stats { struct bnxt_cmn_sw_stats cmn; }; -struct bnxt_total_ring_err_stats { +struct bnxt_total_ring_drv_stats { u64 rx_total_l4_csum_errors; u64 rx_total_resets; u64 rx_total_buf_errors; @@ -2572,7 +2572,7 @@ struct bnxt { u8 pri2cos_idx[8]; u8 pri2cos_valid; - struct bnxt_total_ring_err_stats ring_err_stats_prev; + struct bnxt_total_ring_drv_stats ring_drv_stats_prev; u16 hwrm_max_req_len; u16 hwrm_max_ext_req_len; @@ -2974,8 +2974,8 @@ int bnxt_half_open_nic(struct bnxt *bp); void bnxt_half_close_nic(struct bnxt *bp); void bnxt_reenable_sriov(struct bnxt *bp); void bnxt_close_nic(struct bnxt *, bool, bool); -void bnxt_get_ring_err_stats(struct bnxt *bp, - struct bnxt_total_ring_err_stats *stats); +void bnxt_get_ring_drv_stats(struct bnxt *bp, + struct bnxt_total_ring_drv_stats *stats); bool bnxt_rfs_capable(struct bnxt *bp, bool new_rss_ctx); int bnxt_dbg_hwrm_rd_reg(struct bnxt *bp, u32 reg_off, u16 num_words, u32 *reg_buf); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 53a83b6680c4..283be8767a99 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -340,7 +340,7 @@ enum { RX_NETPOLL_DISCARDS, }; -static const char *const bnxt_ring_err_stats_arr[] = { +static const char *const bnxt_ring_drv_stats_arr[] = { "rx_total_l4_csum_errors", "rx_total_resets", "rx_total_buf_errors", @@ -500,7 +500,7 @@ static const struct { BNXT_TX_STATS_PRI_ENTRIES(tx_packets), }; -#define BNXT_NUM_RING_ERR_STATS ARRAY_SIZE(bnxt_ring_err_stats_arr) +#define BNXT_NUM_RING_DRV_STATS ARRAY_SIZE(bnxt_ring_drv_stats_arr) #define BNXT_NUM_PORT_STATS ARRAY_SIZE(bnxt_port_stats_arr) #define BNXT_NUM_STATS_PRI \ (ARRAY_SIZE(bnxt_rx_bytes_pri_arr) + \ @@ -539,7 +539,7 @@ static int bnxt_get_num_stats(struct bnxt *bp) int num_stats = bnxt_get_num_ring_stats(bp); int len; - num_stats += BNXT_NUM_RING_ERR_STATS; + num_stats += BNXT_NUM_RING_DRV_STATS; if (bp->flags & BNXT_FLAG_PORT_STATS) num_stats += BNXT_NUM_PORT_STATS; @@ -594,7 +594,7 @@ static bool is_tx_ring(struct bnxt *bp, int ring_num) static void bnxt_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *buf) { - struct bnxt_total_ring_err_stats ring_err_stats = {0}; + struct bnxt_total_ring_drv_stats ring_drv_stats = {0}; struct bnxt *bp = netdev_priv(dev); u64 *curr, *prev; u32 tpa_stats; @@ -643,12 +643,12 @@ skip_tpa_ring_stats: buf[j] = sw[k]; } - bnxt_get_ring_err_stats(bp, &ring_err_stats); + bnxt_get_ring_drv_stats(bp, &ring_drv_stats); skip_ring_stats: - curr = &ring_err_stats.rx_total_l4_csum_errors; - prev = &bp->ring_err_stats_prev.rx_total_l4_csum_errors; - for (i = 0; i < BNXT_NUM_RING_ERR_STATS; i++, j++, curr++, prev++) + curr = &ring_drv_stats.rx_total_l4_csum_errors; + prev = &bp->ring_drv_stats_prev.rx_total_l4_csum_errors; + for (i = 0; i < BNXT_NUM_RING_DRV_STATS; i++, j++, curr++, prev++) buf[j] = *curr + *prev; if (bp->flags & BNXT_FLAG_PORT_STATS) { @@ -758,8 +758,8 @@ skip_tpa_stats: ethtool_sprintf(&buf, "[%d]: %s", i, str); } } - for (i = 0; i < BNXT_NUM_RING_ERR_STATS; i++) - ethtool_puts(&buf, bnxt_ring_err_stats_arr[i]); + for (i = 0; i < BNXT_NUM_RING_DRV_STATS; i++) + ethtool_puts(&buf, bnxt_ring_drv_stats_arr[i]); if (bp->flags & BNXT_FLAG_PORT_STATS) for (i = 0; i < BNXT_NUM_PORT_STATS; i++) { From d99aa5912c3af4c0f84738d9471836f068d73d69 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 23 Feb 2026 12:26:31 -0800 Subject: [PATCH 0028/1561] selftests: net: py: avoid masking exceptions in bkg() failures bkg() failures are currently quite hard to debug and spot. Often we have code along the lines of: with bkg("./cmd_rx_something -p PORT"): wait_port_listen(PORT) cmd("./cmd_tx_something", host=remote) When wait_port_listen() fails we don't get to see the exit status of bkg(). Even tho very often it's a failure in the bkg() command that's actually to blame. Try not to interfere with the bkg() command error checking. With: with bkg("false", exit_wait=True): time.sleep(0.01) # let the 'false' cmd run raise Exception("bla") Before: .. stack trace .. # Exception| Exception: bla After: .. stack trace .. # Exception| Exception: bla # Exception| # Exception| During handling of the above exception, another exception occurred: .. stack trace .. # Exception| lib.py.utils.CmdExitFailure: Command failed: false # Exception| STDOUT: b'' # Exception| STDERR: b'' Reviewed-by: Petr Machata Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260223202633.4126087-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/lib/py/utils.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py index 85884f3e827b..8fa1c2fabfc2 100644 --- a/tools/testing/selftests/net/lib/py/utils.py +++ b/tools/testing/selftests/net/lib/py/utils.py @@ -159,8 +159,11 @@ class bkg(cmd): return self def __exit__(self, ex_type, ex_value, ex_tb): - # Force termination on exception - terminate = self.terminate or (self._exit_wait and ex_type is not None) + terminate = self.terminate + # Force termination on exception, but only if bkg() didn't already exit + # since forcing termination silences failures with fail=None + if self.proc.poll() is None: + terminate = terminate or (self._exit_wait and ex_type is not None) return self.process(terminate=terminate, fail=self.check_fail) From 04abab18e1205a9e2037fb95f334b80cf74f89c9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 23 Feb 2026 12:26:32 -0800 Subject: [PATCH 0029/1561] selftests: net: py: use repr(cmd) for failure exceptions Reuse repr(cmd) instead of manually formatting a similar string. Before: # Exception| lib.py.utils.CmdExitFailure: Command failed: false # Exception| STDOUT: b'' # Exception| STDERR: b'' After: # Exception| lib.py.utils.CmdExitFailure: Command failed # Exception| CMD: false # Exception| EXIT: 1 Reviewed-by: Petr Machata Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260223202633.4126087-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/lib/py/utils.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py index 8fa1c2fabfc2..52d98ca139ff 100644 --- a/tools/testing/selftests/net/lib/py/utils.py +++ b/tools/testing/selftests/net/lib/py/utils.py @@ -11,7 +11,7 @@ import time class CmdExitFailure(Exception): def __init__(self, msg, cmd_obj): - super().__init__(msg) + super().__init__(msg + "\n" + repr(cmd_obj)) self.cmd = cmd_obj @@ -98,8 +98,7 @@ class cmd: if self.proc.returncode != 0 and fail: if len(stderr) > 0 and stderr[-1] == "\n": stderr = stderr[:-1] - raise CmdExitFailure("Command failed: %s\nSTDOUT: %s\nSTDERR: %s" % - (self.proc.args, stdout, stderr), self) + raise CmdExitFailure("Command failed", self) def __repr__(self): def str_fmt(name, s): From 6e4dff20021ad280bc9c8e047239707da7689431 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 23 Feb 2026 12:26:33 -0800 Subject: [PATCH 0030/1561] selftests: net: py: add cmd info for ksft_wait failure Gal recently complained: When [ksft_wait failure] happens, the test fails with a cryptic message: # Exception| Exception: Did not receive ready message Let's try to include the stdout/stderr of the command we tried to start. E.g. for cmd("false", ksft_wait=True): # Exception| lib.py.utils.CmdInitFailure: Did not receive ready message # Exception| CMD: false # Exception| EXIT: 1 We need to factor out _process_terminate() otherwise the exit path may try to write to already disconnected self.ksft_term_fd. Reviewed-by: Petr Machata Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260223202633.4126087-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/lib/py/utils.py | 30 ++++++++++++++++----- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py index 52d98ca139ff..09535346c13d 100644 --- a/tools/testing/selftests/net/lib/py/utils.py +++ b/tools/testing/selftests/net/lib/py/utils.py @@ -9,7 +9,15 @@ import subprocess import time +class CmdInitFailure(Exception): + """ Command failed to start. Only raised by bkg(). """ + def __init__(self, msg, cmd_obj): + super().__init__(msg + "\n" + repr(cmd_obj)) + self.cmd = cmd_obj + + class CmdExitFailure(Exception): + """ Command failed (returned non-zero exit code). """ def __init__(self, msg, cmd_obj): super().__init__(msg + "\n" + repr(cmd_obj)) self.cmd = cmd_obj @@ -76,16 +84,13 @@ class cmd: msg = fd_read_timeout(rfd, ksft_wait) os.close(rfd) if not msg: - raise Exception("Did not receive ready message") + terminate = self.proc.poll() is None + self._process_terminate(terminate=terminate, timeout=1) + raise CmdInitFailure("Did not receive ready message", self) if not background: self.process(terminate=False, fail=fail, timeout=timeout) - def process(self, terminate=True, fail=None, timeout=5): - if fail is None: - fail = not terminate - - if self.ksft_term_fd: - os.write(self.ksft_term_fd, b"1") + def _process_terminate(self, terminate, timeout): if terminate: self.proc.terminate() stdout, stderr = self.proc.communicate(timeout) @@ -95,6 +100,17 @@ class cmd: self.proc.stderr.close() self.ret = self.proc.returncode + return stdout, stderr + + def process(self, terminate=True, fail=None, timeout=5): + if fail is None: + fail = not terminate + + if self.ksft_term_fd: + os.write(self.ksft_term_fd, b"1") + + stdout, stderr = self._process_terminate(terminate=terminate, + timeout=timeout) if self.proc.returncode != 0 and fail: if len(stderr) > 0 and stderr[-1] == "\n": stderr = stderr[:-1] From d2adf01780b8957b31f4954d56646b02de956f65 Mon Sep 17 00:00:00 2001 From: Rosen Penev Date: Mon, 23 Feb 2026 17:41:41 -0800 Subject: [PATCH 0031/1561] net: freescale: ucc_geth: call of_node_put once Move it up to avoid placing it in both the error and success paths. Signed-off-by: Rosen Penev Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260224014141.352642-1-rosenp@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/ucc_geth.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 131d1210dc4a..a4eb71dcd6c5 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -3470,14 +3470,13 @@ static int ucc_geth_probe(struct platform_device* ofdev) phy_node = of_parse_phandle(np, "phy-handle", 0); if (phy_node) { prop = of_get_property(phy_node, "interface", NULL); + of_node_put(phy_node); if (prop) { dev_err(&ofdev->dev, "Device-tree property 'interface' is no longer supported. Please use 'phy-connection-type' instead."); - of_node_put(phy_node); err = -EINVAL; goto err_put_tbi; } - of_node_put(phy_node); } err = of_get_phy_mode(np, &phy_interface); From 8215d7cbfbcf7f40ad409f5217d88c7abce76fe0 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 23 Feb 2026 12:17:13 +0000 Subject: [PATCH 0032/1561] net: stmmac: fix EEE supportable interfaces According to the dwmac v3.74a databook, only MII, GMII and RGMII dwmac interface modes are supported for EEE. Restrict EEE to these modes, or the modules supported by a PCS other than the GMAC's integrated PCS. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vuUsD-0000000Afci-0XxO@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index d7c730179a7f..b18405626aa5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1375,9 +1375,21 @@ static int stmmac_phylink_setup(struct stmmac_priv *priv) pcs->supported_interfaces); if (priv->dma_cap.eee) { - /* Assume all supported interfaces also support LPI */ - memcpy(config->lpi_interfaces, config->supported_interfaces, - sizeof(config->lpi_interfaces)); + /* The GMAC 3.74a databook states that EEE is only supported + * in MII, GMII, and RGMII interfaces. + */ + __set_bit(PHY_INTERFACE_MODE_MII, config->lpi_interfaces); + __set_bit(PHY_INTERFACE_MODE_GMII, config->lpi_interfaces); + phy_interface_set_rgmii(config->lpi_interfaces); + + /* If we have a non-integrated PCS, assume that it is connected + * to the GMAC using GMII or another EEE compatible interface, + * and thus all PCS-supported interfaces support LPI. + */ + if (pcs) + phy_interface_or(config->lpi_interfaces, + config->lpi_interfaces, + pcs->supported_interfaces); /* All full duplex speeds above 100Mbps are supported */ config->lpi_capabilities = ~(MAC_1000FD - 1) | MAC_100FD; From 6698d6ce6a6b20d8780ade93b9b689787b5a4945 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 23 Feb 2026 12:40:30 -0800 Subject: [PATCH 0033/1561] selftests: hw-net: tso: set a TCP window clamp to avoid spurious drops The TSO test wants to make sure that there isn't a lot of retransmits, because that could indicate that device has a buggy TSO implementation. On debug kernels, however, we're likely to see significant packet loss because we simply overwhelm the receiver. In a QEMU loop with virtio devices we see ~10% false positive rate with occasional run hitting the threshold of 25% packet loss. Since we're only sending 4MB of data, set a TCP_WINDOW_CLAMP to 200k. This seems to make virtio happy while having little impact since we're primarily interested in testing the sender, and the test doesn't currently enable BIG TCP. Running socat over virtio loop for 2 sec on a debug kernel shows: TcpOutSegs 27327 0.0 TcpRetransSegs 83 0.0 TcpOutSegs 30012 0.0 TcpRetransSegs 80 0.0 TcpOutSegs 28767 0.0 TcpRetransSegs 77 0.0 But with the clamp the 3 attempts show no retransmit: TcpOutSegs 31537 0.0 TcpRetransSegs 0 0.0 TcpOutSegs 30323 0.0 TcpRetransSegs 0 0.0 TcpOutSegs 28700 0.0 TcpRetransSegs 0 0.0 Since we expect no receiver-related drops now we can significantly increase test's sensitivity to drops. All the testing we do in NIPA uses cubic. Link: https://patch.msgid.link/20260223204030.4142884-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/tso.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/drivers/net/hw/tso.py b/tools/testing/selftests/drivers/net/hw/tso.py index 0998e68ebaf0..bb675e3dac88 100755 --- a/tools/testing/selftests/drivers/net/hw/tso.py +++ b/tools/testing/selftests/drivers/net/hw/tso.py @@ -36,8 +36,11 @@ def tcp_sock_get_retrans(sock): def run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso): cfg.require_cmd("socat", local=False, remote=True) + # Set recv window clamp to avoid overwhelming receiver on debug kernels + # the 200k clamp should still let use reach > 15Gbps on real HW port = rand_port() - listen_cmd = f"socat -{ipver} -t 2 -u TCP-LISTEN:{port},reuseport /dev/null,ignoreeof" + listen_opts = f"{port},reuseport,tcp-window-clamp=200000" + listen_cmd = f"socat -{ipver} -t 2 -u TCP-LISTEN:{listen_opts} /dev/null,ignoreeof" with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as nc: wait_port_listen(port, host=cfg.remote) @@ -68,7 +71,7 @@ def run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso): # Make sure we have order of magnitude more LSO packets than # retransmits, in case TCP retransmitted all the LSO packets. - ksft_lt(tcp_sock_get_retrans(sock), total_lso_wire / 4) + ksft_lt(tcp_sock_get_retrans(sock), total_lso_wire / 16) sock.close() if should_lso: From dc2a1facbde80b388d5052c572577d74eb4da69f Mon Sep 17 00:00:00 2001 From: Rosen Penev Date: Mon, 23 Feb 2026 17:46:06 -0800 Subject: [PATCH 0034/1561] net: fs_enet: allow nvmem to override MAC address NVMEM typically loads after the ethernet driver and of_get_ethdev_address returns -EPROBE_DEFER. return in such a case to allow NVMEM to work. Signed-off-by: Rosen Penev Link: https://patch.msgid.link/20260224014607.353378-1-rosenp@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c index f563692a4a00..194edf3c43c1 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c @@ -951,7 +951,9 @@ static int fs_enet_probe(struct platform_device *ofdev) spin_lock_init(&fep->lock); spin_lock_init(&fep->tx_lock); - of_get_ethdev_address(ofdev->dev.of_node, ndev); + ret = of_get_ethdev_address(ofdev->dev.of_node, ndev); + if (ret == -EPROBE_DEFER) + goto out_cleanup_data; ret = fep->ops->allocate_bd(ndev); if (ret) From 51432958b5655e9f848b559ca59ae1a7f00a9166 Mon Sep 17 00:00:00 2001 From: kexinsun Date: Tue, 24 Feb 2026 10:07:20 +0800 Subject: [PATCH 0035/1561] rds: update outdated comment The function rds_send_reset() was subsumed by rds_send_path_reset() by commit d769ef81d5b5 ("RDS: Update rds_conn_shutdown to work with rds_conn_path"). Update the comment accordingly. Signed-off-by: kexinsun Link: https://patch.msgid.link/20260224020720.1174-1-kexinsun@smail.nju.edu.cn Signed-off-by: Jakub Kicinski --- net/rds/send.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rds/send.c b/net/rds/send.c index a1039e422a38..d8b14ff9d366 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -284,7 +284,7 @@ restart: * * cp_xmit_rm holds a ref while we're sending this message down * the connection. We can use this ref while holding the - * send_sem.. rds_send_reset() is serialized with it. + * send_sem.. rds_send_path_reset() is serialized with it. */ if (!rm) { unsigned int len; From 819101c3c158f6e220be9a9ded3c48ff5701cf42 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 24 Feb 2026 09:01:50 +0000 Subject: [PATCH 0036/1561] net: stmmac: use circ_buf helpers for descriptors The stmmac descriptor queues are circular buffers, operated as far as the hardware is concerned as either a ring, or a chain that loops back on itself. From the software perspective, it forms a circular buffer. We have a few places which calculate the number of in-use and free entries in these circular buffers, for which we have macros for. Use CIRC_CNT() and CIRC_SPACE() as appropriate to calculate these values. Validating, for stmmac_tx_avail(), which uses CIRC_SPACE(): dirty_tx = 1, cur_tx = 0 -> 0 dirty_tx = 0, cur_tx = 0 -> dma_tx_size - 1 dirty_tx = 0, cur_tx = 1 -> dma_tx_size - 2 dirty_tx passed as end, reduced by one. cur_tx passed as start. Output on sane computers is identical. For stmmac_rx_dirty(), which uses CIRC_CNT(): dirty_rx = 1, cur_rx = 0 -> dma_rx_size - 1 dirty_rx = 0, cur_rx = 0 -> 0 dirty_rx = 0, cur_rx = 1 -> 1 dirty_rx passed as start, cur_rx passed as end. Output is identical. Same validation performed on the is_last_segment calculation, which also gets converted to CIRC_CNT(). Signed-off-by: Russell King (Oracle) Reviewed-by: Simon Horman Link: https://patch.msgid.link/E1vuoIg-0000000Aout-0LyS@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 23 ++++++------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index b18405626aa5..1a9fdde281f8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -14,6 +14,7 @@ https://bugzilla.stlinux.com/ *******************************************************************************/ +#include #include #include #include @@ -355,14 +356,9 @@ static void print_pkt(unsigned char *buf, int len) static inline u32 stmmac_tx_avail(struct stmmac_priv *priv, u32 queue) { struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[queue]; - u32 avail; - if (tx_q->dirty_tx > tx_q->cur_tx) - avail = tx_q->dirty_tx - tx_q->cur_tx - 1; - else - avail = priv->dma_conf.dma_tx_size - tx_q->cur_tx + tx_q->dirty_tx - 1; - - return avail; + return CIRC_SPACE(tx_q->cur_tx, tx_q->dirty_tx, + priv->dma_conf.dma_tx_size); } /** @@ -373,14 +369,9 @@ static inline u32 stmmac_tx_avail(struct stmmac_priv *priv, u32 queue) static inline u32 stmmac_rx_dirty(struct stmmac_priv *priv, u32 queue) { struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[queue]; - u32 dirty; - if (rx_q->dirty_rx <= rx_q->cur_rx) - dirty = rx_q->cur_rx - rx_q->dirty_rx; - else - dirty = priv->dma_conf.dma_rx_size - rx_q->dirty_rx + rx_q->cur_rx; - - return dirty; + return CIRC_CNT(rx_q->cur_rx, rx_q->dirty_rx, + priv->dma_conf.dma_rx_size); } static bool stmmac_eee_tx_busy(struct stmmac_priv *priv) @@ -4584,8 +4575,8 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) /* If we only have one entry used, then the first entry is the last * segment. */ - is_last_segment = ((tx_q->cur_tx - first_entry) & - (priv->dma_conf.dma_tx_size - 1)) == 1; + is_last_segment = CIRC_CNT(tx_q->cur_tx, first_entry, + priv->dma_conf.dma_tx_size) == 1; /* Complete the first descriptor before granting the DMA */ stmmac_prepare_tso_tx_desc(priv, first, 1, proto_hdr_len, 0, 1, From dd53a0e85969c6b2a5a4e4e46ba05b3187231017 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 24 Feb 2026 09:01:55 +0000 Subject: [PATCH 0037/1561] net: stmmac: fix transmit interrupt coalescing The accounting for transmit frames does not count the descriptors correctly. It uses: tx_packets = (tx_q->cur_tx + 1) - first_tx; however, these are indexes into a circular buffer, so cur_tx can be less than first_tx, and when that happens, tx_packets becomes a very large unsigned integer. When this is added to tx_q->tx_count_frames, it has the effect of reducing the count of frames, possibly causing it to also wrap to a very large unsigned integer. Fix this by using CIRC_CNT() to calculate the number of descriptors used. Signed-off-by: Russell King (Oracle) Reviewed-by: Simon Horman Link: https://patch.msgid.link/E1vuoIl-0000000Aouz-0ttb@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 1a9fdde281f8..7bf3a8667999 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4516,7 +4516,8 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) tx_q->tx_skbuff_dma[tx_q->cur_tx].buf_type = STMMAC_TXBUF_T_SKB; /* Manage tx mitigation */ - tx_packets = (tx_q->cur_tx + 1) - first_tx; + tx_packets = CIRC_CNT(tx_q->cur_tx + 1, first_tx, + priv->dma_conf.dma_tx_size); tx_q->tx_count_frames += tx_packets; if ((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && priv->hwts_tx_en) @@ -4794,7 +4795,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) * This approach takes care about the fragments: desc is the first * element in case of no SG. */ - tx_packets = (entry + 1) - first_tx; + tx_packets = CIRC_CNT(entry + 1, first_tx, priv->dma_conf.dma_tx_size); tx_q->tx_count_frames += tx_packets; if ((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && priv->hwts_tx_en) From 45ce4b753a5047c2e61c45c9d8d878b835b39c14 Mon Sep 17 00:00:00 2001 From: Nicolai Buchwitz Date: Tue, 24 Feb 2026 15:57:23 +0100 Subject: [PATCH 0038/1561] net: cadence: macb: add ethtool nway_reset support Wire phy_ethtool_nway_reset() as the .nway_reset ethtool operation, allowing userspace to restart PHY autonegotiation via 'ethtool -r'. Signed-off-by: Nicolai Buchwitz Link: https://patch.msgid.link/20260224145723.49450-1-nb@tipi-net.de Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 43cd013bb70e..0b450b72d09c 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -3945,6 +3945,7 @@ static const struct ethtool_ops gem_ethtool_ops = { .get_rxnfc = gem_get_rxnfc, .set_rxnfc = gem_set_rxnfc, .get_rx_ring_count = gem_get_rx_ring_count, + .nway_reset = phy_ethtool_nway_reset, }; static int macb_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) From 8debe7a22309497f42fb963494833b9abd446328 Mon Sep 17 00:00:00 2001 From: Abhilekh Deka Date: Tue, 24 Feb 2026 21:06:01 +0530 Subject: [PATCH 0039/1561] net/ibmveth: fix comment typos in ibmveth.c Correct spelling mistakes in comments: - Fix misspelling of gro_receive - Fix misspelling of Partition Signed-off-by: Abhilekh Deka Reviewed-by: Dave Marquardt Link: https://patch.msgid.link/20260224153601.17534-1-abhindeka@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmveth.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 6f0821f1e798..745dc89f0e49 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -355,7 +355,7 @@ hcall_failure: /* * If multi rx buffers hcall is no longer supported by FW - * e.g. in the case of Live Parttion Migration + * e.g. in the case of Live Partition Migration */ if (batch > 1 && lpar_rc == H_FUNCTION) { /* @@ -480,7 +480,7 @@ static int ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter, */ if (!reuse) { /* remove the skb pointer to mark free. actual freeing is done - * by upper level networking after gro_recieve + * by upper level networking after gro_receive */ adapter->rx_buff_pool[pool].skbuff[index] = NULL; From 7717fbb14028be5735acb911aeb7553b7c662418 Mon Sep 17 00:00:00 2001 From: Eric Woudstra Date: Tue, 24 Feb 2026 16:50:30 +0100 Subject: [PATCH 0040/1561] net: pppoe: avoid zero-length arrays in struct pppoe_hdr Jakub Kicinski reported following issue in upcoming patches: W=1 C=1 GCC build gives us: net/bridge/netfilter/nf_conntrack_bridge.c: note: in included file (through ../include/linux/if_pppox.h, ../include/uapi/linux/netfilter_bridge.h, ../include/linux/netfilter_bridge.h): include/uapi/linux/if_pppox.h: 153:29: warning: array of flexible structures sparse doesn't like that hdr has a zero-length array which overlaps proto. The kernel code doesn't currently need those arrays. PPPoE connection is functional after applying this patch. Reviewed-by: Nikolay Aleksandrov Reviewed-by: Kees Cook Signed-off-by: Eric Woudstra Link: https://patch.msgid.link/20260224155030.106918-1-ericwouds@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ppp/pppoe.c | 2 +- include/uapi/linux/if_pppox.h | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 4275b393a454..7900cc3212a5 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -885,7 +885,7 @@ static int pppoe_sendmsg(struct socket *sock, struct msghdr *m, skb->protocol = cpu_to_be16(ETH_P_PPP_SES); ph = skb_put(skb, total_len + sizeof(struct pppoe_hdr)); - start = (char *)&ph->tag[0]; + start = (char *)ph + sizeof(*ph); error = memcpy_from_msg(start, m, total_len); if (error < 0) { diff --git a/include/uapi/linux/if_pppox.h b/include/uapi/linux/if_pppox.h index 9abd80dcc46f..29b804aa7474 100644 --- a/include/uapi/linux/if_pppox.h +++ b/include/uapi/linux/if_pppox.h @@ -122,7 +122,9 @@ struct sockaddr_pppol2tpv3in6 { struct pppoe_tag { __be16 tag_type; __be16 tag_len; +#ifndef __KERNEL__ char tag_data[]; +#endif } __attribute__ ((packed)); /* Tag identifiers */ @@ -150,7 +152,9 @@ struct pppoe_hdr { __u8 code; __be16 sid; __be16 length; +#ifndef __KERNEL__ struct pppoe_tag tag[]; +#endif } __packed; /* Length of entire PPPoE + PPP header */ From 74455a5b4326add2499cb4a1f9706154b3a1eab4 Mon Sep 17 00:00:00 2001 From: Jiejian Wu Date: Tue, 24 Feb 2026 21:50:40 +0100 Subject: [PATCH 0041/1561] ipvs: make ip_vs_svc_table and ip_vs_svc_fwm_table per netns Current ipvs uses one global mutex "__ip_vs_mutex" to keep the global "ip_vs_svc_table" and "ip_vs_svc_fwm_table" safe. But when there are tens of thousands of services from different netns in the table, it takes a long time to look up the table, for example, using "ipvsadm -ln" from different netns simultaneously. We make "ip_vs_svc_table" and "ip_vs_svc_fwm_table" per netns, and we add "service_mutex" per netns to keep these two tables safe instead of the global "__ip_vs_mutex" in current version. To this end, looking up services from different netns simultaneously will not get stuck, shortening the time consumption in large-scale deployment. It can be reproduced using the simple scripts below. init.sh: #!/bin/bash for((i=1;i<=4;i++));do ip netns add ns$i ip netns exec ns$i ip link set dev lo up ip netns exec ns$i sh add-services.sh done add-services.sh: #!/bin/bash for((i=0;i<30000;i++)); do ipvsadm -A -t 10.10.10.10:$((80+$i)) -s rr done runtest.sh: #!/bin/bash for((i=1;i<4;i++));do ip netns exec ns$i ipvsadm -ln > /dev/null & done ip netns exec ns4 ipvsadm -ln > /dev/null Run "sh init.sh" to initiate the network environment. Then run "time ./runtest.sh" to evaluate the time consumption. Our testbed is a 4-core Intel Xeon ECS. The result of the original version is around 8 seconds, while the result of the modified version is only 0.8 seconds. Signed-off-by: Jiejian Wu Co-developed-by: Dust Li Signed-off-by: Dust Li Signed-off-by: Julian Anastasov Signed-off-by: Florian Westphal Link: https://patch.msgid.link/20260224205048.4718-2-fw@strlen.de Signed-off-by: Jakub Kicinski --- include/net/ip_vs.h | 13 +++ net/netfilter/ipvs/ip_vs_ctl.c | 167 ++++++++++++++------------------- net/netfilter/ipvs/ip_vs_est.c | 18 ++-- 3 files changed, 94 insertions(+), 104 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 29a36709e7f3..074a204ec6db 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -33,6 +33,12 @@ #define IP_VS_HDR_INVERSE 1 #define IP_VS_HDR_ICMP 2 +/* + * Hash table: for virtual service lookups + */ +#define IP_VS_SVC_TAB_BITS 8 +#define IP_VS_SVC_TAB_SIZE BIT(IP_VS_SVC_TAB_BITS) +#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1) /* Generic access of ipvs struct */ static inline struct netns_ipvs *net_ipvs(struct net* net) @@ -1041,6 +1047,13 @@ struct netns_ipvs { */ unsigned int mixed_address_family_dests; unsigned int hooks_afmask; /* &1=AF_INET, &2=AF_INET6 */ + + /* the service mutex that protect svc_table and svc_fwm_table */ + struct mutex service_mutex; + /* the service table hashed by */ + struct hlist_head svc_table[IP_VS_SVC_TAB_SIZE]; + /* the service table hashed by fwmark */ + struct hlist_head svc_fwm_table[IP_VS_SVC_TAB_SIZE]; }; #define DEFAULT_SYNC_THRESHOLD 3 diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 068702894377..d871273ce917 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -48,7 +48,7 @@ MODULE_ALIAS_GENL_FAMILY(IPVS_GENL_NAME); -DEFINE_MUTEX(__ip_vs_mutex); /* Serialize configuration with sockopt/netlink */ +static struct lock_class_key __ipvs_service_key; /* sysctl variables */ @@ -293,17 +293,6 @@ ip_vs_use_count_dec(void) } -/* - * Hash table: for virtual service lookups - */ -#define IP_VS_SVC_TAB_BITS 8 -#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS) -#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1) - -/* the service table hashed by */ -static struct hlist_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE]; -/* the service table hashed by fwmark */ -static struct hlist_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; /* @@ -338,8 +327,8 @@ static inline unsigned int ip_vs_svc_fwm_hashkey(struct netns_ipvs *ipvs, __u32 } /* - * Hashes a service in the ip_vs_svc_table by - * or in the ip_vs_svc_fwm_table by fwmark. + * Hashes a service in the svc_table by + * or in the svc_fwm_table by fwmark. * Should be called with locked tables. */ static int ip_vs_svc_hash(struct ip_vs_service *svc) @@ -354,17 +343,17 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) if (svc->fwmark == 0) { /* - * Hash it by in ip_vs_svc_table + * Hash it by in svc_table */ hash = ip_vs_svc_hashkey(svc->ipvs, svc->af, svc->protocol, &svc->addr, svc->port); - hlist_add_head_rcu(&svc->s_list, &ip_vs_svc_table[hash]); + hlist_add_head_rcu(&svc->s_list, &svc->ipvs->svc_table[hash]); } else { /* * Hash it by fwmark in svc_fwm_table */ hash = ip_vs_svc_fwm_hashkey(svc->ipvs, svc->fwmark); - hlist_add_head_rcu(&svc->f_list, &ip_vs_svc_fwm_table[hash]); + hlist_add_head_rcu(&svc->f_list, &svc->ipvs->svc_fwm_table[hash]); } svc->flags |= IP_VS_SVC_F_HASHED; @@ -413,12 +402,9 @@ __ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u16 protocol, /* Check for "full" addressed entries */ hash = ip_vs_svc_hashkey(ipvs, af, protocol, vaddr, vport); - hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[hash], s_list) { - if ((svc->af == af) - && ip_vs_addr_equal(af, &svc->addr, vaddr) - && (svc->port == vport) - && (svc->protocol == protocol) - && (svc->ipvs == ipvs)) { + hlist_for_each_entry_rcu(svc, &ipvs->svc_table[hash], s_list) { + if (svc->af == af && ip_vs_addr_equal(af, &svc->addr, vaddr) && + svc->port == vport && svc->protocol == protocol) { /* HIT */ return svc; } @@ -440,9 +426,8 @@ __ip_vs_svc_fwm_find(struct netns_ipvs *ipvs, int af, __u32 fwmark) /* Check for fwmark addressed entries */ hash = ip_vs_svc_fwm_hashkey(ipvs, fwmark); - hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[hash], f_list) { - if (svc->fwmark == fwmark && svc->af == af - && (svc->ipvs == ipvs)) { + hlist_for_each_entry_rcu(svc, &ipvs->svc_fwm_table[hash], f_list) { + if (svc->fwmark == fwmark && svc->af == af) { /* HIT */ return svc; } @@ -1701,10 +1686,9 @@ static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup) * Flush the service table hashed by */ for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - hlist_for_each_entry_safe(svc, n, &ip_vs_svc_table[idx], + hlist_for_each_entry_safe(svc, n, &ipvs->svc_table[idx], s_list) { - if (svc->ipvs == ipvs) - ip_vs_unlink_service(svc, cleanup); + ip_vs_unlink_service(svc, cleanup); } } @@ -1712,10 +1696,9 @@ static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup) * Flush the service table hashed by fwmark */ for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - hlist_for_each_entry_safe(svc, n, &ip_vs_svc_fwm_table[idx], + hlist_for_each_entry_safe(svc, n, &ipvs->svc_fwm_table[idx], f_list) { - if (svc->ipvs == ipvs) - ip_vs_unlink_service(svc, cleanup); + ip_vs_unlink_service(svc, cleanup); } } @@ -1732,12 +1715,12 @@ void ip_vs_service_nets_cleanup(struct list_head *net_list) struct net *net; /* Check for "full" addressed entries */ - mutex_lock(&__ip_vs_mutex); list_for_each_entry(net, net_list, exit_list) { ipvs = net_ipvs(net); + mutex_lock(&ipvs->service_mutex); ip_vs_flush(ipvs, true); + mutex_unlock(&ipvs->service_mutex); } - mutex_unlock(&__ip_vs_mutex); } /* Put all references for device (dst_cache) */ @@ -1775,25 +1758,20 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, if (event != NETDEV_DOWN || !ipvs) return NOTIFY_DONE; IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name); - mutex_lock(&__ip_vs_mutex); + mutex_lock(&ipvs->service_mutex); for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { - if (svc->ipvs == ipvs) { - list_for_each_entry(dest, &svc->destinations, - n_list) { - ip_vs_forget_dev(dest, dev); - } + hlist_for_each_entry(svc, &ipvs->svc_table[idx], s_list) { + list_for_each_entry(dest, &svc->destinations, + n_list) { + ip_vs_forget_dev(dest, dev); } } - hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { - if (svc->ipvs == ipvs) { - list_for_each_entry(dest, &svc->destinations, - n_list) { - ip_vs_forget_dev(dest, dev); - } + hlist_for_each_entry(svc, &ipvs->svc_fwm_table[idx], f_list) { + list_for_each_entry(dest, &svc->destinations, + n_list) { + ip_vs_forget_dev(dest, dev); } - } } @@ -1802,7 +1780,7 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, ip_vs_forget_dev(dest, dev); } spin_unlock_bh(&ipvs->dest_trash_lock); - mutex_unlock(&__ip_vs_mutex); + mutex_unlock(&ipvs->service_mutex); return NOTIFY_DONE; } @@ -1826,16 +1804,14 @@ static int ip_vs_zero_all(struct netns_ipvs *ipvs) struct ip_vs_service *svc; for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { - if (svc->ipvs == ipvs) - ip_vs_zero_service(svc); + hlist_for_each_entry(svc, &ipvs->svc_table[idx], s_list) { + ip_vs_zero_service(svc); } } for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { - if (svc->ipvs == ipvs) - ip_vs_zero_service(svc); + hlist_for_each_entry(svc, &ipvs->svc_fwm_table[idx], f_list) { + ip_vs_zero_service(svc); } } @@ -2306,9 +2282,9 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) /* look in hash by protocol */ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[idx], s_list) { - if ((svc->ipvs == ipvs) && pos-- == 0) { - iter->table = ip_vs_svc_table; + hlist_for_each_entry_rcu(svc, &ipvs->svc_table[idx], s_list) { + if (pos-- == 0) { + iter->table = ipvs->svc_table; iter->bucket = idx; return svc; } @@ -2317,10 +2293,10 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) /* keep looking in fwmark */ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[idx], + hlist_for_each_entry_rcu(svc, &ipvs->svc_fwm_table[idx], f_list) { - if ((svc->ipvs == ipvs) && pos-- == 0) { - iter->table = ip_vs_svc_fwm_table; + if (pos-- == 0) { + iter->table = ipvs->svc_fwm_table; iter->bucket = idx; return svc; } @@ -2343,6 +2319,8 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) struct hlist_node *e; struct ip_vs_iter *iter; struct ip_vs_service *svc; + struct net *net = seq_file_net(seq); + struct netns_ipvs *ipvs = net_ipvs(net); ++*pos; if (v == SEQ_START_TOKEN) @@ -2351,7 +2329,7 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) svc = v; iter = seq->private; - if (iter->table == ip_vs_svc_table) { + if (iter->table == ipvs->svc_table) { /* next service in table hashed by protocol */ e = rcu_dereference(hlist_next_rcu(&svc->s_list)); if (e) @@ -2359,13 +2337,13 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { hlist_for_each_entry_rcu(svc, - &ip_vs_svc_table[iter->bucket], + &ipvs->svc_table[iter->bucket], s_list) { return svc; } } - iter->table = ip_vs_svc_fwm_table; + iter->table = ipvs->svc_fwm_table; iter->bucket = -1; goto scan_fwmark; } @@ -2378,7 +2356,7 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) scan_fwmark: while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { hlist_for_each_entry_rcu(svc, - &ip_vs_svc_fwm_table[iter->bucket], + &ipvs->svc_fwm_table[iter->bucket], f_list) return svc; } @@ -2414,7 +2392,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) if (svc->ipvs != ipvs) return 0; - if (iter->table == ip_vs_svc_table) { + if (iter->table == ipvs->svc_table) { #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) seq_printf(seq, "%s [%pI6]:%04X %s ", @@ -2736,7 +2714,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, sockptr_t ptr, unsigned int len) return ret; } - mutex_lock(&__ip_vs_mutex); + mutex_lock(&ipvs->service_mutex); if (cmd == IP_VS_SO_SET_FLUSH) { /* Flush the virtual service */ ret = ip_vs_flush(ipvs, false); @@ -2833,7 +2811,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, sockptr_t ptr, unsigned int len) } out_unlock: - mutex_unlock(&__ip_vs_mutex); + mutex_unlock(&ipvs->service_mutex); return ret; } @@ -2871,9 +2849,9 @@ __ip_vs_get_service_entries(struct netns_ipvs *ipvs, int ret = 0; for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { + hlist_for_each_entry(svc, &ipvs->svc_table[idx], s_list) { /* Only expose IPv4 entries to old interface */ - if (svc->af != AF_INET || (svc->ipvs != ipvs)) + if (svc->af != AF_INET) continue; if (count >= get->num_services) @@ -2890,9 +2868,9 @@ __ip_vs_get_service_entries(struct netns_ipvs *ipvs, } for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { + hlist_for_each_entry(svc, &ipvs->svc_fwm_table[idx], f_list) { /* Only expose IPv4 entries to old interface */ - if (svc->af != AF_INET || (svc->ipvs != ipvs)) + if (svc->af != AF_INET) continue; if (count >= get->num_services) @@ -3061,7 +3039,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) return ret; } - mutex_lock(&__ip_vs_mutex); + mutex_lock(&ipvs->service_mutex); switch (cmd) { case IP_VS_SO_GET_VERSION: { @@ -3160,7 +3138,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) } out: - mutex_unlock(&__ip_vs_mutex); + mutex_unlock(&ipvs->service_mutex); return ret; } @@ -3395,10 +3373,10 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, struct net *net = sock_net(skb->sk); struct netns_ipvs *ipvs = net_ipvs(net); - mutex_lock(&__ip_vs_mutex); + mutex_lock(&ipvs->service_mutex); for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { - hlist_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { - if (++idx <= start || (svc->ipvs != ipvs)) + hlist_for_each_entry(svc, &ipvs->svc_table[i], s_list) { + if (++idx <= start) continue; if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { idx--; @@ -3408,8 +3386,8 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, } for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { - hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { - if (++idx <= start || (svc->ipvs != ipvs)) + hlist_for_each_entry(svc, &ipvs->svc_fwm_table[i], f_list) { + if (++idx <= start) continue; if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { idx--; @@ -3419,7 +3397,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, } nla_put_failure: - mutex_unlock(&__ip_vs_mutex); + mutex_unlock(&ipvs->service_mutex); cb->args[0] = idx; return skb->len; @@ -3608,7 +3586,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb, struct net *net = sock_net(skb->sk); struct netns_ipvs *ipvs = net_ipvs(net); - mutex_lock(&__ip_vs_mutex); + mutex_lock(&ipvs->service_mutex); /* Try to find the service for which to dump destinations */ if (nlmsg_parse_deprecated(cb->nlh, GENL_HDRLEN, attrs, IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy, cb->extack)) @@ -3633,7 +3611,7 @@ nla_put_failure: cb->args[0] = idx; out_err: - mutex_unlock(&__ip_vs_mutex); + mutex_unlock(&ipvs->service_mutex); return skb->len; } @@ -3916,7 +3894,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) cmd = info->genlhdr->cmd; - mutex_lock(&__ip_vs_mutex); + mutex_lock(&ipvs->service_mutex); if (cmd == IPVS_CMD_FLUSH) { ret = ip_vs_flush(ipvs, false); @@ -4028,7 +4006,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) } out: - mutex_unlock(&__ip_vs_mutex); + mutex_unlock(&ipvs->service_mutex); return ret; } @@ -4058,7 +4036,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - mutex_lock(&__ip_vs_mutex); + mutex_lock(&ipvs->service_mutex); reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd); if (reply == NULL) @@ -4126,7 +4104,7 @@ nla_put_failure: out_err: nlmsg_free(msg); out: - mutex_unlock(&__ip_vs_mutex); + mutex_unlock(&ipvs->service_mutex); return ret; } @@ -4243,6 +4221,7 @@ static struct genl_family ip_vs_genl_family __ro_after_init = { .small_ops = ip_vs_genl_ops, .n_small_ops = ARRAY_SIZE(ip_vs_genl_ops), .resv_start_op = IPVS_CMD_FLUSH + 1, + .parallel_ops = 1, }; static int __init ip_vs_genl_register(void) @@ -4425,6 +4404,13 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) int ret = -ENOMEM; int idx; + /* Initialize service_mutex, svc_table, svc_fwm_table per netns */ + __mutex_init(&ipvs->service_mutex, "ipvs->service_mutex", &__ipvs_service_key); + for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { + INIT_HLIST_HEAD(&ipvs->svc_table[idx]); + INIT_HLIST_HEAD(&ipvs->svc_fwm_table[idx]); + } + /* Initialize rs_table */ for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) INIT_HLIST_HEAD(&ipvs->rs_table[idx]); @@ -4529,17 +4515,8 @@ void ip_vs_unregister_nl_ioctl(void) int __init ip_vs_control_init(void) { - int idx; int ret; - /* Initialize svc_table, ip_vs_svc_fwm_table */ - for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - INIT_HLIST_HEAD(&ip_vs_svc_table[idx]); - INIT_HLIST_HEAD(&ip_vs_svc_fwm_table[idx]); - } - - smp_wmb(); /* Do we really need it now ? */ - ret = register_netdevice_notifier(&ip_vs_dst_notifier); if (ret < 0) return ret; diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 77f4f637ff67..dc207172ca9f 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -602,7 +602,7 @@ static void ip_vs_est_drain_temp_list(struct netns_ipvs *ipvs) while (1) { int max = 16; - mutex_lock(&__ip_vs_mutex); + mutex_lock(&ipvs->service_mutex); while (max-- > 0) { est = hlist_entry_safe(ipvs->est_temp_list.first, @@ -622,12 +622,12 @@ static void ip_vs_est_drain_temp_list(struct netns_ipvs *ipvs) } goto unlock; } - mutex_unlock(&__ip_vs_mutex); + mutex_unlock(&ipvs->service_mutex); cond_resched(); } unlock: - mutex_unlock(&__ip_vs_mutex); + mutex_unlock(&ipvs->service_mutex); } /* Calculate limits for all kthreads */ @@ -647,9 +647,9 @@ static int ip_vs_est_calc_limits(struct netns_ipvs *ipvs, int *chain_max) u64 val; INIT_HLIST_HEAD(&chain); - mutex_lock(&__ip_vs_mutex); + mutex_lock(&ipvs->service_mutex); kd = ipvs->est_kt_arr[0]; - mutex_unlock(&__ip_vs_mutex); + mutex_unlock(&ipvs->service_mutex); s = kd ? kd->calc_stats : NULL; if (!s) goto out; @@ -748,7 +748,7 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs) if (!ip_vs_est_calc_limits(ipvs, &chain_max)) return; - mutex_lock(&__ip_vs_mutex); + mutex_lock(&ipvs->service_mutex); /* Stop all other tasks, so that we can immediately move the * estimators to est_temp_list without RCU grace period @@ -815,9 +815,9 @@ walk_chain: /* Give chance estimators to be added (to est_temp_list) * and deleted (releasing kthread contexts) */ - mutex_unlock(&__ip_vs_mutex); + mutex_unlock(&ipvs->service_mutex); cond_resched(); - mutex_lock(&__ip_vs_mutex); + mutex_lock(&ipvs->service_mutex); /* Current kt released ? */ if (id >= ipvs->est_kt_count) @@ -893,7 +893,7 @@ unlock2: mutex_unlock(&ipvs->est_mutex); unlock: - mutex_unlock(&__ip_vs_mutex); + mutex_unlock(&ipvs->service_mutex); } void ip_vs_zero_estimator(struct ip_vs_stats *stats) From 3de0ec2873eac27ba033e867f5da23e081929c8f Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Tue, 24 Feb 2026 21:50:41 +0100 Subject: [PATCH 0042/1561] ipvs: some service readers can use RCU Some places walk the services under mutex but they can just use RCU: * ip_vs_dst_event() uses ip_vs_forget_dev() which uses its own lock to modify dest * ip_vs_genl_dump_services(): ip_vs_genl_fill_service() just fills skb * ip_vs_genl_parse_service(): move RCU lock to callers ip_vs_genl_set_cmd(), ip_vs_genl_dump_dests() and ip_vs_genl_get_cmd() * ip_vs_genl_dump_dests(): just fill skb Signed-off-by: Julian Anastasov Reviewed-by: Dust Li Signed-off-by: Florian Westphal Link: https://patch.msgid.link/20260224205048.4718-3-fw@strlen.de Signed-off-by: Jakub Kicinski --- net/netfilter/ipvs/ip_vs_ctl.c | 47 +++++++++++++++++----------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index d871273ce917..b9eaf048a29f 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1758,23 +1758,21 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, if (event != NETDEV_DOWN || !ipvs) return NOTIFY_DONE; IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name); - mutex_lock(&ipvs->service_mutex); + rcu_read_lock(); for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - hlist_for_each_entry(svc, &ipvs->svc_table[idx], s_list) { - list_for_each_entry(dest, &svc->destinations, - n_list) { + hlist_for_each_entry_rcu(svc, &ipvs->svc_table[idx], s_list) + list_for_each_entry_rcu(dest, &svc->destinations, + n_list) ip_vs_forget_dev(dest, dev); - } - } - hlist_for_each_entry(svc, &ipvs->svc_fwm_table[idx], f_list) { - list_for_each_entry(dest, &svc->destinations, - n_list) { + hlist_for_each_entry_rcu(svc, &ipvs->svc_fwm_table[idx], f_list) + list_for_each_entry_rcu(dest, &svc->destinations, + n_list) ip_vs_forget_dev(dest, dev); - } - } } + rcu_read_unlock(); + mutex_lock(&ipvs->service_mutex); spin_lock_bh(&ipvs->dest_trash_lock); list_for_each_entry(dest, &ipvs->dest_trash, t_list) { ip_vs_forget_dev(dest, dev); @@ -3317,9 +3315,9 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb, goto nla_put_failure; } - sched = rcu_dereference_protected(svc->scheduler, 1); + sched = rcu_dereference(svc->scheduler); sched_name = sched ? sched->name : "none"; - pe = rcu_dereference_protected(svc->pe, 1); + pe = rcu_dereference(svc->pe); if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched_name) || (pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) || nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) || @@ -3373,9 +3371,9 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, struct net *net = sock_net(skb->sk); struct netns_ipvs *ipvs = net_ipvs(net); - mutex_lock(&ipvs->service_mutex); + rcu_read_lock(); for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { - hlist_for_each_entry(svc, &ipvs->svc_table[i], s_list) { + hlist_for_each_entry_rcu(svc, &ipvs->svc_table[i], s_list) { if (++idx <= start) continue; if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { @@ -3386,7 +3384,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, } for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { - hlist_for_each_entry(svc, &ipvs->svc_fwm_table[i], f_list) { + hlist_for_each_entry_rcu(svc, &ipvs->svc_fwm_table[i], f_list) { if (++idx <= start) continue; if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { @@ -3397,7 +3395,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, } nla_put_failure: - mutex_unlock(&ipvs->service_mutex); + rcu_read_unlock(); cb->args[0] = idx; return skb->len; @@ -3453,13 +3451,11 @@ static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs, usvc->fwmark = 0; } - rcu_read_lock(); if (usvc->fwmark) svc = __ip_vs_svc_fwm_find(ipvs, usvc->af, usvc->fwmark); else svc = __ip_vs_service_find(ipvs, usvc->af, usvc->protocol, &usvc->addr, usvc->port); - rcu_read_unlock(); *ret_svc = svc; /* If a full entry was requested, check for the additional fields */ @@ -3586,7 +3582,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb, struct net *net = sock_net(skb->sk); struct netns_ipvs *ipvs = net_ipvs(net); - mutex_lock(&ipvs->service_mutex); + rcu_read_lock(); /* Try to find the service for which to dump destinations */ if (nlmsg_parse_deprecated(cb->nlh, GENL_HDRLEN, attrs, IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy, cb->extack)) @@ -3598,7 +3594,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb, goto out_err; /* Dump the destinations */ - list_for_each_entry(dest, &svc->destinations, n_list) { + list_for_each_entry_rcu(dest, &svc->destinations, n_list) { if (++idx <= start) continue; if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) { @@ -3611,7 +3607,7 @@ nla_put_failure: cb->args[0] = idx; out_err: - mutex_unlock(&ipvs->service_mutex); + rcu_read_unlock(); return skb->len; } @@ -3914,9 +3910,12 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE) need_full_svc = true; + /* We use function that requires RCU lock (hlist_bl) */ + rcu_read_lock(); ret = ip_vs_genl_parse_service(ipvs, &usvc, info->attrs[IPVS_CMD_ATTR_SERVICE], need_full_svc, &svc); + rcu_read_unlock(); if (ret) goto out; @@ -4036,7 +4035,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - mutex_lock(&ipvs->service_mutex); + rcu_read_lock(); reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd); if (reply == NULL) @@ -4104,7 +4103,7 @@ nla_put_failure: out_err: nlmsg_free(msg); out: - mutex_unlock(&ipvs->service_mutex); + rcu_read_unlock(); return ret; } From b24ae1a387e404e832385448ccad30cb03520e45 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Tue, 24 Feb 2026 21:50:42 +0100 Subject: [PATCH 0043/1561] ipvs: use single svc table fwmark based services and non-fwmark based services can be hashed in same service table. This reduces the burden of working with two tables. Signed-off-by: Julian Anastasov Signed-off-by: Florian Westphal Link: https://patch.msgid.link/20260224205048.4718-4-fw@strlen.de Signed-off-by: Jakub Kicinski --- include/net/ip_vs.h | 8 +- net/netfilter/ipvs/ip_vs_ctl.c | 146 +++++---------------------------- 2 files changed, 22 insertions(+), 132 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 074a204ec6db..b5a5a5efe3cc 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -679,8 +679,7 @@ struct ip_vs_dest_user_kern { * forwarding entries. */ struct ip_vs_service { - struct hlist_node s_list; /* for normal service table */ - struct hlist_node f_list; /* for fwmark-based service table */ + struct hlist_node s_list; /* node in service table */ atomic_t refcnt; /* reference counter */ u16 af; /* address family */ @@ -1050,10 +1049,7 @@ struct netns_ipvs { /* the service mutex that protect svc_table and svc_fwm_table */ struct mutex service_mutex; - /* the service table hashed by */ - struct hlist_head svc_table[IP_VS_SVC_TAB_SIZE]; - /* the service table hashed by fwmark */ - struct hlist_head svc_fwm_table[IP_VS_SVC_TAB_SIZE]; + struct hlist_head svc_table[IP_VS_SVC_TAB_SIZE]; /* Services */ }; #define DEFAULT_SYNC_THRESHOLD 3 diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index b9eaf048a29f..2ef1f99dada6 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -328,7 +328,7 @@ static inline unsigned int ip_vs_svc_fwm_hashkey(struct netns_ipvs *ipvs, __u32 /* * Hashes a service in the svc_table by - * or in the svc_fwm_table by fwmark. + * or by fwmark. * Should be called with locked tables. */ static int ip_vs_svc_hash(struct ip_vs_service *svc) @@ -343,18 +343,17 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) if (svc->fwmark == 0) { /* - * Hash it by in svc_table + * Hash it by */ hash = ip_vs_svc_hashkey(svc->ipvs, svc->af, svc->protocol, &svc->addr, svc->port); - hlist_add_head_rcu(&svc->s_list, &svc->ipvs->svc_table[hash]); } else { /* - * Hash it by fwmark in svc_fwm_table + * Hash it by fwmark */ hash = ip_vs_svc_fwm_hashkey(svc->ipvs, svc->fwmark); - hlist_add_head_rcu(&svc->f_list, &svc->ipvs->svc_fwm_table[hash]); } + hlist_add_head_rcu(&svc->s_list, &svc->ipvs->svc_table[hash]); svc->flags |= IP_VS_SVC_F_HASHED; /* increase its refcnt because it is referenced by the svc table */ @@ -364,7 +363,7 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) /* - * Unhashes a service from svc_table / svc_fwm_table. + * Unhashes a service from svc_table. * Should be called with locked tables. */ static int ip_vs_svc_unhash(struct ip_vs_service *svc) @@ -375,13 +374,8 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc) return 0; } - if (svc->fwmark == 0) { - /* Remove it from the svc_table table */ - hlist_del_rcu(&svc->s_list); - } else { - /* Remove it from the svc_fwm_table table */ - hlist_del_rcu(&svc->f_list); - } + /* Remove it from svc_table */ + hlist_del_rcu(&svc->s_list); svc->flags &= ~IP_VS_SVC_F_HASHED; atomic_dec(&svc->refcnt); @@ -404,7 +398,8 @@ __ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u16 protocol, hlist_for_each_entry_rcu(svc, &ipvs->svc_table[hash], s_list) { if (svc->af == af && ip_vs_addr_equal(af, &svc->addr, vaddr) && - svc->port == vport && svc->protocol == protocol) { + svc->port == vport && svc->protocol == protocol && + !svc->fwmark) { /* HIT */ return svc; } @@ -426,7 +421,7 @@ __ip_vs_svc_fwm_find(struct netns_ipvs *ipvs, int af, __u32 fwmark) /* Check for fwmark addressed entries */ hash = ip_vs_svc_fwm_hashkey(ipvs, fwmark); - hlist_for_each_entry_rcu(svc, &ipvs->svc_fwm_table[hash], f_list) { + hlist_for_each_entry_rcu(svc, &ipvs->svc_table[hash], s_list) { if (svc->fwmark == fwmark && svc->af == af) { /* HIT */ return svc; @@ -1682,26 +1677,11 @@ static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup) struct ip_vs_service *svc; struct hlist_node *n; - /* - * Flush the service table hashed by - */ for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { hlist_for_each_entry_safe(svc, n, &ipvs->svc_table[idx], - s_list) { + s_list) ip_vs_unlink_service(svc, cleanup); - } } - - /* - * Flush the service table hashed by fwmark - */ - for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - hlist_for_each_entry_safe(svc, n, &ipvs->svc_fwm_table[idx], - f_list) { - ip_vs_unlink_service(svc, cleanup); - } - } - return 0; } @@ -1764,11 +1744,6 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, list_for_each_entry_rcu(dest, &svc->destinations, n_list) ip_vs_forget_dev(dest, dev); - - hlist_for_each_entry_rcu(svc, &ipvs->svc_fwm_table[idx], f_list) - list_for_each_entry_rcu(dest, &svc->destinations, - n_list) - ip_vs_forget_dev(dest, dev); } rcu_read_unlock(); @@ -1802,15 +1777,8 @@ static int ip_vs_zero_all(struct netns_ipvs *ipvs) struct ip_vs_service *svc; for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - hlist_for_each_entry(svc, &ipvs->svc_table[idx], s_list) { + hlist_for_each_entry(svc, &ipvs->svc_table[idx], s_list) ip_vs_zero_service(svc); - } - } - - for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - hlist_for_each_entry(svc, &ipvs->svc_fwm_table[idx], f_list) { - ip_vs_zero_service(svc); - } } ip_vs_zero_stats(&ipvs->tot_stats->s); @@ -2246,7 +2214,6 @@ static struct ctl_table vs_vars[] = { struct ip_vs_iter { struct seq_net_private p; /* Do not move this, netns depends upon it*/ - struct hlist_head *table; int bucket; }; @@ -2269,7 +2236,6 @@ static inline const char *ip_vs_fwd_name(unsigned int flags) } -/* Get the Nth entry in the two lists */ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) { struct net *net = seq_file_net(seq); @@ -2278,29 +2244,14 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) int idx; struct ip_vs_service *svc; - /* look in hash by protocol */ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { hlist_for_each_entry_rcu(svc, &ipvs->svc_table[idx], s_list) { if (pos-- == 0) { - iter->table = ipvs->svc_table; iter->bucket = idx; return svc; } } } - - /* keep looking in fwmark */ - for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - hlist_for_each_entry_rcu(svc, &ipvs->svc_fwm_table[idx], - f_list) { - if (pos-- == 0) { - iter->table = ipvs->svc_fwm_table; - iter->bucket = idx; - return svc; - } - } - } - return NULL; } @@ -2327,38 +2278,17 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) svc = v; iter = seq->private; - if (iter->table == ipvs->svc_table) { - /* next service in table hashed by protocol */ - e = rcu_dereference(hlist_next_rcu(&svc->s_list)); - if (e) - return hlist_entry(e, struct ip_vs_service, s_list); - - while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { - hlist_for_each_entry_rcu(svc, - &ipvs->svc_table[iter->bucket], - s_list) { - return svc; - } - } - - iter->table = ipvs->svc_fwm_table; - iter->bucket = -1; - goto scan_fwmark; - } - - /* next service in hashed by fwmark */ - e = rcu_dereference(hlist_next_rcu(&svc->f_list)); + e = rcu_dereference(hlist_next_rcu(&svc->s_list)); if (e) - return hlist_entry(e, struct ip_vs_service, f_list); + return hlist_entry(e, struct ip_vs_service, s_list); - scan_fwmark: while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { hlist_for_each_entry_rcu(svc, - &ipvs->svc_fwm_table[iter->bucket], - f_list) + &ipvs->svc_table[iter->bucket], + s_list) { return svc; + } } - return NULL; } @@ -2380,17 +2310,12 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) seq_puts(seq, " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n"); } else { - struct net *net = seq_file_net(seq); - struct netns_ipvs *ipvs = net_ipvs(net); const struct ip_vs_service *svc = v; - const struct ip_vs_iter *iter = seq->private; const struct ip_vs_dest *dest; struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler); char *sched_name = sched ? sched->name : "none"; - if (svc->ipvs != ipvs) - return 0; - if (iter->table == ipvs->svc_table) { + if (!svc->fwmark) { #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) seq_printf(seq, "%s [%pI6]:%04X %s ", @@ -2865,24 +2790,6 @@ __ip_vs_get_service_entries(struct netns_ipvs *ipvs, } } - for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - hlist_for_each_entry(svc, &ipvs->svc_fwm_table[idx], f_list) { - /* Only expose IPv4 entries to old interface */ - if (svc->af != AF_INET) - continue; - - if (count >= get->num_services) - goto out; - memset(&entry, 0, sizeof(entry)); - ip_vs_copy_service(&entry, svc); - if (copy_to_user(&uptr->entrytable[count], - &entry, sizeof(entry))) { - ret = -EFAULT; - goto out; - } - count++; - } - } out: return ret; } @@ -3383,17 +3290,6 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, } } - for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { - hlist_for_each_entry_rcu(svc, &ipvs->svc_fwm_table[i], f_list) { - if (++idx <= start) - continue; - if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { - idx--; - goto nla_put_failure; - } - } - } - nla_put_failure: rcu_read_unlock(); cb->args[0] = idx; @@ -4403,12 +4299,10 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) int ret = -ENOMEM; int idx; - /* Initialize service_mutex, svc_table, svc_fwm_table per netns */ + /* Initialize service_mutex, svc_table per netns */ __mutex_init(&ipvs->service_mutex, "ipvs->service_mutex", &__ipvs_service_key); - for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { + for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) INIT_HLIST_HEAD(&ipvs->svc_table[idx]); - INIT_HLIST_HEAD(&ipvs->svc_fwm_table[idx]); - } /* Initialize rs_table */ for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) From 40fb72209fd83edaf3632dba644b1fcf81143cfc Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Tue, 24 Feb 2026 21:50:43 +0100 Subject: [PATCH 0044/1561] ipvs: do not keep dest_dst after dest is removed Before now dest->dest_dst is not released when server is moved into dest_trash list after removal. As result, we can keep dst/dev references for long time without actively using them. It is better to avoid walking the dest_trash list when ip_vs_dst_event() receives dev events. So, make sure we do not hold dev references in dest_trash list. As packets can be flying while server is being removed, check the IP_VS_DEST_F_AVAILABLE flag in slow path to ensure we do not save new dev references to removed servers. Signed-off-by: Julian Anastasov Signed-off-by: Florian Westphal Link: https://patch.msgid.link/20260224205048.4718-5-fw@strlen.de Signed-off-by: Jakub Kicinski --- net/netfilter/ipvs/ip_vs_ctl.c | 20 ++++++++------------ net/netfilter/ipvs/ip_vs_xmit.c | 12 ++++++++---- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 2ef1f99dada6..7c0e2d9b5b98 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -809,7 +809,6 @@ static void ip_vs_dest_free(struct ip_vs_dest *dest) { struct ip_vs_service *svc = rcu_dereference_protected(dest->svc, 1); - __ip_vs_dst_cache_reset(dest); __ip_vs_svc_put(svc); call_rcu(&dest->rcu_head, ip_vs_dest_rcu_free); } @@ -1012,10 +1011,6 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, dest->af = udest->af; - spin_lock_bh(&dest->dst_lock); - __ip_vs_dst_cache_reset(dest); - spin_unlock_bh(&dest->dst_lock); - if (add) { list_add_rcu(&dest->n_list, &svc->destinations); svc->num_dests++; @@ -1023,6 +1018,10 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, if (sched && sched->add_dest) sched->add_dest(svc, dest); } else { + spin_lock_bh(&dest->dst_lock); + __ip_vs_dst_cache_reset(dest); + spin_unlock_bh(&dest->dst_lock); + sched = rcu_dereference_protected(svc->scheduler, 1); if (sched && sched->upd_dest) sched->upd_dest(svc, dest); @@ -1257,6 +1256,10 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc, { dest->flags &= ~IP_VS_DEST_F_AVAILABLE; + spin_lock_bh(&dest->dst_lock); + __ip_vs_dst_cache_reset(dest); + spin_unlock_bh(&dest->dst_lock); + /* * Remove it from the d-linked destination list. */ @@ -1747,13 +1750,6 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, } rcu_read_unlock(); - mutex_lock(&ipvs->service_mutex); - spin_lock_bh(&ipvs->dest_trash_lock); - list_for_each_entry(dest, &ipvs->dest_trash, t_list) { - ip_vs_forget_dev(dest, dev); - } - spin_unlock_bh(&ipvs->dest_trash_lock); - mutex_unlock(&ipvs->service_mutex); return NOTIFY_DONE; } diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 4389bfe3050d..394b5b5f2ccd 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -336,9 +336,11 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, goto err_unreach; } /* It is forbidden to attach dest->dest_dst if - * device is going down. + * device is going down or if server is removed and + * stored in dest_trash. */ - if (!rt_dev_is_down(dst_dev_rcu(&rt->dst))) + if (!rt_dev_is_down(dst_dev_rcu(&rt->dst)) && + dest->flags & IP_VS_DEST_F_AVAILABLE) __ip_vs_dst_set(dest, dest_dst, &rt->dst, 0); else noref = 0; @@ -513,9 +515,11 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, rt = dst_rt6_info(dst); cookie = rt6_get_cookie(rt); /* It is forbidden to attach dest->dest_dst if - * device is going down. + * device is going down or if server is removed and + * stored in dest_trash. */ - if (!rt_dev_is_down(dst_dev_rcu(&rt->dst))) + if (!rt_dev_is_down(dst_dev_rcu(&rt->dst)) && + dest->flags & IP_VS_DEST_F_AVAILABLE) __ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie); else noref = 0; From c59bd9e62e060bb5cd4d697b73bbe6f23a723345 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Tue, 24 Feb 2026 21:50:44 +0100 Subject: [PATCH 0045/1561] ipvs: use more counters to avoid service lookups When new connection is created we can lookup for services multiple times to support fallback options. We already have some counters to skip specific lookups because it costs CPU cycles for hash calculation, etc. Add more counters for fwmark/non-fwmark services (fwm_services and nonfwm_services) and make all counters per address family. Signed-off-by: Julian Anastasov Signed-off-by: Florian Westphal Link: https://patch.msgid.link/20260224205048.4718-6-fw@strlen.de Signed-off-by: Jakub Kicinski --- include/net/ip_vs.h | 24 ++++++--- net/netfilter/ipvs/ip_vs_core.c | 2 +- net/netfilter/ipvs/ip_vs_ctl.c | 86 +++++++++++++++++++-------------- 3 files changed, 69 insertions(+), 43 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index b5a5a5efe3cc..f2291be36409 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -271,6 +271,18 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len, pr_err(msg, ##__VA_ARGS__); \ } while (0) +/* For arrays per family */ +enum { + IP_VS_AF_INET, + IP_VS_AF_INET6, + IP_VS_AF_MAX +}; + +static inline int ip_vs_af_index(int af) +{ + return af == AF_INET6 ? IP_VS_AF_INET6 : IP_VS_AF_INET; +} + /* The port number of FTP service (in network order). */ #define FTPPORT cpu_to_be16(21) #define FTPDATA cpu_to_be16(20) @@ -940,17 +952,17 @@ struct netns_ipvs { /* ip_vs_ctl */ struct ip_vs_stats_rcu *tot_stats; /* Statistics & est. */ - int num_services; /* no of virtual services */ - int num_services6; /* IPv6 virtual services */ - /* Trash for destinations */ struct list_head dest_trash; spinlock_t dest_trash_lock; struct timer_list dest_trash_timer; /* expiration timer */ /* Service counters */ - atomic_t ftpsvc_counter; - atomic_t nullsvc_counter; - atomic_t conn_out_counter; + atomic_t num_services[IP_VS_AF_MAX]; /* Services */ + atomic_t fwm_services[IP_VS_AF_MAX]; /* Services */ + atomic_t nonfwm_services[IP_VS_AF_MAX];/* Services */ + atomic_t ftpsvc_counter[IP_VS_AF_MAX]; /* FTPPORT */ + atomic_t nullsvc_counter[IP_VS_AF_MAX];/* Zero port */ + atomic_t conn_out_counter[IP_VS_AF_MAX];/* out conn */ #ifdef CONFIG_SYSCTL /* delayed work for expiring no dest connections */ diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 90d56f92c0f6..869f18e0e835 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1400,7 +1400,7 @@ ip_vs_out_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *stat return handle_response(af, skb, pd, cp, &iph, hooknum); /* Check for real-server-started requests */ - if (atomic_read(&ipvs->conn_out_counter)) { + if (atomic_read(&ipvs->conn_out_counter[ip_vs_af_index(af)])) { /* Currently only for UDP: * connection oriented protocols typically use * ephemeral ports for outgoing connections, so diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 7c0e2d9b5b98..564e09f0f90a 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -436,35 +436,42 @@ struct ip_vs_service * ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u32 fwmark, __u16 protocol, const union nf_inet_addr *vaddr, __be16 vport) { - struct ip_vs_service *svc; + struct ip_vs_service *svc = NULL; + int af_id = ip_vs_af_index(af); /* * Check the table hashed by fwmark first */ - if (fwmark) { + if (fwmark && atomic_read(&ipvs->fwm_services[af_id])) { svc = __ip_vs_svc_fwm_find(ipvs, af, fwmark); if (svc) goto out; } + if (!atomic_read(&ipvs->nonfwm_services[af_id])) + goto out; + /* * Check the table hashed by * for "full" addressed entries */ svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, vport); + if (svc) + goto out; - if (!svc && protocol == IPPROTO_TCP && - atomic_read(&ipvs->ftpsvc_counter) && + if (protocol == IPPROTO_TCP && + atomic_read(&ipvs->ftpsvc_counter[af_id]) && (vport == FTPDATA || !inet_port_requires_bind_service(ipvs->net, ntohs(vport)))) { /* * Check if ftp service entry exists, the packet * might belong to FTP data connections. */ svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, FTPPORT); + if (svc) + goto out; } - if (svc == NULL - && atomic_read(&ipvs->nullsvc_counter)) { + if (atomic_read(&ipvs->nullsvc_counter[af_id])) { /* * Check if the catch-all port (port zero) exists */ @@ -1352,6 +1359,7 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, { int ret = 0; struct ip_vs_scheduler *sched = NULL; + int af_id = ip_vs_af_index(u->af); struct ip_vs_pe *pe = NULL; struct ip_vs_service *svc = NULL; int ret_hooks = -1; @@ -1396,8 +1404,7 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, } #endif - if ((u->af == AF_INET && !ipvs->num_services) || - (u->af == AF_INET6 && !ipvs->num_services6)) { + if (!atomic_read(&ipvs->num_services[af_id])) { ret = ip_vs_register_hooks(ipvs, u->af); if (ret < 0) goto out_err; @@ -1444,21 +1451,21 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, /* Update the virtual service counters */ if (svc->port == FTPPORT) - atomic_inc(&ipvs->ftpsvc_counter); - else if (svc->port == 0) - atomic_inc(&ipvs->nullsvc_counter); + atomic_inc(&ipvs->ftpsvc_counter[af_id]); + else if (!svc->port && !svc->fwmark) + atomic_inc(&ipvs->nullsvc_counter[af_id]); if (pe && pe->conn_out) - atomic_inc(&ipvs->conn_out_counter); + atomic_inc(&ipvs->conn_out_counter[af_id]); /* Bind the ct retriever */ RCU_INIT_POINTER(svc->pe, pe); pe = NULL; - /* Count only IPv4 services for old get/setsockopt interface */ - if (svc->af == AF_INET) - ipvs->num_services++; - else if (svc->af == AF_INET6) - ipvs->num_services6++; + if (svc->fwmark) + atomic_inc(&ipvs->fwm_services[af_id]); + else + atomic_inc(&ipvs->nonfwm_services[af_id]); + atomic_inc(&ipvs->num_services[af_id]); /* Hash the service into the service table */ ip_vs_svc_hash(svc); @@ -1503,6 +1510,8 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) struct ip_vs_pe *pe = NULL, *old_pe = NULL; int ret = 0; bool new_pe_conn_out, old_pe_conn_out; + struct netns_ipvs *ipvs = svc->ipvs; + int af_id = ip_vs_af_index(svc->af); /* * Lookup the scheduler, by 'u->sched_name' @@ -1571,9 +1580,9 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) new_pe_conn_out = (pe && pe->conn_out) ? true : false; old_pe_conn_out = (old_pe && old_pe->conn_out) ? true : false; if (new_pe_conn_out && !old_pe_conn_out) - atomic_inc(&svc->ipvs->conn_out_counter); + atomic_inc(&ipvs->conn_out_counter[af_id]); if (old_pe_conn_out && !new_pe_conn_out) - atomic_dec(&svc->ipvs->conn_out_counter); + atomic_dec(&ipvs->conn_out_counter[af_id]); } out: @@ -1593,16 +1602,15 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) struct ip_vs_scheduler *old_sched; struct ip_vs_pe *old_pe; struct netns_ipvs *ipvs = svc->ipvs; + int af_id = ip_vs_af_index(svc->af); - if (svc->af == AF_INET) { - ipvs->num_services--; - if (!ipvs->num_services) - ip_vs_unregister_hooks(ipvs, svc->af); - } else if (svc->af == AF_INET6) { - ipvs->num_services6--; - if (!ipvs->num_services6) - ip_vs_unregister_hooks(ipvs, svc->af); - } + atomic_dec(&ipvs->num_services[af_id]); + if (!atomic_read(&ipvs->num_services[af_id])) + ip_vs_unregister_hooks(ipvs, svc->af); + if (svc->fwmark) + atomic_dec(&ipvs->fwm_services[af_id]); + else + atomic_dec(&ipvs->nonfwm_services[af_id]); ip_vs_stop_estimator(svc->ipvs, &svc->stats); @@ -1614,7 +1622,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) /* Unbind persistence engine, keep svc->pe */ old_pe = rcu_dereference_protected(svc->pe, 1); if (old_pe && old_pe->conn_out) - atomic_dec(&ipvs->conn_out_counter); + atomic_dec(&ipvs->conn_out_counter[af_id]); ip_vs_pe_put(old_pe); /* @@ -1629,9 +1637,9 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) * Update the virtual service counters */ if (svc->port == FTPPORT) - atomic_dec(&ipvs->ftpsvc_counter); - else if (svc->port == 0) - atomic_dec(&ipvs->nullsvc_counter); + atomic_dec(&ipvs->ftpsvc_counter[af_id]); + else if (!svc->port && !svc->fwmark) + atomic_dec(&ipvs->nullsvc_counter[af_id]); /* * Free the service if nobody refers to it @@ -2961,7 +2969,8 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) struct ip_vs_getinfo info; info.version = IP_VS_VERSION_CODE; info.size = ip_vs_conn_tab_size; - info.num_services = ipvs->num_services; + info.num_services = + atomic_read(&ipvs->num_services[IP_VS_AF_INET]); if (copy_to_user(user, &info, sizeof(info)) != 0) ret = -EFAULT; } @@ -4307,9 +4316,14 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) INIT_LIST_HEAD(&ipvs->dest_trash); spin_lock_init(&ipvs->dest_trash_lock); timer_setup(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire, 0); - atomic_set(&ipvs->ftpsvc_counter, 0); - atomic_set(&ipvs->nullsvc_counter, 0); - atomic_set(&ipvs->conn_out_counter, 0); + for (idx = 0; idx < IP_VS_AF_MAX; idx++) { + atomic_set(&ipvs->num_services[idx], 0); + atomic_set(&ipvs->fwm_services[idx], 0); + atomic_set(&ipvs->nonfwm_services[idx], 0); + atomic_set(&ipvs->ftpsvc_counter[idx], 0); + atomic_set(&ipvs->nullsvc_counter[idx], 0); + atomic_set(&ipvs->conn_out_counter[idx], 0); + } INIT_DELAYED_WORK(&ipvs->est_reload_work, est_reload_work_handler); From 09b71fb459468b408f3fa3e2b75d20113374f057 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Tue, 24 Feb 2026 21:50:45 +0100 Subject: [PATCH 0046/1561] ipvs: no_cport and dropentry counters can be per-net Change the no_cport counters to be per-net and address family. This should reduce the extra conn lookups done during present NO_CPORT connections. By changing from global to per-net dropentry counters, one net will not affect the drop rate of another net. Signed-off-by: Julian Anastasov Signed-off-by: Florian Westphal Link: https://patch.msgid.link/20260224205048.4718-7-fw@strlen.de Signed-off-by: Jakub Kicinski --- include/net/ip_vs.h | 2 ++ net/netfilter/ipvs/ip_vs_conn.c | 64 ++++++++++++++++++++------------- 2 files changed, 41 insertions(+), 25 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index f2291be36409..ad8a16146ac5 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -948,6 +948,7 @@ struct netns_ipvs { #endif /* ip_vs_conn */ atomic_t conn_count; /* connection counter */ + atomic_t no_cport_conns[IP_VS_AF_MAX]; /* ip_vs_ctl */ struct ip_vs_stats_rcu *tot_stats; /* Statistics & est. */ @@ -973,6 +974,7 @@ struct netns_ipvs { int drop_counter; int old_secure_tcp; atomic_t dropentry; + s8 dropentry_counters[8]; /* locks in ctl.c */ spinlock_t dropentry_lock; /* drop entry handling */ spinlock_t droppacket_lock; /* drop packet handling */ diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 50cc492c7553..66057db63d02 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -59,9 +59,6 @@ static struct hlist_head *ip_vs_conn_tab __read_mostly; /* SLAB cache for IPVS connections */ static struct kmem_cache *ip_vs_conn_cachep __read_mostly; -/* counter for no client port connections */ -static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0); - /* random value for IPVS connection hash */ static unsigned int ip_vs_conn_rnd __read_mostly; @@ -294,10 +291,16 @@ struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p) struct ip_vs_conn *cp; cp = __ip_vs_conn_in_get(p); - if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt)) { - struct ip_vs_conn_param cport_zero_p = *p; - cport_zero_p.cport = 0; - cp = __ip_vs_conn_in_get(&cport_zero_p); + if (!cp) { + struct netns_ipvs *ipvs = p->ipvs; + int af_id = ip_vs_af_index(p->af); + + if (atomic_read(&ipvs->no_cport_conns[af_id])) { + struct ip_vs_conn_param cport_zero_p = *p; + + cport_zero_p.cport = 0; + cp = __ip_vs_conn_in_get(&cport_zero_p); + } } IP_VS_DBG_BUF(9, "lookup/in %s %s:%d->%s:%d %s\n", @@ -490,9 +493,12 @@ void ip_vs_conn_put(struct ip_vs_conn *cp) void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport) { if (ip_vs_conn_unhash(cp)) { + struct netns_ipvs *ipvs = cp->ipvs; + int af_id = ip_vs_af_index(cp->af); + spin_lock_bh(&cp->lock); if (cp->flags & IP_VS_CONN_F_NO_CPORT) { - atomic_dec(&ip_vs_conn_no_cport_cnt); + atomic_dec(&ipvs->no_cport_conns[af_id]); cp->flags &= ~IP_VS_CONN_F_NO_CPORT; cp->cport = cport; } @@ -891,8 +897,11 @@ static void ip_vs_conn_expire(struct timer_list *t) if (unlikely(cp->app != NULL)) ip_vs_unbind_app(cp); ip_vs_unbind_dest(cp); - if (cp->flags & IP_VS_CONN_F_NO_CPORT) - atomic_dec(&ip_vs_conn_no_cport_cnt); + if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { + int af_id = ip_vs_af_index(cp->af); + + atomic_dec(&ipvs->no_cport_conns[af_id]); + } if (cp->flags & IP_VS_CONN_F_ONE_PACKET) ip_vs_conn_rcu_free(&cp->rcu_head); else @@ -999,8 +1008,11 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af, cp->out_seq.delta = 0; atomic_inc(&ipvs->conn_count); - if (flags & IP_VS_CONN_F_NO_CPORT) - atomic_inc(&ip_vs_conn_no_cport_cnt); + if (unlikely(flags & IP_VS_CONN_F_NO_CPORT)) { + int af_id = ip_vs_af_index(cp->af); + + atomic_inc(&ipvs->no_cport_conns[af_id]); + } /* Bind the connection with a destination server */ cp->dest = NULL; @@ -1257,6 +1269,7 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = { }; #endif +#ifdef CONFIG_SYSCTL /* Randomly drop connection entries before running out of memory * Can be used for DATA and CTL conns. For TPL conns there are exceptions: @@ -1266,12 +1279,7 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = { */ static inline int todrop_entry(struct ip_vs_conn *cp) { - /* - * The drop rate array needs tuning for real environments. - * Called from timer bh only => no locking - */ - static const signed char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8}; - static signed char todrop_counter[9] = {0}; + struct netns_ipvs *ipvs = cp->ipvs; int i; /* if the conn entry hasn't lasted for 60 seconds, don't drop it. @@ -1280,15 +1288,17 @@ static inline int todrop_entry(struct ip_vs_conn *cp) if (time_before(cp->timeout + jiffies, cp->timer.expires + 60*HZ)) return 0; - /* Don't drop the entry if its number of incoming packets is not - located in [0, 8] */ + /* Drop only conns with number of incoming packets in [1..8] range */ i = atomic_read(&cp->in_pkts); - if (i > 8 || i < 0) return 0; + if (i > 8 || i < 1) + return 0; - if (!todrop_rate[i]) return 0; - if (--todrop_counter[i] > 0) return 0; + i--; + if (--ipvs->dropentry_counters[i] > 0) + return 0; - todrop_counter[i] = todrop_rate[i]; + /* Prefer to drop conns with less number of incoming packets */ + ipvs->dropentry_counters[i] = i + 1; return 1; } @@ -1368,7 +1378,7 @@ drop: } rcu_read_unlock(); } - +#endif /* * Flush all the connection entries in the ip_vs_conn_tab @@ -1450,7 +1460,11 @@ void ip_vs_expire_nodest_conn_flush(struct netns_ipvs *ipvs) */ int __net_init ip_vs_conn_net_init(struct netns_ipvs *ipvs) { + int idx; + atomic_set(&ipvs->conn_count, 0); + for (idx = 0; idx < IP_VS_AF_MAX; idx++) + atomic_set(&ipvs->no_cport_conns[idx], 0); #ifdef CONFIG_PROC_FS if (!proc_create_net("ip_vs_conn", 0, ipvs->net->proc_net, From 3aea466a439984053d9240b0ca946842a91e484d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 24 Feb 2026 21:50:46 +0100 Subject: [PATCH 0047/1561] netfilter: nft_set_rbtree: don't disable bh when acquiring tree lock As of commit 7e43e0a1141d ("netfilter: nft_set_rbtree: translate rbtree to array for binary search") the lock is only taken from control plane, no need to disable BH anymore. Signed-off-by: Florian Westphal Link: https://patch.msgid.link/20260224205048.4718-8-fw@strlen.de Signed-off-by: Jakub Kicinski --- net/netfilter/nft_set_rbtree.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 644d4b916705..ef567a948703 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -684,9 +684,9 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set, cond_resched(); - write_lock_bh(&priv->lock); + write_lock(&priv->lock); err = __nft_rbtree_insert(net, set, rbe, elem_priv, tstamp, last); - write_unlock_bh(&priv->lock); + write_unlock(&priv->lock); if (nft_rbtree_interval_end(rbe)) priv->start_rbe_cookie = 0; @@ -696,13 +696,6 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set, return err; } -static void nft_rbtree_erase(struct nft_rbtree *priv, struct nft_rbtree_elem *rbe) -{ - write_lock_bh(&priv->lock); - rb_erase(&rbe->node, &priv->root); - write_unlock_bh(&priv->lock); -} - static void nft_rbtree_remove(const struct net *net, const struct nft_set *set, struct nft_elem_priv *elem_priv) @@ -710,7 +703,9 @@ static void nft_rbtree_remove(const struct net *net, struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem_priv); struct nft_rbtree *priv = nft_set_priv(set); - nft_rbtree_erase(priv, rbe); + write_lock(&priv->lock); + rb_erase(&rbe->node, &priv->root); + write_unlock(&priv->lock); } static void nft_rbtree_activate(const struct net *net, @@ -871,9 +866,9 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, nft_rbtree_do_walk(ctx, set, iter); break; case NFT_ITER_READ: - read_lock_bh(&priv->lock); + read_lock(&priv->lock); nft_rbtree_do_walk(ctx, set, iter); - read_unlock_bh(&priv->lock); + read_unlock(&priv->lock); break; default: iter->err = -EINVAL; @@ -909,14 +904,14 @@ static void nft_rbtree_gc_scan(struct nft_set *set) /* end element needs to be removed first, it has * no timeout extension. */ - write_lock_bh(&priv->lock); + write_lock(&priv->lock); if (rbe_end) { nft_rbtree_gc_elem_move(net, set, priv, rbe_end); rbe_end = NULL; } nft_rbtree_gc_elem_move(net, set, priv, rbe); - write_unlock_bh(&priv->lock); + write_unlock(&priv->lock); } priv->last_gc = jiffies; From b6461103e01ae863148e30c663a06f7327cf6681 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 24 Feb 2026 21:50:47 +0100 Subject: [PATCH 0048/1561] netfilter: nf_tables: drop obsolete EXPORT_SYMBOLs These are no longer required, calling objects are nowadays baked into nf_tables.ko itself. Signed-off-by: Florian Westphal Link: https://patch.msgid.link/20260224205048.4718-9-fw@strlen.de Signed-off-by: Jakub Kicinski --- net/netfilter/nf_tables_api.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 0c5a4855b97d..23ef775897a4 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4128,7 +4128,6 @@ int nft_chain_validate(const struct nft_ctx *ctx, struct nft_chain *chain) nft_chain_vstate_update(ctx, chain); return 0; } -EXPORT_SYMBOL_GPL(nft_chain_validate); static int nft_table_validate(struct net *net, const struct nft_table *table) { @@ -4745,7 +4744,6 @@ struct nft_set *nft_set_lookup_global(const struct net *net, } return set; } -EXPORT_SYMBOL_GPL(nft_set_lookup_global); static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set, const char *name) @@ -5818,7 +5816,6 @@ bind: return 0; } -EXPORT_SYMBOL_GPL(nf_tables_bind_set); static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding, bool event) @@ -5898,7 +5895,6 @@ void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set) nft_use_inc_restore(&set->use); } -EXPORT_SYMBOL_GPL(nf_tables_activate_set); void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding, @@ -5938,14 +5934,12 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, phase == NFT_TRANS_COMMIT); } } -EXPORT_SYMBOL_GPL(nf_tables_deactivate_set); void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set) { if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) nft_set_destroy(ctx, set); } -EXPORT_SYMBOL_GPL(nf_tables_destroy_set); const struct nft_set_ext_type nft_set_ext_types[] = { [NFT_SET_EXT_KEY] = { @@ -6783,7 +6777,6 @@ void nft_set_elem_destroy(const struct nft_set *set, __nft_set_elem_destroy(&ctx, set, elem_priv, destroy_expr); } -EXPORT_SYMBOL_GPL(nft_set_elem_destroy); /* Drop references and destroy. Called from abort path. */ static void nft_trans_set_elem_destroy(const struct nft_ctx *ctx, struct nft_trans_elem *te) @@ -6910,7 +6903,6 @@ struct nft_set_ext *nft_set_catchall_lookup(const struct net *net, return NULL; } -EXPORT_SYMBOL_GPL(nft_set_catchall_lookup); static int nft_setelem_catchall_insert(const struct net *net, struct nft_set *set, @@ -8038,7 +8030,6 @@ out: rcu_read_unlock(); return ERR_PTR(-ENOENT); } -EXPORT_SYMBOL_GPL(nft_obj_lookup); static struct nft_object *nft_obj_lookup_byhandle(const struct nft_table *table, const struct nlattr *nla, @@ -11860,7 +11851,6 @@ void nft_data_release(const struct nft_data *data, enum nft_data_types type) WARN_ON(1); } } -EXPORT_SYMBOL_GPL(nft_data_release); int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, enum nft_data_types type, unsigned int len) @@ -11887,7 +11877,6 @@ int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, nla_nest_end(skb, nest); return err; } -EXPORT_SYMBOL_GPL(nft_data_dump); static void __nft_release_hook(struct net *net, struct nft_table *table) { From 6b94d081f81dd524626f7aab2b98a9de335edb72 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 24 Feb 2026 21:50:48 +0100 Subject: [PATCH 0049/1561] netfilter: nf_tables: remove register tracking infrastructure This facility was disabled in commit 9e539c5b6d9c ("netfilter: nf_tables: disable expression reduction infra"), because not all nft_exprs guarantee they will update the destination register: some may set NFT_BREAK instead to cancel evaluation of the rule. This has been dead code ever since. There are no plans to salvage this at this time, so remove this. Signed-off-by: Florian Westphal Link: https://patch.msgid.link/20260224205048.4718-10-fw@strlen.de Signed-off-by: Jakub Kicinski --- include/net/netfilter/nf_tables.h | 32 ------- include/net/netfilter/nft_fib.h | 2 - include/net/netfilter/nft_meta.h | 3 - net/bridge/netfilter/nft_meta_bridge.c | 20 ----- net/bridge/netfilter/nft_reject_bridge.c | 1 - net/ipv4/netfilter/nft_dup_ipv4.c | 1 - net/ipv4/netfilter/nft_fib_ipv4.c | 2 - net/ipv4/netfilter/nft_reject_ipv4.c | 1 - net/ipv6/netfilter/nft_dup_ipv6.c | 1 - net/ipv6/netfilter/nft_fib_ipv6.c | 2 - net/ipv6/netfilter/nft_reject_ipv6.c | 1 - net/netfilter/nf_tables_api.c | 67 --------------- net/netfilter/nft_bitwise.c | 104 ----------------------- net/netfilter/nft_byteorder.c | 11 --- net/netfilter/nft_cmp.c | 3 - net/netfilter/nft_compat.c | 10 --- net/netfilter/nft_connlimit.c | 1 - net/netfilter/nft_counter.c | 1 - net/netfilter/nft_ct.c | 46 ---------- net/netfilter/nft_dup_netdev.c | 1 - net/netfilter/nft_dynset.c | 1 - net/netfilter/nft_exthdr.c | 34 -------- net/netfilter/nft_fib.c | 42 --------- net/netfilter/nft_fib_inet.c | 1 - net/netfilter/nft_fib_netdev.c | 1 - net/netfilter/nft_flow_offload.c | 1 - net/netfilter/nft_fwd_netdev.c | 2 - net/netfilter/nft_hash.c | 36 -------- net/netfilter/nft_immediate.c | 12 --- net/netfilter/nft_last.c | 1 - net/netfilter/nft_limit.c | 2 - net/netfilter/nft_log.c | 1 - net/netfilter/nft_lookup.c | 12 --- net/netfilter/nft_masq.c | 3 - net/netfilter/nft_meta.c | 45 ---------- net/netfilter/nft_nat.c | 2 - net/netfilter/nft_numgen.c | 22 ----- net/netfilter/nft_objref.c | 2 - net/netfilter/nft_osf.c | 25 ------ net/netfilter/nft_payload.c | 47 ---------- net/netfilter/nft_queue.c | 2 - net/netfilter/nft_quota.c | 1 - net/netfilter/nft_range.c | 1 - net/netfilter/nft_redir.c | 3 - net/netfilter/nft_reject_inet.c | 1 - net/netfilter/nft_reject_netdev.c | 1 - net/netfilter/nft_rt.c | 1 - net/netfilter/nft_socket.c | 26 ------ net/netfilter/nft_synproxy.c | 1 - net/netfilter/nft_tproxy.c | 1 - net/netfilter/nft_tunnel.c | 26 ------ net/netfilter/nft_xfrm.c | 27 ------ 52 files changed, 693 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 426534a711b0..40e8106e71f0 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -122,17 +122,6 @@ struct nft_regs { }; }; -struct nft_regs_track { - struct { - const struct nft_expr *selector; - const struct nft_expr *bitwise; - u8 num_reg; - } regs[NFT_REG32_NUM]; - - const struct nft_expr *cur; - const struct nft_expr *last; -}; - /* Store/load an u8, u16 or u64 integer to/from the u32 data register. * * Note, when using concatenations, register allocation happens at 32-bit @@ -427,8 +416,6 @@ int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src, gfp_t gfp); void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr); int nft_expr_dump(struct sk_buff *skb, unsigned int attr, const struct nft_expr *expr, bool reset); -bool nft_expr_reduce_bitwise(struct nft_regs_track *track, - const struct nft_expr *expr); struct nft_set_ext; @@ -941,7 +928,6 @@ struct nft_offload_ctx; * @destroy_clone: destruction clone function * @dump: function to dump parameters * @validate: validate expression, called during loop detection - * @reduce: reduce expression * @gc: garbage collection expression * @offload: hardware offload expression * @offload_action: function to report true/false to allocate one slot or not in the flow @@ -975,8 +961,6 @@ struct nft_expr_ops { bool reset); int (*validate)(const struct nft_ctx *ctx, const struct nft_expr *expr); - bool (*reduce)(struct nft_regs_track *track, - const struct nft_expr *expr); bool (*gc)(struct net *net, const struct nft_expr *expr); int (*offload)(struct nft_offload_ctx *ctx, @@ -1954,20 +1938,4 @@ static inline u64 nft_net_tstamp(const struct net *net) return nft_pernet(net)->tstamp; } -#define __NFT_REDUCE_READONLY 1UL -#define NFT_REDUCE_READONLY (void *)__NFT_REDUCE_READONLY - -void nft_reg_track_update(struct nft_regs_track *track, - const struct nft_expr *expr, u8 dreg, u8 len); -void nft_reg_track_cancel(struct nft_regs_track *track, u8 dreg, u8 len); -void __nft_reg_track_cancel(struct nft_regs_track *track, u8 dreg); - -static inline bool nft_reg_track_cmp(struct nft_regs_track *track, - const struct nft_expr *expr, u8 dreg) -{ - return track->regs[dreg].selector && - track->regs[dreg].selector->ops == expr->ops && - track->regs[dreg].num_reg == 0; -} - #endif /* _NET_NF_TABLES_H */ diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h index 7370fba844ef..e0422456f27b 100644 --- a/include/net/netfilter/nft_fib.h +++ b/include/net/netfilter/nft_fib.h @@ -66,6 +66,4 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, void nft_fib_store_result(void *reg, const struct nft_fib *priv, const struct net_device *dev); -bool nft_fib_reduce(struct nft_regs_track *track, - const struct nft_expr *expr); #endif diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h index d602263590fe..f74e63290603 100644 --- a/include/net/netfilter/nft_meta.h +++ b/include/net/netfilter/nft_meta.h @@ -43,9 +43,6 @@ void nft_meta_set_destroy(const struct nft_ctx *ctx, int nft_meta_set_validate(const struct nft_ctx *ctx, const struct nft_expr *expr); -bool nft_meta_get_reduce(struct nft_regs_track *track, - const struct nft_expr *expr); - struct nft_inner_tun_ctx; void nft_meta_inner_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt, diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c index b7af36bbd306..7763e78abb00 100644 --- a/net/bridge/netfilter/nft_meta_bridge.c +++ b/net/bridge/netfilter/nft_meta_bridge.c @@ -112,7 +112,6 @@ static const struct nft_expr_ops nft_meta_bridge_get_ops = { .eval = nft_meta_bridge_get_eval, .init = nft_meta_bridge_get_init, .dump = nft_meta_get_dump, - .reduce = nft_meta_get_reduce, }; static void nft_meta_bridge_set_eval(const struct nft_expr *expr, @@ -159,24 +158,6 @@ static int nft_meta_bridge_set_init(const struct nft_ctx *ctx, return 0; } -static bool nft_meta_bridge_set_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - int i; - - for (i = 0; i < NFT_REG32_NUM; i++) { - if (!track->regs[i].selector) - continue; - - if (track->regs[i].selector->ops != &nft_meta_bridge_get_ops) - continue; - - __nft_reg_track_cancel(track, i); - } - - return false; -} - static int nft_meta_bridge_set_validate(const struct nft_ctx *ctx, const struct nft_expr *expr) { @@ -202,7 +183,6 @@ static const struct nft_expr_ops nft_meta_bridge_set_ops = { .init = nft_meta_bridge_set_init, .destroy = nft_meta_set_destroy, .dump = nft_meta_set_dump, - .reduce = nft_meta_bridge_set_reduce, .validate = nft_meta_bridge_set_validate, }; diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index 1cb5c16e97b7..cd2b04236a99 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -184,7 +184,6 @@ static const struct nft_expr_ops nft_reject_bridge_ops = { .init = nft_reject_init, .dump = nft_reject_dump, .validate = nft_reject_bridge_validate, - .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_reject_bridge_type __read_mostly = { diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c index ef5dd88107dd..d53a65ddbd7b 100644 --- a/net/ipv4/netfilter/nft_dup_ipv4.c +++ b/net/ipv4/netfilter/nft_dup_ipv4.c @@ -76,7 +76,6 @@ static const struct nft_expr_ops nft_dup_ipv4_ops = { .eval = nft_dup_ipv4_eval, .init = nft_dup_ipv4_init, .dump = nft_dup_ipv4_dump, - .reduce = NFT_REDUCE_READONLY, }; static const struct nla_policy nft_dup_ipv4_policy[NFTA_DUP_MAX + 1] = { diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c index 82af6cd76d13..9d0c6d75109b 100644 --- a/net/ipv4/netfilter/nft_fib_ipv4.c +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -163,7 +163,6 @@ static const struct nft_expr_ops nft_fib4_type_ops = { .init = nft_fib_init, .dump = nft_fib_dump, .validate = nft_fib_validate, - .reduce = nft_fib_reduce, }; static const struct nft_expr_ops nft_fib4_ops = { @@ -173,7 +172,6 @@ static const struct nft_expr_ops nft_fib4_ops = { .init = nft_fib_init, .dump = nft_fib_dump, .validate = nft_fib_validate, - .reduce = nft_fib_reduce, }; static const struct nft_expr_ops * diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c index 6cb213bb7256..55fc23a8f7a7 100644 --- a/net/ipv4/netfilter/nft_reject_ipv4.c +++ b/net/ipv4/netfilter/nft_reject_ipv4.c @@ -45,7 +45,6 @@ static const struct nft_expr_ops nft_reject_ipv4_ops = { .init = nft_reject_init, .dump = nft_reject_dump, .validate = nft_reject_validate, - .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_reject_ipv4_type __read_mostly = { diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c index 492a811828a7..95ec27b3971c 100644 --- a/net/ipv6/netfilter/nft_dup_ipv6.c +++ b/net/ipv6/netfilter/nft_dup_ipv6.c @@ -74,7 +74,6 @@ static const struct nft_expr_ops nft_dup_ipv6_ops = { .eval = nft_dup_ipv6_eval, .init = nft_dup_ipv6_init, .dump = nft_dup_ipv6_dump, - .reduce = NFT_REDUCE_READONLY, }; static const struct nla_policy nft_dup_ipv6_policy[NFTA_DUP_MAX + 1] = { diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index 421036a3605b..dc375b725b28 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -225,7 +225,6 @@ static const struct nft_expr_ops nft_fib6_type_ops = { .init = nft_fib_init, .dump = nft_fib_dump, .validate = nft_fib_validate, - .reduce = nft_fib_reduce, }; static const struct nft_expr_ops nft_fib6_ops = { @@ -235,7 +234,6 @@ static const struct nft_expr_ops nft_fib6_ops = { .init = nft_fib_init, .dump = nft_fib_dump, .validate = nft_fib_validate, - .reduce = nft_fib_reduce, }; static const struct nft_expr_ops * diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c index 5c61294f410e..ed69c768797e 100644 --- a/net/ipv6/netfilter/nft_reject_ipv6.c +++ b/net/ipv6/netfilter/nft_reject_ipv6.c @@ -46,7 +46,6 @@ static const struct nft_expr_ops nft_reject_ipv6_ops = { .init = nft_reject_init, .dump = nft_reject_dump, .validate = nft_reject_validate, - .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_reject_ipv6_type __read_mostly = { diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 23ef775897a4..c8d5f7e93dfd 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -932,58 +932,6 @@ static int nft_delflowtable(struct nft_ctx *ctx, return 0; } -static void __nft_reg_track_clobber(struct nft_regs_track *track, u8 dreg) -{ - int i; - - for (i = track->regs[dreg].num_reg; i > 0; i--) - __nft_reg_track_cancel(track, dreg - i); -} - -static void __nft_reg_track_update(struct nft_regs_track *track, - const struct nft_expr *expr, - u8 dreg, u8 num_reg) -{ - track->regs[dreg].selector = expr; - track->regs[dreg].bitwise = NULL; - track->regs[dreg].num_reg = num_reg; -} - -void nft_reg_track_update(struct nft_regs_track *track, - const struct nft_expr *expr, u8 dreg, u8 len) -{ - unsigned int regcount; - int i; - - __nft_reg_track_clobber(track, dreg); - - regcount = DIV_ROUND_UP(len, NFT_REG32_SIZE); - for (i = 0; i < regcount; i++, dreg++) - __nft_reg_track_update(track, expr, dreg, i); -} -EXPORT_SYMBOL_GPL(nft_reg_track_update); - -void nft_reg_track_cancel(struct nft_regs_track *track, u8 dreg, u8 len) -{ - unsigned int regcount; - int i; - - __nft_reg_track_clobber(track, dreg); - - regcount = DIV_ROUND_UP(len, NFT_REG32_SIZE); - for (i = 0; i < regcount; i++, dreg++) - __nft_reg_track_cancel(track, dreg); -} -EXPORT_SYMBOL_GPL(nft_reg_track_cancel); - -void __nft_reg_track_cancel(struct nft_regs_track *track, u8 dreg) -{ - track->regs[dreg].selector = NULL; - track->regs[dreg].bitwise = NULL; - track->regs[dreg].num_reg = 0; -} -EXPORT_SYMBOL_GPL(__nft_reg_track_cancel); - /* * Tables */ @@ -10172,16 +10120,9 @@ void nf_tables_trans_destroy_flush_work(struct net *net) } EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work); -static bool nft_expr_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - return false; -} - static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *chain) { const struct nft_expr *expr, *last; - struct nft_regs_track track = {}; unsigned int size, data_size; void *data, *data_boundary; struct nft_rule_dp *prule; @@ -10218,15 +10159,7 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha return -ENOMEM; size = 0; - track.last = nft_expr_last(rule); nft_rule_for_each_expr(expr, last, rule) { - track.cur = expr; - - if (nft_expr_reduce(&track, expr)) { - expr = track.cur; - continue; - } - if (WARN_ON_ONCE(data + size + expr->ops->size > data_boundary)) return -ENOMEM; diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index d550910aabec..a4ff781f334d 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -391,61 +391,12 @@ static int nft_bitwise_offload(struct nft_offload_ctx *ctx, return 0; } -static bool nft_bitwise_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - const struct nft_bitwise *priv = nft_expr_priv(expr); - const struct nft_bitwise *bitwise; - unsigned int regcount; - u8 dreg; - int i; - - if (!track->regs[priv->sreg].selector) - return false; - - bitwise = nft_expr_priv(track->regs[priv->dreg].selector); - if (track->regs[priv->sreg].selector == track->regs[priv->dreg].selector && - track->regs[priv->sreg].num_reg == 0 && - track->regs[priv->dreg].bitwise && - track->regs[priv->dreg].bitwise->ops == expr->ops && - priv->sreg == bitwise->sreg && - priv->sreg2 == bitwise->sreg2 && - priv->dreg == bitwise->dreg && - priv->op == bitwise->op && - priv->len == bitwise->len && - !memcmp(&priv->mask, &bitwise->mask, sizeof(priv->mask)) && - !memcmp(&priv->xor, &bitwise->xor, sizeof(priv->xor)) && - !memcmp(&priv->data, &bitwise->data, sizeof(priv->data))) { - track->cur = expr; - return true; - } - - if (track->regs[priv->sreg].bitwise || - track->regs[priv->sreg].num_reg != 0) { - nft_reg_track_cancel(track, priv->dreg, priv->len); - return false; - } - - if (priv->sreg != priv->dreg) { - nft_reg_track_update(track, track->regs[priv->sreg].selector, - priv->dreg, priv->len); - } - - dreg = priv->dreg; - regcount = DIV_ROUND_UP(priv->len, NFT_REG32_SIZE); - for (i = 0; i < regcount; i++, dreg++) - track->regs[dreg].bitwise = expr; - - return false; -} - static const struct nft_expr_ops nft_bitwise_ops = { .type = &nft_bitwise_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_bitwise)), .eval = nft_bitwise_eval, .init = nft_bitwise_init, .dump = nft_bitwise_dump, - .reduce = nft_bitwise_reduce, .offload = nft_bitwise_offload, }; @@ -548,48 +499,12 @@ static int nft_bitwise_fast_offload(struct nft_offload_ctx *ctx, return 0; } -static bool nft_bitwise_fast_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - const struct nft_bitwise_fast_expr *priv = nft_expr_priv(expr); - const struct nft_bitwise_fast_expr *bitwise; - - if (!track->regs[priv->sreg].selector) - return false; - - bitwise = nft_expr_priv(track->regs[priv->dreg].selector); - if (track->regs[priv->sreg].selector == track->regs[priv->dreg].selector && - track->regs[priv->dreg].bitwise && - track->regs[priv->dreg].bitwise->ops == expr->ops && - priv->sreg == bitwise->sreg && - priv->dreg == bitwise->dreg && - priv->mask == bitwise->mask && - priv->xor == bitwise->xor) { - track->cur = expr; - return true; - } - - if (track->regs[priv->sreg].bitwise) { - nft_reg_track_cancel(track, priv->dreg, NFT_REG32_SIZE); - return false; - } - - if (priv->sreg != priv->dreg) { - track->regs[priv->dreg].selector = - track->regs[priv->sreg].selector; - } - track->regs[priv->dreg].bitwise = expr; - - return false; -} - const struct nft_expr_ops nft_bitwise_fast_ops = { .type = &nft_bitwise_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_bitwise_fast_expr)), .eval = NULL, /* inlined */ .init = nft_bitwise_fast_init, .dump = nft_bitwise_fast_dump, - .reduce = nft_bitwise_fast_reduce, .offload = nft_bitwise_fast_offload, }; @@ -626,22 +541,3 @@ struct nft_expr_type nft_bitwise_type __read_mostly = { .maxattr = NFTA_BITWISE_MAX, .owner = THIS_MODULE, }; - -bool nft_expr_reduce_bitwise(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - const struct nft_expr *last = track->last; - const struct nft_expr *next; - - if (expr == last) - return false; - - next = nft_expr_next(expr); - if (next->ops == &nft_bitwise_ops) - return nft_bitwise_reduce(track, next); - else if (next->ops == &nft_bitwise_fast_ops) - return nft_bitwise_fast_reduce(track, next); - - return false; -} -EXPORT_SYMBOL_GPL(nft_expr_reduce_bitwise); diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index af9206a3afd1..744878773dac 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -170,23 +170,12 @@ nla_put_failure: return -1; } -static bool nft_byteorder_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - struct nft_byteorder *priv = nft_expr_priv(expr); - - nft_reg_track_cancel(track, priv->dreg, priv->len); - - return false; -} - static const struct nft_expr_ops nft_byteorder_ops = { .type = &nft_byteorder_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_byteorder)), .eval = nft_byteorder_eval, .init = nft_byteorder_init, .dump = nft_byteorder_dump, - .reduce = nft_byteorder_reduce, }; struct nft_expr_type nft_byteorder_type __read_mostly = { diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index 2605f43737bc..b61dc9c3383e 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -190,7 +190,6 @@ static const struct nft_expr_ops nft_cmp_ops = { .eval = nft_cmp_eval, .init = nft_cmp_init, .dump = nft_cmp_dump, - .reduce = NFT_REDUCE_READONLY, .offload = nft_cmp_offload, }; @@ -282,7 +281,6 @@ const struct nft_expr_ops nft_cmp_fast_ops = { .eval = NULL, /* inlined */ .init = nft_cmp_fast_init, .dump = nft_cmp_fast_dump, - .reduce = NFT_REDUCE_READONLY, .offload = nft_cmp_fast_offload, }; @@ -376,7 +374,6 @@ const struct nft_expr_ops nft_cmp16_fast_ops = { .eval = NULL, /* inlined */ .init = nft_cmp16_fast_init, .dump = nft_cmp16_fast_dump, - .reduce = NFT_REDUCE_READONLY, .offload = nft_cmp16_fast_offload, }; diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 08f620311b03..5021a01ba42c 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -778,14 +778,6 @@ static const struct nfnetlink_subsystem nfnl_compat_subsys = { static struct nft_expr_type nft_match_type; -static bool nft_match_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - const struct xt_match *match = expr->ops->data; - - return strcmp(match->name, "comment") == 0; -} - static const struct nft_expr_ops * nft_match_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) @@ -828,7 +820,6 @@ nft_match_select_ops(const struct nft_ctx *ctx, ops->dump = nft_match_dump; ops->validate = nft_match_validate; ops->data = match; - ops->reduce = nft_match_reduce; matchsize = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize)); if (matchsize > NFT_MATCH_LARGE_THRESH) { @@ -918,7 +909,6 @@ nft_target_select_ops(const struct nft_ctx *ctx, ops->dump = nft_target_dump; ops->validate = nft_target_validate; ops->data = target; - ops->reduce = NFT_REDUCE_READONLY; if (family == NFPROTO_BRIDGE) ops->eval = nft_target_eval_bridge; diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c index 657764774a2d..47d817983e81 100644 --- a/net/netfilter/nft_connlimit.c +++ b/net/netfilter/nft_connlimit.c @@ -258,7 +258,6 @@ static const struct nft_expr_ops nft_connlimit_ops = { .destroy_clone = nft_connlimit_destroy_clone, .dump = nft_connlimit_dump, .gc = nft_connlimit_gc, - .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_connlimit_type __read_mostly = { diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c index 169ae93688bc..3fa6369790f4 100644 --- a/net/netfilter/nft_counter.c +++ b/net/netfilter/nft_counter.c @@ -313,7 +313,6 @@ static const struct nft_expr_ops nft_counter_ops = { .destroy_clone = nft_counter_destroy, .dump = nft_counter_dump, .clone = nft_counter_clone, - .reduce = NFT_REDUCE_READONLY, .offload = nft_counter_offload, .offload_stats = nft_counter_offload_stats, }; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 6f2ae7cad731..b6abd5f8de92 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -698,29 +698,6 @@ nla_put_failure: return -1; } -static bool nft_ct_get_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - const struct nft_ct *priv = nft_expr_priv(expr); - const struct nft_ct *ct; - - if (!nft_reg_track_cmp(track, expr, priv->dreg)) { - nft_reg_track_update(track, expr, priv->dreg, priv->len); - return false; - } - - ct = nft_expr_priv(track->regs[priv->dreg].selector); - if (priv->key != ct->key) { - nft_reg_track_update(track, expr, priv->dreg, priv->len); - return false; - } - - if (!track->regs[priv->dreg].bitwise) - return true; - - return nft_expr_reduce_bitwise(track, expr); -} - static int nft_ct_set_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { @@ -755,27 +732,8 @@ static const struct nft_expr_ops nft_ct_get_ops = { .init = nft_ct_get_init, .destroy = nft_ct_get_destroy, .dump = nft_ct_get_dump, - .reduce = nft_ct_get_reduce, }; -static bool nft_ct_set_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - int i; - - for (i = 0; i < NFT_REG32_NUM; i++) { - if (!track->regs[i].selector) - continue; - - if (track->regs[i].selector->ops != &nft_ct_get_ops) - continue; - - __nft_reg_track_cancel(track, i); - } - - return false; -} - #ifdef CONFIG_MITIGATION_RETPOLINE static const struct nft_expr_ops nft_ct_get_fast_ops = { .type = &nft_ct_type, @@ -784,7 +742,6 @@ static const struct nft_expr_ops nft_ct_get_fast_ops = { .init = nft_ct_get_init, .destroy = nft_ct_get_destroy, .dump = nft_ct_get_dump, - .reduce = nft_ct_set_reduce, }; #endif @@ -795,7 +752,6 @@ static const struct nft_expr_ops nft_ct_set_ops = { .init = nft_ct_set_init, .destroy = nft_ct_set_destroy, .dump = nft_ct_set_dump, - .reduce = nft_ct_set_reduce, }; #ifdef CONFIG_NF_CONNTRACK_ZONES @@ -806,7 +762,6 @@ static const struct nft_expr_ops nft_ct_set_zone_ops = { .init = nft_ct_set_init, .destroy = nft_ct_set_destroy, .dump = nft_ct_set_dump, - .reduce = nft_ct_set_reduce, }; #endif @@ -876,7 +831,6 @@ static const struct nft_expr_ops nft_notrack_ops = { .type = &nft_notrack_type, .size = NFT_EXPR_SIZE(0), .eval = nft_notrack_eval, - .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_notrack_type __read_mostly = { diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c index 0573f96ce079..06866799e946 100644 --- a/net/netfilter/nft_dup_netdev.c +++ b/net/netfilter/nft_dup_netdev.c @@ -80,7 +80,6 @@ static const struct nft_expr_ops nft_dup_netdev_ops = { .eval = nft_dup_netdev_eval, .init = nft_dup_netdev_init, .dump = nft_dup_netdev_dump, - .reduce = NFT_REDUCE_READONLY, .offload = nft_dup_netdev_offload, .offload_action = nft_dup_netdev_offload_action, }; diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 7807d8129664..6bff6287e7d5 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -421,7 +421,6 @@ static const struct nft_expr_ops nft_dynset_ops = { .activate = nft_dynset_activate, .deactivate = nft_dynset_deactivate, .dump = nft_dynset_dump, - .reduce = NFT_REDUCE_READONLY, }; struct nft_expr_type nft_dynset_type __read_mostly = { diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 7eedf4e3ae9c..5f01269a49bd 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -702,40 +702,12 @@ static int nft_exthdr_dump_strip(struct sk_buff *skb, return nft_exthdr_dump_common(skb, priv); } -static bool nft_exthdr_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - const struct nft_exthdr *priv = nft_expr_priv(expr); - const struct nft_exthdr *exthdr; - - if (!nft_reg_track_cmp(track, expr, priv->dreg)) { - nft_reg_track_update(track, expr, priv->dreg, priv->len); - return false; - } - - exthdr = nft_expr_priv(track->regs[priv->dreg].selector); - if (priv->type != exthdr->type || - priv->op != exthdr->op || - priv->flags != exthdr->flags || - priv->offset != exthdr->offset || - priv->len != exthdr->len) { - nft_reg_track_update(track, expr, priv->dreg, priv->len); - return false; - } - - if (!track->regs[priv->dreg].bitwise) - return true; - - return nft_expr_reduce_bitwise(track, expr); -} - static const struct nft_expr_ops nft_exthdr_ipv6_ops = { .type = &nft_exthdr_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), .eval = nft_exthdr_ipv6_eval, .init = nft_exthdr_init, .dump = nft_exthdr_dump, - .reduce = nft_exthdr_reduce, }; static const struct nft_expr_ops nft_exthdr_ipv4_ops = { @@ -744,7 +716,6 @@ static const struct nft_expr_ops nft_exthdr_ipv4_ops = { .eval = nft_exthdr_ipv4_eval, .init = nft_exthdr_ipv4_init, .dump = nft_exthdr_dump, - .reduce = nft_exthdr_reduce, }; static const struct nft_expr_ops nft_exthdr_tcp_ops = { @@ -753,7 +724,6 @@ static const struct nft_expr_ops nft_exthdr_tcp_ops = { .eval = nft_exthdr_tcp_eval, .init = nft_exthdr_init, .dump = nft_exthdr_dump, - .reduce = nft_exthdr_reduce, }; static const struct nft_expr_ops nft_exthdr_tcp_set_ops = { @@ -762,7 +732,6 @@ static const struct nft_expr_ops nft_exthdr_tcp_set_ops = { .eval = nft_exthdr_tcp_set_eval, .init = nft_exthdr_tcp_set_init, .dump = nft_exthdr_dump_set, - .reduce = NFT_REDUCE_READONLY, }; static const struct nft_expr_ops nft_exthdr_tcp_strip_ops = { @@ -771,7 +740,6 @@ static const struct nft_expr_ops nft_exthdr_tcp_strip_ops = { .eval = nft_exthdr_tcp_strip_eval, .init = nft_exthdr_tcp_strip_init, .dump = nft_exthdr_dump_strip, - .reduce = NFT_REDUCE_READONLY, }; static const struct nft_expr_ops nft_exthdr_sctp_ops = { @@ -780,7 +748,6 @@ static const struct nft_expr_ops nft_exthdr_sctp_ops = { .eval = nft_exthdr_sctp_eval, .init = nft_exthdr_init, .dump = nft_exthdr_dump, - .reduce = nft_exthdr_reduce, }; #ifdef CONFIG_NFT_EXTHDR_DCCP @@ -790,7 +757,6 @@ static const struct nft_expr_ops nft_exthdr_dccp_ops = { .eval = nft_exthdr_dccp_eval, .init = nft_exthdr_dccp_init, .dump = nft_exthdr_dump, - .reduce = nft_exthdr_reduce, }; #endif diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c index 96e02a83c045..f7dc0e54375f 100644 --- a/net/netfilter/nft_fib.c +++ b/net/netfilter/nft_fib.c @@ -162,48 +162,6 @@ void nft_fib_store_result(void *reg, const struct nft_fib *priv, } EXPORT_SYMBOL_GPL(nft_fib_store_result); -bool nft_fib_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - const struct nft_fib *priv = nft_expr_priv(expr); - unsigned int len = NFT_REG32_SIZE; - const struct nft_fib *fib; - - switch (priv->result) { - case NFT_FIB_RESULT_OIF: - break; - case NFT_FIB_RESULT_OIFNAME: - if (priv->flags & NFTA_FIB_F_PRESENT) - len = NFT_REG32_SIZE; - else - len = IFNAMSIZ; - break; - case NFT_FIB_RESULT_ADDRTYPE: - break; - default: - WARN_ON_ONCE(1); - break; - } - - if (!nft_reg_track_cmp(track, expr, priv->dreg)) { - nft_reg_track_update(track, expr, priv->dreg, len); - return false; - } - - fib = nft_expr_priv(track->regs[priv->dreg].selector); - if (priv->result != fib->result || - priv->flags != fib->flags) { - nft_reg_track_update(track, expr, priv->dreg, len); - return false; - } - - if (!track->regs[priv->dreg].bitwise) - return true; - - return false; -} -EXPORT_SYMBOL_GPL(nft_fib_reduce); - MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Query routing table from nftables"); MODULE_AUTHOR("Florian Westphal "); diff --git a/net/netfilter/nft_fib_inet.c b/net/netfilter/nft_fib_inet.c index 666a3741d20b..a88d44e163d1 100644 --- a/net/netfilter/nft_fib_inet.c +++ b/net/netfilter/nft_fib_inet.c @@ -49,7 +49,6 @@ static const struct nft_expr_ops nft_fib_inet_ops = { .init = nft_fib_init, .dump = nft_fib_dump, .validate = nft_fib_validate, - .reduce = nft_fib_reduce, }; static struct nft_expr_type nft_fib_inet_type __read_mostly = { diff --git a/net/netfilter/nft_fib_netdev.c b/net/netfilter/nft_fib_netdev.c index 9121ec64e918..3f3478abd845 100644 --- a/net/netfilter/nft_fib_netdev.c +++ b/net/netfilter/nft_fib_netdev.c @@ -58,7 +58,6 @@ static const struct nft_expr_ops nft_fib_netdev_ops = { .init = nft_fib_init, .dump = nft_fib_dump, .validate = nft_fib_validate, - .reduce = nft_fib_reduce, }; static struct nft_expr_type nft_fib_netdev_type __read_mostly = { diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index 179d0e59e2b5..32b4281038dd 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -225,7 +225,6 @@ static const struct nft_expr_ops nft_flow_offload_ops = { .destroy = nft_flow_offload_destroy, .validate = nft_flow_offload_validate, .dump = nft_flow_offload_dump, - .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_flow_offload_type __read_mostly = { diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index 152a9fb4d23a..ad48dcd45abe 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -218,7 +218,6 @@ static const struct nft_expr_ops nft_fwd_neigh_netdev_ops = { .init = nft_fwd_neigh_init, .dump = nft_fwd_neigh_dump, .validate = nft_fwd_validate, - .reduce = NFT_REDUCE_READONLY, }; static const struct nft_expr_ops nft_fwd_netdev_ops = { @@ -228,7 +227,6 @@ static const struct nft_expr_ops nft_fwd_netdev_ops = { .init = nft_fwd_netdev_init, .dump = nft_fwd_netdev_dump, .validate = nft_fwd_validate, - .reduce = NFT_REDUCE_READONLY, .offload = nft_fwd_netdev_offload, .offload_action = nft_fwd_netdev_offload_action, }; diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 5d034bbb6913..1cf41e0a0e0c 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -166,16 +166,6 @@ nla_put_failure: return -1; } -static bool nft_jhash_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - const struct nft_jhash *priv = nft_expr_priv(expr); - - nft_reg_track_cancel(track, priv->dreg, sizeof(u32)); - - return false; -} - static int nft_symhash_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { @@ -196,30 +186,6 @@ nla_put_failure: return -1; } -static bool nft_symhash_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - struct nft_symhash *priv = nft_expr_priv(expr); - struct nft_symhash *symhash; - - if (!nft_reg_track_cmp(track, expr, priv->dreg)) { - nft_reg_track_update(track, expr, priv->dreg, sizeof(u32)); - return false; - } - - symhash = nft_expr_priv(track->regs[priv->dreg].selector); - if (priv->offset != symhash->offset || - priv->modulus != symhash->modulus) { - nft_reg_track_update(track, expr, priv->dreg, sizeof(u32)); - return false; - } - - if (!track->regs[priv->dreg].bitwise) - return true; - - return false; -} - static struct nft_expr_type nft_hash_type; static const struct nft_expr_ops nft_jhash_ops = { .type = &nft_hash_type, @@ -227,7 +193,6 @@ static const struct nft_expr_ops nft_jhash_ops = { .eval = nft_jhash_eval, .init = nft_jhash_init, .dump = nft_jhash_dump, - .reduce = nft_jhash_reduce, }; static const struct nft_expr_ops nft_symhash_ops = { @@ -236,7 +201,6 @@ static const struct nft_expr_ops nft_symhash_ops = { .eval = nft_symhash_eval, .init = nft_symhash_init, .dump = nft_symhash_dump, - .reduce = nft_symhash_reduce, }; static const struct nft_expr_ops * diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 02ee5fb69871..37c29947b380 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -320,17 +320,6 @@ static bool nft_immediate_offload_action(const struct nft_expr *expr) return false; } -static bool nft_immediate_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - const struct nft_immediate_expr *priv = nft_expr_priv(expr); - - if (priv->dreg != NFT_REG_VERDICT) - nft_reg_track_cancel(track, priv->dreg, priv->dlen); - - return false; -} - static const struct nft_expr_ops nft_imm_ops = { .type = &nft_imm_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)), @@ -341,7 +330,6 @@ static const struct nft_expr_ops nft_imm_ops = { .destroy = nft_immediate_destroy, .dump = nft_immediate_dump, .validate = nft_immediate_validate, - .reduce = nft_immediate_reduce, .offload = nft_immediate_offload, .offload_action = nft_immediate_offload_action, }; diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c index de1b6066bfa8..e845779268d3 100644 --- a/net/netfilter/nft_last.c +++ b/net/netfilter/nft_last.c @@ -125,7 +125,6 @@ static const struct nft_expr_ops nft_last_ops = { .destroy = nft_last_destroy, .clone = nft_last_clone, .dump = nft_last_dump, - .reduce = NFT_REDUCE_READONLY, }; struct nft_expr_type nft_last_type __read_mostly = { diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index 21d26b79b460..0daeb0b23c20 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -243,7 +243,6 @@ static const struct nft_expr_ops nft_limit_pkts_ops = { .destroy = nft_limit_pkts_destroy, .clone = nft_limit_pkts_clone, .dump = nft_limit_pkts_dump, - .reduce = NFT_REDUCE_READONLY, }; static void nft_limit_bytes_eval(const struct nft_expr *expr, @@ -299,7 +298,6 @@ static const struct nft_expr_ops nft_limit_bytes_ops = { .dump = nft_limit_bytes_dump, .clone = nft_limit_bytes_clone, .destroy = nft_limit_bytes_destroy, - .reduce = NFT_REDUCE_READONLY, }; static const struct nft_expr_ops * diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index bf01cf8a8907..da0c0d1c9cea 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -235,7 +235,6 @@ static const struct nft_expr_ops nft_log_ops = { .init = nft_log_init, .destroy = nft_log_destroy, .dump = nft_log_dump, - .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_log_type __read_mostly = { diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index fc2d7c5d83c8..e4e619027542 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -266,17 +266,6 @@ static int nft_lookup_validate(const struct nft_ctx *ctx, return 0; } -static bool nft_lookup_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - const struct nft_lookup *priv = nft_expr_priv(expr); - - if (priv->set->flags & NFT_SET_MAP) - nft_reg_track_cancel(track, priv->dreg, priv->set->dlen); - - return false; -} - static const struct nft_expr_ops nft_lookup_ops = { .type = &nft_lookup_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)), @@ -287,7 +276,6 @@ static const struct nft_expr_ops nft_lookup_ops = { .destroy = nft_lookup_destroy, .dump = nft_lookup_dump, .validate = nft_lookup_validate, - .reduce = nft_lookup_reduce, }; struct nft_expr_type nft_lookup_type __read_mostly = { diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c index 868bd4d73555..2b01128737a3 100644 --- a/net/netfilter/nft_masq.c +++ b/net/netfilter/nft_masq.c @@ -143,7 +143,6 @@ static const struct nft_expr_ops nft_masq_ipv4_ops = { .destroy = nft_masq_ipv4_destroy, .dump = nft_masq_dump, .validate = nft_masq_validate, - .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_masq_ipv4_type __read_mostly = { @@ -171,7 +170,6 @@ static const struct nft_expr_ops nft_masq_ipv6_ops = { .destroy = nft_masq_ipv6_destroy, .dump = nft_masq_dump, .validate = nft_masq_validate, - .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_masq_ipv6_type __read_mostly = { @@ -213,7 +211,6 @@ static const struct nft_expr_ops nft_masq_inet_ops = { .destroy = nft_masq_inet_destroy, .dump = nft_masq_dump, .validate = nft_masq_validate, - .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_masq_inet_type __read_mostly = { diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 05cd1e6e6a2f..983158274c68 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -742,60 +742,16 @@ static int nft_meta_get_offload(struct nft_offload_ctx *ctx, return 0; } -bool nft_meta_get_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - const struct nft_meta *priv = nft_expr_priv(expr); - const struct nft_meta *meta; - - if (!nft_reg_track_cmp(track, expr, priv->dreg)) { - nft_reg_track_update(track, expr, priv->dreg, priv->len); - return false; - } - - meta = nft_expr_priv(track->regs[priv->dreg].selector); - if (priv->key != meta->key || - priv->dreg != meta->dreg) { - nft_reg_track_update(track, expr, priv->dreg, priv->len); - return false; - } - - if (!track->regs[priv->dreg].bitwise) - return true; - - return nft_expr_reduce_bitwise(track, expr); -} -EXPORT_SYMBOL_GPL(nft_meta_get_reduce); - static const struct nft_expr_ops nft_meta_get_ops = { .type = &nft_meta_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), .eval = nft_meta_get_eval, .init = nft_meta_get_init, .dump = nft_meta_get_dump, - .reduce = nft_meta_get_reduce, .validate = nft_meta_get_validate, .offload = nft_meta_get_offload, }; -static bool nft_meta_set_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - int i; - - for (i = 0; i < NFT_REG32_NUM; i++) { - if (!track->regs[i].selector) - continue; - - if (track->regs[i].selector->ops != &nft_meta_get_ops) - continue; - - __nft_reg_track_cancel(track, i); - } - - return false; -} - static const struct nft_expr_ops nft_meta_set_ops = { .type = &nft_meta_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), @@ -803,7 +759,6 @@ static const struct nft_expr_ops nft_meta_set_ops = { .init = nft_meta_set_init, .destroy = nft_meta_set_destroy, .dump = nft_meta_set_dump, - .reduce = nft_meta_set_reduce, .validate = nft_meta_set_validate, }; diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index 6e21f72c5b57..e32cd9fbc7c2 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -320,7 +320,6 @@ static const struct nft_expr_ops nft_nat_ops = { .destroy = nft_nat_destroy, .dump = nft_nat_dump, .validate = nft_nat_validate, - .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_nat_type __read_mostly = { @@ -351,7 +350,6 @@ static const struct nft_expr_ops nft_nat_inet_ops = { .destroy = nft_nat_destroy, .dump = nft_nat_dump, .validate = nft_nat_validate, - .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_inet_nat_type __read_mostly = { diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c index bd058babfc82..06e87dfd76e7 100644 --- a/net/netfilter/nft_numgen.c +++ b/net/netfilter/nft_numgen.c @@ -84,16 +84,6 @@ err: return err; } -static bool nft_ng_inc_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - const struct nft_ng_inc *priv = nft_expr_priv(expr); - - nft_reg_track_cancel(track, priv->dreg, NFT_REG32_SIZE); - - return false; -} - static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg, u32 modulus, enum nft_ng_types type, u32 offset) { @@ -178,16 +168,6 @@ static int nft_ng_random_dump(struct sk_buff *skb, priv->offset); } -static bool nft_ng_random_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - const struct nft_ng_random *priv = nft_expr_priv(expr); - - nft_reg_track_cancel(track, priv->dreg, NFT_REG32_SIZE); - - return false; -} - static struct nft_expr_type nft_ng_type; static const struct nft_expr_ops nft_ng_inc_ops = { .type = &nft_ng_type, @@ -196,7 +176,6 @@ static const struct nft_expr_ops nft_ng_inc_ops = { .init = nft_ng_inc_init, .destroy = nft_ng_inc_destroy, .dump = nft_ng_inc_dump, - .reduce = nft_ng_inc_reduce, }; static const struct nft_expr_ops nft_ng_random_ops = { @@ -205,7 +184,6 @@ static const struct nft_expr_ops nft_ng_random_ops = { .eval = nft_ng_random_eval, .init = nft_ng_random_init, .dump = nft_ng_random_dump, - .reduce = nft_ng_random_reduce, }; static const struct nft_expr_ops * diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index 1a62e384766a..633cce69568f 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -123,7 +123,6 @@ static const struct nft_expr_ops nft_objref_ops = { .deactivate = nft_objref_deactivate, .dump = nft_objref_dump, .validate = nft_objref_validate, - .reduce = NFT_REDUCE_READONLY, }; struct nft_objref_map { @@ -245,7 +244,6 @@ static const struct nft_expr_ops nft_objref_map_ops = { .destroy = nft_objref_map_destroy, .dump = nft_objref_map_dump, .validate = nft_objref_map_validate, - .reduce = NFT_REDUCE_READONLY, }; static const struct nft_expr_ops * diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c index 1c0b493ef0a9..39ccd67ed265 100644 --- a/net/netfilter/nft_osf.c +++ b/net/netfilter/nft_osf.c @@ -127,30 +127,6 @@ static int nft_osf_validate(const struct nft_ctx *ctx, return nft_chain_validate_hooks(ctx->chain, hooks); } -static bool nft_osf_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - struct nft_osf *priv = nft_expr_priv(expr); - struct nft_osf *osf; - - if (!nft_reg_track_cmp(track, expr, priv->dreg)) { - nft_reg_track_update(track, expr, priv->dreg, NFT_OSF_MAXGENRELEN); - return false; - } - - osf = nft_expr_priv(track->regs[priv->dreg].selector); - if (priv->flags != osf->flags || - priv->ttl != osf->ttl) { - nft_reg_track_update(track, expr, priv->dreg, NFT_OSF_MAXGENRELEN); - return false; - } - - if (!track->regs[priv->dreg].bitwise) - return true; - - return false; -} - static struct nft_expr_type nft_osf_type; static const struct nft_expr_ops nft_osf_op = { .eval = nft_osf_eval, @@ -159,7 +135,6 @@ static const struct nft_expr_ops nft_osf_op = { .dump = nft_osf_dump, .type = &nft_osf_type, .validate = nft_osf_validate, - .reduce = nft_osf_reduce, }; static struct nft_expr_type nft_osf_type __read_mostly = { diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index b0214418f75a..973d56af03ff 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -250,31 +250,6 @@ nla_put_failure: return -1; } -static bool nft_payload_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - const struct nft_payload *priv = nft_expr_priv(expr); - const struct nft_payload *payload; - - if (!nft_reg_track_cmp(track, expr, priv->dreg)) { - nft_reg_track_update(track, expr, priv->dreg, priv->len); - return false; - } - - payload = nft_expr_priv(track->regs[priv->dreg].selector); - if (priv->base != payload->base || - priv->offset != payload->offset || - priv->len != payload->len) { - nft_reg_track_update(track, expr, priv->dreg, priv->len); - return false; - } - - if (!track->regs[priv->dreg].bitwise) - return true; - - return nft_expr_reduce_bitwise(track, expr); -} - static bool nft_payload_offload_mask(struct nft_offload_reg *reg, u32 priv_len, u32 field_len) { @@ -578,7 +553,6 @@ static const struct nft_expr_ops nft_payload_ops = { .eval = nft_payload_eval, .init = nft_payload_init, .dump = nft_payload_dump, - .reduce = nft_payload_reduce, .offload = nft_payload_offload, }; @@ -588,7 +562,6 @@ const struct nft_expr_ops nft_payload_fast_ops = { .eval = nft_payload_eval, .init = nft_payload_init, .dump = nft_payload_dump, - .reduce = nft_payload_reduce, .offload = nft_payload_offload, }; @@ -1012,32 +985,12 @@ nla_put_failure: return -1; } -static bool nft_payload_set_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - int i; - - for (i = 0; i < NFT_REG32_NUM; i++) { - if (!track->regs[i].selector) - continue; - - if (track->regs[i].selector->ops != &nft_payload_ops && - track->regs[i].selector->ops != &nft_payload_fast_ops) - continue; - - __nft_reg_track_cancel(track, i); - } - - return false; -} - static const struct nft_expr_ops nft_payload_set_ops = { .type = &nft_payload_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_payload_set)), .eval = nft_payload_set_eval, .init = nft_payload_set_init, .dump = nft_payload_set_dump, - .reduce = nft_payload_set_reduce, }; static const struct nft_expr_ops * diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c index 344fe311878f..8eb13a02942e 100644 --- a/net/netfilter/nft_queue.c +++ b/net/netfilter/nft_queue.c @@ -191,7 +191,6 @@ static const struct nft_expr_ops nft_queue_ops = { .init = nft_queue_init, .dump = nft_queue_dump, .validate = nft_queue_validate, - .reduce = NFT_REDUCE_READONLY, }; static const struct nft_expr_ops nft_queue_sreg_ops = { @@ -201,7 +200,6 @@ static const struct nft_expr_ops nft_queue_sreg_ops = { .init = nft_queue_sreg_init, .dump = nft_queue_sreg_dump, .validate = nft_queue_validate, - .reduce = NFT_REDUCE_READONLY, }; static const struct nft_expr_ops * diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c index cb6c0e04ff67..bb3cf3d16e79 100644 --- a/net/netfilter/nft_quota.c +++ b/net/netfilter/nft_quota.c @@ -266,7 +266,6 @@ static const struct nft_expr_ops nft_quota_ops = { .destroy = nft_quota_destroy, .clone = nft_quota_clone, .dump = nft_quota_dump, - .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_quota_type __read_mostly = { diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c index ea382f7bbd78..cbb02644b836 100644 --- a/net/netfilter/nft_range.c +++ b/net/netfilter/nft_range.c @@ -138,7 +138,6 @@ static const struct nft_expr_ops nft_range_ops = { .eval = nft_range_eval, .init = nft_range_init, .dump = nft_range_dump, - .reduce = NFT_REDUCE_READONLY, }; struct nft_expr_type nft_range_type __read_mostly = { diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c index 95eedad85c83..58ae802db8f5 100644 --- a/net/netfilter/nft_redir.c +++ b/net/netfilter/nft_redir.c @@ -146,7 +146,6 @@ static const struct nft_expr_ops nft_redir_ipv4_ops = { .destroy = nft_redir_ipv4_destroy, .dump = nft_redir_dump, .validate = nft_redir_validate, - .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_redir_ipv4_type __read_mostly = { @@ -174,7 +173,6 @@ static const struct nft_expr_ops nft_redir_ipv6_ops = { .destroy = nft_redir_ipv6_destroy, .dump = nft_redir_dump, .validate = nft_redir_validate, - .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_redir_ipv6_type __read_mostly = { @@ -203,7 +201,6 @@ static const struct nft_expr_ops nft_redir_inet_ops = { .destroy = nft_redir_inet_destroy, .dump = nft_redir_dump, .validate = nft_redir_validate, - .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_redir_inet_type __read_mostly = { diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c index 49020e67304a..dcae83ddc32e 100644 --- a/net/netfilter/nft_reject_inet.c +++ b/net/netfilter/nft_reject_inet.c @@ -79,7 +79,6 @@ static const struct nft_expr_ops nft_reject_inet_ops = { .init = nft_reject_init, .dump = nft_reject_dump, .validate = nft_reject_inet_validate, - .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_reject_inet_type __read_mostly = { diff --git a/net/netfilter/nft_reject_netdev.c b/net/netfilter/nft_reject_netdev.c index 2558ce1505d9..b53e81e4ca75 100644 --- a/net/netfilter/nft_reject_netdev.c +++ b/net/netfilter/nft_reject_netdev.c @@ -158,7 +158,6 @@ static const struct nft_expr_ops nft_reject_netdev_ops = { .init = nft_reject_init, .dump = nft_reject_dump, .validate = nft_reject_netdev_validate, - .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_reject_netdev_type __read_mostly = { diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c index dc50b9a5bd68..ad527f3596c0 100644 --- a/net/netfilter/nft_rt.c +++ b/net/netfilter/nft_rt.c @@ -195,7 +195,6 @@ static const struct nft_expr_ops nft_rt_get_ops = { .init = nft_rt_get_init, .dump = nft_rt_get_dump, .validate = nft_rt_validate, - .reduce = NFT_REDUCE_READONLY, }; struct nft_expr_type nft_rt_type __read_mostly = { diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index 36affbb697c2..c55a1310226a 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -249,31 +249,6 @@ static int nft_socket_dump(struct sk_buff *skb, return 0; } -static bool nft_socket_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - const struct nft_socket *priv = nft_expr_priv(expr); - const struct nft_socket *socket; - - if (!nft_reg_track_cmp(track, expr, priv->dreg)) { - nft_reg_track_update(track, expr, priv->dreg, priv->len); - return false; - } - - socket = nft_expr_priv(track->regs[priv->dreg].selector); - if (priv->key != socket->key || - priv->dreg != socket->dreg || - priv->level != socket->level) { - nft_reg_track_update(track, expr, priv->dreg, priv->len); - return false; - } - - if (!track->regs[priv->dreg].bitwise) - return true; - - return nft_expr_reduce_bitwise(track, expr); -} - static int nft_socket_validate(const struct nft_ctx *ctx, const struct nft_expr *expr) { @@ -296,7 +271,6 @@ static const struct nft_expr_ops nft_socket_ops = { .init = nft_socket_init, .dump = nft_socket_dump, .validate = nft_socket_validate, - .reduce = nft_socket_reduce, }; static struct nft_expr_type nft_socket_type __read_mostly = { diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c index b71ef18b0e8c..8e452a874969 100644 --- a/net/netfilter/nft_synproxy.c +++ b/net/netfilter/nft_synproxy.c @@ -292,7 +292,6 @@ static const struct nft_expr_ops nft_synproxy_ops = { .dump = nft_synproxy_dump, .type = &nft_synproxy_type, .validate = nft_synproxy_validate, - .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_synproxy_type __read_mostly = { diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c index 50481280abd2..f2101af8c867 100644 --- a/net/netfilter/nft_tproxy.c +++ b/net/netfilter/nft_tproxy.c @@ -331,7 +331,6 @@ static const struct nft_expr_ops nft_tproxy_ops = { .init = nft_tproxy_init, .destroy = nft_tproxy_destroy, .dump = nft_tproxy_dump, - .reduce = NFT_REDUCE_READONLY, .validate = nft_tproxy_validate, }; diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index a12486ae089d..f5cadba91417 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -124,31 +124,6 @@ nla_put_failure: return -1; } -static bool nft_tunnel_get_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - const struct nft_tunnel *priv = nft_expr_priv(expr); - const struct nft_tunnel *tunnel; - - if (!nft_reg_track_cmp(track, expr, priv->dreg)) { - nft_reg_track_update(track, expr, priv->dreg, priv->len); - return false; - } - - tunnel = nft_expr_priv(track->regs[priv->dreg].selector); - if (priv->key != tunnel->key || - priv->dreg != tunnel->dreg || - priv->mode != tunnel->mode) { - nft_reg_track_update(track, expr, priv->dreg, priv->len); - return false; - } - - if (!track->regs[priv->dreg].bitwise) - return true; - - return false; -} - static struct nft_expr_type nft_tunnel_type; static const struct nft_expr_ops nft_tunnel_get_ops = { .type = &nft_tunnel_type, @@ -156,7 +131,6 @@ static const struct nft_expr_ops nft_tunnel_get_ops = { .eval = nft_tunnel_get_eval, .init = nft_tunnel_get_init, .dump = nft_tunnel_get_dump, - .reduce = nft_tunnel_get_reduce, }; static struct nft_expr_type nft_tunnel_type __read_mostly = { diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c index 3210cfc966ab..7ffe6a2690d1 100644 --- a/net/netfilter/nft_xfrm.c +++ b/net/netfilter/nft_xfrm.c @@ -259,32 +259,6 @@ static int nft_xfrm_validate(const struct nft_ctx *ctx, const struct nft_expr *e return nft_chain_validate_hooks(ctx->chain, hooks); } -static bool nft_xfrm_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - const struct nft_xfrm *priv = nft_expr_priv(expr); - const struct nft_xfrm *xfrm; - - if (!nft_reg_track_cmp(track, expr, priv->dreg)) { - nft_reg_track_update(track, expr, priv->dreg, priv->len); - return false; - } - - xfrm = nft_expr_priv(track->regs[priv->dreg].selector); - if (priv->key != xfrm->key || - priv->dreg != xfrm->dreg || - priv->dir != xfrm->dir || - priv->spnum != xfrm->spnum) { - nft_reg_track_update(track, expr, priv->dreg, priv->len); - return false; - } - - if (!track->regs[priv->dreg].bitwise) - return true; - - return nft_expr_reduce_bitwise(track, expr); -} - static struct nft_expr_type nft_xfrm_type; static const struct nft_expr_ops nft_xfrm_get_ops = { .type = &nft_xfrm_type, @@ -293,7 +267,6 @@ static const struct nft_expr_ops nft_xfrm_get_ops = { .init = nft_xfrm_get_init, .dump = nft_xfrm_get_dump, .validate = nft_xfrm_validate, - .reduce = nft_xfrm_reduce, }; static struct nft_expr_type nft_xfrm_type __read_mostly = { From b0249c0d41b306ddd79de58ca7fea543ab5e7a2e Mon Sep 17 00:00:00 2001 From: Dimitri Daskalakis Date: Tue, 24 Feb 2026 14:46:58 -0800 Subject: [PATCH 0050/1561] selftests: net: py: Add rand_ports helper method Certain tests need a unique set of ports. Successive calls to the existing rand_port method may return a duplicate port, resulting in test flakiness. The new helper keeps sockets open while building a list of ephemeral ports, thus the kernel enforces their uniqueness. Signed-off-by: Dimitri Daskalakis Link: https://patch.msgid.link/20260224224659.1507082-2-dimitri.daskalakis1@gmail.com Signed-off-by: Jakub Kicinski --- .../drivers/net/hw/lib/py/__init__.py | 5 ++-- .../selftests/drivers/net/lib/py/__init__.py | 4 ++-- .../testing/selftests/net/lib/py/__init__.py | 5 ++-- tools/testing/selftests/net/lib/py/utils.py | 24 ++++++++++++++++--- 4 files changed, 29 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py index d5d247eca6b7..1971577d47e9 100644 --- a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py @@ -22,7 +22,8 @@ try: NlError, RtnlFamily, DevlinkFamily, PSPFamily from net.lib.py import CmdExitFailure from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \ - fd_read_timeout, ip, rand_port, wait_port_listen, wait_file, tool + fd_read_timeout, ip, rand_port, rand_ports, wait_port_listen, \ + wait_file, tool from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \ ksft_setup, ksft_variants, KsftNamedVariant @@ -36,7 +37,7 @@ try: "NlError", "RtnlFamily", "DevlinkFamily", "PSPFamily", "CmdExitFailure", "bkg", "cmd", "bpftool", "bpftrace", "defer", "ethtool", - "fd_read_timeout", "ip", "rand_port", + "fd_read_timeout", "ip", "rand_port", "rand_ports", "wait_port_listen", "wait_file", "tool", "KsftSkipEx", "KsftFailEx", "KsftXfailEx", "ksft_disruptive", "ksft_exit", "ksft_pr", "ksft_run", diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py index 8b75faa9af6d..5872d114f142 100644 --- a/tools/testing/selftests/drivers/net/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py @@ -22,7 +22,7 @@ try: NlError, RtnlFamily, DevlinkFamily, PSPFamily from net.lib.py import CmdExitFailure from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \ - fd_read_timeout, ip, rand_port, wait_port_listen, wait_file + fd_read_timeout, ip, rand_port, rand_ports, wait_port_listen, wait_file from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \ ksft_setup, ksft_variants, KsftNamedVariant @@ -34,7 +34,7 @@ try: "NlError", "RtnlFamily", "DevlinkFamily", "PSPFamily", "CmdExitFailure", "bkg", "cmd", "bpftool", "bpftrace", "defer", "ethtool", - "fd_read_timeout", "ip", "rand_port", + "fd_read_timeout", "ip", "rand_port", "rand_ports", "wait_port_listen", "wait_file", "KsftSkipEx", "KsftFailEx", "KsftXfailEx", "ksft_disruptive", "ksft_exit", "ksft_pr", "ksft_run", diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py index f528b67639de..a584e7f806a4 100644 --- a/tools/testing/selftests/net/lib/py/__init__.py +++ b/tools/testing/selftests/net/lib/py/__init__.py @@ -13,7 +13,8 @@ from .ksft import KsftFailEx, KsftSkipEx, KsftXfailEx, ksft_pr, ksft_eq, \ from .netns import NetNS, NetNSEnter from .nsim import NetdevSim, NetdevSimDev from .utils import CmdExitFailure, fd_read_timeout, cmd, bkg, defer, \ - bpftool, ip, ethtool, bpftrace, rand_port, wait_port_listen, wait_file, tool + bpftool, ip, ethtool, bpftrace, rand_port, rand_ports, wait_port_listen, \ + wait_file, tool from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily, RtnlAddrFamily from .ynl import NetshaperFamily, DevlinkFamily, PSPFamily @@ -25,7 +26,7 @@ __all__ = ["KSRC", "ksft_run", "ksft_exit", "ksft_variants", "KsftNamedVariant", "NetNS", "NetNSEnter", "CmdExitFailure", "fd_read_timeout", "cmd", "bkg", "defer", - "bpftool", "ip", "ethtool", "bpftrace", "rand_port", + "bpftool", "ip", "ethtool", "bpftrace", "rand_port", "rand_ports", "wait_port_listen", "wait_file", "tool", "NetdevSim", "NetdevSimDev", "NetshaperFamily", "DevlinkFamily", "PSPFamily", "NlError", diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py index 09535346c13d..5ffd9fb2a8ce 100644 --- a/tools/testing/selftests/net/lib/py/utils.py +++ b/tools/testing/selftests/net/lib/py/utils.py @@ -281,9 +281,27 @@ def rand_port(stype=socket.SOCK_STREAM): """ Get a random unprivileged port. """ - with socket.socket(socket.AF_INET6, stype) as s: - s.bind(("", 0)) - return s.getsockname()[1] + return rand_ports(1, stype)[0] + + +def rand_ports(count, stype=socket.SOCK_STREAM): + """ + Get a unique set of random unprivileged ports. + """ + sockets = [] + ports = [] + + try: + for _ in range(count): + s = socket.socket(socket.AF_INET6, stype) + sockets.append(s) + s.bind(("", 0)) + ports.append(s.getsockname()[1]) + finally: + for s in sockets: + s.close() + + return ports def wait_port_listen(port, proto="tcp", ns=None, host=None, sleep=0.005, deadline=5): From 19c3a2a81d2bca7ca2187ea3686c4bdfd147115b Mon Sep 17 00:00:00 2001 From: Dimitri Daskalakis Date: Tue, 24 Feb 2026 14:46:59 -0800 Subject: [PATCH 0051/1561] selftests: drv-net: rss: Generate unique ports for RSS context tests The RSS ctx tests rely on NFC rules with unique ports to steer packets to the correct ctx. This updates the test to use the new rand_ports() helper to guarantee the ports are unique. Manual testing shows that generating 32 ports with the existing method would result in at least one duplicate 4% of the time. Signed-off-by: Dimitri Daskalakis Link: https://patch.msgid.link/20260224224659.1507082-3-dimitri.daskalakis1@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/rss_ctx.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py index ed7e405682f0..cbeb3a38fdfe 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -9,7 +9,7 @@ from lib.py import ksft_eq, ksft_ne, ksft_ge, ksft_in, ksft_lt, ksft_true, ksft_ from lib.py import NetDrvEpEnv from lib.py import EthtoolFamily, NetdevFamily from lib.py import KsftSkipEx, KsftFailEx -from lib.py import rand_port +from lib.py import rand_port, rand_ports from lib.py import ethtool, ip, defer, GenerateTraffic, CmdExitFailure @@ -452,7 +452,7 @@ def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None): except: raise KsftSkipEx("Not enough queues for the test") - ports = [] + ports = rand_ports(ctx_cnt) # Use queues 0 and 1 for normal traffic ethtool(f"-X {cfg.ifname} equal 2") @@ -486,7 +486,6 @@ def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None): ksft_eq(min(data['rss-indirection-table']), 2 + i * 2, "Unexpected context cfg: " + str(data)) ksft_eq(max(data['rss-indirection-table']), 2 + i * 2 + 1, "Unexpected context cfg: " + str(data)) - ports.append(rand_port()) flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {ports[i]} context {ctx_id}" ntuple = ethtool_create(cfg, "-N", flow) defer(ethtool, f"-N {cfg.ifname} delete {ntuple}") @@ -542,7 +541,7 @@ def test_rss_context_out_of_order(cfg, ctx_cnt=4): ntuple = [] ctx = [] - ports = [] + ports = rand_ports(ctx_cnt) def remove_ctx(idx): ntuple[idx].exec() @@ -574,7 +573,6 @@ def test_rss_context_out_of_order(cfg, ctx_cnt=4): ctx_id = ethtool_create(cfg, "-X", f"context new start {2 + i * 2} equal 2") ctx.append(defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")) - ports.append(rand_port()) flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {ports[i]} context {ctx_id}" ntuple_id = ethtool_create(cfg, "-N", flow) ntuple.append(defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}")) @@ -788,9 +786,10 @@ def test_rss_default_context_rule(cfg): ethtool(f"-N {cfg.ifname} {flow_generic}") defer(ethtool, f"-N {cfg.ifname} delete 1") + ports = rand_ports(2) # Specific high-priority rule for a random port that should stay on context 0. # Assign loc 0 so it is evaluated before the generic rule. - port_main = rand_port() + port_main = ports[0] flow_main = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port_main} context 0 loc 0" ethtool(f"-N {cfg.ifname} {flow_main}") defer(ethtool, f"-N {cfg.ifname} delete 0") @@ -803,7 +802,7 @@ def test_rss_default_context_rule(cfg): 'empty' : (2, 3) }) # And that traffic for any other port is steered to the new context - port_other = rand_port() + port_other = ports[1] _send_traffic_check(cfg, port_other, f"context {ctx_id}", { 'target': (2, 3), 'noise' : (0, 1) }) From be14d13625c9b070c33c423026b598ed65695225 Mon Sep 17 00:00:00 2001 From: kexinsun Date: Tue, 24 Feb 2026 10:18:21 +0800 Subject: [PATCH 0052/1561] xfrm: update outdated comment The function __xfrm4_bundle_create() was refactored into xfrm_bundle_create() (among others) by commit 25ee3286dcbc ("xfrm: Merge common code into xfrm_bundle_create"). The responsibility for setting dst->obsolete to DST_OBSOLETE_FORCE_CHK now lives in xfrm_bundle_create(). Update the comment accordingly. Signed-off-by: kexinsun Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 62486f866975..7bcb6583e84c 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3910,7 +3910,7 @@ EXPORT_SYMBOL(__xfrm_route_forward); static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) { - /* Code (such as __xfrm4_bundle_create()) sets dst->obsolete + /* Code (such as xfrm_bundle_create()) sets dst->obsolete * to DST_OBSOLETE_FORCE_CHK to force all XFRM destinations to * get validated by dst_ops->check on every use. We do this * because when a normal route referenced by an XFRM dst is From 376cf4227401f538cce799aee91bc33fb008d388 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Mon, 23 Feb 2026 22:41:41 +0200 Subject: [PATCH 0053/1561] net/mlx5e: Make mlx5e_rq_param naming consistent This structure is used under different names: rq_param, rq_params, param, rqp. Refactor the code to use a single name: rq_param. This patch has no functional change. Signed-off-by: Dragos Tatulea Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260223204155.1783580-2-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 5 +- .../ethernet/mellanox/mlx5/core/en/params.c | 27 +++++----- .../net/ethernet/mellanox/mlx5/core/en/ptp.c | 4 +- .../mellanox/mlx5/core/en/xsk/setup.c | 9 ++-- .../net/ethernet/mellanox/mlx5/core/en_main.c | 50 +++++++++---------- 5 files changed, 51 insertions(+), 44 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index ea2cd1f5d1d0..550426979627 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -1060,13 +1060,14 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv); struct mlx5e_xsk_param; struct mlx5e_rq_param; -int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param, +int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *rq_param, struct mlx5e_xsk_param *xsk, int node, u16 q_counter, struct mlx5e_rq *rq); #define MLX5E_RQ_WQES_TIMEOUT 20000 /* msecs */ int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time); void mlx5e_close_rq(struct mlx5e_rq *rq); -int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param, u16 q_counter); +int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *rq_param, + u16 q_counter); void mlx5e_destroy_rq(struct mlx5e_rq *rq); bool mlx5e_reset_rx_moderation(struct dim_cq_moder *cq_moder, u8 cq_period_mode, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 8e99d07586fa..3fdaf003e1d0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -883,14 +883,16 @@ static u8 rq_end_pad_mode(struct mlx5_core_dev *mdev, struct mlx5e_params *param int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, - struct mlx5e_rq_param *param) + struct mlx5e_rq_param *rq_param) { - void *rqc = param->rqc; - void *wq = MLX5_ADDR_OF(rqc, rqc, wq); + void *rqc = rq_param->rqc; u32 lro_timeout; int ndsegs = 1; + void *wq; int err; + wq = MLX5_ADDR_OF(rqc, rqc, wq); + switch (params->rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: { u8 log_wqe_num_of_strides = mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk); @@ -938,11 +940,12 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, } default: /* MLX5_WQ_TYPE_CYCLIC */ MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames); - err = mlx5e_build_rq_frags_info(mdev, params, xsk, ¶m->frags_info, - ¶m->xdp_frag_size); + err = mlx5e_build_rq_frags_info(mdev, params, xsk, + &rq_param->frags_info, + &rq_param->xdp_frag_size); if (err) return err; - ndsegs = param->frags_info.num_frags; + ndsegs = rq_param->frags_info.num_frags; } MLX5_SET(wq, wq, wq_type, params->rq_wq_type); @@ -953,23 +956,23 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, MLX5_SET(rqc, rqc, vsd, params->vlan_strip_disable); MLX5_SET(rqc, rqc, scatter_fcs, params->scatter_fcs_en); - param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); - mlx5e_build_rx_cq_param(mdev, params, xsk, ¶m->cqp); + rq_param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); + mlx5e_build_rx_cq_param(mdev, params, xsk, &rq_param->cqp); return 0; } void mlx5e_build_drop_rq_param(struct mlx5_core_dev *mdev, - struct mlx5e_rq_param *param) + struct mlx5e_rq_param *rq_param) { - void *rqc = param->rqc; + void *rqc = rq_param->rqc; void *wq = MLX5_ADDR_OF(rqc, rqc, wq); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); MLX5_SET(wq, wq, log_wq_stride, mlx5e_get_rqwq_log_stride(MLX5_WQ_TYPE_CYCLIC, 1)); - param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); + rq_param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); } void mlx5e_build_tx_cq_param(struct mlx5_core_dev *mdev, @@ -1097,7 +1100,7 @@ static u32 mlx5e_mpwrq_total_umr_wqebbs(struct mlx5_core_dev *mdev, static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5_core_dev *mdev, struct mlx5e_params *params, - struct mlx5e_rq_param *rqp) + struct mlx5e_rq_param *rq_param) { u32 wqebbs, total_pages, useful_space; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index 74660e7fe674..13add74d1b97 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -660,13 +660,13 @@ static void mlx5e_ptp_build_rq_param(struct mlx5_core_dev *mdev, struct net_device *netdev, struct mlx5e_ptp_params *ptp_params) { - struct mlx5e_rq_param *rq_params = &ptp_params->rq_param; + struct mlx5e_rq_param *rq_param = &ptp_params->rq_param; struct mlx5e_params *params = &ptp_params->params; params->rq_wq_type = MLX5_WQ_TYPE_CYCLIC; mlx5e_init_rq_type_params(mdev, params); params->sw_mtu = netdev->max_mtu; - mlx5e_build_rq_param(mdev, params, NULL, rq_params); + mlx5e_build_rq_param(mdev, params, NULL, rq_param); } static void mlx5e_ptp_build_params(struct mlx5e_ptp *c, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index 5981c71cae2d..50c14ad29ed6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -90,8 +90,10 @@ static int mlx5e_init_xsk_rq(struct mlx5e_channel *c, return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix, c->napi.napi_id); } -static int mlx5e_open_xsk_rq(struct mlx5e_channel *c, struct mlx5e_params *params, - struct mlx5e_rq_param *rq_params, struct xsk_buff_pool *pool, +static int mlx5e_open_xsk_rq(struct mlx5e_channel *c, + struct mlx5e_params *params, + struct mlx5e_rq_param *rq_param, + struct xsk_buff_pool *pool, struct mlx5e_xsk_param *xsk) { u16 q_counter = c->priv->q_counter[c->sd_ix]; @@ -102,7 +104,8 @@ static int mlx5e_open_xsk_rq(struct mlx5e_channel *c, struct mlx5e_params *param if (err) return err; - err = mlx5e_open_rq(params, rq_params, xsk, cpu_to_node(c->cpu), q_counter, xskrq); + err = mlx5e_open_rq(params, rq_param, xsk, cpu_to_node(c->cpu), + q_counter, xskrq); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 7eb691c2a1bd..f2ce24cf56ce 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -780,7 +780,7 @@ static int mlx5e_create_rq_hd_mkey(struct mlx5_core_dev *mdev, static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev, struct mlx5e_params *params, - struct mlx5e_rq_param *rqp, + struct mlx5e_rq_param *rq_param, struct mlx5e_rq *rq, int node) { @@ -791,7 +791,7 @@ static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev, if (!test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) return 0; - hd_per_wq = mlx5e_shampo_hd_per_wq(mdev, params, rqp); + hd_per_wq = mlx5e_shampo_hd_per_wq(mdev, params, rq_param); hd_buf_size = hd_per_wq * BIT(MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE); nentries = hd_buf_size / PAGE_SIZE; if (!nentries) { @@ -852,18 +852,17 @@ static void mlx5e_rq_free_shampo(struct mlx5e_rq *rq) static int mlx5e_alloc_rq(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, - struct mlx5e_rq_param *rqp, + struct mlx5e_rq_param *rq_param, int node, struct mlx5e_rq *rq) { + void *rqc_wq = MLX5_ADDR_OF(rqc, rq_param->rqc, wq); struct mlx5_core_dev *mdev = rq->mdev; - void *rqc = rqp->rqc; - void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); u32 pool_size; int wq_sz; int err; int i; - rqp->wq.db_numa_node = node; + rq_param->wq.db_numa_node = node; INIT_WORK(&rq->recover_work, mlx5e_rq_err_cqe_work); INIT_WORK(&rq->rx_timeout_work, mlx5e_rq_timeout_work); @@ -879,8 +878,8 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->mpwqe.wq, - &rq->wq_ctrl); + err = mlx5_wq_ll_create(mdev, &rq_param->wq, rqc_wq, + &rq->mpwqe.wq, &rq->wq_ctrl); if (err) goto err_rq_xdp_prog; @@ -925,14 +924,14 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, if (err) goto err_rq_mkey; - err = mlx5_rq_shampo_alloc(mdev, params, rqp, rq, node); + err = mlx5_rq_shampo_alloc(mdev, params, rq_param, rq, node); if (err) goto err_free_mpwqe_info; break; default: /* MLX5_WQ_TYPE_CYCLIC */ - err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq, - &rq->wq_ctrl); + err = mlx5_wq_cyc_create(mdev, &rq_param->wq, rqc_wq, + &rq->wqe.wq, &rq->wq_ctrl); if (err) goto err_rq_xdp_prog; @@ -940,7 +939,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq); - rq->wqe.info = rqp->frags_info; + rq->wqe.info = rq_param->frags_info; rq->buff.frame0_sz = rq->wqe.info.arr[0].frag_stride; err = mlx5e_init_wqe_alloc_info(rq, node); @@ -1085,7 +1084,8 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq) xdp_rxq_info_unreg(&rq->xdp_rxq); } -int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param, u16 q_counter) +int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *rq_param, + u16 q_counter) { struct mlx5_core_dev *mdev = rq->mdev; u8 ts_format; @@ -1107,7 +1107,7 @@ int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param, u16 q_cou rqc = MLX5_ADDR_OF(create_rq_in, in, ctx); wq = MLX5_ADDR_OF(rqc, rqc, wq); - memcpy(rqc, param->rqc, sizeof(param->rqc)); + memcpy(rqc, rq_param->rqc, sizeof(rq_param->rqc)); MLX5_SET(rqc, rqc, cqn, rq->cq.mcq.cqn); MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); @@ -1323,7 +1323,7 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq) } -int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param, +int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *rq_param, struct mlx5e_xsk_param *xsk, int node, u16 q_counter, struct mlx5e_rq *rq) { @@ -1333,11 +1333,11 @@ int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param, if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) __set_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state); - err = mlx5e_alloc_rq(params, xsk, param, node, rq); + err = mlx5e_alloc_rq(params, xsk, rq_param, node, rq); if (err) return err; - err = mlx5e_create_rq(rq, param, q_counter); + err = mlx5e_create_rq(rq, rq_param, q_counter); if (err) goto err_free_rq; @@ -2507,16 +2507,17 @@ static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate) } static int mlx5e_open_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params, - struct mlx5e_rq_param *rq_params) + struct mlx5e_rq_param *rq_param) { u16 q_counter = c->priv->q_counter[c->sd_ix]; int err; - err = mlx5e_init_rxq_rq(c, params, rq_params->xdp_frag_size, &c->rq); + err = mlx5e_init_rxq_rq(c, params, rq_param->xdp_frag_size, &c->rq); if (err) return err; - return mlx5e_open_rq(params, rq_params, NULL, cpu_to_node(c->cpu), q_counter, &c->rq); + return mlx5e_open_rq(params, rq_param, NULL, cpu_to_node(c->cpu), + q_counter, &c->rq); } static struct mlx5e_icosq * @@ -3577,15 +3578,14 @@ static void mlx5e_free_drop_rq(struct mlx5e_rq *rq) static int mlx5e_alloc_drop_rq(struct mlx5_core_dev *mdev, struct mlx5e_rq *rq, - struct mlx5e_rq_param *param) + struct mlx5e_rq_param *rq_param) { - void *rqc = param->rqc; - void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); + void *rqc_wq = MLX5_ADDR_OF(rqc, rq_param->rqc, wq); int err; - param->wq.db_numa_node = param->wq.buf_numa_node; + rq_param->wq.db_numa_node = rq_param->wq.buf_numa_node; - err = mlx5_wq_cyc_create(mdev, ¶m->wq, rqc_wq, &rq->wqe.wq, + err = mlx5_wq_cyc_create(mdev, &rq_param->wq, rqc_wq, &rq->wqe.wq, &rq->wq_ctrl); if (err) return err; From d3a99b71a29ce6a9c157bba50028a82c4a129c3c Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Mon, 23 Feb 2026 22:41:42 +0200 Subject: [PATCH 0054/1561] net/mlx5e: Extract striding rq param calculation in function Calculating parameters for striding rq is large enough to deserve its own function. As the names are also very long it is very easy to hit on the 80 char limitation every time a change is made. This is an additional sign that it should be extracted into its own function. This patch has no functional change. Signed-off-by: Dragos Tatulea Reviewed-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260223204155.1783580-3-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- .../ethernet/mellanox/mlx5/core/en/params.c | 106 ++++++++++-------- 1 file changed, 62 insertions(+), 44 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 3fdaf003e1d0..07d75a85ee7f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -880,13 +880,70 @@ static u8 rq_end_pad_mode(struct mlx5_core_dev *mdev, struct mlx5e_params *param MLX5_WQ_END_PAD_MODE_NONE : MLX5_WQ_END_PAD_MODE_ALIGN; } +static int mlx5e_mpwqe_build_rq_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_rq_param *rq_param) +{ + u8 log_rq_sz = mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk); + u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); + u8 log_wqe_num_of_strides, log_wqe_stride_size; + enum mlx5e_mpwrq_umr_mode umr_mode; + void *rqc = rq_param->rqc; + u32 lro_timeout; + void *wq; + + log_wqe_num_of_strides = mlx5e_mpwqe_get_log_num_strides(mdev, params, + xsk); + log_wqe_stride_size = mlx5e_mpwqe_get_log_stride_size(mdev, params, + xsk); + umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); + + wq = MLX5_ADDR_OF(rqc, rqc, wq); + if (!mlx5e_verify_rx_mpwqe_strides(mdev, log_wqe_stride_size, + log_wqe_num_of_strides, + page_shift, umr_mode)) { + mlx5_core_err(mdev, + "Bad RX MPWQE params: log_stride_size %u, log_num_strides %u, umr_mode %d\n", + log_wqe_stride_size, log_wqe_num_of_strides, + umr_mode); + return -EINVAL; + } + + MLX5_SET(wq, wq, log_wqe_num_of_strides, + log_wqe_num_of_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE); + MLX5_SET(wq, wq, log_wqe_stride_size, + log_wqe_stride_size - MLX5_MPWQE_LOG_STRIDE_SZ_BASE); + MLX5_SET(wq, wq, log_wq_sz, log_rq_sz); + if (params->packet_merge.type != MLX5E_PACKET_MERGE_SHAMPO) + return 0; + + MLX5_SET(wq, wq, shampo_enable, true); + MLX5_SET(wq, wq, log_reservation_size, + MLX5E_SHAMPO_WQ_LOG_RESRV_SIZE - + MLX5E_SHAMPO_WQ_RESRV_SIZE_BASE_SHIFT); + MLX5_SET(wq, wq, log_max_num_of_packets_per_reservation, + mlx5e_shampo_get_log_pkt_per_rsrv(params)); + MLX5_SET(wq, wq, log_headers_entry_size, + MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE - + MLX5E_SHAMPO_WQ_BASE_HEAD_ENTRY_SIZE_SHIFT); + lro_timeout = mlx5e_choose_lro_timeout(mdev, + MLX5E_DEFAULT_SHAMPO_TIMEOUT); + MLX5_SET(rqc, rqc, reservation_timeout, lro_timeout); + MLX5_SET(rqc, rqc, shampo_match_criteria_type, + MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_EXTENDED); + MLX5_SET(rqc, rqc, shampo_no_match_alignment_granularity, + MLX5_RQC_SHAMPO_NO_MATCH_ALIGNMENT_GRANULARITY_STRIDE); + + return 0; +} + int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, struct mlx5e_rq_param *rq_param) { void *rqc = rq_param->rqc; - u32 lro_timeout; int ndsegs = 1; void *wq; int err; @@ -894,50 +951,11 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, wq = MLX5_ADDR_OF(rqc, rqc, wq); switch (params->rq_wq_type) { - case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: { - u8 log_wqe_num_of_strides = mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk); - u8 log_wqe_stride_size = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); - enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); - u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); - - if (!mlx5e_verify_rx_mpwqe_strides(mdev, log_wqe_stride_size, - log_wqe_num_of_strides, - page_shift, umr_mode)) { - mlx5_core_err(mdev, - "Bad RX MPWQE params: log_stride_size %u, log_num_strides %u, umr_mode %d\n", - log_wqe_stride_size, log_wqe_num_of_strides, - umr_mode); - return -EINVAL; - } - - MLX5_SET(wq, wq, log_wqe_num_of_strides, - log_wqe_num_of_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE); - MLX5_SET(wq, wq, log_wqe_stride_size, - log_wqe_stride_size - MLX5_MPWQE_LOG_STRIDE_SZ_BASE); - MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk)); - if (params->packet_merge.type != MLX5E_PACKET_MERGE_SHAMPO) - break; - - MLX5_SET(wq, wq, shampo_enable, true); - MLX5_SET(wq, wq, log_reservation_size, - MLX5E_SHAMPO_WQ_LOG_RESRV_SIZE - - MLX5E_SHAMPO_WQ_RESRV_SIZE_BASE_SHIFT); - MLX5_SET(wq, wq, - log_max_num_of_packets_per_reservation, - mlx5e_shampo_get_log_pkt_per_rsrv(params)); - MLX5_SET(wq, wq, log_headers_entry_size, - MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE - - MLX5E_SHAMPO_WQ_BASE_HEAD_ENTRY_SIZE_SHIFT); - lro_timeout = - mlx5e_choose_lro_timeout(mdev, - MLX5E_DEFAULT_SHAMPO_TIMEOUT); - MLX5_SET(rqc, rqc, reservation_timeout, lro_timeout); - MLX5_SET(rqc, rqc, shampo_match_criteria_type, - MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_EXTENDED); - MLX5_SET(rqc, rqc, shampo_no_match_alignment_granularity, - MLX5_RQC_SHAMPO_NO_MATCH_ALIGNMENT_GRANULARITY_STRIDE); + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + err = mlx5e_mpwqe_build_rq_param(mdev, params, xsk, rq_param); + if (err) + return err; break; - } default: /* MLX5_WQ_TYPE_CYCLIC */ MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames); err = mlx5e_build_rq_frags_info(mdev, params, xsk, From a2ff2f5f808fe2638361fc634e8f3a2d73353bc8 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Mon, 23 Feb 2026 22:41:43 +0200 Subject: [PATCH 0055/1561] net/mlx5e: Extract max_xsk_wqebbs into its own function Calculating max_xsk_wqebbs seems large enough to deserve its own function. It will make upcoming changes easier. This patch has no functional changes. Signed-off-by: Dragos Tatulea Reviewed-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260223204155.1783580-4-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- .../ethernet/mellanox/mlx5/core/en/params.c | 98 ++++++++++--------- 1 file changed, 54 insertions(+), 44 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 07d75a85ee7f..be1aa37531de 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -1116,6 +1116,59 @@ static u32 mlx5e_mpwrq_total_umr_wqebbs(struct mlx5_core_dev *mdev, return umr_wqebbs * (1 << mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk)); } +static u32 mlx5e_max_xsk_wqebbs(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + struct mlx5e_xsk_param xsk = {0}; + u32 max_xsk_wqebbs = 0; + u8 frame_shift; + + if (!params->xdp_prog) + return 0; + + /* If XDP program is attached, XSK may be turned on at any time without + * restarting the channel. ICOSQ must be big enough to fit UMR WQEs of + * both regular RQ and XSK RQ. + * + * XSK uses different values of page_shift, and the total number of UMR + * WQEBBs depends on it. This dependency is complex and not monotonic, + * especially taking into consideration that some of the parameters come + * from capabilities. Hence, we have to try all valid values of XSK + * frame size (and page_shift) to find the maximum. + */ + for (frame_shift = XDP_UMEM_MIN_CHUNK_SHIFT; + frame_shift <= PAGE_SHIFT; frame_shift++) { + u32 total_wqebbs; + + /* The headroom doesn't affect the calculations below. */ + + /* XSK aligned mode. */ + xsk.chunk_size = 1 << frame_shift; + xsk.unaligned = false; + total_wqebbs = mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk); + max_xsk_wqebbs = max(max_xsk_wqebbs, total_wqebbs); + + /* XSK unaligned mode, frame size is a power of two. */ + xsk.unaligned = true; + total_wqebbs = mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk); + max_xsk_wqebbs = max(max_xsk_wqebbs, total_wqebbs); + + /* XSK unaligned mode, frame size is not equal to stride + * size. + */ + xsk.chunk_size -= 1; + total_wqebbs = mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk); + max_xsk_wqebbs = max(max_xsk_wqebbs, total_wqebbs); + + /* XSK unaligned mode, frame size is a triple power of two. */ + xsk.chunk_size = (1 << frame_shift) / 4 * 3; + total_wqebbs = mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk); + max_xsk_wqebbs = max(max_xsk_wqebbs, total_wqebbs); + } + + return max_xsk_wqebbs; +} + static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_rq_param *rq_param) @@ -1129,50 +1182,7 @@ static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5_core_dev *mdev, /* UMR WQEs for the regular RQ. */ wqebbs = mlx5e_mpwrq_total_umr_wqebbs(mdev, params, NULL); - /* If XDP program is attached, XSK may be turned on at any time without - * restarting the channel. ICOSQ must be big enough to fit UMR WQEs of - * both regular RQ and XSK RQ. - * - * XSK uses different values of page_shift, and the total number of UMR - * WQEBBs depends on it. This dependency is complex and not monotonic, - * especially taking into consideration that some of the parameters come - * from capabilities. Hence, we have to try all valid values of XSK - * frame size (and page_shift) to find the maximum. - */ - if (params->xdp_prog) { - u32 max_xsk_wqebbs = 0; - u8 frame_shift; - - for (frame_shift = XDP_UMEM_MIN_CHUNK_SHIFT; - frame_shift <= PAGE_SHIFT; frame_shift++) { - /* The headroom doesn't affect the calculation. */ - struct mlx5e_xsk_param xsk = { - .chunk_size = 1 << frame_shift, - .unaligned = false, - }; - - /* XSK aligned mode. */ - max_xsk_wqebbs = max(max_xsk_wqebbs, - mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk)); - - /* XSK unaligned mode, frame size is a power of two. */ - xsk.unaligned = true; - max_xsk_wqebbs = max(max_xsk_wqebbs, - mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk)); - - /* XSK unaligned mode, frame size is not equal to stride size. */ - xsk.chunk_size -= 1; - max_xsk_wqebbs = max(max_xsk_wqebbs, - mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk)); - - /* XSK unaligned mode, frame size is a triple power of two. */ - xsk.chunk_size = (1 << frame_shift) / 4 * 3; - max_xsk_wqebbs = max(max_xsk_wqebbs, - mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk)); - } - - wqebbs += max_xsk_wqebbs; - } + wqebbs += mlx5e_max_xsk_wqebbs(mdev, params); /* UMR WQEs don't cross the page boundary, they are padded with NOPs. * This padding is always smaller than the max WQE size. That gives us From ba4f39c256f503b4c6685f83f2a09d0a170a8144 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Mon, 23 Feb 2026 22:41:44 +0200 Subject: [PATCH 0056/1561] net/mlx5e: Expose and rename xsk channel parameter function mlx5e_build_xsk_cparam() is meant to be the alternative to mlx5e_build_channel_param(). It calculates only the parameters that it requires using the previously configured mlx5e_xsk_param. Move this function to params.c to be alongside mlx5e_build_channel_param() and give it a similar name. Expose the function as it will be needed by upcoming changes. This patch has no functional changes. Signed-off-by: Dragos Tatulea Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260223204155.1783580-5-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/en/params.c | 9 +++++++++ drivers/net/ethernet/mellanox/mlx5/core/en/params.h | 5 +++++ .../net/ethernet/mellanox/mlx5/core/en/xsk/setup.c | 11 +---------- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index be1aa37531de..4d51fad7d9eb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -1272,3 +1272,12 @@ int mlx5e_build_channel_param(struct mlx5_core_dev *mdev, return 0; } + +void mlx5e_build_xsk_channel_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_channel_param *cparam) +{ + mlx5e_build_rq_param(mdev, params, xsk, &cparam->rq); + mlx5e_build_xdpsq_param(mdev, params, &cparam->xdp_sq); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index 00617c65fe3c..26680985ee39 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -138,6 +138,11 @@ int mlx5e_build_channel_param(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_channel_param *cparam); +void mlx5e_build_xsk_channel_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_channel_param *cparam); + u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params); int mlx5e_validate_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); bool mlx5e_verify_params_rx_mpwqe_strides(struct mlx5_core_dev *mdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index 50c14ad29ed6..e3b7e79863ae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -48,15 +48,6 @@ bool mlx5e_validate_xsk_param(struct mlx5e_params *params, } } -static void mlx5e_build_xsk_cparam(struct mlx5_core_dev *mdev, - struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk, - struct mlx5e_channel_param *cparam) -{ - mlx5e_build_rq_param(mdev, params, xsk, &cparam->rq); - mlx5e_build_xdpsq_param(mdev, params, &cparam->xdp_sq); -} - static int mlx5e_init_xsk_rq(struct mlx5e_channel *c, struct mlx5e_params *params, struct xsk_buff_pool *pool, @@ -130,7 +121,7 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, if (!cparam) return -ENOMEM; - mlx5e_build_xsk_cparam(priv->mdev, params, xsk, cparam); + mlx5e_build_xsk_channel_param(priv->mdev, params, xsk, cparam); err = mlx5e_open_cq(c->mdev, params->rx_cq_moderation, &cparam->rq.cqp, &ccp, &c->xskrq.cq); From 8a96b9144f18a2c996d3cfead85e4ca0f9e58de7 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Mon, 23 Feb 2026 22:41:45 +0200 Subject: [PATCH 0057/1561] net/mlx5e: Alloc xsk channel param out of mlx5e_open_xsk() Currently the allocation and filling of the xsk channel parameters was done in mlx5e_open_xsk(). Move this responsibility out of mlx5e_open_xsk() and have the function take an already filled mlx5e_channel_param. mlx5e_open_channel() already allocates channel parameters. The only precaution that is needed is to call mlx5e_build_xsk_channel_param() before mlx5e_open_xsk(). mlx5e_xsk_enable_locked() now allocates and fills the xsk parameters. For simplicity, link the xsk parameters in struct mlx5e_channel_params so that channel params can be passed around. This patch has no functional changes. Signed-off-by: Dragos Tatulea Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260223204155.1783580-6-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- .../ethernet/mellanox/mlx5/core/en/params.c | 1 + .../ethernet/mellanox/mlx5/core/en/params.h | 1 + .../ethernet/mellanox/mlx5/core/en/xsk/pool.c | 17 +++++++++++++++-- .../ethernet/mellanox/mlx5/core/en/xsk/setup.c | 18 ++++-------------- .../ethernet/mellanox/mlx5/core/en/xsk/setup.h | 4 +++- .../net/ethernet/mellanox/mlx5/core/en_main.c | 3 ++- 6 files changed, 26 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 4d51fad7d9eb..ef88097c1d4d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -1278,6 +1278,7 @@ void mlx5e_build_xsk_channel_param(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk, struct mlx5e_channel_param *cparam) { + cparam->xsk = xsk; mlx5e_build_rq_param(mdev, params, xsk, &cparam->rq); mlx5e_build_xdpsq_param(mdev, params, &cparam->xdp_sq); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index 26680985ee39..c132649dd9f2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -42,6 +42,7 @@ struct mlx5e_channel_param { struct mlx5e_sq_param xdp_sq; struct mlx5e_sq_param icosq; struct mlx5e_sq_param async_icosq; + struct mlx5e_xsk_param *xsk; }; struct mlx5e_create_sq_param { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c index 5c5360a25c64..92bcf16a2019 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c @@ -79,6 +79,7 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv, struct xsk_buff_pool *pool, u16 ix) { struct mlx5e_params *params = &priv->channels.params; + struct mlx5e_channel_param *cparam; struct mlx5e_xsk_param xsk; struct mlx5e_channel *c; int err; @@ -89,15 +90,20 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv, if (unlikely(!mlx5e_xsk_is_pool_sane(pool))) return -EINVAL; + cparam = kvzalloc_obj(*cparam, GFP_KERNEL); + if (!cparam) + return -ENOMEM; + err = mlx5e_xsk_map_pool(mlx5_sd_ch_ix_get_dev(priv->mdev, ix), pool); if (unlikely(err)) - return err; + goto err_free_cparam; err = mlx5e_xsk_add_pool(&priv->xsk, pool, ix); if (unlikely(err)) goto err_unmap_pool; mlx5e_build_xsk_param(pool, &xsk); + mlx5e_build_xsk_channel_param(priv->mdev, params, &xsk, cparam); if (priv->channels.params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ && mlx5e_mpwrq_umr_mode(priv->mdev, &xsk) == MLX5E_MPWRQ_UMR_MODE_OVERSIZED) { @@ -122,7 +128,7 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv, c = priv->channels.c[ix]; - err = mlx5e_open_xsk(priv, params, &xsk, pool, c); + err = mlx5e_open_xsk(priv, params, cparam, pool, c); if (unlikely(err)) goto err_remove_pool; @@ -138,6 +144,8 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv, mlx5e_deactivate_rq(&c->rq); mlx5e_flush_rq(&c->rq, MLX5_RQC_STATE_RDY); + kvfree(cparam); + return 0; err_remove_pool: @@ -146,6 +154,9 @@ err_remove_pool: err_unmap_pool: mlx5e_xsk_unmap_pool(priv, pool); +err_free_cparam: + kvfree(cparam); + return err; validate_closed: @@ -157,6 +168,8 @@ validate_closed: goto err_remove_pool; } + kvfree(cparam); + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index e3b7e79863ae..03f1be361701 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -105,10 +105,11 @@ static int mlx5e_open_xsk_rq(struct mlx5e_channel *c, } int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk, struct xsk_buff_pool *pool, + struct mlx5e_channel_param *cparam, + struct xsk_buff_pool *pool, struct mlx5e_channel *c) { - struct mlx5e_channel_param *cparam; + struct mlx5e_xsk_param *xsk = cparam->xsk; struct mlx5e_create_cq_param ccp; int err; @@ -117,16 +118,10 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, if (!mlx5e_validate_xsk_param(params, xsk, priv->mdev)) return -EINVAL; - cparam = kvzalloc(sizeof(*cparam), GFP_KERNEL); - if (!cparam) - return -ENOMEM; - - mlx5e_build_xsk_channel_param(priv->mdev, params, xsk, cparam); - err = mlx5e_open_cq(c->mdev, params->rx_cq_moderation, &cparam->rq.cqp, &ccp, &c->xskrq.cq); if (unlikely(err)) - goto err_free_cparam; + return err; err = mlx5e_open_xsk_rq(c, params, &cparam->rq, pool, xsk); if (unlikely(err)) @@ -147,8 +142,6 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, if (unlikely(err)) goto err_close_tx_cq; - kvfree(cparam); - set_bit(MLX5E_CHANNEL_STATE_XSK, c->state); return 0; @@ -162,9 +155,6 @@ err_close_rq: err_close_rx_cq: mlx5e_close_cq(&c->xskrq.cq); -err_free_cparam: - kvfree(cparam); - return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h index 50e111b85efd..fc86d19ea2b3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h @@ -11,8 +11,10 @@ struct mlx5e_xsk_param; bool mlx5e_validate_xsk_param(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, struct mlx5_core_dev *mdev); +struct mlx5e_channel_param; int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk, struct xsk_buff_pool *pool, + struct mlx5e_channel_param *cparam, + struct xsk_buff_pool *pool, struct mlx5e_channel *c); void mlx5e_close_xsk(struct mlx5e_channel *c); void mlx5e_activate_xsk(struct mlx5e_channel *c); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index f2ce24cf56ce..35b767105492 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2829,7 +2829,8 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, if (xsk_pool) { mlx5e_build_xsk_param(xsk_pool, &xsk); - err = mlx5e_open_xsk(priv, params, &xsk, xsk_pool, c); + mlx5e_build_xsk_channel_param(priv->mdev, params, &xsk, cparam); + err = mlx5e_open_xsk(priv, params, cparam, xsk_pool, c); if (unlikely(err)) goto err_close_queues; } From 099efb294e0a0c8cc4984a8c56b9a69e480ac5e9 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Mon, 23 Feb 2026 22:41:46 +0200 Subject: [PATCH 0058/1561] net/mlx5e: Move xsk param into new option container struct The xsk parameter configuration (struct mlx5e_xsk_param) is passed around many places during parameter calculation. It is used to contain channel specific information (as opposed to the global info from struct mlx5e_params). Upcoming changes will need to push similar channel specific rq configuration. Instead of adding one more parameter to all these functions, create a new container structure that has optional rq specific parameters. The xsk parameter will be the first of such kind. The new container struct is itself optional. That means that before checking its members, it has to be checked itself for validity. This patch has no functional changes. Signed-off-by: Dragos Tatulea Reviewed-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260223204155.1783580-7-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 3 +- .../ethernet/mellanox/mlx5/core/en/params.c | 192 ++++++++++-------- .../ethernet/mellanox/mlx5/core/en/params.h | 38 ++-- .../net/ethernet/mellanox/mlx5/core/en/xdp.c | 5 +- .../net/ethernet/mellanox/mlx5/core/en/xdp.h | 3 +- .../ethernet/mellanox/mlx5/core/en/xsk/pool.c | 6 +- .../mellanox/mlx5/core/en/xsk/setup.c | 31 +-- .../mellanox/mlx5/core/en/xsk/setup.h | 2 +- .../net/ethernet/mellanox/mlx5/core/en_main.c | 33 +-- 9 files changed, 185 insertions(+), 128 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 550426979627..5181d6ab39ae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -1060,8 +1060,9 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv); struct mlx5e_xsk_param; struct mlx5e_rq_param; +struct mlx5e_rq_opt_param; int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *rq_param, - struct mlx5e_xsk_param *xsk, int node, u16 q_counter, + struct mlx5e_rq_opt_param *rqo, int node, u16 q_counter, struct mlx5e_rq *rq); #define MLX5E_RQ_WQES_TIMEOUT 20000 /* msecs */ int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index ef88097c1d4d..97f5d1c2adea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -21,10 +21,14 @@ static u8 mlx5e_mpwrq_min_page_shift(struct mlx5_core_dev *mdev) return min_page_shift ? : 12; } -u8 mlx5e_mpwrq_page_shift(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk) +u8 mlx5e_mpwrq_page_shift(struct mlx5_core_dev *mdev, + struct mlx5e_rq_opt_param *rqo) { - u8 req_page_shift = xsk ? order_base_2(xsk->chunk_size) : PAGE_SHIFT; + struct mlx5e_xsk_param *xsk = mlx5e_rqo_xsk_param(rqo); u8 min_page_shift = mlx5e_mpwrq_min_page_shift(mdev); + u8 req_page_shift; + + req_page_shift = xsk ? order_base_2(xsk->chunk_size) : PAGE_SHIFT; /* Regular RQ uses order-0 pages, the NIC must be able to map them. */ if (WARN_ON_ONCE(!xsk && req_page_shift < min_page_shift)) @@ -34,7 +38,8 @@ u8 mlx5e_mpwrq_page_shift(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xs } enum mlx5e_mpwrq_umr_mode -mlx5e_mpwrq_umr_mode(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk) +mlx5e_mpwrq_umr_mode(struct mlx5_core_dev *mdev, + struct mlx5e_rq_opt_param *rqo) { /* Different memory management schemes use different mechanisms to map * user-mode memory. The stricter guarantees we have, the faster @@ -45,7 +50,8 @@ mlx5e_mpwrq_umr_mode(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk) * 3. KLM - indirect mapping to another MKey to arbitrary addresses, and * mappings can have different sizes. */ - u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); + struct mlx5e_xsk_param *xsk = mlx5e_rqo_xsk_param(rqo); + u8 page_shift = mlx5e_mpwrq_page_shift(mdev, rqo); bool unaligned = xsk ? xsk->unaligned : false; bool oversized = false; @@ -225,12 +231,12 @@ u8 mlx5e_mpwrq_max_log_rq_pkts(struct mlx5_core_dev *mdev, u8 page_shift, } u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk) + struct mlx5e_rq_opt_param *rqo) { u16 headroom; - if (xsk) - return xsk->headroom; + if (mlx5e_rqo_xsk_param(rqo)) + return rqo->xsk->headroom; headroom = NET_IP_ALIGN; if (params->xdp_prog) @@ -263,19 +269,23 @@ static u32 mlx5e_rx_get_linear_sz_skb(struct mlx5e_params *params, bool no_head_ static u32 mlx5e_rx_get_linear_stride_sz(struct mlx5_core_dev *mdev, struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk, + struct mlx5e_rq_opt_param *rqo, bool mpwqe) { + struct mlx5e_xsk_param *xsk = mlx5e_rqo_xsk_param(rqo); bool no_head_tail_room; u32 sz; /* XSK frames are mapped as individual pages, because frames may come in * an arbitrary order from random locations in the UMEM. */ - if (xsk) - return mpwqe ? 1 << mlx5e_mpwrq_page_shift(mdev, xsk) : PAGE_SIZE; + if (xsk) { + return mpwqe ? + BIT(mlx5e_mpwrq_page_shift(mdev, rqo)) : PAGE_SIZE; + } - no_head_tail_room = params->xdp_prog && mpwqe && !mlx5e_rx_is_linear_skb(mdev, params, xsk); + no_head_tail_room = params->xdp_prog && mpwqe && + !mlx5e_rx_is_linear_skb(mdev, params, rqo); /* When no_head_tail_room is set, headroom and tailroom are excluded from skb calculations. * no_head_tail_room should be set in the case of XDP with Striding RQ @@ -291,11 +301,12 @@ static u32 mlx5e_rx_get_linear_stride_sz(struct mlx5_core_dev *mdev, static u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5_core_dev *mdev, struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk) + struct mlx5e_rq_opt_param *rqo) { - u32 linear_stride_sz = mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true); - enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); - u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, rqo); + u32 linear_stride_sz = + mlx5e_rx_get_linear_stride_sz(mdev, params, rqo, true); + u8 page_shift = mlx5e_mpwrq_page_shift(mdev, rqo); return mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode) - order_base_2(linear_stride_sz); @@ -303,8 +314,10 @@ static u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5_core_dev *mdev, bool mlx5e_rx_is_linear_skb(struct mlx5_core_dev *mdev, struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk) + struct mlx5e_rq_opt_param *rqo) { + struct mlx5e_xsk_param *xsk = mlx5e_rqo_xsk_param(rqo); + if (params->packet_merge.type != MLX5E_PACKET_MERGE_NONE) return false; @@ -315,7 +328,7 @@ bool mlx5e_rx_is_linear_skb(struct mlx5_core_dev *mdev, * Both XSK and non-XSK cases allocate an SKB on XDP_PASS. Packet data * must fit into a CPU page. */ - if (mlx5e_rx_get_linear_sz_skb(params, xsk) > PAGE_SIZE) + if (mlx5e_rx_get_linear_sz_skb(params, !!xsk) > PAGE_SIZE) return false; /* XSK frames must be big enough to hold the packet data. */ @@ -349,12 +362,14 @@ static bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev, bool mlx5e_verify_params_rx_mpwqe_strides(struct mlx5_core_dev *mdev, struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk) + struct mlx5e_rq_opt_param *rqo) { - u8 log_wqe_num_of_strides = mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk); - u8 log_wqe_stride_size = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); - enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); - u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, rqo); + u8 log_wqe_num_of_strides = + mlx5e_mpwqe_get_log_num_strides(mdev, params, rqo); + u8 log_wqe_stride_size = + mlx5e_mpwqe_get_log_stride_size(mdev, params, rqo); + u8 page_shift = mlx5e_mpwrq_page_shift(mdev, rqo); return mlx5e_verify_rx_mpwqe_strides(mdev, log_wqe_stride_size, log_wqe_num_of_strides, @@ -363,18 +378,20 @@ bool mlx5e_verify_params_rx_mpwqe_strides(struct mlx5_core_dev *mdev, bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk) + struct mlx5e_rq_opt_param *rqo) { - enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); - u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, rqo); + u32 linear_stride_sz = + mlx5e_rx_get_linear_stride_sz(mdev, params, rqo, true); + u8 page_shift = mlx5e_mpwrq_page_shift(mdev, rqo); u8 log_num_strides; u8 log_stride_sz; u8 log_wqe_sz; - if (!mlx5e_rx_is_linear_skb(mdev, params, xsk)) + if (!mlx5e_rx_is_linear_skb(mdev, params, rqo)) return false; - log_stride_sz = order_base_2(mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true)); + log_stride_sz = order_base_2(linear_stride_sz); log_wqe_sz = mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode); if (log_wqe_sz < log_stride_sz) @@ -389,13 +406,13 @@ bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5_core_dev *mdev, struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk) + struct mlx5e_rq_opt_param *rqo) { - enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, rqo); u8 log_pkts_per_wqe, page_shift, max_log_rq_size; - log_pkts_per_wqe = mlx5e_mpwqe_log_pkts_per_wqe(mdev, params, xsk); - page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); + log_pkts_per_wqe = mlx5e_mpwqe_log_pkts_per_wqe(mdev, params, rqo); + page_shift = mlx5e_mpwrq_page_shift(mdev, rqo); max_log_rq_size = mlx5e_mpwrq_max_log_rq_size(mdev, page_shift, umr_mode); /* Numbers are unsigned, don't subtract to avoid underflow. */ @@ -423,10 +440,11 @@ static u8 mlx5e_shampo_get_log_pkt_per_rsrv(struct mlx5e_params *params) u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk) + struct mlx5e_rq_opt_param *rqo) { - if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk)) - return order_base_2(mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true)); + if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, rqo)) + return order_base_2(mlx5e_rx_get_linear_stride_sz(mdev, params, + rqo, true)); /* XDP in mlx5e doesn't support multiple packets per page. */ if (params->xdp_prog) @@ -437,17 +455,18 @@ u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev, struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk) + struct mlx5e_rq_opt_param *rqo) { - enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); - u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, rqo); + u8 page_shift = mlx5e_mpwrq_page_shift(mdev, rqo); u8 log_wqe_size, log_stride_size; log_wqe_size = mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode); - log_stride_size = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); + log_stride_size = mlx5e_mpwqe_get_log_stride_size(mdev, params, rqo); WARN(log_wqe_size < log_stride_size, "Log WQE size %u < log stride size %u (page shift %u, umr mode %d, xsk on? %d)\n", - log_wqe_size, log_stride_size, page_shift, umr_mode, !!xsk); + log_wqe_size, log_stride_size, page_shift, umr_mode, + rqo && rqo->xsk); return log_wqe_size - log_stride_size; } @@ -459,14 +478,14 @@ u8 mlx5e_mpwqe_get_min_wqe_bulk(unsigned int wq_sz) u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk) + struct mlx5e_rq_opt_param *rqo) { - u16 linear_headroom = mlx5e_get_linear_rq_headroom(params, xsk); + u16 linear_headroom = mlx5e_get_linear_rq_headroom(params, rqo); if (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) return linear_headroom; - if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk)) + if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, rqo)) return linear_headroom; if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) @@ -535,10 +554,11 @@ int mlx5e_mpwrq_validate_regular(struct mlx5_core_dev *mdev, struct mlx5e_params } int mlx5e_mpwrq_validate_xsk(struct mlx5_core_dev *mdev, struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk) + struct mlx5e_rq_opt_param *rqo) { - enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); - u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, rqo); + struct mlx5e_xsk_param *xsk = mlx5e_rqo_xsk_param(rqo); + u8 page_shift = mlx5e_mpwrq_page_shift(mdev, rqo); u16 max_mtu_pkts; if (!mlx5e_check_fragmented_striding_rq_cap(mdev, page_shift, umr_mode)) { @@ -547,7 +567,7 @@ int mlx5e_mpwrq_validate_xsk(struct mlx5_core_dev *mdev, struct mlx5e_params *pa return -EOPNOTSUPP; } - if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk)) { + if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, rqo)) { mlx5_core_err(mdev, "Striding RQ linear mode for XSK can't be activated with current params\n"); return -EINVAL; } @@ -559,7 +579,8 @@ int mlx5e_mpwrq_validate_xsk(struct mlx5_core_dev *mdev, struct mlx5e_params *pa mlx5e_mpwrq_max_log_rq_pkts(mdev, page_shift, xsk->unaligned)); if (params->log_rq_mtu_frames > max_mtu_pkts) { mlx5_core_err(mdev, "Current RQ length %d is too big for XSK with given frame size %u\n", - 1 << params->log_rq_mtu_frames, xsk->chunk_size); + 1 << params->log_rq_mtu_frames, + xsk->chunk_size); return -EINVAL; } @@ -672,7 +693,7 @@ static void mlx5e_rx_compute_wqe_bulk_params(struct mlx5e_params *params, static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk, + struct mlx5e_rq_opt_param *rqo, struct mlx5e_rq_frags_info *info, u32 *xdp_frag_size) { @@ -684,10 +705,11 @@ static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, int max_mtu; int i; - if (mlx5e_rx_is_linear_skb(mdev, params, xsk)) { + if (mlx5e_rx_is_linear_skb(mdev, params, rqo)) { int frag_stride; - frag_stride = mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, false); + frag_stride = mlx5e_rx_get_linear_stride_sz(mdev, params, rqo, + false); info->arr[0].frag_size = byte_count; info->arr[0].frag_stride = frag_stride; @@ -703,7 +725,7 @@ static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, goto out; } - headroom = mlx5e_get_linear_rq_headroom(params, xsk); + headroom = mlx5e_get_linear_rq_headroom(params, rqo); first_frag_size_max = SKB_WITH_OVERHEAD(frag_size_max - headroom); max_mtu = mlx5e_max_nonlinear_mtu(first_frag_size_max, frag_size_max, @@ -819,12 +841,13 @@ static void mlx5e_build_common_cq_param(struct mlx5_core_dev *mdev, static u32 mlx5e_shampo_get_log_cq_size(struct mlx5_core_dev *mdev, struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk) + struct mlx5e_rq_opt_param *rqo) { - u16 num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk)); - u8 log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); + u8 log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, rqo); + u16 num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, + rqo)); int pkt_per_rsrv = BIT(mlx5e_shampo_get_log_pkt_per_rsrv(params)); - int wq_size = BIT(mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk)); + int wq_size = BIT(mlx5e_mpwqe_get_log_rq_size(mdev, params, rqo)); int wqe_size = BIT(log_stride_sz) * num_strides; int rsrv_size = MLX5E_SHAMPO_WQ_RESRV_SIZE; @@ -836,7 +859,7 @@ static u32 mlx5e_shampo_get_log_cq_size(struct mlx5_core_dev *mdev, static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev, struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk, + struct mlx5e_rq_opt_param *rqo, struct mlx5e_cq_param *param) { bool hw_stridx = false; @@ -847,10 +870,13 @@ static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev, case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: hw_stridx = MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index); if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) - log_cq_size = mlx5e_shampo_get_log_cq_size(mdev, params, xsk); + log_cq_size = + mlx5e_shampo_get_log_cq_size(mdev, params, rqo); else - log_cq_size = mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk) + - mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk); + log_cq_size = + mlx5e_mpwqe_get_log_rq_size(mdev, params, rqo) + + mlx5e_mpwqe_get_log_num_strides(mdev, params, + rqo); break; default: /* MLX5_WQ_TYPE_CYCLIC */ log_cq_size = params->log_rq_mtu_frames; @@ -882,22 +908,22 @@ static u8 rq_end_pad_mode(struct mlx5_core_dev *mdev, struct mlx5e_params *param static int mlx5e_mpwqe_build_rq_param(struct mlx5_core_dev *mdev, struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk, + struct mlx5e_rq_opt_param *rqo, struct mlx5e_rq_param *rq_param) { - u8 log_rq_sz = mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk); - u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); + u8 log_rq_sz = mlx5e_mpwqe_get_log_rq_size(mdev, params, rqo); + u8 page_shift = mlx5e_mpwrq_page_shift(mdev, rqo); u8 log_wqe_num_of_strides, log_wqe_stride_size; enum mlx5e_mpwrq_umr_mode umr_mode; void *rqc = rq_param->rqc; u32 lro_timeout; void *wq; - log_wqe_num_of_strides = mlx5e_mpwqe_get_log_num_strides(mdev, params, - xsk); - log_wqe_stride_size = mlx5e_mpwqe_get_log_stride_size(mdev, params, - xsk); - umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); + log_wqe_num_of_strides = + mlx5e_mpwqe_get_log_num_strides(mdev, params, rqo); + log_wqe_stride_size = + mlx5e_mpwqe_get_log_stride_size(mdev, params, rqo); + umr_mode = mlx5e_mpwrq_umr_mode(mdev, rqo); wq = MLX5_ADDR_OF(rqc, rqc, wq); if (!mlx5e_verify_rx_mpwqe_strides(mdev, log_wqe_stride_size, @@ -940,7 +966,7 @@ static int mlx5e_mpwqe_build_rq_param(struct mlx5_core_dev *mdev, int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk, + struct mlx5e_rq_opt_param *rqo, struct mlx5e_rq_param *rq_param) { void *rqc = rq_param->rqc; @@ -952,13 +978,13 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, switch (params->rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - err = mlx5e_mpwqe_build_rq_param(mdev, params, xsk, rq_param); + err = mlx5e_mpwqe_build_rq_param(mdev, params, rqo, rq_param); if (err) return err; break; default: /* MLX5_WQ_TYPE_CYCLIC */ MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames); - err = mlx5e_build_rq_frags_info(mdev, params, xsk, + err = mlx5e_build_rq_frags_info(mdev, params, rqo, &rq_param->frags_info, &rq_param->xdp_frag_size); if (err) @@ -975,7 +1001,7 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, MLX5_SET(rqc, rqc, scatter_fcs, params->scatter_fcs_en); rq_param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); - mlx5e_build_rx_cq_param(mdev, params, xsk, &rq_param->cqp); + mlx5e_build_rx_cq_param(mdev, params, rqo, &rq_param->cqp); return 0; } @@ -1105,20 +1131,22 @@ u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout) static u32 mlx5e_mpwrq_total_umr_wqebbs(struct mlx5_core_dev *mdev, struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk) + struct mlx5e_rq_opt_param *rqo) { - enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); - u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, rqo); + u8 page_shift = mlx5e_mpwrq_page_shift(mdev, rqo); u8 umr_wqebbs; umr_wqebbs = mlx5e_mpwrq_umr_wqebbs(mdev, page_shift, umr_mode); - return umr_wqebbs * (1 << mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk)); + return umr_wqebbs * + (1 << mlx5e_mpwqe_get_log_rq_size(mdev, params, rqo)); } static u32 mlx5e_max_xsk_wqebbs(struct mlx5_core_dev *mdev, struct mlx5e_params *params) { + struct mlx5e_rq_opt_param rqo = {0}; struct mlx5e_xsk_param xsk = {0}; u32 max_xsk_wqebbs = 0; u8 frame_shift; @@ -1126,6 +1154,8 @@ static u32 mlx5e_max_xsk_wqebbs(struct mlx5_core_dev *mdev, if (!params->xdp_prog) return 0; + rqo.xsk = &xsk; + /* If XDP program is attached, XSK may be turned on at any time without * restarting the channel. ICOSQ must be big enough to fit UMR WQEs of * both regular RQ and XSK RQ. @@ -1145,24 +1175,24 @@ static u32 mlx5e_max_xsk_wqebbs(struct mlx5_core_dev *mdev, /* XSK aligned mode. */ xsk.chunk_size = 1 << frame_shift; xsk.unaligned = false; - total_wqebbs = mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk); + total_wqebbs = mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &rqo); max_xsk_wqebbs = max(max_xsk_wqebbs, total_wqebbs); /* XSK unaligned mode, frame size is a power of two. */ xsk.unaligned = true; - total_wqebbs = mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk); + total_wqebbs = mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &rqo); max_xsk_wqebbs = max(max_xsk_wqebbs, total_wqebbs); /* XSK unaligned mode, frame size is not equal to stride * size. */ xsk.chunk_size -= 1; - total_wqebbs = mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk); + total_wqebbs = mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &rqo); max_xsk_wqebbs = max(max_xsk_wqebbs, total_wqebbs); /* XSK unaligned mode, frame size is a triple power of two. */ xsk.chunk_size = (1 << frame_shift) / 4 * 3; - total_wqebbs = mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk); + total_wqebbs = mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &rqo); max_xsk_wqebbs = max(max_xsk_wqebbs, total_wqebbs); } @@ -1278,7 +1308,7 @@ void mlx5e_build_xsk_channel_param(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk, struct mlx5e_channel_param *cparam) { - cparam->xsk = xsk; - mlx5e_build_rq_param(mdev, params, xsk, &cparam->rq); + cparam->rq_opt.xsk = xsk; + mlx5e_build_rq_param(mdev, params, &cparam->rq_opt, &cparam->rq); mlx5e_build_xdpsq_param(mdev, params, &cparam->xdp_sq); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index c132649dd9f2..4bce769d48ed 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -12,6 +12,10 @@ struct mlx5e_xsk_param { bool unaligned; }; +struct mlx5e_rq_opt_param { + struct mlx5e_xsk_param *xsk; +}; + struct mlx5e_cq_param { u32 cqc[MLX5_ST_SZ_DW(cqc)]; struct mlx5_wq_param wq; @@ -38,11 +42,11 @@ struct mlx5e_sq_param { struct mlx5e_channel_param { struct mlx5e_rq_param rq; + struct mlx5e_rq_opt_param rq_opt; struct mlx5e_sq_param txq_sq; struct mlx5e_sq_param xdp_sq; struct mlx5e_sq_param icosq; struct mlx5e_sq_param async_icosq; - struct mlx5e_xsk_param *xsk; }; struct mlx5e_create_sq_param { @@ -57,9 +61,11 @@ struct mlx5e_create_sq_param { /* Striding RQ dynamic parameters */ -u8 mlx5e_mpwrq_page_shift(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk); +u8 mlx5e_mpwrq_page_shift(struct mlx5_core_dev *mdev, + struct mlx5e_rq_opt_param *rqo); enum mlx5e_mpwrq_umr_mode -mlx5e_mpwrq_umr_mode(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk); +mlx5e_mpwrq_umr_mode(struct mlx5_core_dev *mdev, + struct mlx5e_rq_opt_param *rqo); u8 mlx5e_mpwrq_umr_entry_size(enum mlx5e_mpwrq_umr_mode mode); u8 mlx5e_mpwrq_log_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift, enum mlx5e_mpwrq_umr_mode umr_mode); @@ -81,22 +87,22 @@ u8 mlx5e_mpwrq_max_log_rq_pkts(struct mlx5_core_dev *mdev, u8 page_shift, bool slow_pci_heuristic(struct mlx5_core_dev *mdev); int mlx5e_mpwrq_validate_regular(struct mlx5_core_dev *mdev, struct mlx5e_params *params); int mlx5e_mpwrq_validate_xsk(struct mlx5_core_dev *mdev, struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk); + struct mlx5e_rq_opt_param *rqo); void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params); void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk); + struct mlx5e_rq_opt_param *rqo); bool mlx5e_rx_is_linear_skb(struct mlx5_core_dev *mdev, struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk); + struct mlx5e_rq_opt_param *rqo); bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk); + struct mlx5e_rq_opt_param *rqo); u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5_core_dev *mdev, struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk); + struct mlx5e_rq_opt_param *rqo); u32 mlx5e_shampo_hd_per_wqe(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_rq_param *rq_param); @@ -106,21 +112,21 @@ u32 mlx5e_shampo_hd_per_wq(struct mlx5_core_dev *mdev, u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout); u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk); + struct mlx5e_rq_opt_param *rqo); u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev, struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk); + struct mlx5e_rq_opt_param *rqo); u8 mlx5e_mpwqe_get_min_wqe_bulk(unsigned int wq_sz); u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk); + struct mlx5e_rq_opt_param *rqo); /* Build queue parameters */ void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e_channel *c); int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk, + struct mlx5e_rq_opt_param *rqo, struct mlx5e_rq_param *param); void mlx5e_build_drop_rq_param(struct mlx5_core_dev *mdev, struct mlx5e_rq_param *param); @@ -148,7 +154,7 @@ u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *par int mlx5e_validate_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); bool mlx5e_verify_params_rx_mpwqe_strides(struct mlx5_core_dev *mdev, struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk); + struct mlx5e_rq_opt_param *rqo); static inline void mlx5e_params_print_info(struct mlx5_core_dev *mdev, struct mlx5e_params *params) @@ -164,4 +170,10 @@ static inline void mlx5e_params_print_info(struct mlx5_core_dev *mdev, "enhanced" : "basic"); }; +static inline struct mlx5e_xsk_param * +mlx5e_rqo_xsk_param(struct mlx5e_rq_opt_param *rqo) +{ + return rqo ? rqo->xsk : NULL; +} + #endif /* __MLX5_EN_PARAMS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 80f9fc10877a..04e1b5fa4825 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -37,9 +37,10 @@ #include #include -int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) +int mlx5e_xdp_max_mtu(struct mlx5e_params *params, + struct mlx5e_rq_opt_param *rqo) { - int hr = mlx5e_get_linear_rq_headroom(params, xsk); + int hr = mlx5e_get_linear_rq_headroom(params, rqo); /* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)). * The condition checked in mlx5e_rx_is_linear_skb is: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index 46ab0a9e8cdd..3c54f8962664 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -96,7 +96,8 @@ union mlx5e_xdp_info { }; struct mlx5e_xsk_param; -int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); +int mlx5e_xdp_max_mtu(struct mlx5e_params *params, + struct mlx5e_rq_opt_param *rqo); bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct bpf_prog *prog, struct mlx5e_xdp_buff *mlctx); void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c index 92bcf16a2019..565e5c4ddcce 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c @@ -80,6 +80,7 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv, { struct mlx5e_params *params = &priv->channels.params; struct mlx5e_channel_param *cparam; + enum mlx5e_mpwrq_umr_mode umr_mode; struct mlx5e_xsk_param xsk; struct mlx5e_channel *c; int err; @@ -105,8 +106,9 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv, mlx5e_build_xsk_param(pool, &xsk); mlx5e_build_xsk_channel_param(priv->mdev, params, &xsk, cparam); + umr_mode = mlx5e_mpwrq_umr_mode(priv->mdev, &cparam->rq_opt); if (priv->channels.params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ && - mlx5e_mpwrq_umr_mode(priv->mdev, &xsk) == MLX5E_MPWRQ_UMR_MODE_OVERSIZED) { + umr_mode == MLX5E_MPWRQ_UMR_MODE_OVERSIZED) { const char *recommendation = is_power_of_2(xsk.chunk_size) ? "Upgrade firmware" : "Disable striding RQ"; @@ -163,7 +165,7 @@ validate_closed: /* Check the configuration in advance, rather than fail at a later stage * (in mlx5e_xdp_set or on open) and end up with no channels. */ - if (!mlx5e_validate_xsk_param(params, &xsk, priv->mdev)) { + if (!mlx5e_validate_xsk_param(params, &cparam->rq_opt, priv->mdev)) { err = -EINVAL; goto err_remove_pool; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index 03f1be361701..11500fd213a5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -9,9 +9,9 @@ static int mlx5e_legacy_rq_validate_xsk(struct mlx5_core_dev *mdev, struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk) + struct mlx5e_rq_opt_param *rqo) { - if (!mlx5e_rx_is_linear_skb(mdev, params, xsk)) { + if (!mlx5e_rx_is_linear_skb(mdev, params, rqo)) { mlx5_core_err(mdev, "Legacy RQ linear mode for XSK can't be activated with current params\n"); return -EINVAL; } @@ -25,9 +25,14 @@ static int mlx5e_legacy_rq_validate_xsk(struct mlx5_core_dev *mdev, #define MLX5E_MIN_XSK_CHUNK_SIZE max(2048, XDP_UMEM_MIN_CHUNK_SIZE) bool mlx5e_validate_xsk_param(struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk, + struct mlx5e_rq_opt_param *rqo, struct mlx5_core_dev *mdev) { + struct mlx5e_xsk_param *xsk = mlx5e_rqo_xsk_param(rqo); + + if (WARN_ON(!xsk)) + return false; + /* AF_XDP doesn't support frames larger than PAGE_SIZE, * and xsk->chunk_size is limited to 65535 bytes. */ @@ -42,9 +47,9 @@ bool mlx5e_validate_xsk_param(struct mlx5e_params *params, */ switch (params->rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - return !mlx5e_mpwrq_validate_xsk(mdev, params, xsk); + return !mlx5e_mpwrq_validate_xsk(mdev, params, rqo); default: /* MLX5_WQ_TYPE_CYCLIC */ - return !mlx5e_legacy_rq_validate_xsk(mdev, params, xsk); + return !mlx5e_legacy_rq_validate_xsk(mdev, params, rqo); } } @@ -83,19 +88,20 @@ static int mlx5e_init_xsk_rq(struct mlx5e_channel *c, static int mlx5e_open_xsk_rq(struct mlx5e_channel *c, struct mlx5e_params *params, - struct mlx5e_rq_param *rq_param, - struct xsk_buff_pool *pool, - struct mlx5e_xsk_param *xsk) + struct mlx5e_channel_param *cparam, + struct xsk_buff_pool *pool) { + struct mlx5e_rq_param *rq_param = &cparam->rq; + struct mlx5e_rq_opt_param *rqo = &cparam->rq_opt; u16 q_counter = c->priv->q_counter[c->sd_ix]; struct mlx5e_rq *xskrq = &c->xskrq; int err; - err = mlx5e_init_xsk_rq(c, params, pool, xsk, xskrq); + err = mlx5e_init_xsk_rq(c, params, pool, rqo->xsk, xskrq); if (err) return err; - err = mlx5e_open_rq(params, rq_param, xsk, cpu_to_node(c->cpu), + err = mlx5e_open_rq(params, rq_param, rqo, cpu_to_node(c->cpu), q_counter, xskrq); if (err) return err; @@ -109,13 +115,12 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, struct xsk_buff_pool *pool, struct mlx5e_channel *c) { - struct mlx5e_xsk_param *xsk = cparam->xsk; struct mlx5e_create_cq_param ccp; int err; mlx5e_build_create_cq_param(&ccp, c); - if (!mlx5e_validate_xsk_param(params, xsk, priv->mdev)) + if (!mlx5e_validate_xsk_param(params, &cparam->rq_opt, priv->mdev)) return -EINVAL; err = mlx5e_open_cq(c->mdev, params->rx_cq_moderation, &cparam->rq.cqp, &ccp, @@ -123,7 +128,7 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, if (unlikely(err)) return err; - err = mlx5e_open_xsk_rq(c, params, &cparam->rq, pool, xsk); + err = mlx5e_open_xsk_rq(c, params, cparam, pool); if (unlikely(err)) goto err_close_rx_cq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h index fc86d19ea2b3..664ec78192c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h @@ -9,7 +9,7 @@ struct mlx5e_xsk_param; bool mlx5e_validate_xsk_param(struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk, + struct mlx5e_rq_opt_param *rqo, struct mlx5_core_dev *mdev); struct mlx5e_channel_param; int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 35b767105492..9e406275e243 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -851,8 +851,8 @@ static void mlx5e_rq_free_shampo(struct mlx5e_rq *rq) } static int mlx5e_alloc_rq(struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk, struct mlx5e_rq_param *rq_param, + struct mlx5e_rq_opt_param *rqo, int node, struct mlx5e_rq *rq) { void *rqc_wq = MLX5_ADDR_OF(rqc, rq_param->rqc, wq); @@ -871,7 +871,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, RCU_INIT_POINTER(rq->xdp_prog, params->xdp_prog); rq->buff.map_dir = params->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; - rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, xsk); + rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, rqo); pool_size = 1 << params->log_rq_mtu_frames; rq->mkey_be = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey); @@ -891,8 +891,8 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq); - rq->mpwqe.page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); - rq->mpwqe.umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); + rq->mpwqe.page_shift = mlx5e_mpwrq_page_shift(mdev, rqo); + rq->mpwqe.umr_mode = mlx5e_mpwrq_umr_mode(mdev, rqo); rq->mpwqe.pages_per_wqe = mlx5e_mpwrq_pages_per_wqe(mdev, rq->mpwqe.page_shift, rq->mpwqe.umr_mode); @@ -904,14 +904,17 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, rq->mpwqe.umr_mode); pool_size = rq->mpwqe.pages_per_wqe << - mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk); + mlx5e_mpwqe_get_log_rq_size(mdev, params, rqo); - if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk) && params->xdp_prog) + if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, rqo) && + params->xdp_prog) pool_size *= 2; /* additional page per packet for the linear part */ - rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); + rq->mpwqe.log_stride_sz = + mlx5e_mpwqe_get_log_stride_size(mdev, params, + rqo); rq->mpwqe.num_strides = - BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk)); + BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, rqo)); rq->mpwqe.min_wqe_bulk = mlx5e_mpwqe_get_min_wqe_bulk(wq_sz); rq->buff.frame0_sz = (1 << rq->mpwqe.log_stride_sz); @@ -947,7 +950,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, goto err_rq_wq_destroy; } - if (xsk) { + if (mlx5e_rqo_xsk_param(rqo)) { err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, MEM_TYPE_XSK_BUFF_POOL, NULL); if (err) @@ -1324,7 +1327,7 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq) } int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *rq_param, - struct mlx5e_xsk_param *xsk, int node, u16 q_counter, + struct mlx5e_rq_opt_param *rqo, int node, u16 q_counter, struct mlx5e_rq *rq) { struct mlx5_core_dev *mdev = rq->mdev; @@ -1333,7 +1336,7 @@ int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *rq_param, if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) __set_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state); - err = mlx5e_alloc_rq(params, xsk, rq_param, node, rq); + err = mlx5e_alloc_rq(params, rq_param, rqo, node, rq); if (err) return err; @@ -4587,6 +4590,7 @@ static bool mlx5e_xsk_validate_mtu(struct net_device *netdev, for (ix = 0; ix < chs->params.num_channels; ix++) { struct xsk_buff_pool *xsk_pool = mlx5e_xsk_get_pool(&chs->params, chs->params.xsk, ix); + struct mlx5e_rq_opt_param rqo = {0}; struct mlx5e_xsk_param xsk; int max_xdp_mtu; @@ -4594,12 +4598,13 @@ static bool mlx5e_xsk_validate_mtu(struct net_device *netdev, continue; mlx5e_build_xsk_param(xsk_pool, &xsk); - max_xdp_mtu = mlx5e_xdp_max_mtu(new_params, &xsk); + rqo.xsk = &xsk; + max_xdp_mtu = mlx5e_xdp_max_mtu(new_params, &rqo); /* Validate XSK params and XDP MTU in advance */ - if (!mlx5e_validate_xsk_param(new_params, &xsk, mdev) || + if (!mlx5e_validate_xsk_param(new_params, &rqo, mdev) || new_params->sw_mtu > max_xdp_mtu) { - u32 hr = mlx5e_get_linear_rq_headroom(new_params, &xsk); + u32 hr = mlx5e_get_linear_rq_headroom(new_params, &rqo); int max_mtu_frame, max_mtu_page, max_mtu; /* Two criteria must be met: From 3707a73854c1614a268e43a0fd98292a699f3b35 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Mon, 23 Feb 2026 22:41:47 +0200 Subject: [PATCH 0059/1561] net/mlx5e: Drop unused channel parameters The channel parameters from struct mlx5_qmgmt_data are built in mlx5e_queue_mem_alloc() but are not used. mlx5e_open_channel() builds the channel parameters internally and those parameters will be the ones that are used when opening the queue. This patch drops the unused parameters. Signed-off-by: Dragos Tatulea Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260223204155.1783580-8-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 9e406275e243..aca88fed2ac7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5555,7 +5555,6 @@ static const struct netdev_stat_ops mlx5e_stat_ops = { struct mlx5_qmgmt_data { struct mlx5e_channel *c; - struct mlx5e_channel_param cparam; }; static int mlx5e_queue_mem_alloc(struct net_device *dev, @@ -5566,7 +5565,6 @@ static int mlx5e_queue_mem_alloc(struct net_device *dev, struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_channels *chs = &priv->channels; struct mlx5e_params params = chs->params; - struct mlx5_core_dev *mdev; int err; mutex_lock(&priv->state_lock); @@ -5590,11 +5588,6 @@ static int mlx5e_queue_mem_alloc(struct net_device *dev, goto unlock; } - mdev = mlx5_sd_ch_ix_get_dev(priv->mdev, queue_index); - err = mlx5e_build_channel_param(mdev, ¶ms, &new->cparam); - if (err) - goto unlock; - err = mlx5e_open_channel(priv, queue_index, ¶ms, NULL, &new->c); unlock: mutex_unlock(&priv->state_lock); From dff1c3164a69284ac9fedb1c25d4c008139e9fb8 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Mon, 23 Feb 2026 22:41:48 +0200 Subject: [PATCH 0060/1561] net/mlx5e: SHAMPO, Always calculate page size Adapt the rx path in SHAMPO mode to calculate page size based on configured page_shift when dealing with payload data. This is necessary as an upcoming patch will add support for using different page sizes. This change has no functional changes. Signed-off-by: Dragos Tatulea Reviewed-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260223204155.1783580-9-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 34 ++++++++++++------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index efcfcddab376..fc95ea00666b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1847,11 +1847,14 @@ mlx5e_shampo_fill_skb_data(struct sk_buff *skb, struct mlx5e_rq *rq, struct mlx5e_frag_page *frag_page, u32 data_bcnt, u32 data_offset) { + u32 page_size = BIT(rq->mpwqe.page_shift); + net_prefetchw(skb->data); do { /* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */ - u32 pg_consumed_bytes = min_t(u32, PAGE_SIZE - data_offset, data_bcnt); + u32 pg_consumed_bytes = min_t(u32, page_size - data_offset, + data_bcnt); unsigned int truesize = pg_consumed_bytes; mlx5e_add_skb_frag(rq, skb, frag_page, data_offset, @@ -1872,6 +1875,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w u16 headlen = min_t(u16, MLX5E_RX_MAX_HEAD, cqe_bcnt); struct mlx5e_frag_page *head_page = frag_page; struct mlx5e_xdp_buff *mxbuf = &rq->mxbuf; + u32 page_size = BIT(rq->mpwqe.page_shift); u32 frag_offset = head_offset; u32 byte_cnt = cqe_bcnt; struct skb_shared_info *sinfo; @@ -1926,9 +1930,9 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w linear_hr = skb_headroom(skb); linear_data_len = headlen; linear_frame_sz = MLX5_SKB_FRAG_SZ(skb_end_offset(skb)); - if (unlikely(frag_offset >= PAGE_SIZE)) { + if (unlikely(frag_offset >= page_size)) { frag_page++; - frag_offset -= PAGE_SIZE; + frag_offset -= page_size; } } @@ -1940,7 +1944,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w while (byte_cnt) { /* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */ pg_consumed_bytes = - min_t(u32, PAGE_SIZE - frag_offset, byte_cnt); + min_t(u32, page_size - frag_offset, byte_cnt); if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) truesize += pg_consumed_bytes; @@ -1978,7 +1982,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w nr_frags_free = old_nr_frags - sinfo->nr_frags; if (unlikely(nr_frags_free)) { frag_page -= nr_frags_free; - truesize -= (nr_frags_free - 1) * PAGE_SIZE + + truesize -= (nr_frags_free - 1) * page_size + ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz)); } @@ -2166,15 +2170,16 @@ mlx5e_shampo_flush_skb(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, bool match) rq->hw_gro_data->skb = NULL; } -static bool -mlx5e_hw_gro_skb_has_enough_space(struct sk_buff *skb, u16 data_bcnt) +static bool mlx5e_hw_gro_skb_has_enough_space(struct sk_buff *skb, + u16 data_bcnt, + u32 page_size) { int nr_frags = skb_shinfo(skb)->nr_frags; - if (PAGE_SIZE >= GRO_LEGACY_MAX_SIZE) + if (page_size >= GRO_LEGACY_MAX_SIZE) return skb->len + data_bcnt <= GRO_LEGACY_MAX_SIZE; else - return PAGE_SIZE * nr_frags + data_bcnt <= GRO_LEGACY_MAX_SIZE; + return page_size * nr_frags + data_bcnt <= GRO_LEGACY_MAX_SIZE; } static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) @@ -2183,18 +2188,19 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq u16 header_index = mlx5e_shampo_get_cqe_header_index(rq, cqe); u32 wqe_offset = be32_to_cpu(cqe->shampo.data_offset); u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe); - u32 data_offset = wqe_offset & (PAGE_SIZE - 1); u32 cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe); u16 wqe_id = be16_to_cpu(cqe->wqe_id); - u32 page_idx = wqe_offset >> PAGE_SHIFT; u16 head_size = cqe->shampo.header_size; struct sk_buff **skb = &rq->hw_gro_data->skb; bool flush = cqe->shampo.flush; bool match = cqe->shampo.match; + u32 page_size = BIT(rq->mpwqe.page_shift); struct mlx5e_rq_stats *stats = rq->stats; struct mlx5e_rx_wqe_ll *wqe; struct mlx5e_mpw_info *wi; struct mlx5_wq_ll *wq; + u32 data_offset; + u32 page_idx; wi = mlx5e_get_mpw_info(rq, wqe_id); wi->consumed_strides += cstrides; @@ -2210,7 +2216,11 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq goto mpwrq_cqe_out; } - if (*skb && (!match || !(mlx5e_hw_gro_skb_has_enough_space(*skb, data_bcnt)))) { + data_offset = wqe_offset & (page_size - 1); + page_idx = wqe_offset >> rq->mpwqe.page_shift; + if (*skb && + !(match && mlx5e_hw_gro_skb_has_enough_space(*skb, data_bcnt, + page_size))) { match = false; mlx5e_shampo_flush_skb(rq, cqe, match); } From 3a145cf492a3a154afb288cd460adf6721614eab Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Mon, 23 Feb 2026 22:41:49 +0200 Subject: [PATCH 0061/1561] net/mlx5e: Set page_pool order based on calculated page_shift Instead of unconditionally setting the page_pool to 0, calculate it from page_shift for MPWQE case. Signed-off-by: Dragos Tatulea Reviewed-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260223204155.1783580-10-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index aca88fed2ac7..6344dbb6335e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -857,6 +857,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, { void *rqc_wq = MLX5_ADDR_OF(rqc, rq_param->rqc, wq); struct mlx5_core_dev *mdev = rq->mdev; + u32 pool_order = 0; u32 pool_size; int wq_sz; int err; @@ -905,6 +906,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, pool_size = rq->mpwqe.pages_per_wqe << mlx5e_mpwqe_get_log_rq_size(mdev, params, rqo); + pool_order = rq->mpwqe.page_shift - PAGE_SHIFT; if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, rqo) && params->xdp_prog) @@ -960,7 +962,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, /* Create a page_pool and register it with rxq */ struct page_pool_params pp_params = { 0 }; - pp_params.order = 0; + pp_params.order = pool_order; pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV; pp_params.pool_size = pool_size; pp_params.nid = node; @@ -968,7 +970,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, pp_params.napi = rq->cq.napi; pp_params.netdev = rq->netdev; pp_params.dma_dir = rq->buff.map_dir; - pp_params.max_len = PAGE_SIZE; + pp_params.max_len = BIT(PAGE_SHIFT + pool_order); pp_params.queue_idx = rq->ix; /* Shampo header data split allow for unreadable netmem */ From 0285cc3dac1b4ceb3dacdfce43627d41d649cf47 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Mon, 23 Feb 2026 22:41:50 +0200 Subject: [PATCH 0062/1561] net/mlx5e: Alloc rq drop page based on calculated page_shift An upcoming patch will allow setting the page order for RX pages to be greater than 0. Make sure that the drop page will also be allocated with the right size when that happens. Take extra care when calculating the drop page size to account for page_shift < PAGE_SHIFT which can happen for xsk. Signed-off-by: Dragos Tatulea Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260223204155.1783580-11-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 27 ++++++++++++------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 6344dbb6335e..2d3d89707246 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -636,14 +636,18 @@ static void mlx5e_rq_timeout_work(struct work_struct *timeout_work) static int mlx5e_alloc_mpwqe_rq_drop_page(struct mlx5e_rq *rq) { - rq->wqe_overflow.page = alloc_page(GFP_KERNEL); + /* xsk can have page_shift < PAGE_SHIFT */ + u16 page_order = max_t(s16, rq->mpwqe.page_shift - PAGE_SHIFT, 0); + u32 page_size = BIT(PAGE_SHIFT + page_order); + + rq->wqe_overflow.page = alloc_pages(GFP_KERNEL, page_order); if (!rq->wqe_overflow.page) return -ENOMEM; rq->wqe_overflow.addr = dma_map_page(rq->pdev, rq->wqe_overflow.page, 0, - PAGE_SIZE, rq->buff.map_dir); + page_size, rq->buff.map_dir); if (dma_mapping_error(rq->pdev, rq->wqe_overflow.addr)) { - __free_page(rq->wqe_overflow.page); + __free_pages(rq->wqe_overflow.page, page_order); return -ENOMEM; } return 0; @@ -651,9 +655,12 @@ static int mlx5e_alloc_mpwqe_rq_drop_page(struct mlx5e_rq *rq) static void mlx5e_free_mpwqe_rq_drop_page(struct mlx5e_rq *rq) { - dma_unmap_page(rq->pdev, rq->wqe_overflow.addr, PAGE_SIZE, - rq->buff.map_dir); - __free_page(rq->wqe_overflow.page); + u16 page_order = max_t(s16, rq->mpwqe.page_shift - PAGE_SHIFT, 0); + u32 page_size = BIT(PAGE_SHIFT + page_order); + + dma_unmap_page(rq->pdev, rq->wqe_overflow.addr, page_size, + rq->buff.map_dir); + __free_pages(rq->wqe_overflow.page, page_order); } static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params, @@ -884,15 +891,15 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, if (err) goto err_rq_xdp_prog; - err = mlx5e_alloc_mpwqe_rq_drop_page(rq); - if (err) - goto err_rq_wq_destroy; - rq->mpwqe.wq.db = &rq->mpwqe.wq.db[MLX5_RCV_DBR]; wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq); rq->mpwqe.page_shift = mlx5e_mpwrq_page_shift(mdev, rqo); + err = mlx5e_alloc_mpwqe_rq_drop_page(rq); + if (err) + goto err_rq_wq_destroy; + rq->mpwqe.umr_mode = mlx5e_mpwrq_umr_mode(mdev, rqo); rq->mpwqe.pages_per_wqe = mlx5e_mpwrq_pages_per_wqe(mdev, rq->mpwqe.page_shift, From 8611660778bf5db9f5f063c9bd58d41012801cb8 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Mon, 23 Feb 2026 22:41:51 +0200 Subject: [PATCH 0063/1561] net/mlx5e: RX, Make page frag bias more robust The formula uses the system page size but does not account for high order pages. One way to fix this would be to adapt the formula to take into account the pool order. This would require calculating it for every allocation or adding an additional rq struct member to hold the bias max. However, the above is not really needed as the driver doesn't check the bias value. It has other means to calculate the expected number of fragments based on context. This patch simply sets the value to the max possible value. A sanity check is added during queue init phase to avoid having really big pages from using more fragments than the type can fit. Signed-off-by: Dragos Tatulea Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260223204155.1783580-12-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6 ++++++ drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 2 -- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 5181d6ab39ae..c7ac6ebe8290 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -80,6 +80,7 @@ struct page_pool; #define MLX5_SKB_FRAG_SZ(len) (SKB_DATA_ALIGN(len) + \ SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) +#define MLX5E_PAGECNT_BIAS_MAX U16_MAX #define MLX5E_RX_MAX_HEAD (256) #define MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE (8) #define MLX5E_SHAMPO_WQ_HEADER_PER_PAGE \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 2d3d89707246..cf977273f753 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -969,6 +969,12 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, /* Create a page_pool and register it with rxq */ struct page_pool_params pp_params = { 0 }; + if (WARN_ON(BIT(PAGE_SHIFT + pool_order) / 64 > + MLX5E_PAGECNT_BIAS_MAX)) { + err = -E2BIG; + goto err_free_by_rq_type; + } + pp_params.order = pool_order; pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV; pp_params.pool_size = pool_size; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index fc95ea00666b..8fb57a4f36dd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -272,8 +272,6 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq, return mlx5e_decompress_cqes_cont(rq, wq, 1, budget_rem); } -#define MLX5E_PAGECNT_BIAS_MAX (PAGE_SIZE / 64) - static int mlx5e_page_alloc_fragmented(struct page_pool *pp, struct mlx5e_frag_page *frag_page) { From 0fa8c93357601cbfb7006a420fd3b63f4a06af11 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Mon, 23 Feb 2026 22:41:52 +0200 Subject: [PATCH 0064/1561] net/mlx5e: Add queue config ops for page size For now allow only PAGE_SIZE. A subsequent patch will add support for high order pages in zero-copy mode. Signed-off-by: Dragos Tatulea Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260223204155.1783580-13-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index cf977273f753..336e384c143a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5572,6 +5572,22 @@ struct mlx5_qmgmt_data { struct mlx5e_channel *c; }; +static void mlx5e_queue_default_qcfg(struct net_device *dev, + struct netdev_queue_config *qcfg) +{ + qcfg->rx_page_size = PAGE_SIZE; +} + +static int mlx5e_queue_validate_qcfg(struct net_device *dev, + struct netdev_queue_config *qcfg, + struct netlink_ext_ack *extack) +{ + if (qcfg->rx_page_size != PAGE_SIZE) + return -EINVAL; + + return 0; +} + static int mlx5e_queue_mem_alloc(struct net_device *dev, struct netdev_queue_config *qcfg, void *newq, int queue_index) @@ -5682,6 +5698,9 @@ static const struct netdev_queue_mgmt_ops mlx5e_queue_mgmt_ops = { .ndo_queue_start = mlx5e_queue_start, .ndo_queue_stop = mlx5e_queue_stop, .ndo_queue_get_dma_dev = mlx5e_queue_get_dma_dev, + .ndo_default_qcfg = mlx5e_queue_default_qcfg, + .ndo_validate_qcfg = mlx5e_queue_validate_qcfg, + .supported_params = QCFG_RX_PAGE_SIZE, }; static void mlx5e_build_nic_netdev(struct net_device *netdev) From 585cfa99d3578db997423e79c9c99d4e5e428271 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Mon, 23 Feb 2026 22:41:53 +0200 Subject: [PATCH 0065/1561] net/mlx5e: Pass netdev queue config to param calculations If set, take rx_page_size into consideration when calculating the page shift in Multi Packet WQE mode. The queue config is saved in the mlx5e_rq_opt_param struct which is added to the mlx5e_channel_param struct. Now the configuration can be read from the struct instead of adding it as an argument to all call sites. For consistency, the queue config is assigned in mlx5e_build_channel_param(). The queue configuration is read only from queue management ops as that's the only place where it is currently useful. Furthermore, netdev_queue_config() expects netdev->queue_mgmt_ops to be set which is not always the case (representor netdevs). Signed-off-by: Dragos Tatulea Reviewed-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260223204155.1783580-14-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- .../ethernet/mellanox/mlx5/core/en/params.c | 14 ++++++++++++-- .../ethernet/mellanox/mlx5/core/en/params.h | 2 ++ .../net/ethernet/mellanox/mlx5/core/en_main.c | 19 ++++++++++++------- 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 97f5d1c2adea..304b46ecc8df 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -10,6 +10,7 @@ #include #include #include +#include #define MLX5_MPWRQ_MAX_LOG_WQE_SZ 18 #define MLX5_REP_MPWRQ_MAX_LOG_WQE_SZ 17 @@ -24,11 +25,17 @@ static u8 mlx5e_mpwrq_min_page_shift(struct mlx5_core_dev *mdev) u8 mlx5e_mpwrq_page_shift(struct mlx5_core_dev *mdev, struct mlx5e_rq_opt_param *rqo) { + struct netdev_queue_config *qcfg = rqo ? rqo->qcfg : NULL; struct mlx5e_xsk_param *xsk = mlx5e_rqo_xsk_param(rqo); u8 min_page_shift = mlx5e_mpwrq_min_page_shift(mdev); u8 req_page_shift; - req_page_shift = xsk ? order_base_2(xsk->chunk_size) : PAGE_SHIFT; + if (xsk) + req_page_shift = order_base_2(xsk->chunk_size); + else if (qcfg && qcfg->rx_page_size) + req_page_shift = order_base_2(qcfg->rx_page_size); + else + req_page_shift = PAGE_SHIFT; /* Regular RQ uses order-0 pages, the NIC must be able to map them. */ if (WARN_ON_ONCE(!xsk && req_page_shift < min_page_shift)) @@ -1283,12 +1290,15 @@ void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev, int mlx5e_build_channel_param(struct mlx5_core_dev *mdev, struct mlx5e_params *params, + struct netdev_queue_config *qcfg, struct mlx5e_channel_param *cparam) { u8 icosq_log_wq_sz, async_icosq_log_wq_sz; int err; - err = mlx5e_build_rq_param(mdev, params, NULL, &cparam->rq); + cparam->rq_opt.qcfg = qcfg; + + err = mlx5e_build_rq_param(mdev, params, &cparam->rq_opt, &cparam->rq); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index 4bce769d48ed..5b6d528bce9b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -14,6 +14,7 @@ struct mlx5e_xsk_param { struct mlx5e_rq_opt_param { struct mlx5e_xsk_param *xsk; + struct netdev_queue_config *qcfg; }; struct mlx5e_cq_param { @@ -143,6 +144,7 @@ void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev, struct mlx5e_sq_param *param); int mlx5e_build_channel_param(struct mlx5_core_dev *mdev, struct mlx5e_params *params, + struct netdev_queue_config *qcfg, struct mlx5e_channel_param *cparam); void mlx5e_build_xsk_channel_param(struct mlx5_core_dev *mdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 336e384c143a..59e38e7e067e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2524,8 +2524,10 @@ static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate) return err; } -static int mlx5e_open_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params, - struct mlx5e_rq_param *rq_param) +static int mlx5e_open_rxq_rq(struct mlx5e_channel *c, + struct mlx5e_params *params, + struct mlx5e_rq_param *rq_param, + struct mlx5e_rq_opt_param *rqo) { u16 q_counter = c->priv->q_counter[c->sd_ix]; int err; @@ -2534,7 +2536,7 @@ static int mlx5e_open_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param if (err) return err; - return mlx5e_open_rq(params, rq_param, NULL, cpu_to_node(c->cpu), + return mlx5e_open_rq(params, rq_param, rqo, cpu_to_node(c->cpu), q_counter, &c->rq); } @@ -2638,7 +2640,7 @@ static int mlx5e_open_queues(struct mlx5e_channel *c, if (err) goto err_close_icosq; - err = mlx5e_open_rxq_rq(c, params, &cparam->rq); + err = mlx5e_open_rxq_rq(c, params, &cparam->rq, &cparam->rq_opt); if (err) goto err_close_sqs; @@ -2783,6 +2785,7 @@ static void mlx5e_channel_pick_doorbell(struct mlx5e_channel *c) static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct mlx5e_params *params, + struct netdev_queue_config *qcfg, struct xsk_buff_pool *xsk_pool, struct mlx5e_channel **cp) { @@ -2816,7 +2819,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, goto err_free; } - err = mlx5e_build_channel_param(mdev, params, cparam); + err = mlx5e_build_channel_param(mdev, params, qcfg, cparam); if (err) goto err_free; @@ -2941,7 +2944,8 @@ int mlx5e_open_channels(struct mlx5e_priv *priv, if (chs->params.xdp_prog) xsk_pool = mlx5e_xsk_get_pool(&chs->params, chs->params.xsk, i); - err = mlx5e_open_channel(priv, i, &chs->params, xsk_pool, &chs->c[i]); + err = mlx5e_open_channel(priv, i, &chs->params, NULL, + xsk_pool, &chs->c[i]); if (err) goto err_close_channels; } @@ -5619,7 +5623,8 @@ static int mlx5e_queue_mem_alloc(struct net_device *dev, goto unlock; } - err = mlx5e_open_channel(priv, queue_index, ¶ms, NULL, &new->c); + err = mlx5e_open_channel(priv, queue_index, ¶ms, qcfg, NULL, + &new->c); unlock: mutex_unlock(&priv->state_lock); return err; From 5b6e0ddb368625f37b66f6f3efba88e5bc93618e Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Mon, 23 Feb 2026 22:41:54 +0200 Subject: [PATCH 0066/1561] net/mlx5e: Add param helper to calculate max page size This function will be necessary to determine the upper limit of rx-page-size. Signed-off-by: Dragos Tatulea Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260223204155.1783580-15-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/en/params.c | 12 ++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/en/params.h | 1 + 2 files changed, 13 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 304b46ecc8df..26bb31c56e45 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -501,6 +501,18 @@ u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, return 0; } +u32 mlx5e_mpwrq_max_page_size(struct mlx5_core_dev *mdev) +{ + if (mlx5_core_is_ecpf(mdev)) + return PAGE_SIZE; + + /* Two MTTs are needed to form an octword. Driver is using a + * single page per MTT for simplicity. Hence the limit of having + * at least 2 pages per WQE. + */ + return BIT(MLX5_MPWRQ_MAX_LOG_WQE_SZ - 1); +} + u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params) { bool is_mpwqe = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index 5b6d528bce9b..9b1a2aed17c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -121,6 +121,7 @@ u8 mlx5e_mpwqe_get_min_wqe_bulk(unsigned int wq_sz); u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_rq_opt_param *rqo); +u32 mlx5e_mpwrq_max_page_size(struct mlx5_core_dev *mdev); /* Build queue parameters */ From df5135fced85bf3c3dfe174f1cf853cdf39312e5 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Mon, 23 Feb 2026 22:41:55 +0200 Subject: [PATCH 0067/1561] net/mlx5e: SHAMPO, Allow high order pages in zerocopy mode Allow high order pages only when SHAMPO mode is enabled (hw-gro) and the queue is used for zerocopy (has memory provider ops set). The limit is 128K and it was chosen for the following reasons: - 256K size requires a special case during MTT calculation to split the page in two. That's because two MTTs are needed to form an octword. - Higher sizes require increasing WQE size and/or reducing the number of WQEs. - Having the RQ lined with too few large pages can lead to refill issues. Results show an increase in BW and a decrease in CPU usage. The benchmark was done with the zcrx samples from liburing [0]. rx_buf_len=4K, oncpu [1]: packets=3358832 (MB=820027), rps=55794 (MB/s=13621) Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %gnice %idle Average: 9 1.56 0.00 18.09 13.42 0.00 66.80 0.00 0.00 0.00 0.12 rx_buf_len=128K, oncpu [2]: packets=3781376 (MB=923187), rps=62813 (MB/s=15335) Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %gnice %idle Average: 9 0.33 0.00 7.61 18.86 0.00 73.08 0.00 0.00 0.00 0.12 rx_buf_len=4K, offcpu [3]: packets=3460368 (MB=844816), rps=57481 (MB/s=14033) Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %gnice %idle Average: 9 0.00 0.00 0.26 0.00 0.00 92.63 0.00 0.00 0.00 7.11 Average: 11 3.04 0.00 68.09 28.87 0.00 0.00 0.00 0.00 0.00 0.00 rx_buf_len=128K, offcpu [4]: packets=4119840 (MB=1005820), rps=68435 (MB/s=16707) Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %gnice %idle Average: 9 0.00 0.00 0.87 0.00 0.00 63.77 0.00 0.00 0.00 35.36 Average: 11 1.96 0.00 43.68 54.37 0.00 0.00 0.00 0.00 0.00 0.00 [0] https://github.com/isilence/liburing/tree/zcrx/rx-buf-len [1] commands: $> taskset -c 9 ./zcrx 6 -i eth2 -q 9 -A 1 -B 4096 -S 33554432 $> ./send-zerocopy tcp -6 -D 2001:db8::1 -t 60 -C 0 -l 1 -b 1 -n 1 -z 1 -d -s 256000 [2] commands: $> taskset -c 9 ./zcrx 6 -i eth2 -q 9 -A 1 -B 131072 -S 33554432 $> ./send-zerocopy tcp -6 -D 2001:db8::1 -t 60 -C 0 -l 1 -b 1 -n 1 -z 1 -d -s 256000 [3] commands: $> taskset -c 11 ./zcrx 6 -i eth2 -q 9 -A 1 -B 4096 -S 33554432 $> ./send-zerocopy tcp -6 -D 2001:db8::1 -t 60 -C 0 -l 1 -b 1 -n 1 -z 1 -d -s 256000 [4] commands: $> taskset -c 11 ./zcrx 6 -i eth2 -q 9 -A 1 -B 131072 -S 33554432 $> ./send-zerocopy tcp -6 -D 2001:db8::1 -t 60 -C 0 -l 1 -b 1 -n 1 -z 1 -d -s 256000 Signed-off-by: Dragos Tatulea Reviewed-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260223204155.1783580-16-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 36 ++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 59e38e7e067e..67dc38981101 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5586,12 +5586,40 @@ static int mlx5e_queue_validate_qcfg(struct net_device *dev, struct netdev_queue_config *qcfg, struct netlink_ext_ack *extack) { - if (qcfg->rx_page_size != PAGE_SIZE) + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + u32 max; + + if (!is_power_of_2(qcfg->rx_page_size)) { + netdev_err(priv->netdev, "rx_page_size not power of 2: %u", + qcfg->rx_page_size); return -EINVAL; + } + + max = mlx5e_mpwrq_max_page_size(mdev); + if (qcfg->rx_page_size < PAGE_SIZE || qcfg->rx_page_size > max) { + netdev_err(priv->netdev, + "Selected rx_page_size %u not in supported range [%lu, %u]\n", + qcfg->rx_page_size, PAGE_SIZE, max); + return -ERANGE; + } return 0; } +static bool mlx5e_queue_validate_page_size(struct net_device *dev, + struct netdev_queue_config *qcfg, + int queue_index) +{ + if (qcfg->rx_page_size == PAGE_SIZE) + return true; + + if (!netif_rxq_has_unreadable_mp(dev, queue_index)) + return false; + + return true; +} + static int mlx5e_queue_mem_alloc(struct net_device *dev, struct netdev_queue_config *qcfg, void *newq, int queue_index) @@ -5623,6 +5651,12 @@ static int mlx5e_queue_mem_alloc(struct net_device *dev, goto unlock; } + if (!mlx5e_queue_validate_page_size(dev, qcfg, queue_index)) { + netdev_err(priv->netdev, "High order pages are supported only in Zero-Copy mode\n"); + err = -EINVAL; + goto unlock; + } + err = mlx5e_open_channel(priv, queue_index, ¶ms, qcfg, NULL, &new->c); unlock: From 15c9ed1d8286dc0297f01347dc74f5a8cbc173de Mon Sep 17 00:00:00 2001 From: Qingfang Deng Date: Tue, 24 Feb 2026 09:50:52 +0800 Subject: [PATCH 0068/1561] pppoe: remove kernel-mode relay support The kernel-mode PPPoE relay feature and its two associated ioctls (PPPOEIOCSFWD and PPPOEIOCDFWD) are not used by any existing userspace PPPoE implementations. The most commonly-used package, RP-PPPoE [1], handles the relaying entirely in userspace. This legacy code has remained in the driver since its introduction in kernel 2.3.99-pre7 for over two decades, but has served no practical purpose. Remove the unused relay code. [1] https://dianne.skoll.ca/projects/rp-pppoe/ Signed-off-by: Qingfang Deng Acked-by: Arnd Bergmann Reviewed-by: Guillaume Nault Link: https://patch.msgid.link/20260224015053.42472-1-dqfext@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ppp/pppoe.c | 79 ----------------------------------- drivers/net/ppp/pppox.c | 3 -- include/linux/if_pppox.h | 6 --- include/uapi/linux/if_pppox.h | 10 ----- 4 files changed, 98 deletions(-) diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 7900cc3212a5..1ac61c273b28 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -237,25 +237,6 @@ static inline struct pppox_sock *get_item(struct pppoe_net *pn, __be16 sid, return po; } -static inline struct pppox_sock *__get_item_by_addr(struct net *net, - struct sockaddr_pppox *sp) -{ - struct net_device *dev; - struct pppoe_net *pn; - struct pppox_sock *pppox_sock = NULL; - - int ifindex; - - dev = dev_get_by_name_rcu(net, sp->sa_addr.pppoe.dev); - if (dev) { - ifindex = dev->ifindex; - pn = pppoe_pernet(net); - pppox_sock = __get_item(pn, sp->sa_addr.pppoe.sid, - sp->sa_addr.pppoe.remote, ifindex); - } - return pppox_sock; -} - static inline void delete_item(struct pppoe_net *pn, __be16 sid, char *addr, int ifindex) { @@ -369,7 +350,6 @@ static struct notifier_block pppoe_notifier = { static int pppoe_rcv_core(struct sock *sk, struct sk_buff *skb) { struct pppox_sock *po = pppox_sk(sk); - struct pppox_sock *relay_po; /* Backlog receive. Semantics of backlog rcv preclude any code from * executing in lock_sock()/release_sock() bounds; meaning sk->sk_state @@ -378,17 +358,6 @@ static int pppoe_rcv_core(struct sock *sk, struct sk_buff *skb) if (sk->sk_state & PPPOX_BOUND) { ppp_input(&po->chan, skb); - } else if (sk->sk_state & PPPOX_RELAY) { - relay_po = __get_item_by_addr(sock_net(sk), - &po->pppoe_relay); - if (relay_po == NULL) - goto abort_kfree; - - if ((sk_pppox(relay_po)->sk_state & PPPOX_CONNECTED) == 0) - goto abort_kfree; - - if (!__pppoe_xmit(sk_pppox(relay_po), skb)) - goto abort_kfree; } else { if (sock_queue_rcv_skb(sk, skb)) goto abort_kfree; @@ -656,7 +625,6 @@ static int pppoe_connect(struct socket *sock, struct sockaddr_unsized *uservaddr po->pppoe_ifindex = 0; memset(&po->pppoe_pa, 0, sizeof(po->pppoe_pa)); - memset(&po->pppoe_relay, 0, sizeof(po->pppoe_relay)); memset(&po->chan, 0, sizeof(po->chan)); po->next = NULL; po->num = 0; @@ -783,53 +751,6 @@ static int pppoe_ioctl(struct socket *sock, unsigned int cmd, err = 0; break; - case PPPOEIOCSFWD: - { - struct pppox_sock *relay_po; - - err = -EBUSY; - if (sk->sk_state & (PPPOX_BOUND | PPPOX_DEAD)) - break; - - err = -ENOTCONN; - if (!(sk->sk_state & PPPOX_CONNECTED)) - break; - - /* PPPoE address from the user specifies an outbound - PPPoE address which frames are forwarded to */ - err = -EFAULT; - if (copy_from_user(&po->pppoe_relay, - (void __user *)arg, - sizeof(struct sockaddr_pppox))) - break; - - err = -EINVAL; - if (po->pppoe_relay.sa_family != AF_PPPOX || - po->pppoe_relay.sa_protocol != PX_PROTO_OE) - break; - - /* Check that the socket referenced by the address - actually exists. */ - rcu_read_lock(); - relay_po = __get_item_by_addr(sock_net(sk), &po->pppoe_relay); - rcu_read_unlock(); - if (!relay_po) - break; - - sk->sk_state |= PPPOX_RELAY; - err = 0; - break; - } - - case PPPOEIOCDFWD: - err = -EALREADY; - if (!(sk->sk_state & PPPOX_RELAY)) - break; - - sk->sk_state &= ~PPPOX_RELAY; - err = 0; - break; - default: err = -ENOTTY; } diff --git a/drivers/net/ppp/pppox.c b/drivers/net/ppp/pppox.c index 08364f10a43f..5861a2f6ce3e 100644 --- a/drivers/net/ppp/pppox.c +++ b/drivers/net/ppp/pppox.c @@ -102,9 +102,6 @@ EXPORT_SYMBOL(pppox_ioctl); #ifdef CONFIG_COMPAT int pppox_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { - if (cmd == PPPOEIOCSFWD32) - cmd = PPPOEIOCSFWD; - return pppox_ioctl(sock, cmd, (unsigned long)compat_ptr(arg)); } diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index db45d6f1c4f4..8bbf676c2a85 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -25,8 +25,6 @@ struct pppoe_opt { struct net_device *dev; /* device associated with socket*/ int ifindex; /* ifindex of device associated with socket */ struct pppoe_addr pa; /* what this socket is bound to*/ - struct sockaddr_pppox relay; /* what socket data will be - relayed to (PPPoE relaying) */ struct work_struct padt_work;/* Work item for handling PADT */ }; @@ -53,7 +51,6 @@ struct pppox_sock { #define pppoe_dev proto.pppoe.dev #define pppoe_ifindex proto.pppoe.ifindex #define pppoe_pa proto.pppoe.pa -#define pppoe_relay proto.pppoe.relay static inline struct pppox_sock *pppox_sk(struct sock *sk) { @@ -80,14 +77,11 @@ extern void pppox_unbind_sock(struct sock *sk);/* delete ppp-channel binding */ extern int pppox_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); extern int pppox_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); -#define PPPOEIOCSFWD32 _IOW(0xB1 ,0, compat_size_t) - /* PPPoX socket states */ enum { PPPOX_NONE = 0, /* initial state */ PPPOX_CONNECTED = 1, /* connection established ==TCP_ESTABLISHED */ PPPOX_BOUND = 2, /* bound to ppp device */ - PPPOX_RELAY = 4, /* forwarding is enabled */ PPPOX_DEAD = 16 /* dead, useless, please clean me up!*/ }; diff --git a/include/uapi/linux/if_pppox.h b/include/uapi/linux/if_pppox.h index 29b804aa7474..7ae044d71fb7 100644 --- a/include/uapi/linux/if_pppox.h +++ b/include/uapi/linux/if_pppox.h @@ -103,16 +103,6 @@ struct sockaddr_pppol2tpv3in6 { struct pppol2tpv3in6_addr pppol2tp; } __packed; -/********************************************************************* - * - * ioctl interface for defining forwarding of connections - * - ********************************************************************/ - -#define PPPOEIOCSFWD _IOW(0xB1 ,0, size_t) -#define PPPOEIOCDFWD _IO(0xB1 ,1) -/*#define PPPOEIOCGFWD _IOWR(0xB1,2, size_t)*/ - /* Codes to identify message types */ #define PADI_CODE 0x09 #define PADO_CODE 0x07 From 4916f2e2f3fc9aef289fcd07949301e5c29094c2 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 24 Feb 2026 02:02:14 +0000 Subject: [PATCH 0069/1561] bonding: print churn state via netlink Currently, the churn state is printed only in sysfs. Add netlink support so users could get the state via netlink. Signed-off-by: Hangbin Liu Link: https://patch.msgid.link/20260224020215.6012-1-liuhangbin@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/bonding/bond_netlink.c | 9 +++++++++ include/uapi/linux/if_link.h | 2 ++ 2 files changed, 11 insertions(+) diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c index 286f11c517f7..ea1a80e658ae 100644 --- a/drivers/net/bonding/bond_netlink.c +++ b/drivers/net/bonding/bond_netlink.c @@ -29,6 +29,8 @@ static size_t bond_get_slave_size(const struct net_device *bond_dev, nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE */ nla_total_size(sizeof(s32)) + /* IFLA_BOND_SLAVE_PRIO */ nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_ACTOR_PORT_PRIO */ + nla_total_size(sizeof(u8)) + /* IFLA_BOND_SLAVE_AD_CHURN_ACTOR_STATE */ + nla_total_size(sizeof(u8)) + /* IFLA_BOND_SLAVE_AD_CHURN_PARTNER_STATE */ 0; } @@ -77,6 +79,13 @@ static int bond_fill_slave_info(struct sk_buff *skb, IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE, ad_port->partner_oper.port_state)) goto nla_put_failure; + + if (nla_put_u8(skb, IFLA_BOND_SLAVE_AD_CHURN_ACTOR_STATE, + ad_port->sm_churn_actor_state)) + goto nla_put_failure; + if (nla_put_u8(skb, IFLA_BOND_SLAVE_AD_CHURN_PARTNER_STATE, + ad_port->sm_churn_partner_state)) + goto nla_put_failure; } if (nla_put_u16(skb, IFLA_BOND_SLAVE_ACTOR_PORT_PRIO, diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index e9b5f79e1ee1..83a96c56b8ca 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1568,6 +1568,8 @@ enum { IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE, IFLA_BOND_SLAVE_PRIO, IFLA_BOND_SLAVE_ACTOR_PORT_PRIO, + IFLA_BOND_SLAVE_AD_CHURN_ACTOR_STATE, + IFLA_BOND_SLAVE_AD_CHURN_PARTNER_STATE, __IFLA_BOND_SLAVE_MAX, }; From a8911fbeff8bee4fe3376c5044b64fbf3cceb78e Mon Sep 17 00:00:00 2001 From: Alejandro Colomar Date: Sun, 21 Dec 2025 23:25:54 +0100 Subject: [PATCH 0070/1561] wifi: ath9k: Fix typo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This only worked by chance, because all callers of this macro used the same identifiers that were expected by the macro. $ grep -rn ath_for_each_chanctx drivers/net/wireless/ath/ath9k/main.c:1576: ath_for_each_chanctx(sc, ctx) drivers/net/wireless/ath/ath9k/main.c:2554: ath_for_each_chanctx(sc, ctx) { drivers/net/wireless/ath/ath9k/channel.c:165: ath_for_each_chanctx(sc, ctx) { drivers/net/wireless/ath/ath9k/channel.c:291: ath_for_each_chanctx(sc, ctx) { drivers/net/wireless/ath/ath9k/channel.c:861: ath_for_each_chanctx(sc, ctx) { drivers/net/wireless/ath/ath9k/debug.c:717: ath_for_each_chanctx(sc, ctx) { drivers/net/wireless/ath/ath9k/ath9k.h:446:#define ath_for_each_chanctx(_sc, _ctx) \ Cc: Rajkumar Manoharan Cc: John W. Linville Cc: Toke Høiland-Jørgensen Cc: # the code worked by chance Fixes: c4dc0d040e35 ("ath9k: Fetch appropriate operating channel context") Signed-off-by: Alejandro Colomar Acked-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/6ab107cf786f9d05dc4d84ea4e2d1b219ce108c0.1766355822.git.alx@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath9k/ath9k.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/ath9k.h b/drivers/net/wireless/ath/ath9k/ath9k.h index 6e38aa7351e3..e8635bf81f9d 100644 --- a/drivers/net/wireless/ath/ath9k/ath9k.h +++ b/drivers/net/wireless/ath/ath9k/ath9k.h @@ -443,7 +443,7 @@ ath_node_to_tid(struct ath_node *an, u8 tidno) #define case_rtn_string(val) case val: return #val -#define ath_for_each_chanctx(_sc, _ctx) \ +#define ath_for_each_chanctx(sc, ctx) \ for (ctx = &sc->chanctx[0]; \ ctx <= &sc->chanctx[ARRAY_SIZE(sc->chanctx) - 1]; \ ctx++) From 0ef4738f0d38f103e525ffb12c112a935c2cf011 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Mon, 9 Feb 2026 10:52:19 +0800 Subject: [PATCH 0071/1561] wifi: ath12k: fix indentation in ath12k_qmi_aux_uc_load() Smatch complains: drivers/net/wireless/ath/ath12k/qmi.c:3342 ath12k_qmi_aux_uc_load() warn: inconsistent indenting Fix it. Compile tested only. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202602061221.5SCuwKhy-lkp@intel.com/ Signed-off-by: Baochen Qiang Link: https://patch.msgid.link/20260209-ath12k-fix-indention-v1-1-6b87ae99745e@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/qmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/qmi.c b/drivers/net/wireless/ath/ath12k/qmi.c index cfde4147c8fc..41f53d34d33e 100644 --- a/drivers/net/wireless/ath/ath12k/qmi.c +++ b/drivers/net/wireless/ath/ath12k/qmi.c @@ -3339,7 +3339,7 @@ static int ath12k_qmi_aux_uc_load(struct ath12k_base *ab) goto out; } - aux_uc_mem->total_size = aux_uc_len; + aux_uc_mem->total_size = aux_uc_len; copy: memcpy(aux_uc_mem->vaddr, aux_uc_data, aux_uc_len); From 151322bccdbdb132f5a73cc8ad5d3ab89b90ed52 Mon Sep 17 00:00:00 2001 From: Maharaja Kennadyrajan Date: Mon, 23 Feb 2026 18:56:22 +0530 Subject: [PATCH 0072/1561] wifi: ath12k: add basic hwmon temperature reporting Add initial thermal support by wiring up a per-radio (pdev) hwmon temperature sensor backed by the existing WMI pdev temperature command and event. When userspace reads the sysfs file temp1_input, the driver sends WMI_PDEV_GET_TEMPERATURE_CMDID (tag WMI_TAG_PDEV_GET_TEMPERATURE_CMD) and waits for the corresponding WMI_PDEV_TEMPERATURE_EVENTID (tag WMI_TAG_PDEV_TEMPERATURE_EVENT) to get the temperature and pdev_id. Export the reported value in millidegrees Celsius as required by hwmon. The temperature reported is per-radio (pdev). In a multi-radio wiphy under a single phy, a separate hwmon device is created for each radio. Sample command and output: $ cat /sys/devices/pci0000:00/.../ieee80211/phyX/hwmonY/temp1_input $ 50000 Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.6-01243-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.1.c5-00302-QCAHMTSWPL_V1.0_V2.0_SILICONZ-1.115823.3 Co-developed-by: Aishwarya R Signed-off-by: Aishwarya R Signed-off-by: Maharaja Kennadyrajan Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20260223132622.43464-1-maharaja.kennadyrajan@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/Makefile | 1 + drivers/net/wireless/ath/ath12k/core.c | 13 +++ drivers/net/wireless/ath/ath12k/core.h | 3 + drivers/net/wireless/ath/ath12k/mac.c | 4 + drivers/net/wireless/ath/ath12k/thermal.c | 124 ++++++++++++++++++++++ drivers/net/wireless/ath/ath12k/thermal.h | 40 +++++++ drivers/net/wireless/ath/ath12k/wmi.c | 57 +++++----- 7 files changed, 211 insertions(+), 31 deletions(-) create mode 100644 drivers/net/wireless/ath/ath12k/thermal.c create mode 100644 drivers/net/wireless/ath/ath12k/thermal.h diff --git a/drivers/net/wireless/ath/ath12k/Makefile b/drivers/net/wireless/ath/ath12k/Makefile index 3ba1236956cc..3b39b2c33307 100644 --- a/drivers/net/wireless/ath/ath12k/Makefile +++ b/drivers/net/wireless/ath/ath12k/Makefile @@ -32,6 +32,7 @@ ath12k-$(CONFIG_ATH12K_TRACING) += trace.o ath12k-$(CONFIG_PM) += wow.o ath12k-$(CONFIG_ATH12K_COREDUMP) += coredump.o ath12k-$(CONFIG_NL80211_TESTMODE) += testmode.o +ath12k-$(CONFIG_THERMAL) += thermal.o # for tracing framework to find trace.h CFLAGS_trace.o := -I$(src) diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c index 9d6c50a94e64..9dca1a0af73e 100644 --- a/drivers/net/wireless/ath/ath12k/core.c +++ b/drivers/net/wireless/ath/ath12k/core.c @@ -863,11 +863,22 @@ static int ath12k_core_pdev_create(struct ath12k_base *ab) return ret; } + ret = ath12k_thermal_register(ab); + if (ret) { + ath12k_err(ab, "could not register thermal device: %d\n", ret); + goto err_dp_pdev_free; + } + return 0; + +err_dp_pdev_free: + ath12k_dp_pdev_free(ab); + return ret; } static void ath12k_core_pdev_destroy(struct ath12k_base *ab) { + ath12k_thermal_unregister(ab); ath12k_dp_pdev_free(ab); } @@ -1361,6 +1372,7 @@ static int ath12k_core_reconfigure_on_crash(struct ath12k_base *ab) mutex_lock(&ab->core_lock); ath12k_link_sta_rhash_tbl_destroy(ab); + ath12k_thermal_unregister(ab); ath12k_dp_pdev_free(ab); ath12k_ce_cleanup_pipes(ab); ath12k_wmi_detach(ab); @@ -1502,6 +1514,7 @@ static void ath12k_core_pre_reconfigure_recovery(struct ath12k_base *ab) complete(&ar->vdev_delete_done); complete(&ar->bss_survey_done); complete_all(&ar->regd_update_completed); + complete_all(&ar->thermal.wmi_sync); wake_up(&ar->dp.tx_empty_waitq); idr_for_each(&ar->txmgmt_idr, diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index 990934ec92fc..760c76d6f0f4 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -36,6 +36,7 @@ #include "coredump.h" #include "cmn_defs.h" #include "dp_cmn.h" +#include "thermal.h" #define SM(_v, _f) (((_v) << _f##_LSB) & _f##_MASK) @@ -757,6 +758,8 @@ struct ath12k { s8 max_allowed_tx_power; struct ath12k_pdev_rssi_offsets rssi_info; + + struct ath12k_thermal thermal; }; struct ath12k_hw { diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 68431a0e128e..b9091f3f723f 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -14792,6 +14792,10 @@ static void ath12k_mac_setup(struct ath12k *ar) init_completion(&ar->mlo_setup_done); init_completion(&ar->completed_11d_scan); init_completion(&ar->regd_update_completed); + init_completion(&ar->thermal.wmi_sync); + + ar->thermal.temperature = 0; + ar->thermal.hwmon_dev = NULL; INIT_DELAYED_WORK(&ar->scan.timeout, ath12k_scan_timeout_work); wiphy_work_init(&ar->scan.vdev_clean_wk, ath12k_scan_vdev_clean_work); diff --git a/drivers/net/wireless/ath/ath12k/thermal.c b/drivers/net/wireless/ath/ath12k/thermal.c new file mode 100644 index 000000000000..a764d2112a3c --- /dev/null +++ b/drivers/net/wireless/ath/ath12k/thermal.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: BSD-3-Clause-Clear +/* + * Copyright (c) 2020 The Linux Foundation. All rights reserved. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. + */ + +#include +#include +#include +#include +#include +#include "core.h" +#include "debug.h" + +static ssize_t ath12k_thermal_temp_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ath12k *ar = dev_get_drvdata(dev); + unsigned long time_left; + int ret, temperature; + + guard(wiphy)(ath12k_ar_to_hw(ar)->wiphy); + + if (ar->ah->state != ATH12K_HW_STATE_ON) + return -ENETDOWN; + + reinit_completion(&ar->thermal.wmi_sync); + ret = ath12k_wmi_send_pdev_temperature_cmd(ar); + if (ret) { + ath12k_warn(ar->ab, "failed to read temperature %d\n", ret); + return ret; + } + + if (test_bit(ATH12K_FLAG_CRASH_FLUSH, &ar->ab->dev_flags)) + return -ESHUTDOWN; + + time_left = wait_for_completion_timeout(&ar->thermal.wmi_sync, + ATH12K_THERMAL_SYNC_TIMEOUT_HZ); + if (!time_left) { + ath12k_warn(ar->ab, "failed to synchronize thermal read\n"); + return -ETIMEDOUT; + } + + spin_lock_bh(&ar->data_lock); + temperature = ar->thermal.temperature; + spin_unlock_bh(&ar->data_lock); + + /* display in millidegree celsius */ + return sysfs_emit(buf, "%d\n", temperature * 1000); +} + +void ath12k_thermal_event_temperature(struct ath12k *ar, int temperature) +{ + spin_lock_bh(&ar->data_lock); + ar->thermal.temperature = temperature; + spin_unlock_bh(&ar->data_lock); + complete_all(&ar->thermal.wmi_sync); +} + +static SENSOR_DEVICE_ATTR_RO(temp1_input, ath12k_thermal_temp, 0); + +static struct attribute *ath12k_hwmon_attrs[] = { + &sensor_dev_attr_temp1_input.dev_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(ath12k_hwmon); + +int ath12k_thermal_register(struct ath12k_base *ab) +{ + struct ath12k *ar; + int i, j, ret; + + if (!IS_REACHABLE(CONFIG_HWMON)) + return 0; + + for (i = 0; i < ab->num_radios; i++) { + ar = ab->pdevs[i].ar; + if (!ar) + continue; + + ar->thermal.hwmon_dev = + hwmon_device_register_with_groups(&ar->ah->hw->wiphy->dev, + "ath12k_hwmon", ar, + ath12k_hwmon_groups); + if (IS_ERR(ar->thermal.hwmon_dev)) { + ret = PTR_ERR(ar->thermal.hwmon_dev); + ar->thermal.hwmon_dev = NULL; + ath12k_err(ar->ab, "failed to register hwmon device: %d\n", + ret); + for (j = i - 1; j >= 0; j--) { + ar = ab->pdevs[j].ar; + if (!ar) + continue; + + hwmon_device_unregister(ar->thermal.hwmon_dev); + ar->thermal.hwmon_dev = NULL; + } + return ret; + } + } + + return 0; +} + +void ath12k_thermal_unregister(struct ath12k_base *ab) +{ + struct ath12k *ar; + int i; + + if (!IS_REACHABLE(CONFIG_HWMON)) + return; + + for (i = 0; i < ab->num_radios; i++) { + ar = ab->pdevs[i].ar; + if (!ar) + continue; + + if (ar->thermal.hwmon_dev) { + hwmon_device_unregister(ar->thermal.hwmon_dev); + ar->thermal.hwmon_dev = NULL; + } + } +} diff --git a/drivers/net/wireless/ath/ath12k/thermal.h b/drivers/net/wireless/ath/ath12k/thermal.h new file mode 100644 index 000000000000..9d84056188e1 --- /dev/null +++ b/drivers/net/wireless/ath/ath12k/thermal.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: BSD-3-Clause-Clear */ +/* + * Copyright (c) 2020 The Linux Foundation. All rights reserved. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. + */ + +#ifndef _ATH12K_THERMAL_ +#define _ATH12K_THERMAL_ + +#define ATH12K_THERMAL_SYNC_TIMEOUT_HZ (5 * HZ) + +struct ath12k_thermal { + struct completion wmi_sync; + + /* temperature value in Celsius degree protected by data_lock. */ + int temperature; + struct device *hwmon_dev; +}; + +#if IS_REACHABLE(CONFIG_THERMAL) +int ath12k_thermal_register(struct ath12k_base *ab); +void ath12k_thermal_unregister(struct ath12k_base *ab); +void ath12k_thermal_event_temperature(struct ath12k *ar, int temperature); +#else +static inline int ath12k_thermal_register(struct ath12k_base *ab) +{ + return 0; +} + +static inline void ath12k_thermal_unregister(struct ath12k_base *ab) +{ +} + +static inline void ath12k_thermal_event_temperature(struct ath12k *ar, + int temperature) +{ +} + +#endif +#endif /* _ATH12K_THERMAL_ */ diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index 7617fc3a2479..69f2dbcfca63 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -6778,31 +6778,6 @@ static int ath12k_pull_peer_assoc_conf_ev(struct ath12k_base *ab, struct sk_buff return 0; } -static int -ath12k_pull_pdev_temp_ev(struct ath12k_base *ab, struct sk_buff *skb, - const struct wmi_pdev_temperature_event *ev) -{ - const void **tb; - int ret; - - tb = ath12k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC); - if (IS_ERR(tb)) { - ret = PTR_ERR(tb); - ath12k_warn(ab, "failed to parse tlv: %d\n", ret); - return ret; - } - - ev = tb[WMI_TAG_PDEV_TEMPERATURE_EVENT]; - if (!ev) { - ath12k_warn(ab, "failed to fetch pdev temp ev"); - kfree(tb); - return -EPROTO; - } - - kfree(tb); - return 0; -} - static void ath12k_wmi_op_ep_tx_credits(struct ath12k_base *ab) { /* try to send pending beacons first. they take priority */ @@ -8811,25 +8786,45 @@ static void ath12k_wmi_pdev_temperature_event(struct ath12k_base *ab, struct sk_buff *skb) { + const struct wmi_pdev_temperature_event *ev; struct ath12k *ar; - struct wmi_pdev_temperature_event ev = {}; + const void **tb; + int temp; + u32 pdev_id; - if (ath12k_pull_pdev_temp_ev(ab, skb, &ev) != 0) { - ath12k_warn(ab, "failed to extract pdev temperature event"); + tb = ath12k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC); + if (IS_ERR(tb)) { + ath12k_warn(ab, "failed to parse tlv: %ld\n", PTR_ERR(tb)); return; } + ev = tb[WMI_TAG_PDEV_TEMPERATURE_EVENT]; + if (!ev) { + ath12k_warn(ab, "failed to fetch pdev temp ev\n"); + kfree(tb); + return; + } + + temp = a_sle32_to_cpu(ev->temp); + pdev_id = le32_to_cpu(ev->pdev_id); + + kfree(tb); + ath12k_dbg(ab, ATH12K_DBG_WMI, - "pdev temperature ev temp %d pdev_id %d\n", ev.temp, ev.pdev_id); + "pdev temperature ev temp %d pdev_id %u\n", + temp, pdev_id); rcu_read_lock(); - ar = ath12k_mac_get_ar_by_pdev_id(ab, le32_to_cpu(ev.pdev_id)); + ar = ath12k_mac_get_ar_by_pdev_id(ab, pdev_id); if (!ar) { - ath12k_warn(ab, "invalid pdev id in pdev temperature ev %d", ev.pdev_id); + ath12k_warn(ab, "invalid pdev id %u in pdev temperature ev\n", + pdev_id); goto exit; } + ath12k_thermal_event_temperature(ar, temp); + exit: rcu_read_unlock(); } From ff49eba595df500e4ddccc593088c8a4ab5f2c27 Mon Sep 17 00:00:00 2001 From: Zilin Guan Date: Fri, 30 Jan 2026 08:44:51 +0000 Subject: [PATCH 0073/1561] wifi: ath11k: fix memory leaks in beacon template setup The functions ath11k_mac_setup_bcn_tmpl_ema() and ath11k_mac_setup_bcn_tmpl_mbssid() allocate memory for beacon templates but fail to free it when parameter setup returns an error. Since beacon templates must be released during normal execution, they must also be released in the error handling paths to prevent memory leaks. Fix this by using unified exit paths with proper cleanup in the respective error paths. Compile tested only. Issue found using a prototype static analysis tool and code review. Fixes: 3a415daa3e8b ("wifi: ath11k: add P2P IE in beacon template") Fixes: 335a92765d30 ("wifi: ath11k: MBSSID beacon support") Suggested-by: Baochen Qiang Signed-off-by: Zilin Guan Reviewed-by: Vasanthakumar Thiagarajan Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20260130084451.110768-1-zilin@seu.edu.cn Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/mac.c | 28 ++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 4dfd08b58416..e872f416ea97 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -1557,12 +1557,15 @@ static int ath11k_mac_setup_bcn_tmpl_ema(struct ath11k_vif *arvif, if (!beacons || !beacons->cnt) { ath11k_warn(arvif->ar->ab, "failed to get ema beacon templates from mac80211\n"); - return -EPERM; + ret = -EPERM; + goto free; } if (tx_arvif == arvif) { - if (ath11k_mac_set_vif_params(tx_arvif, beacons->bcn[0].skb)) - return -EINVAL; + if (ath11k_mac_set_vif_params(tx_arvif, beacons->bcn[0].skb)) { + ret = -EINVAL; + goto free; + } } else { arvif->wpaie_present = tx_arvif->wpaie_present; } @@ -1589,11 +1592,11 @@ static int ath11k_mac_setup_bcn_tmpl_ema(struct ath11k_vif *arvif, } } - ieee80211_beacon_free_ema_list(beacons); - if (tx_arvif != arvif && !nontx_vif_params_set) - return -EINVAL; /* Profile not found in the beacons */ + ret = -EINVAL; /* Profile not found in the beacons */ +free: + ieee80211_beacon_free_ema_list(beacons); return ret; } @@ -1622,19 +1625,22 @@ static int ath11k_mac_setup_bcn_tmpl_mbssid(struct ath11k_vif *arvif, } if (tx_arvif == arvif) { - if (ath11k_mac_set_vif_params(tx_arvif, bcn)) - return -EINVAL; + if (ath11k_mac_set_vif_params(tx_arvif, bcn)) { + ret = -EINVAL; + goto free; + } } else if (!ath11k_mac_set_nontx_vif_params(tx_arvif, arvif, bcn)) { - return -EINVAL; + ret = -EINVAL; + goto free; } ret = ath11k_wmi_bcn_tmpl(ar, arvif->vdev_id, &offs, bcn, 0); - kfree_skb(bcn); - if (ret) ath11k_warn(ab, "failed to submit beacon template command: %d\n", ret); +free: + kfree_skb(bcn); return ret; } From 5c894879f17c70cd93712b30fa62e6803b1c46e2 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 25 Feb 2026 10:01:19 +0000 Subject: [PATCH 0074/1561] net: stmmac: ptp: limit n_per_out ptp_clock_ops.n_per_out sets the number of PPS outputs, which the PTP subsystem uses to validate userspace input, such as the index number used in a PTP_CLK_REQ_PEROUT request. stmmac_enable() uses this to index the priv->pps array, which is an array of size STMMAC_PPS_MAX. ptp_clock_ops.n_per_out is initialised using priv->dma_cap.pps_out_num, which is a three bit field read from hardware. Documentation that I've checked suggests that values >= 5 are reserved, but that doesn't mean such values won't appear, and if they do, we can overrun the priv->pps array in stmmac_enable(). stmmac_ptp_register() has protection against this in its loop, but it doesn't act to limit ptp_clock_ops.n_per_out. Fix this by introducing a local variable, pps_out_num which is limited to STMMAC_PPS_MAX, and use that when initialising the array and setting priv->ptp_clock_ops.n_per_out. Print a warning when we limit the number of outputs. Reviewed-by: Simon Horman Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vvBhn-0000000ArCg-4C4u@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_ptp.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index 3e30172fa129..98da499ba3b1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -334,14 +334,19 @@ const struct ptp_clock_info dwmac1000_ptp_clock_ops = { */ void stmmac_ptp_register(struct stmmac_priv *priv) { + unsigned int pps_out_num = priv->dma_cap.pps_out_num; int i; - for (i = 0; i < priv->dma_cap.pps_out_num; i++) { - if (i >= STMMAC_PPS_MAX) - break; - priv->pps[i].available = true; + if (pps_out_num > STMMAC_PPS_MAX) { + dev_warn(priv->device, + "pps outputs (%u) exceeds driver maximum, limiting to %u\n", + pps_out_num, STMMAC_PPS_MAX); + pps_out_num = STMMAC_PPS_MAX; } + for (i = 0; i < pps_out_num; i++) + priv->pps[i].available = true; + /* Calculate the clock domain crossing (CDC) error if necessary */ priv->plat->cdc_error_adj = 0; if (priv->plat->core_type == DWMAC_CORE_GMAC4) @@ -350,8 +355,8 @@ void stmmac_ptp_register(struct stmmac_priv *priv) /* Update the ptp clock parameters based on feature discovery, when * available */ - if (priv->dma_cap.pps_out_num) - priv->ptp_clock_ops.n_per_out = priv->dma_cap.pps_out_num; + if (pps_out_num) + priv->ptp_clock_ops.n_per_out = pps_out_num; if (priv->dma_cap.aux_snapshot_n) priv->ptp_clock_ops.n_ext_ts = priv->dma_cap.aux_snapshot_n; From d68d21ea6b29f87f1c334f007e845ca5fb678c80 Mon Sep 17 00:00:00 2001 From: Gabriel Goller Date: Wed, 25 Feb 2026 10:58:10 +0100 Subject: [PATCH 0075/1561] docs: net: document neigh gc_interval sysctl Add entry for the neigh/default/gc_interval sysctl. This sysctl is unused since kernel v2.6.8. Suggested-by: Jakub Kicinski Signed-off-by: Gabriel Goller Link: https://patch.msgid.link/20260225095822.44050-1-g.goller@proxmox.com Signed-off-by: Jakub Kicinski --- Documentation/networking/ip-sysctl.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index d1eeb5323af0..265158534cda 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -202,6 +202,13 @@ neigh/default/gc_thresh3 - INTEGER Default: 1024 +neigh/default/gc_interval - INTEGER + Specifies how often the garbage collector for neighbor entries + should run. This value applies to the entire table, not + individual entries. Unused since kernel v2.6.8. + + Default: 30 seconds + neigh/default/gc_stale_time - INTEGER Determines how long a neighbor entry can remain unused before it is considered stale and eligible for garbage collection. Entries that have From f22b4e6fbba5c50612ca82220f34f8fa2ad1d24d Mon Sep 17 00:00:00 2001 From: Bo Sun Date: Wed, 25 Feb 2026 16:23:48 +0800 Subject: [PATCH 0076/1561] octeontx2-af: CGX: replace kfree() with rvu_free_bitmap() mac_to_index_bmap is allocated with rvu_alloc_bitmap(), so free it with rvu_free_bitmap() instead of open-coding kfree(.bmap) to keep the alloc/free API pairing consistent. Signed-off-by: Bo Sun Reviewed-by: Vadim Fedorenko Reviewed-by: Jijie Shao Link: https://patch.msgid.link/20260225082348.2519131-1-bo@mboxify.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 6000795823a3..4f33a816bc7a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -1822,7 +1822,7 @@ static int cgx_lmac_exit(struct cgx *cgx) continue; cgx->mac_ops->mac_pause_frm_config(cgx, lmac->lmac_id, false); cgx_configure_interrupt(cgx, lmac, lmac->lmac_id, true); - kfree(lmac->mac_to_index_bmap.bmap); + rvu_free_bitmap(&lmac->mac_to_index_bmap); rvu_free_bitmap(&lmac->rx_fc_pfvf_bmap); rvu_free_bitmap(&lmac->tx_fc_pfvf_bmap); kfree(lmac->name); From 5cf47393d96f211836ce98a22cebdf0eb8555413 Mon Sep 17 00:00:00 2001 From: Yohei Kojima Date: Thu, 26 Feb 2026 00:12:09 +0900 Subject: [PATCH 0077/1561] docs: ethtool: clarify the bit-by-bit bitset format description Clarify the bit-by-bit bitset format's behavior around mandatory attributes and bit identification. More specifically, the following changes are made: * Rephrase a misleading sentence which implies name and index are mutually exclusive * Describe that ETHTOOL_A_BITSET_BITS nest is mandatory * Describe that a request fails if inconsistent identifiers are given Signed-off-by: Yohei Kojima Reviewed-by: Jakub Kicinski Reviewed-by: Simon Horman Link: https://patch.msgid.link/ef90a56965ca66e57aa177929ce3e10c5ca815fa.1772031974.git.yk@y-koj.net Signed-off-by: Jakub Kicinski --- Documentation/networking/ethtool-netlink.rst | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index af56c304cef4..32179168eb73 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -96,7 +96,7 @@ For short bitmaps of (reasonably) fixed length, standard ``NLA_BITFIELD32`` type is used. For arbitrary length bitmaps, ethtool netlink uses a nested attribute with contents of one of two forms: compact (two binary bitmaps representing bit values and mask of affected bits) and bit-by-bit (list of -bits identified by either index or name). +bits identified by index or name). Verbose (bit-by-bit) bitsets allow sending symbolic names for bits together with their values which saves a round trip (when the bitset is passed in a @@ -156,12 +156,16 @@ Bit-by-bit form: nested (bitset) attribute contents: | | | ``ETHTOOL_A_BITSET_BIT_VALUE`` | flag | present if bit is set | +-+-+--------------------------------+--------+-----------------------------+ -Bit size is optional for bit-by-bit form. ``ETHTOOL_A_BITSET_BITS`` nest can +For bit-by-bit form, ``ETHTOOL_A_BITSET_SIZE`` is optional, and +``ETHTOOL_A_BITSET_BITS`` is mandatory. ``ETHTOOL_A_BITSET_BITS`` nest can only contain ``ETHTOOL_A_BITSET_BITS_BIT`` attributes but there can be an arbitrary number of them. A bit may be identified by its index or by its name. When used in requests, listed bits are set to 0 or 1 according to -``ETHTOOL_A_BITSET_BIT_VALUE``, the rest is preserved. A request fails if -index exceeds kernel bit length or if name is not recognized. +``ETHTOOL_A_BITSET_BIT_VALUE``, the rest is preserved. + +A request fails if index exceeds kernel bit length or if name is not +recognized. If both name and index are set, the request will fail if they +point to different bits. When ``ETHTOOL_A_BITSET_NOMASK`` flag is present, bitset is interpreted as a simple bitmap. ``ETHTOOL_A_BITSET_BIT_VALUE`` attributes are not used in From 363c5108e4e2b3b5f99243e61d524dd9c23d8c1b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 25 Feb 2026 13:40:23 +0000 Subject: [PATCH 0078/1561] inet: remove three EXPORT_SYMBOL() inet_rcv_saddr_equal() and inet_csk_listen_stop() are not used from any modules. inet_csk_accept() can use EXPORT_IPV6_MOD() Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260225134023.1176738-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/inet_connection_sock.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 5dfac6ce1110..26e2e792916c 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -107,7 +107,6 @@ bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, ipv6_only_sock(sk2), match_wildcard, match_wildcard); } -EXPORT_SYMBOL(inet_rcv_saddr_equal); bool inet_rcv_saddr_any(const struct sock *sk) { @@ -724,7 +723,7 @@ out_err: arg->err = error; return NULL; } -EXPORT_SYMBOL(inet_csk_accept); +EXPORT_IPV6_MOD(inet_csk_accept); /* * Using different timers for retransmit, delayed acks and probes @@ -1537,7 +1536,6 @@ skip_child_forget: } WARN_ON_ONCE(sk->sk_ack_backlog); } -EXPORT_SYMBOL_GPL(inet_csk_listen_stop); static struct dst_entry *inet_csk_rebuild_route(struct sock *sk, struct flowi *fl) { From 1338cfef1ff1b95891990f8677631a834c2cf22d Mon Sep 17 00:00:00 2001 From: Charles Perry Date: Tue, 24 Feb 2026 12:28:52 -0800 Subject: [PATCH 0079/1561] net: macb: fix SGMII with inband aneg disabled Make it possible to connect a PHY which does not use inband autoneg to a gem MAC using phylink's information. The previous implementation relied on whether or not the link was a fixed-link to disable SGMII autoneg. This commit extend this to all link which are not configured for inband autonegotiation. Signed-off-by: Charles Perry Link: https://patch.msgid.link/20260224202854.112813-2-charles.perry@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 36 +++++++++++++----------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 3709b2224879..fc26e70e23d4 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -574,6 +574,26 @@ static int macb_pcs_config(struct phylink_pcs *pcs, const unsigned long *advertising, bool permit_pause_to_mac) { + struct macb *bp = container_of(pcs, struct macb, phylink_sgmii_pcs); + u32 old, new; + + old = gem_readl(bp, PCSANADV); + new = phylink_mii_c22_pcs_encode_advertisement(interface, advertising); + if (new != -EINVAL && old != new) + gem_writel(bp, PCSANADV, new); + + /* Disable AN if it's not to be used, enable otherwise. + * Must be written after PCSSEL is set in NCFGR which is done in + * macb_mac_config(), otherwise writes will not take effect. + */ + old = gem_readl(bp, PCSCNTRL); + if (neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED) + new = old | BMCR_ANENABLE; + else + new = old & ~BMCR_ANENABLE; + if (old != new) + gem_writel(bp, PCSCNTRL, new); + return 0; } @@ -628,22 +648,6 @@ static void macb_mac_config(struct phylink_config *config, unsigned int mode, if (old_ncr ^ ncr) macb_or_gem_writel(bp, NCR, ncr); - /* Disable AN for SGMII fixed link configuration, enable otherwise. - * Must be written after PCSSEL is set in NCFGR, - * otherwise writes will not take effect. - */ - if (macb_is_gem(bp) && state->interface == PHY_INTERFACE_MODE_SGMII) { - u32 pcsctrl, old_pcsctrl; - - old_pcsctrl = gem_readl(bp, PCSCNTRL); - if (mode == MLO_AN_FIXED) - pcsctrl = old_pcsctrl & ~GEM_BIT(PCSAUTONEG); - else - pcsctrl = old_pcsctrl | GEM_BIT(PCSAUTONEG); - if (old_pcsctrl != pcsctrl) - gem_writel(bp, PCSCNTRL, pcsctrl); - } - spin_unlock_irqrestore(&bp->lock, flags); } From 7f44b2acc5a111471d8a3ae0e809bd419c0237e0 Mon Sep 17 00:00:00 2001 From: Charles Perry Date: Tue, 24 Feb 2026 12:28:53 -0800 Subject: [PATCH 0080/1561] net: macb: add support for reporting SGMII inband link status This makes it possible to use in-band autonegotiation with SGMII. If using a device tree, this can be done by adding the managed = "in-band-status" property to the gem node. Signed-off-by: Charles Perry Link: https://patch.msgid.link/20260224202854.112813-3-charles.perry@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index fc26e70e23d4..8c192d65eeeb 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -560,7 +560,12 @@ static int macb_usx_pcs_config(struct phylink_pcs *pcs, static void macb_pcs_get_state(struct phylink_pcs *pcs, unsigned int neg_mode, struct phylink_link_state *state) { - state->link = 0; + struct macb *bp = container_of(pcs, struct macb, phylink_sgmii_pcs); + u16 bmsr, lpa; + + bmsr = gem_readl(bp, PCSSTS); + lpa = gem_readl(bp, PCSANLPBASE); + phylink_mii_c22_pcs_decode_state(state, neg_mode, bmsr, lpa); } static void macb_pcs_an_restart(struct phylink_pcs *pcs) From d3549e2b48187dc042c0b37bac387948146a023b Mon Sep 17 00:00:00 2001 From: Charles Perry Date: Tue, 24 Feb 2026 12:28:54 -0800 Subject: [PATCH 0081/1561] net: macb: add the .pcs_inband_caps() callback for SGMII In SGMII mode, GEM can work with or without inband autonegotiation. Signed-off-by: Charles Perry Link: https://patch.msgid.link/20260224202854.112813-4-charles.perry@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 8c192d65eeeb..02eab26fd98b 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -557,6 +557,12 @@ static int macb_usx_pcs_config(struct phylink_pcs *pcs, return 0; } +static unsigned int macb_pcs_inband_caps(struct phylink_pcs *pcs, + phy_interface_t interface) +{ + return LINK_INBAND_DISABLE | LINK_INBAND_ENABLE; +} + static void macb_pcs_get_state(struct phylink_pcs *pcs, unsigned int neg_mode, struct phylink_link_state *state) { @@ -609,6 +615,7 @@ static const struct phylink_pcs_ops macb_phylink_usx_pcs_ops = { }; static const struct phylink_pcs_ops macb_phylink_pcs_ops = { + .pcs_inband_caps = macb_pcs_inband_caps, .pcs_get_state = macb_pcs_get_state, .pcs_an_restart = macb_pcs_an_restart, .pcs_config = macb_pcs_config, From b70190d767be4464e092b252185fa65398bcc662 Mon Sep 17 00:00:00 2001 From: Tiernan Hubble Date: Tue, 24 Feb 2026 18:20:25 -0600 Subject: [PATCH 0082/1561] net: atlantic: fix reading SFP module info on some AQC100 cards Commit 853a2944aaf3 ("net: atlantic: support reading SFP module info") added support for reading SFP module info on AQC100-based cards. However, it only supports reading directly from the controller's hardware registers, and this does not seem to be supported on certain cards, including my TRENDnet TEG-10GECSFP V3. "ethtool -m" times out when reading certain registers, even when I increase the read poll timeout values. The DPDK "atlantic" driver reads module info via firmware calls instead of directly reading the hardware registers, provided that the NIC's firmware version supports it. This change adapts the DPDK firmware call code to the kernel driver. It preserves the old hardware-based module read code as a fallback when the firmware does not support it, to avoid breaking cards that are currently working. Tested on 2 different TRENDnet TEG-10GECSFP V3 cards, both with firmware version 3.1.121 (current at the time of this patch). Both cards correctly reported module info for a passive DAC cable and 2 different 10G optical transceivers. Signed-off-by: Tiernan Hubble Link: https://patch.msgid.link/20260225002026.1754045-1-thubble@thubble.ca Signed-off-by: Jakub Kicinski --- .../ethernet/aquantia/atlantic/aq_ethtool.c | 55 ++++++++++--- .../net/ethernet/aquantia/atlantic/aq_hw.h | 3 + .../aquantia/atlantic/hw_atl/hw_atl_utils.h | 7 ++ .../atlantic/hw_atl/hw_atl_utils_fw2x.c | 80 +++++++++++++++++++ 4 files changed, 135 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c index a6e1826dd5d7..420af958d486 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c @@ -983,6 +983,38 @@ static int aq_ethtool_set_phy_tunable(struct net_device *ndev, return err; } +static bool aq_ethtool_can_read_module_eeprom(struct aq_nic_s *aq_nic) +{ + return aq_nic->aq_fw_ops->read_module_eeprom || + aq_nic->aq_hw_ops->hw_read_module_eeprom; +} + +static int aq_ethtool_read_module_eeprom(struct aq_nic_s *aq_nic, u8 dev_addr, + u8 reg_start_addr, int len, u8 *data) +{ + const struct aq_fw_ops *fw_ops = aq_nic->aq_fw_ops; + const struct aq_hw_ops *hw_ops = aq_nic->aq_hw_ops; + int err = -EOPNOTSUPP; + + if (fw_ops->read_module_eeprom) { + err = fw_ops->read_module_eeprom(aq_nic->aq_hw, dev_addr, + reg_start_addr, len, data); + + /* If the only error is that the firmware version doesn't + * support reading EEPROM, we can still attempt to read it + * directly from the hardware if supported. + */ + if (err != -EOPNOTSUPP) + return err; + } + + if (hw_ops->hw_read_module_eeprom) + err = hw_ops->hw_read_module_eeprom(aq_nic->aq_hw, dev_addr, + reg_start_addr, len, data); + + return err; +} + static int aq_ethtool_get_module_info(struct net_device *ndev, struct ethtool_modinfo *modinfo) { @@ -992,16 +1024,18 @@ static int aq_ethtool_get_module_info(struct net_device *ndev, /* Module EEPROM is only supported for controllers with external PHY */ if (aq_nic->aq_nic_cfg.aq_hw_caps->media_type != AQ_HW_MEDIA_TYPE_FIBRE || - !aq_nic->aq_hw_ops->hw_read_module_eeprom) + !aq_ethtool_can_read_module_eeprom(aq_nic)) return -EOPNOTSUPP; - err = aq_nic->aq_hw_ops->hw_read_module_eeprom(aq_nic->aq_hw, - SFF_8472_ID_ADDR, SFF_8472_COMP_ADDR, 1, &compliance_val); + err = aq_ethtool_read_module_eeprom(aq_nic, SFF_8472_ID_ADDR, + SFF_8472_COMP_ADDR, 1, + &compliance_val); if (err) return err; - err = aq_nic->aq_hw_ops->hw_read_module_eeprom(aq_nic->aq_hw, - SFF_8472_ID_ADDR, SFF_8472_DOM_TYPE_ADDR, 1, &dom_type); + err = aq_ethtool_read_module_eeprom(aq_nic, SFF_8472_ID_ADDR, + SFF_8472_DOM_TYPE_ADDR, 1, + &dom_type); if (err) return err; @@ -1022,7 +1056,7 @@ static int aq_ethtool_get_module_eeprom(struct net_device *ndev, unsigned int first, last, len; int err; - if (!aq_nic->aq_hw_ops->hw_read_module_eeprom) + if (!aq_ethtool_can_read_module_eeprom(aq_nic)) return -EOPNOTSUPP; first = ee->offset; @@ -1032,8 +1066,8 @@ static int aq_ethtool_get_module_eeprom(struct net_device *ndev, len = min(last, ETH_MODULE_SFF_8079_LEN); len -= first; - err = aq_nic->aq_hw_ops->hw_read_module_eeprom(aq_nic->aq_hw, - SFF_8472_ID_ADDR, first, len, data); + err = aq_ethtool_read_module_eeprom(aq_nic, SFF_8472_ID_ADDR, + first, len, data); if (err) return err; @@ -1045,8 +1079,9 @@ static int aq_ethtool_get_module_eeprom(struct net_device *ndev, len -= first; first -= ETH_MODULE_SFF_8079_LEN; - err = aq_nic->aq_hw_ops->hw_read_module_eeprom(aq_nic->aq_hw, - SFF_8472_DIAGNOSTICS_ADDR, first, len, data); + err = aq_ethtool_read_module_eeprom(aq_nic, + SFF_8472_DIAGNOSTICS_ADDR, + first, len, data); if (err) return err; } diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h index 4e66fd9b2ab1..57ea59026a2c 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h @@ -404,6 +404,9 @@ struct aq_fw_ops { int (*send_macsec_req)(struct aq_hw_s *self, struct macsec_msg_fw_request *msg, struct macsec_msg_fw_response *resp); + + int (*read_module_eeprom)(struct aq_hw_s *self, u8 dev_addr, + u8 reg_start_addr, int len, u8 *data); }; #endif /* AQ_HW_H */ diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h index f6b990b7f5b4..404c84adad4a 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h @@ -319,6 +319,13 @@ struct __packed hw_atl_utils_settings { u32 media_detect; }; +struct __packed smbus_request { + u32 msg_id; + u32 device_id; + u32 address; + u32 length; +}; + enum macsec_msg_type { macsec_cfg_msg = 0, macsec_add_rx_sc_msg, diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c index 4d4cfbc91e19..2cac0d9670bf 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c @@ -703,6 +703,85 @@ static int aq_fw2x_send_macsec_req(struct aq_hw_s *hw, return err; } +static int aq_fw2x_read_module_eeprom(struct aq_hw_s *self, u8 dev_addr, + u8 reg_start_addr, int len, u8 *data) +{ + u32 low_status, orig_low_status, low_req = 0; + u32 res_bytes_remain_cnt = len % sizeof(u32); + u32 res_dword_cnt = len / sizeof(u32); + struct smbus_request request = { 0 }; + u32 req_dword_cnt; + u32 result = 0; + u32 caps_lo; + u32 offset; + int err; + + caps_lo = aq_fw2x_get_link_capabilities(self); + if (!(caps_lo & BIT(CAPS_LO_SMBUS_READ))) + return -EOPNOTSUPP; + + request.msg_id = 0; + request.device_id = dev_addr; + request.address = reg_start_addr; + request.length = len; + + /* Write SMBUS request to cfg memory */ + req_dword_cnt = DIV_ROUND_UP(sizeof(request), sizeof(u32)); + err = hw_atl_write_fwcfg_dwords(self, (void *)&request, req_dword_cnt); + if (err < 0) + return err; + + /* Toggle 0x368.CAPS_LO_SMBUS_READ bit */ + low_req = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL_ADDR); + orig_low_status = low_req & BIT(CAPS_LO_SMBUS_READ); + low_req ^= BIT(CAPS_LO_SMBUS_READ); + aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL_ADDR, low_req); + + /* Wait FW to report back */ + err = readx_poll_timeout_atomic(aq_fw2x_state_get, self, low_status, + orig_low_status != (low_status & + BIT(CAPS_LO_SMBUS_READ)), + 10U, 100000U); + if (err) + return err; + + /* Read status of read operation */ + offset = self->rpc_addr + sizeof(u32); + err = hw_atl_utils_fw_downld_dwords(self, offset, &result, + sizeof(result) / sizeof(u32)); + if (err < 0) + return err; + if (result) + return -EIO; + + /* Read response full DWORD data */ + if (res_dword_cnt) { + offset = self->rpc_addr + sizeof(u32) * 2; + err = hw_atl_utils_fw_downld_dwords(self, offset, (u32 *)data, + res_dword_cnt); + if (err < 0) + return err; + } + + /* Read response trailing bytes data */ + if (res_bytes_remain_cnt) { + u32 bytes_remain_val = 0; + + offset = self->rpc_addr + + (sizeof(u32) * 2) + + (res_dword_cnt * sizeof(u32)); + err = hw_atl_utils_fw_downld_dwords(self, offset, + &bytes_remain_val, 1); + if (err < 0) + return err; + + memcpy(data + len - res_bytes_remain_cnt, + &bytes_remain_val, res_bytes_remain_cnt); + } + + return 0; +} + const struct aq_fw_ops aq_fw_2x_ops = { .init = aq_fw2x_init, .deinit = aq_fw2x_deinit, @@ -729,4 +808,5 @@ const struct aq_fw_ops aq_fw_2x_ops = { .adjust_ptp = aq_fw3x_adjust_ptp, .get_link_capabilities = aq_fw2x_get_link_capabilities, .send_macsec_req = aq_fw2x_send_macsec_req, + .read_module_eeprom = aq_fw2x_read_module_eeprom, }; From aebf15e8eb09b01e99f043e9f5d423798aac9d32 Mon Sep 17 00:00:00 2001 From: Zhengping Zhang Date: Thu, 26 Feb 2026 10:37:08 +0800 Subject: [PATCH 0083/1561] net: airoha: fix typo in function name Corrected the typo in the function name from `airhoa_is_lan_gdm_port` to `airoha_is_lan_gdm_port`. This change ensures consistency in the API naming convention. Signed-off-by: Zhengping Zhang Reviewed-by: Simon Horman Acked-by: Lorenzo Bianconi Link: https://patch.msgid.link/tencent_E4FD5D6BC0131E617D848896F5F9FCED6E0A@qq.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 2 +- drivers/net/ethernet/airoha/airoha_eth.h | 2 +- drivers/net/ethernet/airoha/airoha_ppe.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 62bcbbbe2a95..3779f93b47bc 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -76,7 +76,7 @@ static void airoha_set_macaddr(struct airoha_gdm_port *port, const u8 *addr) struct airoha_eth *eth = port->qdma->eth; u32 val, reg; - reg = airhoa_is_lan_gdm_port(port) ? REG_FE_LAN_MAC_H + reg = airoha_is_lan_gdm_port(port) ? REG_FE_LAN_MAC_H : REG_FE_WAN_MAC_H; val = (addr[0] << 16) | (addr[1] << 8) | addr[2]; airoha_fe_wr(eth, reg, val); diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h index 20e602d61e61..59ed7569f7c3 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.h +++ b/drivers/net/ethernet/airoha/airoha_eth.h @@ -626,7 +626,7 @@ u32 airoha_rmw(void __iomem *base, u32 offset, u32 mask, u32 val); #define airoha_qdma_clear(qdma, offset, val) \ airoha_rmw((qdma)->regs, (offset), (val), 0) -static inline bool airhoa_is_lan_gdm_port(struct airoha_gdm_port *port) +static inline bool airoha_is_lan_gdm_port(struct airoha_gdm_port *port) { /* GDM1 port on EN7581 SoC is connected to the lan dsa switch. * GDM{2,3,4} can be used as wan port connected to an external diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c index 42dbe8f93231..ec5ce41dad80 100644 --- a/drivers/net/ethernet/airoha/airoha_ppe.c +++ b/drivers/net/ethernet/airoha/airoha_ppe.c @@ -321,7 +321,7 @@ static int airoha_ppe_foe_entry_prepare(struct airoha_eth *eth, /* For downlink traffic consume SRAM memory for hw * forwarding descriptors queue. */ - if (airhoa_is_lan_gdm_port(port)) + if (airoha_is_lan_gdm_port(port)) val |= AIROHA_FOE_IB2_FAST_PATH; if (dsa_port >= 0) val |= FIELD_PREP(AIROHA_FOE_IB2_NBQ, From fd6dad4e1ae296b67b87291256878a58dad36c93 Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Tue, 24 Feb 2026 15:14:24 +0900 Subject: [PATCH 0084/1561] netmem: remove the pp fields from net_iov MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that the pp fields in net_iov have no users, remove them from net_iov and clean up. Signed-off-by: Byungchul Park Reviewed-by: Simon Horman Reviewed-by: Pavel Begunkov Reviewed-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20260224061424.11219-1-byungchul@sk.com Signed-off-by: Jakub Kicinski --- include/net/netmem.h | 38 +------------------------------------- 1 file changed, 1 insertion(+), 37 deletions(-) diff --git a/include/net/netmem.h b/include/net/netmem.h index a96b3e5e5574..a6d65ced5231 100644 --- a/include/net/netmem.h +++ b/include/net/netmem.h @@ -93,23 +93,7 @@ enum net_iov_type { * supported. */ struct net_iov { - union { - struct netmem_desc desc; - - /* XXX: The following part should be removed once all - * the references to them are converted so as to be - * accessed via netmem_desc e.g. niov->desc.pp instead - * of niov->pp. - */ - struct { - unsigned long _flags; - unsigned long pp_magic; - struct page_pool *pp; - unsigned long _pp_mapping_pad; - unsigned long dma_addr; - atomic_long_t pp_ref_count; - }; - }; + struct netmem_desc desc; struct net_iov_area *owner; enum net_iov_type type; }; @@ -123,26 +107,6 @@ struct net_iov_area { unsigned long base_virtual; }; -/* net_iov is union'ed with struct netmem_desc mirroring struct page, so - * the page_pool can access these fields without worrying whether the - * underlying fields are accessed via netmem_desc or directly via - * net_iov, until all the references to them are converted so as to be - * accessed via netmem_desc e.g. niov->desc.pp instead of niov->pp. - * - * The non-net stack fields of struct page are private to the mm stack - * and must never be mirrored to net_iov. - */ -#define NET_IOV_ASSERT_OFFSET(desc, iov) \ - static_assert(offsetof(struct netmem_desc, desc) == \ - offsetof(struct net_iov, iov)) -NET_IOV_ASSERT_OFFSET(_flags, _flags); -NET_IOV_ASSERT_OFFSET(pp_magic, pp_magic); -NET_IOV_ASSERT_OFFSET(pp, pp); -NET_IOV_ASSERT_OFFSET(_pp_mapping_pad, _pp_mapping_pad); -NET_IOV_ASSERT_OFFSET(dma_addr, dma_addr); -NET_IOV_ASSERT_OFFSET(pp_ref_count, pp_ref_count); -#undef NET_IOV_ASSERT_OFFSET - static inline struct net_iov_area *net_iov_owner(const struct net_iov *niov) { return niov->owner; From 11c0663a595801b6e6f7a937adec8532706ef486 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jens=20Emil=20Schulz=20=C3=98stergaard?= Date: Thu, 26 Feb 2026 09:24:19 +0100 Subject: [PATCH 0085/1561] net: phy: micrel: Add support for lan9645x internal phy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LAN9645X is a family of switch chips with 5 internal copper phys. The internal PHY is based on parts of LAN8832. This is a low-power, single port triple-speed (10BASE-T/100BASE-TX/1000BASE-T) ethernet physical layer transceiver (PHY) that supports transmission and reception of data on standard CAT-5, as well as CAT-5e and CAT-6 Unshielded Twisted Pair (UTP) cables. Add support for the internal PHY of the lan9645x chip family. Reviewed-by: Steen Hegelund Reviewed-by: Daniel Machon Signed-off-by: Jens Emil Schulz Østergaard Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20260226-phy_micrel_add_support_for_lan9645x_internal_phy-v3-1-1fe82379962b@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/micrel.c | 152 +++++++++++++++++++++++++++++++++++++ include/linux/micrel_phy.h | 1 + 2 files changed, 153 insertions(+) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index c6b011a9d636..2aa1dedd21b8 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -6523,6 +6523,142 @@ static void lan8842_get_phy_stats(struct phy_device *phydev, stats->tx_errors = priv->phy_stats.tx_errors; } +#define LAN9645X_CTRL_REG 0x1f +#define LAN9645X_CTRL_REG_SW_SOFT_RST BIT(1) + +#define LAN9645X_DAC_ICAS_AMP_POWER_DOWN 0x47 +#define LAN9645X_BTRX_QBIAS_POWER_DOWN 0x46 +#define LAN9645X_TX_LOW_I_CH_CD_POWER_MGMT 0x45 +#define LAN9645X_TX_LOW_I_CH_B_POWER_MGMT 0x44 +#define LAN9645X_TX_LOW_I_CH_A_POWER_MGMT 0x43 + +static const struct lanphy_reg_data force_dac_tx_errata[] = { + /* Force channel A/B/C/D TX on */ + { LAN8814_PAGE_POWER_REGS, + LAN9645X_DAC_ICAS_AMP_POWER_DOWN, + 0 }, + /* Force channel A/B/C/D QBias on */ + { LAN8814_PAGE_POWER_REGS, + LAN9645X_BTRX_QBIAS_POWER_DOWN, + 0xaa }, + /* Tx low I on channel C/D overwrite */ + { LAN8814_PAGE_POWER_REGS, + LAN9645X_TX_LOW_I_CH_CD_POWER_MGMT, + 0xbfff }, + /* Channel B low I overwrite */ + { LAN8814_PAGE_POWER_REGS, + LAN9645X_TX_LOW_I_CH_B_POWER_MGMT, + 0xabbf }, + /* Channel A low I overwrite */ + { LAN8814_PAGE_POWER_REGS, + LAN9645X_TX_LOW_I_CH_A_POWER_MGMT, + 0xbd3f }, +}; + +static int lan9645x_config_init(struct phy_device *phydev) +{ + int ret; + + /* Apply erratas from previous generations. */ + ret = lan8842_erratas(phydev); + if (ret < 0) + return ret; + + /* Apply errata for an issue where bringing a port down, can cause a few + * CRC errors for traffic flowing through adjacent ports. + */ + return lanphy_write_reg_data(phydev, force_dac_tx_errata, + ARRAY_SIZE(force_dac_tx_errata)); +} + +static int lan9645x_suspend(struct phy_device *phydev) +{ + int ret, val; + + /* Force link down before software power down (SPD), by doing software + * soft reset. This resets the PHY, but keeps all register configuration + * intact. The bit self clears. + * + * This is needed as a workaround for an issue where performing SPD on a + * port can bring adjacent ports down, when there is traffic flowing + * through the ports. + */ + ret = phy_set_bits(phydev, LAN9645X_CTRL_REG, + LAN9645X_CTRL_REG_SW_SOFT_RST); + if (ret) + return ret; + + ret = phy_read_poll_timeout(phydev, LAN9645X_CTRL_REG, val, + !(val & LAN9645X_CTRL_REG_SW_SOFT_RST), + 3000, 100000, true); + if (ret) + return ret; + + return genphy_suspend(phydev); +} + +static int lan9645x_config_intr(struct phy_device *phydev) +{ + int err; + + /* enable / disable interrupts */ + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { + /* This is an internal PHY of lan9645x and is not possible to + * change the polarity of irq sources in the OIC (CPU_INTR) + * found in lan9645x. Therefore change the polarity of the + * interrupt in the PHY from being active low instead of active + * high. + */ + err = phy_write(phydev, LAN8804_CONTROL, + LAN8804_CONTROL_INTR_POLARITY); + if (err) + return err; + + /* By default interrupt buffer is open-drain in which case the + * interrupt can be active only low. Therefore change the + * interrupt buffer to be push-pull to be able to change + * interrupt polarity. + */ + err = phy_write(phydev, LAN8804_OUTPUT_CONTROL, + LAN8804_OUTPUT_CONTROL_INTR_BUFFER); + if (err) + return err; + + err = lan8814_ack_interrupt(phydev); + if (err) + return err; + + err = phy_write(phydev, LAN8814_INTC, + LAN8814_INT_LINK | LAN8814_INT_FLF); + } else { + err = phy_write(phydev, LAN8814_INTC, 0); + if (err) + return err; + + err = lan8814_ack_interrupt(phydev); + } + + return err; +} + +static irqreturn_t lan9645x_handle_interrupt(struct phy_device *phydev) +{ + int status; + + status = phy_read(phydev, LAN8814_INTS); + if (status < 0) { + phy_error(phydev); + return IRQ_NONE; + } + + if (status & (LAN8814_INT_LINK | LAN8814_INT_FLF)) { + phy_trigger_machine(phydev); + return IRQ_HANDLED; + } + + return IRQ_NONE; +} + static struct phy_driver ksphy_driver[] = { { PHY_ID_MATCH_MODEL(PHY_ID_KS8737), @@ -6761,6 +6897,21 @@ static struct phy_driver ksphy_driver[] = { .set_tunable = lan8842_set_tunable, .cable_test_start = lan8814_cable_test_start, .cable_test_get_status = ksz886x_cable_test_get_status, +}, { + PHY_ID_MATCH_MODEL(PHY_ID_LAN9645X), + .name = "Microchip LAN9645X Gigabit PHY", + .config_init = lan9645x_config_init, + .driver_data = &ksz9021_type, + .probe = kszphy_probe, + .soft_reset = genphy_soft_reset, + .suspend = lan9645x_suspend, + .resume = genphy_resume, + .config_intr = lan9645x_config_intr, + .handle_interrupt = lan9645x_handle_interrupt, + .get_tunable = lan8842_get_tunable, + .set_tunable = lan8842_set_tunable, + .get_phy_stats = lan8842_get_phy_stats, + .update_stats = lan8842_update_stats, }, { PHY_ID_MATCH_MODEL(PHY_ID_KSZ9131), .name = "Microchip KSZ9131 Gigabit PHY", @@ -6859,6 +7010,7 @@ static const struct mdio_device_id __maybe_unused micrel_tbl[] = { { PHY_ID_MATCH_MODEL(PHY_ID_LAN8804) }, { PHY_ID_MATCH_MODEL(PHY_ID_LAN8841) }, { PHY_ID_MATCH_MODEL(PHY_ID_LAN8842) }, + { PHY_ID_MATCH_MODEL(PHY_ID_LAN9645X) }, { } }; diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index ca691641788b..9c6f9817383f 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -33,6 +33,7 @@ #define PHY_ID_LAN8804 0x00221670 #define PHY_ID_LAN8841 0x00221650 #define PHY_ID_LAN8842 0x002216C0 +#define PHY_ID_LAN9645X 0x002216D0 #define PHY_ID_KSZ886X 0x00221430 #define PHY_ID_KSZ8863 0x00221435 From b99ccb37eda85a9070d1f9f275fc8ca1f4e33024 Mon Sep 17 00:00:00 2001 From: Kexin Sun Date: Wed, 25 Feb 2026 22:51:59 +0800 Subject: [PATCH 0086/1561] net/hsr: update outdated comments The function hsr_rcv() was renamed hsr_handle_frame() and moved to net/hsr/hsr_slave.c by commit 81ba6afd6e64 ("net/hsr: Switch from dev_add_pack() to netdev_rx_handler_register()"). Update all remaining references in the comments accordingly. Signed-off-by: Kexin Sun Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260225145159.2953-1-kexinsun@smail.nju.edu.cn Signed-off-by: Jakub Kicinski --- net/hsr/hsr_device.c | 2 +- net/hsr/hsr_framereg.c | 4 ++-- net/hsr/hsr_main.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index d1bfc49b5f01..5c3eca2235ce 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -742,7 +742,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], hsr->proto_ops = &hsr_ops; } - /* Make sure we recognize frames from ourselves in hsr_rcv() */ + /* Make sure we recognize frames from ourselves in hsr_handle_frame() */ res = hsr_create_self_node(hsr, hsr_dev->dev_addr, slave[1]->dev_addr); if (res < 0) diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 50996f4de7f9..577fb588bc2f 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -71,8 +71,8 @@ bool hsr_is_node_in_db(struct list_head *node_db, return !!find_node_by_addr_A(node_db, addr); } -/* Helper for device init; the self_node is used in hsr_rcv() to recognize - * frames from self that's been looped over the HSR ring. +/* Helper for device init; the self_node is used in hsr_handle_frame() to + * recognize frames from self that's been looped over the HSR ring. */ int hsr_create_self_node(struct hsr_priv *hsr, const unsigned char addr_a[ETH_ALEN], diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c index bc94b07101d8..33951d9bd3c5 100644 --- a/net/hsr/hsr_main.c +++ b/net/hsr/hsr_main.c @@ -89,7 +89,7 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event, } } - /* Make sure we recognize frames from ourselves in hsr_rcv() */ + /* Make sure we recognize frames from ourselves in hsr_handle_frame() */ port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B); res = hsr_create_self_node(hsr, master->dev->dev_addr, From 5151ec54f5861ad71e08190a7ce2173df788d36a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Feb 2026 02:12:15 +0000 Subject: [PATCH 0087/1561] net: use try_cmpxchg() in lock_sock_nested() Add a fast path in lock_sock_nested(), to avoid acquiring the socket spinlock only to set @owned to one: spin_lock_bh(&sk->sk_lock.slock); if (unlikely(sock_owned_by_user_nocheck(sk))) __lock_sock(sk); sk->sk_lock.owned = 1; spin_unlock_bh(&sk->sk_lock.slock); On x86_64 compiler generates something quite efficient: 00000000000077c0 : 77c0: f3 0f 1e fa endbr64 77c4: e8 00 00 00 00 call __fentry__ 77c9: b9 01 00 00 00 mov $0x1,%ecx 77ce: 31 c0 xor %eax,%eax 77d0: f0 48 0f b1 8f 48 01 00 00 lock cmpxchg %rcx,0x148(%rdi) 77d9: 75 06 jne slow_path 77db: 2e e9 00 00 00 00 cs jmp __x86_return_thunk-0x4 slow_path: ... Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Reviewed-by: Jason Xing Link: https://patch.msgid.link/20260226021215.1764237-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 9 +++++++-- net/core/sock.c | 13 +++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index e7099d3d0416..a7a8b31e9877 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -81,8 +81,13 @@ * mini-semaphore synchronizes multiple users amongst themselves. */ typedef struct { - spinlock_t slock; - int owned; + union { + struct slock_owned { + int owned; + spinlock_t slock; + }; + long combined; + }; wait_queue_head_t wq; /* * We express the mutex-alike socket_lock semantics diff --git a/net/core/sock.c b/net/core/sock.c index 2032be9a03b5..9d841975a7a1 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3780,6 +3780,19 @@ void noinline lock_sock_nested(struct sock *sk, int subclass) mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_); might_sleep(); +#ifdef CONFIG_64BIT + if (sizeof(struct slock_owned) == sizeof(long)) { + socket_lock_t tmp, old; + + tmp.slock = __SPIN_LOCK_UNLOCKED(tmp.slock); + tmp.owned = 1; + old.slock = __SPIN_LOCK_UNLOCKED(old.slock); + old.owned = 0; + if (likely(try_cmpxchg(&sk->sk_lock.combined, + &old.combined, tmp.combined))) + return; + } +#endif spin_lock_bh(&sk->sk_lock.slock); if (unlikely(sock_owned_by_user_nocheck(sk))) __lock_sock(sk); From da89f2e3121531804f45abd33db05b432e42606c Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 26 Feb 2026 15:26:27 +0100 Subject: [PATCH 0088/1561] tls: don't select STREAM_PARSER ktls was converted to its own stream parser in commit 84c61fe1a75b ("tls: rx: do not use the standard strparser"), but the Kconfig dependency was left. The only part of the original strparser that's shared with ktls are a few structs (strp_msg, sk_skb_cb) and the strp_msg helper, those don't require building the net/strparser code. Signed-off-by: Sabrina Dubroca Link: https://patch.msgid.link/cb41e513a30eeaac0b419284cc87433f049b2ee0.1771871995.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- net/tls/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/net/tls/Kconfig b/net/tls/Kconfig index ce8d56a19187..a25bf57f2673 100644 --- a/net/tls/Kconfig +++ b/net/tls/Kconfig @@ -8,7 +8,6 @@ config TLS select CRYPTO select CRYPTO_AES select CRYPTO_GCM - select STREAM_PARSER select NET_SOCK_MSG default n help From 9a04d3b2f0708a9e5e1f731bafb69b040bb934a0 Mon Sep 17 00:00:00 2001 From: Joris Vaisvila Date: Thu, 26 Feb 2026 15:45:18 +0000 Subject: [PATCH 0089/1561] net: ethernet: mtk_eth_soc: avoid writing to ESW registers on MT7628 The MT7628 has a fixed-link PHY and does not expose MAC control registers. Writes to these registers only corrupt the ESW VLAN configuration. This patch explicitly registers no-op phylink_mac_ops for MT7628, as after removing the invalid register accesses, the existing phylink_mac_ops effectively become no-ops. This code was introduced by commit 296c9120752b ("net: ethernet: mediatek: Add MT7628/88 SoC support") Signed-off-by: Joris Vaisvila Reviewed-by: Daniel Golle Reviewed-by: Stefan Roese Link: https://patch.msgid.link/20260226154547.68553-1-joey@tinyisr.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 34 ++++++++++++++++++--- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index e5e2ffa9c542..1060764b9f17 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -562,9 +562,7 @@ static void mtk_mac_config(struct phylink_config *config, unsigned int mode, int val, ge_mode, err = 0; u32 i; - /* MT76x8 has no hardware settings between for the MAC */ - if (!MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628) && - mac->interface != state->interface) { + if (mac->interface != state->interface) { /* Setup soc pin functions */ switch (state->interface) { case PHY_INTERFACE_MODE_TRGMII: @@ -956,6 +954,30 @@ static const struct phylink_mac_ops mtk_phylink_ops = { .mac_enable_tx_lpi = mtk_mac_enable_tx_lpi, }; +static void rt5350_mac_config(struct phylink_config *config, unsigned int mode, + const struct phylink_link_state *state) +{ +} + +static void rt5350_mac_link_down(struct phylink_config *config, unsigned int mode, + phy_interface_t interface) +{ +} + +static void rt5350_mac_link_up(struct phylink_config *config, + struct phy_device *phy, + unsigned int mode, phy_interface_t interface, + int speed, int duplex, bool tx_pause, bool rx_pause) +{ +} + +/* MT76x8 (rt5350-eth) does not expose any MAC control registers */ +static const struct phylink_mac_ops rt5350_phylink_ops = { + .mac_config = rt5350_mac_config, + .mac_link_down = rt5350_mac_link_down, + .mac_link_up = rt5350_mac_link_up, +}; + static void mtk_mdio_config(struct mtk_eth *eth) { u32 val; @@ -4771,6 +4793,7 @@ static const struct net_device_ops mtk_netdev_ops = { static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) { + const struct phylink_mac_ops *mac_ops = &mtk_phylink_ops; const __be32 *_id = of_get_property(np, "reg", NULL); phy_interface_t phy_mode; struct phylink *phylink; @@ -4905,9 +4928,12 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) mac->phylink_config.supported_interfaces); } + if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) + mac_ops = &rt5350_phylink_ops; + phylink = phylink_create(&mac->phylink_config, of_fwnode_handle(mac->of_node), - phy_mode, &mtk_phylink_ops); + phy_mode, mac_ops); if (IS_ERR(phylink)) { err = PTR_ERR(phylink); goto free_netdev; From c31770c49348fb019167fa95119f330597c99193 Mon Sep 17 00:00:00 2001 From: Aaron Tomlin Date: Thu, 26 Feb 2026 11:31:21 -0500 Subject: [PATCH 0090/1561] vmxnet3: Suppress page allocation warning for massive Rx Data ring The vmxnet3 driver supports an Rx Data ring (rx-mini) to optimise the processing of small packets. The size of this ring's DMA-coherent memory allocation is determined by the product of the primary Rx ring size and the data ring descriptor size: sz = rq->rx_ring[0].size * rq->data_ring.desc_size; When a user configures the maximum supported parameters via ethtool (rx_ring[0].size = 4096, data_ring.desc_size = 2048), the required contiguous memory allocation reaches 8 MB (8,388,608 bytes). In environments lacking Contiguous Memory Allocator (CMA), dma_alloc_coherent() falls back to the standard zone buddy allocator. An 8 MB allocation translates to a page order of 11, which strictly exceeds the default MAX_PAGE_ORDER (10) on most architectures. Consequently, __alloc_pages_noprof() catches the oversize request and triggers a loud kernel warning stack trace: WARN_ON_ONCE_GFP(order > MAX_PAGE_ORDER, gfp) This warning is unnecessary and alarming to system administrators because the vmxnet3 driver already handles this allocation failure gracefully. If dma_alloc_coherent() returns NULL, the driver safely disables the Rx Data ring (adapter->rxdataring_enabled = false) and falls back to standard, streaming DMA packet processing. To resolve this, append the __GFP_NOWARN flag to the dma_alloc_coherent() gfp_mask. This instructs the page allocator to silently fail the allocation if it exceeds order limits or memory is too fragmented, preventing the spurious warning stack trace. Furthermore, enhance the subsequent netdev_err() fallback message to include the requested allocation size. This provides critical debugging context to the administrator (e.g., revealing that an 8 MB allocation was attempted and failed) without making hardcoded assumptions about the state of the system's configurations. Reviewed-by: Jijie Shao Signed-off-by: Aaron Tomlin Link: https://patch.msgid.link/20260226163121.4045808-1-atomlin@atomlin.com Signed-off-by: Jakub Kicinski --- drivers/net/vmxnet3/vmxnet3_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 0572f6a9bdb6..40522afc0532 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -2268,10 +2268,10 @@ vmxnet3_rq_create(struct vmxnet3_rx_queue *rq, struct vmxnet3_adapter *adapter) rq->data_ring.base = dma_alloc_coherent(&adapter->pdev->dev, sz, &rq->data_ring.basePA, - GFP_KERNEL); + GFP_KERNEL | __GFP_NOWARN); if (!rq->data_ring.base) { netdev_err(adapter->netdev, - "rx data ring will be disabled\n"); + "failed to allocate %zu bytes, rx data ring will be disabled\n", sz); adapter->rxdataring_enabled = false; } } else { From e8e83b67960c20a9e60d4181631509d59f03488b Mon Sep 17 00:00:00 2001 From: Birger Koblitz Date: Tue, 24 Feb 2026 18:40:14 +0100 Subject: [PATCH 0091/1561] r8152: Add 2500baseT EEE status/configuration support The r8152 driver supports the RTL8156, which is a 2.5Gbit Ethernet controller for USB 3.0, for which support is added for configuring and displaying the EEE advertisement status for 2.5GBit connections. The patch also corrects the determination of whether EEE is active to include the 2.5GBit connection status and make the determination dependent not on the desired speed configuration (tp->speed), but on the actual speed used by the controller. For consistency, this is corrected also for the RTL8152/3. This was tested on an Edimax EU-4307 V1.0 USB-Ethernet adapter with RTL8156, and a SECOMP Value 12.99.1115 USB-C 3.1 Ethernet converter with RTL8153. Signed-off-by: Birger Koblitz Link: https://patch.msgid.link/20260224-b4-eee2g5-v2-1-cf5c83df036e@birger-koblitz.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/r8152.c | 50 ++++++++++++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 8 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 4af85728ac4f..ea7c1eadc864 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -207,6 +207,7 @@ #define OCP_EEE_LPABLE 0xa5d2 #define OCP_10GBT_CTRL 0xa5d4 #define OCP_10GBT_STAT 0xa5d6 +#define OCP_EEE_LPABLE2 0xa6d0 #define OCP_EEE_ADV2 0xa6d4 #define OCP_PHY_STATE 0xa708 /* nway state for 8153 */ #define OCP_PHY_PATCH_STAT 0xb800 @@ -948,6 +949,7 @@ struct r8152 { u16 ocp_base; u16 speed; u16 eee_adv; + u16 eee_adv2; u8 *intr_buff; u8 version; u8 duplex; @@ -5393,7 +5395,7 @@ static void r8156_eee_en(struct r8152 *tp, bool enable) config = ocp_reg_read(tp, OCP_EEE_ADV2); - if (enable) + if (enable && (tp->eee_adv2 & MDIO_EEE_2_5GT)) config |= MDIO_EEE_2_5GT; else config &= ~MDIO_EEE_2_5GT; @@ -8922,7 +8924,8 @@ static void rtl8152_get_strings(struct net_device *dev, u32 stringset, u8 *data) static int r8152_get_eee(struct r8152 *tp, struct ethtool_keee *eee) { - __ETHTOOL_DECLARE_LINK_MODE_MASK(common); + __ETHTOOL_DECLARE_LINK_MODE_MASK(common) = {}; + u16 speed = rtl8152_get_speed(tp); u16 val; val = r8152_mmd_read(tp, MDIO_MMD_PCS, MDIO_PCS_EEE_ABLE); @@ -8936,8 +8939,14 @@ static int r8152_get_eee(struct r8152 *tp, struct ethtool_keee *eee) eee->eee_enabled = tp->eee_en; - linkmode_and(common, eee->advertised, eee->lp_advertised); - eee->eee_active = phy_check_valid(tp->speed, tp->duplex, common); + if (speed & _1000bps) + linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, common); + if (speed & _100bps) + linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, common); + + linkmode_and(common, common, eee->advertised); + linkmode_and(common, common, eee->lp_advertised); + eee->eee_active = !linkmode_empty(common); return 0; } @@ -8948,7 +8957,10 @@ static int r8152_set_eee(struct r8152 *tp, struct ethtool_keee *eee) tp->eee_en = eee->eee_enabled; tp->eee_adv = val; - + if (tp->support_2500full) { + val = linkmode_to_mii_eee_cap2_t(eee->advertised); + tp->eee_adv2 = val; + } rtl_eee_enable(tp, tp->eee_en); return 0; @@ -8956,7 +8968,8 @@ static int r8152_set_eee(struct r8152 *tp, struct ethtool_keee *eee) static int r8153_get_eee(struct r8152 *tp, struct ethtool_keee *eee) { - __ETHTOOL_DECLARE_LINK_MODE_MASK(common); + __ETHTOOL_DECLARE_LINK_MODE_MASK(common) = {}; + u16 speed = rtl8152_get_speed(tp); u16 val; val = ocp_reg_read(tp, OCP_EEE_ABLE); @@ -8968,10 +8981,29 @@ static int r8153_get_eee(struct r8152 *tp, struct ethtool_keee *eee) val = ocp_reg_read(tp, OCP_EEE_LPABLE); mii_eee_cap1_mod_linkmode_t(eee->lp_advertised, val); + if (tp->support_2500full) { + linkmode_set_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, eee->supported); + + val = ocp_reg_read(tp, OCP_EEE_ADV2); + mii_eee_cap2_mod_linkmode_adv_t(eee->advertised, val); + + val = ocp_reg_read(tp, OCP_EEE_LPABLE2); + mii_eee_cap2_mod_linkmode_adv_t(eee->lp_advertised, val); + + if (speed & _2500bps) + linkmode_set_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, common); + } + eee->eee_enabled = tp->eee_en; - linkmode_and(common, eee->advertised, eee->lp_advertised); - eee->eee_active = phy_check_valid(tp->speed, tp->duplex, common); + if (speed & _1000bps) + linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, common); + if (speed & _100bps) + linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, common); + + linkmode_and(common, common, eee->advertised); + linkmode_and(common, common, eee->lp_advertised); + eee->eee_active = !linkmode_empty(common); return 0; } @@ -9512,6 +9544,7 @@ static int rtl_ops_init(struct r8152 *tp) case RTL_VER_11: tp->eee_en = true; tp->eee_adv = MDIO_EEE_1000T | MDIO_EEE_100TX; + tp->eee_adv2 = MDIO_EEE_2_5GT; fallthrough; case RTL_VER_10: ops->init = r8156_init; @@ -9537,6 +9570,7 @@ static int rtl_ops_init(struct r8152 *tp) case RTL_VER_15: tp->eee_en = true; tp->eee_adv = MDIO_EEE_1000T | MDIO_EEE_100TX; + tp->eee_adv2 = MDIO_EEE_2_5GT; ops->init = r8156b_init; ops->enable = rtl8156b_enable; ops->disable = rtl8153_disable; From 9ff2d2a9837015ff4b3579b028aeae8c180aa8d3 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Wed, 25 Feb 2026 14:37:58 +0100 Subject: [PATCH 0092/1561] ipv6: discard fragment queue earlier if there is malformed datagram Currently the kernel IPv6 implementation is not dicarding the fragment queue upon receiving a IPv6 fragment that is not 8 bytes aligned. It relies on queue expiration to free the queue. While RFC 8200 section 4.5 does not explicitly mention that the rest of fragments must be discarded, it does not make sense to keep them. The parameter problem message is sent regardless that. In addition, if the sender is able to re-compose the datagram so it is 8 bytes aligned it would qualify as a new whole datagram not fitting into the same fragment queue. The same situation happens if segment end is exceeding the IPv6 maximum packet length. The sooner we can free resources the better during reassembly, the better. Signed-off-by: Fernando Fernandez Mancera Link: https://patch.msgid.link/20260225133758.4553-1-fmancera@suse.de Signed-off-by: Jakub Kicinski --- net/ipv6/reassembly.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 25ec8001898d..11f9144bebbe 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -132,6 +132,9 @@ static int ip6_frag_queue(struct net *net, /* note that if prob_offset is set, the skb is freed elsewhere, * we do not free it here. */ + inet_frag_kill(&fq->q, refs); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_REASMFAILS); return -1; } @@ -163,6 +166,9 @@ static int ip6_frag_queue(struct net *net, * this case. -DaveM */ *prob_offset = offsetof(struct ipv6hdr, payload_len); + inet_frag_kill(&fq->q, refs); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_REASMFAILS); return -1; } if (end > fq->q.len) { From 6466441a5ecd1c1168264e4c322bae455579b156 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Feb 2026 04:12:13 +0000 Subject: [PATCH 0093/1561] net: inline skb_add_rx_frag_netmem() This critical helper (via skb_add_rx_frag()) is mostly used from drivers rx fast path. It is time to inline it, this actually saves space in vmlinux: size vmlinux.old vmlinux text data bss dec hex filename 37350766 23092977 4846992 65290735 3e441ef vmlinux.old 37350600 23092977 4846992 65290569 3e44149 vmlinux Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260226041213.1892561-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 13 +++++++++++-- net/core/skbuff.c | 11 ----------- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index daa4e4944ce3..9cc98f850f1d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2682,8 +2682,17 @@ static inline void skb_fill_page_desc_noacc(struct sk_buff *skb, int i, shinfo->nr_frags = i + 1; } -void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, netmem_ref netmem, - int off, int size, unsigned int truesize); +static inline void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, + netmem_ref netmem, int off, + int size, unsigned int truesize) +{ + DEBUG_NET_WARN_ON_ONCE(size > truesize); + + skb_fill_netmem_desc(skb, i, netmem, off, size); + skb->len += size; + skb->data_len += size; + skb->truesize += truesize; +} static inline void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, int size, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0e217041958a..513cbfed19bc 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -891,17 +891,6 @@ skb_fail: } EXPORT_SYMBOL(napi_alloc_skb); -void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, netmem_ref netmem, - int off, int size, unsigned int truesize) -{ - DEBUG_NET_WARN_ON_ONCE(size > truesize); - - skb_fill_netmem_desc(skb, i, netmem, off, size); - skb->len += size; - skb->data_len += size; - skb->truesize += truesize; -} -EXPORT_SYMBOL(skb_add_rx_frag_netmem); void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size, unsigned int truesize) From 2164242c50084bd5b359b7d554d3a124e2c19074 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 26 Feb 2026 14:10:04 -0800 Subject: [PATCH 0094/1561] NFC: fix header file kernel-doc warnings Repair some of the comments: - use the correct enum names - don't use "/**" for a non-kernel-doc comment to fix these warnings: Warning: include/uapi/linux/nfc.h:127 Excess enum value '@NFC_EVENT_DEVICE_DEACTIVATED' description in 'nfc_commands' Warning: include/uapi/linux/nfc.h:204 Excess enum value '@NFC_ATTR_APDU' description in 'nfc_attrs' Warning: include/uapi/linux/nfc.h:302 expecting prototype for Pseudo(). Prototype was for NFC_RAW_HEADER_SIZE() instead Signed-off-by: Randy Dunlap Link: https://patch.msgid.link/20260226221004.1037909-1-rdunlap@infradead.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/nfc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index 2f5b4be25261..82805eee4357 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -55,7 +55,7 @@ * (it sends %NFC_ATTR_DEVICE_INDEX) * @NFC_EVENT_TM_ACTIVATED: event emitted when the adapter is activated in * target mode. - * @NFC_EVENT_DEVICE_DEACTIVATED: event emitted when the adapter is deactivated + * @NFC_EVENT_TM_DEACTIVATED: event emitted when the adapter is deactivated * from target mode. * @NFC_CMD_LLC_GET_PARAMS: request LTO, RW, and MIUX parameters for a device * @NFC_CMD_LLC_SET_PARAMS: set one or more of LTO, RW, and MIUX parameters for @@ -156,7 +156,7 @@ enum nfc_commands { * @NFC_ATTR_SE_INDEX: Secure element index * @NFC_ATTR_SE_TYPE: Secure element type (UICC or EMBEDDED) * @NFC_ATTR_FIRMWARE_DOWNLOAD_STATUS: Firmware download operation status - * @NFC_ATTR_APDU: Secure element APDU + * @NFC_ATTR_SE_APDU: Secure element APDU * @NFC_ATTR_TARGET_ISO15693_DSFID: ISO 15693 Data Storage Format Identifier * @NFC_ATTR_TARGET_ISO15693_UID: ISO 15693 Unique Identifier * @NFC_ATTR_SE_PARAMS: Parameters data from an evt_transaction @@ -291,7 +291,7 @@ struct sockaddr_nfc_llcp { #define NFC_HEADER_SIZE 1 -/** +/* * Pseudo-header info for raw socket packets * First byte is the adapter index * Second byte contains flags From 57cc8ab3e9f2c460204bc8facb7932b9b53be878 Mon Sep 17 00:00:00 2001 From: Leon Kral Date: Fri, 27 Feb 2026 00:07:47 +0000 Subject: [PATCH 0095/1561] net/handshake: Fixed grammar mistake The word "a" was used instead of "an" which is grammatically incorrect. Fixed by changing from "a" to "an". This improves readability of the documentation. Signed-off-by: Leon Kral Reviewed-by: Alistair Francis Link: https://patch.msgid.link/20260227001151.41610-1-leon.j.kral@protonmail.com Signed-off-by: Jakub Kicinski --- Documentation/networking/tls-handshake.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/tls-handshake.rst b/Documentation/networking/tls-handshake.rst index 6f5ea1646a47..4f7bc1087df9 100644 --- a/Documentation/networking/tls-handshake.rst +++ b/Documentation/networking/tls-handshake.rst @@ -7,7 +7,7 @@ In-Kernel TLS Handshake Overview ======== -Transport Layer Security (TLS) is a Upper Layer Protocol (ULP) that runs +Transport Layer Security (TLS) is an Upper Layer Protocol (ULP) that runs over TCP. TLS provides end-to-end data integrity and confidentiality in addition to peer authentication. From 97c55c1298ac89f4dc00c055a593c13004141661 Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Fri, 27 Feb 2026 08:33:47 +0800 Subject: [PATCH 0096/1561] net: ethernet: litex: use devm_register_netdev() to register netdev Use devm_register_netdev to avoid unnecessary remove() callback in platform_driver structure. Signed-off-by: Inochi Amaoto Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20260227003351.752934-2-inochiama@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/litex/litex_liteeth.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/litex/litex_liteeth.c b/drivers/net/ethernet/litex/litex_liteeth.c index 829a4b828f8e..67ad1058c2ab 100644 --- a/drivers/net/ethernet/litex/litex_liteeth.c +++ b/drivers/net/ethernet/litex/litex_liteeth.c @@ -232,6 +232,7 @@ static void liteeth_setup_slots(struct liteeth *priv) static int liteeth_probe(struct platform_device *pdev) { + struct device *dev = &pdev->dev; struct net_device *netdev; void __iomem *buf_base; struct liteeth *priv; @@ -282,7 +283,7 @@ static int liteeth_probe(struct platform_device *pdev) netdev->netdev_ops = &liteeth_netdev_ops; - err = register_netdev(netdev); + err = devm_register_netdev(dev, netdev); if (err) { dev_err(&pdev->dev, "Failed to register netdev %d\n", err); return err; @@ -294,13 +295,6 @@ static int liteeth_probe(struct platform_device *pdev) return 0; } -static void liteeth_remove(struct platform_device *pdev) -{ - struct net_device *netdev = platform_get_drvdata(pdev); - - unregister_netdev(netdev); -} - static const struct of_device_id liteeth_of_match[] = { { .compatible = "litex,liteeth" }, { } @@ -309,7 +303,6 @@ MODULE_DEVICE_TABLE(of, liteeth_of_match); static struct platform_driver liteeth_driver = { .probe = liteeth_probe, - .remove = liteeth_remove, .driver = { .name = DRV_NAME, .of_match_table = liteeth_of_match, From 621e3634dfab890e8d97e082588b3d6d6d688d91 Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Fri, 27 Feb 2026 08:33:48 +0800 Subject: [PATCH 0097/1561] net: ethernet: litex: use device pointer to simplify code. As there is already a device pointer in the probe function, replace all "&pdev->dev" pattern with this predefined device pointer. Signed-off-by: Inochi Amaoto Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20260227003351.752934-3-inochiama@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/litex/litex_liteeth.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/litex/litex_liteeth.c b/drivers/net/ethernet/litex/litex_liteeth.c index 67ad1058c2ab..108d0a0db206 100644 --- a/drivers/net/ethernet/litex/litex_liteeth.c +++ b/drivers/net/ethernet/litex/litex_liteeth.c @@ -238,7 +238,7 @@ static int liteeth_probe(struct platform_device *pdev) struct liteeth *priv; int irq, err; - netdev = devm_alloc_etherdev(&pdev->dev, sizeof(*priv)); + netdev = devm_alloc_etherdev(dev, sizeof(*priv)); if (!netdev) return -ENOMEM; @@ -247,9 +247,9 @@ static int liteeth_probe(struct platform_device *pdev) priv = netdev_priv(netdev); priv->netdev = netdev; - priv->dev = &pdev->dev; + priv->dev = dev; - netdev->tstats = devm_netdev_alloc_pcpu_stats(&pdev->dev, + netdev->tstats = devm_netdev_alloc_pcpu_stats(dev, struct pcpu_sw_netstats); if (!netdev->tstats) return -ENOMEM; @@ -277,7 +277,7 @@ static int liteeth_probe(struct platform_device *pdev) priv->tx_base = buf_base + priv->num_rx_slots * priv->slot_size; priv->tx_slot = 0; - err = of_get_ethdev_address(pdev->dev.of_node, netdev); + err = of_get_ethdev_address(dev->of_node, netdev); if (err) eth_hw_addr_random(netdev); @@ -285,7 +285,7 @@ static int liteeth_probe(struct platform_device *pdev) err = devm_register_netdev(dev, netdev); if (err) { - dev_err(&pdev->dev, "Failed to register netdev %d\n", err); + dev_err(dev, "Failed to register netdev %d\n", err); return err; } From 58e443b773ef054ae069cf777ba19adb99d73829 Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Sat, 28 Feb 2026 19:13:18 +0800 Subject: [PATCH 0098/1561] net: fix sock compilation error under CONFIG_PREEMPT_RT When CONFIG_PREEMPT_RT is enabled, __SPIN_LOCK_UNLOCKED() expands to a brace-enclosed initializer rather than a compound literal, which cannot be used in assignment expressions. This causes a build failure: net/core/sock.c:3787:29: error: expected expression before '{' token 3787 | tmp.slock = __SPIN_LOCK_UNLOCKED(tmp.slock); Use declaration-with-initializer instead of assignment, consistent with how __SPIN_LOCK_UNLOCKED() is used elsewhere in the kernel (e.g. DEFINE_SPINLOCK). Fixes: 5151ec54f586 ("net: use try_cmpxchg() in lock_sock_nested()") Suggested-by: Eric Dumazet Signed-off-by: Jiayuan Chen Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260228111319.79506-1-jiayuan.chen@linux.dev Signed-off-by: Jakub Kicinski --- net/core/sock.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index 9d841975a7a1..fba4d5b8553c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3782,12 +3782,15 @@ void noinline lock_sock_nested(struct sock *sk, int subclass) might_sleep(); #ifdef CONFIG_64BIT if (sizeof(struct slock_owned) == sizeof(long)) { - socket_lock_t tmp, old; + socket_lock_t tmp = { + .slock = __SPIN_LOCK_UNLOCKED(tmp.slock), + .owned = 1, + }; + socket_lock_t old = { + .slock = __SPIN_LOCK_UNLOCKED(old.slock), + .owned = 0, + }; - tmp.slock = __SPIN_LOCK_UNLOCKED(tmp.slock); - tmp.owned = 1; - old.slock = __SPIN_LOCK_UNLOCKED(old.slock); - old.owned = 0; if (likely(try_cmpxchg(&sk->sk_lock.combined, &old.combined, tmp.combined))) return; From a9a13c7379ec577e676dca57aa2d7b7980aaa05b Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Thu, 26 Feb 2026 22:49:29 +0100 Subject: [PATCH 0099/1561] keys, dns: Use kmalloc_flex to improve dns_resolver_preparse Use kmalloc_flex() when allocating a new 'struct user_key_payload' in dns_resolver_preparse() to replace the open-coded size arithmetic. Signed-off-by: Thorsten Blum Link: https://patch.msgid.link/20260226214930.785423-3-thorsten.blum@linux.dev Signed-off-by: Jakub Kicinski --- net/dns_resolver/dns_key.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index c42ddd85ff1f..891287a86979 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -215,7 +215,7 @@ store_result: kdebug("store result"); prep->quotalen = result_len; - upayload = kmalloc(sizeof(*upayload) + result_len + 1, GFP_KERNEL); + upayload = kmalloc_flex(*upayload, data, result_len + 1); if (!upayload) { kleave(" = -ENOMEM"); return -ENOMEM; From 07993df560917357610e0625a9a2e7531c3211fc Mon Sep 17 00:00:00 2001 From: Matt Olson Date: Wed, 25 Feb 2026 10:23:41 -0800 Subject: [PATCH 0100/1561] gve: Update QPL page registration logic For DQO, change QPL page registration logic to be more flexible to honor the "max_registered_pages" parameter from the gVNIC device. Previously the number of RX pages per QPL was hardcoded to twice the ring size, and the number of TX pages per QPL was dictated by the device in the DQO-QPL device option. Now [in DQO-QPL mode], the driver will ignore the "tx_pages_per_qpl" parameter indicated in the DQO-QPL device option and instead allocate up to (tx_queue_length / 2) pages per TX QPL and up to (rx_queue_length * 2) pages per RX QPL while keeping the total number of pages under the "max_registered_pages". Merge DQO and GQI QPL page calculation logic into a unified gve_update_num_qpl_pages function. Add rx_pages_per_qpl to the priv struct for consumption by both DQO and GQI. Signed-off-by: Matt Olson Signed-off-by: Max Yuan Reviewed-by: Jordan Rhee Reviewed-by: Harshitha Ramamurthy Reviewed-by: Willem de Bruijn Reviewed-by: Praveen Kaligineedi Signed-off-by: Joshua Washington Link: https://patch.msgid.link/20260225182342.1049816-2-joshwash@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve.h | 18 ++++---- drivers/net/ethernet/google/gve/gve_adminq.c | 8 ---- .../ethernet/google/gve/gve_buffer_mgmt_dqo.c | 2 +- drivers/net/ethernet/google/gve/gve_main.c | 41 +++++++++++++++++++ drivers/net/ethernet/google/gve/gve_rx.c | 5 +-- drivers/net/ethernet/google/gve/gve_rx_dqo.c | 6 +-- drivers/net/ethernet/google/gve/gve_tx.c | 5 +-- drivers/net/ethernet/google/gve/gve_tx_dqo.c | 4 +- 8 files changed, 54 insertions(+), 35 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index cbdf3a842cfe..1d66d3834f7e 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -79,8 +79,6 @@ #define GVE_DEFAULT_HEADER_BUFFER_SIZE 128 -#define DQO_QPL_DEFAULT_TX_PAGES 512 - /* Maximum TSO size supported on DQO */ #define GVE_DQO_TX_MAX 0x3FFFF @@ -711,6 +709,7 @@ struct gve_ptype_lut { /* Parameters for allocating resources for tx queues */ struct gve_tx_alloc_rings_cfg { struct gve_tx_queue_config *qcfg; + u16 pages_per_qpl; u16 num_xdp_rings; @@ -726,6 +725,7 @@ struct gve_rx_alloc_rings_cfg { /* tx config is also needed to determine QPL ids */ struct gve_rx_queue_config *qcfg_rx; struct gve_tx_queue_config *qcfg_tx; + u16 pages_per_qpl; u16 ring_size; u16 packet_buffer_size; @@ -816,7 +816,8 @@ struct gve_priv { u16 min_rx_desc_cnt; bool modify_ring_size_enabled; bool default_min_ring_size; - u16 tx_pages_per_qpl; /* Suggested number of pages per qpl for TX queues by NIC */ + u16 tx_pages_per_qpl; + u16 rx_pages_per_qpl; u64 max_registered_pages; u64 num_registered_pages; /* num pages registered with NIC */ struct bpf_prog *xdp_prog; /* XDP BPF program */ @@ -1150,14 +1151,6 @@ static inline u32 gve_rx_start_qpl_id(const struct gve_tx_queue_config *tx_cfg) return gve_get_rx_qpl_id(tx_cfg, 0); } -static inline u32 gve_get_rx_pages_per_qpl_dqo(u32 rx_desc_cnt) -{ - /* For DQO, page count should be more than ring size for - * out-of-order completions. Set it to two times of ring size. - */ - return 2 * rx_desc_cnt; -} - /* Returns the correct dma direction for tx and rx qpls */ static inline enum dma_data_direction gve_qpl_dma_dir(struct gve_priv *priv, int id) @@ -1308,6 +1301,9 @@ int gve_reset(struct gve_priv *priv, bool attempt_teardown); void gve_get_curr_alloc_cfgs(struct gve_priv *priv, struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, struct gve_rx_alloc_rings_cfg *rx_alloc_cfg); +void gve_update_num_qpl_pages(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg, + struct gve_tx_alloc_rings_cfg *tx_alloc_cfg); int gve_adjust_config(struct gve_priv *priv, struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, struct gve_rx_alloc_rings_cfg *rx_alloc_cfg); diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c index b72cc0fa2ba2..2c233009621b 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.c +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -970,14 +970,6 @@ static void gve_enable_supported_features(struct gve_priv *priv, priv->dev->max_mtu = be16_to_cpu(dev_op_jumbo_frames->max_mtu); } - /* Override pages for qpl for DQO-QPL */ - if (dev_op_dqo_qpl) { - priv->tx_pages_per_qpl = - be16_to_cpu(dev_op_dqo_qpl->tx_pages_per_qpl); - if (priv->tx_pages_per_qpl == 0) - priv->tx_pages_per_qpl = DQO_QPL_DEFAULT_TX_PAGES; - } - if (dev_op_buffer_sizes && (supported_features_mask & GVE_SUP_BUFFER_SIZES_MASK)) { priv->max_rx_buffer_size = diff --git a/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c b/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c index 0e2b703c673a..6880d1531ba5 100644 --- a/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c @@ -133,7 +133,7 @@ int gve_alloc_qpl_page_dqo(struct gve_rx_ring *rx, u32 idx; idx = rx->dqo.next_qpl_page_idx; - if (idx >= gve_get_rx_pages_per_qpl_dqo(priv->rx_desc_cnt)) { + if (idx >= priv->rx_pages_per_qpl) { net_err_ratelimited("%s: Out of QPL pages\n", priv->dev->name); return -ENOMEM; diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 9eb4b3614c4f..c654cf503c1a 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -966,6 +967,7 @@ static void gve_tx_get_curr_alloc_cfg(struct gve_priv *priv, cfg->qcfg = &priv->tx_cfg; cfg->raw_addressing = !gve_is_qpl(priv); cfg->ring_size = priv->tx_desc_cnt; + cfg->pages_per_qpl = priv->tx_pages_per_qpl; cfg->num_xdp_rings = cfg->qcfg->num_xdp_queues; cfg->tx = priv->tx; } @@ -997,12 +999,48 @@ static void gve_tx_start_rings(struct gve_priv *priv, int num_rings) } } +void gve_update_num_qpl_pages(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg, + struct gve_tx_alloc_rings_cfg *tx_alloc_cfg) +{ + u64 ideal_tx_pages, ideal_rx_pages; + u16 tx_num_queues, rx_num_queues; + u64 max_pages, tx_pages; + + if (priv->queue_format == GVE_GQI_QPL_FORMAT) { + rx_alloc_cfg->pages_per_qpl = rx_alloc_cfg->ring_size; + } else if (priv->queue_format == GVE_DQO_QPL_FORMAT) { + /* + * We want 2 pages per RX descriptor and half a page per TX + * descriptor, which means the fraction ideal_tx_pages / + * (ideal_tx_pages + ideal_rx_pages) of the pages we allocate + * should be for TX. Shrink proportionally as necessary to avoid + * allocating more than max_registered_pages total pages. + */ + tx_num_queues = tx_alloc_cfg->qcfg->num_queues; + rx_num_queues = rx_alloc_cfg->qcfg_rx->num_queues; + + ideal_tx_pages = tx_alloc_cfg->ring_size * tx_num_queues / 2; + ideal_rx_pages = rx_alloc_cfg->ring_size * rx_num_queues * 2; + max_pages = min(priv->max_registered_pages, + ideal_tx_pages + ideal_rx_pages); + + tx_pages = div64_u64(max_pages * ideal_tx_pages, + ideal_tx_pages + ideal_rx_pages); + tx_alloc_cfg->pages_per_qpl = div_u64(tx_pages, tx_num_queues); + rx_alloc_cfg->pages_per_qpl = div_u64(max_pages - tx_pages, + rx_num_queues); + } +} + static int gve_queues_mem_alloc(struct gve_priv *priv, struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) { int err; + gve_update_num_qpl_pages(priv, rx_alloc_cfg, tx_alloc_cfg); + if (gve_is_gqi(priv)) err = gve_tx_alloc_rings_gqi(priv, tx_alloc_cfg); else @@ -1293,6 +1331,7 @@ static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv, cfg->raw_addressing = !gve_is_qpl(priv); cfg->enable_header_split = priv->header_split_enabled; cfg->ring_size = priv->rx_desc_cnt; + cfg->pages_per_qpl = priv->rx_pages_per_qpl; cfg->packet_buffer_size = priv->rx_cfg.packet_buffer_size; cfg->rx = priv->rx; cfg->xdp = !!cfg->qcfg_tx->num_xdp_queues; @@ -1372,6 +1411,8 @@ static int gve_queues_start(struct gve_priv *priv, priv->rx_cfg = *rx_alloc_cfg->qcfg_rx; priv->tx_desc_cnt = tx_alloc_cfg->ring_size; priv->rx_desc_cnt = rx_alloc_cfg->ring_size; + priv->tx_pages_per_qpl = tx_alloc_cfg->pages_per_qpl; + priv->rx_pages_per_qpl = rx_alloc_cfg->pages_per_qpl; gve_tx_start_rings(priv, gve_num_tx_queues(priv)); gve_rx_start_rings(priv, rx_alloc_cfg->qcfg_rx->num_queues); diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index 4eb6ed29d3cc..81ea800e66e9 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -278,7 +278,6 @@ int gve_rx_alloc_ring_gqi(struct gve_priv *priv, struct device *hdev = &priv->pdev->dev; u32 slots = cfg->ring_size; int filled_pages; - int qpl_page_cnt; u32 qpl_id = 0; size_t bytes; int err; @@ -314,10 +313,8 @@ int gve_rx_alloc_ring_gqi(struct gve_priv *priv, if (!rx->data.raw_addressing) { qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num); - qpl_page_cnt = cfg->ring_size; - rx->data.qpl = gve_alloc_queue_page_list(priv, qpl_id, - qpl_page_cnt); + cfg->pages_per_qpl); if (!rx->data.qpl) { err = -ENOMEM; goto abort_with_copy_pool; diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index c706c7932159..3b10139941ea 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -218,7 +218,6 @@ int gve_rx_alloc_ring_dqo(struct gve_priv *priv, { struct device *hdev = &priv->pdev->dev; struct page_pool *pool; - int qpl_page_cnt; size_t size; u32 qpl_id; @@ -246,7 +245,7 @@ int gve_rx_alloc_ring_dqo(struct gve_priv *priv, XSK_CHECK_PRIV_TYPE(struct gve_xdp_buff); rx->dqo.num_buf_states = cfg->raw_addressing ? buffer_queue_slots : - gve_get_rx_pages_per_qpl_dqo(cfg->ring_size); + cfg->pages_per_qpl; rx->dqo.buf_states = kvcalloc_node(rx->dqo.num_buf_states, sizeof(rx->dqo.buf_states[0]), GFP_KERNEL, priv->numa_node); @@ -281,10 +280,9 @@ int gve_rx_alloc_ring_dqo(struct gve_priv *priv, rx->dqo.page_pool = pool; } else { qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num); - qpl_page_cnt = gve_get_rx_pages_per_qpl_dqo(cfg->ring_size); rx->dqo.qpl = gve_alloc_queue_page_list(priv, qpl_id, - qpl_page_cnt); + cfg->pages_per_qpl); if (!rx->dqo.qpl) goto err; rx->dqo.next_qpl_page_idx = 0; diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c index 0b856e269eac..79338fd9bf66 100644 --- a/drivers/net/ethernet/google/gve/gve_tx.c +++ b/drivers/net/ethernet/google/gve/gve_tx.c @@ -264,7 +264,6 @@ static int gve_tx_alloc_ring_gqi(struct gve_priv *priv, int idx) { struct device *hdev = &priv->pdev->dev; - int qpl_page_cnt; u32 qpl_id = 0; size_t bytes; @@ -291,10 +290,8 @@ static int gve_tx_alloc_ring_gqi(struct gve_priv *priv, tx->dev = hdev; if (!tx->raw_addressing) { qpl_id = gve_tx_qpl_id(priv, tx->q_num); - qpl_page_cnt = priv->tx_pages_per_qpl; - tx->tx_fifo.qpl = gve_alloc_queue_page_list(priv, qpl_id, - qpl_page_cnt); + cfg->pages_per_qpl); if (!tx->tx_fifo.qpl) goto abort_with_desc; diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c index 4db9d4f9ed6f..f33035018b89 100644 --- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c @@ -311,7 +311,6 @@ static int gve_tx_alloc_ring_dqo(struct gve_priv *priv, { struct device *hdev = &priv->pdev->dev; int num_pending_packets; - int qpl_page_cnt; size_t bytes; u32 qpl_id; int i; @@ -392,10 +391,9 @@ static int gve_tx_alloc_ring_dqo(struct gve_priv *priv, if (!cfg->raw_addressing) { qpl_id = gve_tx_qpl_id(priv, tx->q_num); - qpl_page_cnt = priv->tx_pages_per_qpl; tx->dqo.qpl = gve_alloc_queue_page_list(priv, qpl_id, - qpl_page_cnt); + cfg->pages_per_qpl); if (!tx->dqo.qpl) goto err; From a2f19184014f309165d2d4cfb41088b75c1121a4 Mon Sep 17 00:00:00 2001 From: Matt Olson Date: Wed, 25 Feb 2026 10:23:42 -0800 Subject: [PATCH 0101/1561] gve: Enable reading max ring size from the device in DQO-QPL mode The gVNIC device indicates a device option (MODIFY_RING) to the driver, which presents a range of ring sizes from which the user is allowed to select. But in DQO-QPL queue format, the driver ignores the "max" of this range and instead allows the user to configure the ring size in the range [min, default]. This was done because increasing the ring size could result in the number of registered pages being higher than the max allowed by the device. In order to support large ring sizes, stop ignoring the "max" of the range presented in the MODIFY_RING option. Signed-off-by: Matt Olson Signed-off-by: Max Yuan Reviewed-by: Jordan Rhee Reviewed-by: Harshitha Ramamurthy Reviewed-by: Praveen Kaligineedi Signed-off-by: Joshua Washington Link: https://patch.msgid.link/20260225182342.1049816-3-joshwash@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_adminq.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c index 2c233009621b..b5f105709e49 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.c +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -989,12 +989,10 @@ static void gve_enable_supported_features(struct gve_priv *priv, if (dev_op_modify_ring && (supported_features_mask & GVE_SUP_MODIFY_RING_MASK)) { priv->modify_ring_size_enabled = true; - - /* max ring size for DQO QPL should not be overwritten because of device limit */ - if (priv->queue_format != GVE_DQO_QPL_FORMAT) { - priv->max_rx_desc_cnt = be16_to_cpu(dev_op_modify_ring->max_rx_ring_size); - priv->max_tx_desc_cnt = be16_to_cpu(dev_op_modify_ring->max_tx_ring_size); - } + priv->max_rx_desc_cnt = + be16_to_cpu(dev_op_modify_ring->max_rx_ring_size); + priv->max_tx_desc_cnt = + be16_to_cpu(dev_op_modify_ring->max_tx_ring_size); if (priv->default_min_ring_size) { /* If device hasn't provided minimums, use default minimums */ priv->min_tx_desc_cnt = GVE_DEFAULT_MIN_TX_RING_SIZE; From ded4a02e7d0497c91ec8665e904c7dc817922993 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Fri, 27 Feb 2026 01:45:42 +0100 Subject: [PATCH 0102/1561] ipv6: sit: Replace deprecated strcpy with strscpy strcpy() has been deprecated [1] because it performs no bounds checking on the destination buffer, which can lead to buffer overflows. Replace it with the safer strscpy(). Use the two-argument version of strscpy() to copy 'parms->name' in ipip6_tunnel_locate(). Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#strcpy [1] Signed-off-by: Thorsten Blum Link: https://patch.msgid.link/20260227004541.798966-3-thorsten.blum@linux.dev Signed-off-by: Jakub Kicinski --- net/ipv6/sit.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 6a7b8abb0477..ef2e5111fb3a 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -256,9 +257,9 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net, if (parms->name[0]) { if (!dev_valid_name(parms->name)) goto failed; - strscpy(name, parms->name, IFNAMSIZ); + strscpy(name, parms->name); } else { - strcpy(name, "sit%d"); + strscpy(name, "sit%d"); } dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, ipip6_tunnel_setup); @@ -275,7 +276,7 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net, goto failed_free; if (!parms->name[0]) - strcpy(parms->name, dev->name); + strscpy(parms->name, dev->name); return nt; @@ -1442,7 +1443,7 @@ static int ipip6_tunnel_init(struct net_device *dev) int err; tunnel->dev = dev; - strcpy(tunnel->parms.name, dev->name); + strscpy(tunnel->parms.name, dev->name); ipip6_tunnel_bind_dev(dev); @@ -1863,7 +1864,7 @@ static int __net_init sit_init_net(struct net *net) ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn); ipip6_fb_tunnel_init(sitn->fb_tunnel_dev); - strcpy(t->parms.name, sitn->fb_tunnel_dev->name); + strscpy(t->parms.name, sitn->fb_tunnel_dev->name); return 0; err_reg_dev: From 1396771b0b7741d517d591cfb63333d3f97a8f6d Mon Sep 17 00:00:00 2001 From: Ratheesh Kannoth Date: Tue, 24 Feb 2026 13:29:57 +0530 Subject: [PATCH 0103/1561] octeontx2-af: npc: cn20k: Index management In CN20K silicon, the MCAM is divided vertically into two banks. Each bank has a depth of 8192. The MCAM is divided horizontally into 32 subbanks, with each subbank having a depth of 256. Each subbank can accommodate either x2 keys or x4 keys. x2 keys are 256 bits in size, and x4 keys are 512 bits in size. Bank1 Bank0 |-----------------------------| | | | subbank 31 { depth 256 } | | | |-----------------------------| | | | subbank 30 | | | ------------------------------ ............................... |-----------------------------| | | | subbank 0 | | | ------------------------------| This patch implements the following allocation schemes in NPC. The allocation API accepts reference (ref), limit, contig, priority, and count values. For example, specifying ref=100, limit=200, contig=1, priority=LOW, and count=20 will allocate 20 contiguous MCAM entries between entries 100 and 200. 1. Contiguous allocation with ref, limit, and priority. 2. Non-contiguous allocation with ref, limit, and priority. 3. Non-contiguous allocation without ref. 4. Contiguous allocation without ref. Signed-off-by: Ratheesh Kannoth Link: https://patch.msgid.link/20260224080009.4147301-2-rkannoth@marvell.com Signed-off-by: Jakub Kicinski --- .../ethernet/marvell/octeontx2/af/Makefile | 2 +- .../marvell/octeontx2/af/cn20k/debugfs.c | 12 + .../marvell/octeontx2/af/cn20k/debugfs.h | 3 + .../ethernet/marvell/octeontx2/af/cn20k/npc.c | 1786 +++++++++++++++++ .../ethernet/marvell/octeontx2/af/cn20k/npc.h | 110 + .../ethernet/marvell/octeontx2/af/cn20k/reg.h | 3 + .../ethernet/marvell/octeontx2/af/common.h | 4 - .../net/ethernet/marvell/octeontx2/af/mbox.h | 18 + .../marvell/octeontx2/af/rvu_debugfs.c | 3 + .../ethernet/marvell/octeontx2/af/rvu_npc.c | 8 +- 10 files changed, 1943 insertions(+), 6 deletions(-) create mode 100644 drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c create mode 100644 drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h diff --git a/drivers/net/ethernet/marvell/octeontx2/af/Makefile b/drivers/net/ethernet/marvell/octeontx2/af/Makefile index 244de500963e..91b7d6e96a61 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/Makefile +++ b/drivers/net/ethernet/marvell/octeontx2/af/Makefile @@ -13,4 +13,4 @@ rvu_af-y := cgx.o rvu.o rvu_cgx.o rvu_npa.o rvu_nix.o \ rvu_cpt.o rvu_devlink.o rpm.o rvu_cn10k.o rvu_switch.o \ rvu_sdp.o rvu_npc_hash.o mcs.o mcs_rvu_if.o mcs_cnf10kb.o \ rvu_rep.o cn20k/mbox_init.o cn20k/nix.o cn20k/debugfs.o \ - cn20k/npa.o + cn20k/npa.o cn20k/npc.o diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c index 498968bf4cf5..0ba123be6643 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c @@ -11,7 +11,19 @@ #include #include "struct.h" +#include "rvu.h" #include "debugfs.h" +#include "cn20k/npc.h" + +int npc_cn20k_debugfs_init(struct rvu *rvu) +{ + return 0; +} + +void npc_cn20k_debugfs_deinit(struct rvu *rvu) +{ + debugfs_remove_recursive(rvu->rvu_dbg.npc); +} void print_nix_cn20k_sq_ctx(struct seq_file *m, struct nix_cn20k_sq_ctx_s *sq_ctx) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.h b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.h index a2e3a2cd6edb..0c5f05883666 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.h @@ -16,6 +16,9 @@ #include "struct.h" #include "../mbox.h" +int npc_cn20k_debugfs_init(struct rvu *rvu); +void npc_cn20k_debugfs_deinit(struct rvu *rvu); + void print_nix_cn20k_sq_ctx(struct seq_file *m, struct nix_cn20k_sq_ctx_s *sq_ctx); void print_nix_cn20k_cq_ctx(struct seq_file *m, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c new file mode 100644 index 000000000000..4ee946380212 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c @@ -0,0 +1,1786 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell RVU Admin Function driver + * + * Copyright (C) 2026 Marvell. + * + */ +#include +#include + +#include "cn20k/npc.h" +#include "cn20k/reg.h" + +static struct npc_priv_t npc_priv = { + .num_banks = MAX_NUM_BANKS, +}; + +static const char *npc_kw_name[NPC_MCAM_KEY_MAX] = { + [NPC_MCAM_KEY_DYN] = "DYNAMIC", + [NPC_MCAM_KEY_X2] = "X2", + [NPC_MCAM_KEY_X4] = "X4", +}; + +struct npc_priv_t *npc_priv_get(void) +{ + return &npc_priv; +} + +static int npc_subbank_idx_2_mcam_idx(struct rvu *rvu, struct npc_subbank *sb, + u16 sub_off, u16 *mcam_idx) +{ + int off, bot; + + /* for x4 section, maximum allowed subbank index = + * subsection depth - 1 + */ + if (sb->key_type == NPC_MCAM_KEY_X4 && + sub_off >= npc_priv.subbank_depth) { + dev_err(rvu->dev, + "%s: Failed to get mcam idx (x4) sb->idx=%u sub_off=%u", + __func__, sb->idx, sub_off); + return -EINVAL; + } + + /* for x2 section, maximum allowed subbank index = + * 2 * subsection depth - 1 + */ + if (sb->key_type == NPC_MCAM_KEY_X2 && + sub_off >= npc_priv.subbank_depth * 2) { + dev_err(rvu->dev, + "%s: Failed to get mcam idx (x2) sb->idx=%u sub_off=%u", + __func__, sb->idx, sub_off); + return -EINVAL; + } + + /* Find subbank offset from respective subbank (w.r.t bank) */ + off = sub_off & (npc_priv.subbank_depth - 1); + + /* if subsection idx is in bank1, add bank depth, + * which is part of sb->b1b + */ + bot = sub_off >= npc_priv.subbank_depth ? sb->b1b : sb->b0b; + + *mcam_idx = bot + off; + return 0; +} + +static int npc_mcam_idx_2_subbank_idx(struct rvu *rvu, u16 mcam_idx, + struct npc_subbank **sb, + int *sb_off) +{ + int bank_off, sb_id; + + /* mcam_idx should be less than (2 * bank depth) */ + if (mcam_idx >= npc_priv.bank_depth * 2) { + dev_err(rvu->dev, "%s: Invalid mcam idx %u\n", + __func__, mcam_idx); + return -EINVAL; + } + + /* find mcam offset per bank */ + bank_off = mcam_idx & (npc_priv.bank_depth - 1); + + /* Find subbank id */ + sb_id = bank_off / npc_priv.subbank_depth; + + /* Check if subbank id is more than maximum + * number of subbanks available + */ + if (sb_id >= npc_priv.num_subbanks) { + dev_err(rvu->dev, "%s: invalid subbank %d\n", + __func__, sb_id); + return -EINVAL; + } + + *sb = &npc_priv.sb[sb_id]; + + /* Subbank offset per bank */ + *sb_off = bank_off % npc_priv.subbank_depth; + + /* Index in a subbank should add subbank depth + * if it is in bank1 + */ + if (mcam_idx >= npc_priv.bank_depth) + *sb_off += npc_priv.subbank_depth; + + return 0; +} + +static int __npc_subbank_contig_alloc(struct rvu *rvu, + struct npc_subbank *sb, + int key_type, int sidx, + int eidx, int prio, + int count, int t, int b, + unsigned long *bmap, + u16 *save) +{ + int k, offset, delta = 0; + int cnt = 0, sbd; + + sbd = npc_priv.subbank_depth; + + if (sidx >= npc_priv.bank_depth) + delta = sbd; + + switch (prio) { + case NPC_MCAM_LOWER_PRIO: + case NPC_MCAM_ANY_PRIO: + /* Find an area of size 'count' from sidx to eidx */ + offset = bitmap_find_next_zero_area(bmap, sbd, sidx - b, + count, 0); + + if (offset >= sbd) { + dev_err(rvu->dev, + "%s: Could not find contiguous(%d) entries\n", + __func__, count); + return -EFAULT; + } + + dev_dbg(rvu->dev, + "%s: sidx=%d eidx=%d t=%d b=%d offset=%d count=%d delta=%d\n", + __func__, sidx, eidx, t, b, offset, + count, delta); + + for (cnt = 0; cnt < count; cnt++) + save[cnt] = offset + cnt + delta; + + break; + + case NPC_MCAM_HIGHER_PRIO: + /* Find an area of 'count' from eidx to sidx */ + for (k = eidx - b; cnt < count && k >= (sidx - b); k--) { + /* If an intermediate slot is not free, + * reset the counter (cnt) to zero as + * request is for contiguous. + */ + if (test_bit(k, bmap)) { + cnt = 0; + continue; + } + + save[cnt++] = k + delta; + } + break; + } + + /* Found 'count' number of free slots */ + if (cnt == count) + return 0; + + dev_dbg(rvu->dev, + "%s: Could not find contiguous(%d) entries in subbank=%u\n", + __func__, count, sb->idx); + return -EFAULT; +} + +static int __npc_subbank_non_contig_alloc(struct rvu *rvu, + struct npc_subbank *sb, + int key_type, int sidx, + int eidx, int prio, + int t, int b, + unsigned long *bmap, + int count, u16 *save, + bool max_alloc, int *alloc_cnt) +{ + unsigned long index; + int cnt = 0, delta; + int k, sbd; + + sbd = npc_priv.subbank_depth; + delta = sidx >= npc_priv.bank_depth ? sbd : 0; + + switch (prio) { + /* Find an area of size 'count' from sidx to eidx */ + case NPC_MCAM_LOWER_PRIO: + case NPC_MCAM_ANY_PRIO: + index = find_next_zero_bit(bmap, sbd, sidx - b); + if (index >= sbd) { + dev_err(rvu->dev, + "%s: Error happened to alloc %u, bitmap_weight=%u, sb->idx=%u\n", + __func__, count, + bitmap_weight(bmap, sbd), + sb->idx); + break; + } + + for (k = index; cnt < count && k <= (eidx - b); k++) { + /* Skip used slots */ + if (test_bit(k, bmap)) + continue; + + save[cnt++] = k + delta; + } + break; + + /* Find an area of 'count' from eidx to sidx */ + case NPC_MCAM_HIGHER_PRIO: + for (k = eidx - b; cnt < count && k >= (sidx - b); k--) { + /* Skip used slots */ + if (test_bit(k, bmap)) + continue; + + save[cnt++] = k + delta; + } + break; + } + + /* Update allocated 'cnt' to alloc_cnt */ + *alloc_cnt = cnt; + + /* Successfully allocated requested count slots */ + if (cnt == count) + return 0; + + /* Allocation successful for cnt < count */ + if (max_alloc && cnt > 0) + return 0; + + dev_dbg(rvu->dev, + "%s: Could not find non contiguous entries(%u) in subbank(%u) cnt=%d max_alloc=%d\n", + __func__, count, sb->idx, cnt, max_alloc); + + return -EFAULT; +} + +static void __npc_subbank_sboff_2_off(struct rvu *rvu, struct npc_subbank *sb, + int sb_off, unsigned long **bmap, + int *off) +{ + int sbd; + + sbd = npc_priv.subbank_depth; + + *off = sb_off & (sbd - 1); + *bmap = (sb_off >= sbd) ? sb->b1map : sb->b0map; +} + +/* set/clear bitmap */ +static bool __npc_subbank_mark_slot(struct rvu *rvu, + struct npc_subbank *sb, + int sb_off, bool set) +{ + unsigned long *bmap; + int off; + + /* if sb_off >= subbank.depth, then slots are in + * bank1 + */ + __npc_subbank_sboff_2_off(rvu, sb, sb_off, &bmap, &off); + + dev_dbg(rvu->dev, + "%s: Marking set=%d sb_off=%d sb->idx=%d off=%d\n", + __func__, set, sb_off, sb->idx, off); + + if (set) { + /* Slot is already used */ + if (test_bit(off, bmap)) + return false; + + sb->free_cnt--; + set_bit(off, bmap); + return true; + } + + /* Slot is already free */ + if (!test_bit(off, bmap)) + return false; + + sb->free_cnt++; + clear_bit(off, bmap); + return true; +} + +static int __npc_subbank_mark_free(struct rvu *rvu, struct npc_subbank *sb) +{ + int rc, blkaddr; + + sb->flags = NPC_SUBBANK_FLAG_FREE; + sb->key_type = 0; + + bitmap_clear(sb->b0map, 0, npc_priv.subbank_depth); + bitmap_clear(sb->b1map, 0, npc_priv.subbank_depth); + + if (!xa_erase(&npc_priv.xa_sb_used, sb->arr_idx)) { + dev_err(rvu->dev, + "%s: Error to delete from xa_sb_used array\n", + __func__); + return -EFAULT; + } + + rc = xa_insert(&npc_priv.xa_sb_free, sb->arr_idx, + xa_mk_value(sb->idx), GFP_KERNEL); + if (rc) { + rc = xa_insert(&npc_priv.xa_sb_used, sb->arr_idx, + xa_mk_value(sb->idx), GFP_KERNEL); + if (rc) + dev_err(rvu->dev, + "%s: Failed to roll back sb(%u) arr_idx=%d\n", + __func__, sb->idx, sb->arr_idx); + + dev_err(rvu->dev, + "%s: Error to add sb(%u) to xa_sb_free array at arr_idx=%d\n", + __func__, sb->idx, sb->arr_idx); + return rc; + } + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); + rvu_write64(rvu, blkaddr, + NPC_AF_MCAM_SECTIONX_CFG_EXT(sb->idx), + NPC_MCAM_KEY_X2); + + return rc; +} + +static int __npc_subbank_mark_used(struct rvu *rvu, struct npc_subbank *sb, + int key_type) +{ + int rc; + + sb->flags = NPC_SUBBANK_FLAG_USED; + sb->key_type = key_type; + if (key_type == NPC_MCAM_KEY_X4) + sb->free_cnt = npc_priv.subbank_depth; + else + sb->free_cnt = 2 * npc_priv.subbank_depth; + + bitmap_clear(sb->b0map, 0, npc_priv.subbank_depth); + bitmap_clear(sb->b1map, 0, npc_priv.subbank_depth); + + if (!xa_erase(&npc_priv.xa_sb_free, sb->arr_idx)) { + dev_err(rvu->dev, + "%s: Error to delete from xa_sb_free array\n", + __func__); + return -EFAULT; + } + + rc = xa_insert(&npc_priv.xa_sb_used, sb->arr_idx, + xa_mk_value(sb->idx), GFP_KERNEL); + if (rc) + dev_err(rvu->dev, + "%s: Error to add to xa_sb_used array\n", __func__); + + return rc; +} + +static bool __npc_subbank_free(struct rvu *rvu, struct npc_subbank *sb, + u16 sb_off) +{ + bool deleted = false; + unsigned long *bmap; + int rc, off; + + deleted = __npc_subbank_mark_slot(rvu, sb, sb_off, false); + if (!deleted) + goto done; + + __npc_subbank_sboff_2_off(rvu, sb, sb_off, &bmap, &off); + + /* Check whether we can mark whole subbank as free */ + if (sb->key_type == NPC_MCAM_KEY_X4) { + if (sb->free_cnt < npc_priv.subbank_depth) + goto done; + } else { + if (sb->free_cnt < 2 * npc_priv.subbank_depth) + goto done; + } + + /* All slots in subbank are unused. Mark the subbank as free + * and add to free pool + */ + rc = __npc_subbank_mark_free(rvu, sb); + if (rc) + dev_err(rvu->dev, "%s: Error to free subbank\n", __func__); + +done: + return deleted; +} + +static int +npc_subbank_free(struct rvu *rvu, struct npc_subbank *sb, u16 sb_off) +{ + bool deleted; + + mutex_lock(&sb->lock); + deleted = __npc_subbank_free(rvu, sb, sb_off); + mutex_unlock(&sb->lock); + + return deleted ? 0 : -EFAULT; +} + +static int __npc_subbank_alloc(struct rvu *rvu, struct npc_subbank *sb, + int key_type, int ref, int limit, int prio, + bool contig, int count, u16 *mcam_idx, + int idx_sz, bool max_alloc, int *alloc_cnt) +{ + int cnt, t, b, i, blkaddr; + bool new_sub_bank = false; + unsigned long *bmap; + u16 *save = NULL; + int sidx, eidx; + bool diffbank; + int bw, bfree; + int rc = 0; + bool ret; + + /* Check if enough space is there to return requested number of + * mcam indexes in case of contiguous allocation + */ + if (!max_alloc && count > idx_sz) { + dev_err(rvu->dev, + "%s: Less space, count=%d idx_sz=%d sb_id=%d\n", + __func__, count, idx_sz, sb->idx); + return -ENOSPC; + } + + /* Allocation on multiple subbank is not supported by this function. + * it means that ref and limit should be on same subbank. + * + * ref and limit values should be validated w.r.t prio as below. + * say ref = 100, limit = 200, + * if NPC_MCAM_LOWER_PRIO, allocate index 100 + * if NPC_MCAM_HIGHER_PRIO, below sanity test returns error. + * if NPC_MCAM_ANY_PRIO, allocate index 100 + * + * say ref = 200, limit = 100 + * if NPC_MCAM_LOWER_PRIO, below sanity test returns error. + * if NPC_MCAM_HIGHER_PRIO, allocate index 200 + * if NPC_MCAM_ANY_PRIO, allocate index 100 + * + * Please note that NPC_MCAM_ANY_PRIO does not have any restriction + * on "ref" and "limit" values. ie, ref > limit and limit > ref + * are valid cases. + */ + if ((prio == NPC_MCAM_LOWER_PRIO && ref > limit) || + (prio == NPC_MCAM_HIGHER_PRIO && ref < limit)) { + dev_err(rvu->dev, "%s: Wrong ref_enty(%d) or limit(%d)\n", + __func__, ref, limit); + return -EINVAL; + } + + /* x4 indexes are from 0 to bank size as it combines two x2 banks */ + if (key_type == NPC_MCAM_KEY_X4 && + (ref >= npc_priv.bank_depth || limit >= npc_priv.bank_depth)) { + dev_err(rvu->dev, + "%s: Wrong ref_enty(%d) or limit(%d) for x4\n", + __func__, ref, limit); + return -EINVAL; + } + + /* This function is called either bank0 or bank1 portion of a subbank. + * so ref and limit should be on same bank. + */ + diffbank = !!((ref & npc_priv.bank_depth) ^ + (limit & npc_priv.bank_depth)); + if (diffbank) { + dev_err(rvu->dev, + "%s: request ref and limit should be from same bank\n", + __func__); + return -EINVAL; + } + + sidx = min_t(int, limit, ref); + eidx = max_t(int, limit, ref); + + /* Find total number of slots available; both used and free */ + cnt = eidx - sidx + 1; + if (contig && cnt < count) { + dev_err(rvu->dev, + "%s: Wrong ref_enty(%d) or limit(%d) for count(%d)\n", + __func__, ref, limit, count); + return -EINVAL; + } + + /* If subbank is free, check if requested number of indexes is less than + * or equal to mcam entries available in the subbank if contig. + */ + if (sb->flags & NPC_SUBBANK_FLAG_FREE) { + if (contig && count > npc_priv.subbank_depth) { + dev_err(rvu->dev, "%s: Less number of entries\n", + __func__); + return -ENOSPC; + } + + new_sub_bank = true; + goto process; + } + + /* Flag should be set for all used subbanks */ + WARN_ONCE(!(sb->flags & NPC_SUBBANK_FLAG_USED), + "Used flag is not set(%#x)\n", sb->flags); + + /* If subbank key type does not match with requested key_type, + * return error + */ + if (sb->key_type != key_type) { + dev_dbg(rvu->dev, "%s: subbank key_type mismatch\n", __func__); + return -EINVAL; + } + +process: + /* if ref or limit >= npc_priv.bank_depth, index are in bank1. + * else bank0. + */ + if (ref >= npc_priv.bank_depth) { + bmap = sb->b1map; + t = sb->b1t; + b = sb->b1b; + } else { + bmap = sb->b0map; + t = sb->b0t; + b = sb->b0b; + } + + /* Calculate free slots */ + bw = bitmap_weight(bmap, npc_priv.subbank_depth); + bfree = npc_priv.subbank_depth - bw; + + if (!bfree) { + dev_dbg(rvu->dev, "%s: subbank is full\n", __func__); + return -ENOSPC; + } + + /* If request is for contiguous , then max we can allocate is + * equal to subbank_depth + */ + if (contig && bfree < count) { + dev_dbg(rvu->dev, "%s: no space for entry\n", __func__); + return -ENOSPC; + } + + /* 'save' array stores available indexes temporarily before + * marking it as allocated + */ + save = kcalloc(count, sizeof(u16), GFP_KERNEL); + if (!save) { + rc = -ENOMEM; + goto err1; + } + + if (contig) { + rc = __npc_subbank_contig_alloc(rvu, sb, key_type, + sidx, eidx, prio, + count, t, b, + bmap, save); + /* contiguous allocation success means that + * requested number of free slots got + * allocated + */ + if (!rc) + *alloc_cnt = count; + + } else { + rc = __npc_subbank_non_contig_alloc(rvu, sb, key_type, + sidx, eidx, prio, + t, b, bmap, + count, save, + max_alloc, alloc_cnt); + } + + if (rc) + goto err1; + + /* Mark new subbank bank as used */ + if (new_sub_bank) { + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); + if (blkaddr < 0) { + dev_err(rvu->dev, + "%s: NPC block not implemented\n", __func__); + rc = -EFAULT; + goto err1; + } + + rc = __npc_subbank_mark_used(rvu, sb, key_type); + if (rc) { + dev_err(rvu->dev, + "%s: Error to mark subbank as used\n", + __func__); + goto err2; + } + + /* Configure section type to key_type */ + rvu_write64(rvu, blkaddr, + NPC_AF_MCAM_SECTIONX_CFG_EXT(sb->idx), + key_type); + } + + for (i = 0; i < *alloc_cnt; i++) { + rc = npc_subbank_idx_2_mcam_idx(rvu, sb, save[i], + &mcam_idx[i]); + if (rc) { + dev_err(rvu->dev, + "%s: Error to find mcam idx for %u\n", + __func__, save[i]); + /* TODO: handle err case gracefully */ + goto err3; + } + + /* Mark all slots as used */ + ret = __npc_subbank_mark_slot(rvu, sb, save[i], true); + if (!ret) { + dev_err(rvu->dev, "%s: Error to mark mcam_idx %u\n", + __func__, mcam_idx[i]); + rc = -EFAULT; + goto err3; + } + } + kfree(save); + return 0; + +err3: + for (int j = 0; j < i; j++) + __npc_subbank_mark_slot(rvu, sb, save[j], false); +err2: + if (new_sub_bank) + __npc_subbank_mark_free(rvu, sb); +err1: + kfree(save); + return rc; +} + +static int +npc_subbank_alloc(struct rvu *rvu, struct npc_subbank *sb, + int key_type, int ref, int limit, int prio, + bool contig, int count, u16 *mcam_idx, + int idx_sz, bool max_alloc, int *alloc_cnt) +{ + int rc; + + mutex_lock(&sb->lock); + rc = __npc_subbank_alloc(rvu, sb, key_type, ref, limit, prio, + contig, count, mcam_idx, idx_sz, + max_alloc, alloc_cnt); + mutex_unlock(&sb->lock); + + return rc; +} + +static int +npc_del_from_pf_maps(struct rvu *rvu, u16 mcam_idx) +{ + int pcifunc, idx; + void *map; + + map = xa_erase(&npc_priv.xa_idx2pf_map, mcam_idx); + if (!map) { + dev_err(rvu->dev, + "%s: failed to erase mcam_idx(%u) from xa_idx2pf map\n", + __func__, mcam_idx); + return -EFAULT; + } + + pcifunc = xa_to_value(map); + map = xa_load(&npc_priv.xa_pf_map, pcifunc); + if (!map) { + dev_err(rvu->dev, + "%s: failed to find entry for (%u) from xa_pf_map, mcam=%u\n", + __func__, pcifunc, mcam_idx); + return -ESRCH; + } + + idx = xa_to_value(map); + + map = xa_erase(&npc_priv.xa_pf2idx_map[idx], mcam_idx); + if (!map) { + dev_err(rvu->dev, + "%s: failed to erase mcam_idx(%u) from xa_pf2idx_map map\n", + __func__, mcam_idx); + return -EFAULT; + } + return 0; +} + +static int +npc_add_to_pf_maps(struct rvu *rvu, u16 mcam_idx, int pcifunc) +{ + int rc, idx; + void *map; + + dev_dbg(rvu->dev, + "%s: add2maps mcam_idx(%u) to xa_idx2pf map pcifunc=%#x\n", + __func__, mcam_idx, pcifunc); + + rc = xa_insert(&npc_priv.xa_idx2pf_map, mcam_idx, + xa_mk_value(pcifunc), GFP_KERNEL); + + if (rc) { + map = xa_load(&npc_priv.xa_idx2pf_map, mcam_idx); + dev_err(rvu->dev, + "%s: failed to insert mcam_idx(%u) to xa_idx2pf map, existing value=%lu\n", + __func__, mcam_idx, xa_to_value(map)); + return -EFAULT; + } + + map = xa_load(&npc_priv.xa_pf_map, pcifunc); + if (!map) { + dev_err(rvu->dev, + "%s: failed to find pf map entry for pcifunc=%#x, mcam=%u\n", + __func__, pcifunc, mcam_idx); + return -ESRCH; + } + + idx = xa_to_value(map); + + rc = xa_insert(&npc_priv.xa_pf2idx_map[idx], mcam_idx, + xa_mk_value(pcifunc), GFP_KERNEL); + + if (rc) { + map = xa_load(&npc_priv.xa_pf2idx_map[idx], mcam_idx); + xa_erase(&npc_priv.xa_idx2pf_map, mcam_idx); + dev_err(rvu->dev, + "%s: failed to insert mcam_idx(%u) to xa_pf2idx_map map, earlier value=%lu idx=%u\n", + __func__, mcam_idx, xa_to_value(map), idx); + + return -EFAULT; + } + + return 0; +} + +static bool +npc_subbank_suits(struct npc_subbank *sb, int key_type) +{ + mutex_lock(&sb->lock); + + if (!sb->key_type) { + mutex_unlock(&sb->lock); + return true; + } + + if (sb->key_type == key_type) { + mutex_unlock(&sb->lock); + return true; + } + + mutex_unlock(&sb->lock); + return false; +} + +#define SB_ALIGN_UP(val) (((val) + npc_priv.subbank_depth) & \ + ~((npc_priv.subbank_depth) - 1)) +#define SB_ALIGN_DOWN(val) ALIGN_DOWN((val), npc_priv.subbank_depth) + +static void npc_subbank_iter_down(struct rvu *rvu, + int ref, int limit, + int *cur_ref, int *cur_limit, + bool *start, bool *stop) +{ + int align; + + *stop = false; + + /* ALIGN_DOWN the limit to current subbank boundary bottom index */ + if (*start) { + *start = false; + *cur_ref = ref; + align = SB_ALIGN_DOWN(ref); + if (align < limit) { + *stop = true; + *cur_limit = limit; + return; + } + *cur_limit = align; + return; + } + + *cur_ref = *cur_limit - 1; + align = *cur_ref - npc_priv.subbank_depth + 1; + if (align <= limit) { + *stop = true; + *cur_limit = limit; + return; + } + + *cur_limit = align; +} + +static void npc_subbank_iter_up(struct rvu *rvu, + int ref, int limit, + int *cur_ref, int *cur_limit, + bool *start, bool *stop) +{ + int align; + + *stop = false; + + /* ALIGN_UP the limit to current subbank boundary top index */ + if (*start) { + *start = false; + *cur_ref = ref; + + /* Find next lower prio subbank's bottom index */ + align = SB_ALIGN_UP(ref); + + /* Crosses limit ? */ + if (align - 1 > limit) { + *stop = true; + *cur_limit = limit; + return; + } + + /* Current subbank's top index */ + *cur_limit = align - 1; + return; + } + + *cur_ref = *cur_limit + 1; + align = *cur_ref + npc_priv.subbank_depth - 1; + + if (align >= limit) { + *stop = true; + *cur_limit = limit; + return; + } + + *cur_limit = align; +} + +static int +npc_subbank_iter(struct rvu *rvu, int key_type, + int ref, int limit, int prio, + int *cur_ref, int *cur_limit, + bool *start, bool *stop) +{ + if (prio != NPC_MCAM_HIGHER_PRIO) + npc_subbank_iter_up(rvu, ref, limit, + cur_ref, cur_limit, + start, stop); + else + npc_subbank_iter_down(rvu, ref, limit, + cur_ref, cur_limit, + start, stop); + + /* limit and ref should < bank_depth for x4 */ + if (key_type == NPC_MCAM_KEY_X4) { + if (*cur_ref >= npc_priv.bank_depth) + return -EINVAL; + + if (*cur_limit >= npc_priv.bank_depth) + return -EINVAL; + } + /* limit and ref should < 2 * bank_depth, for x2 */ + if (*cur_ref >= 2 * npc_priv.bank_depth) + return -EINVAL; + + if (*cur_limit >= 2 * npc_priv.bank_depth) + return -EINVAL; + + return 0; +} + +static int npc_idx_free(struct rvu *rvu, u16 *mcam_idx, int count, + bool maps_del) +{ + struct npc_subbank *sb; + int idx, i; + bool ret; + int rc; + + /* Check if we can dealloc indexes properly ? */ + for (i = 0; i < count; i++) { + rc = npc_mcam_idx_2_subbank_idx(rvu, mcam_idx[i], + &sb, &idx); + if (rc) { + dev_err(rvu->dev, + "Failed to free mcam idx=%u\n", mcam_idx[i]); + return rc; + } + } + + for (i = 0; i < count; i++) { + rc = npc_mcam_idx_2_subbank_idx(rvu, mcam_idx[i], + &sb, &idx); + if (rc) + return rc; + + ret = npc_subbank_free(rvu, sb, idx); + if (ret) + return -EINVAL; + + if (!maps_del) + continue; + + rc = npc_del_from_pf_maps(rvu, mcam_idx[i]); + if (rc) + return rc; + } + + return 0; +} + +static int npc_multi_subbank_ref_alloc(struct rvu *rvu, int key_type, + int ref, int limit, int prio, + bool contig, int count, + u16 *mcam_idx) +{ + struct npc_subbank *sb; + unsigned long *bmap; + int sb_off, off, rc; + int cnt = 0; + bool bitset; + + if (prio != NPC_MCAM_HIGHER_PRIO) { + while (ref <= limit) { + /* Calculate subbank and subbank index */ + rc = npc_mcam_idx_2_subbank_idx(rvu, ref, + &sb, &sb_off); + if (rc) + goto err; + + /* If subbank is not suitable for requested key type + * restart search from next subbank + */ + if (!npc_subbank_suits(sb, key_type)) { + ref = SB_ALIGN_UP(ref); + if (contig) { + rc = npc_idx_free(rvu, mcam_idx, + cnt, false); + if (rc) + return rc; + cnt = 0; + } + continue; + } + + mutex_lock(&sb->lock); + + /* If subbank is free; mark it as used */ + if (sb->flags & NPC_SUBBANK_FLAG_FREE) { + rc = __npc_subbank_mark_used(rvu, sb, + key_type); + if (rc) { + mutex_unlock(&sb->lock); + dev_err(rvu->dev, + "%s:Error to add to use array\n", + __func__); + goto err; + } + } + + /* Find correct bmap */ + __npc_subbank_sboff_2_off(rvu, sb, sb_off, &bmap, &off); + + /* if bit is already set, reset 'cnt' */ + bitset = test_bit(off, bmap); + if (bitset) { + mutex_unlock(&sb->lock); + if (contig) { + rc = npc_idx_free(rvu, mcam_idx, + cnt, false); + if (rc) + return rc; + cnt = 0; + } + + ref++; + continue; + } + + set_bit(off, bmap); + sb->free_cnt--; + mcam_idx[cnt++] = ref; + mutex_unlock(&sb->lock); + + if (cnt == count) + return 0; + ref++; + } + + /* Could not allocate request count slots */ + goto err; + } + while (ref >= limit) { + rc = npc_mcam_idx_2_subbank_idx(rvu, ref, + &sb, &sb_off); + if (rc) + goto err; + + if (!npc_subbank_suits(sb, key_type)) { + ref = SB_ALIGN_DOWN(ref) - 1; + if (contig) { + rc = npc_idx_free(rvu, mcam_idx, cnt, false); + if (rc) + return rc; + + cnt = 0; + } + continue; + } + + mutex_lock(&sb->lock); + + if (sb->flags & NPC_SUBBANK_FLAG_FREE) { + rc = __npc_subbank_mark_used(rvu, sb, key_type); + if (rc) { + mutex_unlock(&sb->lock); + dev_err(rvu->dev, + "%s:Error to add to use array\n", + __func__); + goto err; + } + } + + __npc_subbank_sboff_2_off(rvu, sb, sb_off, &bmap, &off); + bitset = test_bit(off, bmap); + if (bitset) { + mutex_unlock(&sb->lock); + if (contig) { + rc = npc_idx_free(rvu, mcam_idx, cnt, false); + if (rc) + return rc; + + cnt = 0; + } + ref--; + continue; + } + + mcam_idx[cnt++] = ref; + sb->free_cnt--; + set_bit(off, bmap); + mutex_unlock(&sb->lock); + + if (cnt == count) + return 0; + ref--; + } + +err: + rc = npc_idx_free(rvu, mcam_idx, cnt, false); + if (rc) + dev_err(rvu->dev, + "%s: Error happened while freeing cnt=%u indexes\n", + __func__, cnt); + + return -ENOSPC; +} + +static int npc_subbank_free_cnt(struct rvu *rvu, struct npc_subbank *sb, + int key_type) +{ + int cnt, spd; + + spd = npc_priv.subbank_depth; + mutex_lock(&sb->lock); + + if (sb->flags & NPC_SUBBANK_FLAG_FREE) + cnt = key_type == NPC_MCAM_KEY_X4 ? spd : 2 * spd; + else + cnt = sb->free_cnt; + + mutex_unlock(&sb->lock); + return cnt; +} + +static int npc_subbank_ref_alloc(struct rvu *rvu, int key_type, + int ref, int limit, int prio, + bool contig, int count, + u16 *mcam_idx) +{ + struct npc_subbank *sb1, *sb2; + bool max_alloc, start, stop; + int r, l, sb_idx1, sb_idx2; + int tot = 0, rc; + int alloc_cnt; + + max_alloc = !contig; + + start = true; + stop = false; + + /* Loop until we cross the ref/limit boundary */ + while (!stop) { + rc = npc_subbank_iter(rvu, key_type, ref, limit, prio, + &r, &l, &start, &stop); + + dev_dbg(rvu->dev, + "%s: ref=%d limit=%d r=%d l=%d start=%d stop=%d tot=%d count=%d rc=%d\n", + __func__, ref, limit, r, l, + start, stop, tot, count, rc); + + if (rc) + goto err; + + /* Find subbank and subbank index for ref */ + rc = npc_mcam_idx_2_subbank_idx(rvu, r, &sb1, + &sb_idx1); + if (rc) + goto err; + + dev_dbg(rvu->dev, + "%s: ref subbank=%d off=%d\n", + __func__, sb1->idx, sb_idx1); + + /* Skip subbank if it is not available for the keytype */ + if (!npc_subbank_suits(sb1, key_type)) { + dev_dbg(rvu->dev, + "%s: not suitable sb=%d key_type=%d\n", + __func__, sb1->idx, key_type); + continue; + } + + /* Find subbank and subbank index for limit */ + rc = npc_mcam_idx_2_subbank_idx(rvu, l, &sb2, + &sb_idx2); + if (rc) + goto err; + + dev_dbg(rvu->dev, + "%s: limit subbank=%d off=%d\n", + __func__, sb_idx1, sb_idx2); + + /* subbank of ref and limit should be same */ + if (sb1 != sb2) { + dev_err(rvu->dev, + "%s: l(%d) and r(%d) are not in same subbank\n", + __func__, r, l); + goto err; + } + + if (contig && + npc_subbank_free_cnt(rvu, sb1, key_type) < count) { + dev_dbg(rvu->dev, "%s: less count =%d\n", + __func__, + npc_subbank_free_cnt(rvu, sb1, key_type)); + continue; + } + + /* Try in one bank of a subbank */ + alloc_cnt = 0; + rc = npc_subbank_alloc(rvu, sb1, key_type, + r, l, prio, contig, + count - tot, mcam_idx + tot, + count - tot, max_alloc, + &alloc_cnt); + + tot += alloc_cnt; + + dev_dbg(rvu->dev, "%s: Allocated tot=%d alloc_cnt=%d\n", + __func__, tot, alloc_cnt); + + if (!rc && count == tot) + return 0; + } +err: + dev_dbg(rvu->dev, "%s: Error to allocate\n", + __func__); + + /* non contiguous allocation fails. We need to do clean up */ + if (max_alloc) { + rc = npc_idx_free(rvu, mcam_idx, tot, false); + if (rc) + dev_err(rvu->dev, + "%s: failed to free %u indexes\n", + __func__, tot); + } + + return -EFAULT; +} + +/* Minimize allocation from bottom and top subbanks for noref allocations. + * Default allocations are ref based, and will be allocated from top + * subbanks (least priority subbanks). Since default allocation is at very + * early stage of kernel netdev probes, this subbanks will be moved to + * used subbanks list. This will pave a way for noref allocation from these + * used subbanks. Skip allocation for these top and bottom, and try free + * bank next. If none slot is available, come back and search in these + * subbanks. + */ + +static int npc_subbank_restricted_idxs[2]; +static bool restrict_valid = true; + +static bool npc_subbank_restrict_usage(struct rvu *rvu, int index) +{ + int i; + + if (!restrict_valid) + return false; + + for (i = 0; i < ARRAY_SIZE(npc_subbank_restricted_idxs); i++) { + if (index == npc_subbank_restricted_idxs[i]) + return true; + } + + return false; +} + +static int npc_subbank_noref_alloc(struct rvu *rvu, int key_type, bool contig, + int count, u16 *mcam_idx) +{ + struct npc_subbank *sb; + unsigned long index; + int tot = 0, rc; + bool max_alloc; + int alloc_cnt; + int idx, i; + void *val; + + max_alloc = !contig; + + /* Check used subbanks for free slots */ + xa_for_each(&npc_priv.xa_sb_used, index, val) { + idx = xa_to_value(val); + + /* Minimize allocation from restricted subbanks + * in noref allocations. + */ + if (npc_subbank_restrict_usage(rvu, idx)) + continue; + + sb = &npc_priv.sb[idx]; + + /* Skip if not suitable subbank */ + if (!npc_subbank_suits(sb, key_type)) + continue; + + if (contig && npc_subbank_free_cnt(rvu, sb, key_type) < count) + continue; + + /* try in bank 0. Try passing ref and limit equal to + * subbank boundaries + */ + alloc_cnt = 0; + rc = npc_subbank_alloc(rvu, sb, key_type, + sb->b0b, sb->b0t, 0, + contig, count - tot, + mcam_idx + tot, + count - tot, + max_alloc, &alloc_cnt); + + /* Non contiguous allocation may allocate less than + * requested 'count'. + */ + tot += alloc_cnt; + + dev_dbg(rvu->dev, + "%s: Allocated %d from subbank %d, tot=%d count=%d\n", + __func__, alloc_cnt, sb->idx, tot, count); + + /* Successfully allocated */ + if (!rc && count == tot) + return 0; + + /* x4 entries can be allocated from bank 0 only */ + if (key_type == NPC_MCAM_KEY_X4) + continue; + + /* try in bank 1 for x2 */ + alloc_cnt = 0; + rc = npc_subbank_alloc(rvu, sb, key_type, + sb->b1b, sb->b1t, 0, + contig, count - tot, + mcam_idx + tot, + count - tot, max_alloc, + &alloc_cnt); + + tot += alloc_cnt; + + dev_dbg(rvu->dev, + "%s: Allocated %d from subbank %d, tot=%d count=%d\n", + __func__, alloc_cnt, sb->idx, tot, count); + + if (!rc && count == tot) + return 0; + } + + /* Allocate in free subbanks */ + xa_for_each(&npc_priv.xa_sb_free, index, val) { + idx = xa_to_value(val); + sb = &npc_priv.sb[idx]; + + /* Minimize allocation from restricted subbanks + * in noref allocations. + */ + if (npc_subbank_restrict_usage(rvu, idx)) + continue; + + if (!npc_subbank_suits(sb, key_type)) + continue; + + /* try in bank 0 */ + alloc_cnt = 0; + rc = npc_subbank_alloc(rvu, sb, key_type, + sb->b0b, sb->b0t, 0, + contig, count - tot, + mcam_idx + tot, + count - tot, + max_alloc, &alloc_cnt); + + tot += alloc_cnt; + + dev_dbg(rvu->dev, + "%s: Allocated %d from subbank %d, tot=%d count=%d\n", + __func__, alloc_cnt, sb->idx, tot, count); + + /* Successfully allocated */ + if (!rc && count == tot) + return 0; + + /* x4 entries can be allocated from bank 0 only */ + if (key_type == NPC_MCAM_KEY_X4) + continue; + + /* try in bank 1 for x2 */ + alloc_cnt = 0; + rc = npc_subbank_alloc(rvu, sb, + key_type, sb->b1b, sb->b1t, 0, + contig, count - tot, + mcam_idx + tot, count - tot, + max_alloc, &alloc_cnt); + + tot += alloc_cnt; + + dev_dbg(rvu->dev, + "%s: Allocated %d from subbank %d, tot=%d count=%d\n", + __func__, alloc_cnt, sb->idx, tot, count); + + if (!rc && count == tot) + return 0; + } + + /* Allocate from restricted subbanks */ + for (i = 0; restrict_valid && + (i < ARRAY_SIZE(npc_subbank_restricted_idxs)); i++) { + idx = npc_subbank_restricted_idxs[i]; + sb = &npc_priv.sb[idx]; + + /* Skip if not suitable subbank */ + if (!npc_subbank_suits(sb, key_type)) + continue; + + if (contig && npc_subbank_free_cnt(rvu, sb, key_type) < count) + continue; + + /* try in bank 0. Try passing ref and limit equal to + * subbank boundaries + */ + alloc_cnt = 0; + rc = npc_subbank_alloc(rvu, sb, key_type, + sb->b0b, sb->b0t, 0, + contig, count - tot, + mcam_idx + tot, + count - tot, + max_alloc, &alloc_cnt); + + /* Non contiguous allocation may allocate less than + * requested 'count'. + */ + tot += alloc_cnt; + + dev_dbg(rvu->dev, + "%s: Allocated %d from subbank %d, tot=%d count=%d\n", + __func__, alloc_cnt, sb->idx, tot, count); + + /* Successfully allocated */ + if (!rc && count == tot) + return 0; + + /* x4 entries can be allocated from bank 0 only */ + if (key_type == NPC_MCAM_KEY_X4) + continue; + + /* try in bank 1 for x2 */ + alloc_cnt = 0; + rc = npc_subbank_alloc(rvu, sb, key_type, + sb->b1b, sb->b1t, 0, + contig, count - tot, + mcam_idx + tot, + count - tot, max_alloc, + &alloc_cnt); + + tot += alloc_cnt; + + dev_dbg(rvu->dev, + "%s: Allocated %d from subbank %d, tot=%d count=%d\n", + __func__, alloc_cnt, sb->idx, tot, count); + + if (!rc && count == tot) + return 0; + } + + /* non contiguous allocation fails. We need to do clean up */ + if (max_alloc) + npc_idx_free(rvu, mcam_idx, tot, false); + + dev_dbg(rvu->dev, "%s: non-contig allocation fails\n", + __func__); + + return -EFAULT; +} + +int npc_cn20k_idx_free(struct rvu *rvu, u16 *mcam_idx, int count) +{ + return npc_idx_free(rvu, mcam_idx, count, true); +} + +int npc_cn20k_ref_idx_alloc(struct rvu *rvu, int pcifunc, int key_type, + int prio, u16 *mcam_idx, int ref, int limit, + bool contig, int count) +{ + int i, eidx, rc, bd; + bool ref_valid; + + bd = npc_priv.bank_depth; + + /* Special case: ref == 0 && limit= 0 && prio == HIGH && count == 1 + * Here user wants to allocate 0th entry + */ + if (!ref && !limit && prio == NPC_MCAM_HIGHER_PRIO && + count == 1) { + rc = npc_subbank_ref_alloc(rvu, key_type, ref, limit, + prio, contig, count, mcam_idx); + + if (rc) + return rc; + goto add2map; + } + + ref_valid = !!(limit || ref); + if (!ref_valid) { + if (contig && count > npc_priv.subbank_depth) + goto try_noref_multi_subbank; + + rc = npc_subbank_noref_alloc(rvu, key_type, contig, + count, mcam_idx); + if (!rc) + goto add2map; + +try_noref_multi_subbank: + eidx = (key_type == NPC_MCAM_KEY_X4) ? bd - 1 : 2 * bd - 1; + + if (prio == NPC_MCAM_HIGHER_PRIO) + rc = npc_multi_subbank_ref_alloc(rvu, key_type, + eidx, 0, + NPC_MCAM_HIGHER_PRIO, + contig, count, + mcam_idx); + else + rc = npc_multi_subbank_ref_alloc(rvu, key_type, + 0, eidx, + NPC_MCAM_LOWER_PRIO, + contig, count, + mcam_idx); + + if (!rc) + goto add2map; + + return rc; + } + + if ((prio == NPC_MCAM_LOWER_PRIO && ref > limit) || + (prio == NPC_MCAM_HIGHER_PRIO && ref < limit)) { + dev_err(rvu->dev, "%s: Wrong ref_enty(%d) or limit(%d)\n", + __func__, ref, limit); + return -EINVAL; + } + + if ((key_type == NPC_MCAM_KEY_X4 && (ref >= bd || limit >= bd)) || + (key_type == NPC_MCAM_KEY_X2 && + (ref >= 2 * bd || limit >= 2 * bd))) { + dev_err(rvu->dev, "%s: Wrong ref_enty(%d) or limit(%d)\n", + __func__, ref, limit); + return -EINVAL; + } + + if (contig && count > npc_priv.subbank_depth) + goto try_ref_multi_subbank; + + rc = npc_subbank_ref_alloc(rvu, key_type, ref, limit, + prio, contig, count, mcam_idx); + if (!rc) + goto add2map; + +try_ref_multi_subbank: + rc = npc_multi_subbank_ref_alloc(rvu, key_type, + ref, limit, prio, + contig, count, mcam_idx); + if (!rc) + goto add2map; + + return rc; + +add2map: + for (i = 0; i < count; i++) { + rc = npc_add_to_pf_maps(rvu, mcam_idx[i], pcifunc); + if (rc) { + for (int j = 0; j < i; j++) + npc_del_from_pf_maps(rvu, mcam_idx[j]); + + return rc; + } + } + + return 0; +} + +void npc_cn20k_subbank_calc_free(struct rvu *rvu, int *x2_free, + int *x4_free, int *sb_free) +{ + struct npc_subbank *sb; + int i; + + /* Reset all stats to zero */ + *x2_free = 0; + *x4_free = 0; + *sb_free = 0; + + for (i = 0; i < npc_priv.num_subbanks; i++) { + sb = &npc_priv.sb[i]; + mutex_lock(&sb->lock); + + /* Count number of free subbanks */ + if (sb->flags & NPC_SUBBANK_FLAG_FREE) { + (*sb_free)++; + goto next; + } + + /* Sumup x4 free count */ + if (sb->key_type == NPC_MCAM_KEY_X4) { + (*x4_free) += sb->free_cnt; + goto next; + } + + /* Sumup x2 free counts */ + (*x2_free) += sb->free_cnt; +next: + mutex_unlock(&sb->lock); + } +} + +int +rvu_mbox_handler_npc_cn20k_get_fcnt(struct rvu *rvu, + struct msg_req *req, + struct npc_cn20k_get_fcnt_rsp *rsp) +{ + npc_cn20k_subbank_calc_free(rvu, &rsp->free_x2, + &rsp->free_x4, &rsp->free_subbanks); + return 0; +} + +static int *subbank_srch_order; + +static void npc_populate_restricted_idxs(int num_subbanks) +{ + npc_subbank_restricted_idxs[0] = num_subbanks - 1; + npc_subbank_restricted_idxs[1] = 0; +} + +static int npc_create_srch_order(int cnt) +{ + int val = 0; + + subbank_srch_order = kcalloc(cnt, sizeof(int), + GFP_KERNEL); + if (!subbank_srch_order) + return -ENOMEM; + + /* cnt(subbank depth) is always a power of 2. There is a check in + * npc_priv_init() to check the same. + */ + for (int i = 0; i < cnt; i += 2) { + subbank_srch_order[i] = cnt / 2 - val - 1; + subbank_srch_order[i + 1] = cnt / 2 + 1 + val; + val++; + } + + subbank_srch_order[cnt - 1] = cnt / 2; + return 0; +} + +static void npc_subbank_init(struct rvu *rvu, struct npc_subbank *sb, int idx) +{ + mutex_init(&sb->lock); + + sb->b0b = idx * npc_priv.subbank_depth; + sb->b0t = sb->b0b + npc_priv.subbank_depth - 1; + + sb->b1b = npc_priv.bank_depth + idx * npc_priv.subbank_depth; + sb->b1t = sb->b1b + npc_priv.subbank_depth - 1; + + sb->flags = NPC_SUBBANK_FLAG_FREE; + sb->idx = idx; + sb->arr_idx = subbank_srch_order[idx]; + + dev_dbg(rvu->dev, "%s: sb->idx=%u sb->arr_idx=%u\n", + __func__, sb->idx, sb->arr_idx); + + /* Keep first and last subbank at end of free array; so that + * it will be used at last + */ + xa_store(&npc_priv.xa_sb_free, sb->arr_idx, + xa_mk_value(sb->idx), GFP_KERNEL); +} + +static int npc_pcifunc_map_create(struct rvu *rvu) +{ + int pf, vf, numvfs; + int cnt = 0; + u16 pcifunc; + u64 cfg; + + for (pf = 0; pf < rvu->hw->total_pfs; pf++) { + cfg = rvu_read64(rvu, BLKADDR_RVUM, RVU_PRIV_PFX_CFG(pf)); + numvfs = (cfg >> 12) & 0xFF; + + /* Skip not enabled PFs */ + if (!(cfg & BIT_ULL(20))) + goto chk_vfs; + + /* If Admin function, check on VFs */ + if (cfg & BIT_ULL(21)) + goto chk_vfs; + + pcifunc = pf << 9; + + xa_store(&npc_priv.xa_pf_map, (unsigned long)pcifunc, + xa_mk_value(cnt), GFP_KERNEL); + + cnt++; + +chk_vfs: + for (vf = 0; vf < numvfs; vf++) { + pcifunc = (pf << 9) | (vf + 1); + + xa_store(&npc_priv.xa_pf_map, (unsigned long)pcifunc, + xa_mk_value(cnt), GFP_KERNEL); + cnt++; + } + } + + return cnt; +} + +static int npc_priv_init(struct rvu *rvu) +{ + struct npc_mcam *mcam = &rvu->hw->mcam; + int blkaddr, num_banks, bank_depth; + int num_subbanks, subbank_depth; + u64 npc_const1, npc_const2 = 0; + struct npc_subbank *sb; + u64 cfg; + int i; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); + if (blkaddr < 0) { + dev_err(rvu->dev, "%s: NPC block not implemented\n", + __func__); + return -ENODEV; + } + + npc_const1 = rvu_read64(rvu, blkaddr, NPC_AF_CONST1); + if (npc_const1 & BIT_ULL(63)) + npc_const2 = rvu_read64(rvu, blkaddr, NPC_AF_CONST2); + + num_banks = mcam->banks; + bank_depth = mcam->banksize; + + num_subbanks = FIELD_GET(GENMASK_ULL(39, 32), npc_const2); + if (!num_subbanks) { + dev_err(rvu->dev, "Number of subbanks is zero\n"); + return -EFAULT; + } + + if (num_subbanks & (num_subbanks - 1)) { + dev_err(rvu->dev, + "subbanks cnt(%u) should be a power of 2\n", + num_subbanks); + return -EINVAL; + } + + npc_priv.num_subbanks = num_subbanks; + + subbank_depth = bank_depth / num_subbanks; + + npc_priv.bank_depth = bank_depth; + npc_priv.subbank_depth = subbank_depth; + + /* Get kex configured key size */ + cfg = rvu_read64(rvu, blkaddr, NPC_AF_INTFX_KEX_CFG(0)); + npc_priv.kw = FIELD_GET(GENMASK_ULL(34, 32), cfg); + + dev_info(rvu->dev, + "banks=%u depth=%u, subbanks=%u depth=%u, key type=%s\n", + num_banks, bank_depth, num_subbanks, subbank_depth, + npc_kw_name[npc_priv.kw]); + + npc_priv.sb = kcalloc(num_subbanks, sizeof(struct npc_subbank), + GFP_KERNEL); + if (!npc_priv.sb) + return -ENOMEM; + + xa_init_flags(&npc_priv.xa_sb_used, XA_FLAGS_ALLOC); + xa_init_flags(&npc_priv.xa_sb_free, XA_FLAGS_ALLOC); + xa_init_flags(&npc_priv.xa_idx2pf_map, XA_FLAGS_ALLOC); + xa_init_flags(&npc_priv.xa_pf_map, XA_FLAGS_ALLOC); + + if (npc_create_srch_order(num_subbanks)) + goto fail1; + + npc_populate_restricted_idxs(num_subbanks); + + /* Initialize subbanks */ + for (i = 0, sb = npc_priv.sb; i < num_subbanks; i++, sb++) + npc_subbank_init(rvu, sb, i); + + /* Get number of pcifuncs in the system */ + npc_priv.pf_cnt = npc_pcifunc_map_create(rvu); + npc_priv.xa_pf2idx_map = kcalloc(npc_priv.pf_cnt, + sizeof(struct xarray), + GFP_KERNEL); + if (!npc_priv.xa_pf2idx_map) + goto fail2; + + for (i = 0; i < npc_priv.pf_cnt; i++) + xa_init_flags(&npc_priv.xa_pf2idx_map[i], XA_FLAGS_ALLOC); + + return 0; + +fail2: + kfree(subbank_srch_order); + subbank_srch_order = NULL; + +fail1: + xa_destroy(&npc_priv.xa_sb_used); + xa_destroy(&npc_priv.xa_sb_free); + xa_destroy(&npc_priv.xa_idx2pf_map); + xa_destroy(&npc_priv.xa_pf_map); + kfree(npc_priv.sb); + npc_priv.sb = NULL; + return -ENOMEM; +} + +void npc_cn20k_deinit(struct rvu *rvu) +{ + int i; + + xa_destroy(&npc_priv.xa_sb_used); + xa_destroy(&npc_priv.xa_sb_free); + xa_destroy(&npc_priv.xa_idx2pf_map); + xa_destroy(&npc_priv.xa_pf_map); + + for (i = 0; i < npc_priv.pf_cnt; i++) + xa_destroy(&npc_priv.xa_pf2idx_map[i]); + + kfree(npc_priv.xa_pf2idx_map); + /* No need to destroy mutex lock as it is + * part of subbank structure + */ + kfree(npc_priv.sb); + kfree(subbank_srch_order); +} + +int npc_cn20k_init(struct rvu *rvu) +{ + int err; + + err = npc_priv_init(rvu); + if (err) { + dev_err(rvu->dev, "%s: Error to init\n", + __func__); + return err; + } + + npc_priv.init_done = true; + + return 0; +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h new file mode 100644 index 000000000000..d54253fd1e68 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h @@ -0,0 +1,110 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell RVU Admin Function driver + * + * Copyright (C) 2026 Marvell. + * + */ + +#ifndef NPC_CN20K_H +#define NPC_CN20K_H + +#define MAX_NUM_BANKS 2 +#define MAX_NUM_SUB_BANKS 32 +#define MAX_SUBBANK_DEPTH 256 + +/** + * enum npc_subbank_flag - NPC subbank status + * + * subbank flag indicates whether the subbank is free + * or used. + * + * @NPC_SUBBANK_FLAG_UNINIT: Subbank is not initialized. + * @NPC_SUBBANK_FLAG_FREE: Subbank is free. + * @NPC_SUBBANK_FLAG_USED: Subbank is used. + */ +enum npc_subbank_flag { + NPC_SUBBANK_FLAG_UNINIT, + NPC_SUBBANK_FLAG_FREE = BIT(0), + NPC_SUBBANK_FLAG_USED = BIT(1), +}; + +/** + * struct npc_subbank - Subbank fields. + * @b0b: Subbanks bottom index for bank0 + * @b1b: Subbanks bottom index for bank1 + * @b0t: Subbanks top index for bank0 + * @b1t: Subbanks top index for bank1 + * @flags: Subbank flags + * @lock: Mutex lock for flags and rsrc mofiication + * @b0map: Bitmap map for bank0 indexes + * @b1map: Bitmap map for bank1 indexes + * @idx: Subbank index + * @arr_idx: Index to the free array or used array + * @free_cnt: Number of free slots in the subbank. + * @key_type: X4 or X2 subbank. + * + * MCAM resource is divided horizontally into multiple subbanks and + * Resource allocation from each subbank is managed by this data + * structure. + */ +struct npc_subbank { + u16 b0t, b0b, b1t, b1b; + enum npc_subbank_flag flags; + struct mutex lock; /* Protect subbank resources */ + DECLARE_BITMAP(b0map, MAX_SUBBANK_DEPTH); + DECLARE_BITMAP(b1map, MAX_SUBBANK_DEPTH); + u16 idx; + u16 arr_idx; + u16 free_cnt; + u8 key_type; +}; + +/** + * struct npc_priv_t - NPC private structure. + * @bank_depth: Total entries in each bank. + * @num_banks: Number of banks. + * @num_subbanks: Number of subbanks. + * @subbank_depth: Depth of subbank. + * @kw: Kex configured key type. + * @sb: Subbank array. + * @xa_sb_used: Array of used subbanks. + * @xa_sb_free: Array of free subbanks. + * @xa_pf2idx_map: PF to mcam index map. + * @xa_idx2pf_map: Mcam index to PF map. + * @xa_pf_map: Pcifunc to index map. + * @pf_cnt: Number of PFs. + * @init_done: Indicates MCAM initialization is done. + * + * This structure is populated during probing time by reading + * HW csr registers. + */ +struct npc_priv_t { + int bank_depth; + const int num_banks; + int num_subbanks; + int subbank_depth; + u8 kw; + struct npc_subbank *sb; + struct xarray xa_sb_used; + struct xarray xa_sb_free; + struct xarray *xa_pf2idx_map; + struct xarray xa_idx2pf_map; + struct xarray xa_pf_map; + int pf_cnt; + bool init_done; +}; + +struct rvu; + +struct npc_priv_t *npc_priv_get(void); +int npc_cn20k_init(struct rvu *rvu); +void npc_cn20k_deinit(struct rvu *rvu); + +void npc_cn20k_subbank_calc_free(struct rvu *rvu, int *x2_free, + int *x4_free, int *sb_free); + +int npc_cn20k_ref_idx_alloc(struct rvu *rvu, int pcifunc, int key_type, + int prio, u16 *mcam_idx, int ref, int limit, + bool contig, int count); +int npc_cn20k_idx_free(struct rvu *rvu, u16 *mcam_idx, int count); +#endif /* NPC_CN20K_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/reg.h b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/reg.h index affb39803120..098b0247848b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/reg.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/reg.h @@ -77,5 +77,8 @@ #define RVU_MBOX_VF_INT_ENA_W1S (0x30) #define RVU_MBOX_VF_INT_ENA_W1C (0x38) +/* NPC registers */ +#define NPC_AF_MCAM_SECTIONX_CFG_EXT(a) (0xf000000ull | (a) << 3) + #define RVU_MBOX_VF_VFAF_TRIGX(a) (0x2000 | (a) << 3) #endif /* RVU_MBOX_REG_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/common.h b/drivers/net/ethernet/marvell/octeontx2/af/common.h index 8a08bebf08c2..779413a383b7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/common.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/common.h @@ -177,10 +177,6 @@ enum nix_scheduler { #define NIX_TX_ACTIONOP_MCAST (0x3ull) #define NIX_TX_ACTIONOP_DROP_VIOL (0x5ull) -#define NPC_MCAM_KEY_X1 0 -#define NPC_MCAM_KEY_X2 1 -#define NPC_MCAM_KEY_X4 2 - #define NIX_INTFX_RX(a) (0x0ull | (a) << 1) #define NIX_INTFX_TX(a) (0x1ull | (a) << 1) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index a3e273126e4e..1c4b36832788 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -52,6 +52,14 @@ #define MBOX_DIR_PFVF_UP 6 /* PF sends messages to VF */ #define MBOX_DIR_VFPF_UP 7 /* VF replies to PF */ +enum { + NPC_MCAM_KEY_X1 = 0, + NPC_MCAM_KEY_DYN = NPC_MCAM_KEY_X1, + NPC_MCAM_KEY_X2, + NPC_MCAM_KEY_X4, + NPC_MCAM_KEY_MAX, +}; + enum { TYPE_AFVF, TYPE_AFPF, @@ -275,6 +283,8 @@ M(NPC_GET_FIELD_HASH_INFO, 0x6013, npc_get_field_hash_info, M(NPC_GET_FIELD_STATUS, 0x6014, npc_get_field_status, \ npc_get_field_status_req, \ npc_get_field_status_rsp) \ +M(NPC_CN20K_MCAM_GET_FREE_COUNT, 0x6015, npc_cn20k_get_fcnt, \ + msg_req, npc_cn20k_get_fcnt_rsp) \ /* NIX mbox IDs (range 0x8000 - 0xFFFF) */ \ M(NIX_LF_ALLOC, 0x8000, nix_lf_alloc, \ nix_lf_alloc_req, nix_lf_alloc_rsp) \ @@ -1797,6 +1807,14 @@ struct npc_mcam_read_entry_rsp { u8 enable; }; +/* Available entries to use */ +struct npc_cn20k_get_fcnt_rsp { + struct mbox_msghdr hdr; + int free_x2; + int free_x4; + int free_subbanks; +}; + struct npc_mcam_read_base_rule_rsp { struct mbox_msghdr hdr; struct mcam_entry entry; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index 15d3cb0b9da6..425d3a43c0b8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -3745,6 +3745,9 @@ static void rvu_dbg_npc_init(struct rvu *rvu) debugfs_create_file("rx_miss_act_stats", 0444, rvu->rvu_dbg.npc, rvu, &rvu_dbg_npc_rx_miss_act_fops); + if (is_cn20k(rvu->pdev)) + npc_cn20k_debugfs_init(rvu); + if (!rvu->hw->cap.npc_exact_match_enabled) return; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 8658cb2143df..638d6d1f2071 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -16,6 +16,7 @@ #include "cgx.h" #include "npc_profile.h" #include "rvu_npc_hash.h" +#include "cn20k/npc.h" #define RSVD_MCAM_ENTRIES_PER_PF 3 /* Broadcast, Promisc and AllMulticast */ #define RSVD_MCAM_ENTRIES_PER_NIXLF 1 /* Ucast for LFs */ @@ -2162,6 +2163,9 @@ int rvu_npc_init(struct rvu *rvu) npc_load_mkex_profile(rvu, blkaddr, def_pfl_name); } + if (is_cn20k(rvu->pdev)) + return npc_cn20k_init(rvu); + return 0; } @@ -2177,6 +2181,9 @@ void rvu_npc_freemem(struct rvu *rvu) else kfree(rvu->kpu_fwdata); mutex_destroy(&mcam->lock); + + if (is_cn20k(rvu->pdev)) + npc_cn20k_deinit(rvu); } void rvu_npc_get_mcam_entry_alloc_info(struct rvu *rvu, u16 pcifunc, @@ -3032,7 +3039,6 @@ static int __npc_mcam_alloc_counter(struct rvu *rvu, if (!req->contig && req->count > NPC_MAX_NONCONTIG_COUNTERS) return NPC_MCAM_INVALID_REQ; - /* Check if unused counters are available or not */ if (!rvu_rsrc_free_count(&mcam->counters)) { return NPC_MCAM_ALLOC_FAILED; From 5868682b68dd4ff3b8ef7fcbf3db4e2de5ae8cb2 Mon Sep 17 00:00:00 2001 From: Suman Ghosh Date: Tue, 24 Feb 2026 13:29:58 +0530 Subject: [PATCH 0104/1561] octeontx2-af: npc: cn20k: KPM profile changes KPU (Kangaroo Processing Unit) profiles are primarily used to set the required packet pointers that will be used in later stages for key generation. In the new CN20K silicon variant, a new KPM profile is introduced alongside the existing KPU profiles. In CN20K, a total of 16 KPUs are grouped into 8 KPM profiles. As per the current hardware design, each KPM configuration contains a combination of 2 KPUs: KPM0 = KPU0 + KPU8 KPM1 = KPU1 + KPU9 ... KPM7 = KPU7 + KPU15 This configuration enables more efficient use of KPU resources. This patch adds support for the new KPM profile configuration. Signed-off-by: Suman Ghosh Signed-off-by: Ratheesh Kannoth Link: https://patch.msgid.link/20260224080009.4147301-3-rkannoth@marvell.com Signed-off-by: Jakub Kicinski --- .../ethernet/marvell/octeontx2/af/cn20k/npc.c | 229 ++++++++++++++++++ .../ethernet/marvell/octeontx2/af/cn20k/npc.h | 37 +++ .../ethernet/marvell/octeontx2/af/cn20k/reg.h | 19 +- .../net/ethernet/marvell/octeontx2/af/rvu.h | 4 +- .../ethernet/marvell/octeontx2/af/rvu_npc.c | 52 ++-- .../ethernet/marvell/octeontx2/af/rvu_npc.h | 17 ++ 6 files changed, 340 insertions(+), 18 deletions(-) create mode 100644 drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.h diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c index 4ee946380212..f65db0698c33 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c @@ -9,6 +9,7 @@ #include "cn20k/npc.h" #include "cn20k/reg.h" +#include "rvu_npc.h" static struct npc_priv_t npc_priv = { .num_banks = MAX_NUM_BANKS, @@ -20,6 +21,234 @@ static const char *npc_kw_name[NPC_MCAM_KEY_MAX] = { [NPC_MCAM_KEY_X4] = "X4", }; +static void npc_config_kpmcam(struct rvu *rvu, int blkaddr, + const struct npc_kpu_profile_cam *kpucam, + int kpm, int entry) +{ + struct npc_kpu_cam cam0 = {0}; + struct npc_kpu_cam cam1 = {0}; + + cam1.state = kpucam->state & kpucam->state_mask; + cam1.dp0_data = kpucam->dp0 & kpucam->dp0_mask; + cam1.dp1_data = kpucam->dp1 & kpucam->dp1_mask; + cam1.dp2_data = kpucam->dp2 & kpucam->dp2_mask; + + cam0.state = ~kpucam->state & kpucam->state_mask; + cam0.dp0_data = ~kpucam->dp0 & kpucam->dp0_mask; + cam0.dp1_data = ~kpucam->dp1 & kpucam->dp1_mask; + cam0.dp2_data = ~kpucam->dp2 & kpucam->dp2_mask; + + rvu_write64(rvu, blkaddr, + NPC_AF_KPMX_ENTRYX_CAMX(kpm, entry, 0), *(u64 *)&cam0); + rvu_write64(rvu, blkaddr, + NPC_AF_KPMX_ENTRYX_CAMX(kpm, entry, 1), *(u64 *)&cam1); +} + +static void +npc_config_kpmaction(struct rvu *rvu, int blkaddr, + const struct npc_kpu_profile_action *kpuaction, + int kpm, int entry, bool pkind) +{ + struct npc_kpm_action0 action0 = {0}; + struct npc_kpu_action1 action1 = {0}; + u64 reg; + + action1.errlev = kpuaction->errlev; + action1.errcode = kpuaction->errcode; + action1.dp0_offset = kpuaction->dp0_offset; + action1.dp1_offset = kpuaction->dp1_offset; + action1.dp2_offset = kpuaction->dp2_offset; + + if (pkind) + reg = NPC_AF_PKINDX_ACTION1(entry); + else + reg = NPC_AF_KPMX_ENTRYX_ACTION1(kpm, entry); + + rvu_write64(rvu, blkaddr, reg, *(u64 *)&action1); + + action0.byp_count = kpuaction->bypass_count & 0x7; + action0.capture_ena = kpuaction->cap_ena & 1; + action0.parse_done = kpuaction->parse_done & 1; + action0.next_state = kpuaction->next_state & 0xf; + action0.capture_lid = kpuaction->lid & 0x7; + + /* Parser functionality will work correctly even though + * upper flag bits are silently discarded + */ + action0.capture_ltype = kpuaction->ltype & 0xf; + action0.capture_flags = kpuaction->flags & 0xf; + action0.ptr_advance = kpuaction->ptr_advance; + + action0.var_len_offset = kpuaction->offset; + action0.var_len_mask = kpuaction->mask; + action0.var_len_right = kpuaction->right & 1; + action0.var_len_shift = kpuaction->shift & 1; + + if (pkind) + reg = NPC_AF_PKINDX_ACTION0(entry); + else + reg = NPC_AF_KPMX_ENTRYX_ACTION0(kpm, entry); + + rvu_write64(rvu, blkaddr, reg, *(u64 *)&action0); +} + +static void +npc_program_single_kpm_profile(struct rvu *rvu, int blkaddr, + int kpm, int start_entry, + const struct npc_kpu_profile *profile) +{ + int entry, num_entries, max_entries; + u64 idx; + + if (profile->cam_entries != profile->action_entries) { + dev_err(rvu->dev, + "kpm%d: CAM and action entries [%d != %d] not equal\n", + kpm, profile->cam_entries, profile->action_entries); + + WARN(1, "Fatal error\n"); + return; + } + + max_entries = rvu->hw->npc_kpu_entries / 2; + entry = start_entry; + /* Program CAM match entries for previous kpm extracted data */ + num_entries = min_t(int, profile->cam_entries, max_entries); + for (idx = 0; entry < num_entries + start_entry; entry++, idx++) + npc_config_kpmcam(rvu, blkaddr, &profile->cam[idx], + kpm, entry); + + entry = start_entry; + /* Program this kpm's actions */ + num_entries = min_t(int, profile->action_entries, max_entries); + for (idx = 0; entry < num_entries + start_entry; entry++, idx++) + npc_config_kpmaction(rvu, blkaddr, &profile->action[idx], + kpm, entry, false); +} + +static void +npc_enable_kpm_entry(struct rvu *rvu, int blkaddr, int kpm, int num_entries) +{ + u64 entry_mask; + + entry_mask = npc_enable_mask(num_entries); + /* Disable first KPU_MAX_CST_ENT entries for built-in profile */ + if (!rvu->kpu.custom) + entry_mask |= GENMASK_ULL(KPU_MAX_CST_ENT - 1, 0); + rvu_write64(rvu, blkaddr, + NPC_AF_KPMX_ENTRY_DISX(kpm, 0), entry_mask); + if (num_entries <= 64) { + /* Disable all the entries in W1, W2 and W3 */ + rvu_write64(rvu, blkaddr, + NPC_AF_KPMX_ENTRY_DISX(kpm, 1), + npc_enable_mask(0)); + rvu_write64(rvu, blkaddr, + NPC_AF_KPMX_ENTRY_DISX(kpm, 2), + npc_enable_mask(0)); + rvu_write64(rvu, blkaddr, + NPC_AF_KPMX_ENTRY_DISX(kpm, 3), + npc_enable_mask(0)); + return; + } + + num_entries = num_entries - 64; + entry_mask = npc_enable_mask(num_entries); + rvu_write64(rvu, blkaddr, + NPC_AF_KPMX_ENTRY_DISX(kpm, 1), entry_mask); + if (num_entries <= 64) { + /* Disable all the entries in W2 and W3 */ + rvu_write64(rvu, blkaddr, + NPC_AF_KPMX_ENTRY_DISX(kpm, 2), + npc_enable_mask(0)); + rvu_write64(rvu, blkaddr, + NPC_AF_KPMX_ENTRY_DISX(kpm, 3), + npc_enable_mask(0)); + return; + } + + num_entries = num_entries - 64; + entry_mask = npc_enable_mask(num_entries); + rvu_write64(rvu, blkaddr, + NPC_AF_KPMX_ENTRY_DISX(kpm, 2), entry_mask); + if (num_entries <= 64) { + /* Disable all the entries in W3 */ + rvu_write64(rvu, blkaddr, + NPC_AF_KPMX_ENTRY_DISX(kpm, 3), + npc_enable_mask(0)); + return; + } + + num_entries = num_entries - 64; + entry_mask = npc_enable_mask(num_entries); + rvu_write64(rvu, blkaddr, + NPC_AF_KPMX_ENTRY_DISX(kpm, 3), entry_mask); +} + +#define KPU_OFFSET 8 +static void npc_program_kpm_profile(struct rvu *rvu, int blkaddr, int num_kpms) +{ + const struct npc_kpu_profile *profile1, *profile2; + int idx, total_cam_entries; + + for (idx = 0; idx < num_kpms; idx++) { + profile1 = &rvu->kpu.kpu[idx]; + npc_program_single_kpm_profile(rvu, blkaddr, idx, 0, profile1); + profile2 = &rvu->kpu.kpu[idx + KPU_OFFSET]; + npc_program_single_kpm_profile(rvu, blkaddr, idx, + profile1->cam_entries, + profile2); + total_cam_entries = profile1->cam_entries + + profile2->cam_entries; + npc_enable_kpm_entry(rvu, blkaddr, idx, total_cam_entries); + rvu_write64(rvu, blkaddr, NPC_AF_KPMX_PASS2_OFFSET(idx), + profile1->cam_entries); + /* Enable the KPUs associated with this KPM */ + rvu_write64(rvu, blkaddr, NPC_AF_KPUX_CFG(idx), 0x01); + rvu_write64(rvu, blkaddr, NPC_AF_KPUX_CFG(idx + KPU_OFFSET), + 0x01); + } +} + +void npc_cn20k_parser_profile_init(struct rvu *rvu, int blkaddr) +{ + struct rvu_hwinfo *hw = rvu->hw; + int num_pkinds, idx; + + /* Disable all KPMs and their entries */ + for (idx = 0; idx < hw->npc_kpms; idx++) { + rvu_write64(rvu, blkaddr, + NPC_AF_KPMX_ENTRY_DISX(idx, 0), ~0ULL); + rvu_write64(rvu, blkaddr, + NPC_AF_KPMX_ENTRY_DISX(idx, 1), ~0ULL); + rvu_write64(rvu, blkaddr, + NPC_AF_KPMX_ENTRY_DISX(idx, 2), ~0ULL); + rvu_write64(rvu, blkaddr, + NPC_AF_KPMX_ENTRY_DISX(idx, 3), ~0ULL); + } + + for (idx = 0; idx < hw->npc_kpus; idx++) + rvu_write64(rvu, blkaddr, NPC_AF_KPUX_CFG(idx), 0x00); + + /* Load and customize KPU profile. */ + npc_load_kpu_profile(rvu); + + /* Configure KPU and KPM mapping for second pass */ + rvu_write64(rvu, blkaddr, NPC_AF_KPM_PASS2_CFG, 0x76543210); + + /* First program IKPU profile i.e PKIND configs. + * Check HW max count to avoid configuring junk or + * writing to unsupported CSR addresses. + */ + num_pkinds = rvu->kpu.pkinds; + num_pkinds = min_t(int, hw->npc_pkinds, num_pkinds); + + for (idx = 0; idx < num_pkinds; idx++) + npc_config_kpmaction(rvu, blkaddr, &rvu->kpu.ikpu[idx], + 0, idx, true); + + /* Program KPM CAM and Action profiles */ + npc_program_kpm_profile(rvu, blkaddr, hw->npc_kpms); +} + struct npc_priv_t *npc_priv_get(void) { return &npc_priv; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h index d54253fd1e68..fb4d5f463461 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h @@ -94,6 +94,42 @@ struct npc_priv_t { bool init_done; }; +struct npc_kpm_action0 { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 rsvd_63_57 : 7; + u64 byp_count : 3; + u64 capture_ena : 1; + u64 parse_done : 1; + u64 next_state : 8; + u64 rsvd_43 : 1; + u64 capture_lid : 3; + u64 capture_ltype : 4; + u64 rsvd_32_35 : 4; + u64 capture_flags : 4; + u64 ptr_advance : 8; + u64 var_len_offset : 8; + u64 var_len_mask : 8; + u64 var_len_right : 1; + u64 var_len_shift : 3; +#else + u64 var_len_shift : 3; + u64 var_len_right : 1; + u64 var_len_mask : 8; + u64 var_len_offset : 8; + u64 ptr_advance : 8; + u64 capture_flags : 4; + u64 rsvd_32_35 : 4; + u64 capture_ltype : 4; + u64 capture_lid : 3; + u64 rsvd_43 : 1; + u64 next_state : 8; + u64 parse_done : 1; + u64 capture_ena : 1; + u64 byp_count : 3; + u64 rsvd_63_57 : 7; +#endif +}; + struct rvu; struct npc_priv_t *npc_priv_get(void); @@ -107,4 +143,5 @@ int npc_cn20k_ref_idx_alloc(struct rvu *rvu, int pcifunc, int key_type, int prio, u16 *mcam_idx, int ref, int limit, bool contig, int count); int npc_cn20k_idx_free(struct rvu *rvu, u16 *mcam_idx, int count); +void npc_cn20k_parser_profile_init(struct rvu *rvu, int blkaddr); #endif /* NPC_CN20K_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/reg.h b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/reg.h index 098b0247848b..073d4b815681 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/reg.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/reg.h @@ -77,8 +77,21 @@ #define RVU_MBOX_VF_INT_ENA_W1S (0x30) #define RVU_MBOX_VF_INT_ENA_W1C (0x38) -/* NPC registers */ -#define NPC_AF_MCAM_SECTIONX_CFG_EXT(a) (0xf000000ull | (a) << 3) - #define RVU_MBOX_VF_VFAF_TRIGX(a) (0x2000 | (a) << 3) +/* NPC registers */ +#define NPC_AF_INTFX_EXTRACTORX_CFG(a, b) \ + (0x908000ull | (a) << 10 | (b) << 3) +#define NPC_AF_INTFX_EXTRACTORX_LTX_CFG(a, b, c) \ + (0x900000ull | (a) << 13 | (b) << 8 | (c) << 3) +#define NPC_AF_KPMX_ENTRYX_CAMX(a, b, c) \ + (0x100000ull | (a) << 14 | (b) << 6 | (c) << 3) +#define NPC_AF_KPMX_ENTRYX_ACTION0(a, b) \ + (0x100020ull | (a) << 14 | (b) << 6) +#define NPC_AF_KPMX_ENTRYX_ACTION1(a, b) \ + (0x100028ull | (a) << 14 | (b) << 6) +#define NPC_AF_KPMX_ENTRY_DISX(a, b) (0x180000ull | (a) << 6 | (b) << 3) +#define NPC_AF_KPM_PASS2_CFG 0x580 +#define NPC_AF_KPMX_PASS2_OFFSET(a) (0x190000ull | (a) << 3) +#define NPC_AF_MCAM_SECTIONX_CFG_EXT(a) (0xC000000ull | (a) << 3) + #endif /* RVU_MBOX_REG_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index e85dac2c806d..14ca28ab493a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -447,9 +447,11 @@ struct rvu_hwinfo { u8 sdp_links; u8 cpt_links; /* Number of CPT links */ u8 npc_kpus; /* No of parser units */ + u8 npc_kpms; /* Number of enhanced parser units */ + u8 npc_kex_extr; /* Number of LDATA extractors per KEX */ u8 npc_pkinds; /* No of port kinds */ u8 npc_intfs; /* No of interfaces */ - u8 npc_kpu_entries; /* No of KPU entries */ + u16 npc_kpu_entries; /* No of KPU entries */ u16 npc_counters; /* No of match stats counters */ u32 lbk_bufsize; /* FIFO size supported by LBK */ bool npc_ext_set; /* Extended register set */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 638d6d1f2071..a113cab1e1a5 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -17,6 +17,7 @@ #include "npc_profile.h" #include "rvu_npc_hash.h" #include "cn20k/npc.h" +#include "rvu_npc.h" #define RSVD_MCAM_ENTRIES_PER_PF 3 /* Broadcast, Promisc and AllMulticast */ #define RSVD_MCAM_ENTRIES_PER_NIXLF 1 /* Ucast for LFs */ @@ -1413,9 +1414,9 @@ program_mkex: iounmap(mkex_prfl_addr); } -static void npc_config_kpuaction(struct rvu *rvu, int blkaddr, - const struct npc_kpu_profile_action *kpuaction, - int kpu, int entry, bool pkind) +void npc_config_kpuaction(struct rvu *rvu, int blkaddr, + const struct npc_kpu_profile_action *kpuaction, + int kpu, int entry, bool pkind) { struct npc_kpu_action0 action0 = {0}; struct npc_kpu_action1 action1 = {0}; @@ -1478,7 +1479,7 @@ static void npc_config_kpucam(struct rvu *rvu, int blkaddr, NPC_AF_KPUX_ENTRYX_CAMX(kpu, entry, 1), *(u64 *)&cam1); } -static inline u64 enable_mask(int count) +u64 npc_enable_mask(int count) { return (((count) < 64) ? ~(BIT_ULL(count) - 1) : (0x00ULL)); } @@ -1511,7 +1512,7 @@ static void npc_program_kpu_profile(struct rvu *rvu, int blkaddr, int kpu, /* Enable all programmed entries */ num_entries = min_t(int, profile->action_entries, profile->cam_entries); - entry_mask = enable_mask(num_entries); + entry_mask = npc_enable_mask(num_entries); /* Disable first KPU_MAX_CST_ENT entries for built-in profile */ if (!rvu->kpu.custom) entry_mask |= GENMASK_ULL(KPU_MAX_CST_ENT - 1, 0); @@ -1520,7 +1521,7 @@ static void npc_program_kpu_profile(struct rvu *rvu, int blkaddr, int kpu, if (num_entries > 64) { rvu_write64(rvu, blkaddr, NPC_AF_KPUX_ENTRY_DISX(kpu, 1), - enable_mask(num_entries - 64)); + npc_enable_mask(num_entries - 64)); } /* Enable this KPU */ @@ -1708,7 +1709,7 @@ done: return ret; } -static void npc_load_kpu_profile(struct rvu *rvu) +void npc_load_kpu_profile(struct rvu *rvu) { struct npc_kpu_profile_adapter *profile = &rvu->kpu; const char *kpu_profile = rvu->kpu_pfl_name; @@ -1850,12 +1851,19 @@ int npc_mcam_rsrcs_init(struct rvu *rvu, int blkaddr) mcam->keysize = cfg; /* Number of banks combined per MCAM entry */ - if (cfg == NPC_MCAM_KEY_X4) - mcam->banks_per_entry = 4; - else if (cfg == NPC_MCAM_KEY_X2) - mcam->banks_per_entry = 2; - else - mcam->banks_per_entry = 1; + if (is_cn20k(rvu->pdev)) { + if (cfg == NPC_MCAM_KEY_X2) + mcam->banks_per_entry = 1; + else + mcam->banks_per_entry = 2; + } else { + if (cfg == NPC_MCAM_KEY_X4) + mcam->banks_per_entry = 4; + else if (cfg == NPC_MCAM_KEY_X2) + mcam->banks_per_entry = 2; + else + mcam->banks_per_entry = 1; + } /* Reserve one MCAM entry for each of the NIX LF to * guarantee space to install default matching DMAC rule. @@ -1985,6 +1993,19 @@ static void rvu_npc_hw_init(struct rvu *rvu, int blkaddr) hw->npc_pkinds = (npc_const1 >> 12) & 0xFFULL; hw->npc_kpu_entries = npc_const1 & 0xFFFULL; hw->npc_kpus = (npc_const >> 8) & 0x1FULL; + /* For Cn20k silicon, check if enhanced parser + * is present, then set the NUM_KPMS = NUM_KPUS / 2 and + * number of LDATA extractors per KEX. + */ + if (is_cn20k(rvu->pdev)) { + if (!(npc_const1 & BIT_ULL(62))) { + WARN(1, "Enhanced parser is not supported\n"); + return; + } + hw->npc_kpms = hw->npc_kpus / 2; + hw->npc_kex_extr = (npc_const1 >> 36) & 0x3FULL; + } + hw->npc_intfs = npc_const & 0xFULL; hw->npc_counters = (npc_const >> 48) & 0xFFFFULL; @@ -2119,7 +2140,10 @@ int rvu_npc_init(struct rvu *rvu) return -ENOMEM; /* Configure KPU profile */ - npc_parser_profile_init(rvu, blkaddr); + if (is_cn20k(rvu->pdev)) + npc_cn20k_parser_profile_init(rvu, blkaddr); + else + npc_parser_profile_init(rvu, blkaddr); /* Config Outer L2, IPv4's NPC layer info */ rvu_write64(rvu, blkaddr, NPC_AF_PCK_DEF_OL2, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.h new file mode 100644 index 000000000000..80c63618ec47 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell RVU Admin Function driver + * + * Copyright (C) 2026 Marvell. + * + */ + +#ifndef RVU_NPC_H +#define RVU_NPC_H + +u64 npc_enable_mask(int count); +void npc_load_kpu_profile(struct rvu *rvu); +void npc_config_kpuaction(struct rvu *rvu, int blkaddr, + const struct npc_kpu_profile_action *kpuaction, + int kpu, int entry, bool pkind); + +#endif /* RVU_NPC_H */ From a2df2f95eac06352f33636c0ba189608a64646ef Mon Sep 17 00:00:00 2001 From: Suman Ghosh Date: Tue, 24 Feb 2026 13:29:59 +0530 Subject: [PATCH 0105/1561] octeontx2-af: npc: cn20k: Add default profile Default mkex profile for cn20k silicon. This commit changes attribute of objects to may_be_unused to avoid compiler warning Signed-off-by: Suman Ghosh Signed-off-by: Ratheesh Kannoth Link: https://patch.msgid.link/20260224080009.4147301-4-rkannoth@marvell.com Signed-off-by: Jakub Kicinski --- .../ethernet/marvell/octeontx2/af/cn20k/npc.c | 176 +++++++++++++++++- .../ethernet/marvell/octeontx2/af/cn20k/npc.h | 18 ++ .../marvell/octeontx2/af/npc_profile.h | 72 +++---- 3 files changed, 229 insertions(+), 37 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c index f65db0698c33..0dec50b1e84f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c @@ -7,9 +7,13 @@ #include #include +#include "rvu.h" +#include "npc.h" +#include "npc_profile.h" +#include "rvu_npc_hash.h" +#include "rvu_npc.h" #include "cn20k/npc.h" #include "cn20k/reg.h" -#include "rvu_npc.h" static struct npc_priv_t npc_priv = { .num_banks = MAX_NUM_BANKS, @@ -21,6 +25,176 @@ static const char *npc_kw_name[NPC_MCAM_KEY_MAX] = { [NPC_MCAM_KEY_X4] = "X4", }; +#define KEX_EXTR_CFG(bytesm1, hdr_ofs, ena, key_ofs) \ + (((bytesm1) << 16) | ((hdr_ofs) << 8) | ((ena) << 7) | \ + ((key_ofs) & 0x3F)) + +static struct npc_mcam_kex_extr npc_mkex_extr_default = { + .mkex_sign = MKEX_SIGN, + .name = "default", + .kpu_version = NPC_KPU_PROFILE_VER, + .keyx_cfg = { + /* nibble: LA..LE (ltype only) + Error code + Channel */ + [NIX_INTF_RX] = ((u64)NPC_MCAM_KEY_DYN << 32) | + NPC_PARSE_NIBBLE_INTF_RX | + NPC_PARSE_NIBBLE_ERRCODE, + + /* nibble: LA..LE (ltype only) */ + [NIX_INTF_TX] = ((u64)NPC_MCAM_KEY_X2 << 32) | + NPC_PARSE_NIBBLE_INTF_TX, + }, + .intf_extr_lid = { + /* Default RX MCAM KEX profile */ + [NIX_INTF_RX] = { NPC_LID_LA, NPC_LID_LA, NPC_LID_LB, NPC_LID_LB, + NPC_LID_LC, NPC_LID_LC, NPC_LID_LD }, + [NIX_INTF_TX] = { NPC_LID_LA, NPC_LID_LA, NPC_LID_LB, NPC_LID_LB, + NPC_LID_LC, NPC_LID_LD }, + }, + .intf_extr_lt = { + /* Default RX MCAM KEX profile */ + [NIX_INTF_RX] = { + [0] = { + /* Layer A: Ethernet: */ + [NPC_LT_LA_ETHER] = + /* DMAC: 6 bytes, KW1[63:15] */ + KEX_EXTR_CFG(0x05, 0x0, 0x1, + NPC_KEXOF_DMAC + 1), + [NPC_LT_LA_CPT_HDR] = + /* DMAC: 6 bytes, KW1[63:15] */ + KEX_EXTR_CFG(0x05, 0x0, 0x1, + NPC_KEXOF_DMAC + 1), + }, + [1] = { + /* Layer A: Ethernet: */ + [NPC_LT_LA_ETHER] = + /* Ethertype: 2 bytes, KW0[63:48] */ + KEX_EXTR_CFG(0x01, 0xc, 0x1, 0x6), + [NPC_LT_LA_CPT_HDR] = + /* Ethertype: 2 bytes, KW0[63:48] */ + KEX_EXTR_CFG(0x01, 0xc, 0x1, 0x6), + }, + [2] = { + /* Layer B: Single VLAN (CTAG) */ + [NPC_LT_LB_CTAG] = + /* CTAG VLAN: 2 bytes, KW1[15:0] */ + KEX_EXTR_CFG(0x01, 0x2, 0x1, 0x8), + /* Layer B: Stacked VLAN (STAG|QinQ) */ + [NPC_LT_LB_STAG_QINQ] = + /* Outer VLAN: 2 bytes, KW1[15:0] */ + KEX_EXTR_CFG(0x01, 0x2, 0x1, 0x8), + [NPC_LT_LB_FDSA] = + /* SWITCH PORT: 1 byte, KW1[7:0] */ + KEX_EXTR_CFG(0x0, 0x1, 0x1, 0x8), + }, + [3] = { + [NPC_LT_LB_CTAG] = + /* Ethertype: 2 bytes, KW0[63:48] */ + KEX_EXTR_CFG(0x01, 0x4, 0x1, 0x6), + [NPC_LT_LB_STAG_QINQ] = + /* Ethertype: 2 bytes, KW0[63:48] */ + KEX_EXTR_CFG(0x01, 0x8, 0x1, 0x6), + [NPC_LT_LB_FDSA] = + /* Ethertype: 2 bytes, KW0[63:48] */ + KEX_EXTR_CFG(0x01, 0x4, 0x1, 0x6), + }, + [4] = { + /* Layer C: IPv4 */ + [NPC_LT_LC_IP] = + /* SIP+DIP: 8 bytes, KW3[7:0], KW2[63:8] */ + KEX_EXTR_CFG(0x07, 0xc, 0x1, 0x11), + /* Layer C: IPv6 */ + [NPC_LT_LC_IP6] = + /* Everything up to SADDR: 8 bytes, KW3[7:0], + * KW2[63:8] + */ + KEX_EXTR_CFG(0x07, 0x0, 0x1, 0x11), + }, + [5] = { + [NPC_LT_LC_IP] = + /* TOS: 1 byte, KW2[7:0] */ + KEX_EXTR_CFG(0x0, 0x1, 0x1, 0x10), + }, + [6] = { + /* Layer D:UDP */ + [NPC_LT_LD_UDP] = + /* SPORT+DPORT: 4 bytes, KW3[39:8] */ + KEX_EXTR_CFG(0x3, 0x0, 0x1, 0x19), + /* Layer D:TCP */ + [NPC_LT_LD_TCP] = + /* SPORT+DPORT: 4 bytes, KW3[39:8] */ + KEX_EXTR_CFG(0x3, 0x0, 0x1, 0x19), + }, + }, + /* Default TX MCAM KEX profile */ + [NIX_INTF_TX] = { + [0] = { + /* Layer A: NIX_INST_HDR_S + Ethernet */ + /* NIX appends 8 bytes of NIX_INST_HDR_S at the + * start of each TX packet supplied to NPC. + */ + [NPC_LT_LA_IH_NIX_ETHER] = + /* PF_FUNC: 2B , KW0 [47:32] */ + KEX_EXTR_CFG(0x01, 0x0, 0x1, 0x4), + /* Layer A: HiGig2: */ + [NPC_LT_LA_IH_NIX_HIGIG2_ETHER] = + /* PF_FUNC: 2B , KW0 [47:32] */ + KEX_EXTR_CFG(0x01, 0x0, 0x1, 0x4), + }, + [1] = { + [NPC_LT_LA_IH_NIX_ETHER] = + /* SQ_ID 3 bytes, KW1[63:16] */ + KEX_EXTR_CFG(0x02, 0x02, 0x1, 0xa), + [NPC_LT_LA_IH_NIX_HIGIG2_ETHER] = + /* VID: 2 bytes, KW1[31:16] */ + KEX_EXTR_CFG(0x01, 0x10, 0x1, 0xa), + }, + [2] = { + /* Layer B: Single VLAN (CTAG) */ + [NPC_LT_LB_CTAG] = + /* CTAG VLAN[2..3] KW0[63:48] */ + KEX_EXTR_CFG(0x01, 0x2, 0x1, 0x6), + /* Layer B: Stacked VLAN (STAG|QinQ) */ + [NPC_LT_LB_STAG_QINQ] = + /* Outer VLAN: 2 bytes, KW0[63:48] */ + KEX_EXTR_CFG(0x01, 0x2, 0x1, 0x6), + }, + [3] = { + [NPC_LT_LB_CTAG] = + /* CTAG VLAN[2..3] KW1[15:0] */ + KEX_EXTR_CFG(0x01, 0x4, 0x1, 0x8), + [NPC_LT_LB_STAG_QINQ] = + /* Outer VLAN: 2 Bytes, KW1[15:0] */ + KEX_EXTR_CFG(0x01, 0x8, 0x1, 0x8), + }, + [4] = { + /* Layer C: IPv4 */ + [NPC_LT_LC_IP] = + /* SIP+DIP: 8 bytes, KW2[63:0] */ + KEX_EXTR_CFG(0x07, 0xc, 0x1, 0x10), + /* Layer C: IPv6 */ + [NPC_LT_LC_IP6] = + /* Everything up to SADDR: 8 bytes, KW2[63:0] */ + KEX_EXTR_CFG(0x07, 0x0, 0x1, 0x10), + }, + [5] = { + /* Layer D:UDP */ + [NPC_LT_LD_UDP] = + /* SPORT+DPORT: 4 bytes, KW3[31:0] */ + KEX_EXTR_CFG(0x3, 0x0, 0x1, 0x18), + /* Layer D:TCP */ + [NPC_LT_LD_TCP] = + /* SPORT+DPORT: 4 bytes, KW3[31:0] */ + KEX_EXTR_CFG(0x3, 0x0, 0x1, 0x18), + }, + }, + }, +}; + +struct npc_mcam_kex_extr *npc_mkex_extr_default_get(void) +{ + return &npc_mkex_extr_default; +} + static void npc_config_kpmcam(struct rvu *rvu, int blkaddr, const struct npc_kpu_profile_cam *kpucam, int kpm, int entry) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h index fb4d5f463461..7cfca5cfe5fa 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h @@ -130,6 +130,23 @@ struct npc_kpm_action0 { #endif }; +struct npc_mcam_kex_extr { + /* MKEX Profle Header */ + u64 mkex_sign; /* "mcam-kex-profile" (8 bytes/ASCII characters) */ + u8 name[MKEX_NAME_LEN]; /* MKEX Profile name */ + u64 cpu_model; /* Format as profiled by CPU hardware */ + u64 kpu_version; /* KPU firmware/profile version */ + u64 reserved; /* Reserved for extension */ + + /* MKEX Profle Data */ + u64 keyx_cfg[NPC_MAX_INTF]; /* NPC_AF_INTF(0..1)_KEX_CFG */ +#define NPC_MAX_EXTRACTOR 24 + /* MKEX Extractor data */ + u64 intf_extr_lid[NPC_MAX_INTF][NPC_MAX_EXTRACTOR]; + /* KEX configuration per extractor */ + u64 intf_extr_lt[NPC_MAX_INTF][NPC_MAX_EXTRACTOR][NPC_MAX_LT]; +} __packed; + struct rvu; struct npc_priv_t *npc_priv_get(void); @@ -144,4 +161,5 @@ int npc_cn20k_ref_idx_alloc(struct rvu *rvu, int pcifunc, int key_type, bool contig, int count); int npc_cn20k_idx_free(struct rvu *rvu, u16 *mcam_idx, int count); void npc_cn20k_parser_profile_init(struct rvu *rvu, int blkaddr); +struct npc_mcam_kex_extr *npc_mkex_extr_default_get(void); #endif /* NPC_CN20K_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h index 41de72c8607f..561b01fcdbde 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h @@ -489,7 +489,7 @@ enum NPC_ERRLEV_E { 0, 0, 0, 0, \ } -static struct npc_kpu_profile_action ikpu_action_entries[] = { +static struct npc_kpu_profile_action ikpu_action_entries[] __maybe_unused = { { NPC_ERRLEV_RE, NPC_EC_NOERR, 12, 16, 20, 0, 0, @@ -1068,7 +1068,7 @@ static struct npc_kpu_profile_action ikpu_action_entries[] = { }, }; -static struct npc_kpu_profile_cam kpu1_cam_entries[] = { +static struct npc_kpu_profile_cam kpu1_cam_entries[] __maybe_unused = { NPC_KPU_NOP_CAM, NPC_KPU_NOP_CAM, NPC_KPU_NOP_CAM, @@ -1878,7 +1878,7 @@ static struct npc_kpu_profile_cam kpu1_cam_entries[] = { }, }; -static struct npc_kpu_profile_cam kpu2_cam_entries[] = { +static struct npc_kpu_profile_cam kpu2_cam_entries[] __maybe_unused = { NPC_KPU_NOP_CAM, NPC_KPU_NOP_CAM, NPC_KPU_NOP_CAM, @@ -2823,7 +2823,7 @@ static struct npc_kpu_profile_cam kpu2_cam_entries[] = { }, }; -static struct npc_kpu_profile_cam kpu3_cam_entries[] = { +static struct npc_kpu_profile_cam kpu3_cam_entries[] __maybe_unused = { NPC_KPU_NOP_CAM, NPC_KPU_NOP_CAM, NPC_KPU_NOP_CAM, @@ -3804,7 +3804,7 @@ static struct npc_kpu_profile_cam kpu3_cam_entries[] = { }, }; -static struct npc_kpu_profile_cam kpu4_cam_entries[] = { +static struct npc_kpu_profile_cam kpu4_cam_entries[] __maybe_unused = { NPC_KPU_NOP_CAM, NPC_KPU_NOP_CAM, NPC_KPU_NOP_CAM, @@ -4119,7 +4119,7 @@ static struct npc_kpu_profile_cam kpu4_cam_entries[] = { }, }; -static struct npc_kpu_profile_cam kpu5_cam_entries[] = { +static struct npc_kpu_profile_cam kpu5_cam_entries[] __maybe_unused = { NPC_KPU_NOP_CAM, NPC_KPU_NOP_CAM, NPC_KPU_NOP_CAM, @@ -5172,7 +5172,7 @@ static struct npc_kpu_profile_cam kpu5_cam_entries[] = { }, }; -static struct npc_kpu_profile_cam kpu6_cam_entries[] = { +static struct npc_kpu_profile_cam kpu6_cam_entries[] __maybe_unused = { NPC_KPU_NOP_CAM, NPC_KPU_NOP_CAM, NPC_KPU_NOP_CAM, @@ -5901,7 +5901,7 @@ static struct npc_kpu_profile_cam kpu6_cam_entries[] = { }, }; -static struct npc_kpu_profile_cam kpu7_cam_entries[] = { +static struct npc_kpu_profile_cam kpu7_cam_entries[] __maybe_unused = { NPC_KPU_NOP_CAM, NPC_KPU_NOP_CAM, NPC_KPU_NOP_CAM, @@ -6252,7 +6252,7 @@ static struct npc_kpu_profile_cam kpu7_cam_entries[] = { }, }; -static struct npc_kpu_profile_cam kpu8_cam_entries[] = { +static struct npc_kpu_profile_cam kpu8_cam_entries[] __maybe_unused = { NPC_KPU_NOP_CAM, NPC_KPU_NOP_CAM, NPC_KPU_NOP_CAM, @@ -7089,7 +7089,7 @@ static struct npc_kpu_profile_cam kpu8_cam_entries[] = { }, }; -static struct npc_kpu_profile_cam kpu9_cam_entries[] = { +static struct npc_kpu_profile_cam kpu9_cam_entries[] __maybe_unused = { NPC_KPU_NOP_CAM, NPC_KPU_NOP_CAM, NPC_KPU_NOP_CAM, @@ -7575,7 +7575,7 @@ static struct npc_kpu_profile_cam kpu9_cam_entries[] = { }, }; -static struct npc_kpu_profile_cam kpu10_cam_entries[] = { +static struct npc_kpu_profile_cam kpu10_cam_entries[] __maybe_unused = { NPC_KPU_NOP_CAM, NPC_KPU_NOP_CAM, NPC_KPU_NOP_CAM, @@ -7746,7 +7746,7 @@ static struct npc_kpu_profile_cam kpu10_cam_entries[] = { }, }; -static struct npc_kpu_profile_cam kpu11_cam_entries[] = { +static struct npc_kpu_profile_cam kpu11_cam_entries[] __maybe_unused = { NPC_KPU_NOP_CAM, NPC_KPU_NOP_CAM, NPC_KPU_NOP_CAM, @@ -8061,7 +8061,7 @@ static struct npc_kpu_profile_cam kpu11_cam_entries[] = { }, }; -static struct npc_kpu_profile_cam kpu12_cam_entries[] = { +static struct npc_kpu_profile_cam kpu12_cam_entries[] __maybe_unused = { NPC_KPU_NOP_CAM, NPC_KPU_NOP_CAM, NPC_KPU_NOP_CAM, @@ -8322,7 +8322,7 @@ static struct npc_kpu_profile_cam kpu12_cam_entries[] = { }, }; -static struct npc_kpu_profile_cam kpu13_cam_entries[] = { +static struct npc_kpu_profile_cam kpu13_cam_entries[] __maybe_unused = { NPC_KPU_NOP_CAM, NPC_KPU_NOP_CAM, NPC_KPU_NOP_CAM, @@ -8340,7 +8340,7 @@ static struct npc_kpu_profile_cam kpu13_cam_entries[] = { }, }; -static struct npc_kpu_profile_cam kpu14_cam_entries[] = { +static struct npc_kpu_profile_cam kpu14_cam_entries[] __maybe_unused = { NPC_KPU_NOP_CAM, NPC_KPU_NOP_CAM, NPC_KPU_NOP_CAM, @@ -8358,7 +8358,7 @@ static struct npc_kpu_profile_cam kpu14_cam_entries[] = { }, }; -static struct npc_kpu_profile_cam kpu15_cam_entries[] = { +static struct npc_kpu_profile_cam kpu15_cam_entries[] __maybe_unused = { NPC_KPU_NOP_CAM, NPC_KPU_NOP_CAM, NPC_KPU_NOP_CAM, @@ -8565,7 +8565,7 @@ static struct npc_kpu_profile_cam kpu15_cam_entries[] = { }, }; -static struct npc_kpu_profile_cam kpu16_cam_entries[] = { +static struct npc_kpu_profile_cam kpu16_cam_entries[] __maybe_unused = { NPC_KPU_NOP_CAM, NPC_KPU_NOP_CAM, NPC_KPU_NOP_CAM, @@ -8628,7 +8628,7 @@ static struct npc_kpu_profile_cam kpu16_cam_entries[] = { }, }; -static struct npc_kpu_profile_action kpu1_action_entries[] = { +static struct npc_kpu_profile_action kpu1_action_entries[] __maybe_unused = { NPC_KPU_NOP_ACTION, NPC_KPU_NOP_ACTION, NPC_KPU_NOP_ACTION, @@ -9368,7 +9368,7 @@ static struct npc_kpu_profile_action kpu1_action_entries[] = { }, }; -static struct npc_kpu_profile_action kpu2_action_entries[] = { +static struct npc_kpu_profile_action kpu2_action_entries[] __maybe_unused = { NPC_KPU_NOP_ACTION, NPC_KPU_NOP_ACTION, NPC_KPU_NOP_ACTION, @@ -10209,7 +10209,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = { }, }; -static struct npc_kpu_profile_action kpu3_action_entries[] = { +static struct npc_kpu_profile_action kpu3_action_entries[] __maybe_unused = { NPC_KPU_NOP_ACTION, NPC_KPU_NOP_ACTION, NPC_KPU_NOP_ACTION, @@ -11082,7 +11082,7 @@ static struct npc_kpu_profile_action kpu3_action_entries[] = { }, }; -static struct npc_kpu_profile_action kpu4_action_entries[] = { +static struct npc_kpu_profile_action kpu4_action_entries[] __maybe_unused = { NPC_KPU_NOP_ACTION, NPC_KPU_NOP_ACTION, NPC_KPU_NOP_ACTION, @@ -11363,7 +11363,7 @@ static struct npc_kpu_profile_action kpu4_action_entries[] = { }, }; -static struct npc_kpu_profile_action kpu5_action_entries[] = { +static struct npc_kpu_profile_action kpu5_action_entries[] __maybe_unused = { NPC_KPU_NOP_ACTION, NPC_KPU_NOP_ACTION, NPC_KPU_NOP_ACTION, @@ -12300,7 +12300,7 @@ static struct npc_kpu_profile_action kpu5_action_entries[] = { }, }; -static struct npc_kpu_profile_action kpu6_action_entries[] = { +static struct npc_kpu_profile_action kpu6_action_entries[] __maybe_unused = { NPC_KPU_NOP_ACTION, NPC_KPU_NOP_ACTION, NPC_KPU_NOP_ACTION, @@ -12949,7 +12949,7 @@ static struct npc_kpu_profile_action kpu6_action_entries[] = { }, }; -static struct npc_kpu_profile_action kpu7_action_entries[] = { +static struct npc_kpu_profile_action kpu7_action_entries[] __maybe_unused = { NPC_KPU_NOP_ACTION, NPC_KPU_NOP_ACTION, NPC_KPU_NOP_ACTION, @@ -13262,7 +13262,7 @@ static struct npc_kpu_profile_action kpu7_action_entries[] = { }, }; -static struct npc_kpu_profile_action kpu8_action_entries[] = { +static struct npc_kpu_profile_action kpu8_action_entries[] __maybe_unused = { NPC_KPU_NOP_ACTION, NPC_KPU_NOP_ACTION, NPC_KPU_NOP_ACTION, @@ -14007,7 +14007,7 @@ static struct npc_kpu_profile_action kpu8_action_entries[] = { }, }; -static struct npc_kpu_profile_action kpu9_action_entries[] = { +static struct npc_kpu_profile_action kpu9_action_entries[] __maybe_unused = { NPC_KPU_NOP_ACTION, NPC_KPU_NOP_ACTION, NPC_KPU_NOP_ACTION, @@ -14440,7 +14440,7 @@ static struct npc_kpu_profile_action kpu9_action_entries[] = { }, }; -static struct npc_kpu_profile_action kpu10_action_entries[] = { +static struct npc_kpu_profile_action kpu10_action_entries[] __maybe_unused = { NPC_KPU_NOP_ACTION, NPC_KPU_NOP_ACTION, NPC_KPU_NOP_ACTION, @@ -14593,7 +14593,7 @@ static struct npc_kpu_profile_action kpu10_action_entries[] = { }, }; -static struct npc_kpu_profile_action kpu11_action_entries[] = { +static struct npc_kpu_profile_action kpu11_action_entries[] __maybe_unused = { NPC_KPU_NOP_ACTION, NPC_KPU_NOP_ACTION, NPC_KPU_NOP_ACTION, @@ -14874,7 +14874,7 @@ static struct npc_kpu_profile_action kpu11_action_entries[] = { }, }; -static struct npc_kpu_profile_action kpu12_action_entries[] = { +static struct npc_kpu_profile_action kpu12_action_entries[] __maybe_unused = { NPC_KPU_NOP_ACTION, NPC_KPU_NOP_ACTION, NPC_KPU_NOP_ACTION, @@ -15107,7 +15107,7 @@ static struct npc_kpu_profile_action kpu12_action_entries[] = { }, }; -static struct npc_kpu_profile_action kpu13_action_entries[] = { +static struct npc_kpu_profile_action kpu13_action_entries[] __maybe_unused = { NPC_KPU_NOP_ACTION, NPC_KPU_NOP_ACTION, NPC_KPU_NOP_ACTION, @@ -15124,7 +15124,7 @@ static struct npc_kpu_profile_action kpu13_action_entries[] = { }, }; -static struct npc_kpu_profile_action kpu14_action_entries[] = { +static struct npc_kpu_profile_action kpu14_action_entries[] __maybe_unused = { NPC_KPU_NOP_ACTION, NPC_KPU_NOP_ACTION, NPC_KPU_NOP_ACTION, @@ -15141,7 +15141,7 @@ static struct npc_kpu_profile_action kpu14_action_entries[] = { }, }; -static struct npc_kpu_profile_action kpu15_action_entries[] = { +static struct npc_kpu_profile_action kpu15_action_entries[] __maybe_unused = { NPC_KPU_NOP_ACTION, NPC_KPU_NOP_ACTION, NPC_KPU_NOP_ACTION, @@ -15326,7 +15326,7 @@ static struct npc_kpu_profile_action kpu15_action_entries[] = { }, }; -static struct npc_kpu_profile_action kpu16_action_entries[] = { +static struct npc_kpu_profile_action kpu16_action_entries[] __maybe_unused = { NPC_KPU_NOP_ACTION, NPC_KPU_NOP_ACTION, NPC_KPU_NOP_ACTION, @@ -15383,7 +15383,7 @@ static struct npc_kpu_profile_action kpu16_action_entries[] = { }, }; -static struct npc_kpu_profile npc_kpu_profiles[] = { +static struct npc_kpu_profile npc_kpu_profiles[] __maybe_unused = { { ARRAY_SIZE(kpu1_cam_entries), ARRAY_SIZE(kpu1_action_entries), @@ -15482,7 +15482,7 @@ static struct npc_kpu_profile npc_kpu_profiles[] = { }, }; -static struct npc_lt_def_cfg npc_lt_defaults = { +static struct npc_lt_def_cfg npc_lt_defaults __maybe_unused = { .rx_ol2 = { .lid = NPC_LID_LA, .ltype_match = NPC_LT_LA_ETHER, @@ -15604,7 +15604,7 @@ static struct npc_lt_def_cfg npc_lt_defaults = { }, }; -static struct npc_mcam_kex npc_mkex_default = { +static struct npc_mcam_kex npc_mkex_default __maybe_unused = { .mkex_sign = MKEX_SIGN, .name = "default", .kpu_version = NPC_KPU_PROFILE_VER, From ef992a0f12e8268908ebabe91ab40442a876f303 Mon Sep 17 00:00:00 2001 From: Suman Ghosh Date: Tue, 24 Feb 2026 13:30:00 +0530 Subject: [PATCH 0106/1561] octeontx2-af: npc: cn20k: MKEX profile support In new silicon variant cn20k, a new parser profile is introduced. Instead of having two layer-data information per key field type, a new key extractor concept is introduced. As part of this change now a maximum of 24 extractor can be configured per packet parsing profile. For example, LA type(ether) can have 24 unique parsing key, LC type(ip), LD type(tcp/udp) also can have unique 24 parsing key associated. Signed-off-by: Suman Ghosh Signed-off-by: Ratheesh Kannoth Link: https://patch.msgid.link/20260224080009.4147301-5-rkannoth@marvell.com Signed-off-by: Jakub Kicinski --- .../ethernet/marvell/octeontx2/af/cn20k/npc.c | 361 +++++++++++++++++- .../ethernet/marvell/octeontx2/af/cn20k/npc.h | 103 +++++ .../ethernet/marvell/octeontx2/af/cn20k/reg.h | 60 ++- .../net/ethernet/marvell/octeontx2/af/mbox.h | 17 +- .../net/ethernet/marvell/octeontx2/af/npc.h | 1 + .../marvell/octeontx2/af/npc_profile.h | 12 + .../net/ethernet/marvell/octeontx2/af/rvu.h | 6 +- .../ethernet/marvell/octeontx2/af/rvu_npc.c | 73 +++- .../ethernet/marvell/octeontx2/af/rvu_npc.h | 2 + .../marvell/octeontx2/af/rvu_npc_fs.c | 229 +++++++++-- .../marvell/octeontx2/af/rvu_npc_hash.c | 15 + 11 files changed, 809 insertions(+), 70 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c index 0dec50b1e84f..41164b65085f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c @@ -29,19 +29,21 @@ static const char *npc_kw_name[NPC_MCAM_KEY_MAX] = { (((bytesm1) << 16) | ((hdr_ofs) << 8) | ((ena) << 7) | \ ((key_ofs) & 0x3F)) +static const char cn20k_def_pfl_name[] = "default"; + static struct npc_mcam_kex_extr npc_mkex_extr_default = { - .mkex_sign = MKEX_SIGN, + .mkex_sign = MKEX_CN20K_SIGN, .name = "default", .kpu_version = NPC_KPU_PROFILE_VER, .keyx_cfg = { /* nibble: LA..LE (ltype only) + Error code + Channel */ [NIX_INTF_RX] = ((u64)NPC_MCAM_KEY_DYN << 32) | NPC_PARSE_NIBBLE_INTF_RX | - NPC_PARSE_NIBBLE_ERRCODE, + NPC_CN20K_PARSE_NIBBLE_ERRCODE, /* nibble: LA..LE (ltype only) */ [NIX_INTF_TX] = ((u64)NPC_MCAM_KEY_X2 << 32) | - NPC_PARSE_NIBBLE_INTF_TX, + NPC_CN20K_PARSE_NIBBLE_INTF_TX, }, .intf_extr_lid = { /* Default RX MCAM KEX profile */ @@ -305,9 +307,9 @@ npc_enable_kpm_entry(struct rvu *rvu, int blkaddr, int kpm, int num_entries) u64 entry_mask; entry_mask = npc_enable_mask(num_entries); - /* Disable first KPU_MAX_CST_ENT entries for built-in profile */ + /* Disable first KPU_CN20K_MAX_CST_ENT entries for built-in profile */ if (!rvu->kpu.custom) - entry_mask |= GENMASK_ULL(KPU_MAX_CST_ENT - 1, 0); + entry_mask |= GENMASK_ULL(KPU_CN20K_MAX_CST_ENT - 1, 0); rvu_write64(rvu, blkaddr, NPC_AF_KPMX_ENTRY_DISX(kpm, 0), entry_mask); if (num_entries <= 64) { @@ -428,6 +430,299 @@ struct npc_priv_t *npc_priv_get(void) return &npc_priv; } +static void npc_program_mkex_rx(struct rvu *rvu, int blkaddr, + struct npc_mcam_kex_extr *mkex_extr, + u8 intf) +{ + u8 num_extr = rvu->hw->npc_kex_extr; + int extr, lt; + u64 val; + + if (is_npc_intf_tx(intf)) + return; + + rvu_write64(rvu, blkaddr, NPC_AF_INTFX_KEX_CFG(intf), + mkex_extr->keyx_cfg[NIX_INTF_RX]); + + /* Program EXTRACTOR */ + for (extr = 0; extr < num_extr; extr++) + rvu_write64(rvu, blkaddr, + NPC_AF_INTFX_EXTRACTORX_CFG(intf, extr), + mkex_extr->intf_extr_lid[intf][extr]); + + /* Program EXTRACTOR_LTYPE */ + for (extr = 0; extr < num_extr; extr++) { + for (lt = 0; lt < NPC_MAX_LT; lt++) { + val = mkex_extr->intf_extr_lt[intf][extr][lt]; + CN20K_SET_EXTR_LT(intf, extr, lt, val); + } + } +} + +static void npc_program_mkex_tx(struct rvu *rvu, int blkaddr, + struct npc_mcam_kex_extr *mkex_extr, + u8 intf) +{ + u8 num_extr = rvu->hw->npc_kex_extr; + int extr, lt; + u64 val; + + if (is_npc_intf_rx(intf)) + return; + + rvu_write64(rvu, blkaddr, NPC_AF_INTFX_KEX_CFG(intf), + mkex_extr->keyx_cfg[NIX_INTF_TX]); + + /* Program EXTRACTOR */ + for (extr = 0; extr < num_extr; extr++) + rvu_write64(rvu, blkaddr, + NPC_AF_INTFX_EXTRACTORX_CFG(intf, extr), + mkex_extr->intf_extr_lid[intf][extr]); + + /* Program EXTRACTOR_LTYPE */ + for (extr = 0; extr < num_extr; extr++) { + for (lt = 0; lt < NPC_MAX_LT; lt++) { + val = mkex_extr->intf_extr_lt[intf][extr][lt]; + CN20K_SET_EXTR_LT(intf, extr, lt, val); + } + } +} + +static void npc_program_mkex_profile(struct rvu *rvu, int blkaddr, + struct npc_mcam_kex_extr *mkex_extr) +{ + struct rvu_hwinfo *hw = rvu->hw; + u8 intf; + + for (intf = 0; intf < hw->npc_intfs; intf++) { + npc_program_mkex_rx(rvu, blkaddr, mkex_extr, intf); + npc_program_mkex_tx(rvu, blkaddr, mkex_extr, intf); + } + + /* Programme mkex hash profile */ + npc_program_mkex_hash(rvu, blkaddr); +} + +void npc_cn20k_load_mkex_profile(struct rvu *rvu, int blkaddr, + const char *mkex_profile) +{ + struct npc_mcam_kex_extr *mcam_kex_extr; + struct device *dev = &rvu->pdev->dev; + void __iomem *mkex_prfl_addr = NULL; + u64 prfl_sz; + int ret; + + /* If user not selected mkex profile */ + if (rvu->kpu_fwdata_sz || + !strncmp(mkex_profile, cn20k_def_pfl_name, MKEX_NAME_LEN)) + goto program_mkex_extr; + + /* Setting up the mapping for mkex profile image */ + ret = npc_fwdb_prfl_img_map(rvu, &mkex_prfl_addr, &prfl_sz); + if (ret < 0) + goto program_mkex_extr; + + mcam_kex_extr = (struct npc_mcam_kex_extr __force *)mkex_prfl_addr; + + while (((s64)prfl_sz > 0) && + (mcam_kex_extr->mkex_sign != MKEX_END_SIGN)) { + /* Compare with mkex mod_param name string */ + if (mcam_kex_extr->mkex_sign == MKEX_CN20K_SIGN && + !strncmp(mcam_kex_extr->name, mkex_profile, + MKEX_NAME_LEN)) { + rvu->kpu.mcam_kex_prfl.mkex_extr = mcam_kex_extr; + goto program_mkex_extr; + } + + mcam_kex_extr++; + prfl_sz -= sizeof(struct npc_mcam_kex_extr); + } + dev_warn(dev, "Failed to load requested profile: %s\n", mkex_profile); + rvu->kpu.mcam_kex_prfl.mkex_extr = npc_mkex_extr_default_get(); + +program_mkex_extr: + dev_info(rvu->dev, "Using %s mkex profile\n", + rvu->kpu.mcam_kex_prfl.mkex_extr->name); + /* Program selected mkex profile */ + npc_program_mkex_profile(rvu, blkaddr, + rvu->kpu.mcam_kex_prfl.mkex_extr); + if (mkex_prfl_addr) + iounmap(mkex_prfl_addr); +} + +static u8 npc_map2cn20k_flag(u8 flag) +{ + switch (flag) { + case NPC_F_LC_U_IP_FRAG: + return NPC_CN20K_F_LC_L_IP_FRAG; + + case NPC_F_LC_U_IP6_FRAG: + return NPC_CN20K_F_LC_L_IP6_FRAG; + + case NPC_F_LC_L_6TO4: + return NPC_CN20K_F_LC_L_6TO4; + + case NPC_F_LC_L_MPLS_IN_IP: + return NPC_CN20K_F_LC_U_MPLS_IN_IP; + + case NPC_F_LC_L_IP6_TUN_IP6: + return NPC_CN20K_F_LC_U_IP6_TUN_IP6; + + case NPC_F_LC_L_IP6_MPLS_IN_IP: + return NPC_CN20K_F_LC_U_IP6_MPLS_IN_IP; + + default: + break; + } + + WARN(1, "%s: Invalid flag=%u\n", __func__, flag); + return 0xff; +} + +void +npc_cn20k_update_action_entries_n_flags(struct rvu *rvu, + struct npc_kpu_profile_adapter *pfl) +{ + struct npc_kpu_profile_action *action; + int entries, ltype; + u8 flags, val; + + for (int i = 0; i < pfl->kpus; i++) { + action = pfl->kpu[i].action; + entries = pfl->kpu[i].action_entries; + + for (int j = 0; j < entries; j++) { + if (action[j].lid != NPC_LID_LC) + continue; + + ltype = action[j].ltype; + + if (ltype != NPC_LT_LC_IP && + ltype != NPC_LT_LC_IP6 && + ltype != NPC_LT_LC_IP_OPT && + ltype != NPC_LT_LC_IP6_EXT) + continue; + + flags = action[j].flags; + + switch (flags) { + case NPC_F_LC_U_IP_FRAG: + case NPC_F_LC_U_IP6_FRAG: + case NPC_F_LC_L_6TO4: + case NPC_F_LC_L_MPLS_IN_IP: + case NPC_F_LC_L_IP6_TUN_IP6: + case NPC_F_LC_L_IP6_MPLS_IN_IP: + val = npc_map2cn20k_flag(flags); + if (val == 0xFF) { + dev_err(rvu->dev, + "%s: Error to get flag value\n", + __func__); + return; + } + + action[j].flags = val; + break; + default: + break; + } + } + } +} + +int npc_cn20k_apply_custom_kpu(struct rvu *rvu, + struct npc_kpu_profile_adapter *profile) +{ + struct npc_cn20k_kpu_profile_fwdata *fw = rvu->kpu_fwdata; + struct npc_kpu_profile_action *action; + struct npc_kpu_profile_cam *cam; + struct npc_kpu_fwdata *fw_kpu; + size_t hdr_sz, offset = 0; + u16 kpu, entry; + int entries; + + hdr_sz = sizeof(struct npc_cn20k_kpu_profile_fwdata); + + if (rvu->kpu_fwdata_sz < hdr_sz) { + dev_warn(rvu->dev, "Invalid KPU profile size\n"); + return -EINVAL; + } + + if (le64_to_cpu(fw->signature) != KPU_SIGN) { + dev_warn(rvu->dev, "Invalid KPU profile signature %llx\n", + fw->signature); + return -EINVAL; + } + + /* Verify if the using known profile structure */ + if (NPC_KPU_VER_MAJ(profile->version) > + NPC_KPU_VER_MAJ(NPC_KPU_PROFILE_VER)) { + dev_warn(rvu->dev, "Not supported Major version: %d > %d\n", + NPC_KPU_VER_MAJ(profile->version), + NPC_KPU_VER_MAJ(NPC_KPU_PROFILE_VER)); + return -EINVAL; + } + + /* Verify if profile is aligned with the required kernel changes */ + if (NPC_KPU_VER_MIN(profile->version) < + NPC_KPU_VER_MIN(NPC_KPU_PROFILE_VER)) { + dev_warn(rvu->dev, + "Invalid KPU profile version: %d.%d.%d expected version <= %d.%d.%d\n", + NPC_KPU_VER_MAJ(profile->version), + NPC_KPU_VER_MIN(profile->version), + NPC_KPU_VER_PATCH(profile->version), + NPC_KPU_VER_MAJ(NPC_KPU_PROFILE_VER), + NPC_KPU_VER_MIN(NPC_KPU_PROFILE_VER), + NPC_KPU_VER_PATCH(NPC_KPU_PROFILE_VER)); + return -EINVAL; + } + + /* Verify if profile fits the HW */ + if (fw->kpus > profile->kpus) { + dev_warn(rvu->dev, "Not enough KPUs: %d > %ld\n", fw->kpus, + profile->kpus); + return -EINVAL; + } + + profile->mcam_kex_prfl.mkex_extr = &fw->mkex; + if (profile->mcam_kex_prfl.mkex_extr->mkex_sign != MKEX_CN20K_SIGN) { + dev_warn(rvu->dev, "Invalid MKEX profile signature:%llx\n", + profile->mcam_kex_prfl.mkex_extr->mkex_sign); + return -EINVAL; + } + + profile->custom = 1; + profile->name = fw->name; + profile->version = le64_to_cpu(fw->version); + profile->lt_def = &fw->lt_def; + + for (kpu = 0; kpu < fw->kpus; kpu++) { + fw_kpu = (struct npc_kpu_fwdata *)(fw->data + offset); + if (fw_kpu->entries > KPU_CN20K_MAX_CST_ENT) + dev_warn(rvu->dev, + "Too many custom entries on KPU%d: %d > %d\n", + kpu, fw_kpu->entries, KPU_CN20K_MAX_CST_ENT); + entries = min(fw_kpu->entries, KPU_CN20K_MAX_CST_ENT); + cam = (struct npc_kpu_profile_cam *)fw_kpu->data; + offset += sizeof(*fw_kpu) + fw_kpu->entries * sizeof(*cam); + action = (struct npc_kpu_profile_action *)(fw->data + offset); + offset += fw_kpu->entries * sizeof(*action); + if (rvu->kpu_fwdata_sz < hdr_sz + offset) { + dev_warn(rvu->dev, + "Profile size mismatch on KPU%i parsing.\n", + kpu + 1); + return -EINVAL; + } + + for (entry = 0; entry < entries; entry++) { + profile->kpu[kpu].cam[entry] = cam[entry]; + profile->kpu[kpu].action[entry] = action[entry]; + } + } + npc_cn20k_update_action_entries_n_flags(rvu, profile); + + return 0; +} + static int npc_subbank_idx_2_mcam_idx(struct rvu *rvu, struct npc_subbank *sb, u16 sub_off, u16 *mcam_idx) { @@ -1960,6 +2255,38 @@ rvu_mbox_handler_npc_cn20k_get_fcnt(struct rvu *rvu, return 0; } +int +rvu_mbox_handler_npc_cn20k_get_kex_cfg(struct rvu *rvu, + struct msg_req *req, + struct npc_cn20k_get_kex_cfg_rsp *rsp) +{ + int extr, lt; + + rsp->rx_keyx_cfg = CN20K_GET_KEX_CFG(NIX_INTF_RX); + rsp->tx_keyx_cfg = CN20K_GET_KEX_CFG(NIX_INTF_TX); + + /* Get EXTRACTOR LID */ + for (extr = 0; extr < NPC_MAX_EXTRACTOR; extr++) { + rsp->intf_extr_lid[NIX_INTF_RX][extr] = + CN20K_GET_EXTR_LID(NIX_INTF_RX, extr); + rsp->intf_extr_lid[NIX_INTF_TX][extr] = + CN20K_GET_EXTR_LID(NIX_INTF_TX, extr); + } + + /* Get EXTRACTOR LTYPE */ + for (extr = 0; extr < NPC_MAX_EXTRACTOR; extr++) { + for (lt = 0; lt < NPC_MAX_LT; lt++) { + rsp->intf_extr_lt[NIX_INTF_RX][extr][lt] = + CN20K_GET_EXTR_LT(NIX_INTF_RX, extr, lt); + rsp->intf_extr_lt[NIX_INTF_TX][extr][lt] = + CN20K_GET_EXTR_LT(NIX_INTF_TX, extr, lt); + } + } + + memcpy(rsp->mkex_pfl_name, rvu->mkex_pfl_name, MKEX_NAME_LEN); + return 0; +} + static int *subbank_srch_order; static void npc_populate_restricted_idxs(int num_subbanks) @@ -2172,6 +2499,23 @@ void npc_cn20k_deinit(struct rvu *rvu) kfree(subbank_srch_order); } +static int npc_setup_mcam_section(struct rvu *rvu, int key_type) +{ + int blkaddr, sec; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); + if (blkaddr < 0) { + dev_err(rvu->dev, "%s: NPC block not implemented\n", __func__); + return -ENODEV; + } + + for (sec = 0; sec < npc_priv.num_subbanks; sec++) + rvu_write64(rvu, blkaddr, + NPC_AF_MCAM_SECTIONX_CFG_EXT(sec), key_type); + + return 0; +} + int npc_cn20k_init(struct rvu *rvu) { int err; @@ -2183,6 +2527,13 @@ int npc_cn20k_init(struct rvu *rvu) return err; } + err = npc_setup_mcam_section(rvu, NPC_MCAM_KEY_X2); + if (err) { + dev_err(rvu->dev, "%s: mcam section configuration failure\n", + __func__); + return err; + } + npc_priv.init_done = true; return 0; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h index 7cfca5cfe5fa..2ed59b3955f4 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h @@ -8,10 +8,77 @@ #ifndef NPC_CN20K_H #define NPC_CN20K_H +#define MKEX_CN20K_SIGN 0x19bbfdbd160 + #define MAX_NUM_BANKS 2 #define MAX_NUM_SUB_BANKS 32 #define MAX_SUBBANK_DEPTH 256 +/* strtoull of "mkexprof" with base:36 */ +#define MKEX_END_SIGN 0xdeadbeef + +#define NPC_CN20K_BYTESM GENMASK_ULL(18, 16) +#define NPC_CN20K_PARSE_NIBBLE GENMASK_ULL(22, 0) +#define NPC_CN20K_TOTAL_NIBBLE 23 + +#define CN20K_SET_EXTR_LT(intf, extr, ltype, cfg) \ + rvu_write64(rvu, BLKADDR_NPC, \ + NPC_AF_INTFX_EXTRACTORX_LTX_CFG(intf, extr, ltype), cfg) + +#define CN20K_GET_KEX_CFG(intf) \ + rvu_read64(rvu, BLKADDR_NPC, NPC_AF_INTFX_KEX_CFG(intf)) + +#define CN20K_GET_EXTR_LID(intf, extr) \ + rvu_read64(rvu, BLKADDR_NPC, \ + NPC_AF_INTFX_EXTRACTORX_CFG(intf, extr)) + +#define CN20K_SET_EXTR_LT(intf, extr, ltype, cfg) \ + rvu_write64(rvu, BLKADDR_NPC, \ + NPC_AF_INTFX_EXTRACTORX_LTX_CFG(intf, extr, ltype), cfg) + +#define CN20K_GET_EXTR_LT(intf, extr, ltype) \ + rvu_read64(rvu, BLKADDR_NPC, \ + NPC_AF_INTFX_EXTRACTORX_LTX_CFG(intf, extr, ltype)) + +/* NPC_PARSE_KEX_S nibble definitions for each field */ +#define NPC_CN20K_PARSE_NIBBLE_CHAN GENMASK_ULL(2, 0) +#define NPC_CN20K_PARSE_NIBBLE_ERRLEV BIT_ULL(3) +#define NPC_CN20K_PARSE_NIBBLE_ERRCODE GENMASK_ULL(5, 4) +#define NPC_CN20K_PARSE_NIBBLE_L2L3_BCAST BIT_ULL(6) +#define NPC_CN20K_PARSE_NIBBLE_LA_FLAGS BIT_ULL(7) +#define NPC_CN20K_PARSE_NIBBLE_LA_LTYPE BIT_ULL(8) +#define NPC_CN20K_PARSE_NIBBLE_LB_FLAGS BIT_ULL(9) +#define NPC_CN20K_PARSE_NIBBLE_LB_LTYPE BIT_ULL(10) +#define NPC_CN20K_PARSE_NIBBLE_LC_FLAGS BIT_ULL(11) +#define NPC_CN20K_PARSE_NIBBLE_LC_LTYPE BIT_ULL(12) +#define NPC_CN20K_PARSE_NIBBLE_LD_FLAGS BIT_ULL(13) +#define NPC_CN20K_PARSE_NIBBLE_LD_LTYPE BIT_ULL(14) +#define NPC_CN20K_PARSE_NIBBLE_LE_FLAGS BIT_ULL(15) +#define NPC_CN20K_PARSE_NIBBLE_LE_LTYPE BIT_ULL(16) +#define NPC_CN20K_PARSE_NIBBLE_LF_FLAGS BIT_ULL(17) +#define NPC_CN20K_PARSE_NIBBLE_LF_LTYPE BIT_ULL(18) +#define NPC_CN20K_PARSE_NIBBLE_LG_FLAGS BIT_ULL(19) +#define NPC_CN20K_PARSE_NIBBLE_LG_LTYPE BIT_ULL(20) +#define NPC_CN20K_PARSE_NIBBLE_LH_FLAGS BIT_ULL(21) +#define NPC_CN20K_PARSE_NIBBLE_LH_LTYPE BIT_ULL(22) + +/* Rx parse key extract nibble enable */ +#define NPC_CN20K_PARSE_NIBBLE_INTF_RX (NPC_CN20K_PARSE_NIBBLE_CHAN | \ + NPC_CN20K_PARSE_NIBBLE_L2L3_BCAST | \ + NPC_CN20K_PARSE_NIBBLE_LA_LTYPE | \ + NPC_CN20K_PARSE_NIBBLE_LB_LTYPE | \ + NPC_CN20K_PARSE_NIBBLE_LC_FLAGS | \ + NPC_CN20K_PARSE_NIBBLE_LC_LTYPE | \ + NPC_CN20K_PARSE_NIBBLE_LD_LTYPE | \ + NPC_CN20K_PARSE_NIBBLE_LE_LTYPE) + +/* Tx parse key extract nibble enable */ +#define NPC_CN20K_PARSE_NIBBLE_INTF_TX (NPC_CN20K_PARSE_NIBBLE_LA_LTYPE | \ + NPC_CN20K_PARSE_NIBBLE_LB_LTYPE | \ + NPC_CN20K_PARSE_NIBBLE_LC_LTYPE | \ + NPC_CN20K_PARSE_NIBBLE_LD_LTYPE | \ + NPC_CN20K_PARSE_NIBBLE_LE_LTYPE) + /** * enum npc_subbank_flag - NPC subbank status * @@ -147,6 +214,34 @@ struct npc_mcam_kex_extr { u64 intf_extr_lt[NPC_MAX_INTF][NPC_MAX_EXTRACTOR][NPC_MAX_LT]; } __packed; +struct npc_cn20k_kpu_profile_fwdata { +#define KPU_SIGN 0x00666f727075706b +#define KPU_NAME_LEN 32 + /* Maximum number of custom KPU entries supported by + * the built-in profile. + */ +#define KPU_CN20K_MAX_CST_ENT 6 + /* KPU Profle Header */ + __le64 signature; /* "kpuprof\0" (8 bytes/ASCII characters) */ + u8 name[KPU_NAME_LEN]; /* KPU Profile name */ + __le64 version; /* KPU profile version */ + u8 kpus; + u8 reserved[7]; + + /* Default MKEX profile to be used with this KPU profile. May be + * overridden with mkex_profile module parameter. + * Format is same as for the MKEX profile to streamline processing. + */ + struct npc_mcam_kex_extr mkex; + /* LTYPE values for specific HW offloaded protocols. */ + struct npc_lt_def_cfg lt_def; + /* Dynamically sized data: + * Custom KPU CAM and ACTION configuration entries. + * struct npc_kpu_fwdata kpu[kpus]; + */ + u8 data[]; +} __packed; + struct rvu; struct npc_priv_t *npc_priv_get(void); @@ -162,4 +257,12 @@ int npc_cn20k_ref_idx_alloc(struct rvu *rvu, int pcifunc, int key_type, int npc_cn20k_idx_free(struct rvu *rvu, u16 *mcam_idx, int count); void npc_cn20k_parser_profile_init(struct rvu *rvu, int blkaddr); struct npc_mcam_kex_extr *npc_mkex_extr_default_get(void); +void npc_cn20k_load_mkex_profile(struct rvu *rvu, int blkaddr, + const char *mkex_profile); +int npc_cn20k_apply_custom_kpu(struct rvu *rvu, + struct npc_kpu_profile_adapter *profile); + +void +npc_cn20k_update_action_entries_n_flags(struct rvu *rvu, + struct npc_kpu_profile_adapter *pfl); #endif /* NPC_CN20K_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/reg.h b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/reg.h index 073d4b815681..bf50d999528b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/reg.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/reg.h @@ -80,18 +80,60 @@ #define RVU_MBOX_VF_VFAF_TRIGX(a) (0x2000 | (a) << 3) /* NPC registers */ #define NPC_AF_INTFX_EXTRACTORX_CFG(a, b) \ - (0x908000ull | (a) << 10 | (b) << 3) + (0x20c000ull | (a) << 16 | (b) << 8) #define NPC_AF_INTFX_EXTRACTORX_LTX_CFG(a, b, c) \ - (0x900000ull | (a) << 13 | (b) << 8 | (c) << 3) + (0x204000ull | (a) << 16 | (b) << 8 | (c) << 3) #define NPC_AF_KPMX_ENTRYX_CAMX(a, b, c) \ - (0x100000ull | (a) << 14 | (b) << 6 | (c) << 3) + (0x20000ull | (a) << 12 | (b) << 3 | (c) << 16) #define NPC_AF_KPMX_ENTRYX_ACTION0(a, b) \ - (0x100020ull | (a) << 14 | (b) << 6) + (0x40000ull | (a) << 12 | (b) << 3) #define NPC_AF_KPMX_ENTRYX_ACTION1(a, b) \ - (0x100028ull | (a) << 14 | (b) << 6) -#define NPC_AF_KPMX_ENTRY_DISX(a, b) (0x180000ull | (a) << 6 | (b) << 3) -#define NPC_AF_KPM_PASS2_CFG 0x580 -#define NPC_AF_KPMX_PASS2_OFFSET(a) (0x190000ull | (a) << 3) -#define NPC_AF_MCAM_SECTIONX_CFG_EXT(a) (0xC000000ull | (a) << 3) + (0x50000ull | (a) << 12 | (b) << 3) +#define NPC_AF_KPMX_ENTRY_DISX(a, b) (0x60000ull | (a) << 12 | (b) << 3) +#define NPC_AF_KPM_PASS2_CFG 0x10210 +#define NPC_AF_KPMX_PASS2_OFFSET(a) (0x60040ull | (a) << 12) +#define NPC_AF_MCAM_SECTIONX_CFG_EXT(a) (0xf000000ull | (a) << 3) + +#define NPC_AF_CN20K_MCAMEX_BANKX_CAMX_INTF_EXT(a, b, c) ({ \ + u64 offset; \ + offset = (0x8000000ull | (a) << 4 | (b) << 20 | (c) << 3); \ + offset; }) + +#define NPC_AF_CN20K_MCAMEX_BANKX_CAMX_W0_EXT(a, b, c) ({ \ + u64 offset; \ + offset = (0x9000000ull | (a) << 4 | (b) << 20 | (c) << 3); \ + offset; }) + +#define NPC_AF_CN20K_MCAMEX_BANKX_CAMX_W1_EXT(a, b, c) ({ \ + u64 offset; \ + offset = (0x9400000ull | (a) << 4 | (b) << 20 | (c) << 3); \ + offset; }) + +#define NPC_AF_CN20K_MCAMEX_BANKX_CAMX_W2_EXT(a, b, c) ({ \ + u64 offset; \ + offset = (0x9800000ull | (a) << 4 | (b) << 20 | (c) << 3); \ + offset; }) + +#define NPC_AF_CN20K_MCAMEX_BANKX_CAMX_W3_EXT(a, b, c) ({ \ + u64 offset; \ + offset = (0x9c00000ull | (a) << 4 | (b) << 20 | (c) << 3); \ + offset; }) + +#define NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(a, b) ({ \ + u64 offset; \ + offset = (0xa000000ull | (a) << 4 | (b) << 20); \ + offset; }) + +#define NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(a, b, c) ({ \ + u64 offset; \ + offset = (0xc000000ull | (a) << 4 | (b) << 20 | (c) << 22); \ + offset; }) + +#define NPC_AF_INTFX_MISS_ACTX(a, b) (0xf003000 | (a) << 6 | (b) << 4) + +#define NPC_AF_CN20K_MCAMEX_BANKX_STAT_EXT(a, b) ({ \ + u64 offset; \ + offset = (0xb000000ull | (a) << 4 | (b) << 20); \ + offset; }) #endif /* RVU_MBOX_REG_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 1c4b36832788..a393bf884fd6 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -285,6 +285,8 @@ M(NPC_GET_FIELD_STATUS, 0x6014, npc_get_field_status, \ npc_get_field_status_rsp) \ M(NPC_CN20K_MCAM_GET_FREE_COUNT, 0x6015, npc_cn20k_get_fcnt, \ msg_req, npc_cn20k_get_fcnt_rsp) \ +M(NPC_CN20K_GET_KEX_CFG, 0x6016, npc_cn20k_get_kex_cfg, \ + msg_req, npc_cn20k_get_kex_cfg_rsp) \ /* NIX mbox IDs (range 0x8000 - 0xFFFF) */ \ M(NIX_LF_ALLOC, 0x8000, nix_lf_alloc, \ nix_lf_alloc_req, nix_lf_alloc_rsp) \ @@ -1559,7 +1561,7 @@ struct npc_mcam_free_entry_req { }; struct mcam_entry { -#define NPC_MAX_KWS_IN_KEY 7 /* Number of keywords in max keywidth */ +#define NPC_MAX_KWS_IN_KEY 8 /* Number of keywords in max keywidth */ u64 kw[NPC_MAX_KWS_IN_KEY]; u64 kw_mask[NPC_MAX_KWS_IN_KEY]; u64 action; @@ -1663,6 +1665,19 @@ struct npc_get_kex_cfg_rsp { u8 mkex_pfl_name[MKEX_NAME_LEN]; }; +struct npc_cn20k_get_kex_cfg_rsp { + struct mbox_msghdr hdr; + u64 rx_keyx_cfg; /* NPC_AF_INTF(0)_KEX_CFG */ + u64 tx_keyx_cfg; /* NPC_AF_INTF(1)_KEX_CFG */ +#define NPC_MAX_EXTRACTOR 24 + /* MKEX Extractor data */ + u64 intf_extr_lid[NPC_MAX_INTF][NPC_MAX_EXTRACTOR]; + /* KEX configuration per extractor */ + u64 intf_extr_lt[NPC_MAX_INTF][NPC_MAX_EXTRACTOR][NPC_MAX_LT]; +#define MKEX_NAME_LEN 128 + u8 mkex_pfl_name[MKEX_NAME_LEN]; +}; + struct ptp_get_cap_rsp { struct mbox_msghdr hdr; #define PTP_CAP_HW_ATOMIC_UPDATE BIT_ULL(0) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/npc.h index 6c3aca6f278d..cb05ec69e0b3 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/npc.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/npc.h @@ -429,6 +429,7 @@ struct nix_rx_action { /* NPC_AF_INTFX_KEX_CFG field masks */ #define NPC_PARSE_NIBBLE GENMASK_ULL(30, 0) +#define NPC_TOTAL_NIBBLE 31 /* NPC_PARSE_KEX_S nibble definitions for each field */ #define NPC_PARSE_NIBBLE_CHAN GENMASK_ULL(2, 0) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h index 561b01fcdbde..db74f7fdf028 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h @@ -321,6 +321,18 @@ enum npc_kpu_lb_lflag { NPC_F_LB_L_FDSA, }; +enum npc_cn20k_kpu_lc_uflag { + NPC_CN20K_F_LC_U_MPLS_IN_IP = 0x20, + NPC_CN20K_F_LC_U_IP6_TUN_IP6 = 0x40, + NPC_CN20K_F_LC_U_IP6_MPLS_IN_IP = 0x80, +}; + +enum npc_cn20k_kpu_lc_lflag { + NPC_CN20K_F_LC_L_IP_FRAG = 2, + NPC_CN20K_F_LC_L_IP6_FRAG, + NPC_CN20K_F_LC_L_6TO4, +}; + enum npc_kpu_lc_uflag { NPC_F_LC_U_UNK_PROTO = 0x10, NPC_F_LC_U_IP_FRAG = 0x20, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 14ca28ab493a..dd930aa05582 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -554,7 +554,11 @@ struct npc_kpu_profile_adapter { const struct npc_lt_def_cfg *lt_def; const struct npc_kpu_profile_action *ikpu; /* array[pkinds] */ const struct npc_kpu_profile *kpu; /* array[kpus] */ - struct npc_mcam_kex *mkex; + union npc_mcam_key_prfl { + struct npc_mcam_kex *mkex; + /* used for cn9k and cn10k */ + struct npc_mcam_kex_extr *mkex_extr; /* used for cn20k */ + } mcam_kex_prfl; struct npc_mcam_kex_hash *mkex_hash; bool custom; size_t pkinds; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index a113cab1e1a5..3e4f4fee791a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -1340,8 +1340,8 @@ static void npc_program_mkex_profile(struct rvu *rvu, int blkaddr, npc_program_mkex_hash(rvu, blkaddr); } -static int npc_fwdb_prfl_img_map(struct rvu *rvu, void __iomem **prfl_img_addr, - u64 *size) +int npc_fwdb_prfl_img_map(struct rvu *rvu, void __iomem **prfl_img_addr, + u64 *size) { u64 prfl_addr, prfl_sz; @@ -1397,7 +1397,7 @@ static void npc_load_mkex_profile(struct rvu *rvu, int blkaddr, */ if (!is_rvu_96xx_B0(rvu) || mcam_kex->keyx_cfg[NIX_INTF_RX] == mcam_kex->keyx_cfg[NIX_INTF_TX]) - rvu->kpu.mkex = mcam_kex; + rvu->kpu.mcam_kex_prfl.mkex = mcam_kex; goto program_mkex; } @@ -1407,9 +1407,10 @@ static void npc_load_mkex_profile(struct rvu *rvu, int blkaddr, dev_warn(dev, "Failed to load requested profile: %s\n", mkex_profile); program_mkex: - dev_info(rvu->dev, "Using %s mkex profile\n", rvu->kpu.mkex->name); + dev_info(rvu->dev, "Using %s mkex profile\n", + rvu->kpu.mcam_kex_prfl.mkex->name); /* Program selected mkex profile */ - npc_program_mkex_profile(rvu, blkaddr, rvu->kpu.mkex); + npc_program_mkex_profile(rvu, blkaddr, rvu->kpu.mcam_kex_prfl.mkex); if (mkex_prfl_addr) iounmap(mkex_prfl_addr); } @@ -1528,7 +1529,8 @@ static void npc_program_kpu_profile(struct rvu *rvu, int blkaddr, int kpu, rvu_write64(rvu, blkaddr, NPC_AF_KPUX_CFG(kpu), 0x01); } -static int npc_prepare_default_kpu(struct npc_kpu_profile_adapter *profile) +static void npc_prepare_default_kpu(struct rvu *rvu, + struct npc_kpu_profile_adapter *profile) { profile->custom = 0; profile->name = def_pfl_name; @@ -1538,23 +1540,38 @@ static int npc_prepare_default_kpu(struct npc_kpu_profile_adapter *profile) profile->kpu = npc_kpu_profiles; profile->kpus = ARRAY_SIZE(npc_kpu_profiles); profile->lt_def = &npc_lt_defaults; - profile->mkex = &npc_mkex_default; profile->mkex_hash = &npc_mkex_hash_default; - return 0; + if (!is_cn20k(rvu->pdev)) { + profile->mcam_kex_prfl.mkex = &npc_mkex_default; + return; + } + + profile->mcam_kex_prfl.mkex_extr = npc_mkex_extr_default_get(); + ikpu_action_entries[NPC_RX_CPT_HDR_PKIND].offset = 6; + ikpu_action_entries[NPC_RX_CPT_HDR_PKIND].mask = 0xe0; + ikpu_action_entries[NPC_RX_CPT_HDR_PKIND].shift = 0x5; + ikpu_action_entries[NPC_RX_CPT_HDR_PKIND].right = 0x1; + + npc_cn20k_update_action_entries_n_flags(rvu, profile); } static int npc_apply_custom_kpu(struct rvu *rvu, struct npc_kpu_profile_adapter *profile) { size_t hdr_sz = sizeof(struct npc_kpu_profile_fwdata), offset = 0; - struct npc_kpu_profile_fwdata *fw = rvu->kpu_fwdata; struct npc_kpu_profile_action *action; + struct npc_kpu_profile_fwdata *fw; struct npc_kpu_profile_cam *cam; struct npc_kpu_fwdata *fw_kpu; int entries; u16 kpu, entry; + if (is_cn20k(rvu->pdev)) + return npc_cn20k_apply_custom_kpu(rvu, profile); + + fw = rvu->kpu_fwdata; + if (rvu->kpu_fwdata_sz < hdr_sz) { dev_warn(rvu->dev, "Invalid KPU profile size\n"); return -EINVAL; @@ -1595,7 +1612,7 @@ static int npc_apply_custom_kpu(struct rvu *rvu, profile->custom = 1; profile->name = fw->name; profile->version = le64_to_cpu(fw->version); - profile->mkex = &fw->mkex; + profile->mcam_kex_prfl.mkex = &fw->mkex; profile->lt_def = &fw->lt_def; for (kpu = 0; kpu < fw->kpus; kpu++) { @@ -1720,7 +1737,7 @@ void npc_load_kpu_profile(struct rvu *rvu) if (!strncmp(kpu_profile, def_pfl_name, KPU_NAME_LEN)) goto revert_to_default; /* First prepare default KPU, then we'll customize top entries. */ - npc_prepare_default_kpu(profile); + npc_prepare_default_kpu(rvu, profile); /* Order of preceedence for load loading NPC profile (high to low) * Firmware binary in filesystem. @@ -1783,7 +1800,7 @@ program_kpu: return; revert_to_default: - npc_prepare_default_kpu(profile); + npc_prepare_default_kpu(rvu, profile); } static void npc_parser_profile_init(struct rvu *rvu, int blkaddr) @@ -2036,12 +2053,21 @@ static void rvu_npc_hw_init(struct rvu *rvu, int blkaddr) static void rvu_npc_setup_interfaces(struct rvu *rvu, int blkaddr) { - struct npc_mcam_kex *mkex = rvu->kpu.mkex; + struct npc_mcam_kex_extr *mkex_extr = rvu->kpu.mcam_kex_prfl.mkex_extr; + struct npc_mcam_kex *mkex = rvu->kpu.mcam_kex_prfl.mkex; struct npc_mcam *mcam = &rvu->hw->mcam; struct rvu_hwinfo *hw = rvu->hw; u64 nibble_ena, rx_kex, tx_kex; + u64 *keyx_cfg; u8 intf; + if (is_cn20k(rvu->pdev)) { + keyx_cfg = mkex_extr->keyx_cfg; + goto skip_miss_cntr; + } + + keyx_cfg = mkex->keyx_cfg; + /* Reserve last counter for MCAM RX miss action which is set to * drop packet. This way we will know how many pkts didn't match * any MCAM entry. @@ -2049,15 +2075,17 @@ static void rvu_npc_setup_interfaces(struct rvu *rvu, int blkaddr) mcam->counters.max--; mcam->rx_miss_act_cntr = mcam->counters.max; - rx_kex = mkex->keyx_cfg[NIX_INTF_RX]; - tx_kex = mkex->keyx_cfg[NIX_INTF_TX]; +skip_miss_cntr: + rx_kex = keyx_cfg[NIX_INTF_RX]; + tx_kex = keyx_cfg[NIX_INTF_TX]; + nibble_ena = FIELD_GET(NPC_PARSE_NIBBLE, rx_kex); nibble_ena = rvu_npc_get_tx_nibble_cfg(rvu, nibble_ena); if (nibble_ena) { tx_kex &= ~NPC_PARSE_NIBBLE; tx_kex |= FIELD_PREP(NPC_PARSE_NIBBLE, nibble_ena); - mkex->keyx_cfg[NIX_INTF_TX] = tx_kex; + keyx_cfg[NIX_INTF_TX] = tx_kex; } /* Configure RX interfaces */ @@ -2069,6 +2097,9 @@ static void rvu_npc_setup_interfaces(struct rvu *rvu, int blkaddr) rvu_write64(rvu, blkaddr, NPC_AF_INTFX_KEX_CFG(intf), rx_kex); + if (is_cn20k(rvu->pdev)) + continue; + /* If MCAM lookup doesn't result in a match, drop the received * packet. And map this action to a counter to count dropped * packets. @@ -2174,7 +2205,10 @@ int rvu_npc_init(struct rvu *rvu) npc_config_secret_key(rvu, blkaddr); /* Configure MKEX profile */ - npc_load_mkex_profile(rvu, blkaddr, rvu->mkex_pfl_name); + if (is_cn20k(rvu->pdev)) + npc_cn20k_load_mkex_profile(rvu, blkaddr, rvu->mkex_pfl_name); + else + npc_load_mkex_profile(rvu, blkaddr, rvu->mkex_pfl_name); err = npc_mcam_rsrcs_init(rvu, blkaddr); if (err) @@ -2184,7 +2218,10 @@ int rvu_npc_init(struct rvu *rvu) if (err) { dev_err(rvu->dev, "Incorrect mkex profile loaded using default mkex\n"); - npc_load_mkex_profile(rvu, blkaddr, def_pfl_name); + if (is_cn20k(rvu->pdev)) + npc_cn20k_load_mkex_profile(rvu, blkaddr, def_pfl_name); + else + npc_load_mkex_profile(rvu, blkaddr, def_pfl_name); } if (is_cn20k(rvu->pdev)) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.h index 80c63618ec47..346e6ada158e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.h @@ -13,5 +13,7 @@ void npc_load_kpu_profile(struct rvu *rvu); void npc_config_kpuaction(struct rvu *rvu, int blkaddr, const struct npc_kpu_profile_action *kpuaction, int kpu, int entry, bool pkind); +int npc_fwdb_prfl_img_map(struct rvu *rvu, void __iomem **prfl_img_addr, + u64 *size); #endif /* RVU_NPC_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c index 1930b54e72f2..5f51a05de5de 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c @@ -12,6 +12,8 @@ #include "npc.h" #include "rvu_npc_fs.h" #include "rvu_npc_hash.h" +#include "cn20k/reg.h" +#include "cn20k/npc.h" static const char * const npc_flow_names[] = { [NPC_DMAC] = "dmac", @@ -81,19 +83,26 @@ const char *npc_get_field_name(u8 hdr) /* Compute keyword masks and figure out the number of keywords a field * spans in the key. */ -static void npc_set_kw_masks(struct npc_mcam *mcam, u8 type, +static void npc_set_kw_masks(struct rvu *rvu, struct npc_mcam *mcam, u8 type, u8 nr_bits, int start_kwi, int offset, u8 intf) { struct npc_key_field *field = &mcam->rx_key_fields[type]; u8 bits_in_kw; int max_kwi; - if (mcam->banks_per_entry == 1) - max_kwi = 1; /* NPC_MCAM_KEY_X1 */ - else if (mcam->banks_per_entry == 2) - max_kwi = 3; /* NPC_MCAM_KEY_X2 */ - else - max_kwi = 6; /* NPC_MCAM_KEY_X4 */ + if (is_cn20k(rvu->pdev)) { + if (mcam->banks_per_entry == 1) + max_kwi = 3; /* NPC_MCAM_KEY_X2 */ + else + max_kwi = 7; /* NPC_MCAM_KEY_X4 */ + } else { + if (mcam->banks_per_entry == 1) + max_kwi = 1; /* NPC_MCAM_KEY_X1 */ + else if (mcam->banks_per_entry == 2) + max_kwi = 3; /* NPC_MCAM_KEY_X2 */ + else + max_kwi = 6; /* NPC_MCAM_KEY_X4 */ + } if (is_npc_intf_tx(intf)) field = &mcam->tx_key_fields[type]; @@ -155,7 +164,8 @@ static bool npc_is_same(struct npc_key_field *input, sizeof(struct npc_layer_mdata)) == 0; } -static void npc_set_layer_mdata(struct npc_mcam *mcam, enum key_fields type, +static void npc_set_layer_mdata(struct rvu *rvu, + struct npc_mcam *mcam, enum key_fields type, u64 cfg, u8 lid, u8 lt, u8 intf) { struct npc_key_field *input = &mcam->rx_key_fields[type]; @@ -165,13 +175,17 @@ static void npc_set_layer_mdata(struct npc_mcam *mcam, enum key_fields type, input->layer_mdata.hdr = FIELD_GET(NPC_HDR_OFFSET, cfg); input->layer_mdata.key = FIELD_GET(NPC_KEY_OFFSET, cfg); - input->layer_mdata.len = FIELD_GET(NPC_BYTESM, cfg) + 1; + if (is_cn20k(rvu->pdev)) + input->layer_mdata.len = FIELD_GET(NPC_CN20K_BYTESM, cfg) + 1; + else + input->layer_mdata.len = FIELD_GET(NPC_BYTESM, cfg) + 1; input->layer_mdata.ltype = lt; input->layer_mdata.lid = lid; } static bool npc_check_overlap_fields(struct npc_key_field *input1, - struct npc_key_field *input2) + struct npc_key_field *input2, + int max_kw) { int kwi; @@ -182,7 +196,7 @@ static bool npc_check_overlap_fields(struct npc_key_field *input1, input1->layer_mdata.ltype != input2->layer_mdata.ltype) return false; - for (kwi = 0; kwi < NPC_MAX_KWS_IN_KEY; kwi++) { + for (kwi = 0; kwi < max_kw; kwi++) { if (input1->kw_mask[kwi] & input2->kw_mask[kwi]) return true; } @@ -202,6 +216,7 @@ static bool npc_check_overlap(struct rvu *rvu, int blkaddr, struct npc_key_field *dummy, *input; int start_kwi, offset; u8 nr_bits, lid, lt, ld; + int extr, kws; u64 cfg; dummy = &mcam->rx_key_fields[NPC_UNKNOWN]; @@ -212,6 +227,10 @@ static bool npc_check_overlap(struct rvu *rvu, int blkaddr, input = &mcam->tx_key_fields[type]; } + if (is_cn20k(rvu->pdev)) + goto skip_cn10k_config; + + kws = NPC_MAX_KWS_IN_KEY - 1; for (lid = start_lid; lid < NPC_MAX_LID; lid++) { for (lt = 0; lt < NPC_MAX_LT; lt++) { for (ld = 0; ld < NPC_MAX_LD; ld++) { @@ -221,8 +240,8 @@ static bool npc_check_overlap(struct rvu *rvu, int blkaddr, if (!FIELD_GET(NPC_LDATA_EN, cfg)) continue; memset(dummy, 0, sizeof(struct npc_key_field)); - npc_set_layer_mdata(mcam, NPC_UNKNOWN, cfg, - lid, lt, intf); + npc_set_layer_mdata(rvu, mcam, NPC_UNKNOWN, + cfg, lid, lt, intf); /* exclude input */ if (npc_is_same(input, dummy)) continue; @@ -230,16 +249,50 @@ static bool npc_check_overlap(struct rvu *rvu, int blkaddr, offset = (dummy->layer_mdata.key * 8) % 64; nr_bits = dummy->layer_mdata.len * 8; /* form KW masks */ - npc_set_kw_masks(mcam, NPC_UNKNOWN, nr_bits, - start_kwi, offset, intf); + npc_set_kw_masks(rvu, mcam, NPC_UNKNOWN, + nr_bits, start_kwi, + offset, intf); /* check any input field bits falls in any * other field bits. */ - if (npc_check_overlap_fields(dummy, input)) + if (npc_check_overlap_fields(dummy, input, + kws)) return true; } } } + return false; + +skip_cn10k_config: + for (extr = 0 ; extr < rvu->hw->npc_kex_extr; extr++) { + lid = CN20K_GET_EXTR_LID(intf, extr); + if (lid < start_lid) + continue; + for (lt = 0; lt < NPC_MAX_LT; lt++) { + cfg = CN20K_GET_EXTR_LT(intf, extr, lt); + if (!FIELD_GET(NPC_LDATA_EN, cfg)) + continue; + + memset(dummy, 0, sizeof(struct npc_key_field)); + npc_set_layer_mdata(rvu, mcam, NPC_UNKNOWN, cfg, + lid, lt, intf); + /* exclude input */ + if (npc_is_same(input, dummy)) + continue; + start_kwi = dummy->layer_mdata.key / 8; + offset = (dummy->layer_mdata.key * 8) % 64; + nr_bits = dummy->layer_mdata.len * 8; + /* form KW masks */ + npc_set_kw_masks(rvu, mcam, NPC_UNKNOWN, nr_bits, + start_kwi, offset, intf); + /* check any input field bits falls in any other + * field bits + */ + if (npc_check_overlap_fields(dummy, input, + NPC_MAX_KWS_IN_KEY)) + return true; + } + } return false; } @@ -253,7 +306,8 @@ static bool npc_check_field(struct rvu *rvu, int blkaddr, enum key_fields type, return true; } -static void npc_scan_exact_result(struct npc_mcam *mcam, u8 bit_number, +static void npc_scan_exact_result(struct rvu *rvu, + struct npc_mcam *mcam, u8 bit_number, u8 key_nibble, u8 intf) { u8 offset = (key_nibble * 4) % 64; /* offset within key word */ @@ -269,10 +323,63 @@ static void npc_scan_exact_result(struct npc_mcam *mcam, u8 bit_number, default: return; } - npc_set_kw_masks(mcam, type, nr_bits, kwi, offset, intf); + npc_set_kw_masks(rvu, mcam, type, nr_bits, kwi, offset, intf); } -static void npc_scan_parse_result(struct npc_mcam *mcam, u8 bit_number, +static void npc_cn20k_scan_parse_result(struct rvu *rvu, struct npc_mcam *mcam, + u8 bit_number, u8 key_nibble, u8 intf) +{ + u8 offset = (key_nibble * 4) % 64; /* offset within key word */ + u8 kwi = (key_nibble * 4) / 64; /* which word in key */ + u8 nr_bits = 4; /* bits in a nibble */ + u8 type; + + switch (bit_number) { + case 0 ... 2: + type = NPC_CHAN; + break; + case 3: + type = NPC_ERRLEV; + break; + case 4 ... 5: + type = NPC_ERRCODE; + break; + case 6: + type = NPC_LXMB; + break; + case 8: + type = NPC_LA; + break; + case 10: + type = NPC_LB; + break; + case 12: + type = NPC_LC; + break; + case 14: + type = NPC_LD; + break; + case 16: + type = NPC_LE; + break; + case 18: + type = NPC_LF; + break; + case 20: + type = NPC_LG; + break; + case 22: + type = NPC_LH; + break; + default: + return; + } + + npc_set_kw_masks(rvu, mcam, type, nr_bits, kwi, offset, intf); +} + +static void npc_scan_parse_result(struct rvu *rvu, + struct npc_mcam *mcam, u8 bit_number, u8 key_nibble, u8 intf) { u8 offset = (key_nibble * 4) % 64; /* offset within key word */ @@ -280,6 +387,12 @@ static void npc_scan_parse_result(struct npc_mcam *mcam, u8 bit_number, u8 nr_bits = 4; /* bits in a nibble */ u8 type; + if (is_cn20k(rvu->pdev)) { + npc_cn20k_scan_parse_result(rvu, mcam, bit_number, + key_nibble, intf); + return; + } + switch (bit_number) { case 0 ... 2: type = NPC_CHAN; @@ -322,7 +435,7 @@ static void npc_scan_parse_result(struct npc_mcam *mcam, u8 bit_number, return; } - npc_set_kw_masks(mcam, type, nr_bits, kwi, offset, intf); + npc_set_kw_masks(rvu, mcam, type, nr_bits, kwi, offset, intf); } static void npc_handle_multi_layer_fields(struct rvu *rvu, int blkaddr, u8 intf) @@ -343,8 +456,13 @@ static void npc_handle_multi_layer_fields(struct rvu *rvu, int blkaddr, u8 intf) /* Inner VLAN TCI for double tagged frames */ struct npc_key_field *vlan_tag3; u64 *features; + int i, max_kw; u8 start_lid; - int i; + + if (is_cn20k(rvu->pdev)) + max_kw = NPC_MAX_KWS_IN_KEY; + else + max_kw = NPC_MAX_KWS_IN_KEY - 1; key_fields = mcam->rx_key_fields; features = &mcam->rx_features; @@ -382,7 +500,7 @@ static void npc_handle_multi_layer_fields(struct rvu *rvu, int blkaddr, u8 intf) /* if key profile programmed extracts Ethertype from multiple layers */ if (etype_ether->nr_kws && etype_tag1->nr_kws) { - for (i = 0; i < NPC_MAX_KWS_IN_KEY; i++) { + for (i = 0; i < max_kw; i++) { if (etype_ether->kw_mask[i] != etype_tag1->kw_mask[i]) { dev_err(rvu->dev, "mkex: Etype pos is different for untagged and tagged pkts.\n"); goto vlan_tci; @@ -391,7 +509,7 @@ static void npc_handle_multi_layer_fields(struct rvu *rvu, int blkaddr, u8 intf) key_fields[NPC_ETYPE] = *etype_tag1; } if (etype_ether->nr_kws && etype_tag2->nr_kws) { - for (i = 0; i < NPC_MAX_KWS_IN_KEY; i++) { + for (i = 0; i < max_kw; i++) { if (etype_ether->kw_mask[i] != etype_tag2->kw_mask[i]) { dev_err(rvu->dev, "mkex: Etype pos is different for untagged and double tagged pkts.\n"); goto vlan_tci; @@ -400,7 +518,7 @@ static void npc_handle_multi_layer_fields(struct rvu *rvu, int blkaddr, u8 intf) key_fields[NPC_ETYPE] = *etype_tag2; } if (etype_tag1->nr_kws && etype_tag2->nr_kws) { - for (i = 0; i < NPC_MAX_KWS_IN_KEY; i++) { + for (i = 0; i < max_kw; i++) { if (etype_tag1->kw_mask[i] != etype_tag2->kw_mask[i]) { dev_err(rvu->dev, "mkex: Etype pos is different for tagged and double tagged pkts.\n"); goto vlan_tci; @@ -431,7 +549,7 @@ vlan_tci: /* if key profile extracts outer vlan tci from multiple layers */ if (vlan_tag1->nr_kws && vlan_tag2->nr_kws) { - for (i = 0; i < NPC_MAX_KWS_IN_KEY; i++) { + for (i = 0; i < max_kw; i++) { if (vlan_tag1->kw_mask[i] != vlan_tag2->kw_mask[i]) { dev_err(rvu->dev, "mkex: Out vlan tci pos is different for tagged and double tagged pkts.\n"); goto done; @@ -466,7 +584,11 @@ static void npc_scan_ldata(struct rvu *rvu, int blkaddr, u8 lid, /* starting KW index and starting bit position */ int start_kwi, offset; - nr_bytes = FIELD_GET(NPC_BYTESM, cfg) + 1; + if (is_cn20k(rvu->pdev)) + nr_bytes = FIELD_GET(NPC_CN20K_BYTESM, cfg) + 1; + else + nr_bytes = FIELD_GET(NPC_BYTESM, cfg) + 1; + hdr = FIELD_GET(NPC_HDR_OFFSET, cfg); key = FIELD_GET(NPC_KEY_OFFSET, cfg); @@ -489,11 +611,12 @@ do { \ if ((hstart) >= hdr && \ ((hstart) + (hlen)) <= (hdr + nr_bytes)) { \ bit_offset = (hdr + nr_bytes - (hstart) - (hlen)) * 8; \ - npc_set_layer_mdata(mcam, (name), cfg, lid, lt, intf); \ + npc_set_layer_mdata(rvu, mcam, (name), cfg, lid, lt, \ + intf); \ offset += bit_offset; \ start_kwi += offset / 64; \ offset %= 64; \ - npc_set_kw_masks(mcam, (name), (hlen) * 8, \ + npc_set_kw_masks(rvu, mcam, (name), (hlen) * 8, \ start_kwi, offset, intf); \ } \ } \ @@ -636,6 +759,7 @@ static int npc_scan_kex(struct rvu *rvu, int blkaddr, u8 intf) u8 lid, lt, ld, bitnr; u64 cfg, masked_cfg; u8 key_nibble = 0; + int extr; /* Scan and note how parse result is going to be in key. * A bit set in PARSE_NIBBLE_ENA corresponds to a nibble from @@ -643,10 +767,22 @@ static int npc_scan_kex(struct rvu *rvu, int blkaddr, u8 intf) * will be concatenated in key. */ cfg = rvu_read64(rvu, blkaddr, NPC_AF_INTFX_KEX_CFG(intf)); - masked_cfg = cfg & NPC_PARSE_NIBBLE; - for_each_set_bit(bitnr, (unsigned long *)&masked_cfg, 31) { - npc_scan_parse_result(mcam, bitnr, key_nibble, intf); - key_nibble++; + if (is_cn20k(rvu->pdev)) { + masked_cfg = cfg & NPC_CN20K_PARSE_NIBBLE; + for_each_set_bit(bitnr, (unsigned long *)&masked_cfg, + NPC_CN20K_TOTAL_NIBBLE) { + npc_scan_parse_result(rvu, mcam, bitnr, + key_nibble, intf); + key_nibble++; + } + } else { + masked_cfg = cfg & NPC_PARSE_NIBBLE; + for_each_set_bit(bitnr, (unsigned long *)&masked_cfg, + NPC_TOTAL_NIBBLE) { + npc_scan_parse_result(rvu, mcam, bitnr, + key_nibble, intf); + key_nibble++; + } } /* Ignore exact match bits for mcam entries except the first rule @@ -656,10 +792,13 @@ static int npc_scan_kex(struct rvu *rvu, int blkaddr, u8 intf) masked_cfg = cfg & NPC_EXACT_NIBBLE; bitnr = NPC_EXACT_NIBBLE_START; for_each_set_bit_from(bitnr, (unsigned long *)&masked_cfg, NPC_EXACT_NIBBLE_END + 1) { - npc_scan_exact_result(mcam, bitnr, key_nibble, intf); + npc_scan_exact_result(rvu, mcam, bitnr, key_nibble, intf); key_nibble++; } + if (is_cn20k(rvu->pdev)) + goto skip_cn10k_config; + /* Scan and note how layer data is going to be in key */ for (lid = 0; lid < NPC_MAX_LID; lid++) { for (lt = 0; lt < NPC_MAX_LT; lt++) { @@ -676,6 +815,19 @@ static int npc_scan_kex(struct rvu *rvu, int blkaddr, u8 intf) } return 0; + +skip_cn10k_config: + for (extr = 0 ; extr < rvu->hw->npc_kex_extr; extr++) { + lid = CN20K_GET_EXTR_LID(intf, extr); + for (lt = 0; lt < NPC_MAX_LT; lt++) { + cfg = CN20K_GET_EXTR_LT(intf, extr, lt); + if (!FIELD_GET(NPC_LDATA_EN, cfg)) + continue; + npc_scan_ldata(rvu, blkaddr, lid, lt, cfg, + intf); + } + } + return 0; } static int npc_scan_verify_kex(struct rvu *rvu, int blkaddr) @@ -758,8 +910,8 @@ void npc_update_entry(struct rvu *rvu, enum key_fields type, struct mcam_entry dummy = { {0} }; struct npc_key_field *field; u64 kw1, kw2, kw3; + int i, max_kw; u8 shift; - int i; field = &mcam->rx_key_fields[type]; if (is_npc_intf_tx(intf)) @@ -768,7 +920,12 @@ void npc_update_entry(struct rvu *rvu, enum key_fields type, if (!field->nr_kws) return; - for (i = 0; i < NPC_MAX_KWS_IN_KEY; i++) { + if (is_cn20k(rvu->pdev)) + max_kw = NPC_MAX_KWS_IN_KEY; + else + max_kw = NPC_MAX_KWS_IN_KEY - 1; + + for (i = 0; i < max_kw; i++) { if (!field->kw_mask[i]) continue; /* place key value in kw[x] */ @@ -820,7 +977,7 @@ void npc_update_entry(struct rvu *rvu, enum key_fields type, /* dummy is ready with values and masks for given key * field now clear and update input entry with those */ - for (i = 0; i < NPC_MAX_KWS_IN_KEY; i++) { + for (i = 0; i < max_kw; i++) { if (!field->kw_mask[i]) continue; entry->kw[i] &= ~field->kw_mask[i]; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c index 906d712cef19..beebcd6aa3d8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c @@ -125,6 +125,9 @@ static void npc_program_mkex_hash_rx(struct rvu *rvu, int blkaddr, struct npc_mcam_kex_hash *mkex_hash = rvu->kpu.mkex_hash; int lid, lt, ld, hash_cnt = 0; + if (is_cn20k(rvu->pdev)) + return; + if (is_npc_intf_tx(intf)) return; @@ -165,6 +168,9 @@ static void npc_program_mkex_hash_tx(struct rvu *rvu, int blkaddr, struct npc_mcam_kex_hash *mkex_hash = rvu->kpu.mkex_hash; int lid, lt, ld, hash_cnt = 0; + if (is_cn20k(rvu->pdev)) + return; + if (is_npc_intf_rx(intf)) return; @@ -224,6 +230,9 @@ void npc_program_mkex_hash(struct rvu *rvu, int blkaddr) struct rvu_hwinfo *hw = rvu->hw; u64 cfg; + if (is_cn20k(rvu->pdev)) + return; + /* Check if hardware supports hash extraction */ if (!hwcap->npc_hash_extract) return; @@ -288,6 +297,9 @@ void npc_update_field_hash(struct rvu *rvu, u8 intf, u32 field_hash; u8 hash_idx; + if (is_cn20k(rvu->pdev)) + return; + if (!rvu->hw->cap.npc_hash_extract) { dev_dbg(rvu->dev, "%s: Field hash extract feature is not supported\n", __func__); return; @@ -1874,6 +1886,9 @@ int rvu_npc_exact_init(struct rvu *rvu) u64 cfg; bool rc; + if (is_cn20k(rvu->pdev)) + return 0; + /* Read NPC_AF_CONST3 and check for have exact * match functionality is present */ From 09d3b7a1403f018331afb42e16404942d831b029 Mon Sep 17 00:00:00 2001 From: Ratheesh Kannoth Date: Tue, 24 Feb 2026 13:30:01 +0530 Subject: [PATCH 0107/1561] octeontx2-af: npc: cn20k: Allocate default MCAM indexes Reserving MCAM entries in the AF driver for installing default MCAM entries is not an efficient allocation method, as it results in significant wastage of entries. This patch allocates MCAM indexes for promiscuous, multicast, broadcast, and unicast traffic in descending order of indexes (from lower to higher priority) when the NIX LF is attached to the PF/VF. Signed-off-by: Ratheesh Kannoth Link: https://patch.msgid.link/20260224080009.4147301-6-rkannoth@marvell.com Signed-off-by: Jakub Kicinski --- .../ethernet/marvell/octeontx2/af/cn20k/npc.c | 361 ++++++++++++++++++ .../ethernet/marvell/octeontx2/af/cn20k/npc.h | 29 ++ .../net/ethernet/marvell/octeontx2/af/mbox.h | 8 +- .../net/ethernet/marvell/octeontx2/af/rvu.c | 32 +- .../net/ethernet/marvell/octeontx2/af/rvu.h | 1 + .../ethernet/marvell/octeontx2/af/rvu_npc.c | 36 +- .../marvell/octeontx2/nic/otx2_flows.c | 2 +- 7 files changed, 447 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c index 41164b65085f..a15d408b0fa9 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c @@ -25,10 +25,28 @@ static const char *npc_kw_name[NPC_MCAM_KEY_MAX] = { [NPC_MCAM_KEY_X4] = "X4", }; +static const char *npc_dft_rule_name[NPC_DFT_RULE_MAX_ID] = { + [NPC_DFT_RULE_PROMISC_ID] = "Promisc", + [NPC_DFT_RULE_MCAST_ID] = "Mcast", + [NPC_DFT_RULE_BCAST_ID] = "Bcast", + [NPC_DFT_RULE_UCAST_ID] = "Ucast", +}; + #define KEX_EXTR_CFG(bytesm1, hdr_ofs, ena, key_ofs) \ (((bytesm1) << 16) | ((hdr_ofs) << 8) | ((ena) << 7) | \ ((key_ofs) & 0x3F)) +#define NPC_DFT_RULE_ID_MK(pcifunc, id) \ + ((pcifunc) | FIELD_PREP(GENMASK_ULL(31, 16), id)) + +#define NPC_DFT_RULE_ID_2_PCIFUNC(rid) \ + FIELD_GET(GENMASK_ULL(15, 0), rid) + +#define NPC_DFT_RULE_ID_2_ID(rid) \ + FIELD_GET(GENMASK_ULL(31, 16), rid) + +#define NPC_DFT_RULE_PRIO 127 + static const char cn20k_def_pfl_name[] = "default"; static struct npc_mcam_kex_extr npc_mkex_extr_default = { @@ -2380,6 +2398,346 @@ chk_vfs: return cnt; } +int npc_cn20k_dft_rules_idx_get(struct rvu *rvu, u16 pcifunc, u16 *bcast, + u16 *mcast, u16 *promisc, u16 *ucast) +{ + u16 *ptr[4] = {promisc, mcast, bcast, ucast}; + unsigned long idx; + bool set = false; + void *val; + int i, j; + + if (!npc_priv.init_done) + return 0; + + if (is_lbk_vf(rvu, pcifunc)) { + if (!ptr[0]) + return -EINVAL; + + idx = NPC_DFT_RULE_ID_MK(pcifunc, NPC_DFT_RULE_PROMISC_ID); + val = xa_load(&npc_priv.xa_pf2dfl_rmap, idx); + if (!val) { + pr_debug("%s: Failed to find %s index for pcifunc=%#x\n", + __func__, + npc_dft_rule_name[NPC_DFT_RULE_PROMISC_ID], + pcifunc); + + *ptr[0] = USHRT_MAX; + return -ESRCH; + } + + *ptr[0] = xa_to_value(val); + return 0; + } + + if (is_vf(pcifunc)) { + if (!ptr[3]) + return -EINVAL; + + idx = NPC_DFT_RULE_ID_MK(pcifunc, NPC_DFT_RULE_UCAST_ID); + val = xa_load(&npc_priv.xa_pf2dfl_rmap, idx); + if (!val) { + pr_debug("%s: Failed to find %s index for pcifunc=%#x\n", + __func__, + npc_dft_rule_name[NPC_DFT_RULE_UCAST_ID], + pcifunc); + + *ptr[3] = USHRT_MAX; + return -ESRCH; + } + + *ptr[3] = xa_to_value(val); + return 0; + } + + for (i = NPC_DFT_RULE_START_ID, j = 0; i < NPC_DFT_RULE_MAX_ID; i++, + j++) { + if (!ptr[j]) + continue; + + idx = NPC_DFT_RULE_ID_MK(pcifunc, i); + val = xa_load(&npc_priv.xa_pf2dfl_rmap, idx); + if (!val) { + pr_debug("%s: Failed to find %s index for pcifunc=%#x\n", + __func__, + npc_dft_rule_name[i], pcifunc); + + *ptr[j] = USHRT_MAX; + continue; + } + + *ptr[j] = xa_to_value(val); + set = true; + } + + return set ? 0 : -ESRCH; +} + +static bool npc_is_cgx_or_lbk(struct rvu *rvu, u16 pcifunc) +{ + return is_pf_cgxmapped(rvu, rvu_get_pf(rvu->pdev, pcifunc)) || + is_lbk_vf(rvu, pcifunc); +} + +void npc_cn20k_dft_rules_free(struct rvu *rvu, u16 pcifunc) +{ + struct npc_mcam_free_entry_req free_req = { 0 }; + unsigned long index; + struct msg_rsp rsp; + u16 ptr[4]; + int rc, i; + void *map; + + if (!npc_priv.init_done) + return; + + if (!npc_is_cgx_or_lbk(rvu, pcifunc)) { + dev_dbg(rvu->dev, + "%s: dft rule allocation is only for cgx mapped device, pcifunc=%#x\n", + __func__, pcifunc); + return; + } + + rc = npc_cn20k_dft_rules_idx_get(rvu, pcifunc, &ptr[0], &ptr[1], + &ptr[2], &ptr[3]); + if (rc) + return; + + /* LBK */ + if (is_lbk_vf(rvu, pcifunc)) { + index = NPC_DFT_RULE_ID_MK(pcifunc, NPC_DFT_RULE_PROMISC_ID); + map = xa_erase(&npc_priv.xa_pf2dfl_rmap, index); + if (!map) + dev_dbg(rvu->dev, + "%s: Err from delete %s mcam idx from xarray (pcifunc=%#x\n", + __func__, + npc_dft_rule_name[NPC_DFT_RULE_PROMISC_ID], + pcifunc); + + goto free_rules; + } + + /* VF */ + if (is_vf(pcifunc)) { + index = NPC_DFT_RULE_ID_MK(pcifunc, NPC_DFT_RULE_UCAST_ID); + map = xa_erase(&npc_priv.xa_pf2dfl_rmap, index); + if (!map) + dev_dbg(rvu->dev, + "%s: Err from delete %s mcam idx from xarray (pcifunc=%#x\n", + __func__, + npc_dft_rule_name[NPC_DFT_RULE_UCAST_ID], + pcifunc); + + goto free_rules; + } + + /* PF */ + for (i = NPC_DFT_RULE_START_ID; i < NPC_DFT_RULE_MAX_ID; i++) { + index = NPC_DFT_RULE_ID_MK(pcifunc, i); + map = xa_erase(&npc_priv.xa_pf2dfl_rmap, index); + if (!map) + dev_dbg(rvu->dev, + "%s: Err from delete %s mcam idx from xarray (pcifunc=%#x\n", + __func__, npc_dft_rule_name[i], + pcifunc); + } + +free_rules: + + free_req.hdr.pcifunc = pcifunc; + free_req.all = 1; + rc = rvu_mbox_handler_npc_mcam_free_entry(rvu, &free_req, &rsp); + if (rc) + dev_err(rvu->dev, + "%s: Error deleting default entries (pcifunc=%#x\n", + __func__, pcifunc); +} + +int npc_cn20k_dft_rules_alloc(struct rvu *rvu, u16 pcifunc) +{ + struct npc_mcam_free_entry_req free_req = { 0 }; + u16 mcam_idx[4] = { 0 }, pf_ucast, pf_pcifunc; + struct npc_mcam_alloc_entry_req req = { 0 }; + struct npc_mcam_alloc_entry_rsp rsp = { 0 }; + int ret, eidx, i, k, pf, cnt; + struct rvu_pfvf *pfvf; + unsigned long index; + struct msg_rsp free_rsp; + u16 b, m, p, u; + + if (!npc_priv.init_done) + return 0; + + if (!npc_is_cgx_or_lbk(rvu, pcifunc)) { + dev_dbg(rvu->dev, + "%s: dft rule allocation is only for cgx mapped device, pcifunc=%#x\n", + __func__, pcifunc); + return 0; + } + + /* Check if default rules are already alloced for this pcifunc */ + ret = npc_cn20k_dft_rules_idx_get(rvu, pcifunc, &b, &m, &p, &u); + if (!ret) { + dev_dbg(rvu->dev, + "%s: default rules are already installed (pcifunc=%#x)\n", + __func__, pcifunc); + dev_dbg(rvu->dev, + "%s: bcast(%u) mcast(%u) promisc(%u) ucast(%u)\n", + __func__, b, m, p, u); + return 0; + } + + /* Set ref index as lowest priority index */ + eidx = 2 * npc_priv.bank_depth - 1; + + /* Install only UCAST for VF */ + cnt = is_vf(pcifunc) ? 1 : ARRAY_SIZE(mcam_idx); + + /* For VF pcifunc, allocate default mcam indexes by taking + * ref as PF's ucast index. + */ + if (is_vf(pcifunc)) { + pf = rvu_get_pf(rvu->pdev, pcifunc); + pf_pcifunc = pf << RVU_CN20K_PFVF_PF_SHIFT; + + /* Get PF's ucast entry index */ + ret = npc_cn20k_dft_rules_idx_get(rvu, pf_pcifunc, NULL, + NULL, NULL, &pf_ucast); + + /* There is no PF rules installed; and VF installation comes + * first. PF may come later. + * TODO: Install PF rules before installing VF rules. + */ + + /* Set PF's ucast as ref entry */ + if (!ret) + eidx = pf_ucast; + } + + pfvf = rvu_get_pfvf(rvu, pcifunc); + pfvf->hw_prio = NPC_DFT_RULE_PRIO; + + req.contig = false; + req.ref_prio = NPC_MCAM_HIGHER_PRIO; + req.ref_entry = eidx; + req.kw_type = NPC_MCAM_KEY_X2; + req.count = cnt; + req.hdr.pcifunc = pcifunc; + + ret = rvu_mbox_handler_npc_mcam_alloc_entry(rvu, &req, &rsp); + + /* successfully allocated index */ + if (!ret) { + /* Copy indexes to local array */ + for (i = 0; i < cnt; i++) + mcam_idx[i] = rsp.entry_list[i]; + + goto chk_sanity; + } + + /* If there is no slots available and request is for PF, + * return error. + */ + if (!is_vf(pcifunc)) { + dev_err(rvu->dev, + "%s: Default index allocation failed for pcifunc=%#x\n", + __func__, pcifunc); + return ret; + } + + /* We could not find an index with higher priority index for VF. + * Find rule with lower priority index and set hardware priority + * as NPC_DFT_RULE_PRIO - 1 (higher hw priority) + */ + req.contig = false; + req.kw_type = NPC_MCAM_KEY_X2; + req.count = cnt; + req.hdr.pcifunc = pcifunc; + req.ref_prio = NPC_MCAM_LOWER_PRIO; + req.ref_entry = eidx + 1; + ret = rvu_mbox_handler_npc_mcam_alloc_entry(rvu, &req, &rsp); + if (ret) { + dev_err(rvu->dev, + "%s: Default index allocation failed for pcifunc=%#x\n", + __func__, pcifunc); + return ret; + } + + /* Copy indexes to local array */ + for (i = 0; i < cnt; i++) + mcam_idx[i] = rsp.entry_list[i]; + + pfvf->hw_prio = NPC_DFT_RULE_PRIO - 1; + +chk_sanity: + /* LBK */ + if (is_lbk_vf(rvu, pcifunc)) { + index = NPC_DFT_RULE_ID_MK(pcifunc, NPC_DFT_RULE_PROMISC_ID); + ret = xa_insert(&npc_priv.xa_pf2dfl_rmap, index, + xa_mk_value(mcam_idx[0]), GFP_KERNEL); + if (ret) { + dev_err(rvu->dev, + "%s: Err to insert %s mcam idx to xarray pcifunc=%#x\n", + __func__, + npc_dft_rule_name[NPC_DFT_RULE_PROMISC_ID], + pcifunc); + goto err; + } + + goto done; + } + + /* VF */ + if (is_vf(pcifunc)) { + index = NPC_DFT_RULE_ID_MK(pcifunc, NPC_DFT_RULE_UCAST_ID); + ret = xa_insert(&npc_priv.xa_pf2dfl_rmap, index, + xa_mk_value(mcam_idx[0]), GFP_KERNEL); + if (ret) { + dev_err(rvu->dev, + "%s: Err to insert %s mcam idx to xarray pcifunc=%#x\n", + __func__, + npc_dft_rule_name[NPC_DFT_RULE_UCAST_ID], + pcifunc); + goto err; + } + + goto done; + } + + /* PF */ + for (i = NPC_DFT_RULE_START_ID, k = 0; i < NPC_DFT_RULE_MAX_ID && + k < cnt; i++, k++) { + index = NPC_DFT_RULE_ID_MK(pcifunc, i); + ret = xa_insert(&npc_priv.xa_pf2dfl_rmap, index, + xa_mk_value(mcam_idx[k]), GFP_KERNEL); + if (ret) { + dev_err(rvu->dev, + "%s: Err to insert %s mcam idx to xarray pcifunc=%#x\n", + __func__, npc_dft_rule_name[i], + pcifunc); + for (int p = NPC_DFT_RULE_START_ID; p < i; p++) { + index = NPC_DFT_RULE_ID_MK(pcifunc, p); + xa_erase(&npc_priv.xa_pf2dfl_rmap, index); + } + goto err; + } + } + +done: + return 0; + +err: + free_req.hdr.pcifunc = pcifunc; + free_req.all = 1; + ret = rvu_mbox_handler_npc_mcam_free_entry(rvu, &free_req, &free_rsp); + if (ret) + dev_err(rvu->dev, + "%s: Error deleting default entries (pcifunc=%#x\n", + __func__, pcifunc); + + return -EFAULT; +} + static int npc_priv_init(struct rvu *rvu) { struct npc_mcam *mcam = &rvu->hw->mcam; @@ -2442,6 +2800,7 @@ static int npc_priv_init(struct rvu *rvu) xa_init_flags(&npc_priv.xa_sb_free, XA_FLAGS_ALLOC); xa_init_flags(&npc_priv.xa_idx2pf_map, XA_FLAGS_ALLOC); xa_init_flags(&npc_priv.xa_pf_map, XA_FLAGS_ALLOC); + xa_init_flags(&npc_priv.xa_pf2dfl_rmap, XA_FLAGS_ALLOC); if (npc_create_srch_order(num_subbanks)) goto fail1; @@ -2474,6 +2833,7 @@ fail1: xa_destroy(&npc_priv.xa_sb_free); xa_destroy(&npc_priv.xa_idx2pf_map); xa_destroy(&npc_priv.xa_pf_map); + xa_destroy(&npc_priv.xa_pf2dfl_rmap); kfree(npc_priv.sb); npc_priv.sb = NULL; return -ENOMEM; @@ -2487,6 +2847,7 @@ void npc_cn20k_deinit(struct rvu *rvu) xa_destroy(&npc_priv.xa_sb_free); xa_destroy(&npc_priv.xa_idx2pf_map); xa_destroy(&npc_priv.xa_pf_map); + xa_destroy(&npc_priv.xa_pf2dfl_rmap); for (i = 0; i < npc_priv.pf_cnt; i++) xa_destroy(&npc_priv.xa_pf2idx_map[i]); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h index 2ed59b3955f4..c4a20f1a5f12 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h @@ -95,6 +95,27 @@ enum npc_subbank_flag { NPC_SUBBANK_FLAG_USED = BIT(1), }; +/** + * enum npc_dft_rule_id - Default rule type + * + * Mcam default rule type. + * + * @NPC_DFT_RULE_START_ID: Not used + * @NPC_DFT_RULE_PROMISC_ID: promiscuous rule + * @NPC_DFT_RULE_MCAST_ID: multicast rule + * @NPC_DFT_RULE_BCAST_ID: broadcast rule + * @NPC_DFT_RULE_UCAST_ID: unicast rule + * @NPC_DFT_RULE_MAX_ID: Maximum index. + */ +enum npc_dft_rule_id { + NPC_DFT_RULE_START_ID = 1, + NPC_DFT_RULE_PROMISC_ID = NPC_DFT_RULE_START_ID, + NPC_DFT_RULE_MCAST_ID, + NPC_DFT_RULE_BCAST_ID, + NPC_DFT_RULE_UCAST_ID, + NPC_DFT_RULE_MAX_ID, +}; + /** * struct npc_subbank - Subbank fields. * @b0b: Subbanks bottom index for bank0 @@ -141,6 +162,7 @@ struct npc_subbank { * @xa_pf_map: Pcifunc to index map. * @pf_cnt: Number of PFs. * @init_done: Indicates MCAM initialization is done. + * @xa_pf2dfl_rmap: PF to default rule index map. * * This structure is populated during probing time by reading * HW csr registers. @@ -157,6 +179,7 @@ struct npc_priv_t { struct xarray *xa_pf2idx_map; struct xarray xa_idx2pf_map; struct xarray xa_pf_map; + struct xarray xa_pf2dfl_rmap; int pf_cnt; bool init_done; }; @@ -265,4 +288,10 @@ int npc_cn20k_apply_custom_kpu(struct rvu *rvu, void npc_cn20k_update_action_entries_n_flags(struct rvu *rvu, struct npc_kpu_profile_adapter *pfl); + +int npc_cn20k_dft_rules_alloc(struct rvu *rvu, u16 pcifunc); +void npc_cn20k_dft_rules_free(struct rvu *rvu, u16 pcifunc); + +int npc_cn20k_dft_rules_idx_get(struct rvu *rvu, u16 pcifunc, u16 *bcast, + u16 *mcast, u16 *promisc, u16 *ucast); #endif /* NPC_CN20K_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index a393bf884fd6..1b7f1103f78f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -1539,9 +1539,10 @@ struct npc_mcam_alloc_entry_req { #define NPC_MCAM_ANY_PRIO 0 #define NPC_MCAM_LOWER_PRIO 1 #define NPC_MCAM_HIGHER_PRIO 2 - u8 priority; /* Lower or higher w.r.t ref_entry */ + u8 ref_prio; /* Lower or higher w.r.t ref_entry */ u16 ref_entry; u16 count; /* Number of entries requested */ + u8 kw_type; /* entry key type, valid for cn20k */ }; struct npc_mcam_alloc_entry_rsp { @@ -1634,10 +1635,12 @@ struct npc_mcam_alloc_and_write_entry_req { struct mbox_msghdr hdr; struct mcam_entry entry_data; u16 ref_entry; - u8 priority; /* Lower or higher w.r.t ref_entry */ + u8 ref_prio; /* Lower or higher w.r.t ref_entry */ u8 intf; /* Rx or Tx interface */ u8 enable_entry;/* Enable this MCAM entry ? */ u8 alloc_cntr; /* Allocate counter and map ? */ + /* hardware priority, supported for cn20k */ + u8 hw_prio; }; struct npc_mcam_alloc_and_write_entry_rsp { @@ -1790,6 +1793,7 @@ struct npc_install_flow_req { u8 vtag1_op; /* old counter value */ u16 cntr_val; + u8 hw_prio; }; struct npc_install_flow_rsp { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 5b1129558e8b..40591134cb1c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -22,6 +22,7 @@ #include "rvu_npc_hash.h" #include "cn20k/reg.h" #include "cn20k/api.h" +#include "cn20k/npc.h" #define DRV_NAME "rvu_af" #define DRV_STRING "Marvell OcteonTX2 RVU Admin Function Driver" @@ -1467,6 +1468,13 @@ static int rvu_detach_rsrcs(struct rvu *rvu, struct rsrc_detach *detach, else if ((blkid == BLKADDR_CPT1) && !detach->cptlfs) continue; } + + if (detach_all || + (detach && (blkid == BLKADDR_NIX0 || + blkid == BLKADDR_NIX1) && + detach->nixlf)) + npc_cn20k_dft_rules_free(rvu, pcifunc); + rvu_detach_block(rvu, pcifunc, block->type); } @@ -1750,6 +1758,12 @@ int rvu_mbox_handler_attach_resources(struct rvu *rvu, err = rvu_attach_block(rvu, pcifunc, BLKTYPE_NIX, 1, attach); if (err) goto fail2; + + if (is_cn20k(rvu->pdev)) { + err = npc_cn20k_dft_rules_alloc(rvu, pcifunc); + if (err) + goto fail3; + } } if (attach->sso) { @@ -1763,7 +1777,7 @@ int rvu_mbox_handler_attach_resources(struct rvu *rvu, err = rvu_attach_block(rvu, pcifunc, BLKTYPE_SSO, attach->sso, attach); if (err) - goto fail3; + goto fail4; } if (attach->ssow) { @@ -1772,7 +1786,7 @@ int rvu_mbox_handler_attach_resources(struct rvu *rvu, err = rvu_attach_block(rvu, pcifunc, BLKTYPE_SSOW, attach->ssow, attach); if (err) - goto fail4; + goto fail5; } if (attach->timlfs) { @@ -1781,7 +1795,7 @@ int rvu_mbox_handler_attach_resources(struct rvu *rvu, err = rvu_attach_block(rvu, pcifunc, BLKTYPE_TIM, attach->timlfs, attach); if (err) - goto fail5; + goto fail6; } if (attach->cptlfs) { @@ -1791,24 +1805,28 @@ int rvu_mbox_handler_attach_resources(struct rvu *rvu, err = rvu_attach_block(rvu, pcifunc, BLKTYPE_CPT, attach->cptlfs, attach); if (err) - goto fail6; + goto fail7; } mutex_unlock(&rvu->rsrc_lock); return 0; -fail6: +fail7: if (attach->timlfs) rvu_detach_block(rvu, pcifunc, BLKTYPE_TIM); -fail5: +fail6: if (attach->ssow) rvu_detach_block(rvu, pcifunc, BLKTYPE_SSOW); -fail4: +fail5: if (attach->sso) rvu_detach_block(rvu, pcifunc, BLKTYPE_SSO); +fail4: + if (is_cn20k(rvu->pdev)) + npc_cn20k_dft_rules_free(rvu, pcifunc); + fail3: if (attach->nixlf) rvu_detach_block(rvu, pcifunc, BLKTYPE_NIX); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index dd930aa05582..d2a0f6821cf9 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -308,6 +308,7 @@ struct rvu_pfvf { u64 lmt_map_ent_w1; /* Preseving the word1 of lmtst map table entry*/ unsigned long flags; struct sdp_node_info *sdp_info; + u8 hw_prio; /* Hw priority of default rules */ }; enum rvu_pfvf_flags { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 3e4f4fee791a..bdb11f72a08b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -653,6 +653,9 @@ void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc, req.match_id = action.match_id; req.flow_key_alg = action.flow_key_alg; + if (is_cn20k(rvu->pdev)) + req.hw_prio = pfvf->hw_prio; + rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp); } @@ -741,6 +744,9 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc, req.match_id = action.match_id; req.flow_key_alg = flow_key_alg; + if (is_cn20k(rvu->pdev)) + req.hw_prio = pfvf->hw_prio; + rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp); } @@ -821,6 +827,9 @@ void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc, req.hdr.pcifunc = 0; /* AF is requester */ req.vf = pcifunc; + if (is_cn20k(rvu->pdev)) + req.hw_prio = pfvf->hw_prio; + rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp); } @@ -909,6 +918,9 @@ void rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf, req.match_id = action.match_id; req.flow_key_alg = flow_key_alg; + if (is_cn20k(rvu->pdev)) + req.hw_prio = pfvf->hw_prio; + rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp); } @@ -2491,7 +2503,7 @@ npc_get_mcam_search_range_priority(struct npc_mcam *mcam, { u16 fcnt; - if (req->priority == NPC_MCAM_HIGHER_PRIO) + if (req->ref_prio == NPC_MCAM_HIGHER_PRIO) goto hprio; /* For a low priority entry allocation @@ -2591,7 +2603,7 @@ static int npc_mcam_alloc_entries(struct npc_mcam *mcam, u16 pcifunc, goto lprio_alloc; /* Get the search range for priority allocation request */ - if (req->priority) { + if (req->ref_prio) { npc_get_mcam_search_range_priority(mcam, req, &start, &end, &reverse); goto alloc; @@ -2632,11 +2644,11 @@ lprio_alloc: * and not in mid zone. */ if (!(pcifunc & RVU_PFVF_FUNC_MASK) && - req->priority == NPC_MCAM_HIGHER_PRIO) + req->ref_prio == NPC_MCAM_HIGHER_PRIO) end = req->ref_entry; if (!(pcifunc & RVU_PFVF_FUNC_MASK) && - req->priority == NPC_MCAM_LOWER_PRIO) + req->ref_prio == NPC_MCAM_LOWER_PRIO) start = req->ref_entry; } @@ -2685,7 +2697,7 @@ alloc: /* If allocating requested no of entries is unsucessful, * expand the search range to full bitmap length and retry. */ - if (!req->priority && (rsp->count < req->count) && + if (!req->ref_prio && rsp->count < req->count && ((end - start) != mcam->bmap_entries)) { reverse = true; start = 0; @@ -2696,14 +2708,14 @@ alloc: /* For priority entry allocation requests, if allocation is * failed then expand search to max possible range and retry. */ - if (req->priority && rsp->count < req->count) { - if (req->priority == NPC_MCAM_LOWER_PRIO && + if (req->ref_prio && rsp->count < req->count) { + if (req->ref_prio == NPC_MCAM_LOWER_PRIO && (start != (req->ref_entry + 1))) { start = req->ref_entry + 1; end = mcam->bmap_entries; reverse = false; goto alloc; - } else if ((req->priority == NPC_MCAM_HIGHER_PRIO) && + } else if ((req->ref_prio == NPC_MCAM_HIGHER_PRIO) && ((end - start) != req->ref_entry)) { start = 0; end = req->ref_entry; @@ -2817,9 +2829,9 @@ int rvu_mbox_handler_npc_mcam_alloc_entry(struct rvu *rvu, /* ref_entry can't be '0' if requested priority is high. * Can't be last entry if requested priority is low. */ - if ((!req->ref_entry && req->priority == NPC_MCAM_HIGHER_PRIO) || - ((req->ref_entry == mcam->bmap_entries) && - req->priority == NPC_MCAM_LOWER_PRIO)) + if ((!req->ref_entry && req->ref_prio == NPC_MCAM_HIGHER_PRIO) || + (req->ref_entry == mcam->bmap_entries && + req->ref_prio == NPC_MCAM_LOWER_PRIO)) return NPC_MCAM_INVALID_REQ; /* Since list of allocated indices needs to be sent to requester, @@ -3365,7 +3377,7 @@ int rvu_mbox_handler_npc_mcam_alloc_and_write_entry(struct rvu *rvu, /* Try to allocate a MCAM entry */ entry_req.hdr.pcifunc = req->hdr.pcifunc; entry_req.contig = true; - entry_req.priority = req->priority; + entry_req.ref_prio = req->ref_prio; entry_req.ref_entry = req->ref_entry; entry_req.count = 1; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c index 12c001ee34e2..531f6adba45f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c @@ -104,7 +104,7 @@ int otx2_alloc_mcam_entries(struct otx2_nic *pfvf, u16 count) * will be on top of PF. */ if (!is_otx2_vf(pfvf->pcifunc)) { - req->priority = NPC_MCAM_HIGHER_PRIO; + req->ref_prio = NPC_MCAM_HIGHER_PRIO; req->ref_entry = flow_cfg->def_ent[0]; } From 6d1e70282f7607948085a510ea03b9a10b5571f7 Mon Sep 17 00:00:00 2001 From: Suman Ghosh Date: Tue, 24 Feb 2026 13:30:02 +0530 Subject: [PATCH 0108/1561] octeontx2-af: npc: cn20k: Use common APIs In cn20k silicon, the register definitions and the algorithms used to read, write, copy, and enable MCAM entries have changed. This patch updates the common APIs to support both cn20k and previous silicon variants. Additionally, cn20k introduces a new algorithm for MCAM index management. The common APIs are updated to invoke the cn20k-specific index management routines for allocating, freeing, and retrieving default MCAM entries. Signed-off-by: Suman Ghosh Signed-off-by: Ratheesh Kannoth Link: https://patch.msgid.link/20260224080009.4147301-7-rkannoth@marvell.com Signed-off-by: Jakub Kicinski --- .../ethernet/marvell/octeontx2/af/cn20k/npc.c | 503 +++++++++++++++++- .../ethernet/marvell/octeontx2/af/cn20k/npc.h | 15 + .../net/ethernet/marvell/octeontx2/af/mbox.h | 3 + .../net/ethernet/marvell/octeontx2/af/rvu.h | 4 +- .../ethernet/marvell/octeontx2/af/rvu_nix.c | 1 - .../ethernet/marvell/octeontx2/af/rvu_npc.c | 348 +++++++++--- .../marvell/octeontx2/af/rvu_npc_fs.c | 11 +- 7 files changed, 795 insertions(+), 90 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c index a15d408b0fa9..cb8a37a2756a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c @@ -568,6 +568,472 @@ program_mkex_extr: iounmap(mkex_prfl_addr); } +void +npc_cn20k_enable_mcam_entry(struct rvu *rvu, int blkaddr, + int index, bool enable) +{ + struct npc_mcam *mcam = &rvu->hw->mcam; + int mcam_idx = index % mcam->banksize; + int bank = index / mcam->banksize; + u64 cfg, hw_prio; + u8 kw_type; + + npc_mcam_idx_2_key_type(rvu, index, &kw_type); + if (kw_type == NPC_MCAM_KEY_X2) { + cfg = rvu_read64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(mcam_idx, + bank)); + hw_prio = cfg & GENMASK_ULL(14, 8); + cfg = enable ? 1 : 0; + cfg |= hw_prio; + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(mcam_idx, bank), + cfg); + return; + } + + /* For NPC_CN20K_MCAM_KEY_X4 keys, both the banks + * need to be programmed with the same value. + */ + for (bank = 0; bank < mcam->banks_per_entry; bank++) { + cfg = rvu_read64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(mcam_idx, + bank)); + hw_prio = cfg & GENMASK_ULL(14, 8); + cfg = enable ? 1 : 0; + cfg |= hw_prio; + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(mcam_idx, bank), + cfg); + } +} + +void +npc_cn20k_clear_mcam_entry(struct rvu *rvu, int blkaddr, int bank, int index) +{ + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CAMX_INTF_EXT(index, bank, 1), + 0); + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CAMX_INTF_EXT(index, bank, 0), + 0); + + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CAMX_W0_EXT(index, bank, 1), 0); + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CAMX_W0_EXT(index, bank, 0), 0); + + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CAMX_W1_EXT(index, bank, 1), 0); + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CAMX_W1_EXT(index, bank, 0), 0); + + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CAMX_W2_EXT(index, bank, 1), 0); + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CAMX_W2_EXT(index, bank, 0), 0); + + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CAMX_W3_EXT(index, bank, 1), 0); + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CAMX_W3_EXT(index, bank, 0), 0); +} + +static void npc_cn20k_get_keyword(struct mcam_entry *entry, int idx, + u64 *cam0, u64 *cam1) +{ + u64 kw_mask; + + /* The two banks of every MCAM entry are used as a single double-wide + * entry that is compared with the search key as follows: + * + * NPC_AF_MCAME()_BANK(0)_CAM(0..1)_W0_EXT[MD] ->NPC_MCAM_KEY_X4_S[KW0] + * NPC_AF_MCAME()_BANK(0)_CAM(0..1)_W1_EXT[MD] ->NPC_MCAM_KEY_X4_S[KW1] + * NPC_AF_MCAME()_BANK(0)_CAM(0..1)_W2_EXT[MD] ->NPC_MCAM_KEY_X4_S[KW2] + * NPC_AF_MCAME()_BANK(0)_CAM(0..1)_W3_EXT[MD] ->NPC_MCAM_KEY_X4_S[KW3] + * NPC_AF_MCAME()_BANK(1)_CAM(0..1)_W0_EXT[MD] ->NPC_MCAM_KEY_X4_S[KW4] + * NPC_AF_MCAME()_BANK(1)_CAM(0..1)_W1_EXT[MD] ->NPC_MCAM_KEY_X4_S[KW5] + * NPC_AF_MCAME()_BANK(1)_CAM(0..1)_W2_EXT[MD] ->NPC_MCAM_KEY_X4_S[KW6] + * NPC_AF_MCAME()_BANK(1)_CAM(0..1)_W3_EXT[MD] ->NPC_MCAM_KEY_X4_S[KW7] + */ + *cam1 = entry->kw[idx]; + kw_mask = entry->kw_mask[idx]; + *cam1 &= kw_mask; + *cam0 = ~*cam1 & kw_mask; +} + +/*-----------------------------------------------------------------------------| + *Kex type | mcam entry | cam1 | cam 0 || <----- output ----> | + *profile | len | (key type) | (key type) || len | type | + *-----------------------------------------------------------------------------| + *X2 | 256 (X2) | 001b | 110b || X2 | X2 | + *-----------------------------------------------------------------------------| + *X4 | 256 (X2) | 000b | 000b || X2 | DYNAMIC | + *-----------------------------------------------------------------------------| + *X4 | 512 (X4) | 010b | 101b || X4 | X4 | + *-----------------------------------------------------------------------------| + *DYN | 256 (X2) | 000b | 000b || X2 | DYNAMIC | + *-----------------------------------------------------------------------------| + *DYN | 512 (X4) | 010b | 101b || X4 | X4 | + *-----------------------------------------------------------------------------| + */ +static void npc_cn20k_config_kw_x2(struct rvu *rvu, struct npc_mcam *mcam, + int blkaddr, int index, u8 intf, + struct mcam_entry *entry, + int bank, u8 kw_type, int kw) +{ + u64 intf_ext = 0, intf_ext_mask = 0; + u8 tx_intf_mask = ~intf & 0x3; + u8 tx_intf = intf, kex_type; + u8 kw_type_mask = ~kw_type; + u64 cam0, cam1, kex_cfg; + + if (is_npc_intf_tx(intf)) { + /* Last bit must be set and rest don't care + * for TX interfaces + */ + tx_intf_mask = 0x1; + tx_intf = intf & tx_intf_mask; + tx_intf_mask = ~tx_intf & tx_intf_mask; + } + + kex_cfg = rvu_read64(rvu, blkaddr, NPC_AF_INTFX_KEX_CFG(intf)); + kex_type = (kex_cfg & GENMASK_ULL(34, 32)) >> 32; + if ((kex_type == NPC_MCAM_KEY_DYN || kex_type == NPC_MCAM_KEY_X4) && + kw_type == NPC_MCAM_KEY_X2) { + kw_type = 0; + kw_type_mask = 0; + } + + intf_ext = ((u64)kw_type << 16) | tx_intf; + intf_ext_mask = (((u64)kw_type_mask << 16) & GENMASK_ULL(18, 16)) | + tx_intf_mask; + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CAMX_INTF_EXT(index, bank, 1), + intf_ext); + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CAMX_INTF_EXT(index, bank, 0), + intf_ext_mask); + + /* Set the match key */ + npc_cn20k_get_keyword(entry, kw, &cam0, &cam1); + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CAMX_W0_EXT(index, bank, 1), + cam1); + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CAMX_W0_EXT(index, bank, 0), + cam0); + + npc_cn20k_get_keyword(entry, kw + 1, &cam0, &cam1); + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CAMX_W1_EXT(index, bank, 1), + cam1); + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CAMX_W1_EXT(index, bank, 0), + cam0); + + npc_cn20k_get_keyword(entry, kw + 2, &cam0, &cam1); + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CAMX_W2_EXT(index, bank, 1), + cam1); + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CAMX_W2_EXT(index, bank, 0), + cam0); + + npc_cn20k_get_keyword(entry, kw + 3, &cam0, &cam1); + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CAMX_W3_EXT(index, bank, 1), + cam1); + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CAMX_W3_EXT(index, bank, 0), + cam0); +} + +static void npc_cn20k_config_kw_x4(struct rvu *rvu, struct npc_mcam *mcam, + int blkaddr, int index, u8 intf, + struct mcam_entry *entry, u8 kw_type) +{ + int kw = 0, bank; + + for (bank = 0; bank < mcam->banks_per_entry; bank++, kw = kw + 4) + npc_cn20k_config_kw_x2(rvu, mcam, blkaddr, + index, intf, + entry, bank, kw_type, kw); +} + +static void +npc_cn20k_set_mcam_bank_cfg(struct rvu *rvu, int blkaddr, int mcam_idx, + int bank, u8 kw_type, bool enable, u8 hw_prio) +{ + struct npc_mcam *mcam = &rvu->hw->mcam; + u64 bank_cfg; + + bank_cfg = (u64)hw_prio << 8; + if (enable) + bank_cfg |= 0x1; + + if (kw_type == NPC_MCAM_KEY_X2) { + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(mcam_idx, bank), + bank_cfg); + return; + } + + /* For NPC_MCAM_KEY_X4 keys, both the banks + * need to be programmed with the same value. + */ + for (bank = 0; bank < mcam->banks_per_entry; bank++) { + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(mcam_idx, bank), + bank_cfg); + } +} + +void +npc_cn20k_config_mcam_entry(struct rvu *rvu, int blkaddr, int index, u8 intf, + struct mcam_entry *entry, bool enable, u8 hw_prio) +{ + struct npc_mcam *mcam = &rvu->hw->mcam; + int mcam_idx = index % mcam->banksize; + int bank = index / mcam->banksize; + int kw = 0; + u8 kw_type; + + /* Disable before mcam entry update */ + npc_cn20k_enable_mcam_entry(rvu, blkaddr, index, false); + + npc_mcam_idx_2_key_type(rvu, index, &kw_type); + /* CAM1 takes the comparison value and + * CAM0 specifies match for a bit in key being '0' or '1' or 'dontcare'. + * CAM1 = 0 & CAM0 = 1 => match if key = 0 + * CAM1 = 1 & CAM0 = 0 => match if key = 1 + * CAM1 = 0 & CAM0 = 0 => always match i.e dontcare. + */ + if (kw_type == NPC_MCAM_KEY_X2) { + /* Clear mcam entry to avoid writes being suppressed by NPC */ + npc_cn20k_clear_mcam_entry(rvu, blkaddr, bank, mcam_idx); + npc_cn20k_config_kw_x2(rvu, mcam, blkaddr, + mcam_idx, intf, entry, + bank, kw_type, kw); + /* Set 'action' */ + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(mcam_idx, + bank, 0), + entry->action); + + /* Set TAG 'action' */ + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(mcam_idx, + bank, 1), + entry->vtag_action); + + goto set_cfg; + } + /* Clear mcam entry to avoid writes being suppressed by NPC */ + npc_cn20k_clear_mcam_entry(rvu, blkaddr, 0, mcam_idx); + npc_cn20k_clear_mcam_entry(rvu, blkaddr, 1, mcam_idx); + + npc_cn20k_config_kw_x4(rvu, mcam, blkaddr, + mcam_idx, intf, entry, kw_type); + for (bank = 0; bank < mcam->banks_per_entry; bank++) { + /* Set 'action' */ + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(mcam_idx, + bank, 0), + entry->action); + + /* Set TAG 'action' */ + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(mcam_idx, + bank, 1), + entry->vtag_action); + } + +set_cfg: + /* TODO: */ + /* PF installing VF rule */ + npc_cn20k_set_mcam_bank_cfg(rvu, blkaddr, mcam_idx, bank, + kw_type, enable, hw_prio); +} + +void npc_cn20k_copy_mcam_entry(struct rvu *rvu, int blkaddr, u16 src, u16 dest) +{ + struct npc_mcam *mcam = &rvu->hw->mcam; + u64 cfg, sreg, dreg, soff, doff; + u8 src_kwtype, dest_kwtype; + int bank, i, sb, db; + int dbank, sbank; + + dbank = npc_get_bank(mcam, dest); + sbank = npc_get_bank(mcam, src); + npc_mcam_idx_2_key_type(rvu, src, &src_kwtype); + npc_mcam_idx_2_key_type(rvu, dest, &dest_kwtype); + if (src_kwtype != dest_kwtype) + return; + + src &= (mcam->banksize - 1); + dest &= (mcam->banksize - 1); + + /* Copy INTF's, W0's, W1's, W2's, W3s CAM0 and CAM1 configuration */ + for (bank = 0; bank < mcam->banks_per_entry; bank++) { + sb = sbank + bank; + sreg = NPC_AF_CN20K_MCAMEX_BANKX_CAMX_INTF_EXT(src, sb, 0); + db = dbank + bank; + dreg = NPC_AF_CN20K_MCAMEX_BANKX_CAMX_INTF_EXT(dest, db, 0); + for (i = 0; i < 10; i++) { + cfg = rvu_read64(rvu, blkaddr, sreg + (i * 8)); + rvu_write64(rvu, blkaddr, dreg + (i * 8), cfg); + } + + /* Copy action */ + for (i = 0; i < 3; i++) { + soff = NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(src, + sb, i); + cfg = rvu_read64(rvu, blkaddr, soff); + + doff = NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(dest, db, + i); + rvu_write64(rvu, blkaddr, doff, cfg); + } + + /* Copy bank configuration */ + cfg = rvu_read64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(src, sb)); + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(dest, db), cfg); + if (src_kwtype == NPC_MCAM_KEY_X2) + break; + } +} + +static void npc_cn20k_fill_entryword(struct mcam_entry *entry, int idx, + u64 cam0, u64 cam1) +{ + entry->kw[idx] = cam1; + entry->kw_mask[idx] = cam1 ^ cam0; +} + +void npc_cn20k_read_mcam_entry(struct rvu *rvu, int blkaddr, u16 index, + struct mcam_entry *entry, u8 *intf, u8 *ena, + u8 *hw_prio) +{ + struct npc_mcam *mcam = &rvu->hw->mcam; + u64 cam0, cam1, bank_cfg, cfg; + int kw = 0, bank; + u8 kw_type; + + npc_mcam_idx_2_key_type(rvu, index, &kw_type); + + bank = npc_get_bank(mcam, index); + index &= (mcam->banksize - 1); + + cfg = rvu_read64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CAMX_INTF_EXT(index, + bank, 1)) & 3; + *intf = cfg; + + bank_cfg = rvu_read64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(index, bank)); + *ena = bank_cfg & 0x1; + *hw_prio = (bank_cfg & GENMASK_ULL(14, 8)) >> 8; + if (kw_type == NPC_MCAM_KEY_X2) { + cam1 = rvu_read64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CAMX_W0_EXT(index, + bank, + 1)); + cam0 = rvu_read64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CAMX_W0_EXT(index, + bank, + 0)); + npc_cn20k_fill_entryword(entry, kw, cam0, cam1); + + cam1 = rvu_read64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CAMX_W1_EXT(index, + bank, + 1)); + cam0 = rvu_read64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CAMX_W1_EXT(index, + bank, + 0)); + npc_cn20k_fill_entryword(entry, kw + 1, cam0, cam1); + + cam1 = rvu_read64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CAMX_W2_EXT(index, + bank, + 1)); + cam0 = rvu_read64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CAMX_W2_EXT(index, + bank, + 0)); + npc_cn20k_fill_entryword(entry, kw + 2, cam0, cam1); + + cam1 = rvu_read64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CAMX_W3_EXT(index, + bank, + 1)); + cam0 = rvu_read64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CAMX_W3_EXT(index, + bank, + 0)); + npc_cn20k_fill_entryword(entry, kw + 3, cam0, cam1); + goto read_action; + } + + for (bank = 0; bank < mcam->banks_per_entry; bank++, kw = kw + 4) { + cam1 = rvu_read64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CAMX_W0_EXT(index, + bank, + 1)); + cam0 = rvu_read64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CAMX_W0_EXT(index, + bank, + 0)); + npc_cn20k_fill_entryword(entry, kw, cam0, cam1); + + cam1 = rvu_read64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CAMX_W1_EXT(index, + bank, + 1)); + cam0 = rvu_read64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CAMX_W1_EXT(index, + bank, + 0)); + npc_cn20k_fill_entryword(entry, kw + 1, cam0, cam1); + + cam1 = rvu_read64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CAMX_W2_EXT(index, + bank, + 1)); + cam0 = rvu_read64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CAMX_W2_EXT(index, + bank, + 0)); + npc_cn20k_fill_entryword(entry, kw + 2, cam0, cam1); + + cam1 = rvu_read64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CAMX_W3_EXT(index, + bank, + 1)); + cam0 = rvu_read64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CAMX_W3_EXT(index, + bank, + 0)); + npc_cn20k_fill_entryword(entry, kw + 3, cam0, cam1); + } + +read_action: + /* 'action' is set to same value for both bank '0' and '1'. + * Hence, reading bank '0' should be enough. + */ + cfg = rvu_read64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(index, 0, 0)); + entry->action = cfg; + + cfg = rvu_read64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(index, 0, 1)); + entry->vtag_action = cfg; +} + static u8 npc_map2cn20k_flag(u8 flag) { switch (flag) { @@ -650,11 +1116,12 @@ npc_cn20k_update_action_entries_n_flags(struct rvu *rvu, int npc_cn20k_apply_custom_kpu(struct rvu *rvu, struct npc_kpu_profile_adapter *profile) { + size_t hdr_sz = sizeof(struct npc_cn20k_kpu_profile_fwdata); struct npc_cn20k_kpu_profile_fwdata *fw = rvu->kpu_fwdata; struct npc_kpu_profile_action *action; struct npc_kpu_profile_cam *cam; struct npc_kpu_fwdata *fw_kpu; - size_t hdr_sz, offset = 0; + size_t offset = 0; u16 kpu, entry; int entries; @@ -741,6 +1208,40 @@ int npc_cn20k_apply_custom_kpu(struct rvu *rvu, return 0; } +int npc_mcam_idx_2_key_type(struct rvu *rvu, u16 mcam_idx, u8 *key_type) +{ + struct npc_subbank *sb; + int bank_off, sb_id; + + /* mcam_idx should be less than (2 * bank depth) */ + if (mcam_idx >= npc_priv.bank_depth * 2) { + dev_err(rvu->dev, "%s:%d bad params\n", + __func__, __LINE__); + return -EINVAL; + } + + /* find mcam offset per bank */ + bank_off = mcam_idx & (npc_priv.bank_depth - 1); + + /* Find subbank id */ + sb_id = bank_off / npc_priv.subbank_depth; + + /* Check if subbank id is more than maximum + * number of subbanks available + */ + if (sb_id >= npc_priv.num_subbanks) { + dev_err(rvu->dev, "%s:%d invalid subbank %d\n", + __func__, __LINE__, sb_id); + return -EINVAL; + } + + sb = &npc_priv.sb[sb_id]; + + *key_type = sb->key_type; + + return 0; +} + static int npc_subbank_idx_2_mcam_idx(struct rvu *rvu, struct npc_subbank *sb, u16 sub_off, u16 *mcam_idx) { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h index c4a20f1a5f12..861622741174 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h @@ -294,4 +294,19 @@ void npc_cn20k_dft_rules_free(struct rvu *rvu, u16 pcifunc); int npc_cn20k_dft_rules_idx_get(struct rvu *rvu, u16 pcifunc, u16 *bcast, u16 *mcast, u16 *promisc, u16 *ucast); + +void npc_cn20k_config_mcam_entry(struct rvu *rvu, int blkaddr, int index, + u8 intf, struct mcam_entry *entry, + bool enable, u8 hw_prio); +void npc_cn20k_enable_mcam_entry(struct rvu *rvu, int blkaddr, + int index, bool enable); +void npc_cn20k_copy_mcam_entry(struct rvu *rvu, int blkaddr, + u16 src, u16 dest); +void npc_cn20k_read_mcam_entry(struct rvu *rvu, int blkaddr, u16 index, + struct mcam_entry *entry, u8 *intf, u8 *ena, + u8 *hw_prio); +void npc_cn20k_clear_mcam_entry(struct rvu *rvu, int blkaddr, + int bank, int index); +int npc_mcam_idx_2_key_type(struct rvu *rvu, u16 mcam_idx, u8 *key_type); + #endif /* NPC_CN20K_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 1b7f1103f78f..8aa684bd83f0 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -1577,6 +1577,8 @@ struct npc_mcam_write_entry_req { u8 intf; /* Rx or Tx interface */ u8 enable_entry;/* Enable this MCAM entry ? */ u8 set_cntr; /* Set counter for this entry ? */ + u8 hw_prio; /* hardware priority, valid for cn20k */ + u64 reserved; /* reserved for future use */ }; /* Enable/Disable a given entry */ @@ -1824,6 +1826,7 @@ struct npc_mcam_read_entry_rsp { struct mcam_entry entry_data; u8 intf; u8 enable; + u8 hw_prio; /* valid for cn20k */ }; /* Available entries to use */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index d2a0f6821cf9..a53bb5c924ef 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -1124,8 +1124,8 @@ int rvu_cgx_cfg_pause_frm(struct rvu *rvu, u16 pcifunc, u8 tx_pause, u8 rx_pause void rvu_mac_reset(struct rvu *rvu, u16 pcifunc); u32 rvu_cgx_get_lmac_fifolen(struct rvu *rvu, int cgx, int lmac); void cgx_start_linkup(struct rvu *rvu); -int npc_get_nixlf_mcam_index(struct npc_mcam *mcam, u16 pcifunc, int nixlf, - int type); +int npc_get_nixlf_mcam_index(struct npc_mcam *mcam, + u16 pcifunc, int nixlf, int type); bool is_mcam_entry_enabled(struct rvu *rvu, struct npc_mcam *mcam, int blkaddr, int index); int rvu_npc_init(struct rvu *rvu); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 98dd68137a09..badfa1d64252 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -5289,7 +5289,6 @@ int rvu_mbox_handler_nix_lf_stop_rx(struct rvu *rvu, struct msg_req *req, /* Disable the interface if it is in any multicast list */ nix_mcast_update_mce_entry(rvu, pcifunc, 0); - pfvf = rvu_get_pfvf(rvu, pcifunc); clear_bit(NIXLF_INITIALIZED, &pfvf->flags); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index bdb11f72a08b..5208cd084055 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -18,6 +18,7 @@ #include "rvu_npc_hash.h" #include "cn20k/npc.h" #include "rvu_npc.h" +#include "cn20k/reg.h" #define RSVD_MCAM_ENTRIES_PER_PF 3 /* Broadcast, Promisc and AllMulticast */ #define RSVD_MCAM_ENTRIES_PER_NIXLF 1 /* Ucast for LFs */ @@ -151,10 +152,33 @@ int npc_get_nixlf_mcam_index(struct npc_mcam *mcam, { struct rvu_hwinfo *hw = container_of(mcam, struct rvu_hwinfo, mcam); struct rvu *rvu = hw->rvu; - int pf = rvu_get_pf(rvu->pdev, pcifunc); + u16 bcast, mcast, promisc, ucast; int index; + int rc; + int pf; + + if (is_cn20k(rvu->pdev)) { + rc = npc_cn20k_dft_rules_idx_get(rvu, pcifunc, &bcast, &mcast, + &promisc, &ucast); + if (rc) + return -EFAULT; + + switch (type) { + case NIXLF_BCAST_ENTRY: + return bcast; + case NIXLF_ALLMULTI_ENTRY: + return mcast; + case NIXLF_PROMISC_ENTRY: + return promisc; + case NIXLF_UCAST_ENTRY: + return ucast; + default: + return -EINVAL; + } + } /* Check if this is for a PF */ + pf = rvu_get_pf(rvu->pdev, pcifunc); if (pf && !(pcifunc & RVU_PFVF_FUNC_MASK)) { /* Reserved entries exclude PF0 */ pf--; @@ -175,7 +199,12 @@ int npc_get_nixlf_mcam_index(struct npc_mcam *mcam, int npc_get_bank(struct npc_mcam *mcam, int index) { + struct rvu_hwinfo *hw = container_of(mcam, struct rvu_hwinfo, mcam); int bank = index / mcam->banksize; + struct rvu *rvu = hw->rvu; + + if (is_cn20k(rvu->pdev)) + return bank; /* 0,1 & 2,3 banks are combined for this keysize */ if (mcam->keysize == NPC_MCAM_KEY_X2) @@ -191,7 +220,14 @@ bool is_mcam_entry_enabled(struct rvu *rvu, struct npc_mcam *mcam, u64 cfg; index &= (mcam->banksize - 1); - cfg = rvu_read64(rvu, blkaddr, NPC_AF_MCAMEX_BANKX_CFG(index, bank)); + if (is_cn20k(rvu->pdev)) + cfg = rvu_read64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(index, + bank)); + else + cfg = rvu_read64(rvu, blkaddr, + NPC_AF_MCAMEX_BANKX_CFG(index, bank)); + return (cfg & 1); } @@ -201,6 +237,13 @@ void npc_enable_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, int bank = npc_get_bank(mcam, index); int actbank = bank; + if (is_cn20k(rvu->pdev)) { + if (index < 0 || index >= mcam->banksize * mcam->banks) + return; + + return npc_cn20k_enable_mcam_entry(rvu, blkaddr, index, enable); + } + index &= (mcam->banksize - 1); for (; bank < (actbank + mcam->banks_per_entry); bank++) { rvu_write64(rvu, blkaddr, @@ -369,6 +412,7 @@ static u64 npc_get_default_entry_action(struct rvu *rvu, struct npc_mcam *mcam, int blkaddr, u16 pf_func) { int bank, nixlf, index; + u64 reg; /* get ucast entry rule entry index */ if (nix_get_nixlf(rvu, pf_func, &nixlf, NULL)) { @@ -383,8 +427,12 @@ static u64 npc_get_default_entry_action(struct rvu *rvu, struct npc_mcam *mcam, bank = npc_get_bank(mcam, index); index &= (mcam->banksize - 1); - return rvu_read64(rvu, blkaddr, - NPC_AF_MCAMEX_BANKX_ACTION(index, bank)); + if (is_cn20k(rvu->pdev)) + reg = NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(index, bank, 0); + else + reg = NPC_AF_MCAMEX_BANKX_ACTION(index, bank); + + return rvu_read64(rvu, blkaddr, reg); } static void npc_fixup_vf_rule(struct rvu *rvu, struct npc_mcam *mcam, @@ -549,6 +597,9 @@ static void npc_copy_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, u64 cfg, sreg, dreg; int bank, i; + if (is_cn20k(rvu->pdev)) + return npc_cn20k_copy_mcam_entry(rvu, blkaddr, src, dest); + src &= (mcam->banksize - 1); dest &= (mcam->banksize - 1); @@ -585,20 +636,31 @@ u64 npc_get_mcam_action(struct rvu *rvu, struct npc_mcam *mcam, int blkaddr, int index) { int bank = npc_get_bank(mcam, index); + u64 reg; index &= (mcam->banksize - 1); - return rvu_read64(rvu, blkaddr, - NPC_AF_MCAMEX_BANKX_ACTION(index, bank)); + + if (is_cn20k(rvu->pdev)) + reg = NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(index, bank, 0); + else + reg = NPC_AF_MCAMEX_BANKX_ACTION(index, bank); + return rvu_read64(rvu, blkaddr, reg); } void npc_set_mcam_action(struct rvu *rvu, struct npc_mcam *mcam, int blkaddr, int index, u64 cfg) { int bank = npc_get_bank(mcam, index); + u64 reg; index &= (mcam->banksize - 1); - return rvu_write64(rvu, blkaddr, - NPC_AF_MCAMEX_BANKX_ACTION(index, bank), cfg); + + if (is_cn20k(rvu->pdev)) + reg = NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(index, bank, 0); + else + reg = NPC_AF_MCAMEX_BANKX_ACTION(index, bank); + + return rvu_write64(rvu, blkaddr, reg, cfg); } void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc, @@ -954,33 +1016,42 @@ static void npc_update_vf_flow_entry(struct rvu *rvu, struct npc_mcam *mcam, mutex_lock(&mcam->lock); for (index = 0; index < mcam->bmap_entries; index++) { - if (mcam->entry2target_pffunc[index] == pcifunc) { - update = true; - /* update not needed for the rules added via ntuple filters */ - list_for_each_entry(rule, &mcam->mcam_rules, list) { - if (rule->entry == index) - update = false; - } - if (!update) - continue; - bank = npc_get_bank(mcam, index); - actindex = index; - entry = index & (mcam->banksize - 1); + if (mcam->entry2target_pffunc[index] != pcifunc) + continue; + update = true; + /* update not needed for the rules added via ntuple filters */ + list_for_each_entry(rule, &mcam->mcam_rules, list) { + if (rule->entry == index) + update = false; + } + if (!update) + continue; + bank = npc_get_bank(mcam, index); + actindex = index; + entry = index & (mcam->banksize - 1); - /* read vf flow entry enable status */ - enable = is_mcam_entry_enabled(rvu, mcam, blkaddr, - actindex); - /* disable before mcam entry update */ - npc_enable_mcam_entry(rvu, mcam, blkaddr, actindex, - false); - /* update 'action' */ + /* read vf flow entry enable status */ + enable = is_mcam_entry_enabled(rvu, mcam, blkaddr, + actindex); + /* disable before mcam entry update */ + npc_enable_mcam_entry(rvu, mcam, blkaddr, actindex, + false); + + /* update 'action' */ + if (is_cn20k(rvu->pdev)) + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(entry, + bank, + 0), + rx_action); + else rvu_write64(rvu, blkaddr, NPC_AF_MCAMEX_BANKX_ACTION(entry, bank), rx_action); - if (enable) - npc_enable_mcam_entry(rvu, mcam, blkaddr, - actindex, true); - } + + if (enable) + npc_enable_mcam_entry(rvu, mcam, blkaddr, + actindex, true); } mutex_unlock(&mcam->lock); } @@ -993,6 +1064,7 @@ static void npc_update_rx_action_with_alg_idx(struct rvu *rvu, struct nix_rx_act struct npc_mcam *mcam = &rvu->hw->mcam; struct rvu_hwinfo *hw = rvu->hw; int bank, op_rss; + u64 reg; if (!is_mcam_entry_enabled(rvu, mcam, blkaddr, mcam_index)) return; @@ -1002,15 +1074,19 @@ static void npc_update_rx_action_with_alg_idx(struct rvu *rvu, struct nix_rx_act bank = npc_get_bank(mcam, mcam_index); mcam_index &= (mcam->banksize - 1); + if (is_cn20k(rvu->pdev)) + reg = NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(mcam_index, + bank, 0); + else + reg = NPC_AF_MCAMEX_BANKX_ACTION(mcam_index, bank); + /* If Rx action is MCAST update only RSS algorithm index */ if (!op_rss) { - *(u64 *)&action = rvu_read64(rvu, blkaddr, - NPC_AF_MCAMEX_BANKX_ACTION(mcam_index, bank)); + *(u64 *)&action = rvu_read64(rvu, blkaddr, reg); action.flow_key_alg = alg_idx; } - rvu_write64(rvu, blkaddr, - NPC_AF_MCAMEX_BANKX_ACTION(mcam_index, bank), *(u64 *)&action); + rvu_write64(rvu, blkaddr, reg, *(u64 *)&action); } void rvu_npc_update_flowkey_alg_idx(struct rvu *rvu, u16 pcifunc, int nixlf, @@ -1020,6 +1096,7 @@ void rvu_npc_update_flowkey_alg_idx(struct rvu *rvu, u16 pcifunc, int nixlf, struct nix_rx_action action; int blkaddr, index, bank; struct rvu_pfvf *pfvf; + u64 reg; blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); if (blkaddr < 0) @@ -1042,8 +1119,12 @@ void rvu_npc_update_flowkey_alg_idx(struct rvu *rvu, u16 pcifunc, int nixlf, bank = npc_get_bank(mcam, index); index &= (mcam->banksize - 1); - *(u64 *)&action = rvu_read64(rvu, blkaddr, - NPC_AF_MCAMEX_BANKX_ACTION(index, bank)); + if (is_cn20k(rvu->pdev)) + reg = NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(index, bank, 0); + else + reg = NPC_AF_MCAMEX_BANKX_ACTION(index, bank); + + *(u64 *)&action = rvu_read64(rvu, blkaddr, reg); /* Ignore if no action was set earlier */ if (!*(u64 *)&action) return; @@ -1053,8 +1134,11 @@ void rvu_npc_update_flowkey_alg_idx(struct rvu *rvu, u16 pcifunc, int nixlf, action.index = group; action.flow_key_alg = alg_idx; - rvu_write64(rvu, blkaddr, - NPC_AF_MCAMEX_BANKX_ACTION(index, bank), *(u64 *)&action); + rvu_write64(rvu, blkaddr, reg, *(u64 *)&action); + /* update the VF flow rule action with the VF default entry action */ + if (mcam_index < 0) + npc_update_vf_flow_entry(rvu, mcam, blkaddr, pcifunc, + *(u64 *)&action); /* update the action change in default rule */ pfvf = rvu_get_pfvf(rvu, pcifunc); @@ -1873,46 +1957,56 @@ int npc_mcam_rsrcs_init(struct rvu *rvu, int blkaddr) int cntr; u64 cfg; - /* Actual number of MCAM entries vary by entry size */ cfg = (rvu_read64(rvu, blkaddr, NPC_AF_INTFX_KEX_CFG(0)) >> 32) & 0x07; - mcam->total_entries = (mcam->banks / BIT_ULL(cfg)) * mcam->banksize; mcam->keysize = cfg; /* Number of banks combined per MCAM entry */ if (is_cn20k(rvu->pdev)) { + /* In cn20k, x2 entries is allowed for x4 profile. + * set total_entries as 8192 * 2 and key size as x2. + */ + mcam->total_entries = mcam->banks * mcam->banksize; if (cfg == NPC_MCAM_KEY_X2) mcam->banks_per_entry = 1; else mcam->banks_per_entry = 2; + + rsvd = 0; } else { + mcam->total_entries = (mcam->banks / BIT_ULL(cfg)) * + mcam->banksize; + if (cfg == NPC_MCAM_KEY_X4) mcam->banks_per_entry = 4; else if (cfg == NPC_MCAM_KEY_X2) mcam->banks_per_entry = 2; else mcam->banks_per_entry = 1; - } - /* Reserve one MCAM entry for each of the NIX LF to - * guarantee space to install default matching DMAC rule. - * Also reserve 2 MCAM entries for each PF for default - * channel based matching or 'bcast & promisc' matching to - * support BCAST and PROMISC modes of operation for PFs. - * PF0 is excluded. - */ - rsvd = (nixlf_count * RSVD_MCAM_ENTRIES_PER_NIXLF) + - ((rvu->hw->total_pfs - 1) * RSVD_MCAM_ENTRIES_PER_PF); - if (mcam->total_entries <= rsvd) { - dev_warn(rvu->dev, - "Insufficient NPC MCAM size %d for pkt I/O, exiting\n", - mcam->total_entries); - return -ENOMEM; + /* Reserve one MCAM entry for each of the NIX LF to + * guarantee space to install default matching DMAC rule. + * Also reserve 2 MCAM entries for each PF for default + * channel based matching or 'bcast & promisc' matching to + * support BCAST and PROMISC modes of operation for PFs. + * PF0 is excluded. + */ + rsvd = (nixlf_count * RSVD_MCAM_ENTRIES_PER_NIXLF) + + ((rvu->hw->total_pfs - 1) * RSVD_MCAM_ENTRIES_PER_PF); + if (mcam->total_entries <= rsvd) { + dev_warn(rvu->dev, + "Insufficient NPC MCAM size %d for pkt I/O, exiting\n", + mcam->total_entries); + return -ENOMEM; + } } mcam->bmap_entries = mcam->total_entries - rsvd; - mcam->nixlf_offset = mcam->bmap_entries; - mcam->pf_offset = mcam->nixlf_offset + nixlf_count; + /* cn20k does not need offsets to alloc mcam entries */ + if (!is_cn20k(rvu->pdev)) { + mcam->nixlf_offset = mcam->bmap_entries; + mcam->pf_offset = mcam->nixlf_offset + nixlf_count; + } /* Allocate bitmaps for managing MCAM entries */ mcam->bmap = bitmap_zalloc(mcam->bmap_entries, GFP_KERNEL); @@ -1936,13 +2030,15 @@ int npc_mcam_rsrcs_init(struct rvu *rvu, int blkaddr) * allocations and another 1/8th at the top for high priority * allocations. */ - mcam->lprio_count = mcam->bmap_entries / 8; - if (mcam->lprio_count > BITS_PER_LONG) - mcam->lprio_count = round_down(mcam->lprio_count, - BITS_PER_LONG); - mcam->lprio_start = mcam->bmap_entries - mcam->lprio_count; - mcam->hprio_count = mcam->lprio_count; - mcam->hprio_end = mcam->hprio_count; + if (!is_cn20k(rvu->pdev)) { + mcam->lprio_count = mcam->bmap_entries / 8; + if (mcam->lprio_count > BITS_PER_LONG) + mcam->lprio_count = round_down(mcam->lprio_count, + BITS_PER_LONG); + mcam->lprio_start = mcam->bmap_entries - mcam->lprio_count; + mcam->hprio_count = mcam->lprio_count; + mcam->hprio_end = mcam->hprio_count; + } /* Allocate bitmap for managing MCAM counters and memory * for saving counter to RVU PFFUNC allocation mapping. @@ -2070,24 +2166,21 @@ static void rvu_npc_setup_interfaces(struct rvu *rvu, int blkaddr) struct npc_mcam *mcam = &rvu->hw->mcam; struct rvu_hwinfo *hw = rvu->hw; u64 nibble_ena, rx_kex, tx_kex; - u64 *keyx_cfg; + u64 *keyx_cfg, reg; u8 intf; if (is_cn20k(rvu->pdev)) { keyx_cfg = mkex_extr->keyx_cfg; - goto skip_miss_cntr; + } else { + keyx_cfg = mkex->keyx_cfg; + /* Reserve last counter for MCAM RX miss action which is set to + * drop packet. This way we will know how many pkts didn't + * match any MCAM entry. + */ + mcam->counters.max--; + mcam->rx_miss_act_cntr = mcam->counters.max; } - keyx_cfg = mkex->keyx_cfg; - - /* Reserve last counter for MCAM RX miss action which is set to - * drop packet. This way we will know how many pkts didn't match - * any MCAM entry. - */ - mcam->counters.max--; - mcam->rx_miss_act_cntr = mcam->counters.max; - -skip_miss_cntr: rx_kex = keyx_cfg[NIX_INTF_RX]; tx_kex = keyx_cfg[NIX_INTF_TX]; @@ -2110,14 +2203,18 @@ skip_miss_cntr: rx_kex); if (is_cn20k(rvu->pdev)) - continue; + reg = NPC_AF_INTFX_MISS_ACTX(intf, 0); + else + reg = NPC_AF_INTFX_MISS_ACT(intf); /* If MCAM lookup doesn't result in a match, drop the received * packet. And map this action to a counter to count dropped * packets. */ - rvu_write64(rvu, blkaddr, - NPC_AF_INTFX_MISS_ACT(intf), NIX_RX_ACTIONOP_DROP); + rvu_write64(rvu, blkaddr, reg, NIX_RX_ACTIONOP_DROP); + + if (is_cn20k(rvu->pdev)) + continue; /* NPC_AF_INTFX_MISS_STAT_ACT[14:12] - counter[11:9] * NPC_AF_INTFX_MISS_STAT_ACT[8:0] - counter[8:0] @@ -2137,12 +2234,15 @@ skip_miss_cntr: rvu_write64(rvu, blkaddr, NPC_AF_INTFX_KEX_CFG(intf), tx_kex); + if (is_cn20k(rvu->pdev)) + reg = NPC_AF_INTFX_MISS_ACTX(intf, 0); + else + reg = NPC_AF_INTFX_MISS_ACT(intf); + /* Set TX miss action to UCAST_DEFAULT i.e * transmit the packet on NIX LF SQ's default channel. */ - rvu_write64(rvu, blkaddr, - NPC_AF_INTFX_MISS_ACT(intf), - NIX_TX_ACTIONOP_UCAST_DEFAULT); + rvu_write64(rvu, blkaddr, reg, NIX_TX_ACTIONOP_UCAST_DEFAULT); } } @@ -2340,6 +2440,10 @@ static void npc_map_mcam_entry_and_cntr(struct rvu *rvu, struct npc_mcam *mcam, /* Set mapping and increment counter's refcnt */ mcam->entry2cntr_map[entry] = cntr; mcam->cntr_refcnt[cntr]++; + + if (is_cn20k(rvu->pdev)) + return; + /* Enable stats */ rvu_write64(rvu, blkaddr, NPC_AF_MCAMEX_BANKX_STAT_ACT(index, bank), @@ -2397,6 +2501,7 @@ static void npc_mcam_free_all_entries(struct rvu *rvu, struct npc_mcam *mcam, int blkaddr, u16 pcifunc) { u16 index, cntr; + int rc; /* Scan all MCAM entries and free the ones mapped to 'pcifunc' */ for (index = 0; index < mcam->bmap_entries; index++) { @@ -2414,6 +2519,13 @@ static void npc_mcam_free_all_entries(struct rvu *rvu, struct npc_mcam *mcam, blkaddr, index, cntr); mcam->entry2target_pffunc[index] = 0x0; + if (is_cn20k(rvu->pdev)) { + rc = npc_cn20k_idx_free(rvu, &index, 1); + if (rc) + dev_err(rvu->dev, + "Failed to free mcam idx=%u pcifunc=%#x\n", + index, pcifunc); + } } } } @@ -2560,14 +2672,76 @@ static int npc_mcam_alloc_entries(struct npc_mcam *mcam, u16 pcifunc, struct npc_mcam_alloc_entry_req *req, struct npc_mcam_alloc_entry_rsp *rsp) { + struct rvu_hwinfo *hw = container_of(mcam, struct rvu_hwinfo, mcam); u16 entry_list[NPC_MAX_NONCONTIG_ENTRIES]; u16 fcnt, hp_fcnt, lp_fcnt; + struct rvu *rvu = hw->rvu; u16 start, end, index; int entry, next_start; bool reverse = false; unsigned long *bmap; + int ret, limit = 0; u16 max_contig; + if (!is_cn20k(rvu->pdev)) + goto not_cn20k; + + /* The below table is being followed during allocation, + * + * 1. ref_entry == 0 && prio == HIGH && count == 1 : user wants to + * allocate 0th index + * 2. ref_entry == 0 && prio == HIGH && count > 1 : Invalid request + * 3. ref_entry == 0 && prio == LOW && count >= 1 : limit = 0 + * 4. ref_entry != 0 && prio == HIGH && count >= 1 : limit = 0 + * 5. ref_entry != 0 && prio == LOW && count >=1 : limit = Max + * (X2 2*8192, X4 8192) + */ + if (req->ref_entry && req->ref_prio == NPC_MCAM_LOWER_PRIO) { + if (req->kw_type == NPC_MCAM_KEY_X2) + limit = 2 * mcam->bmap_entries; + else + limit = mcam->bmap_entries; + } + + ret = npc_cn20k_ref_idx_alloc(rvu, pcifunc, req->kw_type, + req->ref_prio, rsp->entry_list, + req->ref_entry, limit, + req->contig, req->count); + + if (ret) { + rsp->count = 0; + return NPC_MCAM_ALLOC_FAILED; + } + + rsp->count = req->count; + if (req->contig) + rsp->entry = rsp->entry_list[0]; + + /* cn20k, entries allocation algorithm is different. + * This common API updates some bitmap on usage etc, which + * will be used by other functions. So update those for + * cn20k as well. + */ + + mutex_lock(&mcam->lock); + /* Mark the allocated entries as used and set nixlf mapping */ + for (entry = 0; entry < rsp->count; entry++) { + index = rsp->entry_list[entry]; + npc_mcam_set_bit(mcam, index); + mcam->entry2pfvf_map[index] = pcifunc; + mcam->entry2cntr_map[index] = NPC_MCAM_INVALID_MAP; + } + + /* cn20k, free count is provided thru different mbox message. + * one counter to indicate free x2 slots and free x4 slots + * does not provide any useful information to the user. + */ + rsp->free_count = -1; + mutex_unlock(&mcam->lock); + + return 0; + +not_cn20k: mutex_lock(&mcam->lock); /* Check if there are any free entries */ @@ -2888,6 +3062,14 @@ int rvu_mbox_handler_npc_mcam_free_entry(struct rvu *rvu, npc_unmap_mcam_entry_and_cntr(rvu, mcam, blkaddr, req->entry, cntr); + if (is_cn20k(rvu->pdev)) { + rc = npc_cn20k_idx_free(rvu, &req->entry, 1); + if (rc) + dev_err(rvu->dev, + "Failed to free index=%u\n", + req->entry); + } + goto exit; free_all: diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c index 5f51a05de5de..8136550cf927 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c @@ -1761,14 +1761,19 @@ static int npc_update_dmac_value(struct rvu *rvu, int npcblkaddr, struct npc_mcam_write_entry_req write_req = { 0 }; struct mcam_entry *entry = &write_req.entry_data; struct npc_mcam *mcam = &rvu->hw->mcam; + u8 intf, enable, hw_prio; struct msg_rsp rsp; - u8 intf, enable; int err; ether_addr_copy(rule->packet.dmac, pfvf->mac_addr); - npc_read_mcam_entry(rvu, mcam, npcblkaddr, rule->entry, - entry, &intf, &enable); + if (is_cn20k(rvu->pdev)) + npc_cn20k_read_mcam_entry(rvu, npcblkaddr, rule->entry, + entry, &intf, + &enable, &hw_prio); + else + npc_read_mcam_entry(rvu, mcam, npcblkaddr, rule->entry, + entry, &intf, &enable); npc_update_entry(rvu, NPC_DMAC, entry, ether_addr_to_u64(pfvf->mac_addr), 0, From de3f88b465c8fda07b1dc44648a99f7dec390705 Mon Sep 17 00:00:00 2001 From: Ratheesh Kannoth Date: Tue, 24 Feb 2026 13:30:03 +0530 Subject: [PATCH 0109/1561] octeontx2-af: npc: cn20k: Prepare for new SoC Current code pass mcam_entry structure to all low level functions. This is not proper: 1) We need to modify all functions to support a new SoC 2) It does not look good to pass soc specific structure to all common functions. This patch adds a mcam meta data structure, which is populated and passed to low level functions. Signed-off-by: Ratheesh Kannoth Link: https://patch.msgid.link/20260224080009.4147301-8-rkannoth@marvell.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/marvell/octeontx2/af/mbox.h | 8 + .../marvell/octeontx2/af/rvu_npc_fs.c | 143 +++++++++++------- .../marvell/octeontx2/af/rvu_npc_fs.h | 2 +- .../marvell/octeontx2/af/rvu_npc_hash.c | 105 ++++++------- .../marvell/octeontx2/af/rvu_npc_hash.h | 2 +- 5 files changed, 151 insertions(+), 109 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 8aa684bd83f0..a4e79828a84c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -1561,6 +1561,14 @@ struct npc_mcam_free_entry_req { u8 all; /* If all entries allocated to this PFVF to be freed */ }; +struct mcam_entry_mdata { + u64 *kw; + u64 *kw_mask; + u64 *action; + u64 *vtag_action; + u8 max_kw; +}; + struct mcam_entry { #define NPC_MAX_KWS_IN_KEY 8 /* Number of keywords in max keywidth */ u64 kw[NPC_MAX_KWS_IN_KEY]; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c index 8136550cf927..117038c793ea 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c @@ -903,11 +903,12 @@ static int npc_check_unsupported_flows(struct rvu *rvu, u64 features, u8 intf) * dont care. */ void npc_update_entry(struct rvu *rvu, enum key_fields type, - struct mcam_entry *entry, u64 val_lo, + struct mcam_entry_mdata *mdata, u64 val_lo, u64 val_hi, u64 mask_lo, u64 mask_hi, u8 intf) { struct npc_mcam *mcam = &rvu->hw->mcam; struct mcam_entry dummy = { {0} }; + u64 *kw, *kw_mask, *val, *mask; struct npc_key_field *field; u64 kw1, kw2, kw3; int i, max_kw; @@ -920,10 +921,9 @@ void npc_update_entry(struct rvu *rvu, enum key_fields type, if (!field->nr_kws) return; - if (is_cn20k(rvu->pdev)) - max_kw = NPC_MAX_KWS_IN_KEY; - else - max_kw = NPC_MAX_KWS_IN_KEY - 1; + max_kw = NPC_MAX_KWS_IN_KEY; + kw = dummy.kw; + kw_mask = dummy.kw_mask; for (i = 0; i < max_kw; i++) { if (!field->kw_mask[i]) @@ -932,10 +932,10 @@ void npc_update_entry(struct rvu *rvu, enum key_fields type, shift = __ffs64(field->kw_mask[i]); /* update entry value */ kw1 = (val_lo << shift) & field->kw_mask[i]; - dummy.kw[i] = kw1; + kw[i] = kw1; /* update entry mask */ kw1 = (mask_lo << shift) & field->kw_mask[i]; - dummy.kw_mask[i] = kw1; + kw_mask[i] = kw1; if (field->nr_kws == 1) break; @@ -945,12 +945,12 @@ void npc_update_entry(struct rvu *rvu, enum key_fields type, kw2 = shift ? val_lo >> (64 - shift) : 0; kw2 |= (val_hi << shift); kw2 &= field->kw_mask[i + 1]; - dummy.kw[i + 1] = kw2; + kw[i + 1] = kw2; /* update entry mask */ kw2 = shift ? mask_lo >> (64 - shift) : 0; kw2 |= (mask_hi << shift); kw2 &= field->kw_mask[i + 1]; - dummy.kw_mask[i + 1] = kw2; + kw_mask[i + 1] = kw2; break; } /* place remaining bits of key value in kw[x + 1], kw[x + 2] */ @@ -961,34 +961,40 @@ void npc_update_entry(struct rvu *rvu, enum key_fields type, kw2 &= field->kw_mask[i + 1]; kw3 = shift ? val_hi >> (64 - shift) : 0; kw3 &= field->kw_mask[i + 2]; - dummy.kw[i + 1] = kw2; - dummy.kw[i + 2] = kw3; + kw[i + 1] = kw2; + kw[i + 2] = kw3; /* update entry mask */ kw2 = shift ? mask_lo >> (64 - shift) : 0; kw2 |= (mask_hi << shift); kw2 &= field->kw_mask[i + 1]; kw3 = shift ? mask_hi >> (64 - shift) : 0; kw3 &= field->kw_mask[i + 2]; - dummy.kw_mask[i + 1] = kw2; - dummy.kw_mask[i + 2] = kw3; + kw_mask[i + 1] = kw2; + kw_mask[i + 2] = kw3; break; } } /* dummy is ready with values and masks for given key * field now clear and update input entry with those */ - for (i = 0; i < max_kw; i++) { + + val = mdata->kw; + mask = mdata->kw_mask; + + for (i = 0; i < max_kw; i++, val++, mask++) { if (!field->kw_mask[i]) continue; - entry->kw[i] &= ~field->kw_mask[i]; - entry->kw_mask[i] &= ~field->kw_mask[i]; - entry->kw[i] |= dummy.kw[i]; - entry->kw_mask[i] |= dummy.kw_mask[i]; + *val &= ~field->kw_mask[i]; + *mask &= ~field->kw_mask[i]; + + *val |= kw[i]; + *mask |= kw_mask[i]; } } -static void npc_update_ipv6_flow(struct rvu *rvu, struct mcam_entry *entry, +static void npc_update_ipv6_flow(struct rvu *rvu, + struct mcam_entry_mdata *mdata, u64 features, struct flow_msg *pkt, struct flow_msg *mask, struct rvu_npc_mcam_rule *output, u8 intf) @@ -1014,7 +1020,7 @@ static void npc_update_ipv6_flow(struct rvu *rvu, struct mcam_entry *entry, val_hi = (u64)src_ip[0] << 32 | src_ip[1]; val_lo = (u64)src_ip[2] << 32 | src_ip[3]; - npc_update_entry(rvu, NPC_SIP_IPV6, entry, val_lo, val_hi, + npc_update_entry(rvu, NPC_SIP_IPV6, mdata, val_lo, val_hi, mask_lo, mask_hi, intf); memcpy(opkt->ip6src, pkt->ip6src, sizeof(opkt->ip6src)); memcpy(omask->ip6src, mask->ip6src, sizeof(omask->ip6src)); @@ -1028,14 +1034,15 @@ static void npc_update_ipv6_flow(struct rvu *rvu, struct mcam_entry *entry, val_hi = (u64)dst_ip[0] << 32 | dst_ip[1]; val_lo = (u64)dst_ip[2] << 32 | dst_ip[3]; - npc_update_entry(rvu, NPC_DIP_IPV6, entry, val_lo, val_hi, + npc_update_entry(rvu, NPC_DIP_IPV6, mdata, val_lo, val_hi, mask_lo, mask_hi, intf); memcpy(opkt->ip6dst, pkt->ip6dst, sizeof(opkt->ip6dst)); memcpy(omask->ip6dst, mask->ip6dst, sizeof(omask->ip6dst)); } } -static void npc_update_vlan_features(struct rvu *rvu, struct mcam_entry *entry, +static void npc_update_vlan_features(struct rvu *rvu, + struct mcam_entry_mdata *mdata, u64 features, u8 intf) { bool ctag = !!(features & BIT_ULL(NPC_VLAN_ETYPE_CTAG)); @@ -1044,20 +1051,20 @@ static void npc_update_vlan_features(struct rvu *rvu, struct mcam_entry *entry, /* If only VLAN id is given then always match outer VLAN id */ if (vid && !ctag && !stag) { - npc_update_entry(rvu, NPC_LB, entry, + npc_update_entry(rvu, NPC_LB, mdata, NPC_LT_LB_STAG_QINQ | NPC_LT_LB_CTAG, 0, NPC_LT_LB_STAG_QINQ & NPC_LT_LB_CTAG, 0, intf); return; } if (ctag) - npc_update_entry(rvu, NPC_LB, entry, NPC_LT_LB_CTAG, 0, + npc_update_entry(rvu, NPC_LB, mdata, NPC_LT_LB_CTAG, 0, ~0ULL, 0, intf); if (stag) - npc_update_entry(rvu, NPC_LB, entry, NPC_LT_LB_STAG_QINQ, 0, + npc_update_entry(rvu, NPC_LB, mdata, NPC_LT_LB_STAG_QINQ, 0, ~0ULL, 0, intf); } -static void npc_update_flow(struct rvu *rvu, struct mcam_entry *entry, +static void npc_update_flow(struct rvu *rvu, struct mcam_entry_mdata *mdata, u64 features, struct flow_msg *pkt, struct flow_msg *mask, struct rvu_npc_mcam_rule *output, u8 intf, @@ -1075,39 +1082,39 @@ static void npc_update_flow(struct rvu *rvu, struct mcam_entry *entry, /* For tcp/udp/sctp LTYPE should be present in entry */ if (features & BIT_ULL(NPC_IPPROTO_TCP)) - npc_update_entry(rvu, NPC_LD, entry, NPC_LT_LD_TCP, + npc_update_entry(rvu, NPC_LD, mdata, NPC_LT_LD_TCP, 0, ~0ULL, 0, intf); if (features & BIT_ULL(NPC_IPPROTO_UDP)) - npc_update_entry(rvu, NPC_LD, entry, NPC_LT_LD_UDP, + npc_update_entry(rvu, NPC_LD, mdata, NPC_LT_LD_UDP, 0, ~0ULL, 0, intf); if (features & BIT_ULL(NPC_IPPROTO_SCTP)) - npc_update_entry(rvu, NPC_LD, entry, NPC_LT_LD_SCTP, + npc_update_entry(rvu, NPC_LD, mdata, NPC_LT_LD_SCTP, 0, ~0ULL, 0, intf); if (features & BIT_ULL(NPC_IPPROTO_ICMP)) - npc_update_entry(rvu, NPC_LD, entry, NPC_LT_LD_ICMP, + npc_update_entry(rvu, NPC_LD, mdata, NPC_LT_LD_ICMP, 0, ~0ULL, 0, intf); if (features & BIT_ULL(NPC_IPPROTO_ICMP6)) - npc_update_entry(rvu, NPC_LD, entry, NPC_LT_LD_ICMP6, + npc_update_entry(rvu, NPC_LD, mdata, NPC_LT_LD_ICMP6, 0, ~0ULL, 0, intf); /* For AH, LTYPE should be present in entry */ if (features & BIT_ULL(NPC_IPPROTO_AH)) - npc_update_entry(rvu, NPC_LD, entry, NPC_LT_LD_AH, + npc_update_entry(rvu, NPC_LD, mdata, NPC_LT_LD_AH, 0, ~0ULL, 0, intf); /* For ESP, LTYPE should be present in entry */ if (features & BIT_ULL(NPC_IPPROTO_ESP)) - npc_update_entry(rvu, NPC_LE, entry, NPC_LT_LE_ESP, + npc_update_entry(rvu, NPC_LE, mdata, NPC_LT_LE_ESP, 0, ~0ULL, 0, intf); if (features & BIT_ULL(NPC_LXMB)) { output->lxmb = is_broadcast_ether_addr(pkt->dmac) ? 2 : 1; - npc_update_entry(rvu, NPC_LXMB, entry, output->lxmb, 0, + npc_update_entry(rvu, NPC_LXMB, mdata, output->lxmb, 0, output->lxmb, 0, intf); } #define NPC_WRITE_FLOW(field, member, val_lo, val_hi, mask_lo, mask_hi) \ do { \ if (features & BIT_ULL((field))) { \ - npc_update_entry(rvu, (field), entry, (val_lo), (val_hi), \ + npc_update_entry(rvu, (field), mdata, (val_lo), (val_hi), \ (mask_lo), (mask_hi), intf); \ memcpy(&opkt->member, &pkt->member, sizeof(pkt->member)); \ memcpy(&omask->member, &mask->member, sizeof(mask->member)); \ @@ -1195,10 +1202,10 @@ do { \ NPC_WRITE_FLOW(NPC_IPFRAG_IPV6, next_header, pkt->next_header, 0, mask->next_header, 0); - npc_update_ipv6_flow(rvu, entry, features, pkt, mask, output, intf); - npc_update_vlan_features(rvu, entry, features, intf); + npc_update_ipv6_flow(rvu, mdata, features, pkt, mask, output, intf); + npc_update_vlan_features(rvu, mdata, features, intf); - npc_update_field_hash(rvu, intf, entry, blkaddr, features, + npc_update_field_hash(rvu, intf, mdata, blkaddr, features, pkt, mask, opkt, omask); } @@ -1286,8 +1293,20 @@ static int npc_mcast_update_action_index(struct rvu *rvu, struct npc_install_flo return 0; } +static void +npc_populate_mcam_mdata(struct rvu *rvu, + struct mcam_entry_mdata *mdata, + struct mcam_entry *entry) +{ + mdata->kw = entry->kw; + mdata->kw_mask = entry->kw_mask; + mdata->action = &entry->action; + mdata->vtag_action = &entry->vtag_action; + mdata->max_kw = NPC_MAX_KWS_IN_KEY; +} + static int npc_update_rx_entry(struct rvu *rvu, struct rvu_pfvf *pfvf, - struct mcam_entry *entry, + struct mcam_entry_mdata *mdata, struct npc_install_flow_req *req, u16 target, bool pf_set_vfs_mac) { @@ -1298,7 +1317,7 @@ static int npc_update_rx_entry(struct rvu *rvu, struct rvu_pfvf *pfvf, if (rswitch->mode == DEVLINK_ESWITCH_MODE_SWITCHDEV && pf_set_vfs_mac) req->chan_mask = 0x0; /* Do not care channel */ - npc_update_entry(rvu, NPC_CHAN, entry, req->channel, 0, req->chan_mask, + npc_update_entry(rvu, NPC_CHAN, mdata, req->channel, 0, req->chan_mask, 0, NIX_INTF_RX); *(u64 *)&action = 0x00; @@ -1330,12 +1349,12 @@ static int npc_update_rx_entry(struct rvu *rvu, struct rvu_pfvf *pfvf, action.match_id = req->match_id; } - entry->action = *(u64 *)&action; + *mdata->action = *(u64 *)&action; /* VTAG0 starts at 0th byte of LID_B. * VTAG1 starts at 4th byte of LID_B. */ - entry->vtag_action = FIELD_PREP(RX_VTAG0_VALID_BIT, req->vtag0_valid) | + *mdata->vtag_action = FIELD_PREP(RX_VTAG0_VALID_BIT, req->vtag0_valid) | FIELD_PREP(RX_VTAG0_TYPE_MASK, req->vtag0_type) | FIELD_PREP(RX_VTAG0_LID_MASK, NPC_LID_LB) | FIELD_PREP(RX_VTAG0_RELPTR_MASK, 0) | @@ -1348,7 +1367,7 @@ static int npc_update_rx_entry(struct rvu *rvu, struct rvu_pfvf *pfvf, } static int npc_update_tx_entry(struct rvu *rvu, struct rvu_pfvf *pfvf, - struct mcam_entry *entry, + struct mcam_entry_mdata *mdata, struct npc_install_flow_req *req, u16 target) { struct nix_tx_action action; @@ -1361,7 +1380,7 @@ static int npc_update_tx_entry(struct rvu *rvu, struct rvu_pfvf *pfvf, if (is_pffunc_af(req->hdr.pcifunc)) mask = 0; - npc_update_entry(rvu, NPC_PF_FUNC, entry, (__force u16)htons(target), + npc_update_entry(rvu, NPC_PF_FUNC, mdata, (__force u16)htons(target), 0, mask, 0, NIX_INTF_TX); *(u64 *)&action = 0x00; @@ -1374,12 +1393,12 @@ static int npc_update_tx_entry(struct rvu *rvu, struct rvu_pfvf *pfvf, action.match_id = req->match_id; - entry->action = *(u64 *)&action; + *mdata->action = *(u64 *)&action; /* VTAG0 starts at 0th byte of LID_B. * VTAG1 starts at 4th byte of LID_B. */ - entry->vtag_action = FIELD_PREP(TX_VTAG0_DEF_MASK, req->vtag0_def) | + *mdata->vtag_action = FIELD_PREP(TX_VTAG0_DEF_MASK, req->vtag0_def) | FIELD_PREP(TX_VTAG0_OP_MASK, req->vtag0_op) | FIELD_PREP(TX_VTAG0_LID_MASK, NPC_LID_LA) | FIELD_PREP(TX_VTAG0_RELPTR_MASK, 20) | @@ -1401,6 +1420,7 @@ static int npc_install_flow(struct rvu *rvu, int blkaddr, u16 target, u64 features, installed_features, missing_features = 0; struct npc_mcam_write_entry_req write_req = { 0 }; struct npc_mcam *mcam = &rvu->hw->mcam; + struct mcam_entry_mdata mdata = { }; struct rvu_npc_mcam_rule dummy = { 0 }; struct rvu_npc_mcam_rule *rule; u16 owner = req->hdr.pcifunc; @@ -1415,15 +1435,19 @@ static int npc_install_flow(struct rvu *rvu, int blkaddr, u16 target, entry = &write_req.entry_data; entry_index = req->entry; - npc_update_flow(rvu, entry, features, &req->packet, &req->mask, &dummy, + npc_populate_mcam_mdata(rvu, &mdata, + &write_req.entry_data); + + npc_update_flow(rvu, &mdata, features, &req->packet, &req->mask, &dummy, req->intf, blkaddr); if (is_npc_intf_rx(req->intf)) { - err = npc_update_rx_entry(rvu, pfvf, entry, req, target, pf_set_vfs_mac); + err = npc_update_rx_entry(rvu, pfvf, &mdata, req, target, + pf_set_vfs_mac); if (err) return err; } else { - err = npc_update_tx_entry(rvu, pfvf, entry, req, target); + err = npc_update_tx_entry(rvu, pfvf, &mdata, req, target); if (err) return err; } @@ -1443,7 +1467,7 @@ static int npc_install_flow(struct rvu *rvu, int blkaddr, u16 target, missing_features = (def_ucast_rule->features ^ features) & def_ucast_rule->features; if (missing_features) - npc_update_flow(rvu, entry, missing_features, + npc_update_flow(rvu, &mdata, missing_features, &def_ucast_rule->packet, &def_ucast_rule->mask, &dummy, req->intf, @@ -1759,12 +1783,17 @@ static int npc_update_dmac_value(struct rvu *rvu, int npcblkaddr, struct rvu_pfvf *pfvf) { struct npc_mcam_write_entry_req write_req = { 0 }; - struct mcam_entry *entry = &write_req.entry_data; struct npc_mcam *mcam = &rvu->hw->mcam; + struct mcam_entry_mdata mdata = { }; + struct mcam_entry *entry; u8 intf, enable, hw_prio; struct msg_rsp rsp; int err; + entry = &write_req.entry_data; + + npc_populate_mcam_mdata(rvu, &mdata, entry); + ether_addr_copy(rule->packet.dmac, pfvf->mac_addr); if (is_cn20k(rvu->pdev)) @@ -1775,7 +1804,7 @@ static int npc_update_dmac_value(struct rvu *rvu, int npcblkaddr, npc_read_mcam_entry(rvu, mcam, npcblkaddr, rule->entry, entry, &intf, &enable); - npc_update_entry(rvu, NPC_DMAC, entry, + npc_update_entry(rvu, NPC_DMAC, &mdata, ether_addr_to_u64(pfvf->mac_addr), 0, 0xffffffffffffull, 0, intf); @@ -1876,6 +1905,7 @@ int npc_install_mcam_drop_rule(struct rvu *rvu, int mcam_idx, u16 *counter_idx, struct npc_mcam_alloc_counter_rsp cntr_rsp = { 0 }; struct npc_mcam_write_entry_req req = { 0 }; struct npc_mcam *mcam = &rvu->hw->mcam; + struct mcam_entry_mdata mdata = { }; struct rvu_npc_mcam_rule *rule; struct msg_rsp rsp; bool enabled; @@ -1931,12 +1961,15 @@ int npc_install_mcam_drop_rule(struct rvu *rvu, int mcam_idx, u16 *counter_idx, } *counter_idx = cntr_rsp.cntr; + npc_populate_mcam_mdata(rvu, &mdata, + &req.entry_data); + /* Fill in fields for this mcam entry */ - npc_update_entry(rvu, NPC_EXACT_RESULT, &req.entry_data, exact_val, 0, + npc_update_entry(rvu, NPC_EXACT_RESULT, &mdata, exact_val, 0, exact_mask, 0, NIX_INTF_RX); - npc_update_entry(rvu, NPC_CHAN, &req.entry_data, chan_val, 0, + npc_update_entry(rvu, NPC_CHAN, &mdata, chan_val, 0, chan_mask, 0, NIX_INTF_RX); - npc_update_entry(rvu, NPC_LXMB, &req.entry_data, bcast_mcast_val, 0, + npc_update_entry(rvu, NPC_LXMB, &mdata, bcast_mcast_val, 0, bcast_mcast_mask, 0, NIX_INTF_RX); req.intf = NIX_INTF_RX; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.h index 3f5c9042d10e..442287ee7baa 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.h @@ -15,7 +15,7 @@ #define NPC_LDATA_EN BIT_ULL(7) void npc_update_entry(struct rvu *rvu, enum key_fields type, - struct mcam_entry *entry, u64 val_lo, + struct mcam_entry_mdata *mdata, u64 val_lo, u64 val_hi, u64 mask_lo, u64 mask_hi, u8 intf); #endif /* RVU_NPC_FS_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c index beebcd6aa3d8..03bb485a1aca 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c @@ -282,7 +282,7 @@ void npc_program_mkex_hash(struct rvu *rvu, int blkaddr) } void npc_update_field_hash(struct rvu *rvu, u8 intf, - struct mcam_entry *entry, + struct mcam_entry_mdata *mdata, int blkaddr, u64 features, struct flow_msg *pkt, @@ -293,9 +293,10 @@ void npc_update_field_hash(struct rvu *rvu, u8 intf, struct npc_mcam_kex_hash *mkex_hash = rvu->kpu.mkex_hash; struct npc_get_field_hash_info_req req; struct npc_get_field_hash_info_rsp rsp; + u8 hash_idx, lid, ltype, ltype_mask; u64 ldata[2], cfg; u32 field_hash; - u8 hash_idx; + bool en; if (is_cn20k(rvu->pdev)) return; @@ -310,60 +311,60 @@ void npc_update_field_hash(struct rvu *rvu, u8 intf, for (hash_idx = 0; hash_idx < NPC_MAX_HASH; hash_idx++) { cfg = rvu_read64(rvu, blkaddr, NPC_AF_INTFX_HASHX_CFG(intf, hash_idx)); - if ((cfg & BIT_ULL(11)) && (cfg & BIT_ULL(12))) { - u8 lid = (cfg & GENMASK_ULL(10, 8)) >> 8; - u8 ltype = (cfg & GENMASK_ULL(7, 4)) >> 4; - u8 ltype_mask = cfg & GENMASK_ULL(3, 0); + en = !!(cfg & BIT_ULL(11)) && (cfg & BIT_ULL(12)); + if (!en) + continue; - if (mkex_hash->lid_lt_ld_hash_en[intf][lid][ltype][hash_idx]) { - switch (ltype & ltype_mask) { - /* If hash extract enabled is supported for IPv6 then - * 128 bit IPv6 source and destination addressed - * is hashed to 32 bit value. - */ - case NPC_LT_LC_IP6: - /* ld[0] == hash_idx[0] == Source IPv6 - * ld[1] == hash_idx[1] == Destination IPv6 - */ - if ((features & BIT_ULL(NPC_SIP_IPV6)) && !hash_idx) { - u32 src_ip[IPV6_WORDS]; + lid = (cfg & GENMASK_ULL(10, 8)) >> 8; + ltype = (cfg & GENMASK_ULL(7, 4)) >> 4; + ltype_mask = cfg & GENMASK_ULL(3, 0); - be32_to_cpu_array(src_ip, pkt->ip6src, IPV6_WORDS); - ldata[1] = (u64)src_ip[0] << 32 | src_ip[1]; - ldata[0] = (u64)src_ip[2] << 32 | src_ip[3]; - field_hash = npc_field_hash_calc(ldata, - rsp, - intf, - hash_idx); - npc_update_entry(rvu, NPC_SIP_IPV6, entry, - field_hash, 0, - GENMASK(31, 0), 0, intf); - memcpy(&opkt->ip6src, &pkt->ip6src, - sizeof(pkt->ip6src)); - memcpy(&omask->ip6src, &mask->ip6src, - sizeof(mask->ip6src)); - } else if ((features & BIT_ULL(NPC_DIP_IPV6)) && hash_idx) { - u32 dst_ip[IPV6_WORDS]; + if (!mkex_hash->lid_lt_ld_hash_en[intf][lid][ltype][hash_idx]) + continue; - be32_to_cpu_array(dst_ip, pkt->ip6dst, IPV6_WORDS); - ldata[1] = (u64)dst_ip[0] << 32 | dst_ip[1]; - ldata[0] = (u64)dst_ip[2] << 32 | dst_ip[3]; - field_hash = npc_field_hash_calc(ldata, - rsp, - intf, - hash_idx); - npc_update_entry(rvu, NPC_DIP_IPV6, entry, - field_hash, 0, - GENMASK(31, 0), 0, intf); - memcpy(&opkt->ip6dst, &pkt->ip6dst, - sizeof(pkt->ip6dst)); - memcpy(&omask->ip6dst, &mask->ip6dst, - sizeof(mask->ip6dst)); - } + /* If hash extract enabled is supported for IPv6 then + * 128 bit IPv6 source and destination addressed + * is hashed to 32 bit value. + */ + if ((ltype & ltype_mask) != NPC_LT_LC_IP6) + continue; - break; - } - } + /* ld[0] == hash_idx[0] == Source IPv6 + * ld[1] == hash_idx[1] == Destination IPv6 + */ + if ((features & BIT_ULL(NPC_SIP_IPV6)) && !hash_idx) { + u32 src_ip[IPV6_WORDS]; + + be32_to_cpu_array(src_ip, pkt->ip6src, IPV6_WORDS); + ldata[1] = (u64)src_ip[0] << 32 | src_ip[1]; + ldata[0] = (u64)src_ip[2] << 32 | src_ip[3]; + field_hash = npc_field_hash_calc(ldata, rsp, intf, + hash_idx); + npc_update_entry(rvu, NPC_SIP_IPV6, mdata, field_hash, + 0, GENMASK(31, 0), 0, intf); + memcpy(&opkt->ip6src, &pkt->ip6src, + sizeof(pkt->ip6src)); + memcpy(&omask->ip6src, &mask->ip6src, + sizeof(mask->ip6src)); + continue; + } + + if ((features & BIT_ULL(NPC_DIP_IPV6)) && hash_idx) { + u32 dst_ip[IPV6_WORDS]; + + be32_to_cpu_array(dst_ip, pkt->ip6dst, IPV6_WORDS); + ldata[1] = (u64)dst_ip[0] << 32 | dst_ip[1]; + ldata[0] = (u64)dst_ip[2] << 32 | dst_ip[3]; + field_hash = npc_field_hash_calc(ldata, rsp, intf, + hash_idx); + npc_update_entry(rvu, NPC_DIP_IPV6, mdata, + field_hash, 0, GENMASK(31, 0), + 0, intf); + memcpy(&opkt->ip6dst, &pkt->ip6dst, + sizeof(pkt->ip6dst)); + memcpy(&omask->ip6dst, &mask->ip6dst, + sizeof(mask->ip6dst)); + continue; } } } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.h index cb25cf478f1f..4cbcae69b6d3 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.h @@ -53,7 +53,7 @@ struct npc_mcam_kex_hash { } __packed; void npc_update_field_hash(struct rvu *rvu, u8 intf, - struct mcam_entry *entry, + struct mcam_entry_mdata *mdata, int blkaddr, u64 features, struct flow_msg *pkt, From 4e527f1e5c155ce28164ba34cad47b635d16f827 Mon Sep 17 00:00:00 2001 From: Suman Ghosh Date: Tue, 24 Feb 2026 13:30:04 +0530 Subject: [PATCH 0110/1561] octeontx2-af: npc: cn20k: Add new mailboxes for CN20K silicon To enable enhanced MCAM capabilities for CN20K, the struct mcam_entry has been extended to support expanded keyword requirements. Specifically, the kw and kw_mask arrays have been increased from a size of 7 to 8 to accommodate the additional keyword field introduced for CN20K. To ensure seamless integration while preserving compatibility with existing platforms, dedicated CN20K-specific mailboxes have been introduced that leverage the updated struct mcam_entry. This approach allows CN20K to utilize the extended structure without impacting current implementations. This patch identifies the relevant mailboxes and introduces the following CN20K-specific additions: New mailboxes added: 1. `NPC_CN20K_MCAM_WRITE_ENTRY` 2. `NPC_CN20K_MCAM_ALLOC_AND_WRITE_ENTRY` 3. `NPC_CN20K_MCAM_READ_ENTRY` 4. `NPC_CN20K_MCAM_READ_BASE_RULE` Signed-off-by: Suman Ghosh Signed-off-by: Ratheesh Kannoth Link: https://patch.msgid.link/20260224080009.4147301-9-rkannoth@marvell.com Signed-off-by: Jakub Kicinski --- .../marvell/octeontx2/af/cn20k/mbox_init.c | 17 ++ .../ethernet/marvell/octeontx2/af/cn20k/npc.c | 189 +++++++++++++- .../ethernet/marvell/octeontx2/af/cn20k/npc.h | 7 +- .../ethernet/marvell/octeontx2/af/cn20k/reg.h | 7 + .../net/ethernet/marvell/octeontx2/af/mbox.h | 58 ++++- .../net/ethernet/marvell/octeontx2/af/npc.h | 1 + .../net/ethernet/marvell/octeontx2/af/rvu.c | 82 +++++- .../net/ethernet/marvell/octeontx2/af/rvu.h | 3 +- .../marvell/octeontx2/af/rvu_debugfs.c | 32 ++- .../ethernet/marvell/octeontx2/af/rvu_npc.c | 77 +++++- .../marvell/octeontx2/af/rvu_npc_fs.c | 233 +++++++++++++----- 11 files changed, 600 insertions(+), 106 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/mbox_init.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/mbox_init.c index bd3aab7770dd..71401dec0d77 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/mbox_init.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/mbox_init.c @@ -397,6 +397,12 @@ int rvu_alloc_cint_qint_mem(struct rvu *rvu, struct rvu_pfvf *pfvf, if (is_rvu_otx2(rvu) || is_cn20k(rvu->pdev)) return 0; + /* sanity check */ + cfg = rvu_read64(rvu, BLKADDR_RVUM, RVU_PRIV_PFX_NIXX_CFG(0) | + (RVU_AFPF << 16)); + if (!cfg) + return 0; + ctx_cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST3); /* Alloc memory for CQINT's HW contexts */ cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST2); @@ -420,5 +426,16 @@ int rvu_alloc_cint_qint_mem(struct rvu *rvu, struct rvu_pfvf *pfvf, rvu_write64(rvu, blkaddr, NIX_AF_LFX_QINTS_BASE(nixlf), (u64)pfvf->nix_qints_ctx->iova); + rvu_write64(rvu, BLKADDR_NIX0, RVU_AF_BAR2_SEL, RVU_AF_BAR2_PFID); + rvu_write64(rvu, BLKADDR_NIX0, + AF_BAR2_ALIASX(0, NIX_GINT_INT_W1S), ALTAF_RDY); + /* wait for ack */ + err = rvu_poll_reg(rvu, BLKADDR_NIX0, + AF_BAR2_ALIASX(0, NIX_GINT_INT), ALTAF_RDY, true); + if (err) + rvu->altaf_ready = false; + else + rvu->altaf_ready = true; + return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c index cb8a37a2756a..7c3fdccbce2a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c @@ -637,9 +637,13 @@ npc_cn20k_clear_mcam_entry(struct rvu *rvu, int blkaddr, int bank, int index) NPC_AF_CN20K_MCAMEX_BANKX_CAMX_W3_EXT(index, bank, 1), 0); rvu_write64(rvu, blkaddr, NPC_AF_CN20K_MCAMEX_BANKX_CAMX_W3_EXT(index, bank, 0), 0); + + /* Clear corresponding stats register */ + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_STAT_EXT(index, bank), 0); } -static void npc_cn20k_get_keyword(struct mcam_entry *entry, int idx, +static void npc_cn20k_get_keyword(struct cn20k_mcam_entry *entry, int idx, u64 *cam0, u64 *cam1) { u64 kw_mask; @@ -679,7 +683,7 @@ static void npc_cn20k_get_keyword(struct mcam_entry *entry, int idx, */ static void npc_cn20k_config_kw_x2(struct rvu *rvu, struct npc_mcam *mcam, int blkaddr, int index, u8 intf, - struct mcam_entry *entry, + struct cn20k_mcam_entry *entry, int bank, u8 kw_type, int kw) { u64 intf_ext = 0, intf_ext_mask = 0; @@ -751,7 +755,8 @@ static void npc_cn20k_config_kw_x2(struct rvu *rvu, struct npc_mcam *mcam, static void npc_cn20k_config_kw_x4(struct rvu *rvu, struct npc_mcam *mcam, int blkaddr, int index, u8 intf, - struct mcam_entry *entry, u8 kw_type) + struct cn20k_mcam_entry *entry, + u8 kw_type) { int kw = 0, bank; @@ -789,9 +794,9 @@ npc_cn20k_set_mcam_bank_cfg(struct rvu *rvu, int blkaddr, int mcam_idx, } } -void -npc_cn20k_config_mcam_entry(struct rvu *rvu, int blkaddr, int index, u8 intf, - struct mcam_entry *entry, bool enable, u8 hw_prio) +void npc_cn20k_config_mcam_entry(struct rvu *rvu, int blkaddr, int index, + u8 intf, struct cn20k_mcam_entry *entry, + bool enable, u8 hw_prio) { struct npc_mcam *mcam = &rvu->hw->mcam; int mcam_idx = index % mcam->banksize; @@ -906,7 +911,7 @@ void npc_cn20k_copy_mcam_entry(struct rvu *rvu, int blkaddr, u16 src, u16 dest) } } -static void npc_cn20k_fill_entryword(struct mcam_entry *entry, int idx, +static void npc_cn20k_fill_entryword(struct cn20k_mcam_entry *entry, int idx, u64 cam0, u64 cam1) { entry->kw[idx] = cam1; @@ -914,8 +919,8 @@ static void npc_cn20k_fill_entryword(struct mcam_entry *entry, int idx, } void npc_cn20k_read_mcam_entry(struct rvu *rvu, int blkaddr, u16 index, - struct mcam_entry *entry, u8 *intf, u8 *ena, - u8 *hw_prio) + struct cn20k_mcam_entry *entry, + u8 *intf, u8 *ena, u8 *hw_prio) { struct npc_mcam *mcam = &rvu->hw->mcam; u64 cam0, cam1, bank_cfg, cfg; @@ -1034,6 +1039,172 @@ read_action: entry->vtag_action = cfg; } +int rvu_mbox_handler_npc_cn20k_mcam_write_entry(struct rvu *rvu, + struct npc_cn20k_mcam_write_entry_req *req, + struct msg_rsp *rsp) +{ + struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc); + struct npc_mcam *mcam = &rvu->hw->mcam; + u16 pcifunc = req->hdr.pcifunc; + int blkaddr, rc; + u8 nix_intf; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); + if (blkaddr < 0) + return NPC_MCAM_INVALID_REQ; + + mutex_lock(&mcam->lock); + rc = npc_mcam_verify_entry(mcam, pcifunc, req->entry); + if (rc) + goto exit; + + if (!is_npc_interface_valid(rvu, req->intf)) { + rc = NPC_MCAM_INVALID_REQ; + goto exit; + } + + if (is_npc_intf_tx(req->intf)) + nix_intf = pfvf->nix_tx_intf; + else + nix_intf = pfvf->nix_rx_intf; + + /* For AF installed rules, the nix_intf should be set to target NIX */ + if (is_pffunc_af(req->hdr.pcifunc)) + nix_intf = req->intf; + + npc_cn20k_config_mcam_entry(rvu, blkaddr, req->entry, nix_intf, + &req->entry_data, req->enable_entry, + req->hw_prio); + + rc = 0; +exit: + mutex_unlock(&mcam->lock); + return rc; +} + +int rvu_mbox_handler_npc_cn20k_mcam_read_entry(struct rvu *rvu, + struct npc_mcam_read_entry_req *req, + struct npc_cn20k_mcam_read_entry_rsp *rsp) +{ + struct npc_mcam *mcam = &rvu->hw->mcam; + u16 pcifunc = req->hdr.pcifunc; + int blkaddr, rc; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); + if (blkaddr < 0) + return NPC_MCAM_INVALID_REQ; + + mutex_lock(&mcam->lock); + rc = npc_mcam_verify_entry(mcam, pcifunc, req->entry); + if (!rc) + npc_cn20k_read_mcam_entry(rvu, blkaddr, req->entry, + &rsp->entry_data, &rsp->intf, + &rsp->enable, &rsp->hw_prio); + + mutex_unlock(&mcam->lock); + return rc; +} + +int rvu_mbox_handler_npc_cn20k_mcam_alloc_and_write_entry(struct rvu *rvu, + struct npc_cn20k_mcam_alloc_and_write_entry_req *req, + struct npc_mcam_alloc_and_write_entry_rsp *rsp) +{ + struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc); + struct npc_mcam_alloc_entry_req entry_req; + struct npc_mcam_alloc_entry_rsp entry_rsp; + struct npc_mcam *mcam = &rvu->hw->mcam; + u16 entry = NPC_MCAM_ENTRY_INVALID; + int blkaddr, rc; + u8 nix_intf; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); + if (blkaddr < 0) + return NPC_MCAM_INVALID_REQ; + + if (!is_npc_interface_valid(rvu, req->intf)) + return NPC_MCAM_INVALID_REQ; + + /* Try to allocate a MCAM entry */ + entry_req.hdr.pcifunc = req->hdr.pcifunc; + entry_req.contig = true; + entry_req.ref_prio = req->ref_prio; + entry_req.ref_entry = req->ref_entry; + entry_req.count = 1; + + rc = rvu_mbox_handler_npc_mcam_alloc_entry(rvu, + &entry_req, &entry_rsp); + if (rc) + return rc; + + if (!entry_rsp.count) + return NPC_MCAM_ALLOC_FAILED; + + /* entry_req.count is 1, so single entry is allocated */ + entry = entry_rsp.entry; + mutex_lock(&mcam->lock); + + if (is_npc_intf_tx(req->intf)) + nix_intf = pfvf->nix_tx_intf; + else + nix_intf = pfvf->nix_rx_intf; + + npc_cn20k_config_mcam_entry(rvu, blkaddr, entry, nix_intf, + &req->entry_data, req->enable_entry, + req->hw_prio); + + mutex_unlock(&mcam->lock); + + rsp->entry = entry; + return 0; +} + +int rvu_mbox_handler_npc_cn20k_read_base_steer_rule(struct rvu *rvu, + struct msg_req *req, + struct npc_cn20k_mcam_read_base_rule_rsp *rsp) +{ + struct npc_mcam *mcam = &rvu->hw->mcam; + int index, blkaddr, nixlf, rc = 0; + u16 pcifunc = req->hdr.pcifunc; + u8 intf, enable, hw_prio; + struct rvu_pfvf *pfvf; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); + if (blkaddr < 0) + return NPC_MCAM_INVALID_REQ; + + /* Return the channel number in case of PF */ + if (!(pcifunc & RVU_PFVF_FUNC_MASK)) { + pfvf = rvu_get_pfvf(rvu, pcifunc); + rsp->entry.kw[0] = pfvf->rx_chan_base; + rsp->entry.kw_mask[0] = 0xFFFULL; + goto out; + } + + /* Find the pkt steering rule installed by PF to this VF */ + mutex_lock(&mcam->lock); + for (index = 0; index < mcam->bmap_entries; index++) { + if (mcam->entry2target_pffunc[index] == pcifunc) + goto read_entry; + } + + rc = nix_get_nixlf(rvu, pcifunc, &nixlf, NULL); + if (rc < 0) { + mutex_unlock(&mcam->lock); + goto out; + } + /* Read the default ucast entry if there is no pkt steering rule */ + index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, + NIXLF_UCAST_ENTRY); +read_entry: + /* Read the mcam entry */ + npc_cn20k_read_mcam_entry(rvu, blkaddr, index, + &rsp->entry, &intf, + &enable, &hw_prio); + mutex_unlock(&mcam->lock); +out: + return rc; +} + static u8 npc_map2cn20k_flag(u8 flag) { switch (flag) { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h index 861622741174..4b5cdbef334c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h @@ -296,15 +296,16 @@ int npc_cn20k_dft_rules_idx_get(struct rvu *rvu, u16 pcifunc, u16 *bcast, u16 *mcast, u16 *promisc, u16 *ucast); void npc_cn20k_config_mcam_entry(struct rvu *rvu, int blkaddr, int index, - u8 intf, struct mcam_entry *entry, + u8 intf, + struct cn20k_mcam_entry *entry, bool enable, u8 hw_prio); void npc_cn20k_enable_mcam_entry(struct rvu *rvu, int blkaddr, int index, bool enable); void npc_cn20k_copy_mcam_entry(struct rvu *rvu, int blkaddr, u16 src, u16 dest); void npc_cn20k_read_mcam_entry(struct rvu *rvu, int blkaddr, u16 index, - struct mcam_entry *entry, u8 *intf, u8 *ena, - u8 *hw_prio); + struct cn20k_mcam_entry *entry, u8 *intf, + u8 *ena, u8 *hw_prio); void npc_cn20k_clear_mcam_entry(struct rvu *rvu, int blkaddr, int bank, int index); int npc_mcam_idx_2_key_type(struct rvu *rvu, u16 mcam_idx, u8 *key_type); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/reg.h b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/reg.h index bf50d999528b..8bfaa507ee50 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/reg.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/reg.h @@ -78,6 +78,13 @@ #define RVU_MBOX_VF_INT_ENA_W1C (0x38) #define RVU_MBOX_VF_VFAF_TRIGX(a) (0x2000 | (a) << 3) + +#define NIX_GINT_INT (0x200) +#define NIX_GINT_INT_W1S (0x208) + +#define ALTAF_FLR BIT_ULL(0) +#define ALTAF_RDY BIT_ULL(1) + /* NPC registers */ #define NPC_AF_INTFX_EXTRACTORX_CFG(a, b) \ (0x20c000ull | (a) << 16 | (b) << 8) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index a4e79828a84c..e004271124df 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -287,6 +287,17 @@ M(NPC_CN20K_MCAM_GET_FREE_COUNT, 0x6015, npc_cn20k_get_fcnt, \ msg_req, npc_cn20k_get_fcnt_rsp) \ M(NPC_CN20K_GET_KEX_CFG, 0x6016, npc_cn20k_get_kex_cfg, \ msg_req, npc_cn20k_get_kex_cfg_rsp) \ +M(NPC_CN20K_MCAM_WRITE_ENTRY, 0x6017, npc_cn20k_mcam_write_entry, \ + npc_cn20k_mcam_write_entry_req, msg_rsp) \ +M(NPC_CN20K_MCAM_ALLOC_AND_WRITE_ENTRY, 0x6018, \ +npc_cn20k_mcam_alloc_and_write_entry, \ + npc_cn20k_mcam_alloc_and_write_entry_req, \ + npc_mcam_alloc_and_write_entry_rsp) \ +M(NPC_CN20K_MCAM_READ_ENTRY, 0x6019, npc_cn20k_mcam_read_entry, \ + npc_mcam_read_entry_req, \ + npc_cn20k_mcam_read_entry_rsp) \ +M(NPC_CN20K_MCAM_READ_BASE_RULE, 0x601a, npc_cn20k_read_base_steer_rule, \ + msg_req, npc_cn20k_mcam_read_base_rule_rsp) \ /* NIX mbox IDs (range 0x8000 - 0xFFFF) */ \ M(NIX_LF_ALLOC, 0x8000, nix_lf_alloc, \ nix_lf_alloc_req, nix_lf_alloc_rsp) \ @@ -1570,13 +1581,32 @@ struct mcam_entry_mdata { }; struct mcam_entry { -#define NPC_MAX_KWS_IN_KEY 8 /* Number of keywords in max keywidth */ +#define NPC_MAX_KWS_IN_KEY 7 /* Number of keywords in max keywidth */ u64 kw[NPC_MAX_KWS_IN_KEY]; u64 kw_mask[NPC_MAX_KWS_IN_KEY]; u64 action; u64 vtag_action; }; +struct cn20k_mcam_entry { +#define NPC_CN20K_MAX_KWS_IN_KEY 8 + u64 kw[NPC_CN20K_MAX_KWS_IN_KEY]; + u64 kw_mask[NPC_CN20K_MAX_KWS_IN_KEY]; + u64 action; + u64 vtag_action; +}; + +struct npc_cn20k_mcam_write_entry_req { + struct mbox_msghdr hdr; + struct cn20k_mcam_entry entry_data; + u16 entry; /* MCAM entry to write this match key */ + u16 cntr; /* Counter for this MCAM entry */ + u8 intf; /* Rx or Tx interface */ + u8 enable_entry;/* Enable this MCAM entry ? */ + u8 hw_prio; /* hardware priority, valid for cn20k */ + u64 reserved; /* reserved for future use */ +}; + struct npc_mcam_write_entry_req { struct mbox_msghdr hdr; struct mcam_entry entry_data; @@ -1649,8 +1679,30 @@ struct npc_mcam_alloc_and_write_entry_req { u8 intf; /* Rx or Tx interface */ u8 enable_entry;/* Enable this MCAM entry ? */ u8 alloc_cntr; /* Allocate counter and map ? */ - /* hardware priority, supported for cn20k */ - u8 hw_prio; +}; + +struct npc_cn20k_mcam_alloc_and_write_entry_req { + struct mbox_msghdr hdr; + struct cn20k_mcam_entry entry_data; + u16 ref_entry; + u8 ref_prio; /* Lower or higher w.r.t ref_entry */ + u8 intf; /* Rx or Tx interface */ + u8 enable_entry;/* Enable this MCAM entry ? */ + u8 hw_prio; /* hardware priority, valid for cn20k */ + u16 reserved[4]; /* reserved for future use */ +}; + +struct npc_cn20k_mcam_read_entry_rsp { + struct mbox_msghdr hdr; + struct cn20k_mcam_entry entry_data; + u8 intf; + u8 enable; + u8 hw_prio; /* valid for cn20k */ +}; + +struct npc_cn20k_mcam_read_base_rule_rsp { + struct mbox_msghdr hdr; + struct cn20k_mcam_entry entry; }; struct npc_mcam_alloc_and_write_entry_rsp { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/npc.h index cb05ec69e0b3..cefc5d70f3e4 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/npc.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/npc.h @@ -644,6 +644,7 @@ struct rvu_npc_mcam_rule { u16 chan; u16 chan_mask; u8 lxmb; + u8 hw_prio; }; #endif /* NPC_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 40591134cb1c..1c8f657719b2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -600,6 +600,7 @@ check_pf: static int rvu_setup_msix_resources(struct rvu *rvu) { + struct altaf_intr_notify *altaf_intr_data; struct rvu_hwinfo *hw = rvu->hw; int pf, vf, numvfs, hwvf, err; int nvecs, offset, max_msix; @@ -706,7 +707,30 @@ setup_vfmsix: rvu->msix_base_iova = iova; rvu->msixtr_base_phy = phy_addr; + if (is_rvu_otx2(rvu) || is_cn20k(rvu->pdev)) + return 0; + + if (!rvu->fwdata) + goto fail; + + altaf_intr_data = &rvu->fwdata->altaf_intr_info; + if (altaf_intr_data->gint_paddr) { + iova = dma_map_resource(rvu->dev, altaf_intr_data->gint_paddr, + PCI_MSIX_ENTRY_SIZE, + DMA_BIDIRECTIONAL, 0); + + if (dma_mapping_error(rvu->dev, iova)) + goto fail; + + altaf_intr_data->gint_iova_addr = iova; + } + return 0; + +fail: + dma_unmap_resource(rvu->dev, phy_addr, max_msix * PCI_MSIX_ENTRY_SIZE, + DMA_BIDIRECTIONAL, 0); + return -EFAULT; } static void rvu_reset_msix(struct rvu *rvu) @@ -1397,7 +1421,6 @@ static void rvu_detach_block(struct rvu *rvu, int pcifunc, int blktype) if (blkaddr < 0) return; - block = &hw->block[blkaddr]; num_lfs = rvu_get_rsrc_mapcount(pfvf, block->addr); @@ -2203,6 +2226,33 @@ int rvu_mbox_handler_ndc_sync_op(struct rvu *rvu, return 0; } +static void rvu_notify_altaf(struct rvu *rvu, u16 pcifunc, u64 op) +{ + int pf, vf; + + if (!rvu->fwdata) + return; + + if (op == ALTAF_FLR) { + pf = rvu_get_pf(rvu->pdev, pcifunc); + set_bit(pf, rvu->fwdata->altaf_intr_info.flr_pf_bmap); + if (pcifunc & RVU_PFVF_FUNC_MASK) { + vf = pcifunc & RVU_PFVF_FUNC_MASK; + if (vf >= 128) { + WARN(1, + "flr_vf_bmap size is 128 bits, vf=%u\n", + vf); + return; + } + + set_bit(vf, rvu->fwdata->altaf_intr_info.flr_vf_bmap); + } + } + + rvu_write64(rvu, BLKADDR_NIX0, AF_BAR2_ALIASX(0, NIX_GINT_INT_W1S), op); + usleep_range(5000, 6000); +} + static int rvu_process_mbox_msg(struct otx2_mbox *mbox, int devid, struct mbox_msghdr *req) { @@ -2286,7 +2336,8 @@ static void __rvu_mbox_handler(struct rvu_work *mwork, int type, bool poll) offset = mbox->rx_start + ALIGN(sizeof(*req_hdr), MBOX_MSG_ALIGN); - if (req_hdr->sig && !(is_rvu_otx2(rvu) || is_cn20k(rvu->pdev))) { + if (req_hdr->sig && rvu->altaf_ready && + !(is_rvu_otx2(rvu) || is_cn20k(rvu->pdev))) { req_hdr->opt_msg = mw->mbox_wrk[devid].num_msgs; rvu_write64(rvu, BLKADDR_NIX0, RVU_AF_BAR2_SEL, RVU_AF_BAR2_PFID); @@ -2795,6 +2846,16 @@ static void rvu_blklf_teardown(struct rvu *rvu, u16 pcifunc, u8 blkaddr) block = &rvu->hw->block[blkaddr]; num_lfs = rvu_get_rsrc_mapcount(rvu_get_pfvf(rvu, pcifunc), block->addr); + + if (block->addr == BLKADDR_TIM && rvu->altaf_ready) { + rvu_notify_altaf(rvu, pcifunc, ALTAF_FLR); + return; + } + + if ((block->addr == BLKADDR_SSO || block->addr == BLKADDR_SSOW) && + rvu->altaf_ready) + return; + if (!num_lfs) return; for (slot = 0; slot < num_lfs; slot++) { @@ -3078,12 +3139,12 @@ static int rvu_afvf_msix_vectors_num_ok(struct rvu *rvu) static int rvu_register_interrupts(struct rvu *rvu) { - int ret, offset, pf_vec_start; + int i, ret, offset, pf_vec_start; rvu->num_vec = pci_msix_vec_count(rvu->pdev); - rvu->irq_name = devm_kmalloc_array(rvu->dev, rvu->num_vec, - NAME_SIZE, GFP_KERNEL); + rvu->irq_name = devm_kcalloc(rvu->dev, rvu->num_vec, + NAME_SIZE, GFP_KERNEL); if (!rvu->irq_name) return -ENOMEM; @@ -3269,6 +3330,13 @@ static int rvu_register_interrupts(struct rvu *rvu) if (ret) goto fail; + for (i = 0; i < rvu->num_vec; i++) { + if (strstr(&rvu->irq_name[i * NAME_SIZE], "Mbox") || + strstr(&rvu->irq_name[i * NAME_SIZE], "FLR")) + irq_set_affinity(pci_irq_vector(rvu->pdev, i), + cpumask_of(0)); + } + return 0; fail: @@ -3297,8 +3365,8 @@ static int rvu_flr_init(struct rvu *rvu) cfg | BIT_ULL(22)); } - rvu->flr_wq = alloc_ordered_workqueue("rvu_afpf_flr", - WQ_HIGHPRI | WQ_MEM_RECLAIM); + rvu->flr_wq = alloc_workqueue("rvu_afpf_flr", + WQ_HIGHPRI | WQ_MEM_RECLAIM, 0); if (!rvu->flr_wq) return -ENOMEM; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index a53bb5c924ef..f811d6b5c545 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -197,7 +197,7 @@ struct npc_key_field { /* Masks where all set bits indicate position * of a field in the key */ - u64 kw_mask[NPC_MAX_KWS_IN_KEY]; + u64 kw_mask[NPC_CN20K_MAX_KWS_IN_KEY]; /* Number of words in the key a field spans. If a field is * of 16 bytes and key offset is 4 then the field will use * 4 bytes in KW0, 8 bytes in KW1 and 4 bytes in KW2 and @@ -1191,4 +1191,5 @@ int rvu_rep_pf_init(struct rvu *rvu); int rvu_rep_install_mcam_rules(struct rvu *rvu); void rvu_rep_update_rules(struct rvu *rvu, u16 pcifunc, bool ena); int rvu_rep_notify_pfvf_state(struct rvu *rvu, u16 pcifunc, bool enable); +int npc_mcam_verify_entry(struct npc_mcam *mcam, u16 pcifunc, int entry); #endif /* RVU_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index 425d3a43c0b8..620724dad093 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -21,6 +21,7 @@ #include "rvu_npc_hash.h" #include "mcs.h" +#include "cn20k/reg.h" #include "cn20k/debugfs.h" #define DEBUGFS_DIR_NAME "octeontx2" @@ -3506,11 +3507,11 @@ static int rvu_dbg_npc_mcam_show_rules(struct seq_file *s, void *unused) struct rvu_npc_mcam_rule *iter; struct rvu *rvu = s->private; struct npc_mcam *mcam; - int pf, vf = -1; + int pf, vf = -1, bank; + u16 target, index; bool enabled; + u64 hits, off; int blkaddr; - u16 target; - u64 hits; blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); if (blkaddr < 0) @@ -3554,6 +3555,15 @@ static int rvu_dbg_npc_mcam_show_rules(struct seq_file *s, void *unused) enabled = is_mcam_entry_enabled(rvu, mcam, blkaddr, iter->entry); seq_printf(s, "\tenabled: %s\n", enabled ? "yes" : "no"); + if (is_cn20k(rvu->pdev)) { + seq_printf(s, "\tpriority: %u\n", iter->hw_prio); + index = iter->entry & (mcam->banksize - 1); + bank = npc_get_bank(mcam, iter->entry); + off = NPC_AF_CN20K_MCAMEX_BANKX_STAT_EXT(index, bank); + hits = rvu_read64(rvu, blkaddr, off); + seq_printf(s, "\thits: %lld\n", hits); + continue; + } if (!iter->has_cntr) continue; @@ -3698,9 +3708,9 @@ static int rvu_dbg_npc_exact_drop_cnt(struct seq_file *s, void *unused) struct npc_exact_table *table; struct rvu *rvu = s->private; struct npc_key_field *field; + u64 cfg, cam1, off; u16 chan, pcifunc; int blkaddr, i; - u64 cfg, cam1; char *str; blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); @@ -3721,11 +3731,17 @@ static int rvu_dbg_npc_exact_drop_cnt(struct seq_file *s, void *unused) chan = field->kw_mask[0] & cam1; str = (cfg & 1) ? "enabled" : "disabled"; + if (is_cn20k(rvu->pdev)) { + off = NPC_AF_CN20K_MCAMEX_BANKX_STAT_EXT(i, 0); + seq_printf(s, "0x%x\t%d\t\t%llu\t0x%x\t%s\n", pcifunc, + i, rvu_read64(rvu, blkaddr, off), chan, str); + } else { + off = NPC_AF_MATCH_STATX(table->counter_idx[i]); + seq_printf(s, "0x%x\t%d\t\t%llu\t0x%x\t%s\n", pcifunc, + i, rvu_read64(rvu, blkaddr, off), + chan, str); + } - seq_printf(s, "0x%x\t%d\t\t%llu\t0x%x\t%s\n", pcifunc, i, - rvu_read64(rvu, blkaddr, - NPC_AF_MATCH_STATX(table->counter_idx[i])), - chan, str); } return 0; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 5208cd084055..a7e84c862475 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -2397,8 +2397,8 @@ void rvu_npc_get_mcam_counter_alloc_info(struct rvu *rvu, u16 pcifunc, } } -static int npc_mcam_verify_entry(struct npc_mcam *mcam, - u16 pcifunc, int entry) +int npc_mcam_verify_entry(struct npc_mcam *mcam, + u16 pcifunc, int entry) { /* verify AF installed entries */ if (is_pffunc_af(pcifunc)) @@ -2943,6 +2943,10 @@ int npc_config_cntr_default_entries(struct rvu *rvu, bool enable) struct rvu_npc_mcam_rule *rule; int blkaddr; + /* Counter is set for each rule by default */ + if (is_cn20k(rvu->pdev)) + return -EINVAL; + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); if (blkaddr < 0) return -EINVAL; @@ -3123,10 +3127,13 @@ int rvu_mbox_handler_npc_mcam_write_entry(struct rvu *rvu, if (rc) goto exit; - if (req->set_cntr && - npc_mcam_verify_counter(mcam, pcifunc, req->cntr)) { - rc = NPC_MCAM_INVALID_REQ; - goto exit; + if (!is_cn20k(rvu->pdev)) { + /* Verify counter in SoCs other than cn20k */ + if (req->set_cntr && + npc_mcam_verify_counter(mcam, pcifunc, req->cntr)) { + rc = NPC_MCAM_INVALID_REQ; + goto exit; + } } if (!is_npc_interface_valid(rvu, req->intf)) { @@ -3338,6 +3345,10 @@ int rvu_mbox_handler_npc_mcam_alloc_counter(struct rvu *rvu, struct npc_mcam *mcam = &rvu->hw->mcam; int err; + /* Counter is not supported for CN20K */ + if (is_cn20k(rvu->pdev)) + return NPC_MCAM_INVALID_REQ; + mutex_lock(&mcam->lock); err = __npc_mcam_alloc_counter(rvu, req, rsp); @@ -3392,6 +3403,10 @@ int rvu_mbox_handler_npc_mcam_free_counter(struct rvu *rvu, struct npc_mcam *mcam = &rvu->hw->mcam; int err; + /* Counter is not supported for CN20K */ + if (is_cn20k(rvu->pdev)) + return NPC_MCAM_INVALID_REQ; + mutex_lock(&mcam->lock); err = __npc_mcam_free_counter(rvu, req, rsp); @@ -3450,6 +3465,10 @@ int rvu_mbox_handler_npc_mcam_unmap_counter(struct rvu *rvu, u16 index, entry = 0; int blkaddr, rc; + /* Counter is not supported for CN20K */ + if (is_cn20k(rvu->pdev)) + return NPC_MCAM_INVALID_REQ; + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); if (blkaddr < 0) return NPC_MCAM_INVALID_REQ; @@ -3494,12 +3513,23 @@ int rvu_mbox_handler_npc_mcam_clear_counter(struct rvu *rvu, struct npc_mcam_oper_counter_req *req, struct msg_rsp *rsp) { struct npc_mcam *mcam = &rvu->hw->mcam; - int blkaddr, err; + int blkaddr, err, index, bank; blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); if (blkaddr < 0) return NPC_MCAM_INVALID_REQ; + /* For cn20k, npc mcam index is passed as cntr, as each + * mcam entry has corresponding counter. + */ + if (is_cn20k(rvu->pdev)) { + index = req->cntr & (mcam->banksize - 1); + bank = npc_get_bank(mcam, req->cntr); + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_STAT_EXT(index, bank), 0); + return 0; + } + mutex_lock(&mcam->lock); err = npc_mcam_verify_counter(mcam, req->hdr.pcifunc, req->cntr); mutex_unlock(&mcam->lock); @@ -3516,12 +3546,26 @@ int rvu_mbox_handler_npc_mcam_counter_stats(struct rvu *rvu, struct npc_mcam_oper_counter_rsp *rsp) { struct npc_mcam *mcam = &rvu->hw->mcam; - int blkaddr, err; + int blkaddr, err, index, bank; + u64 regval; blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); if (blkaddr < 0) return NPC_MCAM_INVALID_REQ; + /* In CN20K, mcam index is passed cntr. Each cn20k mcam entry + * has its own counter. No need to verify the counter index. + */ + if (is_cn20k(rvu->pdev)) { + index = req->cntr & (mcam->banksize - 1); + bank = npc_get_bank(mcam, req->cntr); + regval = rvu_read64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_STAT_EXT(index, + bank)); + rsp->stat = regval; + return 0; + } + mutex_lock(&mcam->lock); err = npc_mcam_verify_counter(mcam, req->hdr.pcifunc, req->cntr); mutex_unlock(&mcam->lock); @@ -3816,13 +3860,24 @@ int rvu_mbox_handler_npc_mcam_entry_stats(struct rvu *rvu, if (blkaddr < 0) return NPC_MCAM_INVALID_REQ; - mutex_lock(&mcam->lock); - index = req->entry & (mcam->banksize - 1); bank = npc_get_bank(mcam, req->entry); + mutex_lock(&mcam->lock); + + if (is_cn20k(rvu->pdev)) { + regval = rvu_read64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_STAT_EXT(index, + bank)); + rsp->stat_ena = 1; + rsp->stat = regval; + mutex_unlock(&mcam->lock); + return 0; + } + /* read MCAM entry STAT_ACT register */ - regval = rvu_read64(rvu, blkaddr, NPC_AF_MCAMEX_BANKX_STAT_ACT(index, bank)); + regval = rvu_read64(rvu, blkaddr, NPC_AF_MCAMEX_BANKX_STAT_ACT(index, + bank)); if (!(regval & rvu->hw->npc_stat_ena)) { rsp->stat_ena = 0; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c index 117038c793ea..802b68cf228c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c @@ -227,10 +227,11 @@ static bool npc_check_overlap(struct rvu *rvu, int blkaddr, input = &mcam->tx_key_fields[type]; } + kws = NPC_MAX_KWS_IN_KEY; + if (is_cn20k(rvu->pdev)) goto skip_cn10k_config; - kws = NPC_MAX_KWS_IN_KEY - 1; for (lid = start_lid; lid < NPC_MAX_LID; lid++) { for (lt = 0; lt < NPC_MAX_LT; lt++) { for (ld = 0; ld < NPC_MAX_LD; ld++) { @@ -255,8 +256,7 @@ static bool npc_check_overlap(struct rvu *rvu, int blkaddr, /* check any input field bits falls in any * other field bits. */ - if (npc_check_overlap_fields(dummy, input, - kws)) + if (npc_check_overlap_fields(dummy, input, kws)) return true; } } @@ -289,7 +289,7 @@ skip_cn10k_config: * field bits */ if (npc_check_overlap_fields(dummy, input, - NPC_MAX_KWS_IN_KEY)) + NPC_CN20K_MAX_KWS_IN_KEY)) return true; } } @@ -460,9 +460,9 @@ static void npc_handle_multi_layer_fields(struct rvu *rvu, int blkaddr, u8 intf) u8 start_lid; if (is_cn20k(rvu->pdev)) - max_kw = NPC_MAX_KWS_IN_KEY; + max_kw = NPC_CN20K_MAX_KWS_IN_KEY; else - max_kw = NPC_MAX_KWS_IN_KEY - 1; + max_kw = NPC_MAX_KWS_IN_KEY; key_fields = mcam->rx_key_fields; features = &mcam->rx_features; @@ -906,6 +906,7 @@ void npc_update_entry(struct rvu *rvu, enum key_fields type, struct mcam_entry_mdata *mdata, u64 val_lo, u64 val_hi, u64 mask_lo, u64 mask_hi, u8 intf) { + struct cn20k_mcam_entry cn20k_dummy = { {0} }; struct npc_mcam *mcam = &rvu->hw->mcam; struct mcam_entry dummy = { {0} }; u64 *kw, *kw_mask, *val, *mask; @@ -921,9 +922,15 @@ void npc_update_entry(struct rvu *rvu, enum key_fields type, if (!field->nr_kws) return; - max_kw = NPC_MAX_KWS_IN_KEY; - kw = dummy.kw; - kw_mask = dummy.kw_mask; + if (is_cn20k(rvu->pdev)) { + max_kw = NPC_CN20K_MAX_KWS_IN_KEY; + kw = cn20k_dummy.kw; + kw_mask = cn20k_dummy.kw_mask; + } else { + max_kw = NPC_MAX_KWS_IN_KEY; + kw = dummy.kw; + kw_mask = dummy.kw_mask; + } for (i = 0; i < max_kw; i++) { if (!field->kw_mask[i]) @@ -1247,6 +1254,10 @@ static void rvu_mcam_remove_counter_from_rule(struct rvu *rvu, u16 pcifunc, { struct npc_mcam *mcam = &rvu->hw->mcam; + /* There is no counter allotted for cn20k */ + if (is_cn20k(rvu->pdev)) + return; + mutex_lock(&mcam->lock); __rvu_mcam_remove_counter_from_rule(rvu, pcifunc, rule); @@ -1296,8 +1307,17 @@ static int npc_mcast_update_action_index(struct rvu *rvu, struct npc_install_flo static void npc_populate_mcam_mdata(struct rvu *rvu, struct mcam_entry_mdata *mdata, + struct cn20k_mcam_entry *cn20k_entry, struct mcam_entry *entry) { + if (is_cn20k(rvu->pdev)) { + mdata->kw = cn20k_entry->kw; + mdata->kw_mask = cn20k_entry->kw_mask; + mdata->action = &cn20k_entry->action; + mdata->vtag_action = &cn20k_entry->vtag_action; + mdata->max_kw = NPC_CN20K_MAX_KWS_IN_KEY; + return; + } mdata->kw = entry->kw; mdata->kw_mask = entry->kw_mask; mdata->action = &entry->action; @@ -1417,9 +1437,11 @@ static int npc_install_flow(struct rvu *rvu, int blkaddr, u16 target, bool pf_set_vfs_mac) { struct rvu_npc_mcam_rule *def_ucast_rule = pfvf->def_ucast_rule; + struct npc_cn20k_mcam_write_entry_req cn20k_wreq = { 0 }; u64 features, installed_features, missing_features = 0; struct npc_mcam_write_entry_req write_req = { 0 }; struct npc_mcam *mcam = &rvu->hw->mcam; + struct cn20k_mcam_entry *cn20k_entry; struct mcam_entry_mdata mdata = { }; struct rvu_npc_mcam_rule dummy = { 0 }; struct rvu_npc_mcam_rule *rule; @@ -1432,11 +1454,12 @@ static int npc_install_flow(struct rvu *rvu, int blkaddr, u16 target, installed_features = req->features; features = req->features; - entry = &write_req.entry_data; entry_index = req->entry; - npc_populate_mcam_mdata(rvu, &mdata, - &write_req.entry_data); + cn20k_entry = &cn20k_wreq.entry_data; + entry = &write_req.entry_data; + + npc_populate_mcam_mdata(rvu, &mdata, cn20k_entry, entry); npc_update_flow(rvu, &mdata, features, &req->packet, &req->mask, &dummy, req->intf, blkaddr); @@ -1484,51 +1507,90 @@ find_rule: new = true; } - /* allocate new counter if rule has no counter */ - if (!req->default_rule && req->set_cntr && !rule->has_cntr) - rvu_mcam_add_counter_to_rule(rvu, owner, rule, rsp); + if (!is_cn20k(rvu->pdev)) { + write_req.hdr.pcifunc = owner; - /* if user wants to delete an existing counter for a rule then - * free the counter - */ - if (!req->set_cntr && rule->has_cntr) - rvu_mcam_remove_counter_from_rule(rvu, owner, rule); + /* allocate new counter if rule has no counter */ + if (!req->default_rule && req->set_cntr && !rule->has_cntr) + rvu_mcam_add_counter_to_rule(rvu, owner, rule, rsp); - write_req.hdr.pcifunc = owner; + /* if user wants to delete an existing counter for a rule then + * free the counter + */ + if (!req->set_cntr && rule->has_cntr) + rvu_mcam_remove_counter_from_rule(rvu, owner, rule); - /* AF owns the default rules so change the owner just to relax - * the checks in rvu_mbox_handler_npc_mcam_write_entry - */ - if (req->default_rule) - write_req.hdr.pcifunc = 0; + /* AF owns the default rules so change the owner just to relax + * the checks in rvu_mbox_handler_npc_mcam_write_entry + */ + if (req->default_rule) + write_req.hdr.pcifunc = 0; - write_req.entry = entry_index; - write_req.intf = req->intf; - write_req.enable_entry = (u8)enable; - /* if counter is available then clear and use it */ - if (req->set_cntr && rule->has_cntr) { - rvu_write64(rvu, blkaddr, NPC_AF_MATCH_STATX(rule->cntr), req->cntr_val); - write_req.set_cntr = 1; - write_req.cntr = rule->cntr; + write_req.entry = entry_index; + write_req.intf = req->intf; + write_req.enable_entry = (u8)enable; + /* if counter is available then clear and use it */ + if (req->set_cntr && rule->has_cntr) { + rvu_write64(rvu, blkaddr, + NPC_AF_MATCH_STATX(rule->cntr), + req->cntr_val); + write_req.set_cntr = 1; + write_req.cntr = rule->cntr; + } + goto update_rule; } + cn20k_wreq.hdr.pcifunc = owner; + + if (req->default_rule) + cn20k_wreq.hdr.pcifunc = 0; + + cn20k_wreq.entry = entry_index; + cn20k_wreq.intf = req->intf; + cn20k_wreq.enable_entry = (u8)enable; + cn20k_wreq.hw_prio = req->hw_prio; + +update_rule: + /* update rule */ memcpy(&rule->packet, &dummy.packet, sizeof(rule->packet)); memcpy(&rule->mask, &dummy.mask, sizeof(rule->mask)); rule->entry = entry_index; - memcpy(&rule->rx_action, &entry->action, sizeof(struct nix_rx_action)); - if (is_npc_intf_tx(req->intf)) - memcpy(&rule->tx_action, &entry->action, - sizeof(struct nix_tx_action)); - rule->vtag_action = entry->vtag_action; + if (is_cn20k(rvu->pdev)) { + memcpy(&rule->rx_action, &cn20k_entry->action, + sizeof(struct nix_rx_action)); + if (is_npc_intf_tx(req->intf)) + memcpy(&rule->tx_action, &cn20k_entry->action, + sizeof(struct nix_tx_action)); + rule->vtag_action = cn20k_entry->vtag_action; + } else { + memcpy(&rule->rx_action, &entry->action, + sizeof(struct nix_rx_action)); + if (is_npc_intf_tx(req->intf)) + memcpy(&rule->tx_action, &entry->action, + sizeof(struct nix_tx_action)); + rule->vtag_action = entry->vtag_action; + } + rule->features = installed_features; rule->default_rule = req->default_rule; rule->owner = owner; rule->enable = enable; - rule->chan_mask = write_req.entry_data.kw_mask[0] & NPC_KEX_CHAN_MASK; - rule->chan = write_req.entry_data.kw[0] & NPC_KEX_CHAN_MASK; + + if (is_cn20k(rvu->pdev)) { + rule->chan_mask = cn20k_wreq.entry_data.kw_mask[0] & + NPC_KEX_CHAN_MASK; + rule->chan = cn20k_wreq.entry_data.kw[0] & + NPC_KEX_CHAN_MASK; + } else { + rule->chan_mask = write_req.entry_data.kw_mask[0] & + NPC_KEX_CHAN_MASK; + rule->chan = write_req.entry_data.kw[0] & NPC_KEX_CHAN_MASK; + } + rule->chan &= rule->chan_mask; rule->lxmb = dummy.lxmb; + rule->hw_prio = req->hw_prio; if (is_npc_intf_tx(req->intf)) rule->intf = pfvf->nix_tx_intf; else @@ -1540,8 +1602,14 @@ find_rule: pfvf->def_ucast_rule = rule; /* write to mcam entry registers */ - err = rvu_mbox_handler_npc_mcam_write_entry(rvu, &write_req, - &write_rsp); + if (is_cn20k(rvu->pdev)) + err = rvu_mbox_handler_npc_cn20k_mcam_write_entry(rvu, + &cn20k_wreq, + &write_rsp); + else + err = rvu_mbox_handler_npc_mcam_write_entry(rvu, &write_req, + &write_rsp); + if (err) { rvu_mcam_remove_counter_from_rule(rvu, owner, rule); if (new) { @@ -1782,23 +1850,25 @@ static int npc_update_dmac_value(struct rvu *rvu, int npcblkaddr, struct rvu_npc_mcam_rule *rule, struct rvu_pfvf *pfvf) { + struct npc_cn20k_mcam_write_entry_req cn20k_wreq = { 0 }; struct npc_mcam_write_entry_req write_req = { 0 }; - struct npc_mcam *mcam = &rvu->hw->mcam; struct mcam_entry_mdata mdata = { }; + struct npc_mcam *mcam = &rvu->hw->mcam; + struct cn20k_mcam_entry *cn20k_entry; struct mcam_entry *entry; u8 intf, enable, hw_prio; struct msg_rsp rsp; int err; + cn20k_entry = &cn20k_wreq.entry_data; entry = &write_req.entry_data; - - npc_populate_mcam_mdata(rvu, &mdata, entry); + npc_populate_mcam_mdata(rvu, &mdata, cn20k_entry, entry); ether_addr_copy(rule->packet.dmac, pfvf->mac_addr); if (is_cn20k(rvu->pdev)) npc_cn20k_read_mcam_entry(rvu, npcblkaddr, rule->entry, - entry, &intf, + cn20k_entry, &intf, &enable, &hw_prio); else npc_read_mcam_entry(rvu, mcam, npcblkaddr, rule->entry, @@ -1808,12 +1878,21 @@ static int npc_update_dmac_value(struct rvu *rvu, int npcblkaddr, ether_addr_to_u64(pfvf->mac_addr), 0, 0xffffffffffffull, 0, intf); - write_req.hdr.pcifunc = rule->owner; - write_req.entry = rule->entry; - write_req.intf = pfvf->nix_rx_intf; - mutex_unlock(&mcam->lock); - err = rvu_mbox_handler_npc_mcam_write_entry(rvu, &write_req, &rsp); + if (is_cn20k(rvu->pdev)) { + cn20k_wreq.hdr.pcifunc = rule->owner; + cn20k_wreq.entry = rule->entry; + cn20k_wreq.intf = pfvf->nix_rx_intf; + err = rvu_mbox_handler_npc_cn20k_mcam_write_entry(rvu, + &cn20k_wreq, + &rsp); + } else { + write_req.hdr.pcifunc = rule->owner; + write_req.entry = rule->entry; + write_req.intf = pfvf->nix_rx_intf; + err = rvu_mbox_handler_npc_mcam_write_entry(rvu, &write_req, + &rsp); + } mutex_lock(&mcam->lock); return err; @@ -1901,6 +1980,7 @@ int npc_install_mcam_drop_rule(struct rvu *rvu, int mcam_idx, u16 *counter_idx, u64 chan_val, u64 chan_mask, u64 exact_val, u64 exact_mask, u64 bcast_mcast_val, u64 bcast_mcast_mask) { + struct npc_cn20k_mcam_write_entry_req cn20k_req = { 0 }; struct npc_mcam_alloc_counter_req cntr_req = { 0 }; struct npc_mcam_alloc_counter_rsp cntr_rsp = { 0 }; struct npc_mcam_write_entry_req req = { 0 }; @@ -1949,19 +2029,24 @@ int npc_install_mcam_drop_rule(struct rvu *rvu, int mcam_idx, u16 *counter_idx, /* Reserve slot 0 */ npc_mcam_rsrcs_reserve(rvu, blkaddr, mcam_idx); - /* Allocate counter for this single drop on non hit rule */ - cntr_req.hdr.pcifunc = 0; /* AF request */ - cntr_req.contig = true; - cntr_req.count = 1; - err = rvu_mbox_handler_npc_mcam_alloc_counter(rvu, &cntr_req, &cntr_rsp); - if (err) { - dev_err(rvu->dev, "%s: Err to allocate cntr for drop rule (err=%d)\n", - __func__, err); - return -EFAULT; + if (!is_cn20k(rvu->pdev)) { + /* Allocate counter for this single drop on non hit rule */ + cntr_req.hdr.pcifunc = 0; /* AF request */ + cntr_req.contig = true; + cntr_req.count = 1; + err = rvu_mbox_handler_npc_mcam_alloc_counter(rvu, &cntr_req, + &cntr_rsp); + if (err) { + dev_err(rvu->dev, + "%s: Err to allocate cntr for drop rule (err=%d)\n", + __func__, err); + return -EFAULT; + } + *counter_idx = cntr_rsp.cntr; } - *counter_idx = cntr_rsp.cntr; npc_populate_mcam_mdata(rvu, &mdata, + &cn20k_req.entry_data, &req.entry_data); /* Fill in fields for this mcam entry */ @@ -1972,6 +2057,23 @@ int npc_install_mcam_drop_rule(struct rvu *rvu, int mcam_idx, u16 *counter_idx, npc_update_entry(rvu, NPC_LXMB, &mdata, bcast_mcast_val, 0, bcast_mcast_mask, 0, NIX_INTF_RX); + if (is_cn20k(rvu->pdev)) { + cn20k_req.intf = NIX_INTF_RX; + cn20k_req.entry = mcam_idx; + + err = rvu_mbox_handler_npc_cn20k_mcam_write_entry(rvu, + &cn20k_req, + &rsp); + if (err) { + dev_err(rvu->dev, + "%s: Installation of single drop on non hit rule at %d failed\n", + __func__, mcam_idx); + return err; + } + + goto enable_entry; + } + req.intf = NIX_INTF_RX; req.set_cntr = true; req.cntr = cntr_rsp.cntr; @@ -1979,14 +2081,17 @@ int npc_install_mcam_drop_rule(struct rvu *rvu, int mcam_idx, u16 *counter_idx, err = rvu_mbox_handler_npc_mcam_write_entry(rvu, &req, &rsp); if (err) { - dev_err(rvu->dev, "%s: Installation of single drop on non hit rule at %d failed\n", + dev_err(rvu->dev, + "%s: Installation of single drop on non hit rule at %d failed\n", __func__, mcam_idx); return err; } - dev_err(rvu->dev, "%s: Installed single drop on non hit rule at %d, cntr=%d\n", + dev_err(rvu->dev, + "%s: Installed single drop on non hit rule at %d, cntr=%d\n", __func__, mcam_idx, req.cntr); +enable_entry: /* disable entry at Bank 0, index 0 */ npc_enable_mcam_entry(rvu, mcam, blkaddr, mcam_idx, false); From 645c6e3c19999007446790b4b823452a7b3a3981 Mon Sep 17 00:00:00 2001 From: Ratheesh Kannoth Date: Tue, 24 Feb 2026 13:30:05 +0530 Subject: [PATCH 0111/1561] octeontx2-af: npc: cn20k: virtual index support This patch adds support for virtual MCAM index allocation and improves CN20K MCAM defragmentation handling. A new field is introduced in the non-ref, non-contiguous MCAM allocation mailbox request to indicate that virtual indexes should be returned instead of physical ones. Virtual indexes allow the hardware to move mapped MCAM entries internally, enabling defragmentation and preventing scattered allocations across subbanks. The patch also enhances defragmentation by treating non-ref, non-contiguous allocations as ideal candidates for packing sparsely used regions, which can free up subbanks for potential x2 or x4 configuration. All such allocations are tracked and always returned as virtual indexes so they remain stable even when entries are moved during defrag. During defragmentation, MCAM entries may shift between subbanks, but their virtual indexes remain unchanged. Additionally, this update fixes an issue where entry statistics were not being restored correctly after defragmentation. Signed-off-by: Ratheesh Kannoth Link: https://patch.msgid.link/20260224080009.4147301-10-rkannoth@marvell.com Signed-off-by: Jakub Kicinski --- .../ethernet/marvell/octeontx2/af/cn20k/npc.c | 859 +++++++++++++++++- .../ethernet/marvell/octeontx2/af/cn20k/npc.h | 30 +- .../net/ethernet/marvell/octeontx2/af/mbox.h | 5 + .../marvell/octeontx2/af/rvu_devlink.c | 95 +- .../ethernet/marvell/octeontx2/af/rvu_npc.c | 24 +- .../ethernet/marvell/octeontx2/af/rvu_npc.h | 2 + .../marvell/octeontx2/af/rvu_npc_fs.c | 6 + 7 files changed, 992 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c index 7c3fdccbce2a..acd065f44efa 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c @@ -215,6 +215,235 @@ struct npc_mcam_kex_extr *npc_mkex_extr_default_get(void) return &npc_mkex_extr_default; } +static u16 npc_idx2vidx(u16 idx) +{ + unsigned long index; + void *map; + u16 vidx; + int val; + + vidx = idx; + index = idx; + + map = xa_load(&npc_priv.xa_idx2vidx_map, index); + if (!map) + goto done; + + val = xa_to_value(map); + if (val == -1) + goto done; + + vidx = val; + +done: + return vidx; +} + +static bool npc_is_vidx(u16 vidx) +{ + return vidx >= npc_priv.bank_depth * 2; +} + +static u16 npc_vidx2idx(u16 vidx) +{ + unsigned long sentinel = (unsigned long)-1; + unsigned long index; + void *map; + int val; + u16 idx; + + idx = vidx; + index = vidx; + + map = xa_load(&npc_priv.xa_vidx2idx_map, index); + if (!map) + goto done; + + val = xa_to_value(map); + if (val == sentinel) + goto done; + + idx = val; + +done: + return idx; +} + +u16 npc_cn20k_vidx2idx(u16 idx) +{ + if (!npc_priv.init_done) + return idx; + + if (!npc_is_vidx(idx)) + return idx; + + return npc_vidx2idx(idx); +} + +u16 npc_cn20k_idx2vidx(u16 idx) +{ + if (!npc_priv.init_done) + return idx; + + if (npc_is_vidx(idx)) + return idx; + + return npc_idx2vidx(idx); +} + +static int npc_vidx_maps_del_entry(struct rvu *rvu, u16 vidx, u16 *old_midx) +{ + u16 mcam_idx; + void *map; + + if (!npc_is_vidx(vidx)) { + dev_err(rvu->dev, + "%s: vidx(%u) does not map to proper mcam idx\n", + __func__, vidx); + return -ESRCH; + } + + mcam_idx = npc_vidx2idx(vidx); + + map = xa_erase(&npc_priv.xa_vidx2idx_map, vidx); + if (!map) { + dev_err(rvu->dev, + "%s: vidx(%u) does not map to proper mcam idx\n", + __func__, vidx); + return -ESRCH; + } + + map = xa_erase(&npc_priv.xa_idx2vidx_map, mcam_idx); + if (!map) { + dev_err(rvu->dev, + "%s: vidx(%u) is not valid\n", + __func__, vidx); + return -ESRCH; + } + + if (old_midx) + *old_midx = mcam_idx; + + return 0; +} + +static int npc_vidx_maps_modify(struct rvu *rvu, u16 vidx, u16 new_midx) +{ + u16 old_midx; + void *map; + int rc; + + if (!npc_is_vidx(vidx)) { + dev_err(rvu->dev, + "%s: vidx(%u) does not map to proper mcam idx\n", + __func__, vidx); + return -ESRCH; + } + + map = xa_erase(&npc_priv.xa_vidx2idx_map, vidx); + if (!map) { + dev_err(rvu->dev, + "%s: vidx(%u) could not be deleted from vidx2idx map\n", + __func__, vidx); + return -ESRCH; + } + + old_midx = xa_to_value(map); + + rc = xa_insert(&npc_priv.xa_vidx2idx_map, vidx, + xa_mk_value(new_midx), GFP_KERNEL); + if (rc) { + dev_err(rvu->dev, + "%s: vidx(%u) cannot be added to vidx2idx map\n", + __func__, vidx); + goto fail1; + } + + map = xa_erase(&npc_priv.xa_idx2vidx_map, old_midx); + if (!map) { + dev_err(rvu->dev, + "%s: old_midx(%u, vidx(%u)) cannot be added to idx2vidx map\n", + __func__, old_midx, vidx); + rc = -ESRCH; + goto fail2; + } + + rc = xa_insert(&npc_priv.xa_idx2vidx_map, new_midx, + xa_mk_value(vidx), GFP_KERNEL); + if (rc) { + dev_err(rvu->dev, + "%s: new_midx(%u, vidx(%u)) cannot be added to idx2vidx map\n", + __func__, new_midx, vidx); + goto fail3; + } + + return 0; + +fail3: + /* Restore vidx at old_midx location */ + if (xa_insert(&npc_priv.xa_idx2vidx_map, old_midx, + xa_mk_value(vidx), GFP_KERNEL)) + dev_err(rvu->dev, + "%s: Error to roll back idx2vidx old_midx=%u vidx=%u\n", + __func__, old_midx, vidx); +fail2: + /* Erase new_midx inserted at vidx */ + if (!xa_erase(&npc_priv.xa_vidx2idx_map, vidx)) + dev_err(rvu->dev, + "%s: Failed to roll back vidx2idx vidx=%u\n", + __func__, vidx); + +fail1: + /* Restore old_midx at vidx location */ + if (xa_insert(&npc_priv.xa_vidx2idx_map, vidx, + xa_mk_value(old_midx), GFP_KERNEL)) + dev_err(rvu->dev, + "%s: Failed to roll back vidx2idx to old_midx=%u, vidx=%u\n", + __func__, old_midx, vidx); + + return rc; +} + +static int npc_vidx_maps_add_entry(struct rvu *rvu, u16 mcam_idx, int pcifunc, + u16 *vidx) +{ + int rc, max, min; + u32 id; + + /* Virtual index start from maximum mcam index + 1 */ + max = npc_priv.bank_depth * 2 * 2 - 1; + min = npc_priv.bank_depth * 2; + + rc = xa_alloc(&npc_priv.xa_vidx2idx_map, &id, + xa_mk_value(mcam_idx), + XA_LIMIT(min, max), GFP_KERNEL); + if (rc) { + dev_err(rvu->dev, + "%s: Failed to add to vidx2idx map (%u)\n", + __func__, mcam_idx); + goto fail1; + } + + rc = xa_insert(&npc_priv.xa_idx2vidx_map, mcam_idx, + xa_mk_value(id), GFP_KERNEL); + if (rc) { + dev_err(rvu->dev, + "%s: Failed to add to idx2vidx map (%u)\n", + __func__, mcam_idx); + goto fail2; + } + + if (vidx) + *vidx = id; + + return 0; + +fail2: + xa_erase(&npc_priv.xa_vidx2idx_map, id); +fail1: + return rc; +} + static void npc_config_kpmcam(struct rvu *rvu, int blkaddr, const struct npc_kpu_profile_cam *kpucam, int kpm, int entry) @@ -1049,6 +1278,8 @@ int rvu_mbox_handler_npc_cn20k_mcam_write_entry(struct rvu *rvu, int blkaddr, rc; u8 nix_intf; + req->entry = npc_cn20k_vidx2idx(req->entry); + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); if (blkaddr < 0) return NPC_MCAM_INVALID_REQ; @@ -1090,6 +1321,8 @@ int rvu_mbox_handler_npc_cn20k_mcam_read_entry(struct rvu *rvu, u16 pcifunc = req->hdr.pcifunc; int blkaddr, rc; + req->entry = npc_cn20k_vidx2idx(req->entry); + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); if (blkaddr < 0) return NPC_MCAM_INVALID_REQ; @@ -1130,6 +1363,7 @@ int rvu_mbox_handler_npc_cn20k_mcam_alloc_and_write_entry(struct rvu *rvu, entry_req.ref_prio = req->ref_prio; entry_req.ref_entry = req->ref_entry; entry_req.count = 1; + entry_req.virt = req->virt; rc = rvu_mbox_handler_npc_mcam_alloc_entry(rvu, &entry_req, &entry_rsp); @@ -1140,7 +1374,8 @@ int rvu_mbox_handler_npc_cn20k_mcam_alloc_and_write_entry(struct rvu *rvu, return NPC_MCAM_ALLOC_FAILED; /* entry_req.count is 1, so single entry is allocated */ - entry = entry_rsp.entry; + entry = npc_cn20k_vidx2idx(entry_rsp.entry); + mutex_lock(&mcam->lock); if (is_npc_intf_tx(req->intf)) @@ -1154,7 +1389,7 @@ int rvu_mbox_handler_npc_cn20k_mcam_alloc_and_write_entry(struct rvu *rvu, mutex_unlock(&mcam->lock); - rsp->entry = entry; + rsp->entry = entry_rsp.entry; return 0; } @@ -1386,8 +1621,8 @@ int npc_mcam_idx_2_key_type(struct rvu *rvu, u16 mcam_idx, u8 *key_type) /* mcam_idx should be less than (2 * bank depth) */ if (mcam_idx >= npc_priv.bank_depth * 2) { - dev_err(rvu->dev, "%s:%d bad params\n", - __func__, __LINE__); + dev_err(rvu->dev, "%s: bad params\n", + __func__); return -EINVAL; } @@ -1401,8 +1636,8 @@ int npc_mcam_idx_2_key_type(struct rvu *rvu, u16 mcam_idx, u8 *key_type) * number of subbanks available */ if (sb_id >= npc_priv.num_subbanks) { - dev_err(rvu->dev, "%s:%d invalid subbank %d\n", - __func__, __LINE__, sb_id); + dev_err(rvu->dev, "%s: invalid subbank %d\n", + __func__, sb_id); return -EINVAL; } @@ -2259,14 +2494,15 @@ static int npc_idx_free(struct rvu *rvu, u16 *mcam_idx, int count, bool maps_del) { struct npc_subbank *sb; - int idx, i; + u16 vidx, midx; + int sb_off, i; bool ret; int rc; /* Check if we can dealloc indexes properly ? */ for (i = 0; i < count; i++) { - rc = npc_mcam_idx_2_subbank_idx(rvu, mcam_idx[i], - &sb, &idx); + rc = npc_mcam_idx_2_subbank_idx(rvu, npc_vidx2idx(mcam_idx[i]), + &sb, &sb_off); if (rc) { dev_err(rvu->dev, "Failed to free mcam idx=%u\n", mcam_idx[i]); @@ -2275,19 +2511,50 @@ static int npc_idx_free(struct rvu *rvu, u16 *mcam_idx, int count, } for (i = 0; i < count; i++) { - rc = npc_mcam_idx_2_subbank_idx(rvu, mcam_idx[i], - &sb, &idx); - if (rc) - return rc; + if (npc_is_vidx(mcam_idx[i])) { + vidx = mcam_idx[i]; + midx = npc_vidx2idx(vidx); + } else { + midx = mcam_idx[i]; + vidx = npc_idx2vidx(midx); + } - ret = npc_subbank_free(rvu, sb, idx); - if (ret) + if (midx >= npc_priv.bank_depth * npc_priv.num_banks) { + dev_err(rvu->dev, + "%s: Invalid mcam_idx=%u cannot be deleted\n", + __func__, mcam_idx[i]); return -EINVAL; + } + + rc = npc_mcam_idx_2_subbank_idx(rvu, midx, + &sb, &sb_off); + if (rc) { + dev_err(rvu->dev, + "%s: Failed to find subbank info for vidx=%u\n", + __func__, vidx); + return rc; + } + + ret = npc_subbank_free(rvu, sb, sb_off); + if (ret) { + dev_err(rvu->dev, + "%s: Failed to find subbank info for vidx=%u\n", + __func__, vidx); + return -EINVAL; + } if (!maps_del) continue; - rc = npc_del_from_pf_maps(rvu, mcam_idx[i]); + rc = npc_del_from_pf_maps(rvu, midx); + if (rc) + return rc; + + /* If there is no vidx mapping; continue */ + if (vidx == midx) + continue; + + rc = npc_vidx_maps_del_entry(rvu, vidx, NULL); if (rc) return rc; } @@ -2803,10 +3070,12 @@ int npc_cn20k_idx_free(struct rvu *rvu, u16 *mcam_idx, int count) int npc_cn20k_ref_idx_alloc(struct rvu *rvu, int pcifunc, int key_type, int prio, u16 *mcam_idx, int ref, int limit, - bool contig, int count) + bool contig, int count, bool virt) { + bool defrag_candidate = false; int i, eidx, rc, bd; bool ref_valid; + u16 vidx; bd = npc_priv.bank_depth; @@ -2824,6 +3093,7 @@ int npc_cn20k_ref_idx_alloc(struct rvu *rvu, int pcifunc, int key_type, } ref_valid = !!(limit || ref); + defrag_candidate = !ref_valid && !contig && virt; if (!ref_valid) { if (contig && count > npc_priv.subbank_depth) goto try_noref_multi_subbank; @@ -2890,15 +3160,35 @@ try_ref_multi_subbank: add2map: for (i = 0; i < count; i++) { rc = npc_add_to_pf_maps(rvu, mcam_idx[i], pcifunc); - if (rc) { - for (int j = 0; j < i; j++) - npc_del_from_pf_maps(rvu, mcam_idx[j]); + if (rc) + goto err; - return rc; + if (!defrag_candidate) + continue; + + rc = npc_vidx_maps_add_entry(rvu, mcam_idx[i], pcifunc, &vidx); + if (rc) { + npc_del_from_pf_maps(rvu, mcam_idx[i]); + goto err; } + + /* Return vidx to caller */ + mcam_idx[i] = vidx; } return 0; +err: + for (int j = 0; j < i; j++) { + npc_del_from_pf_maps(rvu, npc_vidx2idx(mcam_idx[j])); + + if (!defrag_candidate) + continue; + + npc_vidx_maps_del_entry(rvu, mcam_idx[j], NULL); + } + + return rc; + } void npc_cn20k_subbank_calc_free(struct rvu *rvu, int *x2_free, @@ -3070,6 +3360,524 @@ chk_vfs: return cnt; } +struct npc_defrag_node { + u8 idx; + u8 key_type; + bool valid; + bool refs; + u16 free_cnt; + u16 vidx_cnt; + u16 *vidx; + struct list_head list; +}; + +static bool npc_defrag_skip_restricted_sb(int sb_id) +{ + int i; + + if (!restrict_valid) + return false; + + for (i = 0; i < ARRAY_SIZE(npc_subbank_restricted_idxs); i++) + if (sb_id == npc_subbank_restricted_idxs[i]) + return true; + return false; +} + +/* Find subbank with minimum number of virtual indexes */ +static struct npc_defrag_node *npc_subbank_min_vidx(struct list_head *lh) +{ + struct npc_defrag_node *node, *tnode = NULL; + int min = INT_MAX; + + list_for_each_entry(node, lh, list) { + if (!node->valid) + continue; + + /* if subbank has ref allocated mcam indexes, that subbank + * is not a good candidate to move out indexes. + */ + if (node->refs) + continue; + + if (min > node->vidx_cnt) { + min = node->vidx_cnt; + tnode = node; + } + } + + return tnode; +} + +/* Find subbank with maximum number of free spaces */ +static struct npc_defrag_node *npc_subbank_max_free(struct list_head *lh) +{ + struct npc_defrag_node *node, *tnode = NULL; + int max = INT_MIN; + + list_for_each_entry(node, lh, list) { + if (!node->valid) + continue; + + if (max < node->free_cnt) { + max = node->free_cnt; + tnode = node; + } + } + + return tnode; +} + +static int npc_defrag_alloc_free_slots(struct rvu *rvu, + struct npc_defrag_node *f, + int cnt, u16 *save) +{ + int alloc_cnt1, alloc_cnt2; + struct npc_subbank *sb; + int rc, sb_off, i; + bool deleted; + + sb = &npc_priv.sb[f->idx]; + + alloc_cnt1 = 0; + alloc_cnt2 = 0; + + rc = __npc_subbank_alloc(rvu, sb, + NPC_MCAM_KEY_X2, sb->b0b, + sb->b0t, + NPC_MCAM_LOWER_PRIO, + false, cnt, save, cnt, true, + &alloc_cnt1); + if (alloc_cnt1 < cnt) { + rc = __npc_subbank_alloc(rvu, sb, + NPC_MCAM_KEY_X2, sb->b1b, + sb->b1t, + NPC_MCAM_LOWER_PRIO, + false, cnt - alloc_cnt1, + save + alloc_cnt1, + cnt - alloc_cnt1, + true, &alloc_cnt2); + } + + if (alloc_cnt1 + alloc_cnt2 != cnt) { + dev_err(rvu->dev, + "%s: Failed to alloc cnt=%u alloc_cnt1=%u alloc_cnt2=%u\n", + __func__, cnt, alloc_cnt1, alloc_cnt2); + goto fail_free_alloc; + } + return 0; + +fail_free_alloc: + for (i = 0; i < alloc_cnt1 + alloc_cnt2; i++) { + rc = npc_mcam_idx_2_subbank_idx(rvu, save[i], + &sb, &sb_off); + if (rc) { + dev_err(rvu->dev, + "%s: Error to find subbank for mcam idx=%u\n", + __func__, save[i]); + break; + } + + deleted = __npc_subbank_free(rvu, sb, sb_off); + if (!deleted) { + dev_err(rvu->dev, + "%s: Error to free mcam idx=%u\n", + __func__, save[i]); + break; + } + } + + return rc; +} + +static int npc_defrag_add_2_show_list(struct rvu *rvu, u16 old_midx, + u16 new_midx, u16 vidx) +{ + struct npc_defrag_show_node *node; + + node = kcalloc(1, sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + + node->old_midx = old_midx; + node->new_midx = new_midx; + node->vidx = vidx; + INIT_LIST_HEAD(&node->list); + + mutex_lock(&npc_priv.lock); + list_add_tail(&node->list, &npc_priv.defrag_lh); + mutex_unlock(&npc_priv.lock); + + return 0; +} + +static +int npc_defrag_move_vdx_to_free(struct rvu *rvu, + struct npc_defrag_node *f, + struct npc_defrag_node *v, + int cnt, u16 *save) +{ + struct npc_mcam *mcam = &rvu->hw->mcam; + int i, vidx_cnt, rc, sb_off; + u16 new_midx, old_midx, vidx; + struct npc_subbank *sb; + bool deleted; + u16 pcifunc; + int blkaddr; + void *map; + u8 bank; + u16 midx; + u64 stats; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); + + vidx_cnt = v->vidx_cnt; + for (i = 0; i < cnt; i++) { + vidx = v->vidx[vidx_cnt - i - 1]; + old_midx = npc_vidx2idx(vidx); + new_midx = save[cnt - i - 1]; + + dev_dbg(rvu->dev, + "%s: Moving %u ---> %u (vidx=%u)\n", + __func__, + old_midx, new_midx, vidx); + + rc = npc_defrag_add_2_show_list(rvu, old_midx, new_midx, vidx); + if (rc) + dev_err(rvu->dev, + "%s: Error happened to add to show list vidx=%u\n", + __func__, vidx); + + /* Modify vidx to point to new mcam idx */ + rc = npc_vidx_maps_modify(rvu, vidx, new_midx); + if (rc) + return rc; + + midx = old_midx % mcam->banksize; + bank = old_midx / mcam->banksize; + stats = rvu_read64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_STAT_EXT(midx, + bank)); + + npc_cn20k_enable_mcam_entry(rvu, blkaddr, old_midx, false); + npc_cn20k_copy_mcam_entry(rvu, blkaddr, old_midx, new_midx); + npc_cn20k_enable_mcam_entry(rvu, blkaddr, new_midx, true); + + midx = new_midx % mcam->banksize; + bank = new_midx / mcam->banksize; + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_STAT_EXT(midx, bank), + stats); + + /* Free the old mcam idx */ + rc = npc_mcam_idx_2_subbank_idx(rvu, old_midx, + &sb, &sb_off); + if (rc) { + dev_err(rvu->dev, + "%s: Unable to calculate subbank off for mcamidx=%u\n", + __func__, old_midx); + return rc; + } + + deleted = __npc_subbank_free(rvu, sb, sb_off); + if (!deleted) { + dev_err(rvu->dev, + "%s: Failed to free mcamidx=%u sb=%u sb_off=%u\n", + __func__, old_midx, sb->idx, sb_off); + return -EFAULT; + } + + /* save pcifunc */ + map = xa_load(&npc_priv.xa_idx2pf_map, old_midx); + pcifunc = xa_to_value(map); + + /* delete from pf maps */ + rc = npc_del_from_pf_maps(rvu, old_midx); + if (rc) { + dev_err(rvu->dev, + "%s: Failed to delete pf maps for mcamidx=%u\n", + __func__, old_midx); + return rc; + } + + /* add new mcam_idx to pf map */ + rc = npc_add_to_pf_maps(rvu, new_midx, pcifunc); + if (rc) { + dev_err(rvu->dev, + "%s: Failed to add pf maps for mcamidx=%u\n", + __func__, new_midx); + return rc; + } + + /* Remove from mcam maps */ + mcam->entry2pfvf_map[old_midx] = NPC_MCAM_INVALID_MAP; + mcam->entry2cntr_map[old_midx] = NPC_MCAM_INVALID_MAP; + npc_mcam_clear_bit(mcam, old_midx); + + mcam->entry2pfvf_map[new_midx] = pcifunc; + /* Counter is not preserved */ + mcam->entry2cntr_map[new_midx] = new_midx; + npc_mcam_set_bit(mcam, new_midx); + + /* Mark as invalid */ + v->vidx[vidx_cnt - i - 1] = -1; + save[cnt - i - 1] = -1; + + f->free_cnt--; + v->vidx_cnt--; + } + + return 0; +} + +static int npc_defrag_process(struct rvu *rvu, struct list_head *lh) +{ + struct npc_defrag_node *v = NULL; + struct npc_defrag_node *f = NULL; + int rc = 0, cnt; + u16 *save; + + while (1) { + /* Find subbank with minimum vidx */ + if (!v) { + v = npc_subbank_min_vidx(lh); + if (!v) + break; + } + + /* Find subbank with maximum free slots */ + if (!f) { + f = npc_subbank_max_free(lh); + if (!f) + break; + } + + if (!v->vidx_cnt) { + list_del_init(&v->list); + v = NULL; + continue; + } + + if (!f->free_cnt) { + list_del_init(&f->list); + f = NULL; + continue; + } + + /* If both subbanks are same, choose vidx and + * search for free list again + */ + if (f == v) { + list_del_init(&f->list); + f = NULL; + continue; + } + + /* Calculate minimum free slots needs to be allocated */ + cnt = f->free_cnt > v->vidx_cnt ? v->vidx_cnt : + f->free_cnt; + + dev_dbg(rvu->dev, + "%s: cnt=%u free_cnt=%u(sb=%u) vidx_cnt=%u(sb=%u)\n", + __func__, cnt, f->free_cnt, f->idx, + v->vidx_cnt, v->idx); + + /* Allocate an array to store newly allocated + * free slots (mcam indexes) + */ + save = kcalloc(cnt, sizeof(*save), GFP_KERNEL); + if (!save) { + rc = -ENOMEM; + goto err; + } + + /* Alloc free slots for existing vidx */ + rc = npc_defrag_alloc_free_slots(rvu, f, cnt, save); + if (rc) { + kfree(save); + goto err; + } + + /* Move vidx to free slots; update pf_map and vidx maps, + * and free existing vidx mcam slots + */ + rc = npc_defrag_move_vdx_to_free(rvu, f, v, cnt, save); + if (rc) { + kfree(save); + goto err; + } + + kfree(save); + + if (!f->free_cnt) { + list_del_init(&f->list); + f = NULL; + } + + if (!v->vidx_cnt) { + list_del_init(&v->list); + v = NULL; + } + } + +err: + /* Whole defragmentation process is done within locks. if there + * is an error, it would be hard to roll back as index remove/add + * can fail again if it failed before. This would mean that there + * is bug in the index management algorithm. + * Return from here than rolling back. + */ + return rc; +} + +static void npc_defrag_list_clear(void) +{ + struct npc_defrag_show_node *node, *next; + + mutex_lock(&npc_priv.lock); + list_for_each_entry_safe(node, next, &npc_priv.defrag_lh, list) { + list_del_init(&node->list); + kfree(node); + } + + mutex_unlock(&npc_priv.lock); +} + +static void npc_lock_all_subbank(void) +{ + int i; + + for (i = 0; i < npc_priv.num_subbanks; i++) + mutex_lock(&npc_priv.sb[i].lock); +} + +static void npc_unlock_all_subbank(void) +{ + int i; + + for (i = npc_priv.num_subbanks - 1; i >= 0; i--) + mutex_unlock(&npc_priv.sb[i].lock); +} + +/* Only non-ref non-contigous mcam indexes + * are picked for defrag process + */ +int npc_cn20k_defrag(struct rvu *rvu) +{ + struct npc_mcam *mcam = &rvu->hw->mcam; + struct npc_defrag_node *node, *tnode; + struct list_head x4lh, x2lh, *lh; + int rc = 0, i, sb_off, tot; + struct npc_subbank *sb; + unsigned long index; + void *map; + u16 midx; + + /* Free previous show list */ + npc_defrag_list_clear(); + + INIT_LIST_HEAD(&x4lh); + INIT_LIST_HEAD(&x2lh); + + node = kcalloc(npc_priv.num_subbanks, sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + + /* Lock mcam */ + mutex_lock(&mcam->lock); + npc_lock_all_subbank(); + + /* Fill in node with subbank properties */ + for (i = 0; i < npc_priv.num_subbanks; i++) { + sb = &npc_priv.sb[i]; + + node[i].idx = i; + node[i].key_type = sb->key_type; + node[i].free_cnt = sb->free_cnt; + node[i].vidx = kcalloc(npc_priv.subbank_depth * 2, + sizeof(*node[i].vidx), + GFP_KERNEL); + if (!node[i].vidx) { + rc = -ENOMEM; + goto free_vidx; + } + + /* If subbank is empty, dont include it in defrag + * process + */ + if (sb->flags & NPC_SUBBANK_FLAG_FREE) { + node[i].valid = false; + continue; + } + + if (npc_defrag_skip_restricted_sb(i)) { + node[i].valid = false; + continue; + } + + node[i].valid = true; + INIT_LIST_HEAD(&node[i].list); + + /* Add node to x2 or x4 list */ + lh = sb->key_type == NPC_MCAM_KEY_X2 ? &x2lh : &x4lh; + list_add_tail(&node[i].list, lh); + } + + /* Filling vidx[] array with all vidx in that subbank */ + xa_for_each_start(&npc_priv.xa_vidx2idx_map, index, map, + npc_priv.bank_depth * 2) { + midx = xa_to_value(map); + rc = npc_mcam_idx_2_subbank_idx(rvu, midx, + &sb, &sb_off); + if (rc) { + dev_err(rvu->dev, + "%s: Error to get mcam_idx for vidx=%lu\n", + __func__, index); + goto free_vidx; + } + + tnode = &node[sb->idx]; + tnode->vidx[tnode->vidx_cnt] = index; + tnode->vidx_cnt++; + } + + /* Mark all subbank which has ref allocation */ + for (i = 0; i < npc_priv.num_subbanks; i++) { + tnode = &node[i]; + + if (!tnode->valid) + continue; + + tot = (tnode->key_type == NPC_MCAM_KEY_X2) ? + npc_priv.subbank_depth * 2 : npc_priv.subbank_depth; + + if (node[i].vidx_cnt != tot - tnode->free_cnt) + tnode->refs = true; + } + + rc = npc_defrag_process(rvu, &x2lh); + if (rc) + goto free_vidx; + + rc = npc_defrag_process(rvu, &x4lh); + if (rc) + goto free_vidx; + +free_vidx: + npc_unlock_all_subbank(); + mutex_unlock(&mcam->lock); + for (i = 0; i < npc_priv.num_subbanks; i++) + kfree(node[i].vidx); + kfree(node); + return rc; +} + +int rvu_mbox_handler_npc_defrag(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + return npc_cn20k_defrag(rvu); +} + int npc_cn20k_dft_rules_idx_get(struct rvu *rvu, u16 pcifunc, u16 *bcast, u16 *mcast, u16 *promisc, u16 *ucast) { @@ -3473,6 +4281,8 @@ static int npc_priv_init(struct rvu *rvu) xa_init_flags(&npc_priv.xa_idx2pf_map, XA_FLAGS_ALLOC); xa_init_flags(&npc_priv.xa_pf_map, XA_FLAGS_ALLOC); xa_init_flags(&npc_priv.xa_pf2dfl_rmap, XA_FLAGS_ALLOC); + xa_init_flags(&npc_priv.xa_idx2vidx_map, XA_FLAGS_ALLOC); + xa_init_flags(&npc_priv.xa_vidx2idx_map, XA_FLAGS_ALLOC); if (npc_create_srch_order(num_subbanks)) goto fail1; @@ -3494,6 +4304,9 @@ static int npc_priv_init(struct rvu *rvu) for (i = 0; i < npc_priv.pf_cnt; i++) xa_init_flags(&npc_priv.xa_pf2idx_map[i], XA_FLAGS_ALLOC); + INIT_LIST_HEAD(&npc_priv.defrag_lh); + mutex_init(&npc_priv.lock); + return 0; fail2: @@ -3506,6 +4319,8 @@ fail1: xa_destroy(&npc_priv.xa_idx2pf_map); xa_destroy(&npc_priv.xa_pf_map); xa_destroy(&npc_priv.xa_pf2dfl_rmap); + xa_destroy(&npc_priv.xa_idx2vidx_map); + xa_destroy(&npc_priv.xa_vidx2idx_map); kfree(npc_priv.sb); npc_priv.sb = NULL; return -ENOMEM; @@ -3520,6 +4335,8 @@ void npc_cn20k_deinit(struct rvu *rvu) xa_destroy(&npc_priv.xa_idx2pf_map); xa_destroy(&npc_priv.xa_pf_map); xa_destroy(&npc_priv.xa_pf2dfl_rmap); + xa_destroy(&npc_priv.xa_idx2vidx_map); + xa_destroy(&npc_priv.xa_vidx2idx_map); for (i = 0; i < npc_priv.pf_cnt; i++) xa_destroy(&npc_priv.xa_pf2idx_map[i]); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h index 4b5cdbef334c..f3c01b46c58c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h @@ -147,6 +147,23 @@ struct npc_subbank { u8 key_type; }; +/** + * struct npc_defrag_show_node - Defragmentation show node + * @old_midx: Old mcam index. + * @new_midx: New mcam index. + * @vidx: Virtual index + * @list: Linked list of these nodes + * + * This structure holds information on last defragmentation + * executed on mcam resource. + */ +struct npc_defrag_show_node { + u16 old_midx; + u16 new_midx; + u16 vidx; + struct list_head list; +}; + /** * struct npc_priv_t - NPC private structure. * @bank_depth: Total entries in each bank. @@ -163,6 +180,10 @@ struct npc_subbank { * @pf_cnt: Number of PFs. * @init_done: Indicates MCAM initialization is done. * @xa_pf2dfl_rmap: PF to default rule index map. + * @xa_idx2vidx_map: Mcam index to virtual index map. + * @xa_vidx2idx_map: virtual index to mcam index map. + * @defrag_lh: Defrag list head. + * @lock: Lock for defrag list * * This structure is populated during probing time by reading * HW csr registers. @@ -180,6 +201,10 @@ struct npc_priv_t { struct xarray xa_idx2pf_map; struct xarray xa_pf_map; struct xarray xa_pf2dfl_rmap; + struct xarray xa_idx2vidx_map; + struct xarray xa_vidx2idx_map; + struct list_head defrag_lh; + struct mutex lock; /* protect defrag nodes */ int pf_cnt; bool init_done; }; @@ -276,7 +301,7 @@ void npc_cn20k_subbank_calc_free(struct rvu *rvu, int *x2_free, int npc_cn20k_ref_idx_alloc(struct rvu *rvu, int pcifunc, int key_type, int prio, u16 *mcam_idx, int ref, int limit, - bool contig, int count); + bool contig, int count, bool virt); int npc_cn20k_idx_free(struct rvu *rvu, u16 *mcam_idx, int count); void npc_cn20k_parser_profile_init(struct rvu *rvu, int blkaddr); struct npc_mcam_kex_extr *npc_mkex_extr_default_get(void); @@ -309,5 +334,8 @@ void npc_cn20k_read_mcam_entry(struct rvu *rvu, int blkaddr, u16 index, void npc_cn20k_clear_mcam_entry(struct rvu *rvu, int blkaddr, int bank, int index); int npc_mcam_idx_2_key_type(struct rvu *rvu, u16 mcam_idx, u8 *key_type); +u16 npc_cn20k_vidx2idx(u16 index); +u16 npc_cn20k_idx2vidx(u16 idx); +int npc_cn20k_defrag(struct rvu *rvu); #endif /* NPC_CN20K_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index e004271124df..1638bf4e15fd 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -298,6 +298,9 @@ M(NPC_CN20K_MCAM_READ_ENTRY, 0x6019, npc_cn20k_mcam_read_entry, \ npc_cn20k_mcam_read_entry_rsp) \ M(NPC_CN20K_MCAM_READ_BASE_RULE, 0x601a, npc_cn20k_read_base_steer_rule, \ msg_req, npc_cn20k_mcam_read_base_rule_rsp) \ +M(NPC_MCAM_DEFRAG, 0x601b, npc_defrag, \ + msg_req, \ + msg_rsp) \ /* NIX mbox IDs (range 0x8000 - 0xFFFF) */ \ M(NIX_LF_ALLOC, 0x8000, nix_lf_alloc, \ nix_lf_alloc_req, nix_lf_alloc_rsp) \ @@ -1554,6 +1557,7 @@ struct npc_mcam_alloc_entry_req { u16 ref_entry; u16 count; /* Number of entries requested */ u8 kw_type; /* entry key type, valid for cn20k */ + u8 virt; /* Request virtual index */ }; struct npc_mcam_alloc_entry_rsp { @@ -1689,6 +1693,7 @@ struct npc_cn20k_mcam_alloc_and_write_entry_req { u8 intf; /* Rx or Tx interface */ u8 enable_entry;/* Enable this MCAM entry ? */ u8 hw_prio; /* hardware priority, valid for cn20k */ + u8 virt; /* Allocate virtual index */ u16 reserved[4]; /* reserved for future use */ }; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c index fb15c794efc9..287ff0eda152 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c @@ -11,6 +11,7 @@ #include "rvu_reg.h" #include "rvu_struct.h" #include "rvu_npc_hash.h" +#include "cn20k/npc.h" #define DRV_NAME "octeontx2-af" @@ -1256,9 +1257,71 @@ enum rvu_af_dl_param_id { RVU_AF_DEVLINK_PARAM_ID_NPC_MCAM_ZONE_PERCENT, RVU_AF_DEVLINK_PARAM_ID_NPC_EXACT_FEATURE_DISABLE, RVU_AF_DEVLINK_PARAM_ID_NPC_DEF_RULE_CNTR_ENABLE, + RVU_AF_DEVLINK_PARAM_ID_NPC_DEFRAG, RVU_AF_DEVLINK_PARAM_ID_NIX_MAXLF, }; +static int rvu_af_npc_defrag_feature_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) +{ + struct rvu_devlink *rvu_dl = devlink_priv(devlink); + struct rvu *rvu = rvu_dl->rvu; + bool enabled; + + enabled = is_cn20k(rvu->pdev); + + snprintf(ctx->val.vstr, sizeof(ctx->val.vstr), "%s", + enabled ? "enabled" : "disabled"); + + return 0; +} + +static int rvu_af_npc_defrag(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) +{ + struct rvu_devlink *rvu_dl = devlink_priv(devlink); + struct rvu *rvu = rvu_dl->rvu; + + /* It is hard to roll back if defrag process fails. + * print a error message and return fault. + */ + if (npc_cn20k_defrag(rvu)) { + dev_err(rvu->dev, "Defrag process failed\n"); + return -EFAULT; + } + return 0; +} + +static int rvu_af_npc_defrag_feature_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct rvu_devlink *rvu_dl = devlink_priv(devlink); + struct rvu *rvu = rvu_dl->rvu; + u64 enable; + + if (kstrtoull(val.vstr, 10, &enable)) { + NL_SET_ERR_MSG_MOD(extack, + "Only 1 value is supported"); + return -EINVAL; + } + + if (enable != 1) { + NL_SET_ERR_MSG_MOD(extack, + "Only initiating defrag is supported"); + return -EINVAL; + } + + if (is_cn20k(rvu->pdev)) + return 0; + + NL_SET_ERR_MSG_MOD(extack, + "Can defrag NPC only in cn20k silicon"); + return -EFAULT; +} + static int rvu_af_npc_exact_feature_get(struct devlink *devlink, u32 id, struct devlink_param_gset_ctx *ctx, struct netlink_ext_ack *extack) @@ -1561,6 +1624,15 @@ static const struct devlink_ops rvu_devlink_ops = { .eswitch_mode_set = rvu_devlink_eswitch_mode_set, }; +static const struct devlink_param rvu_af_dl_param_defrag[] = { + DEVLINK_PARAM_DRIVER(RVU_AF_DEVLINK_PARAM_ID_NPC_DEFRAG, + "npc_defrag", DEVLINK_PARAM_TYPE_STRING, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + rvu_af_npc_defrag_feature_get, + rvu_af_npc_defrag, + rvu_af_npc_defrag_feature_validate), +}; + int rvu_register_dl(struct rvu *rvu) { struct rvu_devlink *rvu_dl; @@ -1593,6 +1665,17 @@ int rvu_register_dl(struct rvu *rvu) goto err_dl_health; } + if (is_cn20k(rvu->pdev)) { + err = devlink_params_register(dl, rvu_af_dl_param_defrag, + ARRAY_SIZE(rvu_af_dl_param_defrag)); + if (err) { + dev_err(rvu->dev, + "devlink defrag params register failed with error %d", + err); + goto err_dl_defrag; + } + } + /* Register exact match devlink only for CN10K-B */ if (!rvu_npc_exact_has_match_table(rvu)) goto done; @@ -1601,7 +1684,8 @@ int rvu_register_dl(struct rvu *rvu) ARRAY_SIZE(rvu_af_dl_param_exact_match)); if (err) { dev_err(rvu->dev, - "devlink exact match params register failed with error %d", err); + "devlink exact match params register failed with error %d", + err); goto err_dl_exact_match; } @@ -1610,6 +1694,11 @@ done: return 0; err_dl_exact_match: + if (is_cn20k(rvu->pdev)) + devlink_params_unregister(dl, rvu_af_dl_param_defrag, + ARRAY_SIZE(rvu_af_dl_param_defrag)); + +err_dl_defrag: devlink_params_unregister(dl, rvu_af_dl_params, ARRAY_SIZE(rvu_af_dl_params)); err_dl_health: @@ -1627,6 +1716,10 @@ void rvu_unregister_dl(struct rvu *rvu) devlink_params_unregister(dl, rvu_af_dl_params, ARRAY_SIZE(rvu_af_dl_params)); + if (is_cn20k(rvu->pdev)) + devlink_params_unregister(dl, rvu_af_dl_param_defrag, + ARRAY_SIZE(rvu_af_dl_param_defrag)); + /* Unregister exact match devlink only for CN10K-B */ if (rvu_npc_exact_has_match_table(rvu)) devlink_params_unregister(dl, rvu_af_dl_param_exact_match, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index a7e84c862475..38a84c122465 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -2460,6 +2460,10 @@ static void npc_unmap_mcam_entry_and_cntr(struct rvu *rvu, /* Remove mapping and reduce counter's refcnt */ mcam->entry2cntr_map[entry] = NPC_MCAM_INVALID_MAP; mcam->cntr_refcnt[cntr]--; + + if (is_cn20k(rvu->pdev)) + return; + /* Disable stats */ rvu_write64(rvu, blkaddr, NPC_AF_MCAMEX_BANKX_STAT_ACT(index, bank), 0x00); @@ -2469,7 +2473,7 @@ static void npc_unmap_mcam_entry_and_cntr(struct rvu *rvu, * reverse bitmap too. Should be called with * 'mcam->lock' held. */ -static void npc_mcam_set_bit(struct npc_mcam *mcam, u16 index) +void npc_mcam_set_bit(struct npc_mcam *mcam, u16 index) { u16 entry, rentry; @@ -2485,7 +2489,7 @@ static void npc_mcam_set_bit(struct npc_mcam *mcam, u16 index) * reverse bitmap too. Should be called with * 'mcam->lock' held. */ -static void npc_mcam_clear_bit(struct npc_mcam *mcam, u16 index) +void npc_mcam_clear_bit(struct npc_mcam *mcam, u16 index) { u16 entry, rentry; @@ -2706,7 +2710,7 @@ static int npc_mcam_alloc_entries(struct npc_mcam *mcam, u16 pcifunc, ret = npc_cn20k_ref_idx_alloc(rvu, pcifunc, req->kw_type, req->ref_prio, rsp->entry_list, req->ref_entry, limit, - req->contig, req->count); + req->contig, req->count, !!req->virt); if (ret) { rsp->count = 0; @@ -2726,7 +2730,7 @@ static int npc_mcam_alloc_entries(struct npc_mcam *mcam, u16 pcifunc, mutex_lock(&mcam->lock); /* Mark the allocated entries as used and set nixlf mapping */ for (entry = 0; entry < rsp->count; entry++) { - index = rsp->entry_list[entry]; + index = npc_cn20k_vidx2idx(rsp->entry_list[entry]); npc_mcam_set_bit(mcam, index); mcam->entry2pfvf_map[index] = pcifunc; mcam->entry2cntr_map[index] = NPC_MCAM_INVALID_MAP; @@ -3038,6 +3042,8 @@ int rvu_mbox_handler_npc_mcam_free_entry(struct rvu *rvu, int blkaddr, rc = 0; u16 cntr; + req->entry = npc_cn20k_vidx2idx(req->entry); + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); if (blkaddr < 0) return NPC_MCAM_INVALID_REQ; @@ -3171,6 +3177,8 @@ int rvu_mbox_handler_npc_mcam_ena_entry(struct rvu *rvu, u16 pcifunc = req->hdr.pcifunc; int blkaddr, rc; + req->entry = npc_cn20k_vidx2idx(req->entry); + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); if (blkaddr < 0) return NPC_MCAM_INVALID_REQ; @@ -3194,6 +3202,8 @@ int rvu_mbox_handler_npc_mcam_dis_entry(struct rvu *rvu, u16 pcifunc = req->hdr.pcifunc; int blkaddr, rc; + req->entry = npc_cn20k_vidx2idx(req->entry); + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); if (blkaddr < 0) return NPC_MCAM_INVALID_REQ; @@ -3228,8 +3238,8 @@ int rvu_mbox_handler_npc_mcam_shift_entry(struct rvu *rvu, mutex_lock(&mcam->lock); for (index = 0; index < req->shift_count; index++) { - old_entry = req->curr_entry[index]; - new_entry = req->new_entry[index]; + old_entry = npc_cn20k_vidx2idx(req->curr_entry[index]); + new_entry = npc_cn20k_vidx2idx(req->new_entry[index]); /* Check if both old and new entries are valid and * does belong to this PFFUNC or not. @@ -3860,6 +3870,8 @@ int rvu_mbox_handler_npc_mcam_entry_stats(struct rvu *rvu, if (blkaddr < 0) return NPC_MCAM_INVALID_REQ; + req->entry = npc_cn20k_vidx2idx(req->entry); + index = req->entry & (mcam->banksize - 1); bank = npc_get_bank(mcam, req->entry); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.h index 346e6ada158e..83c5e32e2afc 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.h @@ -16,4 +16,6 @@ void npc_config_kpuaction(struct rvu *rvu, int blkaddr, int npc_fwdb_prfl_img_map(struct rvu *rvu, void __iomem **prfl_img_addr, u64 *size); +void npc_mcam_clear_bit(struct npc_mcam *mcam, u16 index); +void npc_mcam_set_bit(struct npc_mcam *mcam, u16 index); #endif /* RVU_NPC_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c index 802b68cf228c..f4f375bba5c9 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c @@ -1655,6 +1655,8 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu, bool enable = true; u16 target; + req->entry = npc_cn20k_vidx2idx(req->entry); + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); if (blkaddr < 0) { dev_err(rvu->dev, "%s: NPC block not implemented\n", __func__); @@ -1808,6 +1810,10 @@ int rvu_mbox_handler_npc_delete_flow(struct rvu *rvu, struct list_head del_list; int blkaddr; + req->entry = npc_cn20k_vidx2idx(req->entry); + req->start = npc_cn20k_vidx2idx(req->start); + req->end = npc_cn20k_vidx2idx(req->end); + INIT_LIST_HEAD(&del_list); mutex_lock(&mcam->lock); From 9000cada7aa9593df9b0c105f906fdfb43a5b341 Mon Sep 17 00:00:00 2001 From: Ratheesh Kannoth Date: Tue, 24 Feb 2026 13:30:06 +0530 Subject: [PATCH 0112/1561] octeontx2-af: npc: cn20k: Allocate MCAM entry for flow installation In CN20K, the PF/VF driver is unaware of the NPC MCAM entry type (x2/x4) required for a particular TC rule when the user installs rules through the TC command. This forces the PF/VF driver to first query the AF driver for the rule size, then allocate an entry, and finally install the flow. This sequence requires three mailbox request/response exchanges from the PF. To speed up the installation, the `install_flow` mailbox request message is extended with additional fields that allow the AF driver to determine the required NPC MCAM entry type, allocate the MCAM entry, and complete the flow installation in a single step. Signed-off-by: Ratheesh Kannoth Link: https://patch.msgid.link/20260224080009.4147301-11-rkannoth@marvell.com Signed-off-by: Jakub Kicinski --- .../ethernet/marvell/octeontx2/af/cn20k/npc.c | 189 +++++++++++-- .../ethernet/marvell/octeontx2/af/cn20k/npc.h | 5 +- .../net/ethernet/marvell/octeontx2/af/mbox.h | 48 ++++ .../ethernet/marvell/octeontx2/af/rvu_npc.c | 4 + .../marvell/octeontx2/af/rvu_npc_fs.c | 137 ++++++++- .../marvell/octeontx2/af/rvu_npc_fs.h | 12 + .../marvell/octeontx2/nic/otx2_flows.c | 265 +++++++++++++++++- 7 files changed, 603 insertions(+), 57 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c index acd065f44efa..63a287ee8f70 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c @@ -14,6 +14,7 @@ #include "rvu_npc.h" #include "cn20k/npc.h" #include "cn20k/reg.h" +#include "rvu_npc_fs.h" static struct npc_priv_t npc_priv = { .num_banks = MAX_NUM_BANKS, @@ -812,7 +813,7 @@ npc_cn20k_enable_mcam_entry(struct rvu *rvu, int blkaddr, cfg = rvu_read64(rvu, blkaddr, NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(mcam_idx, bank)); - hw_prio = cfg & GENMASK_ULL(14, 8); + hw_prio = cfg & GENMASK_ULL(30, 24); cfg = enable ? 1 : 0; cfg |= hw_prio; rvu_write64(rvu, blkaddr, @@ -828,7 +829,7 @@ npc_cn20k_enable_mcam_entry(struct rvu *rvu, int blkaddr, cfg = rvu_read64(rvu, blkaddr, NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(mcam_idx, bank)); - hw_prio = cfg & GENMASK_ULL(14, 8); + hw_prio = cfg & GENMASK_ULL(30, 24); cfg = enable ? 1 : 0; cfg |= hw_prio; rvu_write64(rvu, blkaddr, @@ -895,25 +896,33 @@ static void npc_cn20k_get_keyword(struct cn20k_mcam_entry *entry, int idx, *cam0 = ~*cam1 & kw_mask; } -/*-----------------------------------------------------------------------------| - *Kex type | mcam entry | cam1 | cam 0 || <----- output ----> | - *profile | len | (key type) | (key type) || len | type | - *-----------------------------------------------------------------------------| - *X2 | 256 (X2) | 001b | 110b || X2 | X2 | - *-----------------------------------------------------------------------------| - *X4 | 256 (X2) | 000b | 000b || X2 | DYNAMIC | - *-----------------------------------------------------------------------------| - *X4 | 512 (X4) | 010b | 101b || X4 | X4 | - *-----------------------------------------------------------------------------| - *DYN | 256 (X2) | 000b | 000b || X2 | DYNAMIC | - *-----------------------------------------------------------------------------| - *DYN | 512 (X4) | 010b | 101b || X4 | X4 | - *-----------------------------------------------------------------------------| +/*------------------------------------------------------------------------- + *Kex type| mcam | cam1 |cam0 | req_kwtype||<----- output > | + * in | | | | || | | + * profile| len | | | ||len | type | + *------------------------------------------------------------------------- + *X2 | 256 (X2) | 001b |110b | 0 ||X2 | X2 | + *------------------------------------------------------------------------| + *X4 | 256 (X2) | 000b |000b | 0 ||X2 | DYN | + *------------------------------------------------------------------------| + *X4 | 512 (X4) | 010b |101b | 0 ||X4 | X4 | + *------------------------------------------------------------------------| + *DYN | 256 (X2) | 000b |000b | 0 ||X2 | DYN | + *------------------------------------------------------------------------| + *DYN | 512 (X4) | 010b |101b | 0 ||X4 | X4 | + *------------------------------------------------------------------------| + *X4 | 256 (X2) | 000b |000b | X2 ||DYN | DYN | + *------------------------------------------------------------------------| + *DYNC | 256 (X2) | 000b |000b | X2 ||DYN | DYN | + *------------------------------------------------------------------------| + * X2 | 512 (X4) | xxxb |xxxb | X4 ||INVAL | INVAL | + *------------------------------------------------------------------------| */ static void npc_cn20k_config_kw_x2(struct rvu *rvu, struct npc_mcam *mcam, int blkaddr, int index, u8 intf, struct cn20k_mcam_entry *entry, - int bank, u8 kw_type, int kw) + int bank, u8 kw_type, int kw, + u8 req_kw_type) { u64 intf_ext = 0, intf_ext_mask = 0; u8 tx_intf_mask = ~intf & 0x3; @@ -938,6 +947,15 @@ static void npc_cn20k_config_kw_x2(struct rvu *rvu, struct npc_mcam *mcam, kw_type_mask = 0; } + /* Say, we need to write x2 keyword in an x4 subbank. + * req_kw_type will be x2, and kw_type will be x4. + * So in the case ignore kw bits in mcam. + */ + if (kw_type == NPC_MCAM_KEY_X4 && req_kw_type == NPC_MCAM_KEY_X2) { + kw_type = 0; + kw_type_mask = 0; + } + intf_ext = ((u64)kw_type << 16) | tx_intf; intf_ext_mask = (((u64)kw_type_mask << 16) & GENMASK_ULL(18, 16)) | tx_intf_mask; @@ -985,14 +1003,15 @@ static void npc_cn20k_config_kw_x2(struct rvu *rvu, struct npc_mcam *mcam, static void npc_cn20k_config_kw_x4(struct rvu *rvu, struct npc_mcam *mcam, int blkaddr, int index, u8 intf, struct cn20k_mcam_entry *entry, - u8 kw_type) + u8 kw_type, u8 req_kw_type) { int kw = 0, bank; for (bank = 0; bank < mcam->banks_per_entry; bank++, kw = kw + 4) npc_cn20k_config_kw_x2(rvu, mcam, blkaddr, index, intf, - entry, bank, kw_type, kw); + entry, bank, kw_type, + kw, req_kw_type); } static void @@ -1002,7 +1021,7 @@ npc_cn20k_set_mcam_bank_cfg(struct rvu *rvu, int blkaddr, int mcam_idx, struct npc_mcam *mcam = &rvu->hw->mcam; u64 bank_cfg; - bank_cfg = (u64)hw_prio << 8; + bank_cfg = (u64)hw_prio << 24; if (enable) bank_cfg |= 0x1; @@ -1025,7 +1044,7 @@ npc_cn20k_set_mcam_bank_cfg(struct rvu *rvu, int blkaddr, int mcam_idx, void npc_cn20k_config_mcam_entry(struct rvu *rvu, int blkaddr, int index, u8 intf, struct cn20k_mcam_entry *entry, - bool enable, u8 hw_prio) + bool enable, u8 hw_prio, u8 req_kw_type) { struct npc_mcam *mcam = &rvu->hw->mcam; int mcam_idx = index % mcam->banksize; @@ -1048,27 +1067,34 @@ void npc_cn20k_config_mcam_entry(struct rvu *rvu, int blkaddr, int index, npc_cn20k_clear_mcam_entry(rvu, blkaddr, bank, mcam_idx); npc_cn20k_config_kw_x2(rvu, mcam, blkaddr, mcam_idx, intf, entry, - bank, kw_type, kw); + bank, kw_type, kw, req_kw_type); /* Set 'action' */ rvu_write64(rvu, blkaddr, NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(mcam_idx, bank, 0), entry->action); + /* Set 'action2' for inline receive */ + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(mcam_idx, + bank, 2), + entry->action2); + /* Set TAG 'action' */ rvu_write64(rvu, blkaddr, NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(mcam_idx, bank, 1), entry->vtag_action); - goto set_cfg; } + /* Clear mcam entry to avoid writes being suppressed by NPC */ npc_cn20k_clear_mcam_entry(rvu, blkaddr, 0, mcam_idx); npc_cn20k_clear_mcam_entry(rvu, blkaddr, 1, mcam_idx); npc_cn20k_config_kw_x4(rvu, mcam, blkaddr, - mcam_idx, intf, entry, kw_type); + mcam_idx, intf, entry, + kw_type, req_kw_type); for (bank = 0; bank < mcam->banks_per_entry; bank++) { /* Set 'action' */ rvu_write64(rvu, blkaddr, @@ -1081,6 +1107,12 @@ void npc_cn20k_config_mcam_entry(struct rvu *rvu, int blkaddr, int index, NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(mcam_idx, bank, 1), entry->vtag_action); + + /* Set 'action2' for inline receive */ + rvu_write64(rvu, blkaddr, + NPC_AF_CN20K_MCAMEX_BANKX_ACTIONX_EXT(mcam_idx, + bank, 2), + entry->action2); } set_cfg: @@ -1169,7 +1201,7 @@ void npc_cn20k_read_mcam_entry(struct rvu *rvu, int blkaddr, u16 index, bank_cfg = rvu_read64(rvu, blkaddr, NPC_AF_CN20K_MCAMEX_BANKX_CFG_EXT(index, bank)); *ena = bank_cfg & 0x1; - *hw_prio = (bank_cfg & GENMASK_ULL(14, 8)) >> 8; + *hw_prio = (bank_cfg & GENMASK_ULL(30, 24)) >> 24; if (kw_type == NPC_MCAM_KEY_X2) { cam1 = rvu_read64(rvu, blkaddr, NPC_AF_CN20K_MCAMEX_BANKX_CAMX_W0_EXT(index, @@ -1305,7 +1337,7 @@ int rvu_mbox_handler_npc_cn20k_mcam_write_entry(struct rvu *rvu, npc_cn20k_config_mcam_entry(rvu, blkaddr, req->entry, nix_intf, &req->entry_data, req->enable_entry, - req->hw_prio); + req->hw_prio, req->req_kw_type); rc = 0; exit: @@ -1385,7 +1417,7 @@ int rvu_mbox_handler_npc_cn20k_mcam_alloc_and_write_entry(struct rvu *rvu, npc_cn20k_config_mcam_entry(rvu, blkaddr, entry, nix_intf, &req->entry_data, req->enable_entry, - req->hw_prio); + req->hw_prio, req->req_kw_type); mutex_unlock(&mcam->lock); @@ -1393,6 +1425,14 @@ int rvu_mbox_handler_npc_cn20k_mcam_alloc_and_write_entry(struct rvu *rvu, return 0; } +static int rvu_npc_get_base_steer_rule_type(struct rvu *rvu, u16 pcifunc) +{ + if (is_lbk_vf(rvu, pcifunc)) + return NIXLF_PROMISC_ENTRY; + + return NIXLF_UCAST_ENTRY; +} + int rvu_mbox_handler_npc_cn20k_read_base_steer_rule(struct rvu *rvu, struct msg_req *req, struct npc_cn20k_mcam_read_base_rule_rsp *rsp) @@ -1402,6 +1442,7 @@ int rvu_mbox_handler_npc_cn20k_read_base_steer_rule(struct rvu *rvu, u16 pcifunc = req->hdr.pcifunc; u8 intf, enable, hw_prio; struct rvu_pfvf *pfvf; + int rl_type; blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); if (blkaddr < 0) @@ -1427,9 +1468,16 @@ int rvu_mbox_handler_npc_cn20k_read_base_steer_rule(struct rvu *rvu, mutex_unlock(&mcam->lock); goto out; } + + rl_type = rvu_npc_get_base_steer_rule_type(rvu, pcifunc); + /* Read the default ucast entry if there is no pkt steering rule */ - index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, - NIXLF_UCAST_ENTRY); + index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, rl_type); + if (index < 0) { + mutex_unlock(&mcam->lock); + goto out; + } + read_entry: /* Read the mcam entry */ npc_cn20k_read_mcam_entry(rvu, blkaddr, index, @@ -3953,6 +4001,89 @@ int npc_cn20k_dft_rules_idx_get(struct rvu *rvu, u16 pcifunc, u16 *bcast, return set ? 0 : -ESRCH; } +int rvu_mbox_handler_npc_get_pfl_info(struct rvu *rvu, struct msg_req *req, + struct npc_get_pfl_info_rsp *rsp) +{ + if (!is_cn20k(rvu->pdev)) { + dev_err(rvu->dev, "Mbox support is only for cn20k\n"); + return -EOPNOTSUPP; + } + + rsp->kw_type = npc_priv.kw; + rsp->x4_slots = npc_priv.bank_depth; + return 0; +} + +int rvu_mbox_handler_npc_get_num_kws(struct rvu *rvu, + struct npc_get_num_kws_req *req, + struct npc_get_num_kws_rsp *rsp) +{ + struct rvu_npc_mcam_rule dummy = { 0 }; + struct cn20k_mcam_entry cn20k_entry = { 0 }; + struct mcam_entry_mdata mdata = { }; + struct mcam_entry entry = { 0 }; + struct npc_install_flow_req *fl; + int i, cnt = 0, blkaddr; + + if (!is_cn20k(rvu->pdev)) { + dev_err(rvu->dev, "Mbox support is only for cn20k\n"); + return -EOPNOTSUPP; + } + + fl = &req->fl; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); + if (blkaddr < 0) { + dev_err(rvu->dev, "%s: NPC block not implemented\n", __func__); + return NPC_MCAM_INVALID_REQ; + } + + npc_populate_mcam_mdata(rvu, &mdata, &cn20k_entry, &entry); + + npc_update_flow(rvu, &mdata, fl->features, &fl->packet, + &fl->mask, &dummy, fl->intf, blkaddr); + + /* Find the most significant word valid. Traverse from + * MSB to LSB, check if cam0 or cam1 is set + */ + for (i = NPC_CN20K_MAX_KWS_IN_KEY - 1; i >= 0; i--) { + if (cn20k_entry.kw[i] || cn20k_entry.kw_mask[i]) { + cnt = i + 1; + break; + } + } + + rsp->kws = cnt; + + return 0; +} + +int rvu_mbox_handler_npc_get_dft_rl_idxs(struct rvu *rvu, struct msg_req *req, + struct npc_get_dft_rl_idxs_rsp *rsp) +{ + u16 bcast, mcast, promisc, ucast; + u16 pcifunc; + int rc; + + if (!is_cn20k(rvu->pdev)) { + dev_err(rvu->dev, "Mbox support is only for cn20k\n"); + return -EOPNOTSUPP; + } + + pcifunc = req->hdr.pcifunc; + + rc = npc_cn20k_dft_rules_idx_get(rvu, pcifunc, &bcast, &mcast, + &promisc, &ucast); + if (rc) + return rc; + + rsp->bcast = bcast; + rsp->mcast = mcast; + rsp->promisc = promisc; + rsp->ucast = ucast; + return 0; +} + static bool npc_is_cgx_or_lbk(struct rvu *rvu, u16 pcifunc) { return is_pf_cgxmapped(rvu, rvu_get_pf(rvu->pdev, pcifunc)) || diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h index f3c01b46c58c..815d0b257a7e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.h @@ -321,9 +321,8 @@ int npc_cn20k_dft_rules_idx_get(struct rvu *rvu, u16 pcifunc, u16 *bcast, u16 *mcast, u16 *promisc, u16 *ucast); void npc_cn20k_config_mcam_entry(struct rvu *rvu, int blkaddr, int index, - u8 intf, - struct cn20k_mcam_entry *entry, - bool enable, u8 hw_prio); + u8 intf, struct cn20k_mcam_entry *entry, + bool enable, u8 hw_prio, u8 req_kw_type); void npc_cn20k_enable_mcam_entry(struct rvu *rvu, int blkaddr, int index, bool enable); void npc_cn20k_copy_mcam_entry(struct rvu *rvu, int blkaddr, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 1638bf4e15fd..79cb1c752c2b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -301,6 +301,15 @@ M(NPC_CN20K_MCAM_READ_BASE_RULE, 0x601a, npc_cn20k_read_base_steer_rule, \ M(NPC_MCAM_DEFRAG, 0x601b, npc_defrag, \ msg_req, \ msg_rsp) \ +M(NPC_MCAM_GET_NUM_KWS, 0x601c, npc_get_num_kws, \ + npc_get_num_kws_req, \ + npc_get_num_kws_rsp) \ +M(NPC_MCAM_GET_DFT_RL_IDXS, 0x601d, npc_get_dft_rl_idxs, \ + msg_req, \ + npc_get_dft_rl_idxs_rsp)\ +M(NPC_MCAM_GET_NPC_PFL_INFO, 0x601e, npc_get_pfl_info, \ + msg_req, \ + npc_get_pfl_info_rsp) \ /* NIX mbox IDs (range 0x8000 - 0xFFFF) */ \ M(NIX_LF_ALLOC, 0x8000, nix_lf_alloc, \ nix_lf_alloc_req, nix_lf_alloc_rsp) \ @@ -1598,6 +1607,7 @@ struct cn20k_mcam_entry { u64 kw_mask[NPC_CN20K_MAX_KWS_IN_KEY]; u64 action; u64 vtag_action; + u64 action2; }; struct npc_cn20k_mcam_write_entry_req { @@ -1608,6 +1618,7 @@ struct npc_cn20k_mcam_write_entry_req { u8 intf; /* Rx or Tx interface */ u8 enable_entry;/* Enable this MCAM entry ? */ u8 hw_prio; /* hardware priority, valid for cn20k */ + u8 req_kw_type; /* Type of kw which should be written */ u64 reserved; /* reserved for future use */ }; @@ -1694,6 +1705,7 @@ struct npc_cn20k_mcam_alloc_and_write_entry_req { u8 enable_entry;/* Enable this MCAM entry ? */ u8 hw_prio; /* hardware priority, valid for cn20k */ u8 virt; /* Allocate virtual index */ + u8 req_kw_type; /* Key type to be written */ u16 reserved[4]; /* reserved for future use */ }; @@ -1861,11 +1873,47 @@ struct npc_install_flow_req { /* old counter value */ u16 cntr_val; u8 hw_prio; + u8 req_kw_type; /* Key type to be written */ + u8 alloc_entry; /* only for cn20k */ + u16 ref_prio; + u16 ref_entry; }; struct npc_install_flow_rsp { struct mbox_msghdr hdr; int counter; /* negative if no counter else counter number */ + u16 entry; + u8 kw_type; +}; + +struct npc_get_num_kws_req { + struct mbox_msghdr hdr; + struct npc_install_flow_req fl; + u32 rsvd[4]; +}; + +struct npc_get_num_kws_rsp { + struct mbox_msghdr hdr; + int kws; + u32 rsvd[4]; +}; + +struct npc_get_dft_rl_idxs_rsp { + struct mbox_msghdr hdr; + u16 bcast; + u16 mcast; + u16 promisc; + u16 ucast; + u16 vf_ucast; + u16 rsvd[7]; +}; + +struct npc_get_pfl_info_rsp { + struct mbox_msghdr hdr; + u16 x4_slots; + u8 kw_type; + u8 rsvd1[3]; + u32 rsvd2[4]; }; struct npc_delete_flow_req { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 38a84c122465..c2ca5ed1d028 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -2690,6 +2690,10 @@ static int npc_mcam_alloc_entries(struct npc_mcam *mcam, u16 pcifunc, if (!is_cn20k(rvu->pdev)) goto not_cn20k; + /* Only x2 or x4 key types are accepted */ + if (req->kw_type != NPC_MCAM_KEY_X2 && req->kw_type != NPC_MCAM_KEY_X4) + return NPC_MCAM_INVALID_REQ; + /* The below table is being followed during allocation, * * 1. ref_entry == 0 && prio == HIGH && count == 1 : user wants to diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c index f4f375bba5c9..de613b27068d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c @@ -1071,11 +1071,11 @@ static void npc_update_vlan_features(struct rvu *rvu, ~0ULL, 0, intf); } -static void npc_update_flow(struct rvu *rvu, struct mcam_entry_mdata *mdata, - u64 features, struct flow_msg *pkt, - struct flow_msg *mask, - struct rvu_npc_mcam_rule *output, u8 intf, - int blkaddr) +void npc_update_flow(struct rvu *rvu, struct mcam_entry_mdata *mdata, + u64 features, struct flow_msg *pkt, + struct flow_msg *mask, + struct rvu_npc_mcam_rule *output, u8 intf, + int blkaddr) { u64 dmac_mask = ether_addr_to_u64(mask->dmac); u64 smac_mask = ether_addr_to_u64(mask->smac); @@ -1304,7 +1304,7 @@ static int npc_mcast_update_action_index(struct rvu *rvu, struct npc_install_flo return 0; } -static void +void npc_populate_mcam_mdata(struct rvu *rvu, struct mcam_entry_mdata *mdata, struct cn20k_mcam_entry *cn20k_entry, @@ -1549,6 +1549,7 @@ find_rule: cn20k_wreq.intf = req->intf; cn20k_wreq.enable_entry = (u8)enable; cn20k_wreq.hw_prio = req->hw_prio; + cn20k_wreq.req_kw_type = req->req_kw_type; update_rule: @@ -1642,6 +1643,79 @@ update_rule: return 0; } +static int +rvu_npc_free_entry_for_flow_install(struct rvu *rvu, u16 pcifunc, + bool free_entry, int mcam_idx) +{ + struct npc_mcam_free_entry_req free_req = { 0 }; + struct msg_rsp rsp; + int rc; + + if (!free_entry) + return 0; + + free_req.hdr.pcifunc = pcifunc; + free_req.entry = mcam_idx; + rc = rvu_mbox_handler_npc_mcam_free_entry(rvu, &free_req, &rsp); + return rc; +} + +static int +rvu_npc_alloc_entry_for_flow_install(struct rvu *rvu, + struct npc_install_flow_req *fl_req, + u16 *mcam_idx, u8 *kw_type, + bool *allocated) +{ + struct npc_mcam_alloc_entry_req entry_req; + struct npc_mcam_alloc_entry_rsp entry_rsp; + struct npc_get_num_kws_req kws_req; + struct npc_get_num_kws_rsp kws_rsp; + int off, kw_bits, rc; + u8 *src, *dst; + + if (!is_cn20k(rvu->pdev)) { + *kw_type = -1; + return 0; + } + + if (!fl_req->alloc_entry) { + *kw_type = -1; + return 0; + } + + off = offsetof(struct npc_install_flow_req, packet); + dst = (u8 *)&kws_req.fl + off; + src = (u8 *)fl_req + off; + memcpy(dst, src, sizeof(struct npc_install_flow_req) - off); + rc = rvu_mbox_handler_npc_get_num_kws(rvu, &kws_req, &kws_rsp); + if (rc) + return rc; + + kw_bits = kws_rsp.kws * 64; + + *kw_type = NPC_MCAM_KEY_X2; + if (kw_bits > 256) + *kw_type = NPC_MCAM_KEY_X4; + + memset(&entry_req, 0, sizeof(entry_req)); + memset(&entry_rsp, 0, sizeof(entry_rsp)); + + entry_req.hdr.pcifunc = fl_req->hdr.pcifunc; + entry_req.ref_prio = fl_req->ref_prio; + entry_req.ref_entry = fl_req->ref_entry; + entry_req.kw_type = *kw_type; + entry_req.count = 1; + rc = rvu_mbox_handler_npc_mcam_alloc_entry(rvu, + &entry_req, + &entry_rsp); + if (rc) + return rc; + + *mcam_idx = entry_rsp.entry_list[0]; + *allocated = true; + return 0; +} + int rvu_mbox_handler_npc_install_flow(struct rvu *rvu, struct npc_install_flow_req *req, struct npc_install_flow_rsp *rsp) @@ -1652,11 +1726,11 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu, int blkaddr, nixlf, err; struct rvu_pfvf *pfvf; bool pf_set_vfs_mac = false; + bool allocated = false; bool enable = true; + u8 kw_type; u16 target; - req->entry = npc_cn20k_vidx2idx(req->entry); - blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); if (blkaddr < 0) { dev_err(rvu->dev, "%s: NPC block not implemented\n", __func__); @@ -1666,6 +1740,17 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu, if (!is_npc_interface_valid(rvu, req->intf)) return NPC_FLOW_INTF_INVALID; + err = rvu_npc_alloc_entry_for_flow_install(rvu, req, &req->entry, + &kw_type, &allocated); + if (err) { + dev_err(rvu->dev, + "%s: Error to alloc mcam entry for pcifunc=%#x\n", + __func__, req->hdr.pcifunc); + return err; + } + + req->entry = npc_cn20k_vidx2idx(req->entry); + /* If DMAC is not extracted in MKEX, rules installed by AF * can rely on L2MB bit set by hardware protocol checker for * broadcast and multicast addresses. @@ -1679,6 +1764,10 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu, dev_warn(rvu->dev, "%s: mkex profile does not support ucast flow\n", __func__); + rvu_npc_free_entry_for_flow_install(rvu, + req->hdr.pcifunc, + allocated, + req->entry); return NPC_FLOW_NOT_SUPPORTED; } @@ -1686,6 +1775,10 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu, dev_warn(rvu->dev, "%s: mkex profile does not support bcast/mcast flow", __func__); + rvu_npc_free_entry_for_flow_install(rvu, + req->hdr.pcifunc, + allocated, + req->entry); return NPC_FLOW_NOT_SUPPORTED; } @@ -1695,8 +1788,11 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu, } process_flow: - if (from_vf && req->default_rule) + if (from_vf && req->default_rule) { + rvu_npc_free_entry_for_flow_install(rvu, req->hdr.pcifunc, + allocated, req->entry); return NPC_FLOW_VF_PERM_DENIED; + } /* Each PF/VF info is maintained in struct rvu_pfvf. * rvu_pfvf for the target PF/VF needs to be retrieved @@ -1724,8 +1820,11 @@ process_flow: req->chan_mask = 0xFFF; err = npc_check_unsupported_flows(rvu, req->features, req->intf); - if (err) + if (err) { + rvu_npc_free_entry_for_flow_install(rvu, req->hdr.pcifunc, + allocated, req->entry); return NPC_FLOW_NOT_SUPPORTED; + } pfvf = rvu_get_pfvf(rvu, target); @@ -1744,8 +1843,11 @@ process_flow: /* Proceed if NIXLF is attached or not for TX rules */ err = nix_get_nixlf(rvu, target, &nixlf, NULL); - if (err && is_npc_intf_rx(req->intf) && !pf_set_vfs_mac) + if (err && is_npc_intf_rx(req->intf) && !pf_set_vfs_mac) { + rvu_npc_free_entry_for_flow_install(rvu, req->hdr.pcifunc, + allocated, req->entry); return NPC_FLOW_NO_NIXLF; + } /* don't enable rule when nixlf not attached or initialized */ if (!(is_nixlf_attached(rvu, target) && @@ -1760,20 +1862,31 @@ process_flow: enable = true; /* Do not allow requests from uninitialized VFs */ - if (from_vf && !enable) + if (from_vf && !enable) { + rvu_npc_free_entry_for_flow_install(rvu, req->hdr.pcifunc, + allocated, req->entry); return NPC_FLOW_VF_NOT_INIT; + } /* PF sets VF mac & VF NIXLF is not attached, update the mac addr */ if (pf_set_vfs_mac && !enable) { ether_addr_copy(pfvf->default_mac, req->packet.dmac); ether_addr_copy(pfvf->mac_addr, req->packet.dmac); set_bit(PF_SET_VF_MAC, &pfvf->flags); + rvu_npc_free_entry_for_flow_install(rvu, req->hdr.pcifunc, + allocated, req->entry); return 0; } mutex_lock(&rswitch->switch_lock); err = npc_install_flow(rvu, blkaddr, target, nixlf, pfvf, req, rsp, enable, pf_set_vfs_mac); + if (err) + rvu_npc_free_entry_for_flow_install(rvu, req->hdr.pcifunc, + allocated, req->entry); + + rsp->kw_type = kw_type; + rsp->entry = req->entry; mutex_unlock(&rswitch->switch_lock); return err; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.h index 442287ee7baa..d3ba86c23959 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.h @@ -18,4 +18,16 @@ void npc_update_entry(struct rvu *rvu, enum key_fields type, struct mcam_entry_mdata *mdata, u64 val_lo, u64 val_hi, u64 mask_lo, u64 mask_hi, u8 intf); +void npc_update_flow(struct rvu *rvu, struct mcam_entry_mdata *mdata, + u64 features, struct flow_msg *pkt, + struct flow_msg *mask, + struct rvu_npc_mcam_rule *output, u8 intf, + int blkaddr); + +void +npc_populate_mcam_mdata(struct rvu *rvu, + struct mcam_entry_mdata *mdata, + struct cn20k_mcam_entry *cn20k_entry, + struct mcam_entry *entry); + #endif /* RVU_NPC_FS_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c index 531f6adba45f..38cc539d724d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c @@ -37,6 +37,98 @@ static void otx2_clear_ntuple_flow_info(struct otx2_nic *pfvf, struct otx2_flow_ flow_cfg->max_flows = 0; } +static int otx2_mcam_pfl_info_get(struct otx2_nic *pfvf, bool *is_x2, + u16 *x4_slots) +{ + struct npc_get_pfl_info_rsp *rsp; + struct msg_req *req; + static struct { + bool is_set; + bool is_x2; + u16 x4_slots; + } pfl_info; + + /* Avoid sending mboxes for constant information + * like x4_slots + */ + mutex_lock(&pfvf->mbox.lock); + if (pfl_info.is_set) { + *is_x2 = pfl_info.is_x2; + *x4_slots = pfl_info.x4_slots; + mutex_unlock(&pfvf->mbox.lock); + return 0; + } + + req = otx2_mbox_alloc_msg_npc_get_pfl_info(&pfvf->mbox); + if (!req) { + mutex_unlock(&pfvf->mbox.lock); + return -ENOMEM; + } + + /* Send message to AF */ + if (otx2_sync_mbox_msg(&pfvf->mbox)) { + mutex_unlock(&pfvf->mbox.lock); + return -EFAULT; + } + + rsp = (struct npc_get_pfl_info_rsp *)otx2_mbox_get_rsp + (&pfvf->mbox.mbox, 0, &req->hdr); + + if (IS_ERR(rsp)) { + mutex_unlock(&pfvf->mbox.lock); + return -EFAULT; + } + + *is_x2 = (rsp->kw_type == NPC_MCAM_KEY_X2); + if (*is_x2) + *x4_slots = 0; + else + *x4_slots = rsp->x4_slots; + + pfl_info.is_x2 = *is_x2; + pfl_info.x4_slots = *x4_slots; + pfl_info.is_set = true; + + mutex_unlock(&pfvf->mbox.lock); + return 0; +} + +static int otx2_get_dft_rl_idx(struct otx2_nic *pfvf, u16 *mcam_idx) +{ + struct npc_get_dft_rl_idxs_rsp *rsp; + struct msg_req *req; + + mutex_lock(&pfvf->mbox.lock); + + req = otx2_mbox_alloc_msg_npc_get_dft_rl_idxs(&pfvf->mbox); + if (!req) { + mutex_unlock(&pfvf->mbox.lock); + return -ENOMEM; + } + + /* Send message to AF */ + if (otx2_sync_mbox_msg(&pfvf->mbox)) { + mutex_unlock(&pfvf->mbox.lock); + return -EINVAL; + } + + rsp = (struct npc_get_dft_rl_idxs_rsp *)otx2_mbox_get_rsp + (&pfvf->mbox.mbox, 0, &req->hdr); + + if (IS_ERR(rsp)) { + mutex_unlock(&pfvf->mbox.lock); + return -EFAULT; + } + + if (is_otx2_lbkvf(pfvf->pdev)) + *mcam_idx = rsp->promisc; + else + *mcam_idx = rsp->ucast; + + mutex_unlock(&pfvf->mbox.lock); + return 0; +} + static int otx2_free_ntuple_mcam_entries(struct otx2_nic *pfvf) { struct otx2_flow_config *flow_cfg = pfvf->flow_cfg; @@ -69,7 +161,10 @@ int otx2_alloc_mcam_entries(struct otx2_nic *pfvf, u16 count) struct otx2_flow_config *flow_cfg = pfvf->flow_cfg; struct npc_mcam_alloc_entry_req *req; struct npc_mcam_alloc_entry_rsp *rsp; - int ent, allocated = 0; + u16 dft_idx = 0, x4_slots = 0; + int ent, allocated = 0, ref; + bool is_x2 = false; + int rc; /* Free current ones and allocate new ones with requested count */ otx2_free_ntuple_mcam_entries(pfvf); @@ -86,6 +181,22 @@ int otx2_alloc_mcam_entries(struct otx2_nic *pfvf, u16 count) return -ENOMEM; } + if (is_cn20k(pfvf->pdev)) { + rc = otx2_mcam_pfl_info_get(pfvf, &is_x2, &x4_slots); + if (rc) { + netdev_err(pfvf->netdev, "Error to retrieve profile info\n"); + return rc; + } + + rc = otx2_get_dft_rl_idx(pfvf, &dft_idx); + if (rc) { + netdev_err(pfvf->netdev, + "Error to retrieve ucast mcam idx for pcifunc %#x\n", + pfvf->pcifunc); + return rc; + } + } + mutex_lock(&pfvf->mbox.lock); /* In a single request a max of NPC_MAX_NONCONTIG_ENTRIES MCAM entries @@ -96,18 +207,31 @@ int otx2_alloc_mcam_entries(struct otx2_nic *pfvf, u16 count) if (!req) goto exit; + req->kw_type = is_x2 ? NPC_MCAM_KEY_X2 : NPC_MCAM_KEY_X4; req->contig = false; req->count = (count - allocated) > NPC_MAX_NONCONTIG_ENTRIES ? NPC_MAX_NONCONTIG_ENTRIES : count - allocated; + ref = 0; + + if (is_cn20k(pfvf->pdev)) { + req->ref_prio = NPC_MCAM_HIGHER_PRIO; + ref = dft_idx; + } + /* Allocate higher priority entries for PFs, so that VF's entries * will be on top of PF. */ if (!is_otx2_vf(pfvf->pcifunc)) { req->ref_prio = NPC_MCAM_HIGHER_PRIO; - req->ref_entry = flow_cfg->def_ent[0]; + ref = flow_cfg->def_ent[0]; } + if (is_cn20k(pfvf->pdev)) + ref = is_x2 ? ref : ref & (x4_slots - 1); + + req->ref_entry = ref; + /* Send message to AF */ if (otx2_sync_mbox_msg(&pfvf->mbox)) goto exit; @@ -163,8 +287,24 @@ int otx2_mcam_entry_init(struct otx2_nic *pfvf) struct npc_get_field_status_rsp *frsp; struct npc_mcam_alloc_entry_req *req; struct npc_mcam_alloc_entry_rsp *rsp; - int vf_vlan_max_flows; - int ent, count; + int vf_vlan_max_flows, count; + int rc, ref, prio, ent; + u16 dft_idx; + + ref = 0; + prio = 0; + if (is_cn20k(pfvf->pdev)) { + rc = otx2_get_dft_rl_idx(pfvf, &dft_idx); + if (rc) { + netdev_err(pfvf->netdev, + "Error to retrieve ucast mcam idx for pcifunc %#x\n", + pfvf->pcifunc); + return rc; + } + + ref = dft_idx; + prio = NPC_MCAM_HIGHER_PRIO; + } vf_vlan_max_flows = pfvf->total_vfs * OTX2_PER_VF_VLAN_FLOWS; count = flow_cfg->ucast_flt_cnt + @@ -183,8 +323,11 @@ int otx2_mcam_entry_init(struct otx2_nic *pfvf) return -ENOMEM; } + req->kw_type = NPC_MCAM_KEY_X2; req->contig = false; req->count = count; + req->ref_prio = prio; + req->ref_entry = ref; /* Send message to AF */ if (otx2_sync_mbox_msg(&pfvf->mbox)) { @@ -819,7 +962,7 @@ static int otx2_prepare_ipv6_flow(struct ethtool_rx_flow_spec *fsp, } static int otx2_prepare_flow_request(struct ethtool_rx_flow_spec *fsp, - struct npc_install_flow_req *req) + struct npc_install_flow_req *req) { struct ethhdr *eth_mask = &fsp->m_u.ether_spec; struct ethhdr *eth_hdr = &fsp->h_u.ether_spec; @@ -945,6 +1088,58 @@ static int otx2_prepare_flow_request(struct ethtool_rx_flow_spec *fsp, return 0; } +static int otx2_get_kw_type(struct otx2_nic *pfvf, + struct npc_install_flow_req *fl_req, + u8 *kw_type) +{ + struct npc_get_num_kws_req *req; + struct npc_get_num_kws_rsp *rsp; + u8 *src, *dst; + int off, err; + int kw_bits; + + off = offsetof(struct npc_install_flow_req, packet); + + mutex_lock(&pfvf->mbox.lock); + + req = otx2_mbox_alloc_msg_npc_get_num_kws(&pfvf->mbox); + if (!req) { + mutex_unlock(&pfvf->mbox.lock); + return -ENOMEM; + } + + dst = (u8 *)&req->fl + off; + src = (u8 *)fl_req + off; + + memcpy(dst, src, sizeof(struct npc_install_flow_req) - off); + + err = otx2_sync_mbox_msg(&pfvf->mbox); + if (err) { + mutex_unlock(&pfvf->mbox.lock); + netdev_err(pfvf->netdev, + "Error to get default number of keywords\n"); + return err; + } + + rsp = (struct npc_get_num_kws_rsp *)otx2_mbox_get_rsp + (&pfvf->mbox.mbox, 0, &req->hdr); + if (IS_ERR(rsp)) { + mutex_unlock(&pfvf->mbox.lock); + return -EFAULT; + } + + kw_bits = rsp->kws * 64; + + if (kw_bits <= 256) + *kw_type = NPC_MCAM_KEY_X2; + else + *kw_type = NPC_MCAM_KEY_X4; + + mutex_unlock(&pfvf->mbox.lock); + + return 0; +} + static int otx2_is_flow_rule_dmacfilter(struct otx2_nic *pfvf, struct ethtool_rx_flow_spec *fsp) { @@ -973,12 +1168,41 @@ static int otx2_is_flow_rule_dmacfilter(struct otx2_nic *pfvf, static int otx2_add_flow_msg(struct otx2_nic *pfvf, struct otx2_flow *flow) { + struct otx2_flow_config *flow_cfg = pfvf->flow_cfg; + struct npc_install_flow_req *req, treq = { 0 }; u64 ring_cookie = flow->flow_spec.ring_cookie; #ifdef CONFIG_DCB int vlan_prio, qidx, pfc_rule = 0; #endif - struct npc_install_flow_req *req; - int err, vf = 0; + int err, vf = 0, off, sz; + bool modify = false; + u8 kw_type = 0; + u8 *src, *dst; + u16 x4_slots; + bool is_x2; + + if (is_cn20k(pfvf->pdev)) { + err = otx2_mcam_pfl_info_get(pfvf, &is_x2, &x4_slots); + if (err) { + netdev_err(pfvf->netdev, + "Error to retrieve NPC profile info, pcifunc=%#x\n", + pfvf->pcifunc); + return -EFAULT; + } + + if (!is_x2) { + err = otx2_prepare_flow_request(&flow->flow_spec, + &treq); + if (err) + return err; + + err = otx2_get_kw_type(pfvf, &treq, &kw_type); + if (err) + return err; + + modify = true; + } + } mutex_lock(&pfvf->mbox.lock); req = otx2_mbox_alloc_msg_npc_install_flow(&pfvf->mbox); @@ -987,14 +1211,29 @@ static int otx2_add_flow_msg(struct otx2_nic *pfvf, struct otx2_flow *flow) return -ENOMEM; } - err = otx2_prepare_flow_request(&flow->flow_spec, req); - if (err) { - /* free the allocated msg above */ - otx2_mbox_reset(&pfvf->mbox.mbox, 0); - mutex_unlock(&pfvf->mbox.lock); - return err; + if (modify) { + off = offsetof(struct npc_install_flow_req, packet); + sz = sizeof(struct npc_install_flow_req) - off; + dst = (u8 *)req + off; + src = (u8 *)&treq + off; + + memcpy(dst, src, sz); + req->req_kw_type = kw_type; + } else { + err = otx2_prepare_flow_request(&flow->flow_spec, req); + if (err) { + /* free the allocated msg above */ + otx2_mbox_reset(&pfvf->mbox.mbox, 0); + mutex_unlock(&pfvf->mbox.lock); + return err; + } } + netdev_dbg(pfvf->netdev, + "flow entry (%u) installed at loc:%u kw_type=%u\n", + flow_cfg->flow_ent[flow->location], + flow->location, kw_type); + req->entry = flow->entry; req->intf = NIX_INTF_RX; req->set_cntr = 1; From 0d12d26701b0d35fd992551cf5608485a706e686 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Tue, 24 Feb 2026 13:30:07 +0530 Subject: [PATCH 0113/1561] octeontx2-pf: cn20k: Add TC rules support Unlike previous silicons, MCAM entries required for TC rules in CN20K are allocated dynamically. The key size can also be dynamic, i.e., X2 or X4. Based on the size of the TC rule match criteria, the AF driver allocates an X2 or X4 rule. This patch implements the required changes for CN20K TC by requesting an MCAM entry from the AF driver on the fly when the user installs a rule. Based on the TC rule priority added or deleted by the user, the PF driver shifts MCAM entries accordingly. If there is a mix of X2 and X4 rules and the user tries to install a rule in the middle of existing rules, the PF driver detects this and rejects the rule since X2 and X4 rules cannot be shifted in hardware. Signed-off-by: Subbaraya Sundeep Signed-off-by: Ratheesh Kannoth Link: https://patch.msgid.link/20260224080009.4147301-12-rkannoth@marvell.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/marvell/octeontx2/af/mbox.h | 5 + .../ethernet/marvell/octeontx2/nic/cn20k.c | 266 ++++++++++++++++++ .../ethernet/marvell/octeontx2/nic/cn20k.h | 14 + .../marvell/octeontx2/nic/otx2_common.h | 35 +++ .../ethernet/marvell/octeontx2/nic/otx2_tc.c | 79 +++--- 5 files changed, 361 insertions(+), 38 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 79cb1c752c2b..2d9f6cb4820f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -1875,6 +1875,11 @@ struct npc_install_flow_req { u8 hw_prio; u8 req_kw_type; /* Key type to be written */ u8 alloc_entry; /* only for cn20k */ +/* For now use any priority, once AF driver is changed to + * allocate least priority entry instead of mid zone then make + * NPC_MCAM_LEAST_PRIO as 3 + */ +#define NPC_MCAM_LEAST_PRIO NPC_MCAM_ANY_PRIO u16 ref_prio; u16 ref_entry; }; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn20k.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn20k.c index a60f8cf53feb..a5a8f4558717 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn20k.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn20k.c @@ -251,6 +251,272 @@ static u8 cn20k_aura_bpid_idx(struct otx2_nic *pfvf, int aura_id) #endif } +static int cn20k_tc_get_entry_index(struct otx2_flow_config *flow_cfg, + struct otx2_tc_flow *node) +{ + struct otx2_tc_flow *tmp; + int index = 0; + + list_for_each_entry(tmp, &flow_cfg->flow_list_tc, list) { + if (tmp == node) + return index; + + index++; + } + + return -1; +} + +int cn20k_tc_free_mcam_entry(struct otx2_nic *nic, u16 entry) +{ + struct npc_mcam_free_entry_req *req; + int err; + + mutex_lock(&nic->mbox.lock); + req = otx2_mbox_alloc_msg_npc_mcam_free_entry(&nic->mbox); + if (!req) { + mutex_unlock(&nic->mbox.lock); + return -ENOMEM; + } + + req->entry = entry; + /* Send message to AF to free MCAM entries */ + err = otx2_sync_mbox_msg(&nic->mbox); + if (err) { + mutex_unlock(&nic->mbox.lock); + return err; + } + + mutex_unlock(&nic->mbox.lock); + + return 0; +} + +static bool cn20k_tc_check_entry_shiftable(struct otx2_nic *nic, + struct otx2_flow_config *flow_cfg, + struct otx2_tc_flow *node, int index, + bool error) +{ + struct otx2_tc_flow *first, *tmp, *n; + u32 prio = 0; + int i = 0; + u8 type; + + first = list_first_entry(&flow_cfg->flow_list_tc, struct otx2_tc_flow, + list); + type = first->kw_type; + + /* Check all the nodes from start to given index (including index) has + * same type i.e, either X2 or X4 + */ + list_for_each_entry_safe(tmp, n, &flow_cfg->flow_list_tc, list) { + if (i > index) + break; + + if (type != tmp->kw_type) { + /* List has both X2 and X4 entries so entries cannot be + * shifted to save MCAM space. + */ + if (error) + dev_err(nic->dev, "Rule %d cannot be shifted to %d\n", + tmp->prio, prio); + return false; + } + + type = tmp->kw_type; + prio = tmp->prio; + i++; + } + + return true; +} + +void cn20k_tc_update_mcam_table_del_req(struct otx2_nic *nic, + struct otx2_flow_config *flow_cfg, + struct otx2_tc_flow *node) +{ + struct otx2_tc_flow *first, *tmp, *n; + int i = 0, index; + u16 cntr_val = 0; + u16 entry; + + index = cn20k_tc_get_entry_index(flow_cfg, node); + if (index < 0) { + netdev_dbg(nic->netdev, "Could not find node\n"); + return; + } + + first = list_first_entry(&flow_cfg->flow_list_tc, struct otx2_tc_flow, + list); + entry = first->entry; + + /* If entries cannot be shifted then delete given entry + * and free it to AF too. + */ + if (!cn20k_tc_check_entry_shiftable(nic, flow_cfg, node, + index, false)) { + list_del(&node->list); + entry = node->entry; + goto free_mcam_entry; + } + + /* Find and delete the entry from the list and re-install + * all the entries from beginning to the index of the + * deleted entry to higher mcam indexes. + */ + list_for_each_entry_safe(tmp, n, &flow_cfg->flow_list_tc, list) { + if (node == tmp) { + list_del(&tmp->list); + break; + } + + otx2_del_mcam_flow_entry(nic, tmp->entry, &cntr_val); + tmp->entry = (list_next_entry(tmp, list))->entry; + tmp->req.entry = tmp->entry; + tmp->req.cntr_val = cntr_val; + } + + list_for_each_entry_safe(tmp, n, &flow_cfg->flow_list_tc, list) { + if (i == index) + break; + + otx2_add_mcam_flow_entry(nic, &tmp->req); + i++; + } + +free_mcam_entry: + if (cn20k_tc_free_mcam_entry(nic, entry)) + netdev_err(nic->netdev, "Freeing entry %d to AF failed\n", + entry); +} + +int cn20k_tc_update_mcam_table_add_req(struct otx2_nic *nic, + struct otx2_flow_config *flow_cfg, + struct otx2_tc_flow *node) +{ + struct otx2_tc_flow *tmp; + u16 cntr_val = 0; + int list_idx, i; + int entry, prev; + + /* Find the index of the entry(list_idx) whose priority + * is greater than the new entry and re-install all + * the entries from beginning to list_idx to higher + * mcam indexes. + */ + list_idx = otx2_tc_add_to_flow_list(flow_cfg, node); + entry = node->entry; + if (!cn20k_tc_check_entry_shiftable(nic, flow_cfg, node, + list_idx, true)) + /* Due to mix of X2 and X4, entries cannot be shifted. + * In this case free the entry allocated for this rule. + */ + return -EINVAL; + + for (i = 0; i < list_idx; i++) { + tmp = otx2_tc_get_entry_by_index(flow_cfg, i); + if (!tmp) + return -ENOMEM; + + otx2_del_mcam_flow_entry(nic, tmp->entry, &cntr_val); + prev = tmp->entry; + tmp->entry = entry; + tmp->req.entry = tmp->entry; + tmp->req.cntr_val = cntr_val; + otx2_add_mcam_flow_entry(nic, &tmp->req); + entry = prev; + } + + return entry; +} + +#define MAX_TC_HW_PRIORITY 125 +#define MAX_TC_VF_PRIORITY 126 +#define MAX_TC_PF_PRIORITY 127 + +static int __cn20k_tc_alloc_entry(struct otx2_nic *nic, + struct npc_install_flow_req *flow_req, + u16 *entry, u8 *type, + u32 tc_priority, bool hw_priority) +{ + struct otx2_flow_config *flow_cfg = nic->flow_cfg; + struct npc_install_flow_req *req; + struct npc_install_flow_rsp *rsp; + struct otx2_tc_flow *tmp; + int ret = 0; + + req = otx2_mbox_alloc_msg_npc_install_flow(&nic->mbox); + if (!req) + return -ENOMEM; + + memcpy(&flow_req->hdr, &req->hdr, sizeof(struct mbox_msghdr)); + memcpy(req, flow_req, sizeof(struct npc_install_flow_req)); + req->alloc_entry = 1; + + /* Allocate very least priority for first rule */ + if (hw_priority || list_empty(&flow_cfg->flow_list_tc)) { + req->ref_prio = NPC_MCAM_LEAST_PRIO; + } else { + req->ref_prio = NPC_MCAM_HIGHER_PRIO; + tmp = list_first_entry(&flow_cfg->flow_list_tc, + struct otx2_tc_flow, list); + req->ref_entry = tmp->entry; + } + + ret = otx2_sync_mbox_msg(&nic->mbox); + if (ret) + return ret; + + rsp = (struct npc_install_flow_rsp *)otx2_mbox_get_rsp(&nic->mbox.mbox, + 0, &req->hdr); + if (IS_ERR(rsp)) + return -EFAULT; + + if (entry) + *entry = rsp->entry; + if (type) + *type = rsp->kw_type; + + return ret; +} + +int cn20k_tc_alloc_entry(struct otx2_nic *nic, + struct flow_cls_offload *tc_flow_cmd, + struct otx2_tc_flow *new_node, + struct npc_install_flow_req *flow_req) +{ + bool hw_priority = false; + u16 entry_from_af; + u8 entry_type; + int ret; + + if (is_otx2_vf(nic->pcifunc)) + flow_req->hw_prio = MAX_TC_VF_PRIORITY; + else + flow_req->hw_prio = MAX_TC_PF_PRIORITY; + + if (new_node->prio <= MAX_TC_HW_PRIORITY) { + flow_req->hw_prio = new_node->prio; + hw_priority = true; + } + + mutex_lock(&nic->mbox.lock); + + ret = __cn20k_tc_alloc_entry(nic, flow_req, &entry_from_af, &entry_type, + new_node->prio, hw_priority); + if (ret) { + mutex_unlock(&nic->mbox.lock); + return ret; + } + + new_node->kw_type = entry_type; + new_node->entry = entry_from_af; + + mutex_unlock(&nic->mbox.lock); + + return 0; +} + static int cn20k_aura_aq_init(struct otx2_nic *pfvf, int aura_id, int pool_id, int numptrs) { diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn20k.h b/drivers/net/ethernet/marvell/octeontx2/nic/cn20k.h index 832adaf8c57f..b5e527f6d7eb 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn20k.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn20k.h @@ -10,8 +10,22 @@ #include "otx2_common.h" +struct otx2_flow_config; +struct otx2_tc_flow; + void cn20k_init(struct otx2_nic *pfvf); int cn20k_register_pfvf_mbox_intr(struct otx2_nic *pf, int numvfs); void cn20k_disable_pfvf_mbox_intr(struct otx2_nic *pf, int numvfs); void cn20k_enable_pfvf_mbox_intr(struct otx2_nic *pf, int numvfs); +void cn20k_tc_update_mcam_table_del_req(struct otx2_nic *nic, + struct otx2_flow_config *flow_cfg, + struct otx2_tc_flow *node); +int cn20k_tc_update_mcam_table_add_req(struct otx2_nic *nic, + struct otx2_flow_config *flow_cfg, + struct otx2_tc_flow *node); +int cn20k_tc_alloc_entry(struct otx2_nic *nic, + struct flow_cls_offload *tc_flow_cmd, + struct otx2_tc_flow *new_node, + struct npc_install_flow_req *dummy); +int cn20k_tc_free_mcam_entry(struct otx2_nic *nic, u16 entry); #endif /* CN20K_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 255c7e2633bb..eecee612b7b2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -366,6 +366,31 @@ struct otx2_flow_config { u16 ntuple_cnt; }; +struct otx2_tc_flow_stats { + u64 bytes; + u64 pkts; + u64 used; +}; + +struct otx2_tc_flow { + struct list_head list; + unsigned long cookie; + struct rcu_head rcu; + struct otx2_tc_flow_stats stats; + spinlock_t lock; /* lock for stats */ + u16 rq; + u16 entry; + u16 leaf_profile; + bool is_act_police; + u32 prio; + struct npc_install_flow_req req; + u64 rate; + u32 burst; + u32 mcast_grp_idx; + bool is_pps; + u8 kw_type; /* X2/X4 */ +}; + struct dev_hw_ops { int (*sq_aq_init)(void *dev, u16 qidx, u8 chan_offset, u16 sqb_aura); @@ -1223,4 +1248,14 @@ void otx2_dma_unmap_skb_frags(struct otx2_nic *pfvf, struct sg_list *sg); int otx2_read_free_sqe(struct otx2_nic *pfvf, u16 qidx); void otx2_queue_vf_work(struct mbox *mw, struct workqueue_struct *mbox_wq, int first, int mdevs, u64 intr); +int otx2_del_mcam_flow_entry(struct otx2_nic *nic, u16 entry, + u16 *cntr_val); +int otx2_add_mcam_flow_entry(struct otx2_nic *nic, + struct npc_install_flow_req *req); +int otx2_tc_add_to_flow_list(struct otx2_flow_config *flow_cfg, + struct otx2_tc_flow *node); + +struct otx2_tc_flow * +otx2_tc_get_entry_by_index(struct otx2_flow_config *flow_cfg, + int index); #endif /* OTX2_COMMON_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c index 04fcfbdcf69b..40162b08014d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c @@ -31,30 +31,6 @@ #define MCAST_INVALID_GRP (-1U) -struct otx2_tc_flow_stats { - u64 bytes; - u64 pkts; - u64 used; -}; - -struct otx2_tc_flow { - struct list_head list; - unsigned long cookie; - struct rcu_head rcu; - struct otx2_tc_flow_stats stats; - spinlock_t lock; /* lock for stats */ - u16 rq; - u16 entry; - u16 leaf_profile; - bool is_act_police; - u32 prio; - struct npc_install_flow_req req; - u32 mcast_grp_idx; - u64 rate; - u32 burst; - bool is_pps; -}; - static void otx2_get_egress_burst_cfg(struct otx2_nic *nic, u32 burst, u32 *burst_exp, u32 *burst_mantissa) { @@ -971,8 +947,9 @@ static void otx2_destroy_tc_flow_list(struct otx2_nic *pfvf) } } -static struct otx2_tc_flow *otx2_tc_get_entry_by_cookie(struct otx2_flow_config *flow_cfg, - unsigned long cookie) +static struct otx2_tc_flow * +otx2_tc_get_entry_by_cookie(struct otx2_flow_config *flow_cfg, + unsigned long cookie) { struct otx2_tc_flow *tmp; @@ -984,8 +961,8 @@ static struct otx2_tc_flow *otx2_tc_get_entry_by_cookie(struct otx2_flow_config return NULL; } -static struct otx2_tc_flow *otx2_tc_get_entry_by_index(struct otx2_flow_config *flow_cfg, - int index) +struct otx2_tc_flow * +otx2_tc_get_entry_by_index(struct otx2_flow_config *flow_cfg, int index) { struct otx2_tc_flow *tmp; int i = 0; @@ -1014,8 +991,8 @@ static void otx2_tc_del_from_flow_list(struct otx2_flow_config *flow_cfg, } } -static int otx2_tc_add_to_flow_list(struct otx2_flow_config *flow_cfg, - struct otx2_tc_flow *node) +int otx2_tc_add_to_flow_list(struct otx2_flow_config *flow_cfg, + struct otx2_tc_flow *node) { struct list_head *pos, *n; struct otx2_tc_flow *tmp; @@ -1038,7 +1015,8 @@ static int otx2_tc_add_to_flow_list(struct otx2_flow_config *flow_cfg, return index; } -static int otx2_add_mcam_flow_entry(struct otx2_nic *nic, struct npc_install_flow_req *req) +int otx2_add_mcam_flow_entry(struct otx2_nic *nic, + struct npc_install_flow_req *req) { struct npc_install_flow_req *tmp_req; int err; @@ -1064,7 +1042,7 @@ static int otx2_add_mcam_flow_entry(struct otx2_nic *nic, struct npc_install_flo return 0; } -static int otx2_del_mcam_flow_entry(struct otx2_nic *nic, u16 entry, u16 *cntr_val) +int otx2_del_mcam_flow_entry(struct otx2_nic *nic, u16 entry, u16 *cntr_val) { struct npc_delete_flow_rsp *rsp; struct npc_delete_flow_req *req; @@ -1114,6 +1092,11 @@ static int otx2_tc_update_mcam_table_del_req(struct otx2_nic *nic, int i = 0, index = 0; u16 cntr_val = 0; + if (is_cn20k(nic->pdev)) { + cn20k_tc_update_mcam_table_del_req(nic, flow_cfg, node); + return 0; + } + /* Find and delete the entry from the list and re-install * all the entries from beginning to the index of the * deleted entry to higher mcam indexes. @@ -1153,6 +1136,9 @@ static int otx2_tc_update_mcam_table_add_req(struct otx2_nic *nic, int list_idx, i; u16 cntr_val = 0; + if (is_cn20k(nic->pdev)) + return cn20k_tc_update_mcam_table_add_req(nic, flow_cfg, node); + /* Find the index of the entry(list_idx) whose priority * is greater than the new entry and re-install all * the entries from beginning to list_idx to higher @@ -1172,7 +1158,7 @@ static int otx2_tc_update_mcam_table_add_req(struct otx2_nic *nic, mcam_idx++; } - return mcam_idx; + return flow_cfg->flow_ent[mcam_idx]; } static int otx2_tc_update_mcam_table(struct otx2_nic *nic, @@ -1238,7 +1224,6 @@ static int otx2_tc_del_flow(struct otx2_nic *nic, mutex_unlock(&nic->mbox.lock); } - free_mcam_flow: otx2_del_mcam_flow_entry(nic, flow_node->entry, NULL); otx2_tc_update_mcam_table(nic, flow_cfg, flow_node, false); @@ -1254,7 +1239,7 @@ static int otx2_tc_add_flow(struct otx2_nic *nic, struct otx2_flow_config *flow_cfg = nic->flow_cfg; struct otx2_tc_flow *new_node, *old_node; struct npc_install_flow_req *req, dummy; - int rc, err, mcam_idx; + int rc, err, entry; if (!(nic->flags & OTX2_FLAG_TC_FLOWER_SUPPORT)) return -ENOMEM; @@ -1264,7 +1249,7 @@ static int otx2_tc_add_flow(struct otx2_nic *nic, return -EINVAL; } - if (flow_cfg->nr_flows == flow_cfg->max_flows) { + if (!is_cn20k(nic->pdev) && flow_cfg->nr_flows == flow_cfg->max_flows) { NL_SET_ERR_MSG_MOD(extack, "Free MCAM entry not available to add the flow"); return -ENOMEM; @@ -1292,7 +1277,23 @@ static int otx2_tc_add_flow(struct otx2_nic *nic, if (old_node) otx2_tc_del_flow(nic, tc_flow_cmd); - mcam_idx = otx2_tc_update_mcam_table(nic, flow_cfg, new_node, true); + if (is_cn20k(nic->pdev)) { + rc = cn20k_tc_alloc_entry(nic, tc_flow_cmd, new_node, &dummy); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, + "MCAM rule allocation failed"); + kfree_rcu(new_node, rcu); + return rc; + } + } + + entry = otx2_tc_update_mcam_table(nic, flow_cfg, new_node, true); + if (entry < 0) { + NL_SET_ERR_MSG_MOD(extack, "Adding rule failed"); + rc = entry; + goto free_leaf; + } + mutex_lock(&nic->mbox.lock); req = otx2_mbox_alloc_msg_npc_install_flow(&nic->mbox); if (!req) { @@ -1304,7 +1305,7 @@ static int otx2_tc_add_flow(struct otx2_nic *nic, memcpy(&dummy.hdr, &req->hdr, sizeof(struct mbox_msghdr)); memcpy(req, &dummy, sizeof(struct npc_install_flow_req)); req->channel = nic->hw.rx_chan_base; - req->entry = flow_cfg->flow_ent[mcam_idx]; + req->entry = (u16)entry; req->intf = NIX_INTF_RX; req->vf = nic->pcifunc; req->set_cntr = 1; @@ -1325,6 +1326,8 @@ static int otx2_tc_add_flow(struct otx2_nic *nic, return 0; free_leaf: + if (is_cn20k(nic->pdev)) + cn20k_tc_free_mcam_entry(nic, new_node->entry); otx2_tc_del_from_flow_list(flow_cfg, new_node); if (new_node->is_act_police) { mutex_lock(&nic->mbox.lock); From 528530dff56b4a39a19b1ead9de376d22d0baf71 Mon Sep 17 00:00:00 2001 From: Ratheesh Kannoth Date: Tue, 24 Feb 2026 13:30:08 +0530 Subject: [PATCH 0114/1561] octeontx2-af: npc: cn20k: add debugfs support CN20K silicon divides the NPC MCAM into banks and subbanks, with each subbank configurable for x2 or x4 key widths. This patch adds debugfs entries to expose subbank usage details and their configured key type. A debugfs entry is also added to display the default MCAM indexes allocated for each pcifunc. Additionally, debugfs support is introduced to show the mapping between virtual indexes and real MCAM indexes, and vice versa. Signed-off-by: Ratheesh Kannoth Link: https://patch.msgid.link/20260224080009.4147301-13-rkannoth@marvell.com Signed-off-by: Jakub Kicinski --- .../marvell/octeontx2/af/cn20k/debugfs.c | 261 ++++++++++++++++++ .../marvell/octeontx2/af/rvu_debugfs.c | 41 ++- 2 files changed, 292 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c index 0ba123be6643..3debf2fae1a4 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/debugfs.c @@ -15,8 +15,269 @@ #include "debugfs.h" #include "cn20k/npc.h" +static int npc_mcam_layout_show(struct seq_file *s, void *unused) +{ + int i, j, sbd, idx0, idx1, vidx0, vidx1; + struct npc_priv_t *npc_priv; + char buf0[32], buf1[32]; + struct npc_subbank *sb; + unsigned int bw0, bw1; + bool v0, v1; + int pf1, pf2; + bool e0, e1; + void *map; + + npc_priv = s->private; + + sbd = npc_priv->subbank_depth; + + for (i = npc_priv->num_subbanks - 1; i >= 0; i--) { + sb = &npc_priv->sb[i]; + mutex_lock(&sb->lock); + + if (sb->flags & NPC_SUBBANK_FLAG_FREE) + goto next; + + bw0 = bitmap_weight(sb->b0map, npc_priv->subbank_depth); + if (sb->key_type == NPC_MCAM_KEY_X4) { + seq_printf(s, "\n\nsubbank:%u, x4, free=%u, used=%u\n", + sb->idx, sb->free_cnt, bw0); + + for (j = sbd - 1; j >= 0; j--) { + if (!test_bit(j, sb->b0map)) + continue; + + idx0 = sb->b0b + j; + map = xa_load(&npc_priv->xa_idx2pf_map, idx0); + pf1 = xa_to_value(map); + + map = xa_load(&npc_priv->xa_idx2vidx_map, idx0); + if (map) { + vidx0 = xa_to_value(map); + snprintf(buf0, sizeof(buf0), + "v:%u", vidx0); + } + + seq_printf(s, "\t%u(%#x) %s\n", idx0, pf1, + map ? buf0 : " "); + } + goto next; + } + + bw1 = bitmap_weight(sb->b1map, npc_priv->subbank_depth); + seq_printf(s, "\n\nsubbank:%u, x2, free=%u, used=%u\n", + sb->idx, sb->free_cnt, bw0 + bw1); + seq_printf(s, "bank1(%u)\t\tbank0(%u)\n", bw1, bw0); + + for (j = sbd - 1; j >= 0; j--) { + e0 = test_bit(j, sb->b0map); + e1 = test_bit(j, sb->b1map); + + if (!e1 && !e0) + continue; + + if (e1 && e0) { + idx0 = sb->b0b + j; + map = xa_load(&npc_priv->xa_idx2pf_map, idx0); + pf1 = xa_to_value(map); + + map = xa_load(&npc_priv->xa_idx2vidx_map, idx0); + v0 = !!map; + if (v0) { + vidx0 = xa_to_value(map); + snprintf(buf0, sizeof(buf0), "v:%05u", + vidx0); + } + + idx1 = sb->b1b + j; + map = xa_load(&npc_priv->xa_idx2pf_map, idx1); + pf2 = xa_to_value(map); + + map = xa_load(&npc_priv->xa_idx2vidx_map, idx1); + v1 = !!map; + if (v1) { + vidx1 = xa_to_value(map); + snprintf(buf1, sizeof(buf1), "v:%05u", + vidx1); + } + + seq_printf(s, "%05u(%#x) %s\t\t%05u(%#x) %s\n", + idx1, pf2, v1 ? buf1 : " ", + idx0, pf1, v0 ? buf0 : " "); + + continue; + } + + if (e0) { + idx0 = sb->b0b + j; + map = xa_load(&npc_priv->xa_idx2pf_map, idx0); + pf1 = xa_to_value(map); + + map = xa_load(&npc_priv->xa_idx2vidx_map, idx0); + if (map) { + vidx0 = xa_to_value(map); + snprintf(buf0, sizeof(buf0), "v:%05u", + vidx0); + } + + seq_printf(s, "\t\t \t\t%05u(%#x) %s\n", idx0, + pf1, map ? buf0 : " "); + continue; + } + + idx1 = sb->b1b + j; + map = xa_load(&npc_priv->xa_idx2pf_map, idx1); + pf1 = xa_to_value(map); + map = xa_load(&npc_priv->xa_idx2vidx_map, idx1); + if (map) { + vidx1 = xa_to_value(map); + snprintf(buf1, sizeof(buf1), "v:%05u", vidx1); + } + + seq_printf(s, "%05u(%#x) %s\n", idx1, pf1, + map ? buf1 : " "); + } +next: + mutex_unlock(&sb->lock); + } + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(npc_mcam_layout); + +static int npc_mcam_default_show(struct seq_file *s, void *unused) +{ + struct npc_priv_t *npc_priv; + unsigned long index; + u16 ptr[4], pcifunc; + struct rvu *rvu; + int rc, i; + void *map; + + npc_priv = npc_priv_get(); + rvu = s->private; + + seq_puts(s, "\npcifunc\tBcast\tmcast\tpromisc\tucast\n"); + + xa_for_each(&npc_priv->xa_pf_map, index, map) { + pcifunc = index; + + for (i = 0; i < ARRAY_SIZE(ptr); i++) + ptr[i] = USHRT_MAX; + + rc = npc_cn20k_dft_rules_idx_get(rvu, pcifunc, &ptr[0], + &ptr[1], &ptr[2], &ptr[3]); + if (rc) + continue; + + seq_printf(s, "%#x\t", pcifunc); + for (i = 0; i < ARRAY_SIZE(ptr); i++) { + if (ptr[i] != USHRT_MAX) + seq_printf(s, "%u\t", ptr[i]); + else + seq_puts(s, "\t"); + } + seq_puts(s, "\n"); + } + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(npc_mcam_default); + +static int npc_vidx2idx_map_show(struct seq_file *s, void *unused) +{ + struct npc_priv_t *npc_priv; + unsigned long index, start; + struct xarray *xa; + void *map; + + npc_priv = s->private; + start = npc_priv->bank_depth * 2; + xa = &npc_priv->xa_vidx2idx_map; + + seq_puts(s, "\nvidx\tmcam_idx\n"); + + xa_for_each_start(xa, index, map, start) + seq_printf(s, "%lu\t%lu\n", index, xa_to_value(map)); + return 0; +} +DEFINE_SHOW_ATTRIBUTE(npc_vidx2idx_map); + +static int npc_idx2vidx_map_show(struct seq_file *s, void *unused) +{ + struct npc_priv_t *npc_priv; + unsigned long index; + struct xarray *xa; + void *map; + + npc_priv = s->private; + xa = &npc_priv->xa_idx2vidx_map; + + seq_puts(s, "\nmidx\tvidx\n"); + + xa_for_each(xa, index, map) + seq_printf(s, "%lu\t%lu\n", index, xa_to_value(map)); + return 0; +} +DEFINE_SHOW_ATTRIBUTE(npc_idx2vidx_map); + +static int npc_defrag_show(struct seq_file *s, void *unused) +{ + struct npc_defrag_show_node *node; + struct npc_priv_t *npc_priv; + u16 sbd, bdm; + + npc_priv = s->private; + bdm = npc_priv->bank_depth - 1; + sbd = npc_priv->subbank_depth; + + seq_puts(s, "\nold(sb) -> new(sb)\t\tvidx\n"); + + mutex_lock(&npc_priv->lock); + list_for_each_entry(node, &npc_priv->defrag_lh, list) + seq_printf(s, "%u(%u)\t%u(%u)\t%u\n", node->old_midx, + (node->old_midx & bdm) / sbd, + node->new_midx, + (node->new_midx & bdm) / sbd, + node->vidx); + mutex_unlock(&npc_priv->lock); + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(npc_defrag); + int npc_cn20k_debugfs_init(struct rvu *rvu) { + struct npc_priv_t *npc_priv = npc_priv_get(); + struct dentry *npc_dentry; + + npc_dentry = debugfs_create_file("mcam_layout", 0444, rvu->rvu_dbg.npc, + npc_priv, &npc_mcam_layout_fops); + + if (!npc_dentry) + return -EFAULT; + + npc_dentry = debugfs_create_file("mcam_default", 0444, rvu->rvu_dbg.npc, + rvu, &npc_mcam_default_fops); + + if (!npc_dentry) + return -EFAULT; + + npc_dentry = debugfs_create_file("vidx2idx", 0444, rvu->rvu_dbg.npc, + npc_priv, &npc_vidx2idx_map_fops); + if (!npc_dentry) + return -EFAULT; + + npc_dentry = debugfs_create_file("idx2vidx", 0444, rvu->rvu_dbg.npc, + npc_priv, &npc_idx2vidx_map_fops); + if (!npc_dentry) + return -EFAULT; + + npc_dentry = debugfs_create_file("defrag", 0444, rvu->rvu_dbg.npc, + npc_priv, &npc_defrag_fops); + if (!npc_dentry) + return -EFAULT; + return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index 620724dad093..413f9fa40b33 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -23,6 +23,7 @@ #include "cn20k/reg.h" #include "cn20k/debugfs.h" +#include "cn20k/npc.h" #define DEBUGFS_DIR_NAME "octeontx2" @@ -3197,7 +3198,9 @@ static void rvu_print_npc_mcam_info(struct seq_file *s, static int rvu_dbg_npc_mcam_info_display(struct seq_file *filp, void *unsued) { struct rvu *rvu = filp->private; + int x4_free, x2_free, sb_free; int pf, vf, numvfs, blkaddr; + struct npc_priv_t *npc_priv; struct npc_mcam *mcam; u16 pcifunc, counters; u64 cfg; @@ -3211,16 +3214,34 @@ static int rvu_dbg_npc_mcam_info_display(struct seq_file *filp, void *unsued) seq_puts(filp, "\nNPC MCAM info:\n"); /* MCAM keywidth on receive and transmit sides */ - cfg = rvu_read64(rvu, blkaddr, NPC_AF_INTFX_KEX_CFG(NIX_INTF_RX)); - cfg = (cfg >> 32) & 0x07; - seq_printf(filp, "\t\t RX keywidth \t: %s\n", (cfg == NPC_MCAM_KEY_X1) ? - "112bits" : ((cfg == NPC_MCAM_KEY_X2) ? - "224bits" : "448bits")); - cfg = rvu_read64(rvu, blkaddr, NPC_AF_INTFX_KEX_CFG(NIX_INTF_TX)); - cfg = (cfg >> 32) & 0x07; - seq_printf(filp, "\t\t TX keywidth \t: %s\n", (cfg == NPC_MCAM_KEY_X1) ? - "112bits" : ((cfg == NPC_MCAM_KEY_X2) ? - "224bits" : "448bits")); + if (is_cn20k(rvu->pdev)) { + npc_priv = npc_priv_get(); + seq_printf(filp, "\t\t RX keywidth \t: %s\n", + (npc_priv->kw == NPC_MCAM_KEY_X1) ? + "256bits" : "512bits"); + + npc_cn20k_subbank_calc_free(rvu, &x2_free, &x4_free, &sb_free); + seq_printf(filp, "\t\t free x4 slots\t: %d\n", x4_free); + + seq_printf(filp, "\t\t free x2 slots\t: %d\n", x2_free); + + seq_printf(filp, "\t\t free subbanks\t: %d\n", sb_free); + } else { + cfg = rvu_read64(rvu, blkaddr, + NPC_AF_INTFX_KEX_CFG(NIX_INTF_RX)); + cfg = (cfg >> 32) & 0x07; + seq_printf(filp, "\t\t RX keywidth \t: %s\n", + (cfg == NPC_MCAM_KEY_X1) ? + "112bits" : ((cfg == NPC_MCAM_KEY_X2) ? + "224bits" : "448bits")); + cfg = rvu_read64(rvu, blkaddr, + NPC_AF_INTFX_KEX_CFG(NIX_INTF_TX)); + cfg = (cfg >> 32) & 0x07; + seq_printf(filp, "\t\t TX keywidth \t: %s\n", + (cfg == NPC_MCAM_KEY_X1) ? + "112bits" : ((cfg == NPC_MCAM_KEY_X2) ? + "224bits" : "448bits")); + } mutex_lock(&mcam->lock); /* MCAM entries */ From 2e8aeb7ff0b27966e988b0f464fb1b91e7dfafb7 Mon Sep 17 00:00:00 2001 From: Ratheesh Kannoth Date: Tue, 24 Feb 2026 13:30:09 +0530 Subject: [PATCH 0115/1561] octeontx2-af: npc: Use common structures CN20K and legacy silicon differ in the size of key words used in NPC MCAM. However, SoC-specific structures are not required for low-level functions. Remove the SoC-specific structures and rename the macros to improve readability. Signed-off-by: Ratheesh Kannoth Link: https://patch.msgid.link/20260224080009.4147301-14-rkannoth@marvell.com Signed-off-by: Jakub Kicinski --- .../ethernet/marvell/octeontx2/af/cn20k/npc.c | 11 ++++--- .../net/ethernet/marvell/octeontx2/af/mbox.h | 16 ++++++---- .../net/ethernet/marvell/octeontx2/af/rvu.h | 2 +- .../marvell/octeontx2/af/rvu_npc_fs.c | 31 ++++++++----------- 4 files changed, 30 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c index 63a287ee8f70..7291fdb89b03 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c @@ -4018,10 +4018,10 @@ int rvu_mbox_handler_npc_get_num_kws(struct rvu *rvu, struct npc_get_num_kws_req *req, struct npc_get_num_kws_rsp *rsp) { + u64 kw_mask[NPC_KWS_IN_KEY_SZ_MAX] = { 0 }; + u64 kw[NPC_KWS_IN_KEY_SZ_MAX] = { 0 }; struct rvu_npc_mcam_rule dummy = { 0 }; - struct cn20k_mcam_entry cn20k_entry = { 0 }; struct mcam_entry_mdata mdata = { }; - struct mcam_entry entry = { 0 }; struct npc_install_flow_req *fl; int i, cnt = 0, blkaddr; @@ -4038,7 +4038,8 @@ int rvu_mbox_handler_npc_get_num_kws(struct rvu *rvu, return NPC_MCAM_INVALID_REQ; } - npc_populate_mcam_mdata(rvu, &mdata, &cn20k_entry, &entry); + mdata.kw = kw; + mdata.kw_mask = kw_mask; npc_update_flow(rvu, &mdata, fl->features, &fl->packet, &fl->mask, &dummy, fl->intf, blkaddr); @@ -4046,8 +4047,8 @@ int rvu_mbox_handler_npc_get_num_kws(struct rvu *rvu, /* Find the most significant word valid. Traverse from * MSB to LSB, check if cam0 or cam1 is set */ - for (i = NPC_CN20K_MAX_KWS_IN_KEY - 1; i >= 0; i--) { - if (cn20k_entry.kw[i] || cn20k_entry.kw_mask[i]) { + for (i = NPC_KWS_IN_KEY_SZ_MAX - 1; i >= 0; i--) { + if (kw[i] || kw_mask[i]) { cnt = i + 1; break; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 2d9f6cb4820f..dc42c81c0942 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -1593,18 +1593,22 @@ struct mcam_entry_mdata { u8 max_kw; }; +enum npc_kws_in_key_sz { + NPC_KWS_IN_KEY_SZ_7 = 7, + NPC_KWS_IN_KEY_SZ_8 = 8, + NPC_KWS_IN_KEY_SZ_MAX, +}; + struct mcam_entry { -#define NPC_MAX_KWS_IN_KEY 7 /* Number of keywords in max keywidth */ - u64 kw[NPC_MAX_KWS_IN_KEY]; - u64 kw_mask[NPC_MAX_KWS_IN_KEY]; + u64 kw[NPC_KWS_IN_KEY_SZ_7]; + u64 kw_mask[NPC_KWS_IN_KEY_SZ_7]; u64 action; u64 vtag_action; }; struct cn20k_mcam_entry { -#define NPC_CN20K_MAX_KWS_IN_KEY 8 - u64 kw[NPC_CN20K_MAX_KWS_IN_KEY]; - u64 kw_mask[NPC_CN20K_MAX_KWS_IN_KEY]; + u64 kw[NPC_KWS_IN_KEY_SZ_8]; + u64 kw_mask[NPC_KWS_IN_KEY_SZ_8]; u64 action; u64 vtag_action; u64 action2; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index f811d6b5c545..a466181cf908 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -197,7 +197,7 @@ struct npc_key_field { /* Masks where all set bits indicate position * of a field in the key */ - u64 kw_mask[NPC_CN20K_MAX_KWS_IN_KEY]; + u64 kw_mask[NPC_KWS_IN_KEY_SZ_MAX]; /* Number of words in the key a field spans. If a field is * of 16 bytes and key offset is 4 then the field will use * 4 bytes in KW0, 8 bytes in KW1 and 4 bytes in KW2 and diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c index de613b27068d..b45798d9fdab 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c @@ -227,7 +227,7 @@ static bool npc_check_overlap(struct rvu *rvu, int blkaddr, input = &mcam->tx_key_fields[type]; } - kws = NPC_MAX_KWS_IN_KEY; + kws = NPC_KWS_IN_KEY_SZ_7; if (is_cn20k(rvu->pdev)) goto skip_cn10k_config; @@ -289,7 +289,7 @@ skip_cn10k_config: * field bits */ if (npc_check_overlap_fields(dummy, input, - NPC_CN20K_MAX_KWS_IN_KEY)) + NPC_KWS_IN_KEY_SZ_8)) return true; } } @@ -460,9 +460,9 @@ static void npc_handle_multi_layer_fields(struct rvu *rvu, int blkaddr, u8 intf) u8 start_lid; if (is_cn20k(rvu->pdev)) - max_kw = NPC_CN20K_MAX_KWS_IN_KEY; + max_kw = NPC_KWS_IN_KEY_SZ_8; else - max_kw = NPC_MAX_KWS_IN_KEY; + max_kw = NPC_KWS_IN_KEY_SZ_7; key_fields = mcam->rx_key_fields; features = &mcam->rx_features; @@ -906,12 +906,12 @@ void npc_update_entry(struct rvu *rvu, enum key_fields type, struct mcam_entry_mdata *mdata, u64 val_lo, u64 val_hi, u64 mask_lo, u64 mask_hi, u8 intf) { - struct cn20k_mcam_entry cn20k_dummy = { {0} }; + u64 kw_mask[NPC_KWS_IN_KEY_SZ_MAX] = { 0 }; + u64 kw[NPC_KWS_IN_KEY_SZ_MAX] = { 0 }; struct npc_mcam *mcam = &rvu->hw->mcam; - struct mcam_entry dummy = { {0} }; - u64 *kw, *kw_mask, *val, *mask; struct npc_key_field *field; u64 kw1, kw2, kw3; + u64 *val, *mask; int i, max_kw; u8 shift; @@ -922,15 +922,10 @@ void npc_update_entry(struct rvu *rvu, enum key_fields type, if (!field->nr_kws) return; - if (is_cn20k(rvu->pdev)) { - max_kw = NPC_CN20K_MAX_KWS_IN_KEY; - kw = cn20k_dummy.kw; - kw_mask = cn20k_dummy.kw_mask; - } else { - max_kw = NPC_MAX_KWS_IN_KEY; - kw = dummy.kw; - kw_mask = dummy.kw_mask; - } + if (is_cn20k(rvu->pdev)) + max_kw = NPC_KWS_IN_KEY_SZ_8; + else + max_kw = NPC_KWS_IN_KEY_SZ_7; for (i = 0; i < max_kw; i++) { if (!field->kw_mask[i]) @@ -1315,14 +1310,14 @@ npc_populate_mcam_mdata(struct rvu *rvu, mdata->kw_mask = cn20k_entry->kw_mask; mdata->action = &cn20k_entry->action; mdata->vtag_action = &cn20k_entry->vtag_action; - mdata->max_kw = NPC_CN20K_MAX_KWS_IN_KEY; + mdata->max_kw = NPC_KWS_IN_KEY_SZ_8; return; } mdata->kw = entry->kw; mdata->kw_mask = entry->kw_mask; mdata->action = &entry->action; mdata->vtag_action = &entry->vtag_action; - mdata->max_kw = NPC_MAX_KWS_IN_KEY; + mdata->max_kw = NPC_KWS_IN_KEY_SZ_7; } static int npc_update_rx_entry(struct rvu *rvu, struct rvu_pfvf *pfvf, From 595da751c8222ca957cfdc0161d9845a75c67046 Mon Sep 17 00:00:00 2001 From: Antony Antony Date: Thu, 26 Feb 2026 11:27:51 +0100 Subject: [PATCH 0116/1561] icmp: fix ICMP error source address when xfrm policy matches When an IPsec gateway generates an ICMP error (e.g., Destination Host Unreachable), the source address incorrectly shows the unreachable destination instead of the gateway's address. IPv6 behaves correctly. Before fix: ping 10.1.6.3 From 10.1.6.3 icmp_seq=1 Destination Host Unreachable (wrong - 10.1.6.3 is the unreachable host) After fix: ping 10.1.6.3 From 10.1.5.2 icmp_seq=1 Destination Host Unreachable (correct - 10.1.5.2 is the gateway) The fix removes the memcpy that overwrote fl4 with fl4_dec after xfrm_lookup(). A follow-up commit adds a selftest. Fixes: 415b3334a21a ("icmp: Fix regression in nexthop resolution during replies.") Cc: stable+noautosel@kernel.org # Avoid false positives in tests Signed-off-by: Antony Antony Acked-by: Tobias Brunner Reviewed-by: David Ahern Link: https://patch.msgid.link/19a0156ff6e76baa323a81d710510d399a6ff63a.1772101380.git.antony.antony@secunet.com Signed-off-by: Jakub Kicinski --- net/ipv4/icmp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 1cf9e391aa0c..ac6d2ffc1963 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -591,7 +591,6 @@ static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4, rt2 = dst_rtable(dst2); if (!IS_ERR(dst2)) { dst_release(&rt->dst); - memcpy(fl4, &fl4_dec, sizeof(*fl4)); rt = rt2; } else if (PTR_ERR(dst2) == -EPERM) { if (rt) From 5b43d35e571fc957fd126c310663a4665f1e7511 Mon Sep 17 00:00:00 2001 From: Antony Antony Date: Thu, 26 Feb 2026 11:28:21 +0100 Subject: [PATCH 0117/1561] selftests: net: add ICMP error source address test over xfrm tunnel Test that ICMP error messages generated by an IPsec gateway use the correct source address (the gateway's address, not the unreachable destination). Signed-off-by: Antony Antony Link: https://patch.msgid.link/79d526f96cf2252d71550d38772876bc72c7e3c7.1772101380.git.antony.antony@secunet.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/xfrm_state.sh | 613 ++++++++++++++++++++++ 2 files changed, 614 insertions(+) create mode 100755 tools/testing/selftests/net/xfrm_state.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index e97c90886f34..ede107fa6ea8 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -120,6 +120,7 @@ TEST_PROGS := \ vrf_route_leaking.sh \ vrf_strict_mode_test.sh \ xfrm_policy.sh \ + xfrm_state.sh \ # end of TEST_PROGS TEST_PROGS_EXTENDED := \ diff --git a/tools/testing/selftests/net/xfrm_state.sh b/tools/testing/selftests/net/xfrm_state.sh new file mode 100755 index 000000000000..f6c54a6496d7 --- /dev/null +++ b/tools/testing/selftests/net/xfrm_state.sh @@ -0,0 +1,613 @@ +#!/bin/bash -e +# SPDX-License-Identifier: GPL-2.0 +# +# xfrm/IPsec tests. +# Currently implemented: +# - ICMP error source address verification (IETF RFC 4301 section 6) +# - ICMP MTU exceeded handling over IPsec tunnels. +# +# Addresses and topology: +# IPv4 prefix 10.1.c.d IPv6 prefix fc00:c::d/64 where c is the segment number +# and d is the interface identifier. +# IPv6 uses the same c:d as IPv4, and start with IPv6 prefix instead ipv4 prefix +# +# Network topology default: ns_set_v4 or ns_set_v6 +# 1.1 1.2 2.1 2.2 3.1 3.2 4.1 4.2 5.1 5.2 6.1 6.2 +# eth0 eth1 eth0 eth1 eth0 eth1 eth0 eth1 eth0 eth1 eth0 eth1 +# a -------- r1 -------- s1 -------- r2 -------- s2 -------- r3 -------- b +# a, b = Alice and Bob hosts without IPsec. +# r1, r2, r3 routers, without IPsec +# s1, s2, IPsec gateways/routers that setup tunnel(s). + +# Network topology x: IPsec gateway that generates ICMP response - ns_set_v4x or ns_set_v6x +# 1.1 1.2 2.1 2.2 3.1 3.2 4.1 4.2 5.1 5.2 +# eth0 eth1 eth0 eth1 eth0 eth1 eth0 eth1 eth0 eth1 +# a -------- r1 -------- s1 -------- r2 -------- s2 -------- b + +. lib.sh + +EXIT_ON_TEST_FAIL=no +PAUSE=no +VERBOSE=${VERBOSE:-0} +DEBUG=0 + +# Name Description +tests=" + unreachable_ipv4 IPv4 unreachable from router r3 + unreachable_ipv6 IPv6 unreachable from router r3 + unreachable_gw_ipv4 IPv4 unreachable from IPsec gateway s2 + unreachable_gw_ipv6 IPv6 unreachable from IPsec gateway s2 + mtu_ipv4_s2 IPv4 MTU exceeded from IPsec gateway s2 + mtu_ipv6_s2 IPv6 MTU exceeded from IPsec gateway s2 + mtu_ipv4_r2 IPv4 MTU exceeded from ESP router r2 + mtu_ipv6_r2 IPv6 MTU exceeded from ESP router r2 + mtu_ipv4_r3 IPv4 MTU exceeded from router r3 + mtu_ipv6_r3 IPv6 MTU exceeded from router r3" + +prefix4="10.1" +prefix6="fc00" + +run_cmd_err() { + cmd="$*" + + if [ "$VERBOSE" -gt 0 ]; then + printf " COMMAND: %s\n" "$cmd" + fi + + out="$($cmd 2>&1)" && rc=0 || rc=$? + if [ "$VERBOSE" -gt 1 ] && [ -n "$out" ]; then + echo " $out" + echo + fi + return 0 +} + +run_cmd() { + run_cmd_err "$@" || exit 1 +} + +run_test() { + # If errexit is set, unset it for sub-shell and restore after test + errexit=0 + if [[ $- =~ "e" ]]; then + errexit=1 + set +e + fi + + ( + unset IFS + + # shellcheck disable=SC2030 # fail is read by trap/cleanup within this subshell + fail="yes" + + # Since cleanup() relies on variables modified by this sub shell, + # it has to run in this context. + trap 'log_test_error $?; cleanup' EXIT INT TERM + + if [ "$VERBOSE" -gt 0 ]; then + printf "\n#############################################################\n\n" + fi + + ret=0 + case "${name}" in + # can't use eval and test names shell check will complain about unused code + unreachable_ipv4) test_unreachable_ipv4 ;; + unreachable_ipv6) test_unreachable_ipv6 ;; + unreachable_gw_ipv4) test_unreachable_gw_ipv4 ;; + unreachable_gw_ipv6) test_unreachable_gw_ipv6 ;; + mtu_ipv4_s2) test_mtu_ipv4_s2 ;; + mtu_ipv6_s2) test_mtu_ipv6_s2 ;; + mtu_ipv4_r2) test_mtu_ipv4_r2 ;; + mtu_ipv6_r2) test_mtu_ipv6_r2 ;; + mtu_ipv4_r3) test_mtu_ipv4_r3 ;; + mtu_ipv6_r3) test_mtu_ipv6_r3 ;; + esac + ret=$? + + if [ $ret -eq 0 ]; then + fail="no" + + if [ "$VERBOSE" -gt 1 ]; then + show_icmp_filter + fi + + printf "TEST: %-60s [ PASS ]\n" "${desc}" + elif [ $ret -eq "$ksft_skip" ]; then + fail="no" + printf "TEST: %-60s [SKIP]\n" "${desc}" + fi + + return $ret + ) + ret=$? + + [ $errexit -eq 1 ] && set -e + + case $ret in + 0) + all_skipped=false + [ "$exitcode" -eq "$ksft_skip" ] && exitcode=0 + ;; + "$ksft_skip") + [ $all_skipped = true ] && exitcode=$ksft_skip + ;; + *) + all_skipped=false + exitcode=1 + ;; + esac + + return 0 # don't trigger errexit (-e); actual status in exitcode +} + +setup_namespaces() { + local namespaces="" + + NS_A="" + NS_B="" + NS_R1="" + NS_R2="" + NS_R3="" + NS_S1="" + NS_S2="" + + for ns in ${ns_set}; do + namespaces="$namespaces NS_${ns^^}" + done + + # shellcheck disable=SC2086 # setup_ns expects unquoted list + setup_ns $namespaces + + ns_active= #ordered list of namespaces for this test. + + [ -n "${NS_A}" ] && ns_a=(ip netns exec "${NS_A}") && ns_active="${ns_active} $NS_A" + [ -n "${NS_R1}" ] && ns_active="${ns_active} $NS_R1" + [ -n "${NS_S1}" ] && ns_s1=(ip netns exec "${NS_S1}") && ns_active="${ns_active} $NS_S1" + [ -n "${NS_R2}" ] && ns_r2=(ip netns exec "${NS_R2}") && ns_active="${ns_active} $NS_R2" + [ -n "${NS_S2}" ] && ns_s2=(ip netns exec "${NS_S2}") && ns_active="${ns_active} $NS_S2" + [ -n "${NS_R3}" ] && ns_r3=(ip netns exec "${NS_R3}") && ns_active="${ns_active} $NS_R3" + [ -n "${NS_B}" ] && ns_active="${ns_active} $NS_B" +} + +addr_add() { + local -a ns_cmd=(ip netns exec "$1") + local addr="$2" + local dev="$3" + + run_cmd "${ns_cmd[@]}" ip addr add "${addr}" dev "${dev}" + run_cmd "${ns_cmd[@]}" ip link set up "${dev}" +} + +veth_add() { + local ns=$2 + local pns=$1 + local -a ns_cmd=(ip netns exec "${pns}") + local ln="eth0" + local rn="eth1" + + run_cmd "${ns_cmd[@]}" ip link add "${ln}" type veth peer name "${rn}" netns "${ns}" +} + +show_icmp_filter() { + run_cmd "${ns_r2[@]}" nft list ruleset + echo "$out" +} + +setup_icmp_filter() { + run_cmd "${ns_r2[@]}" nft add table inet filter + run_cmd "${ns_r2[@]}" nft add chain inet filter FORWARD \ + '{ type filter hook forward priority filter; policy drop ; }' + run_cmd "${ns_r2[@]}" nft add rule inet filter FORWARD counter ip protocol esp \ + counter log accept + run_cmd "${ns_r2[@]}" nft add rule inet filter FORWARD counter ip protocol \ + icmp counter log drop + + if [ "$VERBOSE" -gt 0 ]; then + run_cmd "${ns_r2[@]}" nft list ruleset + echo "$out" + fi +} + +setup_icmpv6_filter() { + run_cmd "${ns_r2[@]}" nft add table inet filter + run_cmd "${ns_r2[@]}" nft add chain inet filter FORWARD \ + '{ type filter hook forward priority filter; policy drop ; }' + run_cmd "${ns_r2[@]}" nft add rule inet filter FORWARD ip6 nexthdr \ + ipv6-icmp icmpv6 type echo-request counter log drop + run_cmd "${ns_r2[@]}" nft add rule inet filter FORWARD ip6 nexthdr esp \ + counter log accept + run_cmd "${ns_r2[@]}" nft add rule inet filter FORWARD ip6 nexthdr \ + ipv6-icmp icmpv6 type \ + '{nd-neighbor-solicit,nd-neighbor-advert,nd-router-solicit,nd-router-advert}' \ + counter log drop + if [ "$VERBOSE" -gt 0 ]; then + run_cmd "${ns_r2[@]}" nft list ruleset + echo "$out" + fi +} + +set_xfrm_params() { + s1_src=${src} + s1_dst=${dst} + s1_src_net=${src_net} + s1_dst_net=${dst_net} +} + +setup_ns_set_v4() { + ns_set="a r1 s1 r2 s2 r3 b" # Network topology default + imax=$(echo "$ns_set" | wc -w) # number of namespaces in this topology + + src="10.1.3.1" + dst="10.1.4.2" + src_net="10.1.1.0/24" + dst_net="10.1.6.0/24" + + prefix=${prefix4} + prefix_len=24 + s="." + S="." + + set_xfrm_params +} + +setup_ns_set_v4x() { + ns_set="a r1 s1 r2 s2 b" # Network topology: x + imax=$(echo "$ns_set" | wc -w) # number of namespaces in this topology + prefix=${prefix4} + s="." + S="." + src="10.1.3.1" + dst="10.1.4.2" + src_net="10.1.1.0/24" + dst_net="10.1.5.0/24" + prefix_len=24 + + set_xfrm_params +} + +setup_ns_set_v6() { + ns_set="a r1 s1 r2 s2 r3 b" # Network topology default + imax=$(echo "$ns_set" | wc -w) # number of namespaces in this topology + prefix=${prefix6} + s=":" + S="::" + src="fc00:3::1" + dst="fc00:4::2" + src_net="fc00:1::0/64" + dst_net="fc00:6::0/64" + prefix_len=64 + + set_xfrm_params +} + +setup_ns_set_v6x() { + ns_set="a r1 s1 r2 s2 b" # Network topology: x + imax=$(echo "$ns_set" | wc -w) + prefix=${prefix6} + s=":" + S="::" + src="fc00:3::1" + dst="fc00:4::2" + src_net="fc00:1::0/64" + dst_net="fc00:5::0/64" + prefix_len=64 + + set_xfrm_params +} + +setup_network() { + # Create veths and add addresses + local -a ns_cmd + i=1 + p="" + for ns in ${ns_active}; do + ns_cmd=(ip netns exec "${ns}") + + if [ "${i}" -ne 1 ]; then + # Create veth between previous and current namespace + veth_add "${p}" "${ns}" + # Add addresses: previous gets .1 on eth0, current gets .2 on eth1 + addr_add "${p}" "${prefix}${s}$((i-1))${S}1/${prefix_len}" eth0 + addr_add "${ns}" "${prefix}${s}$((i-1))${S}2/${prefix_len}" eth1 + fi + + # Enable forwarding + run_cmd "${ns_cmd[@]}" sysctl -q net/ipv4/ip_forward=1 + run_cmd "${ns_cmd[@]}" sysctl -q net/ipv6/conf/all/forwarding=1 + run_cmd "${ns_cmd[@]}" sysctl -q net/ipv6/conf/default/accept_dad=0 + + p=${ns} + i=$((i + 1)) + done + + # Add routes (needs all addresses to exist first) + i=1 + for ns in ${ns_active}; do + ns_cmd=(ip netns exec "${ns}") + + # Forward routes to networks beyond this node + if [ "${i}" -ne "${imax}" ]; then + nhf="${prefix}${s}${i}${S}2" # nexthop forward + for j in $(seq $((i + 1)) "${imax}"); do + run_cmd "${ns_cmd[@]}" ip route replace \ + "${prefix}${s}${j}${S}0/${prefix_len}" via "${nhf}" + done + fi + + # Reverse routes to networks before this node + if [ "${i}" -gt 1 ]; then + nhr="${prefix}${s}$((i-1))${S}1" # nexthop reverse + for j in $(seq 1 $((i - 2))); do + run_cmd "${ns_cmd[@]}" ip route replace \ + "${prefix}${s}${j}${S}0/${prefix_len}" via "${nhr}" + done + fi + + i=$((i + 1)) + done +} + +setup_xfrm_mode() { + local MODE=${1:-tunnel} + if [ "${MODE}" != "tunnel" ] && [ "${MODE}" != "beet" ]; then + echo "xfrm mode ${MODE} not supported" + log_test_error + return 1 + fi + + run_cmd "${ns_s1[@]}" ip xfrm policy add src "${s1_src_net}" dst "${s1_dst_net}" dir out \ + tmpl src "${s1_src}" dst "${s1_dst}" proto esp reqid 1 mode "${MODE}" + + # no "input" policies. we are only doing forwarding so far + + run_cmd "${ns_s1[@]}" ip xfrm policy add src "${s1_dst_net}" dst "${s1_src_net}" dir fwd \ + flag icmp tmpl src "${s1_dst}" dst "${s1_src}" proto esp reqid 2 mode "${MODE}" + + run_cmd "${ns_s1[@]}" ip xfrm state add src "${s1_src}" dst "${s1_dst}" proto esp spi 1 \ + reqid 1 mode "${MODE}" aead 'rfc4106(gcm(aes))' \ + 0x1111111111111111111111111111111111111111 96 \ + sel src "${s1_src_net}" dst "${s1_dst_net}" dir out + + run_cmd "${ns_s1[@]}" ip xfrm state add src "${s1_dst}" dst "${s1_src}" proto esp spi 2 \ + reqid 2 flag icmp replay-window 8 mode "${MODE}" aead 'rfc4106(gcm(aes))' \ + 0x2222222222222222222222222222222222222222 96 \ + sel src "${s1_dst_net}" dst "${s1_src_net}" dir in + + run_cmd "${ns_s2[@]}" ip xfrm policy add src "${s1_dst_net}" dst "${s1_src_net}" dir out \ + flag icmp tmpl src "${s1_dst}" dst "${s1_src}" proto esp reqid 2 mode "${MODE}" + + run_cmd "${ns_s2[@]}" ip xfrm policy add src "${s1_src_net}" dst "${s1_dst_net}" dir fwd \ + tmpl src "${s1_src}" dst "${s1_dst}" proto esp reqid 1 mode "${MODE}" + + run_cmd "${ns_s2[@]}" ip xfrm state add src "${s1_dst}" dst "${s1_src}" proto esp spi 2 \ + reqid 2 mode "${MODE}" aead 'rfc4106(gcm(aes))' \ + 0x2222222222222222222222222222222222222222 96 \ + sel src "${s1_dst_net}" dst "${s1_src_net}" dir out + + run_cmd "${ns_s2[@]}" ip xfrm state add src "${s1_src}" dst "${s1_dst}" proto esp spi 1 \ + reqid 1 flag icmp replay-window 8 mode "${MODE}" aead 'rfc4106(gcm(aes))' \ + 0x1111111111111111111111111111111111111111 96 \ + sel src "${s1_src_net}" dst "${s1_dst_net}" dir in +} + +setup_xfrm() { + setup_xfrm_mode tunnel +} + +setup() { + [ "$(id -u)" -ne 0 ] && echo " need to run as root" && return "$ksft_skip" + + for arg; do + case "${arg}" in + ns_set_v4) setup_ns_set_v4 ;; + ns_set_v4x) setup_ns_set_v4x ;; + ns_set_v6) setup_ns_set_v6 ;; + ns_set_v6x) setup_ns_set_v6x ;; + namespaces) setup_namespaces ;; + network) setup_network ;; + xfrm) setup_xfrm ;; + icmp_filter) setup_icmp_filter ;; + icmpv6_filter) setup_icmpv6_filter ;; + *) echo " ${arg} not supported"; return 1 ;; + esac || return 1 + done +} + +# shellcheck disable=SC2317 # called via trap +pause() { + echo + echo "Pausing. Hit enter to continue" + read -r _ +} + +# shellcheck disable=SC2317 # called via trap +log_test_error() { + # shellcheck disable=SC2031 # fail is set in subshell, read via trap + if [ "${fail}" = "yes" ] && [ -n "${desc}" ]; then + if [ "$VERBOSE" -gt 0 ]; then + show_icmp_filter + fi + printf "TEST: %-60s [ FAIL ] %s\n" "${desc}" "${name}" + [ -n "${cmd}" ] && printf '%s\n\n' "${cmd}" + [ -n "${out}" ] && printf '%s\n\n' "${out}" + fi +} + +# shellcheck disable=SC2317 # called via trap +cleanup() { + # shellcheck disable=SC2031 # fail is set in subshell, read via trap + [[ "$PAUSE" = "always" || ( "$PAUSE" = "fail" && "$fail" = "yes" ) ]] && pause + cleanup_all_ns + # shellcheck disable=SC2031 # fail is set in subshell, read via trap + [ "${EXIT_ON_TEST_FAIL}" = "yes" ] && [ "${fail}" = "yes" ] && exit 1 +} + +test_unreachable_ipv6() { + setup ns_set_v6 namespaces network xfrm icmpv6_filter || return "$ksft_skip" + run_cmd "${ns_a[@]}" ping -W 5 -w 4 -c 1 fc00:6::2 + run_cmd_err "${ns_a[@]}" ping -W 5 -w 4 -c 1 fc00:6::3 + rc=0 + echo -e "$out" | grep -q -E 'From fc00:5::2 icmp_seq.* Destination' || rc=1 + return "${rc}" +} + +test_unreachable_gw_ipv6() { + setup ns_set_v6x namespaces network xfrm icmpv6_filter || return "$ksft_skip" + run_cmd "${ns_a[@]}" ping -W 5 -w 4 -c 1 fc00:5::2 + run_cmd_err "${ns_a[@]}" ping -W 5 -w 4 -c 1 fc00:5::3 + rc=0 + echo -e "$out" | grep -q -E 'From fc00:4::2 icmp_seq.* Destination' || rc=1 + return "${rc}" +} + +test_unreachable_ipv4() { + setup ns_set_v4 namespaces network icmp_filter xfrm || return "$ksft_skip" + run_cmd "${ns_a[@]}" ping -W 5 -w 4 -c 1 10.1.6.2 + run_cmd_err "${ns_a[@]}" ping -W 5 -w 4 -c 1 10.1.6.3 + rc=0 + echo -e "$out" | grep -q -E 'From 10.1.5.2 icmp_seq.* Destination' || rc=1 + return "${rc}" +} + +test_unreachable_gw_ipv4() { + setup ns_set_v4x namespaces network icmp_filter xfrm || return "$ksft_skip" + run_cmd "${ns_a[@]}" ping -W 5 -w 4 -c 1 10.1.5.2 + run_cmd_err "${ns_a[@]}" ping -W 5 -w 4 -c 1 10.1.5.3 + rc=0 + echo -e "$out" | grep -q -E 'From 10.1.4.2 icmp_seq.* Destination' || rc=1 + return "${rc}" +} + +test_mtu_ipv4_r2() { + setup ns_set_v4 namespaces network icmp_filter xfrm || return "$ksft_skip" + run_cmd "${ns_a[@]}" ping -W 5 -w 4 -c 1 10.1.6.2 + run_cmd "${ns_r2[@]}" ip route replace 10.1.3.0/24 dev eth1 src 10.1.3.2 mtu 1300 + run_cmd "${ns_r2[@]}" ip route replace 10.1.4.0/24 dev eth0 src 10.1.4.1 mtu 1300 + # shellcheck disable=SC1010 # -M do: do = dont-fragment, not shell keyword + run_cmd "${ns_a[@]}" ping -M do -s 1300 -W 5 -w 4 -c 1 10.1.6.2 || true + rc=0 + echo -e "$out" | grep -q -E "From 10.1.2.2 icmp_seq=.* Frag needed and DF set" || rc=1 + return "${rc}" +} + +test_mtu_ipv6_r2() { + setup ns_set_v6 namespaces network xfrm icmpv6_filter || return "$ksft_skip" + run_cmd "${ns_a[@]}" ping -W 5 -w 4 -c 1 fc00:6::2 + run_cmd "${ns_r2[@]}" ip -6 route replace fc00:3::/64 \ + dev eth1 metric 256 src fc00:3::2 mtu 1300 + run_cmd "${ns_r2[@]}" ip -6 route replace fc00:4::/64 \ + dev eth0 metric 256 src fc00:4::1 mtu 1300 + # shellcheck disable=SC1010 # -M do: do = dont-fragment, not shell keyword + run_cmd "${ns_a[@]}" ping -M do -s 1300 -W 5 -w 4 -c 1 fc00:6::2 || true + rc=0 + echo -e "$out" | grep -q -E "From fc00:2::2 icmp_seq=.* Packet too big: mtu=1230" || rc=1 + return "${rc}" +} + +test_mtu_ipv4_r3() { + setup ns_set_v4 namespaces network icmp_filter xfrm || return "$ksft_skip" + run_cmd "${ns_a[@]}" ping -W 5 -w 4 -c 1 10.1.6.2 + run_cmd "${ns_r3[@]}" ip route replace 10.1.6.0/24 dev eth0 mtu 1300 + # shellcheck disable=SC1010 # -M do: do = dont-fragment, not shell keyword + run_cmd "${ns_a[@]}" ping -M do -s 1350 -W 5 -w 4 -c 1 10.1.6.2 || true + rc=0 + echo -e "$out" | grep -q -E "From 10.1.5.2 .* Frag needed and DF set \(mtu = 1300\)" || rc=1 + return "${rc}" +} + +test_mtu_ipv4_s2() { + setup ns_set_v4x namespaces network icmp_filter xfrm || return "$ksft_skip" + run_cmd "${ns_a[@]}" ping -W 5 -w 4 -c 1 10.1.5.2 + run_cmd "${ns_s2[@]}" ip route replace 10.1.5.0/24 dev eth0 src 10.1.5.1 mtu 1300 + # shellcheck disable=SC1010 # -M do: do = dont-fragment, not shell keyword + run_cmd "${ns_a[@]}" ping -M do -s 1350 -W 5 -w 4 -c 1 10.1.5.2 || true + rc=0 + echo -e "$out" | grep -q -E "From 10.1.4.2.*Frag needed and DF set \(mtu = 1300\)" || rc=1 + return "${rc}" +} + +test_mtu_ipv6_s2() { + setup ns_set_v6x namespaces network xfrm icmpv6_filter || return "$ksft_skip" + run_cmd "${ns_a[@]}" ping -W 5 -w 4 -c 1 fc00:5::2 + run_cmd "${ns_s2[@]}" ip -6 route replace fc00:5::/64 dev eth0 metric 256 mtu 1300 + # shellcheck disable=SC1010 # -M do: do = dont-fragment, not shell keyword + run_cmd "${ns_a[@]}" ping -M do -s 1350 -W 5 -w 4 -c 1 fc00:5::2 || true + rc=0 + echo -e "$out" | grep -q -E "From fc00:4::2.*Packet too big: mtu=1300" || rc=1 + return "${rc}" +} + +test_mtu_ipv6_r3() { + setup ns_set_v6 namespaces network xfrm icmpv6_filter || return "$ksft_skip" + run_cmd "${ns_a[@]}" ping -W 5 -w 4 -c 1 fc00:6::2 + run_cmd "${ns_r3[@]}" ip -6 route replace fc00:6::/64 dev eth1 metric 256 mtu 1300 + # shellcheck disable=SC1010 # -M do: do = dont-fragment, not shell keyword + run_cmd "${ns_a[@]}" ping -M do -s 1300 -W 5 -w 4 -c 1 fc00:6::2 || true + rc=0 + echo -e "$out" | grep -q -E "From fc00:5::2 icmp_seq=.* Packet too big: mtu=1300" || rc=1 + return "${rc}" +} + +################################################################################ +# +usage() { + echo + echo "$0 [OPTIONS] [TEST]..." + echo "If no TEST argument is given, all tests will be run." + echo + echo -e "\t-p Pause on fail. Namespaces are kept for diagnostics" + echo -e "\t-P Pause after the test. Namespaces are kept for diagnostics" + echo -e "\t-v Verbose output. Show commands; -vv Show output and nft rules also" + echo "Available tests${tests}" + exit 1 +} + +################################################################################ +# +exitcode=0 +all_skipped=true +out= +cmd= + +while getopts :epPv o; do + case $o in + e) EXIT_ON_TEST_FAIL=yes ;; + P) PAUSE=always ;; + p) PAUSE=fail ;; + v) VERBOSE=$((VERBOSE + 1)) ;; + *) usage ;; + esac +done +shift $((OPTIND - 1)) + +IFS=$'\t\n' + +for arg; do + # Check first that all requested tests are available before running any + command -v "test_${arg}" >/dev/null || { + echo "=== Test ${arg} not found" + usage + } +done + +name="" +desc="" +fail="no" + +for t in ${tests}; do + [ "${name}" = "" ] && name="${t}" && continue + [ "${desc}" = "" ] && desc="${t}" + + run_this=1 + for arg; do + [ "${arg}" = "${name}" ] && run_this=1 && break + run_this=0 + done + if [ $run_this -eq 1 ]; then + run_test + fi + name="" + desc="" +done + +exit ${exitcode} From ff2998f29f390d963299103f0b247cc79106ced5 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 26 Feb 2026 14:44:12 +0100 Subject: [PATCH 0118/1561] net: sched: introduce qdisc-specific drop reason tracing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create new enum qdisc_drop_reason and trace_qdisc_drop tracepoint for qdisc layer drop diagnostics with direct qdisc context visibility. The new tracepoint includes qdisc handle, parent, kind (name), and device information. Existing SKB_DROP_REASON_QDISC_DROP is retained for backwards compatibility via kfree_skb_reason(). Convert qdiscs with drop reasons to use the new infrastructure. Change CAKE's cobalt_should_drop() return type from enum skb_drop_reason to enum qdisc_drop_reason to fix implicit enum conversion warnings. Use QDISC_DROP_UNSPEC as the 'not dropped' sentinel instead of SKB_NOT_DROPPED_YET. Both have the same compiled value (0), so the comparison logic remains semantically equivalent. Signed-off-by: Jesper Dangaard Brouer Reviewed-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/177211345275.3011628.1974310302645218067.stgit@firesoul Signed-off-by: Jakub Kicinski --- include/net/dropreason-core.h | 42 ++------------- include/net/dropreason-qdisc.h | 94 ++++++++++++++++++++++++++++++++++ include/net/dropreason.h | 6 +++ include/net/sch_generic.h | 43 +++++++++++----- include/trace/events/qdisc.h | 51 ++++++++++++++++++ net/core/dev.c | 8 +-- net/sched/sch_cake.c | 26 +++++----- net/sched/sch_codel.c | 5 +- net/sched/sch_dualpi2.c | 8 ++- net/sched/sch_fq.c | 7 ++- net/sched/sch_fq_codel.c | 4 +- net/sched/sch_fq_pie.c | 4 +- net/sched/sch_generic.c | 29 ++++++++++- net/sched/sch_gred.c | 4 +- net/sched/sch_pie.c | 4 +- net/sched/sch_red.c | 4 +- net/sched/sch_sfb.c | 4 +- 17 files changed, 249 insertions(+), 94 deletions(-) create mode 100644 include/net/dropreason-qdisc.h diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index a7b7abd66e21..3d8d284e05c8 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -68,12 +68,6 @@ FN(SECURITY_HOOK) \ FN(QDISC_DROP) \ FN(QDISC_BURST_DROP) \ - FN(QDISC_OVERLIMIT) \ - FN(QDISC_CONGESTED) \ - FN(CAKE_FLOOD) \ - FN(FQ_BAND_LIMIT) \ - FN(FQ_HORIZON_LIMIT) \ - FN(FQ_FLOW_LIMIT) \ FN(CPU_BACKLOG) \ FN(XDP) \ FN(TC_INGRESS) \ @@ -371,8 +365,10 @@ enum skb_drop_reason { /** @SKB_DROP_REASON_SECURITY_HOOK: dropped due to security HOOK */ SKB_DROP_REASON_SECURITY_HOOK, /** - * @SKB_DROP_REASON_QDISC_DROP: dropped by qdisc when packet outputting ( - * failed to enqueue to current qdisc) + * @SKB_DROP_REASON_QDISC_DROP: dropped by qdisc during enqueue or + * dequeue. More specific drop reasons are available via the + * qdisc:qdisc_drop tracepoint, which also provides qdisc handle + * and name for identifying the source. */ SKB_DROP_REASON_QDISC_DROP, /** @@ -380,36 +376,6 @@ enum skb_drop_reason { * limit is hit. */ SKB_DROP_REASON_QDISC_BURST_DROP, - /** - * @SKB_DROP_REASON_QDISC_OVERLIMIT: dropped by qdisc when a qdisc - * instance exceeds its total buffer size limit. - */ - SKB_DROP_REASON_QDISC_OVERLIMIT, - /** - * @SKB_DROP_REASON_QDISC_CONGESTED: dropped by a qdisc AQM algorithm - * due to congestion. - */ - SKB_DROP_REASON_QDISC_CONGESTED, - /** - * @SKB_DROP_REASON_CAKE_FLOOD: dropped by the flood protection part of - * CAKE qdisc AQM algorithm (BLUE). - */ - SKB_DROP_REASON_CAKE_FLOOD, - /** - * @SKB_DROP_REASON_FQ_BAND_LIMIT: dropped by fq qdisc when per band - * limit is reached. - */ - SKB_DROP_REASON_FQ_BAND_LIMIT, - /** - * @SKB_DROP_REASON_FQ_HORIZON_LIMIT: dropped by fq qdisc when packet - * timestamp is too far in the future. - */ - SKB_DROP_REASON_FQ_HORIZON_LIMIT, - /** - * @SKB_DROP_REASON_FQ_FLOW_LIMIT: dropped by fq qdisc when a flow - * exceeds its limits. - */ - SKB_DROP_REASON_FQ_FLOW_LIMIT, /** * @SKB_DROP_REASON_CPU_BACKLOG: failed to enqueue the skb to the per CPU * backlog queue. This can be caused by backlog queue full (see diff --git a/include/net/dropreason-qdisc.h b/include/net/dropreason-qdisc.h new file mode 100644 index 000000000000..80a2d557e5f7 --- /dev/null +++ b/include/net/dropreason-qdisc.h @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _LINUX_DROPREASON_QDISC_H +#define _LINUX_DROPREASON_QDISC_H +#include + +#define DEFINE_QDISC_DROP_REASON(FN, FNe) \ + FN(UNSPEC) \ + FN(GENERIC) \ + FN(OVERLIMIT) \ + FN(CONGESTED) \ + FN(CAKE_FLOOD) \ + FN(FQ_BAND_LIMIT) \ + FN(FQ_HORIZON_LIMIT) \ + FN(FQ_FLOW_LIMIT) \ + FNe(MAX) + +#undef FN +#undef FNe +#define FN(reason) QDISC_DROP_##reason, +#define FNe(reason) QDISC_DROP_##reason + +/** + * enum qdisc_drop_reason - reason why a qdisc dropped a packet + * + * Qdisc-specific drop reasons for packet drops that occur within the + * traffic control (TC) queueing discipline layer. These reasons provide + * detailed diagnostics about why packets were dropped by various qdisc + * algorithms, enabling fine-grained monitoring and troubleshooting of + * queue behavior. + */ +enum qdisc_drop_reason { + /** + * @QDISC_DROP_UNSPEC: unspecified/invalid qdisc drop reason. + * Value 0 serves as analogous to SKB_NOT_DROPPED_YET for enum skb_drop_reason. + * Used for catching zero-initialized drop_reason fields. + */ + QDISC_DROP_UNSPEC = 0, + /** + * @__QDISC_DROP_REASON: subsystem base value for qdisc drop reasons + */ + __QDISC_DROP_REASON = SKB_DROP_REASON_SUBSYS_QDISC << + SKB_DROP_REASON_SUBSYS_SHIFT, + /** + * @QDISC_DROP_GENERIC: generic/default qdisc drop, used when no + * more specific reason applies + */ + QDISC_DROP_GENERIC, + /** + * @QDISC_DROP_OVERLIMIT: packet dropped because the qdisc queue + * length exceeded its configured limit (sch->limit). This typically + * indicates the queue is full and cannot accept more packets. + */ + QDISC_DROP_OVERLIMIT, + /** + * @QDISC_DROP_CONGESTED: packet dropped due to active congestion + * control algorithms (e.g., CoDel, PIE, RED) detecting network + * congestion. The qdisc proactively dropped the packet to signal + * congestion to the sender and prevent bufferbloat. + */ + QDISC_DROP_CONGESTED, + /** + * @QDISC_DROP_CAKE_FLOOD: CAKE qdisc dropped packet due to flood + * protection mechanism (BLUE algorithm). This indicates potential + * DoS/flood attack or unresponsive flow behavior. + */ + QDISC_DROP_CAKE_FLOOD, + /** + * @QDISC_DROP_FQ_BAND_LIMIT: FQ (Fair Queue) dropped packet because + * the priority band's packet limit was reached. Each priority band + * in FQ has its own limit. + */ + QDISC_DROP_FQ_BAND_LIMIT, + /** + * @QDISC_DROP_FQ_HORIZON_LIMIT: FQ dropped packet because its + * timestamp is too far in the future (beyond the configured horizon). + */ + QDISC_DROP_FQ_HORIZON_LIMIT, + /** + * @QDISC_DROP_FQ_FLOW_LIMIT: FQ dropped packet because an individual + * flow exceeded its per-flow packet limit. + */ + QDISC_DROP_FQ_FLOW_LIMIT, + /** + * @QDISC_DROP_MAX: the maximum of qdisc drop reasons, which + * shouldn't be used as a real 'reason' - only for tracing code gen + */ + QDISC_DROP_MAX, +}; + +#undef FN +#undef FNe + +#endif diff --git a/include/net/dropreason.h b/include/net/dropreason.h index 7d3b1a2a6fec..1df60645fb27 100644 --- a/include/net/dropreason.h +++ b/include/net/dropreason.h @@ -23,6 +23,12 @@ enum skb_drop_reason_subsys { */ SKB_DROP_REASON_SUBSYS_OPENVSWITCH, + /** + * @SKB_DROP_REASON_SUBSYS_QDISC: TC qdisc drop reasons, + * see include/net/dropreason-qdisc.h + */ + SKB_DROP_REASON_SUBSYS_QDISC, + /** @SKB_DROP_REASON_SUBSYS_NUM: number of subsystems defined */ SKB_DROP_REASON_SUBSYS_NUM }; diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c3a7268b567e..31c25a6d6acc 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -20,12 +20,15 @@ #include #include #include +#include struct Qdisc_ops; struct qdisc_walker; struct tcf_walker; struct module; struct bpf_flow_keys; +struct Qdisc; +struct netdev_queue; struct qdisc_rate_table { struct tc_ratespec rate; @@ -1106,36 +1109,50 @@ static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb) return cb; } +/* TC classifier accessors - use enum skb_drop_reason */ static inline enum skb_drop_reason tcf_get_drop_reason(const struct sk_buff *skb) { - return tc_skb_cb(skb)->drop_reason; + return (enum skb_drop_reason)tc_skb_cb(skb)->drop_reason; } static inline void tcf_set_drop_reason(const struct sk_buff *skb, enum skb_drop_reason reason) +{ + tc_skb_cb(skb)->drop_reason = (enum qdisc_drop_reason)reason; +} + +/* Qdisc accessors - use enum qdisc_drop_reason */ +static inline enum qdisc_drop_reason +tcf_get_qdisc_drop_reason(const struct sk_buff *skb) +{ + return tc_skb_cb(skb)->drop_reason; +} + +static inline void tcf_set_qdisc_drop_reason(const struct sk_buff *skb, + enum qdisc_drop_reason reason) { tc_skb_cb(skb)->drop_reason = reason; } -static inline void tcf_kfree_skb_list(struct sk_buff *skb) -{ - while (unlikely(skb)) { - struct sk_buff *next = skb->next; +void __tcf_kfree_skb_list(struct sk_buff *skb, struct Qdisc *q, + struct netdev_queue *txq, struct net_device *dev); - prefetch(next); - kfree_skb_reason(skb, tcf_get_drop_reason(skb)); - skb = next; - } +static inline void tcf_kfree_skb_list(struct sk_buff *skb, struct Qdisc *q, + struct netdev_queue *txq, + struct net_device *dev) +{ + if (unlikely(skb)) + __tcf_kfree_skb_list(skb, q, txq, dev); } static inline void qdisc_dequeue_drop(struct Qdisc *q, struct sk_buff *skb, - enum skb_drop_reason reason) + enum qdisc_drop_reason reason) { DEBUG_NET_WARN_ON_ONCE(!(q->flags & TCQ_F_DEQUEUE_DROPS)); DEBUG_NET_WARN_ON_ONCE(q->flags & TCQ_F_NOLOCK); - tcf_set_drop_reason(skb, reason); + tcf_set_qdisc_drop_reason(skb, reason); skb->next = q->to_free; q->to_free = skb; } @@ -1312,9 +1329,9 @@ static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch, static inline int qdisc_drop_reason(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free, - enum skb_drop_reason reason) + enum qdisc_drop_reason reason) { - tcf_set_drop_reason(skb, reason); + tcf_set_qdisc_drop_reason(skb, reason); return qdisc_drop(skb, sch, to_free); } diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h index ff33f41a9db7..d8a5c2677470 100644 --- a/include/trace/events/qdisc.h +++ b/include/trace/events/qdisc.h @@ -74,6 +74,57 @@ TRACE_EVENT(qdisc_enqueue, __entry->ifindex, __entry->handle, __entry->parent, __entry->skbaddr) ); +#undef FN +#undef FNe +#define FN(reason) TRACE_DEFINE_ENUM(QDISC_DROP_##reason); +#define FNe(reason) TRACE_DEFINE_ENUM(QDISC_DROP_##reason); +DEFINE_QDISC_DROP_REASON(FN, FNe) + +#undef FN +#undef FNe +#define FN(reason) { QDISC_DROP_##reason, #reason }, +#define FNe(reason) { QDISC_DROP_##reason, #reason } + +TRACE_EVENT(qdisc_drop, + + TP_PROTO(struct Qdisc *qdisc, const struct netdev_queue *txq, + struct net_device *dev, struct sk_buff *skb, + enum qdisc_drop_reason reason), + + TP_ARGS(qdisc, txq, dev, skb, reason), + + TP_STRUCT__entry( + __field(struct Qdisc *, qdisc) + __field(const struct netdev_queue *, txq) + __field(void *, skbaddr) + __field(int, ifindex) + __field(u32, handle) + __field(u32, parent) + __field(enum qdisc_drop_reason, reason) + __string(kind, qdisc->ops->id) + ), + + TP_fast_assign( + __entry->qdisc = qdisc; + __entry->txq = txq; + __entry->skbaddr = skb; + __entry->ifindex = dev ? dev->ifindex : 0; + __entry->handle = qdisc->handle; + __entry->parent = qdisc->parent; + __entry->reason = reason; + __assign_str(kind); + ), + + TP_printk("drop ifindex=%d kind=%s handle=0x%X parent=0x%X skbaddr=%p reason=%s", + __entry->ifindex, __get_str(kind), __entry->handle, + __entry->parent, __entry->skbaddr, + __print_symbolic(__entry->reason, + DEFINE_QDISC_DROP_REASON(FN, FNe))) +); + +#undef FN +#undef FNe + TRACE_EVENT(qdisc_reset, TP_PROTO(struct Qdisc *q), diff --git a/net/core/dev.c b/net/core/dev.c index c1a9f7fdcffa..1cf3ad840697 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4166,7 +4166,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, qdisc_calculate_pkt_len(skb, q); - tcf_set_drop_reason(skb, SKB_DROP_REASON_QDISC_DROP); + tcf_set_qdisc_drop_reason(skb, QDISC_DROP_GENERIC); if (q->flags & TCQ_F_NOLOCK) { if (q->flags & TCQ_F_CAN_BYPASS && nolock_qdisc_is_empty(q) && @@ -4274,8 +4274,8 @@ unlock: spin_unlock(root_lock); free_skbs: - tcf_kfree_skb_list(to_free); - tcf_kfree_skb_list(to_free2); + tcf_kfree_skb_list(to_free, q, txq, dev); + tcf_kfree_skb_list(to_free2, q, txq, dev); return rc; } @@ -5811,7 +5811,7 @@ static __latent_entropy void net_tx_action(void) to_free = qdisc_run(q); if (root_lock) spin_unlock(root_lock); - tcf_kfree_skb_list(to_free); + tcf_kfree_skb_list(to_free, q, NULL, qdisc_dev(q)); } rcu_read_unlock(); diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index a01f14b1c216..ca00fba7e451 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -497,13 +497,13 @@ static bool cobalt_queue_empty(struct cobalt_vars *vars, /* Call this with a freshly dequeued packet for possible congestion marking. * Returns true as an instruction to drop the packet, false for delivery. */ -static enum skb_drop_reason cobalt_should_drop(struct cobalt_vars *vars, - struct cobalt_params *p, - ktime_t now, - struct sk_buff *skb, - u32 bulk_flows) +static enum qdisc_drop_reason cobalt_should_drop(struct cobalt_vars *vars, + struct cobalt_params *p, + ktime_t now, + struct sk_buff *skb, + u32 bulk_flows) { - enum skb_drop_reason reason = SKB_NOT_DROPPED_YET; + enum qdisc_drop_reason reason = QDISC_DROP_UNSPEC; bool next_due, over_target; ktime_t schedule; u64 sojourn; @@ -548,7 +548,7 @@ static enum skb_drop_reason cobalt_should_drop(struct cobalt_vars *vars, if (next_due && vars->dropping) { /* Use ECN mark if possible, otherwise drop */ if (!(vars->ecn_marked = INET_ECN_set_ce(skb))) - reason = SKB_DROP_REASON_QDISC_CONGESTED; + reason = QDISC_DROP_CONGESTED; vars->count++; if (!vars->count) @@ -571,14 +571,14 @@ static enum skb_drop_reason cobalt_should_drop(struct cobalt_vars *vars, } /* Simple BLUE implementation. Lack of ECN is deliberate. */ - if (vars->p_drop && reason == SKB_NOT_DROPPED_YET && + if (vars->p_drop && reason == QDISC_DROP_UNSPEC && get_random_u32() < vars->p_drop) - reason = SKB_DROP_REASON_CAKE_FLOOD; + reason = QDISC_DROP_CAKE_FLOOD; /* Overload the drop_next field as an activity timeout */ if (!vars->count) vars->drop_next = ktime_add_ns(now, p->interval); - else if (ktime_to_ns(schedule) > 0 && reason == SKB_NOT_DROPPED_YET) + else if (ktime_to_ns(schedule) > 0 && reason == QDISC_DROP_UNSPEC) vars->drop_next = now; return reason; @@ -1604,7 +1604,7 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free) if (q->config->rate_flags & CAKE_FLAG_INGRESS) cake_advance_shaper(q, b, skb, now, true); - qdisc_drop_reason(skb, sch, to_free, SKB_DROP_REASON_QDISC_OVERLIMIT); + qdisc_drop_reason(skb, sch, to_free, QDISC_DROP_OVERLIMIT); sch->q.qlen--; cake_heapify(q, 0); @@ -2004,7 +2004,7 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch) { struct cake_sched_data *q = qdisc_priv(sch); struct cake_tin_data *b = &q->tins[q->cur_tin]; - enum skb_drop_reason reason; + enum qdisc_drop_reason reason; ktime_t now = ktime_get(); struct cake_flow *flow; struct list_head *head; @@ -2225,7 +2225,7 @@ retry: !!(q->config->rate_flags & CAKE_FLAG_INGRESS))); /* Last packet in queue may be marked, shouldn't be dropped */ - if (reason == SKB_NOT_DROPPED_YET || !flow->head) + if (reason == QDISC_DROP_UNSPEC || !flow->head) break; /* drop this packet, get another one */ diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index c6551578f1cf..dc2be90666ff 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -52,7 +52,7 @@ static void drop_func(struct sk_buff *skb, void *ctx) { struct Qdisc *sch = ctx; - qdisc_dequeue_drop(sch, skb, SKB_DROP_REASON_QDISC_CONGESTED); + qdisc_dequeue_drop(sch, skb, QDISC_DROP_CONGESTED); qdisc_qstats_drop(sch); } @@ -86,8 +86,7 @@ static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, } q = qdisc_priv(sch); q->drop_overlimit++; - return qdisc_drop_reason(skb, sch, to_free, - SKB_DROP_REASON_QDISC_OVERLIMIT); + return qdisc_drop_reason(skb, sch, to_free, QDISC_DROP_OVERLIMIT); } static const struct nla_policy codel_policy[TCA_CODEL_MAX + 1] = { diff --git a/net/sched/sch_dualpi2.c b/net/sched/sch_dualpi2.c index 6d7e6389758d..020cc20c6b56 100644 --- a/net/sched/sch_dualpi2.c +++ b/net/sched/sch_dualpi2.c @@ -393,13 +393,11 @@ static int dualpi2_enqueue_skb(struct sk_buff *skb, struct Qdisc *sch, qdisc_qstats_overlimit(sch); if (skb_in_l_queue(skb)) qdisc_qstats_overlimit(q->l_queue); - return qdisc_drop_reason(skb, sch, to_free, - SKB_DROP_REASON_QDISC_OVERLIMIT); + return qdisc_drop_reason(skb, sch, to_free, QDISC_DROP_OVERLIMIT); } if (q->drop_early && must_drop(sch, q, skb)) { - qdisc_drop_reason(skb, sch, to_free, - SKB_DROP_REASON_QDISC_CONGESTED); + qdisc_drop_reason(skb, sch, to_free, QDISC_DROP_CONGESTED); return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; } @@ -593,7 +591,7 @@ static struct sk_buff *dualpi2_qdisc_dequeue(struct Qdisc *sch) while ((skb = dequeue_packet(sch, q, &credit_change, now))) { if (!q->drop_early && must_drop(sch, q, skb)) { drop_and_retry(q, skb, sch, - SKB_DROP_REASON_QDISC_CONGESTED); + SKB_DROP_REASON_QDISC_DROP); continue; } diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 80235e85f844..81322187bbe2 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -539,7 +539,7 @@ static bool fq_packet_beyond_horizon(const struct sk_buff *skb, return unlikely((s64)skb->tstamp > (s64)(now + q->horizon)); } -#define FQDR(reason) SKB_DROP_REASON_FQ_##reason +#define FQDR(reason) QDISC_DROP_FQ_##reason static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) @@ -552,8 +552,7 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, band = fq_prio2band(q->prio2band, skb->priority & TC_PRIO_MAX); if (unlikely(q->band_pkt_count[band] >= sch->limit)) { q->stat_band_drops[band]++; - return qdisc_drop_reason(skb, sch, to_free, - FQDR(BAND_LIMIT)); + return qdisc_drop_reason(skb, sch, to_free, FQDR(BAND_LIMIT)); } now = ktime_get_ns(); @@ -579,7 +578,7 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (unlikely(f->qlen >= q->flow_plimit)) { q->stat_flows_plimit++; return qdisc_drop_reason(skb, sch, to_free, - FQDR(FLOW_LIMIT)); + QDISC_DROP_FQ_FLOW_LIMIT); } if (fq_flow_is_detached(f)) { diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 8181b52dd9a8..2a3d758f67ab 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -168,7 +168,7 @@ static unsigned int fq_codel_drop(struct Qdisc *sch, unsigned int max_packets, skb = dequeue_head(flow); len += qdisc_pkt_len(skb); mem += get_codel_cb(skb)->mem_usage; - tcf_set_drop_reason(skb, SKB_DROP_REASON_QDISC_OVERLIMIT); + tcf_set_qdisc_drop_reason(skb, QDISC_DROP_OVERLIMIT); __qdisc_drop(skb, to_free); } while (++i < max_packets && len < threshold); @@ -275,7 +275,7 @@ static void drop_func(struct sk_buff *skb, void *ctx) { struct Qdisc *sch = ctx; - qdisc_dequeue_drop(sch, skb, SKB_DROP_REASON_QDISC_CONGESTED); + qdisc_dequeue_drop(sch, skb, QDISC_DROP_CONGESTED); qdisc_qstats_drop(sch); } diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index d8ac3519e937..154c70f489f2 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -130,7 +130,7 @@ static inline void flow_queue_add(struct fq_pie_flow *flow, static int fq_pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { - enum skb_drop_reason reason = SKB_DROP_REASON_QDISC_OVERLIMIT; + enum qdisc_drop_reason reason = QDISC_DROP_OVERLIMIT; struct fq_pie_sched_data *q = qdisc_priv(sch); struct fq_pie_flow *sel_flow; int ret; @@ -162,7 +162,7 @@ static int fq_pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, q->overmemory++; } - reason = SKB_DROP_REASON_QDISC_CONGESTED; + reason = QDISC_DROP_CONGESTED; if (!pie_drop_early(sch, &q->p_params, &sel_flow->vars, sel_flow->backlog, skb->len)) { diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 98ffe64de51f..556e0d800316 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -25,11 +25,11 @@ #include #include #include +#include #include #include #include #include -#include #include #include @@ -37,6 +37,31 @@ const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops; EXPORT_SYMBOL(default_qdisc_ops); +void __tcf_kfree_skb_list(struct sk_buff *skb, struct Qdisc *q, + struct netdev_queue *txq, struct net_device *dev) +{ + while (skb) { + u32 reason = tc_skb_cb(skb)->drop_reason; + struct sk_buff *next = skb->next; + enum skb_drop_reason skb_reason; + + prefetch(next); + /* TC classifier and qdisc share drop_reason storage. + * Check subsystem mask to identify qdisc drop reasons, + * else pass through skb_drop_reason set by TC classifier. + */ + if ((reason & SKB_DROP_REASON_SUBSYS_MASK) == __QDISC_DROP_REASON) { + trace_qdisc_drop(q, txq, dev, skb, (enum qdisc_drop_reason)reason); + skb_reason = SKB_DROP_REASON_QDISC_DROP; + } else { + skb_reason = (enum skb_drop_reason)reason; + } + kfree_skb_reason(skb, skb_reason); + skb = next; + } +} +EXPORT_SYMBOL(__tcf_kfree_skb_list); + static void qdisc_maybe_clear_missed(struct Qdisc *q, const struct netdev_queue *txq) { @@ -741,7 +766,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, err = skb_array_produce(q, skb); if (unlikely(err)) { - tcf_set_drop_reason(skb, SKB_DROP_REASON_QDISC_OVERLIMIT); + tcf_set_qdisc_drop_reason(skb, QDISC_DROP_OVERLIMIT); if (qdisc_is_percpu_stats(qdisc)) return qdisc_drop_cpu(skb, qdisc, to_free); diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 6706faba95b9..36d0cafac206 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -251,10 +251,10 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch, q->stats.pdrop++; drop: - return qdisc_drop_reason(skb, sch, to_free, SKB_DROP_REASON_QDISC_OVERLIMIT); + return qdisc_drop_reason(skb, sch, to_free, QDISC_DROP_OVERLIMIT); congestion_drop: - qdisc_drop_reason(skb, sch, to_free, SKB_DROP_REASON_QDISC_CONGESTED); + qdisc_drop_reason(skb, sch, to_free, QDISC_DROP_CONGESTED); return NET_XMIT_CN; } diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index 0a377313b6a9..16f3f629cb8e 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -85,7 +85,7 @@ EXPORT_SYMBOL_GPL(pie_drop_early); static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { - enum skb_drop_reason reason = SKB_DROP_REASON_QDISC_OVERLIMIT; + enum qdisc_drop_reason reason = QDISC_DROP_OVERLIMIT; struct pie_sched_data *q = qdisc_priv(sch); bool enqueue = false; @@ -94,7 +94,7 @@ static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, goto out; } - reason = SKB_DROP_REASON_QDISC_CONGESTED; + reason = QDISC_DROP_CONGESTED; if (!pie_drop_early(sch, &q->params, &q->vars, sch->qstats.backlog, skb->len)) { diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 479c42d11083..c8d3d09f15e3 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -70,7 +70,7 @@ static int red_use_nodrop(struct red_sched_data *q) static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { - enum skb_drop_reason reason = SKB_DROP_REASON_QDISC_CONGESTED; + enum qdisc_drop_reason reason = QDISC_DROP_CONGESTED; struct red_sched_data *q = qdisc_priv(sch); struct Qdisc *child = q->qdisc; unsigned int len; @@ -108,7 +108,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, break; case RED_HARD_MARK: - reason = SKB_DROP_REASON_QDISC_OVERLIMIT; + reason = QDISC_DROP_OVERLIMIT; qdisc_qstats_overlimit(sch); if (red_use_harddrop(q) || !red_use_ecn(q)) { q->stats.forced_drop++; diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index d2835f1168e1..013738662128 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -280,7 +280,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { - enum skb_drop_reason reason = SKB_DROP_REASON_QDISC_OVERLIMIT; + enum qdisc_drop_reason reason = QDISC_DROP_OVERLIMIT; struct sfb_sched_data *q = qdisc_priv(sch); unsigned int len = qdisc_pkt_len(skb); struct Qdisc *child = q->qdisc; @@ -381,7 +381,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, } r = get_random_u16() & SFB_MAX_PROB; - reason = SKB_DROP_REASON_QDISC_CONGESTED; + reason = QDISC_DROP_CONGESTED; if (unlikely(r < p_min)) { if (unlikely(p_min > SFB_MAX_PROB / 2)) { From 3e28f8ad478f165260deba751858afac46cffd2f Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 26 Feb 2026 14:44:19 +0100 Subject: [PATCH 0119/1561] net: sched: sfq: convert to qdisc drop reasons MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert SFQ to use the new qdisc-specific drop reason infrastructure. This patch demonstrates how to convert a flow-based qdisc to use the new enum qdisc_drop_reason. As part of this conversion: - Add QDISC_DROP_MAXFLOWS for flow table exhaustion - Rename FQ_FLOW_LIMIT to generic FLOW_LIMIT, now shared by FQ and SFQ - Use QDISC_DROP_OVERLIMIT for sfq_drop() when overall limit exceeded - Use QDISC_DROP_FLOW_LIMIT for per-flow depth limit exceeded The FLOW_LIMIT reason is now a common drop reason for per-flow limits, applicable to both FQ and SFQ qdiscs. Signed-off-by: Jesper Dangaard Brouer Reviewed-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/177211345946.3011628.12770616071857185664.stgit@firesoul Signed-off-by: Jakub Kicinski --- include/net/dropreason-qdisc.h | 18 ++++++++++++++---- net/sched/sch_fq.c | 2 +- net/sched/sch_sfq.c | 8 ++++---- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/include/net/dropreason-qdisc.h b/include/net/dropreason-qdisc.h index 80a2d557e5f7..02a9f580411b 100644 --- a/include/net/dropreason-qdisc.h +++ b/include/net/dropreason-qdisc.h @@ -9,10 +9,11 @@ FN(GENERIC) \ FN(OVERLIMIT) \ FN(CONGESTED) \ + FN(MAXFLOWS) \ FN(CAKE_FLOOD) \ FN(FQ_BAND_LIMIT) \ FN(FQ_HORIZON_LIMIT) \ - FN(FQ_FLOW_LIMIT) \ + FN(FLOW_LIMIT) \ FNe(MAX) #undef FN @@ -59,6 +60,13 @@ enum qdisc_drop_reason { * congestion to the sender and prevent bufferbloat. */ QDISC_DROP_CONGESTED, + /** + * @QDISC_DROP_MAXFLOWS: packet dropped because the qdisc's flow + * tracking table is full and no free slots are available to allocate + * for a new flow. This indicates flow table exhaustion in flow-based + * qdiscs that maintain per-flow state (e.g., SFQ). + */ + QDISC_DROP_MAXFLOWS, /** * @QDISC_DROP_CAKE_FLOOD: CAKE qdisc dropped packet due to flood * protection mechanism (BLUE algorithm). This indicates potential @@ -77,10 +85,12 @@ enum qdisc_drop_reason { */ QDISC_DROP_FQ_HORIZON_LIMIT, /** - * @QDISC_DROP_FQ_FLOW_LIMIT: FQ dropped packet because an individual - * flow exceeded its per-flow packet limit. + * @QDISC_DROP_FLOW_LIMIT: packet dropped because an individual flow + * exceeded its per-flow packet/depth limit. Used by FQ and SFQ qdiscs + * to enforce per-flow fairness and prevent a single flow from + * monopolizing queue resources. */ - QDISC_DROP_FQ_FLOW_LIMIT, + QDISC_DROP_FLOW_LIMIT, /** * @QDISC_DROP_MAX: the maximum of qdisc drop reasons, which * shouldn't be used as a real 'reason' - only for tracing code gen diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 81322187bbe2..eb5ae2b15cc0 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -578,7 +578,7 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (unlikely(f->qlen >= q->flow_plimit)) { q->stat_flows_plimit++; return qdisc_drop_reason(skb, sch, to_free, - QDISC_DROP_FQ_FLOW_LIMIT); + QDISC_DROP_FLOW_LIMIT); } if (fq_flow_is_detached(f)) { diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 503d7d3ca081..c3f3181dba54 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -302,7 +302,7 @@ drop: sfq_dec(q, x); sch->q.qlen--; qdisc_qstats_backlog_dec(sch, skb); - qdisc_drop(skb, sch, to_free); + qdisc_drop_reason(skb, sch, to_free, QDISC_DROP_OVERLIMIT); return len; } @@ -363,7 +363,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) if (x == SFQ_EMPTY_SLOT) { x = q->dep[0].next; /* get a free slot */ if (x >= SFQ_MAX_FLOWS) - return qdisc_drop(skb, sch, to_free); + return qdisc_drop_reason(skb, sch, to_free, QDISC_DROP_MAXFLOWS); q->ht[hash] = x; slot = &q->slots[x]; slot->hash = hash; @@ -420,14 +420,14 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) if (slot->qlen >= q->maxdepth) { congestion_drop: if (!sfq_headdrop(q)) - return qdisc_drop(skb, sch, to_free); + return qdisc_drop_reason(skb, sch, to_free, QDISC_DROP_FLOW_LIMIT); /* We know we have at least one packet in queue */ head = slot_dequeue_head(slot); delta = qdisc_pkt_len(head) - qdisc_pkt_len(skb); sch->qstats.backlog -= delta; slot->backlog -= delta; - qdisc_drop(head, sch, to_free); + qdisc_drop_reason(head, sch, to_free, QDISC_DROP_FLOW_LIMIT); slot_queue_add(slot, skb); qdisc_tree_reduce_backlog(sch, 0, delta); From f30d9073ec1909a3b06a9cee57215bed3458da80 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 26 Feb 2026 14:44:29 +0100 Subject: [PATCH 0120/1561] net: sched: rename QDISC_DROP_FQ_* to generic names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename FQ-specific drop reasons to generic names: - QDISC_DROP_FQ_BAND_LIMIT -> QDISC_DROP_BAND_LIMIT - QDISC_DROP_FQ_HORIZON_LIMIT -> QDISC_DROP_HORIZON_LIMIT This follows the principle that drop reasons should describe the drop mechanism rather than being tied to a specific qdisc implementation. These concepts (priority band limits, timestamp horizon) could apply to other qdiscs as well. Remove the local macro define FQDR() and instead use the full QDISC_DROP_* name to make it easier to navigate code. Signed-off-by: Jesper Dangaard Brouer Reviewed-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/177211346902.3011628.12523261489552097455.stgit@firesoul Signed-off-by: Jakub Kicinski --- include/net/dropreason-qdisc.h | 19 ++++++++++--------- net/sched/sch_fq.c | 7 ++----- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/include/net/dropreason-qdisc.h b/include/net/dropreason-qdisc.h index 02a9f580411b..a167302e79e5 100644 --- a/include/net/dropreason-qdisc.h +++ b/include/net/dropreason-qdisc.h @@ -11,8 +11,8 @@ FN(CONGESTED) \ FN(MAXFLOWS) \ FN(CAKE_FLOOD) \ - FN(FQ_BAND_LIMIT) \ - FN(FQ_HORIZON_LIMIT) \ + FN(BAND_LIMIT) \ + FN(HORIZON_LIMIT) \ FN(FLOW_LIMIT) \ FNe(MAX) @@ -74,16 +74,17 @@ enum qdisc_drop_reason { */ QDISC_DROP_CAKE_FLOOD, /** - * @QDISC_DROP_FQ_BAND_LIMIT: FQ (Fair Queue) dropped packet because - * the priority band's packet limit was reached. Each priority band - * in FQ has its own limit. + * @QDISC_DROP_BAND_LIMIT: packet dropped because the priority band's + * limit was reached. Used by qdiscs with priority bands that have + * per-band packet limits (e.g., FQ). */ - QDISC_DROP_FQ_BAND_LIMIT, + QDISC_DROP_BAND_LIMIT, /** - * @QDISC_DROP_FQ_HORIZON_LIMIT: FQ dropped packet because its - * timestamp is too far in the future (beyond the configured horizon). + * @QDISC_DROP_HORIZON_LIMIT: packet dropped because its timestamp + * is too far in the future (beyond the configured horizon). + * Used by qdiscs with time-based scheduling (e.g., FQ). */ - QDISC_DROP_FQ_HORIZON_LIMIT, + QDISC_DROP_HORIZON_LIMIT, /** * @QDISC_DROP_FLOW_LIMIT: packet dropped because an individual flow * exceeded its per-flow packet/depth limit. Used by FQ and SFQ qdiscs diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index eb5ae2b15cc0..9a550f832d78 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -539,8 +539,6 @@ static bool fq_packet_beyond_horizon(const struct sk_buff *skb, return unlikely((s64)skb->tstamp > (s64)(now + q->horizon)); } -#define FQDR(reason) QDISC_DROP_FQ_##reason - static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { @@ -552,7 +550,7 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, band = fq_prio2band(q->prio2band, skb->priority & TC_PRIO_MAX); if (unlikely(q->band_pkt_count[band] >= sch->limit)) { q->stat_band_drops[band]++; - return qdisc_drop_reason(skb, sch, to_free, FQDR(BAND_LIMIT)); + return qdisc_drop_reason(skb, sch, to_free, QDISC_DROP_BAND_LIMIT); } now = ktime_get_ns(); @@ -564,7 +562,7 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (q->horizon_drop) { q->stat_horizon_drops++; return qdisc_drop_reason(skb, sch, to_free, - FQDR(HORIZON_LIMIT)); + QDISC_DROP_HORIZON_LIMIT); } q->stat_horizon_caps++; skb->tstamp = now + q->horizon; @@ -603,7 +601,6 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, return NET_XMIT_SUCCESS; } -#undef FQDR static void fq_check_throttled(struct fq_sched_data *q, u64 now) { From 9d3e7f9718987338d9cfbd64292aab6a739d9d32 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 26 Feb 2026 14:44:35 +0100 Subject: [PATCH 0121/1561] net: sched: rename QDISC_DROP_CAKE_FLOOD to QDISC_DROP_FLOOD_PROTECTION MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename QDISC_DROP_CAKE_FLOOD to QDISC_DROP_FLOOD_PROTECTION to use a generic name without embedding the qdisc name. This follows the principle that drop reasons should describe the drop mechanism rather than being tied to a specific qdisc implementation. The flood protection drop reason is used by qdiscs implementing probabilistic drop algorithms (like BLUE) that detect unresponsive flows indicating potential DoS or flood attacks. CAKE uses this via its Cobalt AQM component. Signed-off-by: Jesper Dangaard Brouer Reviewed-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/177211347537.3011628.13759059534638729639.stgit@firesoul Signed-off-by: Jakub Kicinski --- include/net/dropreason-qdisc.h | 11 ++++++----- net/sched/sch_cake.c | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/net/dropreason-qdisc.h b/include/net/dropreason-qdisc.h index a167302e79e5..84f19a51382c 100644 --- a/include/net/dropreason-qdisc.h +++ b/include/net/dropreason-qdisc.h @@ -10,7 +10,7 @@ FN(OVERLIMIT) \ FN(CONGESTED) \ FN(MAXFLOWS) \ - FN(CAKE_FLOOD) \ + FN(FLOOD_PROTECTION) \ FN(BAND_LIMIT) \ FN(HORIZON_LIMIT) \ FN(FLOW_LIMIT) \ @@ -68,11 +68,12 @@ enum qdisc_drop_reason { */ QDISC_DROP_MAXFLOWS, /** - * @QDISC_DROP_CAKE_FLOOD: CAKE qdisc dropped packet due to flood - * protection mechanism (BLUE algorithm). This indicates potential - * DoS/flood attack or unresponsive flow behavior. + * @QDISC_DROP_FLOOD_PROTECTION: packet dropped by flood protection + * mechanism detecting unresponsive flows (potential DoS/flood). + * Used by qdiscs implementing probabilistic drop algorithms like + * BLUE (e.g., CAKE's Cobalt AQM). */ - QDISC_DROP_CAKE_FLOOD, + QDISC_DROP_FLOOD_PROTECTION, /** * @QDISC_DROP_BAND_LIMIT: packet dropped because the priority band's * limit was reached. Used by qdiscs with priority bands that have diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index ca00fba7e451..a64d27476231 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -573,7 +573,7 @@ static enum qdisc_drop_reason cobalt_should_drop(struct cobalt_vars *vars, /* Simple BLUE implementation. Lack of ECN is deliberate. */ if (vars->p_drop && reason == QDISC_DROP_UNSPEC && get_random_u32() < vars->p_drop) - reason = QDISC_DROP_CAKE_FLOOD; + reason = QDISC_DROP_FLOOD_PROTECTION; /* Overload the drop_next field as an activity timeout */ if (!vars->count) From 67713dff6398315461db56fdf208e7fd7e37078e Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 26 Feb 2026 14:45:19 +0100 Subject: [PATCH 0122/1561] net: sched: sch_dualpi2: use qdisc_dequeue_drop() for dequeue drops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DualPI2 drops packets during dequeue but was using kfree_skb_reason() directly, bypassing trace_qdisc_drop. Convert to qdisc_dequeue_drop() and add QDISC_DROP_L4S_STEP_NON_ECN to the qdisc drop reason enum. - Set TCQ_F_DEQUEUE_DROPS flag in dualpi2_init() - Use enum qdisc_drop_reason in drop_and_retry() - Replace kfree_skb_reason() with qdisc_dequeue_drop() Signed-off-by: Jesper Dangaard Brouer Reviewed-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/177211351978.3011628.11267023360997620069.stgit@firesoul Signed-off-by: Jakub Kicinski --- include/net/dropreason-core.h | 6 ------ include/net/dropreason-qdisc.h | 8 ++++++++ net/sched/sch_dualpi2.c | 12 ++++++------ 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index 3d8d284e05c8..5c8c2eb3d2c5 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -121,7 +121,6 @@ FN(CANFD_RX_INVALID_FRAME) \ FN(CANXL_RX_INVALID_FRAME) \ FN(PFMEMALLOC) \ - FN(DUALPI2_STEP_DROP) \ FN(PSP_INPUT) \ FN(PSP_OUTPUT) \ FNe(MAX) @@ -579,11 +578,6 @@ enum skb_drop_reason { * reached a path or socket not eligible for use of memory reserves */ SKB_DROP_REASON_PFMEMALLOC, - /** - * @SKB_DROP_REASON_DUALPI2_STEP_DROP: dropped by the step drop - * threshold of DualPI2 qdisc. - */ - SKB_DROP_REASON_DUALPI2_STEP_DROP, /** @SKB_DROP_REASON_PSP_INPUT: PSP input checks failed */ SKB_DROP_REASON_PSP_INPUT, /** @SKB_DROP_REASON_PSP_OUTPUT: PSP output checks failed */ diff --git a/include/net/dropreason-qdisc.h b/include/net/dropreason-qdisc.h index 84f19a51382c..fb151cd31751 100644 --- a/include/net/dropreason-qdisc.h +++ b/include/net/dropreason-qdisc.h @@ -14,6 +14,7 @@ FN(BAND_LIMIT) \ FN(HORIZON_LIMIT) \ FN(FLOW_LIMIT) \ + FN(L4S_STEP_NON_ECN) \ FNe(MAX) #undef FN @@ -93,6 +94,13 @@ enum qdisc_drop_reason { * monopolizing queue resources. */ QDISC_DROP_FLOW_LIMIT, + /** + * @QDISC_DROP_L4S_STEP_NON_ECN: DualPI2 qdisc dropped a non-ECN-capable + * packet because the L4S queue delay exceeded the step threshold. + * Since the packet cannot be ECN-marked, it must be dropped to signal + * congestion. See RFC 9332 for the DualQ Coupled AQM step mechanism. + */ + QDISC_DROP_L4S_STEP_NON_ECN, /** * @QDISC_DROP_MAX: the maximum of qdisc drop reasons, which * shouldn't be used as a real 'reason' - only for tracing code gen diff --git a/net/sched/sch_dualpi2.c b/net/sched/sch_dualpi2.c index 020cc20c6b56..fe6f5e889625 100644 --- a/net/sched/sch_dualpi2.c +++ b/net/sched/sch_dualpi2.c @@ -571,11 +571,11 @@ static int do_step_aqm(struct dualpi2_sched_data *q, struct sk_buff *skb, } static void drop_and_retry(struct dualpi2_sched_data *q, struct sk_buff *skb, - struct Qdisc *sch, enum skb_drop_reason reason) + struct Qdisc *sch, enum qdisc_drop_reason reason) { ++q->deferred_drops_cnt; q->deferred_drops_len += qdisc_pkt_len(skb); - kfree_skb_reason(skb, reason); + qdisc_dequeue_drop(sch, skb, reason); qdisc_qstats_drop(sch); } @@ -590,15 +590,13 @@ static struct sk_buff *dualpi2_qdisc_dequeue(struct Qdisc *sch) while ((skb = dequeue_packet(sch, q, &credit_change, now))) { if (!q->drop_early && must_drop(sch, q, skb)) { - drop_and_retry(q, skb, sch, - SKB_DROP_REASON_QDISC_DROP); + drop_and_retry(q, skb, sch, QDISC_DROP_CONGESTED); continue; } if (skb_in_l_queue(skb) && do_step_aqm(q, skb, now)) { qdisc_qstats_drop(q->l_queue); - drop_and_retry(q, skb, sch, - SKB_DROP_REASON_DUALPI2_STEP_DROP); + drop_and_retry(q, skb, sch, QDISC_DROP_L4S_STEP_NON_ECN); continue; } @@ -915,6 +913,8 @@ static int dualpi2_init(struct Qdisc *sch, struct nlattr *opt, struct dualpi2_sched_data *q = qdisc_priv(sch); int err; + sch->flags |= TCQ_F_DEQUEUE_DROPS; + q->l_queue = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, TC_H_MAKE(sch->handle, 1), extack); if (!q->l_queue) From 990a73dec3fdc145fef6c827c29205437d533ece Mon Sep 17 00:00:00 2001 From: Zilin Guan Date: Mon, 19 Jan 2026 09:26:25 +0000 Subject: [PATCH 0123/1561] wifi: mwifiex: Fix memory leak in mwifiex_11n_aggregate_pkt() In mwifiex_11n_aggregate_pkt(), skb_aggr is allocated via mwifiex_alloc_dma_align_buf(). If mwifiex_is_ralist_valid() returns false, the function currently returns -1 immediately without freeing the previously allocated skb_aggr, causing a memory leak. Since skb_aggr has not yet been queued via skb_queue_tail(), no other references to this memory exist. Therefore, it has to be freed locally before returning the error. Fix this by calling mwifiex_write_data_complete() to free skb_aggr before returning the error status. Compile tested only. Issue found using a prototype static analysis tool and code review. Fixes: 5e6e3a92b9a4 ("wireless: mwifiex: initial commit for Marvell mwifiex driver") Signed-off-by: Zilin Guan Reviewed-by: Jeff Chen Link: https://patch.msgid.link/20260119092625.1349934-1-zilin@seu.edu.cn Signed-off-by: Johannes Berg --- drivers/net/wireless/marvell/mwifiex/11n_aggr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/marvell/mwifiex/11n_aggr.c b/drivers/net/wireless/marvell/mwifiex/11n_aggr.c index 34b4b34276d6..042b1fe5f0d6 100644 --- a/drivers/net/wireless/marvell/mwifiex/11n_aggr.c +++ b/drivers/net/wireless/marvell/mwifiex/11n_aggr.c @@ -203,6 +203,7 @@ mwifiex_11n_aggregate_pkt(struct mwifiex_private *priv, if (!mwifiex_is_ralist_valid(priv, pra_list, ptrindex)) { spin_unlock_bh(&priv->wmm.ra_list_spinlock); + mwifiex_write_data_complete(adapter, skb_aggr, 1, -1); return -1; } From 92fecd2744552702285fe4e31ab9cb31a59e7391 Mon Sep 17 00:00:00 2001 From: Janusz Dziedzic Date: Fri, 6 Feb 2026 18:15:48 +0100 Subject: [PATCH 0124/1561] wifi: cfg80211: fix background CAC Fix: - Send CAC_ABORT event when background CAC is canceled - Cancel CAC done workqueue when radar is detected - Release background wdev ownership when CAC is aborted or passed - Clean lower layer background radar state when CAC is aborted or passed - Prevent sending abort event when radar event is sent Signed-off-by: Janusz Dziedzic Link: https://patch.msgid.link/20260206171830.553879-2-janusz.dziedzic@gmail.com Signed-off-by: Johannes Berg --- net/wireless/mlme.c | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 3fc175f9f868..212178d04efa 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -1115,8 +1115,10 @@ void __cfg80211_radar_event(struct wiphy *wiphy, */ cfg80211_set_dfs_state(wiphy, chandef, NL80211_DFS_UNAVAILABLE); - if (offchan) + if (offchan) { + cancel_delayed_work(&rdev->background_cac_done_wk); queue_work(cfg80211_wq, &rdev->background_cac_abort_wk); + } cfg80211_sched_dfs_chan_update(rdev); @@ -1187,21 +1189,16 @@ __cfg80211_background_cac_event(struct cfg80211_registered_device *rdev, if (!cfg80211_chandef_valid(chandef)) return; - if (!rdev->background_radar_wdev) - return; - switch (event) { case NL80211_RADAR_CAC_FINISHED: cfg80211_set_dfs_state(wiphy, chandef, NL80211_DFS_AVAILABLE); memcpy(&rdev->cac_done_chandef, chandef, sizeof(*chandef)); queue_work(cfg80211_wq, &rdev->propagate_cac_done_wk); cfg80211_sched_dfs_chan_update(rdev); - wdev = rdev->background_radar_wdev; break; case NL80211_RADAR_CAC_ABORTED: if (!cancel_delayed_work(&rdev->background_cac_done_wk)) return; - wdev = rdev->background_radar_wdev; break; case NL80211_RADAR_CAC_STARTED: break; @@ -1213,17 +1210,6 @@ __cfg80211_background_cac_event(struct cfg80211_registered_device *rdev, nl80211_radar_notify(rdev, chandef, event, netdev, GFP_KERNEL); } -static void -cfg80211_background_cac_event(struct cfg80211_registered_device *rdev, - const struct cfg80211_chan_def *chandef, - enum nl80211_radar_event event) -{ - guard(wiphy)(&rdev->wiphy); - - __cfg80211_background_cac_event(rdev, rdev->background_radar_wdev, - chandef, event); -} - void cfg80211_background_cac_done_wk(struct work_struct *work) { struct delayed_work *delayed_work = to_delayed_work(work); @@ -1231,18 +1217,30 @@ void cfg80211_background_cac_done_wk(struct work_struct *work) rdev = container_of(delayed_work, struct cfg80211_registered_device, background_cac_done_wk); - cfg80211_background_cac_event(rdev, &rdev->background_radar_chandef, - NL80211_RADAR_CAC_FINISHED); + + guard(wiphy)(&rdev->wiphy); + + rdev_set_radar_background(rdev, NULL); + rdev->background_radar_wdev = NULL; + + __cfg80211_background_cac_event(rdev, rdev->background_radar_wdev, + &rdev->background_radar_chandef, + NL80211_RADAR_CAC_FINISHED); } void cfg80211_background_cac_abort_wk(struct work_struct *work) { struct cfg80211_registered_device *rdev; + struct wireless_dev *wdev; rdev = container_of(work, struct cfg80211_registered_device, background_cac_abort_wk); - cfg80211_background_cac_event(rdev, &rdev->background_radar_chandef, - NL80211_RADAR_CAC_ABORTED); + + guard(wiphy)(&rdev->wiphy); + + wdev = rdev->background_radar_wdev; + if (wdev) + cfg80211_stop_background_radar_detection(wdev); } void cfg80211_background_cac_abort(struct wiphy *wiphy) From d69cb039ab1930706428566caf5a714d0cb3ed3d Mon Sep 17 00:00:00 2001 From: Janusz Dziedzic Date: Fri, 6 Feb 2026 18:15:49 +0100 Subject: [PATCH 0125/1561] wifi: cfg80211: set and report chandef CAC ongoing Allow to track and check CAC state from user mode by simple check phy channels eg. using iw phy1 channels command. This is done for regular CAC and background CAC. It is important for background CAC while we can start it from any app (eg. iw or hostapd). Signed-off-by: Janusz Dziedzic Link: https://patch.msgid.link/20260206171830.553879-3-janusz.dziedzic@gmail.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 3 +++ include/uapi/linux/nl80211.h | 6 ++++++ net/wireless/chan.c | 27 +++++++++++++++++++++++++++ net/wireless/core.h | 4 ++++ net/wireless/mlme.c | 7 +++++++ net/wireless/nl80211.c | 7 +++++++ 6 files changed, 54 insertions(+) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index fc01de19c798..e00045c150e7 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -190,6 +190,8 @@ enum ieee80211_channel_flags { * on this channel. * @dfs_state_entered: timestamp (jiffies) when the dfs state was entered. * @dfs_cac_ms: DFS CAC time in milliseconds, this is valid for DFS channels. + * @cac_start_time: timestamp (CLOCK_BOOTTIME, nanoseconds) when CAC was + * started on this channel. Zero when CAC is not in progress. * @psd: power spectral density (in dBm) */ struct ieee80211_channel { @@ -207,6 +209,7 @@ struct ieee80211_channel { enum nl80211_dfs_state dfs_state; unsigned long dfs_state_entered; unsigned int dfs_cac_ms; + u64 cac_start_time; s8 psd; }; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index b63f71850906..c75aa039f096 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4480,6 +4480,10 @@ enum nl80211_wmm_rule { * as a non-primary subchannel. Only applicable to S1G channels. * @NL80211_FREQUENCY_ATTR_NO_UHR: UHR operation is not allowed on this channel * in current regulatory domain. + * @NL80211_FREQUENCY_ATTR_CAC_START_TIME: Channel Availability Check (CAC) + * start time (CLOCK_BOOTTIME, nanoseconds). Only present when CAC is + * currently in progress on this channel. + * @NL80211_FREQUENCY_ATTR_PAD: attribute used for padding for 64-bit alignment * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number * currently defined * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use @@ -4530,6 +4534,8 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_NO_16MHZ, NL80211_FREQUENCY_ATTR_S1G_NO_PRIMARY, NL80211_FREQUENCY_ATTR_NO_UHR, + NL80211_FREQUENCY_ATTR_CAC_START_TIME, + NL80211_FREQUENCY_ATTR_PAD, /* keep last */ __NL80211_FREQUENCY_ATTR_AFTER_LAST, diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 68221b1ab45e..dfe319565280 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -642,6 +642,33 @@ void cfg80211_set_dfs_state(struct wiphy *wiphy, } } +void cfg80211_set_cac_state(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef, + bool cac_ongoing) +{ + struct ieee80211_channel *c; + int width; + u64 cac_time; + + if (WARN_ON(!cfg80211_chandef_valid(chandef))) + return; + + width = cfg80211_chandef_get_width(chandef); + if (width < 0) + return; + + /* Get the same timestamp for all subchannels */ + cac_time = cac_ongoing ? ktime_get_boottime_ns() : 0; + + for_each_subchan(chandef, freq, cf) { + c = ieee80211_get_channel_khz(wiphy, freq); + if (!c) + continue; + + c->cac_start_time = cac_time; + } +} + static bool cfg80211_dfs_permissive_check_wdev(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype, diff --git a/net/wireless/core.h b/net/wireless/core.h index 6ac57b7b2615..6cace846d7a3 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -481,6 +481,10 @@ void cfg80211_set_dfs_state(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef, enum nl80211_dfs_state dfs_state); +void cfg80211_set_cac_state(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef, + bool cac_ongoing); + void cfg80211_dfs_channels_update_work(struct work_struct *work); void cfg80211_sched_dfs_chan_update(struct cfg80211_registered_device *rdev); diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 212178d04efa..283ea4c7c61e 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -1162,9 +1162,11 @@ void cfg80211_cac_event(struct net_device *netdev, fallthrough; case NL80211_RADAR_CAC_ABORTED: wdev->links[link_id].cac_started = false; + cfg80211_set_cac_state(wiphy, chandef, false); break; case NL80211_RADAR_CAC_STARTED: wdev->links[link_id].cac_started = true; + cfg80211_set_cac_state(wiphy, chandef, true); break; default: WARN_ON(1); @@ -1192,15 +1194,18 @@ __cfg80211_background_cac_event(struct cfg80211_registered_device *rdev, switch (event) { case NL80211_RADAR_CAC_FINISHED: cfg80211_set_dfs_state(wiphy, chandef, NL80211_DFS_AVAILABLE); + cfg80211_set_cac_state(wiphy, chandef, false); memcpy(&rdev->cac_done_chandef, chandef, sizeof(*chandef)); queue_work(cfg80211_wq, &rdev->propagate_cac_done_wk); cfg80211_sched_dfs_chan_update(rdev); break; case NL80211_RADAR_CAC_ABORTED: + cfg80211_set_cac_state(wiphy, chandef, false); if (!cancel_delayed_work(&rdev->background_cac_done_wk)) return; break; case NL80211_RADAR_CAC_STARTED: + cfg80211_set_cac_state(wiphy, chandef, true); break; default: return; @@ -1307,6 +1312,8 @@ void cfg80211_stop_radar_detection(struct wireless_dev *wdev) chandef = *wdev_chandef(wdev, link_id); rdev_end_cac(rdev, wdev->netdev, link_id); + wdev->links[link_id].cac_started = false; + cfg80211_set_cac_state(wiphy, &chandef, false); nl80211_radar_notify(rdev, &chandef, NL80211_RADAR_CAC_ABORTED, wdev->netdev, GFP_KERNEL); } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b94231c8441c..7e288d3ce5ae 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1333,6 +1333,12 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, struct wiphy *wiphy, if ((chan->flags & IEEE80211_CHAN_NO_UHR) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_UHR)) goto nla_put_failure; + if (chan->cac_start_time && + nla_put_u64_64bit(msg, + NL80211_FREQUENCY_ATTR_CAC_START_TIME, + chan->cac_start_time, + NL80211_FREQUENCY_ATTR_PAD)) + goto nla_put_failure; } if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER, @@ -11353,6 +11359,7 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, wdev->links[link_id].cac_started = true; wdev->links[link_id].cac_start_time = jiffies; wdev->links[link_id].cac_time_ms = cac_time_ms; + cfg80211_set_cac_state(wiphy, &chandef, true); return 0; } From 68b908b3c83b29442cc80266fc2146ca01d15dd1 Mon Sep 17 00:00:00 2001 From: Janusz Dziedzic Date: Fri, 6 Feb 2026 18:15:50 +0100 Subject: [PATCH 0126/1561] wifi: cfg80211: events, report background radar In case we report radar event add also information this is connected with background one, so user mode application like hostapd, could check it and behave correctly. Signed-off-by: Janusz Dziedzic Link: https://patch.msgid.link/20260206171830.553879-4-janusz.dziedzic@gmail.com Signed-off-by: Johannes Berg --- net/wireless/mlme.c | 6 ++++-- net/wireless/nl80211.c | 7 +++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 283ea4c7c61e..5cd86253a62e 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -1226,11 +1226,12 @@ void cfg80211_background_cac_done_wk(struct work_struct *work) guard(wiphy)(&rdev->wiphy); rdev_set_radar_background(rdev, NULL); - rdev->background_radar_wdev = NULL; __cfg80211_background_cac_event(rdev, rdev->background_radar_wdev, &rdev->background_radar_chandef, NL80211_RADAR_CAC_FINISHED); + + rdev->background_radar_wdev = NULL; } void cfg80211_background_cac_abort_wk(struct work_struct *work) @@ -1330,11 +1331,12 @@ void cfg80211_stop_background_radar_detection(struct wireless_dev *wdev) return; rdev_set_radar_background(rdev, NULL); - rdev->background_radar_wdev = NULL; /* Release offchain ownership */ __cfg80211_background_cac_event(rdev, wdev, &rdev->background_radar_chandef, NL80211_RADAR_CAC_ABORTED); + + rdev->background_radar_wdev = NULL; } int cfg80211_assoc_ml_reconf(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 7e288d3ce5ae..b619f99c221e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -21229,6 +21229,13 @@ nl80211_radar_notify(struct cfg80211_registered_device *rdev, goto nla_put_failure; } + if (rdev->background_radar_wdev && + cfg80211_chandef_identical(&rdev->background_radar_chandef, + chandef)) { + if (nla_put_flag(msg, NL80211_ATTR_RADAR_BACKGROUND)) + goto nla_put_failure; + } + if (nla_put_u32(msg, NL80211_ATTR_RADAR_EVENT, event)) goto nla_put_failure; From 668b233b7a3e50815ff79798236780bedf8b2aae Mon Sep 17 00:00:00 2001 From: Janusz Dziedzic Date: Fri, 6 Feb 2026 18:15:51 +0100 Subject: [PATCH 0127/1561] wifi: mac80211_hwsim: background CAC support Report background CAC support and add allow to cancel background CAC and simulate radar. echo cancel > /sys/kernel/debug/ieee80211/phy2/hwsim/dfs_background_cac echo radar > /sys/kernel/debug/ieee80211/phy2/hwsim/dfs_background_cac Signed-off-by: Janusz Dziedzic Link: https://patch.msgid.link/20260206171830.553879-5-janusz.dziedzic@gmail.com Signed-off-by: Johannes Berg --- drivers/net/wireless/virtual/mac80211_hwsim.c | 77 +++++++++++++++++++ drivers/net/wireless/virtual/mac80211_hwsim.h | 2 + 2 files changed, 79 insertions(+) diff --git a/drivers/net/wireless/virtual/mac80211_hwsim.c b/drivers/net/wireless/virtual/mac80211_hwsim.c index e89173f91637..d2a43eec4641 100644 --- a/drivers/net/wireless/virtual/mac80211_hwsim.c +++ b/drivers/net/wireless/virtual/mac80211_hwsim.c @@ -715,6 +715,7 @@ struct mac80211_hwsim_data { } ps; bool ps_poll_pending; struct dentry *debugfs; + struct cfg80211_chan_def radar_background_chandef; atomic_t pending_cookie; struct sk_buff_head pending; /* packets pending */ @@ -936,6 +937,7 @@ static const struct nla_policy hwsim_genl_policy[HWSIM_ATTR_MAX + 1] = { [HWSIM_ATTR_PMSR_RESULT] = NLA_POLICY_NESTED(hwsim_pmsr_peers_result_policy), [HWSIM_ATTR_MULTI_RADIO] = { .type = NLA_FLAG }, [HWSIM_ATTR_SUPPORT_NAN_DEVICE] = { .type = NLA_FLAG }, + [HWSIM_ATTR_SUPPORT_BACKGROUND_RADAR] = { .type = NLA_FLAG }, }; #if IS_REACHABLE(CONFIG_VIRTIO) @@ -1164,6 +1166,41 @@ static int hwsim_write_simulate_radar(void *dat, u64 val) DEFINE_DEBUGFS_ATTRIBUTE(hwsim_simulate_radar, NULL, hwsim_write_simulate_radar, "%llu\n"); +static ssize_t hwsim_background_cac_write(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct mac80211_hwsim_data *data = file->private_data; + char buf[8] = {}; + + if (count >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(buf, user_buf, count)) + return -EFAULT; + + /* Check if background radar channel is configured */ + if (!data->radar_background_chandef.chan) + return -ENOENT; + + if (sysfs_streq(buf, "radar")) + cfg80211_background_radar_event(data->hw->wiphy, + &data->radar_background_chandef, + GFP_KERNEL); + else if (sysfs_streq(buf, "cancel")) + cfg80211_background_cac_abort(data->hw->wiphy); + else + return -EINVAL; + + return count; +} + +static const struct file_operations hwsim_background_cac_ops = { + .write = hwsim_background_cac_write, + .open = simple_open, + .llseek = default_llseek, +}; + static int hwsim_fops_group_read(void *dat, u64 *val) { struct mac80211_hwsim_data *data = dat; @@ -4154,6 +4191,24 @@ static int mac80211_hwsim_change_nan_config(struct ieee80211_hw *hw, return 0; } +static int mac80211_hwsim_set_radar_background(struct ieee80211_hw *hw, + struct cfg80211_chan_def *chan) +{ + struct mac80211_hwsim_data *data = hw->priv; + + if (!wiphy_ext_feature_isset(hw->wiphy, + NL80211_EXT_FEATURE_RADAR_BACKGROUND)) + return -EOPNOTSUPP; + + if (chan) + data->radar_background_chandef = *chan; + else + memset(&data->radar_background_chandef, 0, + sizeof(data->radar_background_chandef)); + + return 0; +} + #ifdef CONFIG_MAC80211_DEBUGFS #define HWSIM_DEBUGFS_OPS \ .link_add_debugfs = mac80211_hwsim_link_add_debugfs, @@ -4189,6 +4244,7 @@ static int mac80211_hwsim_change_nan_config(struct ieee80211_hw *hw, .start_nan = mac80211_hwsim_start_nan, \ .stop_nan = mac80211_hwsim_stop_nan, \ .nan_change_conf = mac80211_hwsim_change_nan_config, \ + .set_radar_background = mac80211_hwsim_set_radar_background, \ HWSIM_DEBUGFS_OPS #define HWSIM_NON_MLO_OPS \ @@ -4255,6 +4311,7 @@ struct hwsim_new_radio_params { bool mlo; const struct cfg80211_pmsr_capabilities *pmsr_capa; bool nan_device; + bool background_radar; }; static void hwsim_mcast_config_msg(struct sk_buff *mcast_skb, @@ -4340,6 +4397,12 @@ static int append_radio_msg(struct sk_buff *skb, int id, if (ret < 0) return ret; } + + if (param->background_radar) { + ret = nla_put_flag(skb, HWSIM_ATTR_SUPPORT_BACKGROUND_RADAR); + if (ret < 0) + return ret; + } return 0; } @@ -5794,6 +5857,9 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, wiphy_ext_feature_set(hw->wiphy, NL80211_EXT_FEATURE_DFS_CONCURRENT); + if (param->background_radar) + wiphy_ext_feature_set(hw->wiphy, + NL80211_EXT_FEATURE_RADAR_BACKGROUND); if (param->no_vif) ieee80211_hw_set(hw, NO_AUTO_VIF); @@ -5832,6 +5898,10 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, debugfs_create_file("dfs_simulate_radar", 0222, data->debugfs, data, &hwsim_simulate_radar); + if (param->background_radar) + debugfs_create_file("dfs_background_cac", 0200, + data->debugfs, + data, &hwsim_background_cac_ops); if (param->pmsr_capa) { data->pmsr_capa = *param->pmsr_capa; @@ -5950,6 +6020,9 @@ static int mac80211_hwsim_get_radio(struct sk_buff *skb, param.channels = data->channels; param.hwname = wiphy_name(data->hw->wiphy); param.pmsr_capa = &data->pmsr_capa; + param.background_radar = + wiphy_ext_feature_isset(data->hw->wiphy, + NL80211_EXT_FEATURE_RADAR_BACKGROUND); res = append_radio_msg(skb, data->idx, ¶m); if (res < 0) @@ -6387,6 +6460,9 @@ static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info) if (info->attrs[HWSIM_ATTR_MULTI_RADIO]) param.multi_radio = true; + if (info->attrs[HWSIM_ATTR_SUPPORT_BACKGROUND_RADAR]) + param.background_radar = true; + if (info->attrs[HWSIM_ATTR_REG_HINT_ALPHA2]) param.reg_alpha2 = nla_data(info->attrs[HWSIM_ATTR_REG_HINT_ALPHA2]); @@ -7165,6 +7241,7 @@ static int __init init_mac80211_hwsim(void) param.p2p_device = support_p2p_device; param.mlo = mlo; param.multi_radio = multi_radio; + param.background_radar = true; param.use_chanctx = channels > 1 || mlo || multi_radio; param.iftypes = HWSIM_IFTYPE_SUPPORT_MASK; if (param.p2p_device) diff --git a/drivers/net/wireless/virtual/mac80211_hwsim.h b/drivers/net/wireless/virtual/mac80211_hwsim.h index c2d06cf852a5..a022cd5c0f1c 100644 --- a/drivers/net/wireless/virtual/mac80211_hwsim.h +++ b/drivers/net/wireless/virtual/mac80211_hwsim.h @@ -161,6 +161,7 @@ enum hwsim_commands { * Adds one radio for each band. Number of supported channels will be set for * each radio instead of for the wiphy. * @HWSIM_ATTR_SUPPORT_NAN_DEVICE: support NAN Device virtual interface (flag) + * @HWSIM_ATTR_SUPPORT_BACKGROUND_RADAR: background radar/CAC support (flag) * @__HWSIM_ATTR_MAX: enum limit */ enum hwsim_attrs { @@ -195,6 +196,7 @@ enum hwsim_attrs { HWSIM_ATTR_PMSR_RESULT, HWSIM_ATTR_MULTI_RADIO, HWSIM_ATTR_SUPPORT_NAN_DEVICE, + HWSIM_ATTR_SUPPORT_BACKGROUND_RADAR, __HWSIM_ATTR_MAX, }; #define HWSIM_ATTR_MAX (__HWSIM_ATTR_MAX - 1) From ae5e95d4157481693be2317e3ffcd84e36010cbb Mon Sep 17 00:00:00 2001 From: Daniel Hodges Date: Fri, 6 Feb 2026 14:44:01 -0500 Subject: [PATCH 0128/1561] wifi: mwifiex: fix use-after-free in mwifiex_adapter_cleanup() The mwifiex_adapter_cleanup() function uses timer_delete() (non-synchronous) for the wakeup_timer before the adapter structure is freed. This is incorrect because timer_delete() does not wait for any running timer callback to complete. If the wakeup_timer callback (wakeup_timer_fn) is executing when mwifiex_adapter_cleanup() is called, the callback will continue to access adapter fields (adapter->hw_status, adapter->if_ops.card_reset, etc.) which may be freed by mwifiex_free_adapter() called later in the mwifiex_remove_card() path. Use timer_delete_sync() instead to ensure any running timer callback has completed before returning. Fixes: 4636187da60b ("mwifiex: add wakeup timer based recovery mechanism") Cc: stable@vger.kernel.org Signed-off-by: Daniel Hodges Link: https://patch.msgid.link/20260206194401.2346-1-git@danielhodges.dev Signed-off-by: Johannes Berg --- drivers/net/wireless/marvell/mwifiex/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/mwifiex/init.c b/drivers/net/wireless/marvell/mwifiex/init.c index 5c9a46e64d23..0c8925013724 100644 --- a/drivers/net/wireless/marvell/mwifiex/init.c +++ b/drivers/net/wireless/marvell/mwifiex/init.c @@ -391,7 +391,7 @@ static void mwifiex_invalidate_lists(struct mwifiex_adapter *adapter) static void mwifiex_adapter_cleanup(struct mwifiex_adapter *adapter) { - timer_delete(&adapter->wakeup_timer); + timer_delete_sync(&adapter->wakeup_timer); cancel_delayed_work_sync(&adapter->devdump_work); mwifiex_cancel_all_pending_cmd(adapter); wake_up_interruptible(&adapter->cmd_wait_q.wait); From 73e7df69edb6f1271ea0fa876794761e6c73e76a Mon Sep 17 00:00:00 2001 From: Suraj P Kizhakkethil Date: Fri, 13 Feb 2026 15:31:25 +0530 Subject: [PATCH 0129/1561] wifi: mac80211: set band information only for non-MLD when probing stations using NULL frame Currently, when sending a NULL frame to probe a station, the band information is derived from the chanctx_conf in the mac80211 vif's bss_conf. However, for AP MLD, chanctx_conf is not assigned to the vif's bss_conf; instead it is assigned on a per-link basis. As a result, for AP MLD, sending a NULL packet to probe will trigger a warning. WARNING: net/mac80211/cfg.c:4635 at ieee80211_probe_client+0x1a8/0x1d8 [mac80211], CPU#2: hostapd/244 Call trace: ieee80211_probe_client+0x1a8/0x1d8 [mac80211] (P) nl80211_probe_client+0xac/0x170 [cfg80211] genl_family_rcv_msg_doit+0xc8/0x134 genl_rcv_msg+0x200/0x280 netlink_rcv_skb+0x38/0xf0 genl_rcv+0x34/0x48 netlink_unicast+0x314/0x3a0 netlink_sendmsg+0x150/0x390 ____sys_sendmsg+0x1f4/0x21c ___sys_sendmsg+0x98/0xc0 __sys_sendmsg+0x74/0xcc __arm64_sys_sendmsg+0x20/0x34 invoke_syscall.constprop.0+0x4c/0xd0 do_el0_svc+0x3c/0xd0 el0_svc+0x28/0xc0 el0t_64_sync_handler+0x98/0xdc el0t_64_sync+0x154/0x158 ---[ end trace 0000000000000000 ]--- For NULL packets sent to probe stations, set the band information only for non-MLD, since MLD transmissions does not rely on band. Signed-off-by: Suraj P Kizhakkethil Link: https://patch.msgid.link/20260213100126.1414398-2-suraj.kizhakkethil@oss.qualcomm.com Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index b92b4a5c2636..3cdb3e5addd9 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -4629,12 +4629,17 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, qos = sta->sta.wme; - chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); - if (WARN_ON(!chanctx_conf)) { - ret = -EINVAL; - goto unlock; + if (ieee80211_vif_is_mld(&sdata->vif)) { + /* MLD transmissions must not rely on the band */ + band = 0; + } else { + chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); + if (WARN_ON(!chanctx_conf)) { + ret = -EINVAL; + goto unlock; + } + band = chanctx_conf->def.chan->band; } - band = chanctx_conf->def.chan->band; if (qos) { fc = cpu_to_le16(IEEE80211_FTYPE_DATA | From f3f52e6f20ac875ebbef6bd3a235a06d9c4d3fa0 Mon Sep 17 00:00:00 2001 From: Suraj P Kizhakkethil Date: Fri, 13 Feb 2026 15:31:26 +0530 Subject: [PATCH 0130/1561] wifi: mac80211: Set link ID for NULL packets sent to probe stations Currently, for AP MLD, the link ID is not provided when a NULL packet is triggered to probe a station. For non-MLO stations connected to an AP MLD, use the station's default link to send the NULL packets and set addr2 and addr3 to the link address. For MLO stations, set the link ID to unspecified to let the driver select the appropriate link. Co-developed-by: Sriram R Signed-off-by: Sriram R Co-developed-by: Rameshkumar Sundaram Signed-off-by: Rameshkumar Sundaram Signed-off-by: Suraj P Kizhakkethil Link: https://patch.msgid.link/20260213100126.1414398-3-suraj.kizhakkethil@oss.qualcomm.com [init link_id in each branch instead of default to zero] Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 3cdb3e5addd9..df5153772592 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -4614,7 +4614,9 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_tx_info *info; struct sta_info *sta; struct ieee80211_chanctx_conf *chanctx_conf; + struct ieee80211_bss_conf *conf; enum nl80211_band band; + u8 link_id; int ret; /* the lock is needed to assign the cookie later */ @@ -4630,6 +4632,23 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, qos = sta->sta.wme; if (ieee80211_vif_is_mld(&sdata->vif)) { + if (sta->sta.mlo) { + link_id = IEEE80211_LINK_UNSPECIFIED; + } else { + /* + * For non-MLO clients connected to an AP MLD, band + * information is not used; instead, sta->deflink is + * used to send packets. + */ + link_id = sta->deflink.link_id; + + conf = rcu_dereference(sdata->vif.link_conf[link_id]); + + if (unlikely(!conf)) { + ret = -ENOLINK; + goto unlock; + } + } /* MLD transmissions must not rely on the band */ band = 0; } else { @@ -4639,6 +4658,7 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, goto unlock; } band = chanctx_conf->def.chan->band; + link_id = 0; } if (qos) { @@ -4666,8 +4686,13 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, nullfunc->frame_control = fc; nullfunc->duration_id = 0; memcpy(nullfunc->addr1, sta->sta.addr, ETH_ALEN); - memcpy(nullfunc->addr2, sdata->vif.addr, ETH_ALEN); - memcpy(nullfunc->addr3, sdata->vif.addr, ETH_ALEN); + if (ieee80211_vif_is_mld(&sdata->vif) && !sta->sta.mlo) { + memcpy(nullfunc->addr2, conf->addr, ETH_ALEN); + memcpy(nullfunc->addr3, conf->addr, ETH_ALEN); + } else { + memcpy(nullfunc->addr2, sdata->vif.addr, ETH_ALEN); + memcpy(nullfunc->addr3, sdata->vif.addr, ETH_ALEN); + } nullfunc->seq_ctrl = 0; info = IEEE80211_SKB_CB(skb); @@ -4676,6 +4701,8 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, IEEE80211_TX_INTFL_NL80211_FRAME_TX; info->band = band; + info->control.flags |= u32_encode_bits(link_id, + IEEE80211_TX_CTRL_MLO_LINK); skb_set_queue_mapping(skb, IEEE80211_AC_VO); skb->priority = 7; if (qos) From 6a584e336cefb230e2d981a464f4d85562eb750c Mon Sep 17 00:00:00 2001 From: Hari Chandrakanthan Date: Mon, 16 Feb 2026 08:50:26 +0530 Subject: [PATCH 0131/1561] wifi: cfg80211: add support to handle incumbent signal detected event from mac80211/driver When any incumbent signal is detected by an AP/mesh interface operating in 6 GHz band, FCC mandates the AP/mesh to vacate the channels affected by it [1]. Add a new API cfg80211_incumbent_signal_notify() that can be used by mac80211 or drivers to notify the higher layers about the signal interference event with the interference bitmap in which each bit denotes the affected 20 MHz in the operating channel. Add support for the new nl80211 event and nl80211 attribute as well to notify userspace on the details about the interference event. Userspace is expected to process it and take further action - vacate the channel, or reduce the bandwidth. [1] - https://apps.fcc.gov/kdb/GetAttachment.html?id=nXQiRC%2B4mfiA54Zha%2BrW4Q%3D%3D&desc=987594%20D02%20U-NII%206%20GHz%20EMC%20Measurement%20v03&tracking_number=277034 Signed-off-by: Hari Chandrakanthan Signed-off-by: Amith A Link: https://patch.msgid.link/20260216032027.2310956-2-amith.a@oss.qualcomm.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 23 +++++++++++++++++++++ include/uapi/linux/nl80211.h | 19 +++++++++++++++++ net/wireless/nl80211.c | 40 ++++++++++++++++++++++++++++++++++++ net/wireless/trace.h | 19 +++++++++++++++++ 4 files changed, 101 insertions(+) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index e00045c150e7..cea77bf90cfe 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -10475,4 +10475,27 @@ cfg80211_s1g_get_primary_sibling(struct wiphy *wiphy, return ieee80211_get_channel_khz(wiphy, sibling_1mhz_khz); } + +/** + * cfg80211_incumbent_signal_notify - Notify userspace of incumbent signal detection + * @wiphy: the wiphy to use + * @chandef: channel definition in which the interference was detected + * @signal_interference_bitmap: bitmap indicating interference across 20 MHz segments + * @gfp: allocation context for message creation and multicast; pass GFP_ATOMIC + * if called from atomic context (e.g. firmware event handler), otherwise + * GFP_KERNEL + * + * Use this function to notify userspace when an incumbent signal is detected on + * the operating channel in the 6 GHz band. The notification includes the + * current channel definition and a bitmap representing interference across + * the operating bandwidth. Each bit in the bitmap corresponds to a 20 MHz + * segment, with the lowest bit representing the lowest frequency segment. + * Punctured sub-channels are included in the bitmap structure but are always + * set to zero since interference detection is not performed on them. + */ +void cfg80211_incumbent_signal_notify(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef, + u32 signal_interference_bitmap, + gfp_t gfp); + #endif /* __NET_CFG80211_H */ diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index c75aa039f096..fe2c8c8d6dd6 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1361,6 +1361,12 @@ * user space that the NAN new cluster has been joined. The cluster ID is * indicated by %NL80211_ATTR_MAC. * + * @NL80211_CMD_INCUMBENT_SIGNAL_DETECT: Once any incumbent signal is detected + * on the operating channel in 6 GHz band, userspace is notified with the + * signal interference bitmap using + * %NL80211_ATTR_INCUMBENT_SIGNAL_INTERFERENCE_BITMAP. The current channel + * definition is also sent. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1624,6 +1630,8 @@ enum nl80211_commands { NL80211_CMD_NAN_NEXT_DW_NOTIFICATION, NL80211_CMD_NAN_CLUSTER_JOINED, + NL80211_CMD_INCUMBENT_SIGNAL_DETECT, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -2984,6 +2992,15 @@ enum nl80211_commands { * this feature during association. This is a flag attribute. * Currently only supported in mac80211 drivers. * + * @NL80211_ATTR_INCUMBENT_SIGNAL_INTERFERENCE_BITMAP: u32 attribute specifying + * the signal interference bitmap detected on the operating bandwidth for + * %NL80211_CMD_INCUMBENT_SIGNAL_DETECT. Each bit represents a 20 MHz + * segment, lowest bit corresponds to the lowest 20 MHz segment, in the + * operating bandwidth where the interference is detected. Punctured + * sub-channels are included in the bitmap structure; however, since + * interference detection is not performed on these sub-channels, their + * corresponding bits are consistently set to zero. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3557,6 +3574,8 @@ enum nl80211_attrs { NL80211_ATTR_UHR_CAPABILITY, NL80211_ATTR_DISABLE_UHR, + NL80211_ATTR_INCUMBENT_SIGNAL_INTERFERENCE_BITMAP, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b619f99c221e..0da055dad159 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -21127,6 +21127,46 @@ void cfg80211_ch_switch_notify(struct net_device *dev, } EXPORT_SYMBOL(cfg80211_ch_switch_notify); +void cfg80211_incumbent_signal_notify(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef, + u32 signal_interference_bitmap, + gfp_t gfp) +{ + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + struct sk_buff *msg; + void *hdr; + + trace_cfg80211_incumbent_signal_notify(wiphy, chandef, signal_interference_bitmap); + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_INCUMBENT_SIGNAL_DETECT); + if (!hdr) + goto nla_put_failure; + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx)) + goto nla_put_failure; + + if (nl80211_send_chandef(msg, chandef)) + goto nla_put_failure; + + if (nla_put_u32(msg, NL80211_ATTR_INCUMBENT_SIGNAL_INTERFERENCE_BITMAP, + signal_interference_bitmap)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, + NL80211_MCGRP_MLME, gfp); + return; + +nla_put_failure: + nlmsg_free(msg); +} +EXPORT_SYMBOL(cfg80211_incumbent_signal_notify); + void cfg80211_ch_switch_started_notify(struct net_device *dev, struct cfg80211_chan_def *chandef, unsigned int link_id, u8 count, diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 643ccf4f0227..352a57d8b968 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -4225,6 +4225,25 @@ TRACE_EVENT(cfg80211_nan_cluster_joined, WDEV_PR_ARG, __entry->cluster_id, __entry->new_cluster ? " [new]" : "") ); + +TRACE_EVENT(cfg80211_incumbent_signal_notify, + TP_PROTO(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef, + u32 signal_interference_bitmap), + TP_ARGS(wiphy, chandef, signal_interference_bitmap), + TP_STRUCT__entry( + WIPHY_ENTRY + CHAN_DEF_ENTRY + __field(u32, signal_interference_bitmap) + ), + TP_fast_assign( + WIPHY_ASSIGN; + CHAN_DEF_ASSIGN(chandef); + __entry->signal_interference_bitmap = signal_interference_bitmap; + ), + TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT ", signal_interference_bitmap=0x%x", + WIPHY_PR_ARG, CHAN_DEF_PR_ARG, __entry->signal_interference_bitmap) +); #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH From abf37167e78f87c131a1de22ba1bd1cdc81a131f Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 16 Feb 2026 18:25:14 +0900 Subject: [PATCH 0132/1561] wifi: iwlegacy: Avoid multiple -Wflex-array-member-not-at-end warnings -Wflex-array-member-not-at-end was introduced in GCC-14, and we are getting ready to enable it, globally. Move the conflicting declarations (which in a couple of cases happen to be in a union, so the entire unions are moved) to the end of the corresponding structures, struct il_frame, and struct il3945_frame. Notice that `struct il_tx_beacon_cmd`, `struct il4965_tx_resp`, and `struct il3945_tx_beacon_cmd` are flexible structures, this is structures that contain a flexible-array member. The case for struct il4965_beacon_notif is different. Since this structure is defined by hardware, we create the new `struct il4965_tx_resp_hdr` type. We then use this newly created type to replace the object type causing trouble in struct il4965_beacon_notif, namely `struct il4965_tx_resp`. Also, once -fms-extensions is enabled, we can use transparent struct members in struct il4965_tx_resp. Notice that the newly created type does not contain the flex-array member `agg_status`, which is the object causing the -Wfamnae warnings. This object is currently in a union along with `__le32 status`, so anything using struct il4965_beacon_notif needs to have its own view of `status`. To preserve the memory layout, we therefore add member `__le32 beacon_tx_status` to struct il4965_beacon_notif. After these changes, the size of struct il4965_beacon_notif along with its member's offsets remain the same, hence the memory layout doesn't change: Before changes: struct il4965_beacon_notif { struct il4965_tx_resp beacon_notify_hdr; /* 0 24 */ __le32 low_tsf; /* 24 4 */ __le32 high_tsf; /* 28 4 */ __le32 ibss_mgr_status; /* 32 4 */ /* size: 36, cachelines: 1, members: 4 */ /* last cacheline: 36 bytes */ }; After changes: struct il4965_beacon_notif { struct il4965_tx_resp_hdr beacon_notify_hdr; /* 0 20 */ __le32 beacon_tx_status; /* 20 4 */ __le32 low_tsf; /* 24 4 */ __le32 high_tsf; /* 28 4 */ __le32 ibss_mgr_status; /* 32 4 */ /* size: 36, cachelines: 1, members: 5 */ /* last cacheline: 36 bytes */ }; Lastly, adjust the rest of the code, accordingly. With these changes fix the following warnings: 11 drivers/net/wireless/intel/iwlegacy/common.h:526:11: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] 11 drivers/net/wireless/intel/iwlegacy/commands.h:2667:31: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] 4 drivers/net/wireless/intel/iwlegacy/3945.h:131:11: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] Signed-off-by: Gustavo A. R. Silva Acked-by: Stanislaw Gruszka Link: https://patch.msgid.link/aZLienEatf9KC6Rx@kspp Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlegacy/3945.h | 4 +++- drivers/net/wireless/intel/iwlegacy/4965-mac.c | 2 +- drivers/net/wireless/intel/iwlegacy/commands.h | 9 +++++++-- drivers/net/wireless/intel/iwlegacy/common.h | 4 +++- 4 files changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/iwlegacy/3945.h b/drivers/net/wireless/intel/iwlegacy/3945.h index fb1e33c89d0e..ed63b31fee9a 100644 --- a/drivers/net/wireless/intel/iwlegacy/3945.h +++ b/drivers/net/wireless/intel/iwlegacy/3945.h @@ -123,13 +123,15 @@ enum il3945_antenna { #define IEEE80211_FRAME_LEN (IEEE80211_DATA_LEN + IEEE80211_HLEN) struct il3945_frame { + struct list_head list; + + /* Must be last as it ends in a flexible-array member. */ union { struct ieee80211_hdr frame; struct il3945_tx_beacon_cmd beacon; u8 raw[IEEE80211_FRAME_LEN]; u8 cmd[360]; } u; - struct list_head list; }; #define SUP_RATE_11A_MAX_NUM_CHANNELS 8 diff --git a/drivers/net/wireless/intel/iwlegacy/4965-mac.c b/drivers/net/wireless/intel/iwlegacy/4965-mac.c index 7921bc45e21d..18bb55682643 100644 --- a/drivers/net/wireless/intel/iwlegacy/4965-mac.c +++ b/drivers/net/wireless/intel/iwlegacy/4965-mac.c @@ -4076,7 +4076,7 @@ il4965_hdl_beacon(struct il_priv *il, struct il_rx_buf *rxb) u8 rate = il4965_hw_get_rate(beacon->beacon_notify_hdr.rate_n_flags); D_RX("beacon status %x retries %d iss %d tsf:0x%.8x%.8x rate %d\n", - le32_to_cpu(beacon->beacon_notify_hdr.u.status) & TX_STATUS_MSK, + le32_to_cpu(beacon->beacon_tx_status) & TX_STATUS_MSK, beacon->beacon_notify_hdr.failure_frame, le32_to_cpu(beacon->ibss_mgr_status), le32_to_cpu(beacon->high_tsf), le32_to_cpu(beacon->low_tsf), rate); diff --git a/drivers/net/wireless/intel/iwlegacy/commands.h b/drivers/net/wireless/intel/iwlegacy/commands.h index b61b8f377702..7aba84ac88a1 100644 --- a/drivers/net/wireless/intel/iwlegacy/commands.h +++ b/drivers/net/wireless/intel/iwlegacy/commands.h @@ -1690,7 +1690,7 @@ struct agg_tx_status { __le16 sequence; } __packed; -struct il4965_tx_resp { +struct il4965_tx_resp_hdr { u8 frame_count; /* 1 no aggregation, >1 aggregation */ u8 bt_kill_count; /* # blocked by bluetooth (unused for agg) */ u8 failure_rts; /* # failures due to unsuccessful RTS */ @@ -1707,6 +1707,10 @@ struct il4965_tx_resp { __le16 reserved; __le32 pa_power1; /* RF power amplifier measurement (not used) */ __le32 pa_power2; +} __packed; + +struct il4965_tx_resp { + struct il4965_tx_resp_hdr; /* * For non-agg: frame status TX_STATUS_* @@ -2664,7 +2668,8 @@ struct il3945_beacon_notif { } __packed; struct il4965_beacon_notif { - struct il4965_tx_resp beacon_notify_hdr; + struct il4965_tx_resp_hdr beacon_notify_hdr; + __le32 beacon_tx_status; __le32 low_tsf; __le32 high_tsf; __le32 ibss_mgr_status; diff --git a/drivers/net/wireless/intel/iwlegacy/common.h b/drivers/net/wireless/intel/iwlegacy/common.h index 4c9836ab11dd..21f1c7702add 100644 --- a/drivers/net/wireless/intel/iwlegacy/common.h +++ b/drivers/net/wireless/intel/iwlegacy/common.h @@ -518,13 +518,15 @@ struct il_channel_info { #define IEEE80211_FRAME_LEN (IEEE80211_DATA_LEN + IEEE80211_HLEN) struct il_frame { + struct list_head list; + + /* Must be last as it ends in a flexible-array member. */ union { struct ieee80211_hdr frame; struct il_tx_beacon_cmd beacon; u8 raw[IEEE80211_FRAME_LEN]; u8 cmd[360]; } u; - struct list_head list; }; enum { From 49a1e65c6d706703a8fcd54a5c5ca1f11f7e319b Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Thu, 19 Feb 2026 11:47:11 +0200 Subject: [PATCH 0133/1561] wifi: nl80211: refactor nl80211_parse_chandef In order to be able to use this function also for nested attributes, change this function to receive a pointer to extack and to the attributes array, instead of receiving the info and extracting them out of it. While at it, use NL_SET_ERR_MSG_ATTR with the frequency of the chandef. Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260219114327.2b994566a63b.I6c2b6f4c7e2e09f4c47285ca4ac8a37b20700e19@changeid Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 67 ++++++++++++++++++++++++------------------ net/wireless/nl80211.h | 5 ++-- net/wireless/pmsr.c | 5 ++-- 3 files changed, 44 insertions(+), 33 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 0da055dad159..3486475dc119 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3573,11 +3573,10 @@ static bool nl80211_can_set_dev_channel(struct wireless_dev *wdev) } static int _nl80211_parse_chandef(struct cfg80211_registered_device *rdev, - struct genl_info *info, bool monitor, + struct netlink_ext_ack *extack, + struct nlattr **attrs, bool monitor, struct cfg80211_chan_def *chandef) { - struct netlink_ext_ack *extack = info->extack; - struct nlattr **attrs = info->attrs; u32 control_freq; if (!attrs[NL80211_ATTR_WIPHY_FREQ]) { @@ -3587,10 +3586,10 @@ static int _nl80211_parse_chandef(struct cfg80211_registered_device *rdev, } control_freq = MHZ_TO_KHZ( - nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ])); - if (info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET]) + nla_get_u32(attrs[NL80211_ATTR_WIPHY_FREQ])); + if (attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET]) control_freq += - nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET]); + nla_get_u32(attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET]); memset(chandef, 0, sizeof(*chandef)); chandef->chan = ieee80211_get_channel_khz(&rdev->wiphy, control_freq); @@ -3661,40 +3660,43 @@ static int _nl80211_parse_chandef(struct cfg80211_registered_device *rdev, attrs[NL80211_ATTR_S1G_PRIMARY_2MHZ]); } - if (info->attrs[NL80211_ATTR_WIPHY_EDMG_CHANNELS]) { + if (attrs[NL80211_ATTR_WIPHY_EDMG_CHANNELS]) { chandef->edmg.channels = - nla_get_u8(info->attrs[NL80211_ATTR_WIPHY_EDMG_CHANNELS]); + nla_get_u8(attrs[NL80211_ATTR_WIPHY_EDMG_CHANNELS]); - if (info->attrs[NL80211_ATTR_WIPHY_EDMG_BW_CONFIG]) + if (attrs[NL80211_ATTR_WIPHY_EDMG_BW_CONFIG]) chandef->edmg.bw_config = - nla_get_u8(info->attrs[NL80211_ATTR_WIPHY_EDMG_BW_CONFIG]); + nla_get_u8(attrs[NL80211_ATTR_WIPHY_EDMG_BW_CONFIG]); } else { chandef->edmg.bw_config = 0; chandef->edmg.channels = 0; } - if (info->attrs[NL80211_ATTR_PUNCT_BITMAP]) { + if (attrs[NL80211_ATTR_PUNCT_BITMAP]) { chandef->punctured = - nla_get_u32(info->attrs[NL80211_ATTR_PUNCT_BITMAP]); + nla_get_u32(attrs[NL80211_ATTR_PUNCT_BITMAP]); if (chandef->punctured && !wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_PUNCT)) { - NL_SET_ERR_MSG(extack, - "driver doesn't support puncturing"); + NL_SET_ERR_MSG_ATTR(extack, + attrs[NL80211_ATTR_WIPHY_FREQ], + "driver doesn't support puncturing"); return -EINVAL; } } if (!cfg80211_chandef_valid(chandef)) { - NL_SET_ERR_MSG(extack, "invalid channel definition"); + NL_SET_ERR_MSG_ATTR(extack, attrs[NL80211_ATTR_WIPHY_FREQ], + "invalid channel definition"); return -EINVAL; } if (!_cfg80211_chandef_usable(&rdev->wiphy, chandef, IEEE80211_CHAN_DISABLED, monitor ? IEEE80211_CHAN_CAN_MONITOR : 0)) { - NL_SET_ERR_MSG(extack, "(extension) channel is disabled"); + NL_SET_ERR_MSG_ATTR(extack, attrs[NL80211_ATTR_WIPHY_FREQ], + "(extension) channel is disabled"); return -EINVAL; } @@ -3709,10 +3711,11 @@ static int _nl80211_parse_chandef(struct cfg80211_registered_device *rdev, } int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, - struct genl_info *info, + struct netlink_ext_ack *extack, + struct nlattr **attrs, struct cfg80211_chan_def *chandef) { - return _nl80211_parse_chandef(rdev, info, false, chandef); + return _nl80211_parse_chandef(rdev, extack, attrs, false, chandef); } static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, @@ -3739,7 +3742,7 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, link_id = 0; } - result = _nl80211_parse_chandef(rdev, info, + result = _nl80211_parse_chandef(rdev, info->extack, info->attrs, iftype == NL80211_IFTYPE_MONITOR, &chandef); if (result) @@ -6817,7 +6820,8 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) } if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { - err = nl80211_parse_chandef(rdev, info, ¶ms->chandef); + err = nl80211_parse_chandef(rdev, info->extack, info->attrs, + ¶ms->chandef); if (err) goto out; } else if (wdev->valid_links) { @@ -11293,7 +11297,7 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, if (dfs_region == NL80211_DFS_UNSET) return -EINVAL; - err = nl80211_parse_chandef(rdev, info, &chandef); + err = nl80211_parse_chandef(rdev, info->extack, info->attrs, &chandef); if (err) return err; @@ -11382,7 +11386,7 @@ static int nl80211_notify_radar_detection(struct sk_buff *skb, return -EINVAL; } - err = nl80211_parse_chandef(rdev, info, &chandef); + err = nl80211_parse_chandef(rdev, info->extack, info->attrs, &chandef); if (err) { GENL_SET_ERR_MSG(info, "Unable to extract chandef info"); return err; @@ -11567,7 +11571,8 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) goto free; skip_beacons: - err = nl80211_parse_chandef(rdev, info, ¶ms.chandef); + err = nl80211_parse_chandef(rdev, info->extack, info->attrs, + ¶ms.chandef); if (err) goto free; @@ -12788,7 +12793,8 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) ibss.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); } - err = nl80211_parse_chandef(rdev, info, &ibss.chandef); + err = nl80211_parse_chandef(rdev, info->extack, info->attrs, + &ibss.chandef); if (err) return err; @@ -13786,7 +13792,7 @@ static int nl80211_remain_on_channel(struct sk_buff *skb, duration > rdev->wiphy.max_remain_on_channel_duration) return -EINVAL; - err = nl80211_parse_chandef(rdev, info, &chandef); + err = nl80211_parse_chandef(rdev, info->extack, info->attrs, &chandef); if (err) return err; @@ -14002,7 +14008,8 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) */ chandef.chan = NULL; if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { - err = nl80211_parse_chandef(rdev, info, &chandef); + err = nl80211_parse_chandef(rdev, info->extack, info->attrs, + &chandef); if (err) return err; } @@ -14404,7 +14411,8 @@ static int nl80211_join_ocb(struct sk_buff *skb, struct genl_info *info) struct ocb_setup setup = {}; int err; - err = nl80211_parse_chandef(rdev, info, &setup.chandef); + err = nl80211_parse_chandef(rdev, info->extack, info->attrs, + &setup.chandef); if (err) return err; @@ -14479,7 +14487,8 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) cfg.auto_open_plinks = false; if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { - err = nl80211_parse_chandef(rdev, info, &setup.chandef); + err = nl80211_parse_chandef(rdev, info->extack, info->attrs, + &setup.chandef); if (err) return err; } else { @@ -16954,7 +16963,7 @@ static int nl80211_tdls_channel_switch(struct sk_buff *skb, !info->attrs[NL80211_ATTR_OPER_CLASS]) return -EINVAL; - err = nl80211_parse_chandef(rdev, info, &chandef); + err = nl80211_parse_chandef(rdev, info->extack, info->attrs, &chandef); if (err) return err; diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 5e25782af1e0..048ba92c3e42 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * Portions of this file - * Copyright (C) 2018, 2020-2024 Intel Corporation + * Copyright (C) 2018, 2020-2025 Intel Corporation */ #ifndef __NET_WIRELESS_NL80211_H #define __NET_WIRELESS_NL80211_H @@ -23,7 +23,8 @@ static inline u64 wdev_id(struct wireless_dev *wdev) } int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, - struct genl_info *info, + struct netlink_ext_ack *extack, + struct nlattr **attrs, struct cfg80211_chan_def *chandef); int nl80211_parse_random_mac(struct nlattr **attrs, u8 *mac_addr, u8 *mac_addr_mask); diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c index 44bd88c9ea66..556f30f5d60a 100644 --- a/net/wireless/pmsr.c +++ b/net/wireless/pmsr.c @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (C) 2018 - 2021, 2023 - 2024 Intel Corporation + * Copyright (C) 2018 - 2021, 2023 - 2026 Intel Corporation */ #include #include "core.h" @@ -237,7 +237,8 @@ static int pmsr_parse_peer(struct cfg80211_registered_device *rdev, if (err) return err; - err = nl80211_parse_chandef(rdev, info, &out->chandef); + err = nl80211_parse_chandef(rdev, info->extack, info->attrs, + &out->chandef); if (err) return err; From 137b61fdfc989a66db1831ac05edee613323552f Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Thu, 19 Feb 2026 11:47:12 +0200 Subject: [PATCH 0134/1561] wifi: cfg80211: remove unneeded call to cfg80211_leave In cfg80211_destroy_ifaces, we first close all netdev wdevs, which will trigger a NETDEV_GOING_DOWN event that will call cfg80211_leave, and for non-netdev wdevs, we call cfg80211_remove_virtual_intf which calles cfg80211_unregister_wdev, which handles the "leaving" for those interfaces (i.e. stop_nan and stop_p2p_device) Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260219114327.c43709c9d3af.I3179a28f237ea3b8ec368af720fbf77455a7763f@changeid Signed-off-by: Johannes Berg --- net/wireless/core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/wireless/core.c b/net/wireless/core.c index 28ca4290ca99..23afc250bc10 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -349,7 +349,6 @@ void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev) guard(wiphy)(&rdev->wiphy); - cfg80211_leave(rdev, wdev, -1); cfg80211_remove_virtual_intf(rdev, wdev); } } From 033fe322f5852d5144a85978e880e01b1787fd0d Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Thu, 19 Feb 2026 11:47:13 +0200 Subject: [PATCH 0135/1561] wifi: nl80211/cfg80211: support stations of non-netdev interfaces Currently, a station can only be added to a netdev interface, mainly because there was no need for a station of a non-netdev interface. But for NAN, we will have stations that belong to the NL80211_IFTYPE_NAN interface. Prepare for adding/changing/deleting a station that belongs to a non-netdev interface. This doesn't actually allow such stations - this will be done in a different patch. Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260219114327.65c9cc96f814.Ic02066b88bb8ad6b21e15cbea8d720280008c83b@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath6kl/cfg80211.c | 9 +- drivers/net/wireless/ath/ath6kl/main.c | 4 +- drivers/net/wireless/ath/wil6210/cfg80211.c | 20 ++-- drivers/net/wireless/ath/wil6210/main.c | 3 +- drivers/net/wireless/ath/wil6210/wmi.c | 5 +- .../broadcom/brcm80211/brcmfmac/cfg80211.c | 23 ++-- drivers/net/wireless/marvell/libertas/cfg.c | 2 +- .../net/wireless/marvell/mwifiex/cfg80211.c | 24 ++-- .../net/wireless/marvell/mwifiex/uap_event.c | 7 +- .../wireless/microchip/wilc1000/cfg80211.c | 26 ++--- .../net/wireless/quantenna/qtnfmac/cfg80211.c | 26 ++--- .../net/wireless/quantenna/qtnfmac/event.c | 6 +- drivers/net/wireless/virtual/virt_wifi.c | 12 +- .../staging/rtl8723bs/os_dep/ioctl_cfg80211.c | 24 ++-- include/net/cfg80211.h | 24 ++-- net/mac80211/cfg.c | 21 ++-- net/mac80211/sta_info.c | 4 +- net/wireless/nl80211.c | 84 ++++++++------ net/wireless/rdev-ops.h | 30 ++--- net/wireless/trace.h | 104 ++++++++++++------ net/wireless/util.c | 2 +- net/wireless/wext-compat.c | 6 +- 22 files changed, 264 insertions(+), 202 deletions(-) diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index 88f0197fc041..eecba2201b10 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -1775,9 +1775,10 @@ static bool is_rate_ht40(s32 rate, u8 *mcs, bool *sgi) return false; } -static int ath6kl_get_station(struct wiphy *wiphy, struct net_device *dev, +static int ath6kl_get_station(struct wiphy *wiphy, struct wireless_dev *wdev, const u8 *mac, struct station_info *sinfo) { + struct net_device *dev = wdev->netdev; struct ath6kl *ar = ath6kl_priv(dev); struct ath6kl_vif *vif = netdev_priv(dev); long left; @@ -2992,9 +2993,10 @@ static int ath6kl_stop_ap(struct wiphy *wiphy, struct net_device *dev, static const u8 bcast_addr[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; -static int ath6kl_del_station(struct wiphy *wiphy, struct net_device *dev, +static int ath6kl_del_station(struct wiphy *wiphy, struct wireless_dev *wdev, struct station_del_parameters *params) { + struct net_device *dev = wdev->netdev; struct ath6kl *ar = ath6kl_priv(dev); struct ath6kl_vif *vif = netdev_priv(dev); const u8 *addr = params->mac ? params->mac : bcast_addr; @@ -3003,10 +3005,11 @@ static int ath6kl_del_station(struct wiphy *wiphy, struct net_device *dev, addr, WLAN_REASON_PREV_AUTH_NOT_VALID); } -static int ath6kl_change_station(struct wiphy *wiphy, struct net_device *dev, +static int ath6kl_change_station(struct wiphy *wiphy, struct wireless_dev *wdev, const u8 *mac, struct station_parameters *params) { + struct net_device *dev = wdev->netdev; struct ath6kl *ar = ath6kl_priv(dev); struct ath6kl_vif *vif = netdev_priv(dev); int err; diff --git a/drivers/net/wireless/ath/ath6kl/main.c b/drivers/net/wireless/ath/ath6kl/main.c index 85d6ad53cf94..8afc6589fc51 100644 --- a/drivers/net/wireless/ath/ath6kl/main.c +++ b/drivers/net/wireless/ath/ath6kl/main.c @@ -494,7 +494,7 @@ void ath6kl_connect_ap_mode_sta(struct ath6kl_vif *vif, u16 aid, u8 *mac_addr, sinfo->assoc_req_ies = ies; sinfo->assoc_req_ies_len = ies_len; - cfg80211_new_sta(vif->ndev, mac_addr, sinfo, GFP_KERNEL); + cfg80211_new_sta(&vif->wdev, mac_addr, sinfo, GFP_KERNEL); netif_wake_queue(vif->ndev); @@ -1011,7 +1011,7 @@ void ath6kl_disconnect_event(struct ath6kl_vif *vif, u8 reason, u8 *bssid, if (!is_broadcast_ether_addr(bssid)) { /* send event to application */ - cfg80211_del_sta(vif->ndev, bssid, GFP_KERNEL); + cfg80211_del_sta(&vif->wdev, bssid, GFP_KERNEL); } if (memcmp(vif->ndev->dev_addr, bssid, ETH_ALEN) == 0) { diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c index c74f5e66166d..2d8660ccc6f3 100644 --- a/drivers/net/wireless/ath/wil6210/cfg80211.c +++ b/drivers/net/wireless/ath/wil6210/cfg80211.c @@ -533,11 +533,11 @@ int wil_cid_fill_sinfo(struct wil6210_vif *vif, int cid, } static int wil_cfg80211_get_station(struct wiphy *wiphy, - struct net_device *ndev, + struct wireless_dev *wdev, const u8 *mac, struct station_info *sinfo) { - struct wil6210_vif *vif = ndev_to_vif(ndev); struct wil6210_priv *wil = wiphy_to_wil(wiphy); + struct wil6210_vif *vif = wdev_to_vif(wil, wdev); int rc; int cid = wil_find_cid(wil, vif->mid, mac); @@ -573,11 +573,11 @@ int wil_find_cid_by_idx(struct wil6210_priv *wil, u8 mid, int idx) } static int wil_cfg80211_dump_station(struct wiphy *wiphy, - struct net_device *dev, int idx, + struct wireless_dev *wdev, int idx, u8 *mac, struct station_info *sinfo) { - struct wil6210_vif *vif = ndev_to_vif(dev); struct wil6210_priv *wil = wiphy_to_wil(wiphy); + struct wil6210_vif *vif = wdev_to_vif(wil, wdev); int rc; int cid = wil_find_cid_by_idx(wil, vif->mid, idx); @@ -2225,12 +2225,12 @@ static int wil_cfg80211_stop_ap(struct wiphy *wiphy, } static int wil_cfg80211_add_station(struct wiphy *wiphy, - struct net_device *dev, + struct wireless_dev *wdev, const u8 *mac, struct station_parameters *params) { - struct wil6210_vif *vif = ndev_to_vif(dev); struct wil6210_priv *wil = wiphy_to_wil(wiphy); + struct wil6210_vif *vif = wdev_to_vif(wil, wdev); wil_dbg_misc(wil, "add station %pM aid %d mid %d mask 0x%x set 0x%x\n", mac, params->aid, vif->mid, @@ -2250,11 +2250,11 @@ static int wil_cfg80211_add_station(struct wiphy *wiphy, } static int wil_cfg80211_del_station(struct wiphy *wiphy, - struct net_device *dev, + struct wireless_dev *wdev, struct station_del_parameters *params) { - struct wil6210_vif *vif = ndev_to_vif(dev); struct wil6210_priv *wil = wiphy_to_wil(wiphy); + struct wil6210_vif *vif = wdev_to_vif(wil, wdev); wil_dbg_misc(wil, "del_station: %pM, reason=%d mid=%d\n", params->mac, params->reason_code, vif->mid); @@ -2267,12 +2267,12 @@ static int wil_cfg80211_del_station(struct wiphy *wiphy, } static int wil_cfg80211_change_station(struct wiphy *wiphy, - struct net_device *dev, + struct wireless_dev *wdev, const u8 *mac, struct station_parameters *params) { - struct wil6210_vif *vif = ndev_to_vif(dev); struct wil6210_priv *wil = wiphy_to_wil(wiphy); + struct wil6210_vif *vif = wdev_to_vif(wil, wdev); int authorize; int cid, i; struct wil_ring_tx_data *txdata = NULL; diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c index 44c24c6c8360..d5aec72ecdce 100644 --- a/drivers/net/wireless/ath/wil6210/main.c +++ b/drivers/net/wireless/ath/wil6210/main.c @@ -245,7 +245,6 @@ __acquires(&sta->tid_rx_lock) __releases(&sta->tid_rx_lock) { uint i; struct wil6210_priv *wil = vif_to_wil(vif); - struct net_device *ndev = vif_to_ndev(vif); struct wireless_dev *wdev = vif_to_wdev(vif); struct wil_sta_info *sta = &wil->sta[cid]; int min_ring_id = wil_get_min_tx_ring_id(wil); @@ -265,7 +264,7 @@ __acquires(&sta->tid_rx_lock) __releases(&sta->tid_rx_lock) case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: /* AP-like interface */ - cfg80211_del_sta(ndev, sta->addr, GFP_KERNEL); + cfg80211_del_sta(wdev, sta->addr, GFP_KERNEL); break; default: break; diff --git a/drivers/net/wireless/ath/wil6210/wmi.c b/drivers/net/wireless/ath/wil6210/wmi.c index 05b040c684e8..479b2418ca34 100644 --- a/drivers/net/wireless/ath/wil6210/wmi.c +++ b/drivers/net/wireless/ath/wil6210/wmi.c @@ -1076,7 +1076,7 @@ static void wmi_evt_connect(struct wil6210_vif *vif, int id, void *d, int len) if (rc) { if (disable_ap_sme) /* notify new_sta has failed */ - cfg80211_del_sta(ndev, evt->bssid, GFP_KERNEL); + cfg80211_del_sta(wdev, evt->bssid, GFP_KERNEL); goto out; } @@ -1093,7 +1093,8 @@ static void wmi_evt_connect(struct wil6210_vif *vif, int id, void *d, int len) sinfo->assoc_req_ies_len = assoc_req_ielen; } - cfg80211_new_sta(ndev, evt->bssid, sinfo, GFP_KERNEL); + cfg80211_new_sta(ndev->ieee80211_ptr, evt->bssid, sinfo, + GFP_KERNEL); kfree(sinfo); } else { diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index cea02b33b798..f7e17994e59a 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -3132,11 +3132,11 @@ brcmf_cfg80211_get_station_ibss(struct brcmf_if *ifp, } static s32 -brcmf_cfg80211_get_station(struct wiphy *wiphy, struct net_device *ndev, +brcmf_cfg80211_get_station(struct wiphy *wiphy, struct wireless_dev *wdev, const u8 *mac, struct station_info *sinfo) { struct brcmf_cfg80211_info *cfg = wiphy_to_cfg(wiphy); - struct brcmf_if *ifp = netdev_priv(ndev); + struct brcmf_if *ifp = netdev_priv(wdev->netdev); struct brcmf_pub *drvr = cfg->pub; struct brcmf_scb_val_le scb_val; s32 err = 0; @@ -3255,11 +3255,11 @@ done: } static int -brcmf_cfg80211_dump_station(struct wiphy *wiphy, struct net_device *ndev, +brcmf_cfg80211_dump_station(struct wiphy *wiphy, struct wireless_dev *wdev, int idx, u8 *mac, struct station_info *sinfo) { struct brcmf_cfg80211_info *cfg = wiphy_to_cfg(wiphy); - struct brcmf_if *ifp = netdev_priv(ndev); + struct brcmf_if *ifp = netdev_priv(wdev->netdev); struct brcmf_pub *drvr = cfg->pub; s32 err; @@ -3284,7 +3284,8 @@ brcmf_cfg80211_dump_station(struct wiphy *wiphy, struct net_device *ndev, } if (idx < le32_to_cpu(cfg->assoclist.count)) { memcpy(mac, cfg->assoclist.mac[idx], ETH_ALEN); - return brcmf_cfg80211_get_station(wiphy, ndev, mac, sinfo); + return brcmf_cfg80211_get_station(wiphy, wdev, + mac, sinfo); } return -ENOENT; } @@ -5452,12 +5453,13 @@ brcmf_cfg80211_change_beacon(struct wiphy *wiphy, struct net_device *ndev, } static int -brcmf_cfg80211_del_station(struct wiphy *wiphy, struct net_device *ndev, +brcmf_cfg80211_del_station(struct wiphy *wiphy, struct wireless_dev *wdev, struct station_del_parameters *params) { struct brcmf_cfg80211_info *cfg = wiphy_to_cfg(wiphy); struct brcmf_pub *drvr = cfg->pub; struct brcmf_scb_val_le scbval; + struct net_device *ndev = wdev->netdev; struct brcmf_if *ifp = netdev_priv(ndev); s32 err; @@ -5484,12 +5486,12 @@ brcmf_cfg80211_del_station(struct wiphy *wiphy, struct net_device *ndev, } static int -brcmf_cfg80211_change_station(struct wiphy *wiphy, struct net_device *ndev, +brcmf_cfg80211_change_station(struct wiphy *wiphy, struct wireless_dev *wdev, const u8 *mac, struct station_parameters *params) { struct brcmf_cfg80211_info *cfg = wiphy_to_cfg(wiphy); struct brcmf_pub *drvr = cfg->pub; - struct brcmf_if *ifp = netdev_priv(ndev); + struct brcmf_if *ifp = netdev_priv(wdev->netdev); s32 err; brcmf_dbg(TRACE, "Enter, MAC %pM, mask 0x%04x set 0x%04x\n", mac, @@ -6548,13 +6550,14 @@ brcmf_notify_connect_status_ap(struct brcmf_cfg80211_info *cfg, sinfo->assoc_req_ies_len = e->datalen; generation++; sinfo->generation = generation; - cfg80211_new_sta(ndev, e->addr, sinfo, GFP_KERNEL); + cfg80211_new_sta(ndev->ieee80211_ptr, e->addr, sinfo, + GFP_KERNEL); kfree(sinfo); } else if ((event == BRCMF_E_DISASSOC_IND) || (event == BRCMF_E_DEAUTH_IND) || (event == BRCMF_E_DEAUTH)) { - cfg80211_del_sta(ndev, e->addr, GFP_KERNEL); + cfg80211_del_sta(ndev->ieee80211_ptr, e->addr, GFP_KERNEL); } return 0; } diff --git a/drivers/net/wireless/marvell/libertas/cfg.c b/drivers/net/wireless/marvell/libertas/cfg.c index 98517888dba7..56a82b26a1e9 100644 --- a/drivers/net/wireless/marvell/libertas/cfg.c +++ b/drivers/net/wireless/marvell/libertas/cfg.c @@ -1607,7 +1607,7 @@ static int lbs_cfg_del_key(struct wiphy *wiphy, struct net_device *netdev, * Get station */ -static int lbs_cfg_get_station(struct wiphy *wiphy, struct net_device *dev, +static int lbs_cfg_get_station(struct wiphy *wiphy, struct wireless_dev *wdev, const u8 *mac, struct station_info *sinfo) { struct lbs_private *priv = wiphy_priv(wiphy); diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index eb28fe718e71..71e71a5af453 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -1554,10 +1554,10 @@ mwifiex_dump_station_info(struct mwifiex_private *priv, * requested station information, if available. */ static int -mwifiex_cfg80211_get_station(struct wiphy *wiphy, struct net_device *dev, +mwifiex_cfg80211_get_station(struct wiphy *wiphy, struct wireless_dev *wdev, const u8 *mac, struct station_info *sinfo) { - struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev); + struct mwifiex_private *priv = mwifiex_netdev_get_priv(wdev->netdev); if (!priv->media_connected) return -ENOENT; @@ -1571,10 +1571,10 @@ mwifiex_cfg80211_get_station(struct wiphy *wiphy, struct net_device *dev, * CFG802.11 operation handler to dump station information. */ static int -mwifiex_cfg80211_dump_station(struct wiphy *wiphy, struct net_device *dev, +mwifiex_cfg80211_dump_station(struct wiphy *wiphy, struct wireless_dev *wdev, int idx, u8 *mac, struct station_info *sinfo) { - struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev); + struct mwifiex_private *priv = mwifiex_netdev_get_priv(wdev->netdev); struct mwifiex_sta_node *node; int i; @@ -1901,10 +1901,11 @@ static int mwifiex_cfg80211_change_beacon(struct wiphy *wiphy, * associated stations list, no action is taken. */ static int -mwifiex_cfg80211_del_station(struct wiphy *wiphy, struct net_device *dev, +mwifiex_cfg80211_del_station(struct wiphy *wiphy, struct wireless_dev *wdev, struct station_del_parameters *params) { - struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev); + struct mwifiex_private *priv = + mwifiex_netdev_get_priv(wdev->netdev); struct mwifiex_sta_node *sta_node; u8 deauth_mac[ETH_ALEN]; @@ -3993,7 +3994,8 @@ mwifiex_cfg80211_uap_add_station(struct mwifiex_private *priv, const u8 *mac, if (!sinfo) return -ENOMEM; - cfg80211_new_sta(priv->netdev, mac, sinfo, GFP_KERNEL); + cfg80211_new_sta(priv->netdev->ieee80211_ptr, mac, sinfo, + GFP_KERNEL); kfree(sinfo); } @@ -4001,10 +4003,10 @@ mwifiex_cfg80211_uap_add_station(struct mwifiex_private *priv, const u8 *mac, } static int -mwifiex_cfg80211_add_station(struct wiphy *wiphy, struct net_device *dev, +mwifiex_cfg80211_add_station(struct wiphy *wiphy, struct wireless_dev *wdev, const u8 *mac, struct station_parameters *params) { - struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev); + struct mwifiex_private *priv = mwifiex_netdev_get_priv(wdev->netdev); if (priv->adapter->host_mlme_enabled && (GET_BSS_ROLE(priv) == MWIFIEX_BSS_ROLE_UAP)) @@ -4240,12 +4242,12 @@ mwifiex_cfg80211_start_radar_detection(struct wiphy *wiphy, } static int -mwifiex_cfg80211_change_station(struct wiphy *wiphy, struct net_device *dev, +mwifiex_cfg80211_change_station(struct wiphy *wiphy, struct wireless_dev *wdev, const u8 *mac, struct station_parameters *params) { int ret; - struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev); + struct mwifiex_private *priv = mwifiex_netdev_get_priv(wdev->netdev); if (priv->adapter->host_mlme_enabled && (GET_BSS_ROLE(priv) == MWIFIEX_BSS_ROLE_UAP)) diff --git a/drivers/net/wireless/marvell/mwifiex/uap_event.c b/drivers/net/wireless/marvell/mwifiex/uap_event.c index 703104fd1fbe..679fdae0f001 100644 --- a/drivers/net/wireless/marvell/mwifiex/uap_event.c +++ b/drivers/net/wireless/marvell/mwifiex/uap_event.c @@ -130,8 +130,8 @@ int mwifiex_process_uap_event(struct mwifiex_private *priv) le16_to_cpu(event->len) - (u16)len; } } - cfg80211_new_sta(priv->netdev, event->sta_addr, sinfo, - GFP_KERNEL); + cfg80211_new_sta(priv->netdev->ieee80211_ptr, event->sta_addr, + sinfo, GFP_KERNEL); node = mwifiex_add_sta_entry(priv, event->sta_addr); if (!node) { @@ -162,7 +162,8 @@ int mwifiex_process_uap_event(struct mwifiex_private *priv) case EVENT_UAP_STA_DEAUTH: deauth_mac = adapter->event_body + MWIFIEX_UAP_EVENT_EXTRA_HEADER; - cfg80211_del_sta(priv->netdev, deauth_mac, GFP_KERNEL); + cfg80211_del_sta(priv->netdev->ieee80211_ptr, deauth_mac, + GFP_KERNEL); if (priv->ap_11n_enabled) { mwifiex_11n_del_rx_reorder_tbl_by_ta(priv, deauth_mac); diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c index 1561a601c7f2..21ef341e002b 100644 --- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c +++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c @@ -733,9 +733,10 @@ static int set_default_mgmt_key(struct wiphy *wiphy, struct net_device *netdev, return wilc_set_default_mgmt_key_index(vif, key_index); } -static int get_station(struct wiphy *wiphy, struct net_device *dev, +static int get_station(struct wiphy *wiphy, struct wireless_dev *wdev, const u8 *mac, struct station_info *sinfo) { + struct net_device *dev = wdev->netdev; struct wilc_vif *vif = netdev_priv(dev); struct wilc_priv *priv = &vif->priv; struct wilc *wilc = vif->wilc; @@ -1312,10 +1313,10 @@ static int set_cqm_rssi_config(struct wiphy *wiphy, struct net_device *dev, return 0; } -static int dump_station(struct wiphy *wiphy, struct net_device *dev, +static int dump_station(struct wiphy *wiphy, struct wireless_dev *wdev, int idx, u8 *mac, struct station_info *sinfo) { - struct wilc_vif *vif = netdev_priv(dev); + struct wilc_vif *vif = netdev_priv(wdev->netdev); int ret; if (idx != 0) @@ -1450,11 +1451,11 @@ static int stop_ap(struct wiphy *wiphy, struct net_device *dev, return ret; } -static int add_station(struct wiphy *wiphy, struct net_device *dev, +static int add_station(struct wiphy *wiphy, struct wireless_dev *wdev, const u8 *mac, struct station_parameters *params) { int ret = 0; - struct wilc_vif *vif = netdev_priv(dev); + struct wilc_vif *vif = netdev_priv(wdev->netdev); struct wilc_priv *priv = &vif->priv; if (vif->iftype == WILC_AP_MODE || vif->iftype == WILC_GO_MODE) { @@ -1463,18 +1464,18 @@ static int add_station(struct wiphy *wiphy, struct net_device *dev, ret = wilc_add_station(vif, mac, params); if (ret) - netdev_err(dev, "Host add station fail\n"); + netdev_err(wdev->netdev, "Host add station fail\n"); } return ret; } -static int del_station(struct wiphy *wiphy, struct net_device *dev, +static int del_station(struct wiphy *wiphy, struct wireless_dev *wdev, struct station_del_parameters *params) { const u8 *mac = params->mac; int ret = 0; - struct wilc_vif *vif = netdev_priv(dev); + struct wilc_vif *vif = netdev_priv(wdev->netdev); struct wilc_priv *priv = &vif->priv; struct sta_info *info; @@ -1488,20 +1489,19 @@ static int del_station(struct wiphy *wiphy, struct net_device *dev, ret = wilc_del_station(vif, mac); if (ret) - netdev_err(dev, "Host delete station fail\n"); + netdev_err(wdev->netdev, "Host delete station fail\n"); return ret; } -static int change_station(struct wiphy *wiphy, struct net_device *dev, +static int change_station(struct wiphy *wiphy, struct wireless_dev *wdev, const u8 *mac, struct station_parameters *params) { int ret = 0; - struct wilc_vif *vif = netdev_priv(dev); - + struct wilc_vif *vif = netdev_priv(wdev->netdev); if (vif->iftype == WILC_AP_MODE || vif->iftype == WILC_GO_MODE) { ret = wilc_edit_station(vif, mac, params); if (ret) - netdev_err(dev, "Host edit station fail\n"); + netdev_err(wdev->netdev, "Host edit station fail\n"); } return ret; } diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index f1188368e66b..340240847a2f 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -483,26 +483,26 @@ qtnf_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, } static int -qtnf_get_station(struct wiphy *wiphy, struct net_device *dev, +qtnf_get_station(struct wiphy *wiphy, struct wireless_dev *wdev, const u8 *mac, struct station_info *sinfo) { - struct qtnf_vif *vif = qtnf_netdev_get_priv(dev); + struct qtnf_vif *vif = qtnf_netdev_get_priv(wdev->netdev); sinfo->generation = vif->generation; return qtnf_cmd_get_sta_info(vif, mac, sinfo); } static int -qtnf_dump_station(struct wiphy *wiphy, struct net_device *dev, +qtnf_dump_station(struct wiphy *wiphy, struct wireless_dev *wdev, int idx, u8 *mac, struct station_info *sinfo) { - struct qtnf_vif *vif = qtnf_netdev_get_priv(dev); + struct qtnf_vif *vif = qtnf_netdev_get_priv(wdev->netdev); const struct qtnf_sta_node *sta_node; int ret; - switch (vif->wdev.iftype) { + switch (wdev->iftype) { case NL80211_IFTYPE_STATION: - if (idx != 0 || !vif->wdev.connected) + if (idx != 0 || !wdev->connected) return -ENOENT; ether_addr_copy(mac, vif->bssid); @@ -520,9 +520,9 @@ qtnf_dump_station(struct wiphy *wiphy, struct net_device *dev, ret = qtnf_cmd_get_sta_info(vif, mac, sinfo); - if (vif->wdev.iftype == NL80211_IFTYPE_AP) { + if (wdev->iftype == NL80211_IFTYPE_AP) { if (ret == -ENOENT) { - cfg80211_del_sta(vif->netdev, mac, GFP_KERNEL); + cfg80211_del_sta(&vif->wdev, mac, GFP_KERNEL); sinfo->filled = 0; } } @@ -602,10 +602,10 @@ qtnf_set_default_mgmt_key(struct wiphy *wiphy, struct net_device *dev, } static int -qtnf_change_station(struct wiphy *wiphy, struct net_device *dev, +qtnf_change_station(struct wiphy *wiphy, struct wireless_dev *wdev, const u8 *mac, struct station_parameters *params) { - struct qtnf_vif *vif = qtnf_netdev_get_priv(dev); + struct qtnf_vif *vif = qtnf_netdev_get_priv(wdev->netdev); int ret; ret = qtnf_cmd_send_change_sta(vif, mac, params); @@ -617,14 +617,14 @@ qtnf_change_station(struct wiphy *wiphy, struct net_device *dev, } static int -qtnf_del_station(struct wiphy *wiphy, struct net_device *dev, +qtnf_del_station(struct wiphy *wiphy, struct wireless_dev *wdev, struct station_del_parameters *params) { - struct qtnf_vif *vif = qtnf_netdev_get_priv(dev); + struct qtnf_vif *vif = qtnf_netdev_get_priv(wdev->netdev); int ret; if (params->mac && - (vif->wdev.iftype == NL80211_IFTYPE_AP) && + (wdev->iftype == NL80211_IFTYPE_AP) && !is_broadcast_ether_addr(params->mac) && !qtnf_sta_list_lookup(&vif->sta_list, params->mac)) return 0; diff --git a/drivers/net/wireless/quantenna/qtnfmac/event.c b/drivers/net/wireless/quantenna/qtnfmac/event.c index 2551d74ed56e..fb5a56b6dd05 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/event.c +++ b/drivers/net/wireless/quantenna/qtnfmac/event.c @@ -90,8 +90,8 @@ qtnf_event_handle_sta_assoc(struct qtnf_wmac *mac, struct qtnf_vif *vif, goto out; } - cfg80211_new_sta(vif->netdev, sta_assoc->sta_addr, sinfo, - GFP_KERNEL); + cfg80211_new_sta(vif->netdev->ieee80211_ptr, sta_assoc->sta_addr, + sinfo, GFP_KERNEL); out: kfree(sinfo); @@ -126,7 +126,7 @@ qtnf_event_handle_sta_deauth(struct qtnf_wmac *mac, struct qtnf_vif *vif, sta_addr, reason); if (qtnf_sta_list_del(vif, sta_addr)) - cfg80211_del_sta(vif->netdev, sta_deauth->sta_addr, + cfg80211_del_sta(&vif->wdev, sta_deauth->sta_addr, GFP_KERNEL); return 0; diff --git a/drivers/net/wireless/virtual/virt_wifi.c b/drivers/net/wireless/virtual/virt_wifi.c index 885dc7243e8d..bc349c763578 100644 --- a/drivers/net/wireless/virtual/virt_wifi.c +++ b/drivers/net/wireless/virtual/virt_wifi.c @@ -320,9 +320,11 @@ static int virt_wifi_disconnect(struct wiphy *wiphy, struct net_device *netdev, } /* Called with the rtnl lock held. */ -static int virt_wifi_get_station(struct wiphy *wiphy, struct net_device *dev, - const u8 *mac, struct station_info *sinfo) +static int virt_wifi_get_station(struct wiphy *wiphy, + struct wireless_dev *wdev, const u8 *mac, + struct station_info *sinfo) { + struct net_device *dev = wdev->netdev; struct virt_wifi_netdev_priv *priv = netdev_priv(dev); wiphy_debug(wiphy, "get_station\n"); @@ -345,10 +347,10 @@ static int virt_wifi_get_station(struct wiphy *wiphy, struct net_device *dev, } /* Called with the rtnl lock held. */ -static int virt_wifi_dump_station(struct wiphy *wiphy, struct net_device *dev, +static int virt_wifi_dump_station(struct wiphy *wiphy, struct wireless_dev *wdev, int idx, u8 *mac, struct station_info *sinfo) { - struct virt_wifi_netdev_priv *priv = netdev_priv(dev); + struct virt_wifi_netdev_priv *priv = netdev_priv(wdev->netdev); wiphy_debug(wiphy, "dump_station\n"); @@ -356,7 +358,7 @@ static int virt_wifi_dump_station(struct wiphy *wiphy, struct net_device *dev, return -ENOENT; ether_addr_copy(mac, fake_router_bssid); - return virt_wifi_get_station(wiphy, dev, fake_router_bssid, sinfo); + return virt_wifi_get_station(wiphy, wdev, fake_router_bssid, sinfo); } static const struct cfg80211_ops virt_wifi_cfg80211_ops = { diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c index 7cb0c6f22bf3..83422c5c8c44 100644 --- a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c +++ b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c @@ -960,11 +960,12 @@ static int cfg80211_rtw_set_default_key(struct wiphy *wiphy, } static int cfg80211_rtw_get_station(struct wiphy *wiphy, - struct net_device *ndev, + struct wireless_dev *wdev, const u8 *mac, struct station_info *sinfo) { int ret = 0; + struct net_device *ndev = wdev_to_ndev(wdev); struct adapter *padapter = rtw_netdev_priv(ndev); struct mlme_priv *pmlmepriv = &padapter->mlmepriv; struct sta_info *psta = NULL; @@ -1912,7 +1913,7 @@ static int cfg80211_rtw_flush_pmksa(struct wiphy *wiphy, void rtw_cfg80211_indicate_sta_assoc(struct adapter *padapter, u8 *pmgmt_frame, uint frame_len) { - struct net_device *ndev = padapter->pnetdev; + struct wireless_dev *wdev = padapter->rtw_wdev; { struct station_info sinfo = {}; @@ -1926,15 +1927,15 @@ void rtw_cfg80211_indicate_sta_assoc(struct adapter *padapter, u8 *pmgmt_frame, sinfo.filled = 0; sinfo.assoc_req_ies = pmgmt_frame + WLAN_HDR_A3_LEN + ie_offset; sinfo.assoc_req_ies_len = frame_len - WLAN_HDR_A3_LEN - ie_offset; - cfg80211_new_sta(ndev, GetAddr2Ptr(pmgmt_frame), &sinfo, GFP_ATOMIC); + cfg80211_new_sta(wdev, GetAddr2Ptr(pmgmt_frame), &sinfo, GFP_ATOMIC); } } void rtw_cfg80211_indicate_sta_disassoc(struct adapter *padapter, unsigned char *da, unsigned short reason) { - struct net_device *ndev = padapter->pnetdev; + struct wireless_dev *wdev = padapter->rtw_wdev; - cfg80211_del_sta(ndev, da, GFP_ATOMIC); + cfg80211_del_sta(wdev, da, GFP_ATOMIC); } static u8 rtw_get_chan_type(struct adapter *adapter) @@ -2323,21 +2324,22 @@ static int cfg80211_rtw_stop_ap(struct wiphy *wiphy, struct net_device *ndev, } static int cfg80211_rtw_add_station(struct wiphy *wiphy, - struct net_device *ndev, + struct wireless_dev *wdev, const u8 *mac, struct station_parameters *params) { return 0; } -static int cfg80211_rtw_del_station(struct wiphy *wiphy, struct net_device *ndev, +static int cfg80211_rtw_del_station(struct wiphy *wiphy, + struct wireless_dev *wdev, struct station_del_parameters *params) { int ret = 0; struct list_head *phead, *plist, *tmp; u8 updated = false; struct sta_info *psta = NULL; - struct adapter *padapter = rtw_netdev_priv(ndev); + struct adapter *padapter = rtw_netdev_priv(wdev->netdev); struct mlme_priv *pmlmepriv = &(padapter->mlmepriv); struct sta_priv *pstapriv = &padapter->stapriv; const u8 *mac = params->mac; @@ -2388,7 +2390,7 @@ static int cfg80211_rtw_del_station(struct wiphy *wiphy, struct net_device *ndev } static int cfg80211_rtw_change_station(struct wiphy *wiphy, - struct net_device *ndev, + struct wireless_dev *wdev, const u8 *mac, struct station_parameters *params) { @@ -2416,12 +2418,12 @@ static struct sta_info *rtw_sta_info_get_by_idx(const int idx, struct sta_priv * } static int cfg80211_rtw_dump_station(struct wiphy *wiphy, - struct net_device *ndev, + struct wireless_dev *wdev, int idx, u8 *mac, struct station_info *sinfo) { int ret = 0; - struct adapter *padapter = rtw_netdev_priv(ndev); + struct adapter *padapter = rtw_netdev_priv(wdev_to_ndev(wdev)); struct sta_info *psta = NULL; struct sta_priv *pstapriv = &padapter->stapriv; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index cea77bf90cfe..c21354647da0 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4954,17 +4954,17 @@ struct cfg80211_ops { unsigned int link_id); - int (*add_station)(struct wiphy *wiphy, struct net_device *dev, + int (*add_station)(struct wiphy *wiphy, struct wireless_dev *wdev, const u8 *mac, struct station_parameters *params); - int (*del_station)(struct wiphy *wiphy, struct net_device *dev, + int (*del_station)(struct wiphy *wiphy, struct wireless_dev *wdev, struct station_del_parameters *params); - int (*change_station)(struct wiphy *wiphy, struct net_device *dev, + int (*change_station)(struct wiphy *wiphy, struct wireless_dev *wdev, const u8 *mac, struct station_parameters *params); - int (*get_station)(struct wiphy *wiphy, struct net_device *dev, + int (*get_station)(struct wiphy *wiphy, struct wireless_dev *wdev, const u8 *mac, struct station_info *sinfo); - int (*dump_station)(struct wiphy *wiphy, struct net_device *dev, + int (*dump_station)(struct wiphy *wiphy, struct wireless_dev *wdev, int idx, u8 *mac, struct station_info *sinfo); int (*add_mpath)(struct wiphy *wiphy, struct net_device *dev, @@ -8965,35 +8965,35 @@ static inline void cfg80211_sinfo_release_content(struct station_info *sinfo) /** * cfg80211_new_sta - notify userspace about station * - * @dev: the netdev + * @wdev: the wireless device * @mac_addr: the station's address * @sinfo: the station information * @gfp: allocation flags */ -void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, +void cfg80211_new_sta(struct wireless_dev *wdev, const u8 *mac_addr, struct station_info *sinfo, gfp_t gfp); /** * cfg80211_del_sta_sinfo - notify userspace about deletion of a station - * @dev: the netdev + * @wdev: the wireless device * @mac_addr: the station's address. For MLD station, MLD address is used. * @sinfo: the station information/statistics * @gfp: allocation flags */ -void cfg80211_del_sta_sinfo(struct net_device *dev, const u8 *mac_addr, +void cfg80211_del_sta_sinfo(struct wireless_dev *wdev, const u8 *mac_addr, struct station_info *sinfo, gfp_t gfp); /** * cfg80211_del_sta - notify userspace about deletion of a station * - * @dev: the netdev + * @wdev: the wireless device * @mac_addr: the station's address. For MLD station, MLD address is used. * @gfp: allocation flags */ -static inline void cfg80211_del_sta(struct net_device *dev, +static inline void cfg80211_del_sta(struct wireless_dev *wdev, const u8 *mac_addr, gfp_t gfp) { - cfg80211_del_sta_sinfo(dev, mac_addr, NULL, gfp); + cfg80211_del_sta_sinfo(wdev, mac_addr, NULL, gfp); } /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index df5153772592..aa3b86644e8f 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1000,10 +1000,10 @@ void sta_set_rate_info_tx(struct sta_info *sta, rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI; } -static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev, +static int ieee80211_dump_station(struct wiphy *wiphy, struct wireless_dev *wdev, int idx, u8 *mac, struct station_info *sinfo) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct ieee80211_local *local = sdata->local; struct sta_info *sta; int ret = -ENOENT; @@ -1035,10 +1035,11 @@ static int ieee80211_dump_survey(struct wiphy *wiphy, struct net_device *dev, return drv_get_survey(local, idx, survey); } -static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev, +static int ieee80211_get_station(struct wiphy *wiphy, + struct wireless_dev *wdev, const u8 *mac, struct station_info *sinfo) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct ieee80211_local *local = sdata->local; struct sta_info *sta; int ret = -ENOENT; @@ -2363,7 +2364,7 @@ static int sta_apply_parameters(struct ieee80211_local *local, return 0; } -static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, +static int ieee80211_add_station(struct wiphy *wiphy, struct wireless_dev *wdev, const u8 *mac, struct station_parameters *params) { @@ -2381,7 +2382,7 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, sdata->vif.type != NL80211_IFTYPE_AP) return -EINVAL; } else - sdata = IEEE80211_DEV_TO_SUB_IF(dev); + sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); if (ether_addr_equal(mac, sdata->vif.addr)) return -EINVAL; @@ -2435,12 +2436,12 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, return sta_info_insert(sta); } -static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev, +static int ieee80211_del_station(struct wiphy *wiphy, struct wireless_dev *wdev, struct station_del_parameters *params) { struct ieee80211_sub_if_data *sdata; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); + sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); if (params->mac) return sta_info_destroy_addr_bss(sdata, params->mac); @@ -2450,10 +2451,10 @@ static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev, } static int ieee80211_change_station(struct wiphy *wiphy, - struct net_device *dev, const u8 *mac, + struct wireless_dev *wdev, const u8 *mac, struct station_parameters *params) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct ieee80211_local *local = wiphy_priv(wiphy); struct sta_info *sta; struct ieee80211_sub_if_data *vlansdata; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 6dc22f1593be..4259e9c13ed7 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -974,7 +974,7 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) } sinfo->generation = local->sta_generation; - cfg80211_new_sta(sdata->dev, sta->sta.addr, sinfo, GFP_KERNEL); + cfg80211_new_sta(&sdata->wdev, sta->sta.addr, sinfo, GFP_KERNEL); kfree(sinfo); sta_dbg(sdata, "Inserted STA %pM\n", sta->sta.addr); @@ -1557,7 +1557,7 @@ static void __sta_info_destroy_part2(struct sta_info *sta, bool recalc) sta_dbg(sdata, "Removed STA %pM\n", sta->sta.addr); - cfg80211_del_sta_sinfo(sdata->dev, sta->sta.addr, sinfo, GFP_KERNEL); + cfg80211_del_sta_sinfo(&sdata->wdev, sta->sta.addr, sinfo, GFP_KERNEL); kfree(sinfo); ieee80211_sta_debugfs_remove(sta); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 3486475dc119..f54b3cca6975 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -7507,7 +7507,7 @@ nla_put_failure: static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, u32 seq, int flags, struct cfg80211_registered_device *rdev, - struct net_device *dev, + struct wireless_dev *wdev, const u8 *mac_addr, struct station_info *sinfo, bool link_stats) { @@ -7523,7 +7523,10 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, return -1; } - if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || + if ((wdev->netdev && + nla_put_u32(msg, NL80211_ATTR_IFINDEX, wdev->netdev->ifindex)) || + nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), + NL80211_ATTR_PAD) || nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr) || nla_put_u32(msg, NL80211_ATTR_GENERATION, sinfo->generation)) goto nla_put_failure; @@ -8002,7 +8005,7 @@ static int nl80211_dump_station(struct sk_buff *skb, sinfo_alloc = true; } - err = rdev_dump_station(rdev, wdev->netdev, sta_idx, + err = rdev_dump_station(rdev, wdev, sta_idx, mac_addr, &sinfo); if (err == -ENOENT) break; @@ -8020,7 +8023,7 @@ static int nl80211_dump_station(struct sk_buff *skb, if (nl80211_send_station(skb, NL80211_CMD_NEW_STATION, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - rdev, wdev->netdev, mac_addr, + rdev, wdev, mac_addr, &sinfo, false) < 0) goto out; @@ -8041,7 +8044,7 @@ static int nl80211_dump_station(struct sk_buff *skb, static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; - struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = info->user_ptr[1]; struct station_info sinfo; struct sk_buff *msg; u8 *mac_addr = NULL; @@ -8049,6 +8052,9 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) memset(&sinfo, 0, sizeof(sinfo)); + if (!wdev->netdev) + return -EINVAL; + if (!info->attrs[NL80211_ATTR_MAC]) return -EINVAL; @@ -8065,7 +8071,7 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) } } - err = rdev_get_station(rdev, dev, mac_addr, &sinfo); + err = rdev_get_station(rdev, wdev, mac_addr, &sinfo); if (err) { cfg80211_sinfo_release_content(&sinfo); return err; @@ -8082,7 +8088,7 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) if (nl80211_send_station(msg, NL80211_CMD_NEW_STATION, info->snd_portid, info->snd_seq, 0, - rdev, dev, mac_addr, &sinfo, false) < 0) { + rdev, wdev, mac_addr, &sinfo, false) < 0) { nlmsg_free(msg); return -ENOBUFS; } @@ -8444,13 +8450,17 @@ static int nl80211_parse_sta_txpower_setting(struct genl_info *info, static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; - struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = info->user_ptr[1]; + struct net_device *dev = wdev->netdev; struct station_parameters params; u8 *mac_addr; int err; memset(¶ms, 0, sizeof(params)); + if (!dev) + return -EINVAL; + if (!rdev->ops->change_station) return -EOPNOTSUPP; @@ -8523,7 +8533,7 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) nla_len(info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]); } - if (parse_station_flags(info, dev->ieee80211_ptr->iftype, ¶ms)) + if (parse_station_flags(info, wdev->iftype, ¶ms)) return -EINVAL; if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) @@ -8583,7 +8593,7 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(params.vlan)) return PTR_ERR(params.vlan); - switch (dev->ieee80211_ptr->iftype) { + switch (wdev->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_P2P_GO: @@ -8598,7 +8608,7 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) } /* driver will call cfg80211_check_station_change() */ - err = rdev_change_station(rdev, dev, mac_addr, ¶ms); + err = rdev_change_station(rdev, wdev, mac_addr, ¶ms); out_put_vlan: dev_put(params.vlan); @@ -8610,8 +8620,8 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; int err; - struct net_device *dev = info->user_ptr[1]; - struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wireless_dev *wdev = info->user_ptr[1]; + struct net_device *dev = wdev->netdev; struct station_parameters params; u8 *mac_addr = NULL; u32 auth_assoc = BIT(NL80211_STA_FLAG_AUTHENTICATED) | @@ -8619,6 +8629,9 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) memset(¶ms, 0, sizeof(params)); + if (!dev) + return -EINVAL; + if (!rdev->ops->add_station) return -EOPNOTSUPP; @@ -8668,7 +8681,7 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) * and is NOT supported for AP interface */ params.support_p2p_ps = - dev->ieee80211_ptr->iftype == NL80211_IFTYPE_P2P_GO; + wdev->iftype == NL80211_IFTYPE_P2P_GO; } if (info->attrs[NL80211_ATTR_PEER_AID]) @@ -8774,7 +8787,7 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) if (err) return err; - if (parse_station_flags(info, dev->ieee80211_ptr->iftype, ¶ms)) + if (parse_station_flags(info, wdev->iftype, ¶ms)) return -EINVAL; /* HT/VHT requires QoS, but if we don't have that just ignore HT/VHT @@ -8802,7 +8815,7 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) /* When you run into this, adjust the code below for the new flag */ BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 8); - switch (dev->ieee80211_ptr->iftype) { + switch (wdev->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_P2P_GO: @@ -8911,7 +8924,7 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) params.epp_peer = nla_get_flag(info->attrs[NL80211_ATTR_EPP_PEER]); - err = rdev_add_station(rdev, dev, mac_addr, ¶ms); + err = rdev_add_station(rdev, wdev, mac_addr, ¶ms); out: dev_put(params.vlan); return err; @@ -8920,13 +8933,16 @@ out: static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; - struct net_device *dev = info->user_ptr[1]; - struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wireless_dev *wdev = info->user_ptr[1]; + struct net_device *dev = wdev->netdev; struct station_del_parameters params; int link_id = nl80211_link_id_or_invalid(info->attrs); memset(¶ms, 0, sizeof(params)); + if (!dev) + return -EINVAL; + if (info->attrs[NL80211_ATTR_MAC]) params.mac = nla_data(info->attrs[NL80211_ATTR_MAC]); @@ -8982,7 +8998,7 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info) params.link_id = link_id; - return rdev_del_station(rdev, dev, ¶ms); + return rdev_del_station(rdev, wdev, ¶ms); } static int nl80211_send_mpath(struct sk_buff *msg, u32 portid, u32 seq, @@ -14241,7 +14257,7 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, mac_addr = wdev->links[0].client.current_bss->pub.bssid; - err = rdev_get_station(rdev, dev, mac_addr, &sinfo); + err = rdev_get_station(rdev, wdev, mac_addr, &sinfo); if (err) return err; @@ -17342,7 +17358,7 @@ static int nl80211_probe_mesh_link(struct sk_buff *skb, struct genl_info *info) !ether_addr_equal(buf + ETH_ALEN, dev->dev_addr)) return -EINVAL; - err = rdev_get_station(rdev, dev, dest, &sinfo); + err = rdev_get_station(rdev, wdev, dest, &sinfo); if (err) return err; @@ -18424,21 +18440,21 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_station, .dumpit = nl80211_dump_station, - .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV), + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV), }, { .cmd = NL80211_CMD_SET_STATION, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_station, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP), }, { .cmd = NL80211_CMD_NEW_STATION, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_new_station, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP), }, { .cmd = NL80211_CMD_DEL_STATION, @@ -18449,7 +18465,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { * whether MAC address is passed or not. If MAC address is * passed, then even during MLO, link ID is not required. */ - .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP), }, { .cmd = NL80211_CMD_GET_MPATH, @@ -20380,21 +20396,21 @@ void cfg80211_tx_mgmt_expired(struct wireless_dev *wdev, u64 cookie, } EXPORT_SYMBOL(cfg80211_tx_mgmt_expired); -void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, +void cfg80211_new_sta(struct wireless_dev *wdev, const u8 *mac_addr, struct station_info *sinfo, gfp_t gfp) { - struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; + struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; - trace_cfg80211_new_sta(dev, mac_addr, sinfo); + trace_cfg80211_new_sta(wdev, mac_addr, sinfo); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; if (nl80211_send_station(msg, NL80211_CMD_NEW_STATION, 0, 0, 0, - rdev, dev, mac_addr, sinfo, false) < 0) { + rdev, wdev, mac_addr, sinfo, false) < 0) { nlmsg_free(msg); return; } @@ -20404,10 +20420,10 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, } EXPORT_SYMBOL(cfg80211_new_sta); -void cfg80211_del_sta_sinfo(struct net_device *dev, const u8 *mac_addr, +void cfg80211_del_sta_sinfo(struct wireless_dev *wdev, const u8 *mac_addr, struct station_info *sinfo, gfp_t gfp) { - struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; + struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; struct station_info empty_sinfo = {}; @@ -20415,7 +20431,7 @@ void cfg80211_del_sta_sinfo(struct net_device *dev, const u8 *mac_addr, if (!sinfo) sinfo = &empty_sinfo; - trace_cfg80211_del_sta(dev, mac_addr); + trace_cfg80211_del_sta(wdev, mac_addr); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) { @@ -20424,7 +20440,7 @@ void cfg80211_del_sta_sinfo(struct net_device *dev, const u8 *mac_addr, } if (nl80211_send_station(msg, NL80211_CMD_DEL_STATION, 0, 0, 0, - rdev, dev, mac_addr, sinfo, false) < 0) { + rdev, wdev, mac_addr, sinfo, false) < 0) { nlmsg_free(msg); return; } diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index ac6884bacf3f..a8f1e7ddc0c0 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -193,56 +193,56 @@ static inline int rdev_stop_ap(struct cfg80211_registered_device *rdev, } static inline int rdev_add_station(struct cfg80211_registered_device *rdev, - struct net_device *dev, u8 *mac, + struct wireless_dev *wdev, u8 *mac, struct station_parameters *params) { int ret; - trace_rdev_add_station(&rdev->wiphy, dev, mac, params); - ret = rdev->ops->add_station(&rdev->wiphy, dev, mac, params); + trace_rdev_add_station(&rdev->wiphy, wdev, mac, params); + ret = rdev->ops->add_station(&rdev->wiphy, wdev, mac, params); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_del_station(struct cfg80211_registered_device *rdev, - struct net_device *dev, + struct wireless_dev *wdev, struct station_del_parameters *params) { int ret; - trace_rdev_del_station(&rdev->wiphy, dev, params); - ret = rdev->ops->del_station(&rdev->wiphy, dev, params); + trace_rdev_del_station(&rdev->wiphy, wdev, params); + ret = rdev->ops->del_station(&rdev->wiphy, wdev, params); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_change_station(struct cfg80211_registered_device *rdev, - struct net_device *dev, u8 *mac, + struct wireless_dev *wdev, u8 *mac, struct station_parameters *params) { int ret; - trace_rdev_change_station(&rdev->wiphy, dev, mac, params); - ret = rdev->ops->change_station(&rdev->wiphy, dev, mac, params); + trace_rdev_change_station(&rdev->wiphy, wdev, mac, params); + ret = rdev->ops->change_station(&rdev->wiphy, wdev, mac, params); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_get_station(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *mac, + struct wireless_dev *wdev, const u8 *mac, struct station_info *sinfo) { int ret; - trace_rdev_get_station(&rdev->wiphy, dev, mac); - ret = rdev->ops->get_station(&rdev->wiphy, dev, mac, sinfo); + trace_rdev_get_station(&rdev->wiphy, wdev, mac); + ret = rdev->ops->get_station(&rdev->wiphy, wdev, mac, sinfo); trace_rdev_return_int_station_info(&rdev->wiphy, ret, sinfo); return ret; } static inline int rdev_dump_station(struct cfg80211_registered_device *rdev, - struct net_device *dev, int idx, u8 *mac, + struct wireless_dev *wdev, int idx, u8 *mac, struct station_info *sinfo) { int ret; - trace_rdev_dump_station(&rdev->wiphy, dev, idx, mac); - ret = rdev->ops->dump_station(&rdev->wiphy, dev, idx, mac, sinfo); + trace_rdev_dump_station(&rdev->wiphy, wdev, idx, mac); + ret = rdev->ops->dump_station(&rdev->wiphy, wdev, idx, mac, sinfo); trace_rdev_return_int_station_info(&rdev->wiphy, ret, sinfo); return ret; } diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 352a57d8b968..8ab78a899f57 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -856,12 +856,12 @@ TRACE_EVENT(rdev_end_cac, ); DECLARE_EVENT_CLASS(station_add_change, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 *mac, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, u8 *mac, struct station_parameters *params), - TP_ARGS(wiphy, netdev, mac, params), + TP_ARGS(wiphy, wdev, mac, params), TP_STRUCT__entry( WIPHY_ENTRY - NETDEV_ENTRY + WDEV_ENTRY MAC_ENTRY(sta_mac) __field(u32, sta_flags_mask) __field(u32, sta_flags_set) @@ -888,7 +888,7 @@ DECLARE_EVENT_CLASS(station_add_change, ), TP_fast_assign( WIPHY_ASSIGN; - NETDEV_ASSIGN; + WDEV_ASSIGN; MAC_ASSIGN(sta_mac, mac); __entry->sta_flags_mask = params->sta_flags_mask; __entry->sta_flags_set = params->sta_flags_set; @@ -936,11 +936,11 @@ DECLARE_EVENT_CLASS(station_add_change, __entry->opmode_notif_used = params->link_sta_params.opmode_notif_used; ), - TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: %pM" + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", station mac: %pM" ", station flags mask: 0x%x, station flags set: 0x%x, " "station modify mask: 0x%x, listen interval: %d, aid: %u, " "plink action: %u, plink state: %u, uapsd queues: %u, vlan:%s", - WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->sta_mac, + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->sta_mac, __entry->sta_flags_mask, __entry->sta_flags_set, __entry->sta_modify_mask, __entry->listen_interval, __entry->aid, __entry->plink_action, __entry->plink_state, @@ -948,15 +948,15 @@ DECLARE_EVENT_CLASS(station_add_change, ); DEFINE_EVENT(station_add_change, rdev_add_station, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 *mac, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, u8 *mac, struct station_parameters *params), - TP_ARGS(wiphy, netdev, mac, params) + TP_ARGS(wiphy, wdev, mac, params) ); DEFINE_EVENT(station_add_change, rdev_change_station, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 *mac, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, u8 *mac, struct station_parameters *params), - TP_ARGS(wiphy, netdev, mac, params) + TP_ARGS(wiphy, wdev, mac, params) ); DECLARE_EVENT_CLASS(wiphy_netdev_mac_evt, @@ -977,12 +977,12 @@ DECLARE_EVENT_CLASS(wiphy_netdev_mac_evt, ); DECLARE_EVENT_CLASS(station_del, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, struct station_del_parameters *params), - TP_ARGS(wiphy, netdev, params), + TP_ARGS(wiphy, wdev, params), TP_STRUCT__entry( WIPHY_ENTRY - NETDEV_ENTRY + WDEV_ENTRY MAC_ENTRY(sta_mac) __field(u8, subtype) __field(u16, reason_code) @@ -990,28 +990,45 @@ DECLARE_EVENT_CLASS(station_del, ), TP_fast_assign( WIPHY_ASSIGN; - NETDEV_ASSIGN; + WDEV_ASSIGN; MAC_ASSIGN(sta_mac, params->mac); __entry->subtype = params->subtype; __entry->reason_code = params->reason_code; __entry->link_id = params->link_id; ), - TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: %pM" + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", station mac: %pM" ", subtype: %u, reason_code: %u, link_id: %d", - WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->sta_mac, + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->sta_mac, __entry->subtype, __entry->reason_code, __entry->link_id) ); DEFINE_EVENT(station_del, rdev_del_station, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, struct station_del_parameters *params), - TP_ARGS(wiphy, netdev, params) + TP_ARGS(wiphy, wdev, params) ); -DEFINE_EVENT(wiphy_netdev_mac_evt, rdev_get_station, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, const u8 *mac), - TP_ARGS(wiphy, netdev, mac) +DECLARE_EVENT_CLASS(wiphy_wdev_mac_evt, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, const u8 *mac), + TP_ARGS(wiphy, wdev, mac), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + MAC_ENTRY(sta_mac) + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + MAC_ASSIGN(sta_mac, mac); + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", mac: %pM", + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->sta_mac) +); + +DEFINE_EVENT(wiphy_wdev_mac_evt, rdev_get_station, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, const u8 *mac), + TP_ARGS(wiphy, wdev, mac) ); DEFINE_EVENT(wiphy_netdev_mac_evt, rdev_del_mpath, @@ -1020,23 +1037,23 @@ DEFINE_EVENT(wiphy_netdev_mac_evt, rdev_del_mpath, ); TRACE_EVENT(rdev_dump_station, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int _idx, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, int _idx, u8 *mac), - TP_ARGS(wiphy, netdev, _idx, mac), + TP_ARGS(wiphy, wdev, _idx, mac), TP_STRUCT__entry( WIPHY_ENTRY - NETDEV_ENTRY + WDEV_ENTRY MAC_ENTRY(sta_mac) __field(int, idx) ), TP_fast_assign( WIPHY_ASSIGN; - NETDEV_ASSIGN; + WDEV_ASSIGN; MAC_ASSIGN(sta_mac, mac); __entry->idx = _idx; ), - TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: %pM, idx: %d", - WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->sta_mac, + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", station mac: %pM, idx: %d", + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->sta_mac, __entry->idx) ); @@ -3153,6 +3170,21 @@ DECLARE_EVENT_CLASS(cfg80211_netdev_mac_evt, NETDEV_PR_ARG, __entry->macaddr) ); +DECLARE_EVENT_CLASS(cfg80211_wdev_mac_evt, + TP_PROTO(struct wireless_dev *wdev, const u8 *macaddr), + TP_ARGS(wdev, macaddr), + TP_STRUCT__entry( + WDEV_ENTRY + MAC_ENTRY(macaddr) + ), + TP_fast_assign( + WDEV_ASSIGN; + MAC_ASSIGN(macaddr, macaddr); + ), + TP_printk(WDEV_PR_FMT ", mac: %pM", + WDEV_PR_ARG, __entry->macaddr) +); + DEFINE_EVENT(cfg80211_netdev_mac_evt, cfg80211_notify_new_peer_candidate, TP_PROTO(struct net_device *netdev, const u8 *macaddr), TP_ARGS(netdev, macaddr) @@ -3342,26 +3374,26 @@ TRACE_EVENT(cfg80211_tx_mgmt_expired, ); TRACE_EVENT(cfg80211_new_sta, - TP_PROTO(struct net_device *netdev, const u8 *mac_addr, + TP_PROTO(struct wireless_dev *wdev, const u8 *mac_addr, struct station_info *sinfo), - TP_ARGS(netdev, mac_addr, sinfo), + TP_ARGS(wdev, mac_addr, sinfo), TP_STRUCT__entry( - NETDEV_ENTRY + WDEV_ENTRY MAC_ENTRY(mac_addr) SINFO_ENTRY ), TP_fast_assign( - NETDEV_ASSIGN; + WDEV_ASSIGN; MAC_ASSIGN(mac_addr, mac_addr); SINFO_ASSIGN; ), - TP_printk(NETDEV_PR_FMT ", %pM", - NETDEV_PR_ARG, __entry->mac_addr) + TP_printk(WDEV_PR_FMT ", %pM", + WDEV_PR_ARG, __entry->mac_addr) ); -DEFINE_EVENT(cfg80211_netdev_mac_evt, cfg80211_del_sta, - TP_PROTO(struct net_device *netdev, const u8 *macaddr), - TP_ARGS(netdev, macaddr) +DEFINE_EVENT(cfg80211_wdev_mac_evt, cfg80211_del_sta, + TP_PROTO(struct wireless_dev *wdev, const u8 *macaddr), + TP_ARGS(wdev, macaddr) ); TRACE_EVENT(cfg80211_rx_mgmt, diff --git a/net/wireless/util.c b/net/wireless/util.c index b78530c3e3f8..702904048d5a 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -2669,7 +2669,7 @@ int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr, guard(wiphy)(&rdev->wiphy); - return rdev_get_station(rdev, dev, mac_addr, sinfo); + return rdev_get_station(rdev, wdev, mac_addr, sinfo); } EXPORT_SYMBOL(cfg80211_get_station); diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 5a70a0120343..98a4f4c7970d 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -7,7 +7,7 @@ * we directly assign the wireless handlers of wireless interfaces. * * Copyright 2008-2009 Johannes Berg - * Copyright (C) 2019-2023 Intel Corporation + * Copyright (C) 2019-2023, 2026 Intel Corporation */ #include @@ -1261,7 +1261,7 @@ static int cfg80211_wext_giwrate(struct net_device *dev, return err; scoped_guard(wiphy, &rdev->wiphy) { - err = rdev_get_station(rdev, dev, addr, &sinfo); + err = rdev_get_station(rdev, wdev, addr, &sinfo); } if (err) return err; @@ -1305,7 +1305,7 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) memset(&sinfo, 0, sizeof(sinfo)); - ret = rdev_get_station(rdev, dev, bssid, &sinfo); + ret = rdev_get_station(rdev, wdev, bssid, &sinfo); wiphy_unlock(&rdev->wiphy); if (ret) From 9e2f7f4a2c0ac937ab9dbadc9dc5db9f72d83616 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Thu, 19 Feb 2026 11:47:14 +0200 Subject: [PATCH 0136/1561] wifi: cfg80211: refactor wiphy_suspend The sequence of operations that needs to be done in wiphy_suspend is identical for the case where there is no wowlan configured, and for the case that it is but the driver refused to do wowlan (by returning 1 from rdev_suspend). The current code duplicates this set of operations for each one of the cases. In particular, next patch will change the locking of cfg80211_leave_all to not hold the wiphy lock, which will be easier to do if it is not called twice. Change the code to handle first the case that wowlan is configured, and then handle both cases (driver refused to do wowlan and no wowlan configured) in one place. Note that this changes the behaviour to set suspended=true also when we were not registered yet, but that makes sense anyway, as wiphy works can be queued also before registration. Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260108102921.00336669ac32.Id76f272662e1315cd93a628808cc2d1625036b00@changeid Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260219094725.3846371-2-miriam.rachel.korenblit@intel.com Signed-off-by: Johannes Berg --- net/wireless/sysfs.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 2e0ea69b9604..3385a27468f7 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -99,26 +99,31 @@ static int wiphy_suspend(struct device *dev) rdev->suspend_at = ktime_get_boottime_seconds(); rtnl_lock(); + if (!rdev->wiphy.registered) + goto out_unlock_rtnl; + wiphy_lock(&rdev->wiphy); - if (rdev->wiphy.registered) { - if (!rdev->wiphy.wowlan_config) { - cfg80211_leave_all(rdev); - cfg80211_process_rdev_events(rdev); - } + if (rdev->wiphy.wowlan_config) { cfg80211_process_wiphy_works(rdev, NULL); if (rdev->ops->suspend) ret = rdev_suspend(rdev, rdev->wiphy.wowlan_config); - if (ret == 1) { - /* Driver refuse to configure wowlan */ - cfg80211_leave_all(rdev); - cfg80211_process_rdev_events(rdev); - cfg80211_process_wiphy_works(rdev, NULL); - ret = rdev_suspend(rdev, NULL); - } - if (ret == 0) - rdev->suspended = true; + if (ret <= 0) + goto out_unlock_wiphy; } + + /* Driver refused to configure wowlan (ret = 1) or no wowlan */ + + cfg80211_leave_all(rdev); + cfg80211_process_rdev_events(rdev); + cfg80211_process_wiphy_works(rdev, NULL); + if (rdev->ops->suspend) + ret = rdev_suspend(rdev, NULL); + +out_unlock_wiphy: wiphy_unlock(&rdev->wiphy); +out_unlock_rtnl: + if (ret == 0) + rdev->suspended = true; rtnl_unlock(); return ret; From a34951ef56b0fc2ce2feff32a1ac71dbd8e3b998 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Thu, 19 Feb 2026 11:47:15 +0200 Subject: [PATCH 0137/1561] wifi: nl80211: don't allow DFS channels for NAN NAN cannot use DFS channels. Mark DFS channels as unusable if the chandef is to be used for NAN. Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260108102921.c2a5a0a14b9f.Idca29fb8a235df980e63b733a298fd1f2bdf2f48@changeid Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260219094725.3846371-3-miriam.rachel.korenblit@intel.com Signed-off-by: Johannes Berg --- net/wireless/chan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/chan.c b/net/wireless/chan.c index dfe319565280..d9d4e043bb39 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -781,6 +781,7 @@ int cfg80211_chandef_dfs_required(struct wiphy *wiphy, case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_MESH_POINT: + case NL80211_IFTYPE_NAN: width = cfg80211_chandef_get_width(chandef); if (width < 0) return -EINVAL; @@ -795,7 +796,6 @@ int cfg80211_chandef_dfs_required(struct wiphy *wiphy, case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_P2P_DEVICE: - case NL80211_IFTYPE_NAN: break; case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_UNSPECIFIED: From 0495b64132154dd04ed5d443bb35afd3769a13a6 Mon Sep 17 00:00:00 2001 From: Sriram R Date: Fri, 20 Feb 2026 01:12:41 +0530 Subject: [PATCH 0138/1561] wifi: mac80211: fetch FILS discovery template by link ID Currently, the FILS discovery template is always fetched from the default link of a virtual interface in both Multi-Link Operation (MLO) and non-MLO cases. However, in the MLO case there is a need to fetch the FILS discovery template from a specific link instead of the default link. Hence, add support for fetching the FILS discovery template based on the link ID from the corresponding link data. Signed-off-by: Sriram R Co-developed-by: Raj Kumar Bhagat Signed-off-by: Raj Kumar Bhagat Link: https://patch.msgid.link/20260220-fils-prob-by-link-v1-1-a2746a853f75@oss.qualcomm.com Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath11k/mac.c | 2 +- drivers/net/wireless/ath/ath12k/mac.c | 3 ++- .../net/wireless/mediatek/mt76/mt7915/mcu.c | 2 +- .../net/wireless/mediatek/mt76/mt7996/mcu.c | 3 ++- include/net/mac80211.h | 4 +++- net/mac80211/tx.c | 20 ++++++++++++------- 6 files changed, 22 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index e4ee2ba1f669..dda77f87461e 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -3305,7 +3305,7 @@ static int ath11k_mac_fils_discovery(struct ath11k_vif *arvif, if (info->fils_discovery.max_interval) { interval = info->fils_discovery.max_interval; - tmpl = ieee80211_get_fils_discovery_tmpl(ar->hw, arvif->vif); + tmpl = ieee80211_get_fils_discovery_tmpl(ar->hw, arvif->vif, 0); if (tmpl) ret = ath11k_wmi_fils_discovery_tmpl(ar, arvif->vdev_id, tmpl); diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index c6b88909b6b7..af57ac10d517 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -4311,7 +4311,8 @@ static int ath12k_mac_fils_discovery(struct ath12k_link_vif *arvif, if (info->fils_discovery.max_interval) { interval = info->fils_discovery.max_interval; - tmpl = ieee80211_get_fils_discovery_tmpl(hw, vif); + tmpl = ieee80211_get_fils_discovery_tmpl(hw, vif, + info->link_id); if (tmpl) ret = ath12k_wmi_fils_discovery_tmpl(ar, arvif->vdev_id, tmpl); diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c index 00bff4d3aab8..83ce06857a1e 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c @@ -1977,7 +1977,7 @@ mt7915_mcu_add_inband_discov(struct mt7915_dev *dev, struct ieee80211_vif *vif, if (changed & BSS_CHANGED_FILS_DISCOVERY) { interval = vif->bss_conf.fils_discovery.max_interval; - skb = ieee80211_get_fils_discovery_tmpl(hw, vif); + skb = ieee80211_get_fils_discovery_tmpl(hw, vif, 0); } else if (changed & BSS_CHANGED_UNSOL_BCAST_PROBE_RESP && vif->bss_conf.unsol_bcast_probe_resp_interval) { interval = vif->bss_conf.unsol_bcast_probe_resp_interval; diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c index c0c042de477b..968afc2967a8 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c @@ -2863,7 +2863,8 @@ int mt7996_mcu_beacon_inband_discov(struct mt7996_dev *dev, if (changed & BSS_CHANGED_FILS_DISCOVERY && link_conf->fils_discovery.max_interval) { interval = link_conf->fils_discovery.max_interval; - skb = ieee80211_get_fils_discovery_tmpl(hw, vif); + skb = ieee80211_get_fils_discovery_tmpl(hw, vif, + link_conf->link_id); } else if (changed & BSS_CHANGED_UNSOL_BCAST_PROBE_RESP && link_conf->unsol_bcast_probe_resp_interval) { interval = link_conf->unsol_bcast_probe_resp_interval; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 7f9d96939a4e..d36c14a86c8a 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -7766,13 +7766,15 @@ u32 ieee80211_calc_tx_airtime(struct ieee80211_hw *hw, * ieee80211_get_fils_discovery_tmpl - Get FILS discovery template. * @hw: pointer obtained from ieee80211_alloc_hw(). * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @link_id: valid link_id during MLO or 0 for non-MLO. * * The driver is responsible for freeing the returned skb. * * Return: FILS discovery template. %NULL on error. */ struct sk_buff *ieee80211_get_fils_discovery_tmpl(struct ieee80211_hw *hw, - struct ieee80211_vif *vif); + struct ieee80211_vif *vif, + unsigned int link_id); /** * ieee80211_get_unsol_bcast_probe_resp_tmpl - Get unsolicited broadcast diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 8cdbd417d7be..77ad85a49924 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -5837,21 +5837,28 @@ out: EXPORT_SYMBOL(ieee80211_proberesp_get); struct sk_buff *ieee80211_get_fils_discovery_tmpl(struct ieee80211_hw *hw, - struct ieee80211_vif *vif) + struct ieee80211_vif *vif, + unsigned int link_id) { struct sk_buff *skb = NULL; struct fils_discovery_data *tmpl = NULL; struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_link_data *link; if (sdata->vif.type != NL80211_IFTYPE_AP) return NULL; - rcu_read_lock(); - tmpl = rcu_dereference(sdata->deflink.u.ap.fils_discovery); - if (!tmpl) { - rcu_read_unlock(); + if (link_id >= IEEE80211_MLD_MAX_NUM_LINKS) + return NULL; + + guard(rcu)(); + link = rcu_dereference(sdata->link[link_id]); + if (!link) + return NULL; + + tmpl = rcu_dereference(link->u.ap.fils_discovery); + if (!tmpl) return NULL; - } skb = dev_alloc_skb(sdata->local->hw.extra_tx_headroom + tmpl->len); if (skb) { @@ -5859,7 +5866,6 @@ struct sk_buff *ieee80211_get_fils_discovery_tmpl(struct ieee80211_hw *hw, skb_put_data(skb, tmpl->data, tmpl->len); } - rcu_read_unlock(); return skb; } EXPORT_SYMBOL(ieee80211_get_fils_discovery_tmpl); From e098c26b3524b6a8087dfc8f664d7cc76d30ecc2 Mon Sep 17 00:00:00 2001 From: Sriram R Date: Fri, 20 Feb 2026 01:12:42 +0530 Subject: [PATCH 0139/1561] wifi: mac80211: fetch unsolicited probe response template by link ID Currently, the unsolicited probe response template is always fetched from the default link of a virtual interface in both Multi-Link Operation (MLO) and non-MLO cases. However, in the MLO case there is a need to fetch the unsolicited probe response template from a specific link instead of the default link. Hence, add support for fetching the unsolicited probe response template based on the link ID from the corresponding link data. Signed-off-by: Sriram R Co-developed-by: Raj Kumar Bhagat Signed-off-by: Raj Kumar Bhagat Link: https://patch.msgid.link/20260220-fils-prob-by-link-v1-2-a2746a853f75@oss.qualcomm.com Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath11k/mac.c | 2 +- drivers/net/wireless/ath/ath12k/mac.c | 3 ++- .../net/wireless/mediatek/mt76/mt7915/mcu.c | 2 +- .../net/wireless/mediatek/mt76/mt7996/mcu.c | 3 ++- include/net/mac80211.h | 4 +++- net/mac80211/tx.c | 20 ++++++++++++------- 6 files changed, 22 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index dda77f87461e..ca08de6bbe85 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -3314,7 +3314,7 @@ static int ath11k_mac_fils_discovery(struct ath11k_vif *arvif, interval = info->unsol_bcast_probe_resp_interval; tmpl = ieee80211_get_unsol_bcast_probe_resp_tmpl(ar->hw, - arvif->vif); + arvif->vif, 0); if (tmpl) ret = ath11k_wmi_probe_resp_tmpl(ar, arvif->vdev_id, tmpl); diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index af57ac10d517..275263f77f44 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -4320,7 +4320,8 @@ static int ath12k_mac_fils_discovery(struct ath12k_link_vif *arvif, unsol_bcast_probe_resp_enabled = 1; interval = info->unsol_bcast_probe_resp_interval; - tmpl = ieee80211_get_unsol_bcast_probe_resp_tmpl(hw, vif); + tmpl = ieee80211_get_unsol_bcast_probe_resp_tmpl(hw, vif, + info->link_id); if (tmpl) ret = ath12k_wmi_probe_resp_tmpl(ar, arvif->vdev_id, tmpl); diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c index 83ce06857a1e..2d2f34aa465d 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c @@ -1981,7 +1981,7 @@ mt7915_mcu_add_inband_discov(struct mt7915_dev *dev, struct ieee80211_vif *vif, } else if (changed & BSS_CHANGED_UNSOL_BCAST_PROBE_RESP && vif->bss_conf.unsol_bcast_probe_resp_interval) { interval = vif->bss_conf.unsol_bcast_probe_resp_interval; - skb = ieee80211_get_unsol_bcast_probe_resp_tmpl(hw, vif); + skb = ieee80211_get_unsol_bcast_probe_resp_tmpl(hw, vif, 0); } if (!skb) { diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c index 968afc2967a8..b4422a4754cd 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c @@ -2868,7 +2868,8 @@ int mt7996_mcu_beacon_inband_discov(struct mt7996_dev *dev, } else if (changed & BSS_CHANGED_UNSOL_BCAST_PROBE_RESP && link_conf->unsol_bcast_probe_resp_interval) { interval = link_conf->unsol_bcast_probe_resp_interval; - skb = ieee80211_get_unsol_bcast_probe_resp_tmpl(hw, vif); + skb = ieee80211_get_unsol_bcast_probe_resp_tmpl(hw, vif, + link_conf->link_id); } if (!skb) { diff --git a/include/net/mac80211.h b/include/net/mac80211.h index d36c14a86c8a..89027e94ba5c 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -7781,6 +7781,7 @@ struct sk_buff *ieee80211_get_fils_discovery_tmpl(struct ieee80211_hw *hw, * probe response template. * @hw: pointer obtained from ieee80211_alloc_hw(). * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @link_id: valid link_id during MLO or 0 for non-MLO. * * The driver is responsible for freeing the returned skb. * @@ -7788,7 +7789,8 @@ struct sk_buff *ieee80211_get_fils_discovery_tmpl(struct ieee80211_hw *hw, */ struct sk_buff * ieee80211_get_unsol_bcast_probe_resp_tmpl(struct ieee80211_hw *hw, - struct ieee80211_vif *vif); + struct ieee80211_vif *vif, + unsigned int link_id); /** * ieee80211_obss_color_collision_notify - notify userland about a BSS color diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 77ad85a49924..28dcdd7f0e05 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -5872,21 +5872,28 @@ EXPORT_SYMBOL(ieee80211_get_fils_discovery_tmpl); struct sk_buff * ieee80211_get_unsol_bcast_probe_resp_tmpl(struct ieee80211_hw *hw, - struct ieee80211_vif *vif) + struct ieee80211_vif *vif, + unsigned int link_id) { struct sk_buff *skb = NULL; struct unsol_bcast_probe_resp_data *tmpl = NULL; struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_link_data *link; if (sdata->vif.type != NL80211_IFTYPE_AP) return NULL; - rcu_read_lock(); - tmpl = rcu_dereference(sdata->deflink.u.ap.unsol_bcast_probe_resp); - if (!tmpl) { - rcu_read_unlock(); + if (link_id >= IEEE80211_MLD_MAX_NUM_LINKS) + return NULL; + + guard(rcu)(); + link = rcu_dereference(sdata->link[link_id]); + if (!link) + return NULL; + + tmpl = rcu_dereference(link->u.ap.unsol_bcast_probe_resp); + if (!tmpl) return NULL; - } skb = dev_alloc_skb(sdata->local->hw.extra_tx_headroom + tmpl->len); if (skb) { @@ -5894,7 +5901,6 @@ ieee80211_get_unsol_bcast_probe_resp_tmpl(struct ieee80211_hw *hw, skb_put_data(skb, tmpl->data, tmpl->len); } - rcu_read_unlock(); return skb; } EXPORT_SYMBOL(ieee80211_get_unsol_bcast_probe_resp_tmpl); From 5249fcc0efef8c1cf179485773f104d39b636c82 Mon Sep 17 00:00:00 2001 From: Rosen Penev Date: Mon, 23 Feb 2026 13:40:04 -0800 Subject: [PATCH 0140/1561] wifi: rt2x00: use generic nvmem_cell_get The library doesn't necessarily depend on OF. This codepath is used by both soc (OF only) and pci (no such requirement). After this, the only of specific function is of_get_mac_address, which is needed for nvmem. Signed-off-by: Rosen Penev Acked-by: Stanislaw Gruszka Link: https://patch.msgid.link/20260223214004.19960-1-rosenp@gmail.com Signed-off-by: Johannes Berg --- drivers/net/wireless/ralink/rt2x00/rt2800lib.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c index ca18a4c7e14a..2e2e382b6247 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c @@ -10965,13 +10965,13 @@ EXPORT_SYMBOL_GPL(rt2800_read_eeprom_efuse); int rt2800_read_eeprom_nvmem(struct rt2x00_dev *rt2x00dev) { - struct device_node *np = rt2x00dev->dev->of_node; + struct device *dev = rt2x00dev->dev; unsigned int len = rt2x00dev->ops->eeprom_size; struct nvmem_cell *cell; const void *data; size_t retlen; - cell = of_nvmem_cell_get(np, "eeprom"); + cell = nvmem_cell_get(dev, "eeprom"); if (IS_ERR(cell)) return PTR_ERR(cell); From a536be923191e2662369ee87e5d7beb50946c71c Mon Sep 17 00:00:00 2001 From: Sai Pratyusha Magam Date: Thu, 26 Feb 2026 09:59:59 +0530 Subject: [PATCH 0141/1561] wifi: mac80211: Fix AAD/Nonce computation for management frames with MLO Per IEEE Std 802.11be-2024, 12.5.2.3.3, if the MPDU is an individually addressed Data frame between an AP MLD and a non-AP MLD associated with the AP MLD, then A1/A2/A3 will be MLD MAC addresses. Otherwise, Al/A2/A3 will be over-the-air link MAC addresses. Currently, during AAD and Nonce computation for software based encryption/decryption cases, mac80211 directly uses the addresses it receives in the skb frame header. However, after the first authentication, management frame addresses for non-AP MLD stations are translated to MLD addresses from over the air link addresses in software. This means that the skb header could contain translated MLD addresses, which when used as is, can lead to incorrect AAD/Nonce computation. In the following manner, ensure that the right set of addresses are used: In the receive path, stash the pre-translated link addresses in ieee80211_rx_data and use them for the AAD/Nonce computations when required. In the transmit path, offload the encryption for a CCMP/GCMP key to the hwsim driver that can then ensure that encryption and hence the AAD/Nonce computations are performed on the frame containing the right set of addresses, i.e, MLD addresses if unicast data frame and link addresses otherwise. To do so, register the set key handler in hwsim driver so mac80211 is aware that it is the driver that would take care of encrypting the frame. Offload encryption for a CCMP/GCMP key, while keeping the encryption for WEP/TKIP and MMIE generation for a AES_CMAC or a AES_GMAC key still at the SW crypto in mac layer Co-developed-by: Rohan Dutta Signed-off-by: Rohan Dutta Signed-off-by: Sai Pratyusha Magam Link: https://patch.msgid.link/20260226042959.3766157-1-sai.magam@oss.qualcomm.com [only store and apply link_addrs for unicast non-data rather storing always and applying for !unicast_data] Signed-off-by: Johannes Berg --- drivers/net/wireless/virtual/mac80211_hwsim.c | 46 +++++++++++++++- include/net/mac80211.h | 7 +++ net/mac80211/ieee80211_i.h | 2 + net/mac80211/rx.c | 5 ++ net/mac80211/tx.c | 32 +++++++++++ net/mac80211/wpa.c | 55 +++++++++++++++---- 6 files changed, 134 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/virtual/mac80211_hwsim.c b/drivers/net/wireless/virtual/mac80211_hwsim.c index d2a43eec4641..d8bb34809965 100644 --- a/drivers/net/wireless/virtual/mac80211_hwsim.c +++ b/drivers/net/wireless/virtual/mac80211_hwsim.c @@ -1992,6 +1992,25 @@ mac80211_hwsim_select_tx_link(struct mac80211_hwsim_data *data, return NULL; } +static int mac80211_hwsim_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + struct ieee80211_key_conf *key) +{ + switch (key->cipher) { + case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + break; + default: + return 1; + } + + key->flags |= IEEE80211_KEY_FLAG_RESERVE_TAILROOM; + return 0; +} + static void mac80211_hwsim_tx(struct ieee80211_hw *hw, struct ieee80211_tx_control *control, struct sk_buff *skb) @@ -2002,7 +2021,7 @@ static void mac80211_hwsim_tx(struct ieee80211_hw *hw, struct ieee80211_chanctx_conf *chanctx_conf; struct ieee80211_channel *channel; struct ieee80211_vif *vif = txi->control.vif; - bool ack; + bool ack, unicast_data; enum nl80211_chan_width confbw = NL80211_CHAN_WIDTH_20_NOHT; u32 _portid, i; @@ -2012,6 +2031,16 @@ static void mac80211_hwsim_tx(struct ieee80211_hw *hw, return; } + unicast_data = is_unicast_ether_addr(hdr->addr1) && + ieee80211_is_data(hdr->frame_control); + + if (unicast_data && ieee80211_encrypt_tx_skb(skb) < 0) { + ieee80211_free_txskb(hw, skb); + return; + } + /* re-assign hdr since skb data may have shifted after encryption */ + hdr = (void *)skb->data; + if (vif && vif->type == NL80211_IFTYPE_NAN && !data->tmp_chan) { /* For NAN Device simulation purposes, assume that NAN is always * on channel 6 or channel 149, unless a ROC is in progress (for @@ -2097,6 +2126,13 @@ static void mac80211_hwsim_tx(struct ieee80211_hw *hw, } } + if (!unicast_data && ieee80211_encrypt_tx_skb(skb) < 0) { + ieee80211_free_txskb(hw, skb); + return; + } + /* re-assign hdr since skb data may have shifted after encryption */ + hdr = (void *)skb->data; + if (WARN(!channel, "TX w/o channel - queue = %d\n", txi->hw_queue)) { ieee80211_free_txskb(hw, skb); return; @@ -4245,6 +4281,7 @@ static int mac80211_hwsim_set_radar_background(struct ieee80211_hw *hw, .stop_nan = mac80211_hwsim_stop_nan, \ .nan_change_conf = mac80211_hwsim_change_nan_config, \ .set_radar_background = mac80211_hwsim_set_radar_background, \ + .set_key = mac80211_hwsim_set_key, \ HWSIM_DEBUGFS_OPS #define HWSIM_NON_MLO_OPS \ @@ -5684,6 +5721,7 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, WIPHY_FLAG_AP_UAPSD | WIPHY_FLAG_SUPPORTS_5_10_MHZ | WIPHY_FLAG_HAS_CHANNEL_SWITCH; + hw->wiphy->flags |= WIPHY_FLAG_IBSS_RSN; hw->wiphy->features |= NL80211_FEATURE_ACTIVE_MONITOR | NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE | NL80211_FEATURE_STATIC_SMPS | @@ -5702,6 +5740,12 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, NL80211_EXT_FEATURE_SCAN_MIN_PREQ_CONTENT); wiphy_ext_feature_set(hw->wiphy, NL80211_EXT_FEATURE_BSS_COLOR); + wiphy_ext_feature_set(hw->wiphy, + NL80211_EXT_FEATURE_SPP_AMSDU_SUPPORT); + wiphy_ext_feature_set(hw->wiphy, + NL80211_EXT_FEATURE_CAN_REPLACE_PTK0); + wiphy_ext_feature_set(hw->wiphy, + NL80211_EXT_FEATURE_EXT_KEY_ID); hw->wiphy->interface_modes = param->iftypes; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 89027e94ba5c..9f8251fb9832 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -7966,4 +7966,11 @@ int ieee80211_emulate_switch_vif_chanctx(struct ieee80211_hw *hw, * Return: %true iff the vif is a NAN interface and NAN is started */ bool ieee80211_vif_nan_started(struct ieee80211_vif *vif); + +/** + * ieee80211_encrypt_tx_skb - Encrypt the transmit skb + * @skb: the skb + * Return: 0 if success and non-zero on error + */ +int ieee80211_encrypt_tx_skb(struct sk_buff *skb); #endif /* MAC80211_H */ diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index e60b814dd89e..a4babf7624e5 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -256,6 +256,8 @@ struct ieee80211_rx_data { u8 pn[IEEE80211_CCMP_PN_LEN]; } ccm_gcm; }; + + u8 link_addrs[3 * ETH_ALEN]; }; struct ieee80211_csa_settings { diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 11d6c56c9d7e..6c4b549444c6 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -5127,6 +5127,11 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx, hdr = (struct ieee80211_hdr *)rx->skb->data; } + /* Store a copy of the pre-translated link addresses for SW crypto */ + if (unlikely(is_unicast_ether_addr(hdr->addr1) && + !ieee80211_is_data(hdr->frame_control))) + memcpy(rx->link_addrs, &hdr->addrs, 3 * ETH_ALEN); + if (unlikely(rx->sta && rx->sta->sta.mlo) && is_unicast_ether_addr(hdr->addr1) && !ieee80211_is_probe_resp(hdr->frame_control) && diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 28dcdd7f0e05..dd691ff549c3 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -5315,6 +5315,38 @@ static int ieee80211_beacon_protect(struct sk_buff *skb, return 0; } +int ieee80211_encrypt_tx_skb(struct sk_buff *skb) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_sub_if_data *sdata; + struct sk_buff *check_skb; + struct ieee80211_tx_data tx; + ieee80211_tx_result res; + + if (!info->control.hw_key) + return 0; + + memset(&tx, 0, sizeof(tx)); + tx.key = container_of(info->control.hw_key, struct ieee80211_key, conf); + /* NULL it out now so we do full SW crypto */ + info->control.hw_key = NULL; + __skb_queue_head_init(&tx.skbs); + __skb_queue_tail(&tx.skbs, skb); + + sdata = IEEE80211_DEV_TO_SUB_IF(skb->dev); + tx.sdata = sdata; + tx.local = sdata->local; + res = ieee80211_tx_h_encrypt(&tx); + check_skb = __skb_dequeue(&tx.skbs); + /* we may crash after this, but it'd be a bug in crypto */ + WARN_ON(check_skb != skb); + if (WARN_ON_ONCE(res != TX_CONTINUE)) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL_GPL(ieee80211_encrypt_tx_skb); + static void ieee80211_beacon_get_finish(struct ieee80211_hw *hw, struct ieee80211_vif *vif, diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index fdf98c21d32c..64a57475ce50 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -315,7 +315,8 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx) * Calculate AAD for CCMP/GCMP, returning qos_tid since we * need that in CCMP also for b_0. */ -static u8 ccmp_gcmp_aad(struct sk_buff *skb, u8 *aad, bool spp_amsdu) +static u8 ccmp_gcmp_aad(struct sk_buff *skb, u8 *aad, bool spp_amsdu, + bool aad_nonce_computed) { struct ieee80211_hdr *hdr = (void *)skb->data; __le16 mask_fc; @@ -358,7 +359,8 @@ static u8 ccmp_gcmp_aad(struct sk_buff *skb, u8 *aad, bool spp_amsdu) * FC | A1 | A2 | A3 | SC | [A4] | [QC] */ put_unaligned_be16(len_a, &aad[0]); put_unaligned(mask_fc, (__le16 *)&aad[2]); - memcpy(&aad[4], &hdr->addrs, 3 * ETH_ALEN); + if (!aad_nonce_computed) + memcpy(&aad[4], &hdr->addrs, 3 * ETH_ALEN); /* Mask Seq#, leave Frag# */ aad[22] = *((u8 *) &hdr->seq_ctrl) & 0x0f; @@ -377,10 +379,10 @@ static u8 ccmp_gcmp_aad(struct sk_buff *skb, u8 *aad, bool spp_amsdu) } static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad, - bool spp_amsdu) + bool spp_amsdu, bool aad_nonce_computed) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; - u8 qos_tid = ccmp_gcmp_aad(skb, aad, spp_amsdu); + u8 qos_tid = ccmp_gcmp_aad(skb, aad, spp_amsdu, aad_nonce_computed); /* In CCM, the initial vectors (IV) used for CTR mode encryption and CBC * mode authentication are not allowed to collide, yet both are derived @@ -395,7 +397,8 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad, * Nonce Flags: Priority (b0..b3) | Management (b4) | Reserved (b5..b7) */ b_0[1] = qos_tid | (ieee80211_is_mgmt(hdr->frame_control) << 4); - memcpy(&b_0[2], hdr->addr2, ETH_ALEN); + if (!aad_nonce_computed) + memcpy(&b_0[2], hdr->addr2, ETH_ALEN); memcpy(&b_0[8], pn, IEEE80211_CCMP_PN_LEN); } @@ -488,7 +491,8 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb, pos += IEEE80211_CCMP_HDR_LEN; ccmp_special_blocks(skb, pn, b_0, aad, - key->conf.flags & IEEE80211_KEY_FLAG_SPP_AMSDU); + key->conf.flags & IEEE80211_KEY_FLAG_SPP_AMSDU, + false); return ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, b_0, aad, pos, len, skb_put(skb, mic_len)); } @@ -566,9 +570,22 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx, if (!(status->flag & RX_FLAG_DECRYPTED)) { u8 aad[2 * AES_BLOCK_SIZE]; u8 b_0[AES_BLOCK_SIZE]; + bool aad_nonce_computed = false; + + if (is_unicast_ether_addr(hdr->addr1) && + !ieee80211_is_data(hdr->frame_control)) { + /* AAD computation */ + memcpy(&aad[4], rx->link_addrs, 3 * ETH_ALEN); + /* Nonce computation */ + ether_addr_copy(&b_0[2], + &rx->link_addrs[ETH_ALEN]); + aad_nonce_computed = true; + } + /* hardware didn't decrypt/verify MIC */ ccmp_special_blocks(skb, pn, b_0, aad, - key->conf.flags & IEEE80211_KEY_FLAG_SPP_AMSDU); + key->conf.flags & IEEE80211_KEY_FLAG_SPP_AMSDU, + aad_nonce_computed); if (ieee80211_aes_ccm_decrypt( key->u.ccmp.tfm, b_0, aad, @@ -593,14 +610,15 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx, } static void gcmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *j_0, u8 *aad, - bool spp_amsdu) + bool spp_amsdu, bool aad_nonce_computed) { struct ieee80211_hdr *hdr = (void *)skb->data; - memcpy(j_0, hdr->addr2, ETH_ALEN); + if (!aad_nonce_computed) + memcpy(j_0, hdr->addr2, ETH_ALEN); memcpy(&j_0[ETH_ALEN], pn, IEEE80211_GCMP_PN_LEN); - ccmp_gcmp_aad(skb, aad, spp_amsdu); + ccmp_gcmp_aad(skb, aad, spp_amsdu, aad_nonce_computed); } static inline void gcmp_pn2hdr(u8 *hdr, const u8 *pn, int key_id) @@ -690,7 +708,8 @@ static int gcmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) pos += IEEE80211_GCMP_HDR_LEN; gcmp_special_blocks(skb, pn, j_0, aad, - key->conf.flags & IEEE80211_KEY_FLAG_SPP_AMSDU); + key->conf.flags & IEEE80211_KEY_FLAG_SPP_AMSDU, + false); return ieee80211_aes_gcm_encrypt(key->u.gcmp.tfm, j_0, aad, pos, len, skb_put(skb, IEEE80211_GCMP_MIC_LEN)); } @@ -763,9 +782,21 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx) if (!(status->flag & RX_FLAG_DECRYPTED)) { u8 aad[2 * AES_BLOCK_SIZE]; u8 j_0[AES_BLOCK_SIZE]; + bool aad_nonce_computed = false; + + if (is_unicast_ether_addr(hdr->addr1) && + !ieee80211_is_data(hdr->frame_control)) { + /* AAD computation */ + memcpy(&aad[4], rx->link_addrs, 3 * ETH_ALEN); + /* Nonce computation */ + ether_addr_copy(&j_0[0], + &rx->link_addrs[ETH_ALEN]); + aad_nonce_computed = true; + } /* hardware didn't decrypt/verify MIC */ gcmp_special_blocks(skb, pn, j_0, aad, - key->conf.flags & IEEE80211_KEY_FLAG_SPP_AMSDU); + key->conf.flags & IEEE80211_KEY_FLAG_SPP_AMSDU, + aad_nonce_computed); if (ieee80211_aes_gcm_decrypt( key->u.gcmp.tfm, j_0, aad, From ae61f43df1a99734ef55d9b2fe54c1feda9bac98 Mon Sep 17 00:00:00 2001 From: Kavita Kavita Date: Fri, 27 Feb 2026 00:25:49 +0530 Subject: [PATCH 0142/1561] wifi: mac80211_hwsim: Advertise support for (Re)Association frame encryption Advertise support for (Re)Association frame encryption in mac80211_hwsim for testing scenarios. Signed-off-by: Kavita Kavita Link: https://patch.msgid.link/20260226185553.1516290-2-kavita.kavita@oss.qualcomm.com Signed-off-by: Johannes Berg --- drivers/net/wireless/virtual/mac80211_hwsim.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/virtual/mac80211_hwsim.c b/drivers/net/wireless/virtual/mac80211_hwsim.c index d8bb34809965..c6871c6c771a 100644 --- a/drivers/net/wireless/virtual/mac80211_hwsim.c +++ b/drivers/net/wireless/virtual/mac80211_hwsim.c @@ -5746,6 +5746,8 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, NL80211_EXT_FEATURE_CAN_REPLACE_PTK0); wiphy_ext_feature_set(hw->wiphy, NL80211_EXT_FEATURE_EXT_KEY_ID); + wiphy_ext_feature_set(hw->wiphy, + NL80211_EXT_FEATURE_ASSOC_FRAME_ENCRYPTION); hw->wiphy->interface_modes = param->iftypes; From 0e88342dbd0ee8088ca2d2ae8af319d3c2b627a8 Mon Sep 17 00:00:00 2001 From: Kavita Kavita Date: Fri, 27 Feb 2026 00:25:50 +0530 Subject: [PATCH 0143/1561] wifi: mac80211: Advertise EPPKE support based on driver capabilities Advertise support for Enhanced Privacy Protection Key Exchange (EPPKE) authentication protocol in mac80211 when the driver supports (Re)Association frame encryption. Since EPPKE mandates (Re)Association frame encryption. Signed-off-by: Kavita Kavita Link: https://patch.msgid.link/20260226185553.1516290-3-kavita.kavita@oss.qualcomm.com Signed-off-by: Johannes Berg --- net/mac80211/main.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 616f86b1a7e4..246256279249 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1597,6 +1597,15 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) local->sband_allocated |= BIT(band); } + /* + * mac80211 supports EPPKE, if the driver supports (Re)Association + * frame encryption + */ + if (wiphy_ext_feature_isset(local->hw.wiphy, + NL80211_EXT_FEATURE_ASSOC_FRAME_ENCRYPTION)) + wiphy_ext_feature_set(local->hw.wiphy, + NL80211_EXT_FEATURE_EPPKE); + result = wiphy_register(local->hw.wiphy); if (result < 0) goto fail_wiphy_register; From bd77375097357b46af00db1316ceab5e82ccbc8b Mon Sep 17 00:00:00 2001 From: Kavita Kavita Date: Fri, 27 Feb 2026 00:25:51 +0530 Subject: [PATCH 0144/1561] wifi: cfg80211: add support for IEEE 802.1X Authentication Protocol Add an extended feature flag NL80211_EXT_FEATURE_IEEE8021X_AUTH to allow a driver to indicate support for the IEEE 802.1X authentication protocol in non-AP STA mode, as defined in "IEEE P802.11bi/D4.0, 12.16.5". In case of SME in userspace, the Authentication frame body is prepared in userspace while the driver finalizes the Authentication frame once it receives the required fields and elements. The driver indicates support for IEEE 802.1X authentication using the extended feature flag so that userspace can initiate IEEE 802.1X authentication. When the feature flag is set, process IEEE 802.1X Authentication frames from userspace in non-AP STA mode. If the flag is not set, reject IEEE 802.1X Authentication frames. Define a new authentication type NL80211_AUTHTYPE_IEEE8021X for IEEE 802.1X authentication. Signed-off-by: Kavita Kavita Link: https://patch.msgid.link/20260226185553.1516290-4-kavita.kavita@oss.qualcomm.com Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 1 + include/uapi/linux/nl80211.h | 9 +++++++++ net/wireless/nl80211.c | 14 ++++++++++++-- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 0aa2fb8f88de..1bf806f85372 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1358,6 +1358,7 @@ struct ieee80211_tdls_data { #define WLAN_AUTH_FILS_SK 4 #define WLAN_AUTH_FILS_SK_PFS 5 #define WLAN_AUTH_FILS_PK 6 +#define WLAN_AUTH_IEEE8021X 8 #define WLAN_AUTH_EPPKE 9 #define WLAN_AUTH_LEAP 128 diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index fe2c8c8d6dd6..0b7a06c2b9f7 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -5491,6 +5491,8 @@ enum nl80211_bss_status { * @NL80211_AUTHTYPE_FILS_SK_PFS: Fast Initial Link Setup shared key with PFS * @NL80211_AUTHTYPE_FILS_PK: Fast Initial Link Setup public key * @NL80211_AUTHTYPE_EPPKE: Enhanced Privacy Protection Key Exchange + * @NL80211_AUTHTYPE_IEEE8021X: IEEE 802.1X authentication utilizing + * Authentication frames * @__NL80211_AUTHTYPE_NUM: internal * @NL80211_AUTHTYPE_MAX: maximum valid auth algorithm * @NL80211_AUTHTYPE_AUTOMATIC: determine automatically (if necessary by @@ -5507,6 +5509,7 @@ enum nl80211_auth_type { NL80211_AUTHTYPE_FILS_SK_PFS, NL80211_AUTHTYPE_FILS_PK, NL80211_AUTHTYPE_EPPKE, + NL80211_AUTHTYPE_IEEE8021X, /* keep last */ __NL80211_AUTHTYPE_NUM, @@ -6820,6 +6823,11 @@ enum nl80211_feature_flags { * frames in both non‑AP STA and AP mode as specified in * "IEEE P802.11bi/D3.0, 12.16.6". * + * @NL80211_EXT_FEATURE_IEEE8021X_AUTH: Driver supports IEEE 802.1X + * authentication utilizing Authentication frames with user space SME + * (NL80211_CMD_AUTHENTICATE) in non-AP STA mode, as specified in + * "IEEE P802.11bi/D4.0, 12.16.5". + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ @@ -6898,6 +6906,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_BEACON_RATE_EHT, NL80211_EXT_FEATURE_EPPKE, NL80211_EXT_FEATURE_ASSOC_FRAME_ENCRYPTION, + NL80211_EXT_FEATURE_IEEE8021X_AUTH, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f54b3cca6975..de7956dbe0a0 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6550,6 +6550,10 @@ static bool nl80211_valid_auth_type(struct cfg80211_registered_device *rdev, NL80211_EXT_FEATURE_EPPKE) && auth_type == NL80211_AUTHTYPE_EPPKE) return false; + if (!wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_IEEE8021X_AUTH) && + auth_type == NL80211_AUTHTYPE_IEEE8021X) + return false; return true; case NL80211_CMD_CONNECT: if (!(rdev->wiphy.features & NL80211_FEATURE_SAE) && @@ -6571,6 +6575,10 @@ static bool nl80211_valid_auth_type(struct cfg80211_registered_device *rdev, NL80211_EXT_FEATURE_EPPKE) && auth_type == NL80211_AUTHTYPE_EPPKE) return false; + if (!wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_IEEE8021X_AUTH) && + auth_type == NL80211_AUTHTYPE_IEEE8021X) + return false; return true; case NL80211_CMD_START_AP: if (!wiphy_ext_feature_isset(&rdev->wiphy, @@ -12103,7 +12111,8 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) auth_type == NL80211_AUTHTYPE_FILS_SK || auth_type == NL80211_AUTHTYPE_FILS_SK_PFS || auth_type == NL80211_AUTHTYPE_FILS_PK || - auth_type == NL80211_AUTHTYPE_EPPKE) && + auth_type == NL80211_AUTHTYPE_EPPKE || + auth_type == NL80211_AUTHTYPE_IEEE8021X) && !info->attrs[NL80211_ATTR_AUTH_DATA]) return -EINVAL; @@ -12112,7 +12121,8 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) auth_type != NL80211_AUTHTYPE_FILS_SK && auth_type != NL80211_AUTHTYPE_FILS_SK_PFS && auth_type != NL80211_AUTHTYPE_FILS_PK && - auth_type != NL80211_AUTHTYPE_EPPKE) + auth_type != NL80211_AUTHTYPE_EPPKE && + auth_type != NL80211_AUTHTYPE_IEEE8021X) return -EINVAL; req.auth_data = nla_data(info->attrs[NL80211_ATTR_AUTH_DATA]); req.auth_data_len = nla_len(info->attrs[NL80211_ATTR_AUTH_DATA]); From 9347878b1513beee1a26bb249f5dc8326d450f75 Mon Sep 17 00:00:00 2001 From: Kavita Kavita Date: Fri, 27 Feb 2026 00:25:52 +0530 Subject: [PATCH 0145/1561] wifi: mac80211: Add support for IEEE 802.1X authentication protocol in non-AP STA mode Add support for the IEEE 802.1X authentication protocol in non-AP STA mode, as specified in "IEEE P802.11bi/D4.0, 12.16.5". IEEE 802.1X authentication involves multiple Authentication frame exchanges, with the non-AP STA and AP alternating transaction sequence numbers. The number of Authentication frame exchanges depends on the EAP method in use. For IEEE 802.1X authentication, process only Authentication frames with the expected transaction sequence number. For IEEE 802.1X Authentication, Table 9-71 specifies that the Encapsulation Length field as specified in Clause 9.4.1.82 shall be present in all IEEE 802.1X Authentication frames. Drop the frame in the mac80211 if the Encapsulation Length field is missing. After receiving the final Authentication frame with status code WLAN_STATUS_8021X_AUTH_SUCCESS from the AP, mac80211 marks the state as authenticated, as it indicates the EAP handshake has completed successfully over the Authentication frames as specified in Clause 12.16.5. In the PMKSA caching case, only two Authentication frames are exchanged if the AP identifies a valid PMKSA, then as specified in Clause 12.16.8.3, the AP shall set the Status Code to WLAN_STATUS_SUCCESS in the final Authentication frame and must not include an encapsulated EAPOL PDU. This frame will be the final Authentication frame from the AP when PMKSA caching is enabled, and mac80211 marks the state as authenticated. In case of authentication success or failure, forward the Authentication frame to userspace(e.g. wpa_supplicant), and let userspace validate the Authentication frame from the AP as per the specification. Signed-off-by: Kavita Kavita Link: https://patch.msgid.link/20260226185553.1516290-5-kavita.kavita@oss.qualcomm.com Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 1 + net/mac80211/mlme.c | 78 ++++++++++++++++++++++++++++++++++++--- 2 files changed, 73 insertions(+), 6 deletions(-) diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 1bf806f85372..3651b2e6c518 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1508,6 +1508,7 @@ enum ieee80211_statuscode { WLAN_STATUS_SAE_PK = 127, WLAN_STATUS_DENIED_TID_TO_LINK_MAPPING = 133, WLAN_STATUS_PREF_TID_TO_LINK_MAPPING_SUGGESTED = 134, + WLAN_STATUS_8021X_AUTH_SUCCESS = 153, }; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 810bea1aacc5..7957eacc5ab7 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -4920,7 +4920,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - u16 auth_alg, auth_transaction, status_code; + u16 auth_alg, auth_transaction, status_code, encap_len; struct ieee80211_event event = { .type = MLME_EVENT, .u.mlme.data = AUTH_EVENT, @@ -4929,6 +4929,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, .subtype = IEEE80211_STYPE_AUTH, }; bool sae_need_confirm = false; + bool auth_fail = false; lockdep_assert_wiphy(sdata->local->hw.wiphy); @@ -4945,6 +4946,15 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction); status_code = le16_to_cpu(mgmt->u.auth.status_code); + /* + * IEEE 802.1X Authentication: + * Header + Authentication Algorithm Number(2 byte) + Authentication + * Transaction Sequence Number(2 byte) + Status Code(2 byte) + + * Encapsulation Length(2 byte). + */ + if (auth_alg == WLAN_AUTH_IEEE8021X && len < 24 + 8) + return; + info.link_id = ifmgd->auth_data->link_id; if (auth_alg != ifmgd->auth_data->algorithm || @@ -4960,7 +4970,24 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, goto notify_driver; } - if (status_code != WLAN_STATUS_SUCCESS) { + switch (auth_alg) { + case WLAN_AUTH_IEEE8021X: + if (status_code != WLAN_STATUS_SUCCESS && + status_code != WLAN_STATUS_8021X_AUTH_SUCCESS) + auth_fail = true; + + if (!auth_fail) { + /* Indicates length of encapsulated EAPOL PDU */ + encap_len = get_unaligned_le16(mgmt->u.auth.variable); + } + break; + default: + if (status_code != WLAN_STATUS_SUCCESS) + auth_fail = true; + break; + } + + if (auth_fail) { cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len); if (auth_alg == WLAN_AUTH_SAE && @@ -4997,6 +5024,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, case WLAN_AUTH_FILS_SK_PFS: case WLAN_AUTH_FILS_PK: case WLAN_AUTH_EPPKE: + case WLAN_AUTH_IEEE8021X: break; case WLAN_AUTH_SHARED_KEY: if (ifmgd->auth_data->expected_transaction != 4) { @@ -5017,8 +5045,37 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, if (ifmgd->auth_data->algorithm != WLAN_AUTH_SAE || (auth_transaction == 2 && ifmgd->auth_data->expected_transaction == 2)) { - if (!ieee80211_mark_sta_auth(sdata)) - return; /* ignore frame -- wait for timeout */ + switch (ifmgd->auth_data->algorithm) { + case WLAN_AUTH_IEEE8021X: + /* + * IEEE 802.1X authentication: + * - When the full EAP handshake completes over the + * Authentication process, the responder sets the + * Status Code to WLAN_STATUS_8021X_AUTH_SUCCESS as + * specified in "IEEE P802.11bi/D4.0, 12.16.5". + * + * - In the PMKSA caching case, only two Authentication + * frames are exchanged if the responder (e.g., AP) + * identifies a valid PMKSA, then as specified in + * "IEEE P802.11bi/D4.0, 12.16.8.3", the responder + * shall set the Status Code to SUCCESS in the final + * Authentication frame and must not include an + * encapsulated EAPOL PDU. + * + * Both conditions are treated as successful + * authentication, so mark the state to Authenticated. + */ + if (status_code != WLAN_STATUS_8021X_AUTH_SUCCESS && + !(status_code == WLAN_STATUS_SUCCESS && + encap_len == 0)) + break; + fallthrough; + default: + if (!ieee80211_mark_sta_auth(sdata)) + return; /* ignore frame -- wait for timeout */ + + break; + } } else if (ifmgd->auth_data->algorithm == WLAN_AUTH_SAE && auth_transaction == 1) { sae_need_confirm = true; @@ -8460,6 +8517,10 @@ static int ieee80211_auth(struct ieee80211_sub_if_data *sdata) } else if (auth_data->algorithm == WLAN_AUTH_EPPKE) { trans = auth_data->trans; status = auth_data->status; + } else if (auth_data->algorithm == WLAN_AUTH_IEEE8021X) { + trans = auth_data->trans; + status = auth_data->status; + auth_data->expected_transaction = trans + 1; } if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) @@ -9117,7 +9178,8 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, } if (ifmgd->auth_data && - ifmgd->auth_data->algorithm == WLAN_AUTH_EPPKE) + (ifmgd->auth_data->algorithm == WLAN_AUTH_EPPKE || + ifmgd->auth_data->algorithm == WLAN_AUTH_IEEE8021X)) new_sta->sta.epp_peer = true; new_sta->sta.mlo = mlo; @@ -9377,6 +9439,9 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, case NL80211_AUTHTYPE_EPPKE: auth_alg = WLAN_AUTH_EPPKE; break; + case NL80211_AUTHTYPE_IEEE8021X: + auth_alg = WLAN_AUTH_IEEE8021X; + break; default: return -EOPNOTSUPP; } @@ -9402,7 +9467,8 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, if (req->auth_data_len >= 4) { if (req->auth_type == NL80211_AUTHTYPE_SAE || - req->auth_type == NL80211_AUTHTYPE_EPPKE) { + req->auth_type == NL80211_AUTHTYPE_EPPKE || + req->auth_type == NL80211_AUTHTYPE_IEEE8021X) { __le16 *pos = (__le16 *) req->auth_data; auth_data->trans = le16_to_cpu(pos[0]); From bed80a08ff5e357ea722db10e2b552acf1ff7482 Mon Sep 17 00:00:00 2001 From: Kavita Kavita Date: Fri, 27 Feb 2026 00:25:53 +0530 Subject: [PATCH 0146/1561] wifi: mac80211: Advertise IEEE 802.1X authentication support Advertise support for IEEE 802.1X authentication protocol directly from mac80211, without depending on driver indication of (Re)Association frame encryption capability. As specified in "IEEE P802.11bi/D4.0, clauses 12.16.5 and 12.16.8.2", IEEE 802.1X authentication can operate with or without (Re)Association frame encryption support. Therefore, mac80211 can safely advertise 802.1X support independently of driver capabilities. Signed-off-by: Kavita Kavita Link: https://patch.msgid.link/20260226185553.1516290-6-kavita.kavita@oss.qualcomm.com Signed-off-by: Johannes Berg --- net/mac80211/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 246256279249..b0451f1c8e79 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -915,6 +915,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_TXQS); wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_RRM); + wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_IEEE8021X_AUTH); wiphy->bss_priv_size = sizeof(struct ieee80211_bss); From b8a57b979a7c2069081ed0bf88a727b17d85ac96 Mon Sep 17 00:00:00 2001 From: Kexin Sun Date: Sat, 28 Feb 2026 14:19:44 +0800 Subject: [PATCH 0147/1561] wifi: mac80211: update outdated comment The function ieee80211_start_scan() was refactored and replaced by __ieee80211_start_scan() in commit f3b85252f081 ("mac80211: fix scan races and rework scanning"). Update the comment in ieee80211_tx_h_check_assoc() accordingly. Additionally, remove the broken gmane.org link in the comment. Suggested-by: Lachlan Hodges Suggested-by: Johannes Berg Assisted-by: unnamed:deepseek-v3.2 coccinelle Signed-off-by: Kexin Sun Link: https://patch.msgid.link/20260228061944.887-1-kexinsun@smail.nju.edu.cn Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index dd691ff549c3..3844c7fbb8a8 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -287,10 +287,7 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx) * active scan) are allowed, all other frames should not be * sent and we should not get here, but if we do * nonetheless, drop them to avoid sending them - * off-channel. See the link below and - * ieee80211_start_scan() for more. - * - * http://article.gmane.org/gmane.linux.kernel.wireless.general/30089 + * off-channel. See __ieee80211_start_scan() for more. */ return TX_DROP; From 94d865739249c0b68b0046ea22e55b93fdf420c6 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Mon, 2 Mar 2026 09:11:46 +0200 Subject: [PATCH 0148/1561] wifi: cfg80211: make cluster id an array cfg80211_nan_conf::cluster_id is currently a pointer, but there is no real reason to not have it an array. It makes things easier as there is no need to check the pointer validity each time. If a cluster ID wasn't provided by user space it will be randomized. Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260302091108.2b12e4ccf5bb.Ib16bf5cca55463d4c89e18099cf1dfe4de95d405@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mld/nan.c | 5 ++--- drivers/net/wireless/virtual/mac80211_hwsim.c | 2 +- include/net/cfg80211.h | 3 +-- net/mac80211/cfg.c | 12 ++---------- net/wireless/nl80211.c | 14 +++++++++++--- 5 files changed, 17 insertions(+), 19 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/nan.c b/drivers/net/wireless/intel/iwlwifi/mld/nan.c index 2dbd3d58b0c6..4d8e85f2bd7c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/nan.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/nan.c @@ -54,9 +54,8 @@ static int iwl_mld_nan_config(struct iwl_mld *mld, ether_addr_copy(cmd.nmi_addr, vif->addr); cmd.master_pref = conf->master_pref; - if (conf->cluster_id) - memcpy(cmd.cluster_id, conf->cluster_id + 4, - sizeof(cmd.cluster_id)); + memcpy(cmd.cluster_id, conf->cluster_id + 4, + sizeof(cmd.cluster_id)); cmd.scan_period = conf->scan_period < 255 ? conf->scan_period : 255; cmd.dwell_time = diff --git a/drivers/net/wireless/virtual/mac80211_hwsim.c b/drivers/net/wireless/virtual/mac80211_hwsim.c index c6871c6c771a..475918ee8132 100644 --- a/drivers/net/wireless/virtual/mac80211_hwsim.c +++ b/drivers/net/wireless/virtual/mac80211_hwsim.c @@ -4151,7 +4151,7 @@ static int mac80211_hwsim_start_nan(struct ieee80211_hw *hw, ns_to_ktime(until_dw * NSEC_PER_USEC), HRTIMER_MODE_REL_SOFT); - if (conf->cluster_id && !is_zero_ether_addr(conf->cluster_id) && + if (!is_zero_ether_addr(conf->cluster_id) && is_zero_ether_addr(hwsim_nan_cluster_id)) { memcpy(hwsim_nan_cluster_id, conf->cluster_id, ETH_ALEN); } else if (is_zero_ether_addr(hwsim_nan_cluster_id)) { diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index c21354647da0..8a63dea500ad 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4023,7 +4023,6 @@ struct cfg80211_nan_band_config { * (i.e. BIT(NL80211_BAND_2GHZ)). * @cluster_id: cluster ID used for NAN synchronization. This is a MAC address * that can take a value from 50-6F-9A-01-00-00 to 50-6F-9A-01-FF-FF. - * If NULL, the device will pick a random Cluster ID. * @scan_period: period (in seconds) between NAN scans. * @scan_dwell_time: dwell time (in milliseconds) for NAN scans. * @discovery_beacon_interval: interval (in TUs) for discovery beacons. @@ -4039,7 +4038,7 @@ struct cfg80211_nan_band_config { struct cfg80211_nan_conf { u8 master_pref; u8 bands; - const u8 *cluster_id; + u8 cluster_id[ETH_ALEN] __aligned(2); u16 scan_period; u16 scan_dwell_time; u8 discovery_beacon_interval; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index aa3b86644e8f..0c4979526c91 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -330,7 +330,6 @@ static void ieee80211_stop_p2p_device(struct wiphy *wiphy, static void ieee80211_nan_conf_free(struct cfg80211_nan_conf *conf) { - kfree(conf->cluster_id); kfree(conf->extra_nan_attrs); kfree(conf->vendor_elems); memset(conf, 0, sizeof(*conf)); @@ -372,9 +371,6 @@ static int ieee80211_nan_conf_copy(struct cfg80211_nan_conf *dst, memcpy(&dst->band_cfgs, &src->band_cfgs, sizeof(dst->band_cfgs)); - kfree(dst->cluster_id); - dst->cluster_id = NULL; - kfree(dst->extra_nan_attrs); dst->extra_nan_attrs = NULL; dst->extra_nan_attrs_len = 0; @@ -383,12 +379,8 @@ static int ieee80211_nan_conf_copy(struct cfg80211_nan_conf *dst, dst->vendor_elems = NULL; dst->vendor_elems_len = 0; - if (src->cluster_id) { - dst->cluster_id = kmemdup(src->cluster_id, ETH_ALEN, - GFP_KERNEL); - if (!dst->cluster_id) - goto no_mem; - } + if (is_zero_ether_addr(dst->cluster_id)) + ether_addr_copy(dst->cluster_id, src->cluster_id); if (src->extra_nan_attrs && src->extra_nan_attrs_len) { dst->extra_nan_attrs = kmemdup(src->extra_nan_attrs, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index de7956dbe0a0..26cf29c8867b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -15767,9 +15768,16 @@ static int nl80211_parse_nan_conf(struct wiphy *wiphy, return err; changed |= CFG80211_NAN_CONF_CHANGED_CONFIG; - if (attrs[NL80211_NAN_CONF_CLUSTER_ID] && start) - conf->cluster_id = - nla_data(attrs[NL80211_NAN_CONF_CLUSTER_ID]); + if (attrs[NL80211_NAN_CONF_CLUSTER_ID] && start) { + ether_addr_copy(conf->cluster_id, + nla_data(attrs[NL80211_NAN_CONF_CLUSTER_ID])); + } else if (start) { + conf->cluster_id[0] = 0x50; + conf->cluster_id[1] = 0x6f; + conf->cluster_id[2] = 0x9a; + conf->cluster_id[3] = 0x01; + get_random_bytes(&conf->cluster_id[4], 2); + } if (attrs[NL80211_NAN_CONF_EXTRA_ATTRS]) { conf->extra_nan_attrs = From 7c6084d7fa4e61dd7824c34529277a814c7b3836 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Wed, 7 Jan 2026 15:20:02 +0200 Subject: [PATCH 0149/1561] wifi: cfg80211: support key installation on non-netdev wdevs Currently key installation is only supported for netdev. For NAN, support most key operations (except setting default data key) on wdevs instead of netdevs, and adjust all the APIs and tracing to match. Since nothing currently sets NL80211_EXT_FEATURE_SECURE_NAN, this doesn't change anything (P2P Device already isn't allowed.) Signed-off-by: Avraham Stern Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260107150057.69a0cfad95fa.I00efdf3b2c11efab82ef6ece9f393382bcf33ba8@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath6kl/cfg80211.c | 16 ++--- drivers/net/wireless/ath/wil6210/cfg80211.c | 13 ++-- .../broadcom/brcm80211/brcmfmac/cfg80211.c | 18 +++--- drivers/net/wireless/marvell/libertas/cfg.c | 6 +- .../net/wireless/marvell/mwifiex/cfg80211.c | 12 ++-- .../wireless/microchip/wilc1000/cfg80211.c | 18 +++--- .../net/wireless/quantenna/qtnfmac/cfg80211.c | 12 ++-- .../staging/rtl8723bs/os_dep/ioctl_cfg80211.c | 9 +-- include/net/cfg80211.h | 10 +-- net/mac80211/cfg.c | 20 +++--- net/wireless/ibss.c | 4 +- net/wireless/nl80211.c | 46 ++++++++------ net/wireless/rdev-ops.h | 32 +++++----- net/wireless/sme.c | 4 +- net/wireless/trace.h | 62 +++++++++---------- net/wireless/util.c | 2 +- net/wireless/wext-compat.c | 6 +- 17 files changed, 148 insertions(+), 142 deletions(-) diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index eecba2201b10..739a24a6ad67 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -1123,13 +1123,13 @@ void ath6kl_cfg80211_ch_switch_notify(struct ath6kl_vif *vif, int freq, wiphy_unlock(vif->ar->wiphy); } -static int ath6kl_cfg80211_add_key(struct wiphy *wiphy, struct net_device *ndev, +static int ath6kl_cfg80211_add_key(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr, struct key_params *params) { - struct ath6kl *ar = ath6kl_priv(ndev); - struct ath6kl_vif *vif = netdev_priv(ndev); + struct ath6kl *ar = ath6kl_priv(wdev->netdev); + struct ath6kl_vif *vif = netdev_priv(wdev->netdev); struct ath6kl_key *key = NULL; int seq_len; u8 key_usage; @@ -1248,12 +1248,12 @@ static int ath6kl_cfg80211_add_key(struct wiphy *wiphy, struct net_device *ndev, (u8 *) mac_addr, SYNC_BOTH_WMIFLAG); } -static int ath6kl_cfg80211_del_key(struct wiphy *wiphy, struct net_device *ndev, +static int ath6kl_cfg80211_del_key(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr) { - struct ath6kl *ar = ath6kl_priv(ndev); - struct ath6kl_vif *vif = netdev_priv(ndev); + struct ath6kl *ar = ath6kl_priv(wdev->netdev); + struct ath6kl_vif *vif = netdev_priv(wdev->netdev); ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: index %d\n", __func__, key_index); @@ -1278,13 +1278,13 @@ static int ath6kl_cfg80211_del_key(struct wiphy *wiphy, struct net_device *ndev, return ath6kl_wmi_deletekey_cmd(ar->wmi, vif->fw_vif_idx, key_index); } -static int ath6kl_cfg80211_get_key(struct wiphy *wiphy, struct net_device *ndev, +static int ath6kl_cfg80211_get_key(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr, void *cookie, void (*callback) (void *cookie, struct key_params *)) { - struct ath6kl_vif *vif = netdev_priv(ndev); + struct ath6kl_vif *vif = netdev_priv(wdev->netdev); struct ath6kl_key *key = NULL; struct key_params params; diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c index 2d8660ccc6f3..3d6e5aad48b1 100644 --- a/drivers/net/wireless/ath/wil6210/cfg80211.c +++ b/drivers/net/wireless/ath/wil6210/cfg80211.c @@ -1619,15 +1619,14 @@ static void wil_del_rx_key(u8 key_index, enum wmi_key_usage key_usage, } static int wil_cfg80211_add_key(struct wiphy *wiphy, - struct net_device *ndev, int link_id, + struct wireless_dev *wdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr, struct key_params *params) { int rc; - struct wil6210_vif *vif = ndev_to_vif(ndev); struct wil6210_priv *wil = wiphy_to_wil(wiphy); - struct wireless_dev *wdev = vif_to_wdev(vif); + struct wil6210_vif *vif = wdev_to_vif(wil, wdev); enum wmi_key_usage key_usage = wil_detect_key_usage(wdev, pairwise); struct wil_sta_info *cs = wil_find_sta_by_key_usage(wil, vif->mid, key_usage, @@ -1695,13 +1694,12 @@ static int wil_cfg80211_add_key(struct wiphy *wiphy, } static int wil_cfg80211_del_key(struct wiphy *wiphy, - struct net_device *ndev, int link_id, + struct wireless_dev *wdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr) { - struct wil6210_vif *vif = ndev_to_vif(ndev); struct wil6210_priv *wil = wiphy_to_wil(wiphy); - struct wireless_dev *wdev = vif_to_wdev(vif); + struct wil6210_vif *vif = wdev_to_vif(wil, wdev); enum wmi_key_usage key_usage = wil_detect_key_usage(wdev, pairwise); struct wil_sta_info *cs = wil_find_sta_by_key_usage(wil, vif->mid, key_usage, @@ -2071,7 +2069,8 @@ void wil_cfg80211_ap_recovery(struct wil6210_priv *wil) key_params.key = vif->gtk; key_params.key_len = vif->gtk_len; key_params.seq_len = IEEE80211_GCMP_PN_LEN; - rc = wil_cfg80211_add_key(wiphy, ndev, -1, vif->gtk_index, + rc = wil_cfg80211_add_key(wiphy, vif_to_wdev(vif), -1, + vif->gtk_index, false, NULL, &key_params); if (rc) wil_err(wil, "vif %d recovery add key failed (%d)\n", diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index f7e17994e59a..0b55d445895f 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -2758,11 +2758,11 @@ done: } static s32 -brcmf_cfg80211_del_key(struct wiphy *wiphy, struct net_device *ndev, +brcmf_cfg80211_del_key(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_idx, bool pairwise, const u8 *mac_addr) { - struct brcmf_if *ifp = netdev_priv(ndev); + struct brcmf_if *ifp = netdev_priv(wdev->netdev); struct brcmf_wsec_key *key; s32 err; @@ -2796,12 +2796,12 @@ brcmf_cfg80211_del_key(struct wiphy *wiphy, struct net_device *ndev, } static s32 -brcmf_cfg80211_add_key(struct wiphy *wiphy, struct net_device *ndev, +brcmf_cfg80211_add_key(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_idx, bool pairwise, const u8 *mac_addr, struct key_params *params) { struct brcmf_cfg80211_info *cfg = wiphy_to_cfg(wiphy); - struct brcmf_if *ifp = netdev_priv(ndev); + struct brcmf_if *ifp = netdev_priv(wdev->netdev); struct brcmf_pub *drvr = cfg->pub; struct brcmf_wsec_key *key; s32 val; @@ -2822,7 +2822,7 @@ brcmf_cfg80211_add_key(struct wiphy *wiphy, struct net_device *ndev, } if (params->key_len == 0) - return brcmf_cfg80211_del_key(wiphy, ndev, -1, key_idx, + return brcmf_cfg80211_del_key(wiphy, wdev, -1, key_idx, pairwise, mac_addr); if (params->key_len > sizeof(key->data)) { @@ -2918,7 +2918,7 @@ done: } static s32 -brcmf_cfg80211_get_key(struct wiphy *wiphy, struct net_device *ndev, +brcmf_cfg80211_get_key(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_idx, bool pairwise, const u8 *mac_addr, void *cookie, void (*callback)(void *cookie, @@ -2926,7 +2926,7 @@ brcmf_cfg80211_get_key(struct wiphy *wiphy, struct net_device *ndev, { struct brcmf_cfg80211_info *cfg = wiphy_to_cfg(wiphy); struct key_params params; - struct brcmf_if *ifp = netdev_priv(ndev); + struct brcmf_if *ifp = netdev_priv(wdev->netdev); struct brcmf_cfg80211_profile *profile = &ifp->vif->profile; struct brcmf_pub *drvr = cfg->pub; struct brcmf_cfg80211_security *sec; @@ -2976,10 +2976,10 @@ done: static s32 brcmf_cfg80211_config_default_mgmt_key(struct wiphy *wiphy, - struct net_device *ndev, int link_id, + struct wireless_dev *wdev, int link_id, u8 key_idx) { - struct brcmf_if *ifp = netdev_priv(ndev); + struct brcmf_if *ifp = netdev_priv(wdev->netdev); brcmf_dbg(TRACE, "Enter key_idx %d\n", key_idx); diff --git a/drivers/net/wireless/marvell/libertas/cfg.c b/drivers/net/wireless/marvell/libertas/cfg.c index 56a82b26a1e9..72c92f72469d 100644 --- a/drivers/net/wireless/marvell/libertas/cfg.c +++ b/drivers/net/wireless/marvell/libertas/cfg.c @@ -1507,7 +1507,7 @@ static int lbs_cfg_set_default_key(struct wiphy *wiphy, } -static int lbs_cfg_add_key(struct wiphy *wiphy, struct net_device *netdev, +static int lbs_cfg_add_key(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 idx, bool pairwise, const u8 *mac_addr, struct key_params *params) { @@ -1516,7 +1516,7 @@ static int lbs_cfg_add_key(struct wiphy *wiphy, struct net_device *netdev, u16 key_type; int ret = 0; - if (netdev == priv->mesh_dev) + if (wdev->netdev == priv->mesh_dev) return -EOPNOTSUPP; lbs_deb_assoc("add_key: cipher 0x%x, mac_addr %pM\n", @@ -1568,7 +1568,7 @@ static int lbs_cfg_add_key(struct wiphy *wiphy, struct net_device *netdev, } -static int lbs_cfg_del_key(struct wiphy *wiphy, struct net_device *netdev, +static int lbs_cfg_del_key(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr) { diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index 71e71a5af453..c9a651bdf882 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -141,11 +141,11 @@ static void *mwifiex_cfg80211_get_adapter(struct wiphy *wiphy) * CFG802.11 operation handler to delete a network key. */ static int -mwifiex_cfg80211_del_key(struct wiphy *wiphy, struct net_device *netdev, +mwifiex_cfg80211_del_key(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr) { - struct mwifiex_private *priv = mwifiex_netdev_get_priv(netdev); + struct mwifiex_private *priv = mwifiex_netdev_get_priv(wdev->netdev); static const u8 bc_mac[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; const u8 *peer_mac = pairwise ? mac_addr : bc_mac; @@ -480,11 +480,11 @@ mwifiex_cfg80211_set_default_key(struct wiphy *wiphy, struct net_device *netdev, * CFG802.11 operation handler to add a network key. */ static int -mwifiex_cfg80211_add_key(struct wiphy *wiphy, struct net_device *netdev, +mwifiex_cfg80211_add_key(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr, struct key_params *params) { - struct mwifiex_private *priv = mwifiex_netdev_get_priv(netdev); + struct mwifiex_private *priv = mwifiex_netdev_get_priv(wdev->netdev); struct mwifiex_wep_key *wep_key; static const u8 bc_mac[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; const u8 *peer_mac = pairwise ? mac_addr : bc_mac; @@ -518,11 +518,11 @@ mwifiex_cfg80211_add_key(struct wiphy *wiphy, struct net_device *netdev, */ static int mwifiex_cfg80211_set_default_mgmt_key(struct wiphy *wiphy, - struct net_device *netdev, + struct wireless_dev *wdev, int link_id, u8 key_index) { - struct mwifiex_private *priv = mwifiex_netdev_get_priv(netdev); + struct mwifiex_private *priv = mwifiex_netdev_get_priv(wdev->netdev); struct mwifiex_ds_encrypt_key encrypt_key; wiphy_dbg(wiphy, "set default mgmt key, key index=%d\n", key_index); diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c index 21ef341e002b..3a774cc44b26 100644 --- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c +++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c @@ -534,7 +534,7 @@ static int wilc_wfi_cfg_copy_wpa_info(struct wilc_wfi_key *key_info, return 0; } -static int add_key(struct wiphy *wiphy, struct net_device *netdev, int link_id, +static int add_key(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr, struct key_params *params) @@ -544,7 +544,7 @@ static int add_key(struct wiphy *wiphy, struct net_device *netdev, int link_id, const u8 *tx_mic = NULL; u8 mode = WILC_FW_SEC_NO; u8 op_mode; - struct wilc_vif *vif = netdev_priv(netdev); + struct wilc_vif *vif = netdev_priv(wdev->netdev); struct wilc_priv *priv = &vif->priv; struct wilc_wfi_key *key; @@ -632,19 +632,19 @@ static int add_key(struct wiphy *wiphy, struct net_device *netdev, int link_id, break; default: - netdev_err(netdev, "%s: Unsupported cipher\n", __func__); + netdev_err(wdev->netdev, "%s: Unsupported cipher\n", __func__); ret = -ENOTSUPP; } return ret; } -static int del_key(struct wiphy *wiphy, struct net_device *netdev, int link_id, +static int del_key(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr) { - struct wilc_vif *vif = netdev_priv(netdev); + struct wilc_vif *vif = netdev_priv(wdev->netdev); struct wilc_priv *priv = &vif->priv; if (!pairwise && (key_index == 4 || key_index == 5)) { @@ -680,12 +680,12 @@ static int del_key(struct wiphy *wiphy, struct net_device *netdev, int link_id, return 0; } -static int get_key(struct wiphy *wiphy, struct net_device *netdev, int link_id, +static int get_key(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr, void *cookie, void (*callback)(void *cookie, struct key_params *)) { - struct wilc_vif *vif = netdev_priv(netdev); + struct wilc_vif *vif = netdev_priv(wdev->netdev); struct wilc_priv *priv = &vif->priv; struct key_params key_params; @@ -725,10 +725,10 @@ static int set_default_key(struct wiphy *wiphy, struct net_device *netdev, return 0; } -static int set_default_mgmt_key(struct wiphy *wiphy, struct net_device *netdev, +static int set_default_mgmt_key(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_index) { - struct wilc_vif *vif = netdev_priv(netdev); + struct wilc_vif *vif = netdev_priv(wdev->netdev); return wilc_set_default_mgmt_key_index(vif, key_index); } diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index 340240847a2f..9e44c85d2051 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -532,11 +532,11 @@ qtnf_dump_station(struct wiphy *wiphy, struct wireless_dev *wdev, return ret; } -static int qtnf_add_key(struct wiphy *wiphy, struct net_device *dev, +static int qtnf_add_key(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr, struct key_params *params) { - struct qtnf_vif *vif = qtnf_netdev_get_priv(dev); + struct qtnf_vif *vif = qtnf_netdev_get_priv(wdev->netdev); int ret; ret = qtnf_cmd_send_add_key(vif, key_index, pairwise, mac_addr, params); @@ -548,11 +548,11 @@ static int qtnf_add_key(struct wiphy *wiphy, struct net_device *dev, return ret; } -static int qtnf_del_key(struct wiphy *wiphy, struct net_device *dev, +static int qtnf_del_key(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr) { - struct qtnf_vif *vif = qtnf_netdev_get_priv(dev); + struct qtnf_vif *vif = qtnf_netdev_get_priv(wdev->netdev); int ret; ret = qtnf_cmd_send_del_key(vif, key_index, pairwise, mac_addr); @@ -587,10 +587,10 @@ static int qtnf_set_default_key(struct wiphy *wiphy, struct net_device *dev, } static int -qtnf_set_default_mgmt_key(struct wiphy *wiphy, struct net_device *dev, +qtnf_set_default_mgmt_key(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_index) { - struct qtnf_vif *vif = qtnf_netdev_get_priv(dev); + struct qtnf_vif *vif = qtnf_netdev_get_priv(wdev->netdev); int ret; ret = qtnf_cmd_send_set_default_mgmt_key(vif, key_index); diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c index 83422c5c8c44..7c714ef73ea0 100644 --- a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c +++ b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c @@ -831,7 +831,7 @@ exit: return ret; } -static int cfg80211_rtw_add_key(struct wiphy *wiphy, struct net_device *ndev, +static int cfg80211_rtw_add_key(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr, struct key_params *params) { @@ -839,6 +839,7 @@ static int cfg80211_rtw_add_key(struct wiphy *wiphy, struct net_device *ndev, u32 param_len; struct ieee_param *param = NULL; int ret = 0; + struct net_device *ndev = wdev->netdev; struct adapter *padapter = rtw_netdev_priv(ndev); struct mlme_priv *pmlmepriv = &padapter->mlmepriv; @@ -909,7 +910,7 @@ addkey_end: return ret; } -static int cfg80211_rtw_get_key(struct wiphy *wiphy, struct net_device *ndev, +static int cfg80211_rtw_get_key(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr, void *cookie, void (*callback)(void *cookie, @@ -918,11 +919,11 @@ static int cfg80211_rtw_get_key(struct wiphy *wiphy, struct net_device *ndev, return 0; } -static int cfg80211_rtw_del_key(struct wiphy *wiphy, struct net_device *ndev, +static int cfg80211_rtw_del_key(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr) { - struct adapter *padapter = rtw_netdev_priv(ndev); + struct adapter *padapter = rtw_netdev_priv(wdev->netdev); struct security_priv *psecuritypriv = &padapter->securitypriv; if (key_index == psecuritypriv->dot11PrivacyKeyIndex) { diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 8a63dea500ad..8cd870ece351 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4924,24 +4924,24 @@ struct cfg80211_ops { struct wireless_dev *wdev, unsigned int link_id); - int (*add_key)(struct wiphy *wiphy, struct net_device *netdev, + int (*add_key)(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr, struct key_params *params); - int (*get_key)(struct wiphy *wiphy, struct net_device *netdev, + int (*get_key)(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr, void *cookie, void (*callback)(void *cookie, struct key_params*)); - int (*del_key)(struct wiphy *wiphy, struct net_device *netdev, + int (*del_key)(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr); int (*set_default_key)(struct wiphy *wiphy, struct net_device *netdev, int link_id, u8 key_index, bool unicast, bool multicast); int (*set_default_mgmt_key)(struct wiphy *wiphy, - struct net_device *netdev, int link_id, + struct wireless_dev *wdev, int link_id, u8 key_index); int (*set_default_beacon_key)(struct wiphy *wiphy, - struct net_device *netdev, + struct wireless_dev *wdev, int link_id, u8 key_index); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 0c4979526c91..ee64ac8e0f61 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -608,11 +608,11 @@ static int ieee80211_set_tx(struct ieee80211_sub_if_data *sdata, return ret; } -static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, +static int ieee80211_add_key(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_idx, bool pairwise, const u8 *mac_addr, struct key_params *params) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct ieee80211_link_data *link = ieee80211_link_or_deflink(sdata, link_id, false); struct ieee80211_local *local = sdata->local; @@ -790,11 +790,11 @@ ieee80211_lookup_key(struct ieee80211_sub_if_data *sdata, int link_id, return NULL; } -static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, +static int ieee80211_del_key(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_idx, bool pairwise, const u8 *mac_addr) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct ieee80211_local *local = sdata->local; struct ieee80211_key *key; @@ -809,7 +809,7 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, return 0; } -static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, +static int ieee80211_get_key(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_idx, bool pairwise, const u8 *mac_addr, void *cookie, void (*callback)(void *cookie, @@ -825,7 +825,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, int err = -ENOENT; struct ieee80211_key_seq kseq = {}; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); + sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); rcu_read_lock(); @@ -929,10 +929,10 @@ static int ieee80211_config_default_key(struct wiphy *wiphy, } static int ieee80211_config_default_mgmt_key(struct wiphy *wiphy, - struct net_device *dev, + struct wireless_dev *wdev, int link_id, u8 key_idx) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct ieee80211_link_data *link = ieee80211_link_or_deflink(sdata, link_id, true); @@ -945,10 +945,10 @@ static int ieee80211_config_default_mgmt_key(struct wiphy *wiphy, } static int ieee80211_config_default_beacon_key(struct wiphy *wiphy, - struct net_device *dev, + struct wireless_dev *wdev, int link_id, u8 key_idx) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct ieee80211_link_data *link = ieee80211_link_or_deflink(sdata, link_id, true); diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index a7024af39b40..b1d748bdb504 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -3,7 +3,7 @@ * Some IBSS support code for cfg80211. * * Copyright 2009 Johannes Berg - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2026 Intel Corporation */ #include @@ -172,7 +172,7 @@ void cfg80211_clear_ibss(struct net_device *dev, bool nowext) */ if (rdev->ops->del_key) for (i = 0; i < 6; i++) - rdev_del_key(rdev, dev, -1, i, false, NULL); + rdev_del_key(rdev, wdev, -1, i, false, NULL); if (wdev->u.ibss.current_bss) { cfg80211_unhold_bss(wdev->u.ibss.current_bss); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 26cf29c8867b..2225f5d0b124 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4960,7 +4960,7 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; int err; - struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = info->user_ptr[1]; u8 key_idx = 0; const u8 *mac_addr = NULL; bool pairwise; @@ -4971,7 +4971,6 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) struct sk_buff *msg; bool bigtk_support = false; int link_id = nl80211_link_id_or_invalid(info->attrs); - struct wireless_dev *wdev = dev->ieee80211_ptr; if (wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_BEACON_PROTECTION)) @@ -5023,7 +5022,10 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) cookie.msg = msg; cookie.idx = key_idx; - if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || + if ((wdev->netdev && + nla_put_u32(msg, NL80211_ATTR_IFINDEX, wdev->netdev->ifindex)) || + nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), + NL80211_ATTR_PAD) || nla_put_u8(msg, NL80211_ATTR_KEY_IDX, key_idx)) goto nla_put_failure; if (mac_addr && @@ -5034,7 +5036,7 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) if (err) goto free_msg; - err = rdev_get_key(rdev, dev, link_id, key_idx, pairwise, mac_addr, + err = rdev_get_key(rdev, wdev, link_id, key_idx, pairwise, mac_addr, &cookie, get_key_callback); if (err) @@ -5058,9 +5060,8 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct key_parse key; int err; - struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = info->user_ptr[1]; int link_id = nl80211_link_id_or_invalid(info->attrs); - struct wireless_dev *wdev = dev->ieee80211_ptr; err = nl80211_parse_key(info, &key); if (err) @@ -5080,6 +5081,9 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->set_default_key) return -EOPNOTSUPP; + if (!wdev->netdev) + return -EINVAL; + err = nl80211_key_allowed(wdev); if (err) return err; @@ -5088,7 +5092,7 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) if (err) return err; - err = rdev_set_default_key(rdev, dev, link_id, key.idx, + err = rdev_set_default_key(rdev, wdev->netdev, link_id, key.idx, key.def_uni, key.def_multi); if (err) @@ -5113,7 +5117,7 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) if (err) return err; - err = rdev_set_default_mgmt_key(rdev, dev, link_id, key.idx); + err = rdev_set_default_mgmt_key(rdev, wdev, link_id, key.idx); if (err) return err; @@ -5136,7 +5140,8 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) if (err) return err; - return rdev_set_default_beacon_key(rdev, dev, link_id, key.idx); + return rdev_set_default_beacon_key(rdev, wdev, link_id, + key.idx); } else if (key.p.mode == NL80211_KEY_SET_TX && wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_EXT_KEY_ID)) { @@ -5152,7 +5157,7 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) if (err) return err; - return rdev_add_key(rdev, dev, link_id, key.idx, + return rdev_add_key(rdev, wdev, link_id, key.idx, NL80211_KEYTYPE_PAIRWISE, mac_addr, &key.p); } @@ -5164,11 +5169,10 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; int err; - struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = info->user_ptr[1]; struct key_parse key; const u8 *mac_addr = NULL; int link_id = nl80211_link_id_or_invalid(info->attrs); - struct wireless_dev *wdev = dev->ieee80211_ptr; err = nl80211_parse_key(info, &key); if (err) @@ -5219,7 +5223,7 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) key.type == NL80211_KEYTYPE_PAIRWISE); if (!err) { - err = rdev_add_key(rdev, dev, link_id, key.idx, + err = rdev_add_key(rdev, wdev, link_id, key.idx, key.type == NL80211_KEYTYPE_PAIRWISE, mac_addr, &key.p); if (err) @@ -5233,11 +5237,10 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; int err; - struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = info->user_ptr[1]; u8 *mac_addr = NULL; struct key_parse key; int link_id = nl80211_link_id_or_invalid(info->attrs); - struct wireless_dev *wdev = dev->ieee80211_ptr; err = nl80211_parse_key(info, &key); if (err) @@ -5276,7 +5279,7 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) key.type == NL80211_KEYTYPE_PAIRWISE); if (!err) - err = rdev_del_key(rdev, dev, link_id, key.idx, + err = rdev_del_key(rdev, wdev, link_id, key.idx, key.type == NL80211_KEYTYPE_PAIRWISE, mac_addr); @@ -18071,6 +18074,9 @@ nl80211_epcs_cfg(struct sk_buff *skb, struct genl_info *info) NL80211_FLAG_CLEAR_SKB) \ SELECTOR(__sel, WDEV_UP, \ NL80211_FLAG_NEED_WDEV_UP) \ + SELECTOR(__sel, WDEV_UP_CLEAR, \ + NL80211_FLAG_NEED_WDEV_UP | \ + NL80211_FLAG_CLEAR_SKB) \ SELECTOR(__sel, WDEV_UP_LINK, \ NL80211_FLAG_NEED_WDEV_UP | \ NL80211_FLAG_MLO_VALID_LINK_ID) \ @@ -18403,7 +18409,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_key, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP), }, { .cmd = NL80211_CMD_SET_KEY, @@ -18411,7 +18417,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { .doit = nl80211_set_key, .flags = GENL_UNS_ADMIN_PERM, /* cannot use NL80211_FLAG_MLO_VALID_LINK_ID, depends on key */ - .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_CLEAR_SKB), }, { @@ -18419,7 +18425,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_new_key, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_CLEAR_SKB), }, { @@ -18427,7 +18433,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_del_key, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP), }, { .cmd = NL80211_CMD_SET_BEACON, diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index a8f1e7ddc0c0..2bad8b60b7c9 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -2,7 +2,7 @@ /* * Portions of this file * Copyright(c) 2016-2017 Intel Deutschland GmbH - * Copyright (C) 2018, 2021-2025 Intel Corporation + * Copyright (C) 2018, 2021-2026 Intel Corporation */ #ifndef __CFG80211_RDEV_OPS #define __CFG80211_RDEV_OPS @@ -77,42 +77,42 @@ rdev_change_virtual_intf(struct cfg80211_registered_device *rdev, } static inline int rdev_add_key(struct cfg80211_registered_device *rdev, - struct net_device *netdev, int link_id, + struct wireless_dev *wdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr, struct key_params *params) { int ret; - trace_rdev_add_key(&rdev->wiphy, netdev, link_id, key_index, pairwise, + trace_rdev_add_key(&rdev->wiphy, wdev, link_id, key_index, pairwise, mac_addr, params->mode); - ret = rdev->ops->add_key(&rdev->wiphy, netdev, link_id, key_index, + ret = rdev->ops->add_key(&rdev->wiphy, wdev, link_id, key_index, pairwise, mac_addr, params); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int -rdev_get_key(struct cfg80211_registered_device *rdev, struct net_device *netdev, +rdev_get_key(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr, void *cookie, void (*callback)(void *cookie, struct key_params*)) { int ret; - trace_rdev_get_key(&rdev->wiphy, netdev, link_id, key_index, pairwise, + trace_rdev_get_key(&rdev->wiphy, wdev, link_id, key_index, pairwise, mac_addr); - ret = rdev->ops->get_key(&rdev->wiphy, netdev, link_id, key_index, + ret = rdev->ops->get_key(&rdev->wiphy, wdev, link_id, key_index, pairwise, mac_addr, cookie, callback); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_del_key(struct cfg80211_registered_device *rdev, - struct net_device *netdev, int link_id, + struct wireless_dev *wdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr) { int ret; - trace_rdev_del_key(&rdev->wiphy, netdev, link_id, key_index, pairwise, + trace_rdev_del_key(&rdev->wiphy, wdev, link_id, key_index, pairwise, mac_addr); - ret = rdev->ops->del_key(&rdev->wiphy, netdev, link_id, key_index, + ret = rdev->ops->del_key(&rdev->wiphy, wdev, link_id, key_index, pairwise, mac_addr); trace_rdev_return_int(&rdev->wiphy, ret); return ret; @@ -134,12 +134,12 @@ rdev_set_default_key(struct cfg80211_registered_device *rdev, static inline int rdev_set_default_mgmt_key(struct cfg80211_registered_device *rdev, - struct net_device *netdev, int link_id, u8 key_index) + struct wireless_dev *wdev, int link_id, u8 key_index) { int ret; - trace_rdev_set_default_mgmt_key(&rdev->wiphy, netdev, link_id, + trace_rdev_set_default_mgmt_key(&rdev->wiphy, wdev, link_id, key_index); - ret = rdev->ops->set_default_mgmt_key(&rdev->wiphy, netdev, link_id, + ret = rdev->ops->set_default_mgmt_key(&rdev->wiphy, wdev, link_id, key_index); trace_rdev_return_int(&rdev->wiphy, ret); return ret; @@ -147,14 +147,14 @@ rdev_set_default_mgmt_key(struct cfg80211_registered_device *rdev, static inline int rdev_set_default_beacon_key(struct cfg80211_registered_device *rdev, - struct net_device *netdev, int link_id, + struct wireless_dev *wdev, int link_id, u8 key_index) { int ret; - trace_rdev_set_default_beacon_key(&rdev->wiphy, netdev, link_id, + trace_rdev_set_default_beacon_key(&rdev->wiphy, wdev, link_id, key_index); - ret = rdev->ops->set_default_beacon_key(&rdev->wiphy, netdev, link_id, + ret = rdev->ops->set_default_beacon_key(&rdev->wiphy, wdev, link_id, key_index); trace_rdev_return_int(&rdev->wiphy, ret); return ret; diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 5b21432450d5..86e2ccaa678c 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -5,7 +5,7 @@ * (for nl80211's connect() and wext) * * Copyright 2009 Johannes Berg - * Copyright (C) 2009, 2020, 2022-2025 Intel Corporation. All rights reserved. + * Copyright (C) 2009, 2020, 2022-2026 Intel Corporation. All rights reserved. * Copyright 2017 Intel Deutschland GmbH */ @@ -1386,7 +1386,7 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, NL80211_EXT_FEATURE_BEACON_PROTECTION_CLIENT)) max_key_idx = 7; for (i = 0; i <= max_key_idx; i++) - rdev_del_key(rdev, dev, -1, i, false, NULL); + rdev_del_key(rdev, wdev, -1, i, false, NULL); } rdev_set_qos_map(rdev, dev, NULL); diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 8ab78a899f57..af23f4fca90a 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2,7 +2,7 @@ /* * Portions of this file * Copyright(c) 2016-2017 Intel Deutschland GmbH - * Copyright (C) 2018, 2020-2025 Intel Corporation + * Copyright (C) 2018, 2020-2026 Intel Corporation */ #undef TRACE_SYSTEM #define TRACE_SYSTEM cfg80211 @@ -546,12 +546,12 @@ TRACE_EVENT(rdev_change_virtual_intf, ); DECLARE_EVENT_CLASS(key_handle, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int link_id, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr), - TP_ARGS(wiphy, netdev, link_id, key_index, pairwise, mac_addr), + TP_ARGS(wiphy, wdev, link_id, key_index, pairwise, mac_addr), TP_STRUCT__entry( WIPHY_ENTRY - NETDEV_ENTRY + WDEV_ENTRY MAC_ENTRY(mac_addr) __field(int, link_id) __field(u8, key_index) @@ -559,38 +559,38 @@ DECLARE_EVENT_CLASS(key_handle, ), TP_fast_assign( WIPHY_ASSIGN; - NETDEV_ASSIGN; + WDEV_ASSIGN; MAC_ASSIGN(mac_addr, mac_addr); __entry->link_id = link_id; __entry->key_index = key_index; __entry->pairwise = pairwise; ), - TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d, " + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", link_id: %d, " "key_index: %u, pairwise: %s, mac addr: %pM", - WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id, + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->link_id, __entry->key_index, BOOL_TO_STR(__entry->pairwise), __entry->mac_addr) ); DEFINE_EVENT(key_handle, rdev_get_key, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int link_id, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr), - TP_ARGS(wiphy, netdev, link_id, key_index, pairwise, mac_addr) + TP_ARGS(wiphy, wdev, link_id, key_index, pairwise, mac_addr) ); DEFINE_EVENT(key_handle, rdev_del_key, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int link_id, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr), - TP_ARGS(wiphy, netdev, link_id, key_index, pairwise, mac_addr) + TP_ARGS(wiphy, wdev, link_id, key_index, pairwise, mac_addr) ); TRACE_EVENT(rdev_add_key, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int link_id, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr, u8 mode), - TP_ARGS(wiphy, netdev, link_id, key_index, pairwise, mac_addr, mode), + TP_ARGS(wiphy, wdev, link_id, key_index, pairwise, mac_addr, mode), TP_STRUCT__entry( WIPHY_ENTRY - NETDEV_ENTRY + WDEV_ENTRY MAC_ENTRY(mac_addr) __field(int, link_id) __field(u8, key_index) @@ -599,17 +599,17 @@ TRACE_EVENT(rdev_add_key, ), TP_fast_assign( WIPHY_ASSIGN; - NETDEV_ASSIGN; + WDEV_ASSIGN; MAC_ASSIGN(mac_addr, mac_addr); __entry->link_id = link_id; __entry->key_index = key_index; __entry->pairwise = pairwise; __entry->mode = mode; ), - TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d, " + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", link_id: %d, " "key_index: %u, mode: %u, pairwise: %s, " "mac addr: %pM", - WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id, + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->link_id, __entry->key_index, __entry->mode, BOOL_TO_STR(__entry->pairwise), __entry->mac_addr) ); @@ -642,45 +642,45 @@ TRACE_EVENT(rdev_set_default_key, ); TRACE_EVENT(rdev_set_default_mgmt_key, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int link_id, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_index), - TP_ARGS(wiphy, netdev, link_id, key_index), + TP_ARGS(wiphy, wdev, link_id, key_index), TP_STRUCT__entry( WIPHY_ENTRY - NETDEV_ENTRY + WDEV_ENTRY __field(int, link_id) __field(u8, key_index) ), TP_fast_assign( WIPHY_ASSIGN; - NETDEV_ASSIGN; + WDEV_ASSIGN; __entry->link_id = link_id; __entry->key_index = key_index; ), - TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d, " - "key index: %u", WIPHY_PR_ARG, NETDEV_PR_ARG, - __entry->link_id, __entry->key_index) + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", link_id: %d, key index: %u", + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->link_id, + __entry->key_index) ); TRACE_EVENT(rdev_set_default_beacon_key, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int link_id, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_index), - TP_ARGS(wiphy, netdev, link_id, key_index), + TP_ARGS(wiphy, wdev, link_id, key_index), TP_STRUCT__entry( WIPHY_ENTRY - NETDEV_ENTRY + WDEV_ENTRY __field(int, link_id) __field(u8, key_index) ), TP_fast_assign( WIPHY_ASSIGN; - NETDEV_ASSIGN; + WDEV_ASSIGN; __entry->link_id = link_id; __entry->key_index = key_index; ), - TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d, " - "key index: %u", WIPHY_PR_ARG, NETDEV_PR_ARG, - __entry->link_id, __entry->key_index) + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", link_id: %d, key index: %u", + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->link_id, + __entry->key_index) ); TRACE_EVENT(rdev_start_ap, diff --git a/net/wireless/util.c b/net/wireless/util.c index 702904048d5a..0a0cea018fc5 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1095,7 +1095,7 @@ void cfg80211_upload_connect_keys(struct wireless_dev *wdev) for (i = 0; i < 4; i++) { if (!wdev->connect_keys->params[i].cipher) continue; - if (rdev_add_key(rdev, dev, -1, i, false, NULL, + if (rdev_add_key(rdev, wdev, -1, i, false, NULL, &wdev->connect_keys->params[i])) { netdev_err(dev, "failed to set key %d\n", i); continue; diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 98a4f4c7970d..22d9d9bae8f5 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -457,7 +457,7 @@ static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev, !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN)) err = -ENOENT; else - err = rdev_del_key(rdev, dev, -1, idx, pairwise, + err = rdev_del_key(rdev, wdev, -1, idx, pairwise, addr); } wdev->wext.connect.privacy = false; @@ -496,7 +496,7 @@ static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev, if (wdev->connected || (wdev->iftype == NL80211_IFTYPE_ADHOC && wdev->u.ibss.current_bss)) - err = rdev_add_key(rdev, dev, -1, idx, pairwise, addr, params); + err = rdev_add_key(rdev, wdev, -1, idx, pairwise, addr, params); else if (params->cipher != WLAN_CIPHER_SUITE_WEP40 && params->cipher != WLAN_CIPHER_SUITE_WEP104) return -EINVAL; @@ -549,7 +549,7 @@ static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev, if (wdev->connected || (wdev->iftype == NL80211_IFTYPE_ADHOC && wdev->u.ibss.current_bss)) - err = rdev_set_default_mgmt_key(rdev, dev, -1, idx); + err = rdev_set_default_mgmt_key(rdev, wdev, -1, idx); if (!err) wdev->wext.default_mgmt_key = idx; return err; From 6508c9752451a7e5e44a325e8563897a67f5344b Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 12 Feb 2026 08:52:52 -0600 Subject: [PATCH 0150/1561] wifi: ath11k: Silence remoteproc probe deferral prints Upon failing to resolve the remoteproc phandle one ath11k_dbg() and one ath11k_err() is used to tell the user about the (presumably) temporary failure. Reduce the log spam by removing the duplicate print and switching to dev_err_probe(), in line with how ath12k handles this error. Signed-off-by: Bjorn Andersson Reviewed-by: Baochen Qiang Reviewed-by: Konrad Dybcio Link: https://patch.msgid.link/20260212-ath11k-silence-probe-deferr-v1-1-b8a49bb3c332@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/ahb.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/ahb.c b/drivers/net/wireless/ath/ath11k/ahb.c index 8dfe9b40c126..08d3a0c8f105 100644 --- a/drivers/net/wireless/ath/ath11k/ahb.c +++ b/drivers/net/wireless/ath/ath11k/ahb.c @@ -807,10 +807,8 @@ static int ath11k_core_get_rproc(struct ath11k_base *ab) } prproc = rproc_get_by_phandle(rproc_phandle); - if (!prproc) { - ath11k_dbg(ab, ATH11K_DBG_AHB, "failed to get rproc, deferring\n"); - return -EPROBE_DEFER; - } + if (!prproc) + return dev_err_probe(&ab->pdev->dev, -EPROBE_DEFER, "failed to get rproc\n"); ab_ahb->tgt_rproc = prproc; return 0; @@ -1190,10 +1188,8 @@ static int ath11k_ahb_probe(struct platform_device *pdev) ath11k_ahb_init_qmi_ce_config(ab); ret = ath11k_core_get_rproc(ab); - if (ret) { - ath11k_err(ab, "failed to get rproc: %d\n", ret); + if (ret) goto err_ce_free; - } ret = ath11k_core_init(ab); if (ret) { From f33a8e41826831fc8ceb5f62833488cd9388ed59 Mon Sep 17 00:00:00 2001 From: Ramya Gnanasekar Date: Fri, 27 Feb 2026 09:41:27 +0530 Subject: [PATCH 0151/1561] wifi: ath12k: Set up MLO after SSR During recovery of an MLO setup from a core reset, ATH12K_GROUP_FLAG_REGISTERED is set because ath12k_mac_unregister is not called during core reset. So, when an MLO setup is recovering from a core reset, ath12k_core_mlo_setup() is skipped. Hence, the firmware will not have information about partner links. This makes MLO association fail after recovery. To resolve this, call ath12k_core_mlo_setup() during recovery, to set up MLO. Also, if MLO setup fails during recovery, call ath12k_mac_unregister() and ath12k_mac_destroy() to unregister mac and then tear down the mac structures. Also, initiate MLO teardown in the hardware group stop sequence to align with the hardware group start sequence. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.6-01181-QCAHKSWPL_SILICONZ-1 Signed-off-by: Ramya Gnanasekar Signed-off-by: Roopni Devanathan Reviewed-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20260227041127.3265879-1-roopni.devanathan@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/core.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c index 9dca1a0af73e..218c0a0c9699 100644 --- a/drivers/net/wireless/ath/ath12k/core.c +++ b/drivers/net/wireless/ath/ath12k/core.c @@ -1017,6 +1017,8 @@ static void ath12k_core_hw_group_stop(struct ath12k_hw_group *ag) ath12k_mac_unregister(ag); + ath12k_mac_mlo_teardown(ag); + for (i = ag->num_devices - 1; i >= 0; i--) { ab = ag->ab[i]; if (!ab) @@ -1134,8 +1136,14 @@ static int ath12k_core_hw_group_start(struct ath12k_hw_group *ag) lockdep_assert_held(&ag->mutex); - if (test_bit(ATH12K_GROUP_FLAG_REGISTERED, &ag->flags)) + if (test_bit(ATH12K_GROUP_FLAG_REGISTERED, &ag->flags)) { + ret = ath12k_core_mlo_setup(ag); + if (WARN_ON(ret)) { + ath12k_mac_unregister(ag); + goto err_mac_destroy; + } goto core_pdev_create; + } ret = ath12k_mac_allocate(ag); if (WARN_ON(ret)) From 80a1147469b07a384f6f83a26b31bcd63d6684c0 Mon Sep 17 00:00:00 2001 From: Aaradhana Sahu Date: Fri, 27 Feb 2026 09:03:32 +0530 Subject: [PATCH 0152/1561] wifi: ath12k: Enable monitor mode support on IPQ5332 Currently, rxdma1_enable and supports_monitor are set to false in IPQ5332 hardware parameters, which skips monitor ring configuration and removes NL80211_IFTYPE_MONITOR from the supported interface modes. Set rxdma1_enable and supports_monitor to true so that monitor rings are configured and monitor mode is enabled on IPQ5332. Tested-on: IPQ5332 hw1.0 AHB WLAN.WBE.1.7-00587-QCAHKSWPL_SILICONZ-1 Signed-off-by: Aaradhana Sahu Reviewed-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20260227033332.687805-1-aaradhana.sahu@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/wifi7/hw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/wifi7/hw.c b/drivers/net/wireless/ath/ath12k/wifi7/hw.c index df045ddf42da..27acdfc35459 100644 --- a/drivers/net/wireless/ath/ath12k/wifi7/hw.c +++ b/drivers/net/wireless/ath/ath12k/wifi7/hw.c @@ -617,7 +617,7 @@ static const struct ath12k_hw_params ath12k_wifi7_hw_params[] = { ath12k_wifi7_target_service_to_ce_map_wlan_ipq5332, .svc_to_ce_map_len = 18, - .rxdma1_enable = false, + .rxdma1_enable = true, .num_rxdma_per_pdev = 1, .num_rxdma_dst_ring = 0, .rx_mac_buf_ring = false, @@ -626,7 +626,7 @@ static const struct ath12k_hw_params ath12k_wifi7_hw_params[] = { .interface_modes = BIT(NL80211_IFTYPE_STATION) | BIT(NL80211_IFTYPE_AP) | BIT(NL80211_IFTYPE_MESH_POINT), - .supports_monitor = false, + .supports_monitor = true, .idle_ps = false, .download_calib = true, From cf7cbf97c630c3414302945617b8ac405e2b0dd7 Mon Sep 17 00:00:00 2001 From: Karthikeyan Periyasamy Date: Fri, 27 Feb 2026 09:51:28 +0530 Subject: [PATCH 0153/1561] wifi: ath12k: Remove the unused argument from the Rx data path Currently, the Rx path uses new infrastructure to extract the required HAL parameters. Consequently, the HAL Rx descriptor argument is no longer needed in the following helper functions. Remove the unused argument from the following helper functions. ath12k_dp_rx_h_undecap() ath12k_dp_rx_check_nwifi_hdr_len_valid() ath12k_wifi7_dp_rx_h_mpdu() Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.6-01243-QCAHKSWPL_SILICONZ-1 Signed-off-by: Karthikeyan Periyasamy Reviewed-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20260227042128.3494167-1-karthikeyan.periyasamy@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/dp_rx.c | 2 -- drivers/net/wireless/ath/ath12k/dp_rx.h | 2 -- drivers/net/wireless/ath/ath12k/wifi7/dp_rx.c | 25 ++++++++----------- 3 files changed, 11 insertions(+), 18 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index a32ee9f8061a..c0e2b2c1a292 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -1117,7 +1117,6 @@ static void ath12k_dp_rx_h_undecap_eth(struct ath12k_pdev_dp *dp_pdev, } void ath12k_dp_rx_h_undecap(struct ath12k_pdev_dp *dp_pdev, struct sk_buff *msdu, - struct hal_rx_desc *rx_desc, enum hal_encrypt_type enctype, bool decrypted, struct hal_rx_desc_data *rx_info) @@ -1393,7 +1392,6 @@ void ath12k_dp_rx_deliver_msdu(struct ath12k_pdev_dp *dp_pdev, struct napi_struc EXPORT_SYMBOL(ath12k_dp_rx_deliver_msdu); bool ath12k_dp_rx_check_nwifi_hdr_len_valid(struct ath12k_dp *dp, - struct hal_rx_desc *rx_desc, struct sk_buff *msdu, struct hal_rx_desc_data *rx_info) { diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.h b/drivers/net/wireless/ath/ath12k/dp_rx.h index 1ec5382f5995..bd62af0c80d4 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.h +++ b/drivers/net/wireless/ath/ath12k/dp_rx.h @@ -189,7 +189,6 @@ void ath12k_dp_extract_rx_desc_data(struct ath12k_hal *hal, } void ath12k_dp_rx_h_undecap(struct ath12k_pdev_dp *dp_pdev, struct sk_buff *msdu, - struct hal_rx_desc *rx_desc, enum hal_encrypt_type enctype, bool decrypted, struct hal_rx_desc_data *rx_info); @@ -197,7 +196,6 @@ void ath12k_dp_rx_deliver_msdu(struct ath12k_pdev_dp *dp_pdev, struct napi_struc struct sk_buff *msdu, struct hal_rx_desc_data *rx_info); bool ath12k_dp_rx_check_nwifi_hdr_len_valid(struct ath12k_dp *dp, - struct hal_rx_desc *rx_desc, struct sk_buff *msdu, struct hal_rx_desc_data *rx_info); u64 ath12k_dp_rx_h_get_pn(struct ath12k_dp *dp, struct sk_buff *skb); diff --git a/drivers/net/wireless/ath/ath12k/wifi7/dp_rx.c b/drivers/net/wireless/ath/ath12k/wifi7/dp_rx.c index 7450938adf65..77a1679b41df 100644 --- a/drivers/net/wireless/ath/ath12k/wifi7/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/wifi7/dp_rx.c @@ -325,7 +325,6 @@ static void ath12k_wifi7_dp_rx_h_csum_offload(struct sk_buff *msdu, static void ath12k_wifi7_dp_rx_h_mpdu(struct ath12k_pdev_dp *dp_pdev, struct sk_buff *msdu, - struct hal_rx_desc *rx_desc, struct hal_rx_desc_data *rx_info) { struct ath12k_skb_rxcb *rxcb; @@ -388,8 +387,7 @@ static void ath12k_wifi7_dp_rx_h_mpdu(struct ath12k_pdev_dp *dp_pdev, } ath12k_wifi7_dp_rx_h_csum_offload(msdu, rx_info); - ath12k_dp_rx_h_undecap(dp_pdev, msdu, rx_desc, - enctype, is_decrypted, rx_info); + ath12k_dp_rx_h_undecap(dp_pdev, msdu, enctype, is_decrypted, rx_info); if (!is_decrypted || rx_info->is_mcbc) return; @@ -549,14 +547,14 @@ static int ath12k_wifi7_dp_rx_process_msdu(struct ath12k_pdev_dp *dp_pdev, } } - if (unlikely(!ath12k_dp_rx_check_nwifi_hdr_len_valid(dp, rx_desc, msdu, + if (unlikely(!ath12k_dp_rx_check_nwifi_hdr_len_valid(dp, msdu, rx_info))) { ret = -EINVAL; goto free_out; } ath12k_dp_rx_h_ppdu(dp_pdev, rx_info); - ath12k_wifi7_dp_rx_h_mpdu(dp_pdev, msdu, rx_desc, rx_info); + ath12k_wifi7_dp_rx_h_mpdu(dp_pdev, msdu, rx_info); rx_info->rx_status->flag |= RX_FLAG_SKIP_MONITOR | RX_FLAG_DUP_VALIDATED; @@ -1030,13 +1028,13 @@ mic_fail: RX_FLAG_IV_STRIPPED | RX_FLAG_DECRYPTED; skb_pull(msdu, hal_rx_desc_sz); - if (unlikely(!ath12k_dp_rx_check_nwifi_hdr_len_valid(dp, rx_desc, msdu, + if (unlikely(!ath12k_dp_rx_check_nwifi_hdr_len_valid(dp, msdu, rx_info))) return -EINVAL; ath12k_dp_rx_h_ppdu(dp_pdev, rx_info); - ath12k_dp_rx_h_undecap(dp_pdev, msdu, rx_desc, - HAL_ENCRYPT_TYPE_TKIP_MIC, true, rx_info); + ath12k_dp_rx_h_undecap(dp_pdev, msdu, HAL_ENCRYPT_TYPE_TKIP_MIC, true, + rx_info); ieee80211_rx(ath12k_pdev_dp_to_hw(dp_pdev), msdu); return -EINVAL; } @@ -1588,7 +1586,6 @@ static int ath12k_wifi7_dp_rx_h_null_q_desc(struct ath12k_pdev_dp *dp_pdev, struct ath12k_dp *dp = dp_pdev->dp; struct ath12k_base *ab = dp->ab; u16 msdu_len = rx_info->msdu_len; - struct hal_rx_desc *desc = (struct hal_rx_desc *)msdu->data; u8 l3pad_bytes = rx_info->l3_pad_bytes; struct ath12k_skb_rxcb *rxcb = ATH12K_SKB_RXCB(msdu); u32 hal_rx_desc_sz = dp->ab->hal.hal_desc_sz; @@ -1632,11 +1629,11 @@ static int ath12k_wifi7_dp_rx_h_null_q_desc(struct ath12k_pdev_dp *dp_pdev, skb_put(msdu, hal_rx_desc_sz + l3pad_bytes + msdu_len); skb_pull(msdu, hal_rx_desc_sz + l3pad_bytes); } - if (unlikely(!ath12k_dp_rx_check_nwifi_hdr_len_valid(dp, desc, msdu, rx_info))) + if (unlikely(!ath12k_dp_rx_check_nwifi_hdr_len_valid(dp, msdu, rx_info))) return -EINVAL; ath12k_dp_rx_h_ppdu(dp_pdev, rx_info); - ath12k_wifi7_dp_rx_h_mpdu(dp_pdev, msdu, desc, rx_info); + ath12k_wifi7_dp_rx_h_mpdu(dp_pdev, msdu, rx_info); rxcb->tid = rx_info->tid; @@ -1673,7 +1670,7 @@ static bool ath12k_wifi7_dp_rx_h_tkip_mic_err(struct ath12k_pdev_dp *dp_pdev, skb_put(msdu, hal_rx_desc_sz + l3pad_bytes + msdu_len); skb_pull(msdu, hal_rx_desc_sz + l3pad_bytes); - if (unlikely(!ath12k_dp_rx_check_nwifi_hdr_len_valid(dp, desc, msdu, rx_info))) + if (unlikely(!ath12k_dp_rx_check_nwifi_hdr_len_valid(dp, msdu, rx_info))) return true; ath12k_dp_rx_h_ppdu(dp_pdev, rx_info); @@ -1681,8 +1678,8 @@ static bool ath12k_wifi7_dp_rx_h_tkip_mic_err(struct ath12k_pdev_dp *dp_pdev, rx_info->rx_status->flag |= (RX_FLAG_MMIC_STRIPPED | RX_FLAG_MMIC_ERROR | RX_FLAG_DECRYPTED); - ath12k_dp_rx_h_undecap(dp_pdev, msdu, desc, - HAL_ENCRYPT_TYPE_TKIP_MIC, false, rx_info); + ath12k_dp_rx_h_undecap(dp_pdev, msdu, HAL_ENCRYPT_TYPE_TKIP_MIC, false, + rx_info); return false; } From 4ff5801f45b494ad8251a16ec06c9f303ed3b9a0 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 26 Feb 2026 23:09:27 +0000 Subject: [PATCH 0154/1561] phy: qcom-sgmii-eth: add .set_mode() and .validate() methods qcom-sgmii-eth is an Ethernet SerDes supporting only Ethernet mode using SGMII, 1000BASE-X and 2500BASE-X. Add an implementation of the .set_mode() method, which can be used instead of or as well as the .set_speed() method. The Ethernet interface modes mentioned above all have a fixed data rate, so setting the mode is sufficient to fully specify the operating parameters. Add an implementation of the .validate() method, which will be necessary to allow discovery of the SerDes capabilities for platform independent SerDes support in the stmmac network driver. Reviewed-by: Vladimir Oltean Reviewed-by: Bartosz Golaszewski Acked-by: Vinod Koul Tested-by: Mohd Ayaan Anwar Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vvkU3-0000000AuP2-0hu3@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/phy/qualcomm/phy-qcom-sgmii-eth.c | 43 +++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/drivers/phy/qualcomm/phy-qcom-sgmii-eth.c b/drivers/phy/qualcomm/phy-qcom-sgmii-eth.c index 5b1c82459c12..4ea3dce7719f 100644 --- a/drivers/phy/qualcomm/phy-qcom-sgmii-eth.c +++ b/drivers/phy/qualcomm/phy-qcom-sgmii-eth.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -286,6 +287,37 @@ static int qcom_dwmac_sgmii_phy_power_off(struct phy *phy) return 0; } +static int qcom_dwmac_sgmii_phy_speed(enum phy_mode mode, int submode) +{ + if (mode != PHY_MODE_ETHERNET) + return -EINVAL; + + if (submode == PHY_INTERFACE_MODE_SGMII || + submode == PHY_INTERFACE_MODE_1000BASEX) + return SPEED_1000; + + if (submode == PHY_INTERFACE_MODE_2500BASEX) + return SPEED_2500; + + return -EINVAL; +} + +static int qcom_dwmac_sgmii_phy_set_mode(struct phy *phy, enum phy_mode mode, + int submode) +{ + struct qcom_dwmac_sgmii_phy_data *data = phy_get_drvdata(phy); + int speed; + + speed = qcom_dwmac_sgmii_phy_speed(mode, submode); + if (speed < 0) + return speed; + + if (speed != data->speed) + data->speed = speed; + + return qcom_dwmac_sgmii_phy_calibrate(phy); +} + static int qcom_dwmac_sgmii_phy_set_speed(struct phy *phy, int speed) { struct qcom_dwmac_sgmii_phy_data *data = phy_get_drvdata(phy); @@ -296,10 +328,21 @@ static int qcom_dwmac_sgmii_phy_set_speed(struct phy *phy, int speed) return qcom_dwmac_sgmii_phy_calibrate(phy); } +static int qcom_dwmac_sgmii_phy_validate(struct phy *phy, enum phy_mode mode, + int submode, + union phy_configure_opts *opts) +{ + int ret = qcom_dwmac_sgmii_phy_speed(mode, submode); + + return ret < 0 ? ret : 0; +} + static const struct phy_ops qcom_dwmac_sgmii_phy_ops = { .power_on = qcom_dwmac_sgmii_phy_power_on, .power_off = qcom_dwmac_sgmii_phy_power_off, + .set_mode = qcom_dwmac_sgmii_phy_set_mode, .set_speed = qcom_dwmac_sgmii_phy_set_speed, + .validate = qcom_dwmac_sgmii_phy_validate, .calibrate = qcom_dwmac_sgmii_phy_calibrate, .owner = THIS_MODULE, }; From 2d85152020d576010a5396d10c864a66b4ce8cf3 Mon Sep 17 00:00:00 2001 From: Bitterblue Smith Date: Fri, 6 Feb 2026 20:01:34 +0200 Subject: [PATCH 0155/1561] wifi: rtw89: Turbo mode for RTL8851BU/RTL8852BU RTL8851BU and RTL8852BU currently have much lower TX speed than expected, ~100 and ~300 Mbps, respectively. This is because part of the chip's memory is allocated for some unknown firmware features instead of the TX buffers. The vendor drivers have a module parameter called "rtw_quota_turbo_en", which can be used to choose between full TX speed or the unknown firmware features. It is on by default. Change the relevant chip parameters to implement the turbo mode. Do it only for USB because PCI is already fast even without the turbo mode. It's unclear if SDIO will need it or not. Signed-off-by: Bitterblue Smith Acked-by: Ping-Ke Shih Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/88cce107-ff52-496f-bb77-b930e4182b67@gmail.com --- drivers/net/wireless/realtek/rtw89/mac.c | 31 +++++++++--------- drivers/net/wireless/realtek/rtw89/mac.h | 16 +++++----- drivers/net/wireless/realtek/rtw89/rtw8851b.c | 32 +++++++++---------- drivers/net/wireless/realtek/rtw89/rtw8852b.c | 24 +++++++------- 4 files changed, 51 insertions(+), 52 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/mac.c b/drivers/net/wireless/realtek/rtw89/mac.c index 8472f1a63951..c98ca2a82194 100644 --- a/drivers/net/wireless/realtek/rtw89/mac.c +++ b/drivers/net/wireless/realtek/rtw89/mac.c @@ -1729,8 +1729,8 @@ const struct rtw89_mac_size_set rtw89_mac_size = { /* 8852C PCIE SCC */ .wde_size19 = {RTW89_WDE_PG_64, 3328, 0,}, .wde_size23 = {RTW89_WDE_PG_64, 1022, 2,}, - /* 8852B USB2.0/USB3.0 SCC */ - .wde_size25 = {RTW89_WDE_PG_64, 162, 94,}, + /* 8852B USB2.0/USB3.0 SCC turbo */ + .wde_size30 = {RTW89_WDE_PG_64, 220, 36,}, /* 8852C USB2.0 */ .wde_size31 = {RTW89_WDE_PG_64, 384, 0,}, /* PCIE */ @@ -1754,10 +1754,10 @@ const struct rtw89_mac_size_set rtw89_mac_size = { .ple_size19 = {RTW89_PLE_PG_128, 1904, 16,}, .ple_size20_v1 = {RTW89_PLE_PG_128, 2554, 182, 40960,}, .ple_size22_v1 = {RTW89_PLE_PG_128, 2736, 0, 40960,}, - /* 8852B USB2.0 SCC */ - .ple_size32 = {RTW89_PLE_PG_128, 620, 20,}, - /* 8852B USB3.0 SCC */ - .ple_size33 = {RTW89_PLE_PG_128, 632, 8,}, + /* 8851B USB2.0 SCC turbo */ + .ple_size27 = {RTW89_PLE_PG_128, 1396, 12,}, + /* 8852B USB3.0 SCC turbo */ + .ple_size31 = {RTW89_PLE_PG_128, 1392, 16,}, /* 8852C USB2.0 */ .ple_size34 = {RTW89_PLE_PG_128, 3374, 18,}, /* PCIE 64 */ @@ -1780,8 +1780,8 @@ const struct rtw89_mac_size_set rtw89_mac_size = { .wde_qt18 = {3228, 60, 0, 40,}, .wde_qt19_v1 = {613, 6, 0, 20,}, .wde_qt23 = {958, 48, 0, 16,}, - /* 8852B USB2.0/USB3.0 SCC */ - .wde_qt25 = {152, 2, 0, 8,}, + /* 8852B USB2.0/USB3.0 SCC turbo */ + .wde_qt30 = {210, 2, 0, 8,}, /* 8852C USB2.0 */ .wde_qt31 = {338, 6, 0, 40,}, .ple_qt0 = {320, 320, 32, 16, 13, 13, 292, 292, 64, 18, 1, 4, 0,}, @@ -1799,6 +1799,9 @@ const struct rtw89_mac_size_set rtw89_mac_size = { /* 8852A USB SCC */ .ple_qt25 = {1536, 0, 16, 48, 13, 13, 360, 0, 32, 40, 8, 0,}, .ple_qt26 = {2654, 0, 1134, 48, 64, 13, 1478, 0, 64, 128, 120, 0,}, + /* 8852B USB3.0 SCC turbo */ + .ple_qt27 = {1040, 0, 16, 48, 13, 13, 178, 0, 32, 14, 8, 0,}, + .ple_qt28 = {1040, 0, 32, 48, 43, 13, 208, 0, 62, 14, 24, 0,}, /* USB 52C USB3.0 */ .ple_qt42 = {1068, 0, 16, 48, 4, 13, 178, 0, 16, 1, 8, 16, 0,}, .ple_qt42_v2 = {91, 91, 32, 16, 19, 13, 91, 91, 44, 18, 1, 4, 0, 0,}, @@ -1817,13 +1820,9 @@ const struct rtw89_mac_size_set rtw89_mac_size = { /* PCIE 64 */ .ple_qt58 = {147, 0, 16, 20, 157, 13, 229, 0, 172, 14, 24, 0,}, .ple_qt59 = {147, 0, 32, 20, 1860, 13, 2025, 0, 1879, 14, 24, 0,}, - /* USB2.0 52B SCC */ - .ple_qt72 = {130, 0, 16, 48, 4, 13, 322, 0, 32, 14, 8, 0, 0,}, - /* USB2.0 52B 92K */ - .ple_qt73 = {130, 0, 32, 48, 37, 13, 355, 0, 65, 14, 24, 0, 0,}, - /* USB3.0 52B 92K */ - .ple_qt74 = {286, 0, 16, 48, 4, 13, 178, 0, 32, 14, 8, 0, 0,}, - .ple_qt75 = {286, 0, 32, 48, 37, 13, 211, 0, 65, 14, 24, 0, 0,}, + /* 8851B USB2.0 SCC turbo */ + .ple_qt61 = {858, 0, 16, 48, 4, 13, 370, 0, 32, 14, 8, 0, 0,}, + .ple_qt62 = {858, 0, 32, 48, 37, 13, 403, 0, 65, 14, 24, 0, 0,}, /* USB2.0 52C */ .ple_qt78 = {1560, 0, 16, 48, 13, 13, 390, 0, 32, 38, 8, 16, 0,}, /* USB2.0 52C */ @@ -2004,7 +2003,7 @@ static u32 dle_expected_used_size(struct rtw89_dev *rtwdev, { u32 size = rtwdev->chip->fifo_size; - if (mode == RTW89_QTA_SCC) + if (mode == RTW89_QTA_SCC && rtwdev->hci.type != RTW89_HCI_TYPE_USB) size -= rtwdev->chip->dle_scc_rsvd_size; return size; diff --git a/drivers/net/wireless/realtek/rtw89/mac.h b/drivers/net/wireless/realtek/rtw89/mac.h index e71a71648ab8..e6b715b95409 100644 --- a/drivers/net/wireless/realtek/rtw89/mac.h +++ b/drivers/net/wireless/realtek/rtw89/mac.h @@ -938,7 +938,7 @@ struct rtw89_mac_size_set { const struct rtw89_dle_size wde_size18_v1; const struct rtw89_dle_size wde_size19; const struct rtw89_dle_size wde_size23; - const struct rtw89_dle_size wde_size25; + const struct rtw89_dle_size wde_size30; const struct rtw89_dle_size wde_size31; const struct rtw89_dle_size ple_size0; const struct rtw89_dle_size ple_size1; @@ -953,8 +953,8 @@ struct rtw89_mac_size_set { const struct rtw89_dle_size ple_size19; const struct rtw89_dle_size ple_size20_v1; const struct rtw89_dle_size ple_size22_v1; - const struct rtw89_dle_size ple_size32; - const struct rtw89_dle_size ple_size33; + const struct rtw89_dle_size ple_size27; + const struct rtw89_dle_size ple_size31; const struct rtw89_dle_size ple_size34; const struct rtw89_wde_quota wde_qt0; const struct rtw89_wde_quota wde_qt1; @@ -968,7 +968,7 @@ struct rtw89_mac_size_set { const struct rtw89_wde_quota wde_qt18; const struct rtw89_wde_quota wde_qt19_v1; const struct rtw89_wde_quota wde_qt23; - const struct rtw89_wde_quota wde_qt25; + const struct rtw89_wde_quota wde_qt30; const struct rtw89_wde_quota wde_qt31; const struct rtw89_ple_quota ple_qt0; const struct rtw89_ple_quota ple_qt1; @@ -980,6 +980,8 @@ struct rtw89_mac_size_set { const struct rtw89_ple_quota ple_qt18; const struct rtw89_ple_quota ple_qt25; const struct rtw89_ple_quota ple_qt26; + const struct rtw89_ple_quota ple_qt27; + const struct rtw89_ple_quota ple_qt28; const struct rtw89_ple_quota ple_qt42; const struct rtw89_ple_quota ple_qt42_v2; const struct rtw89_ple_quota ple_qt43; @@ -991,10 +993,8 @@ struct rtw89_mac_size_set { const struct rtw89_ple_quota ple_qt57; const struct rtw89_ple_quota ple_qt58; const struct rtw89_ple_quota ple_qt59; - const struct rtw89_ple_quota ple_qt72; - const struct rtw89_ple_quota ple_qt73; - const struct rtw89_ple_quota ple_qt74; - const struct rtw89_ple_quota ple_qt75; + const struct rtw89_ple_quota ple_qt61; + const struct rtw89_ple_quota ple_qt62; const struct rtw89_ple_quota ple_qt78; const struct rtw89_ple_quota ple_qt79; const struct rtw89_ple_quota ple_qt_52a_wow; diff --git a/drivers/net/wireless/realtek/rtw89/rtw8851b.c b/drivers/net/wireless/realtek/rtw89/rtw8851b.c index d6deb44a685b..b63c0e785209 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8851b.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8851b.c @@ -52,25 +52,25 @@ static const struct rtw89_hfc_param_ini rtw8851b_hfc_param_ini_pcie[] = { }; static const struct rtw89_hfc_ch_cfg rtw8851b_hfc_chcfg_usb[] = { - {18, 152, grp_0}, /* ACH 0 */ - {18, 152, grp_0}, /* ACH 1 */ - {18, 152, grp_0}, /* ACH 2 */ - {18, 152, grp_0}, /* ACH 3 */ + {18, 210, grp_0}, /* ACH 0 */ + {18, 210, grp_0}, /* ACH 1 */ + {18, 210, grp_0}, /* ACH 2 */ + {18, 210, grp_0}, /* ACH 3 */ {0, 0, grp_0}, /* ACH 4 */ {0, 0, grp_0}, /* ACH 5 */ {0, 0, grp_0}, /* ACH 6 */ {0, 0, grp_0}, /* ACH 7 */ - {18, 152, grp_0}, /* B0MGQ */ - {18, 152, grp_0}, /* B0HIQ */ + {18, 210, grp_0}, /* B0MGQ */ + {18, 210, grp_0}, /* B0HIQ */ {0, 0, grp_0}, /* B1MGQ */ {0, 0, grp_0}, /* B1HIQ */ {0, 0, 0} /* FWCMDQ */ }; static const struct rtw89_hfc_pub_cfg rtw8851b_hfc_pubcfg_usb = { - 152, /* Group 0 */ + 210, /* Group 0 */ 0, /* Group 1 */ - 152, /* Public Max */ + 210, /* Public Max */ 0 /* WP threshold */ }; @@ -111,10 +111,10 @@ static const struct rtw89_dle_mem rtw8851b_dle_mem_pcie[] = { }; static const struct rtw89_dle_mem rtw8851b_dle_mem_usb2[] = { - [RTW89_QTA_SCC] = {RTW89_QTA_SCC, &rtw89_mac_size.wde_size25, - &rtw89_mac_size.ple_size32, &rtw89_mac_size.wde_qt25, - &rtw89_mac_size.wde_qt25, &rtw89_mac_size.ple_qt72, - &rtw89_mac_size.ple_qt73}, + [RTW89_QTA_SCC] = {RTW89_QTA_SCC, &rtw89_mac_size.wde_size30, + &rtw89_mac_size.ple_size27, &rtw89_mac_size.wde_qt30, + &rtw89_mac_size.wde_qt30, &rtw89_mac_size.ple_qt61, + &rtw89_mac_size.ple_qt62}, [RTW89_QTA_DLFW] = {RTW89_QTA_DLFW, &rtw89_mac_size.wde_size9, &rtw89_mac_size.ple_size8, &rtw89_mac_size.wde_qt4, &rtw89_mac_size.wde_qt4, &rtw89_mac_size.ple_qt13, @@ -124,10 +124,10 @@ static const struct rtw89_dle_mem rtw8851b_dle_mem_usb2[] = { }; static const struct rtw89_dle_mem rtw8851b_dle_mem_usb3[] = { - [RTW89_QTA_SCC] = {RTW89_QTA_SCC, &rtw89_mac_size.wde_size25, - &rtw89_mac_size.ple_size33, &rtw89_mac_size.wde_qt25, - &rtw89_mac_size.wde_qt25, &rtw89_mac_size.ple_qt74, - &rtw89_mac_size.ple_qt75}, + [RTW89_QTA_SCC] = {RTW89_QTA_SCC, &rtw89_mac_size.wde_size30, + &rtw89_mac_size.ple_size31, &rtw89_mac_size.wde_qt30, + &rtw89_mac_size.wde_qt30, &rtw89_mac_size.ple_qt27, + &rtw89_mac_size.ple_qt28}, [RTW89_QTA_DLFW] = {RTW89_QTA_DLFW, &rtw89_mac_size.wde_size9, &rtw89_mac_size.ple_size8, &rtw89_mac_size.wde_qt4, &rtw89_mac_size.wde_qt4, &rtw89_mac_size.ple_qt13, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852b.c b/drivers/net/wireless/realtek/rtw89/rtw8852b.c index 197e3f5fb21b..393986b297ea 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852b.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852b.c @@ -50,25 +50,25 @@ static const struct rtw89_hfc_param_ini rtw8852b_hfc_param_ini_pcie[] = { }; static const struct rtw89_hfc_ch_cfg rtw8852b_hfc_chcfg_usb[] = { - {18, 152, grp_0}, /* ACH 0 */ - {18, 152, grp_0}, /* ACH 1 */ - {18, 152, grp_0}, /* ACH 2 */ - {18, 152, grp_0}, /* ACH 3 */ + {18, 210, grp_0}, /* ACH 0 */ + {18, 210, grp_0}, /* ACH 1 */ + {18, 210, grp_0}, /* ACH 2 */ + {18, 210, grp_0}, /* ACH 3 */ {0, 0, grp_0}, /* ACH 4 */ {0, 0, grp_0}, /* ACH 5 */ {0, 0, grp_0}, /* ACH 6 */ {0, 0, grp_0}, /* ACH 7 */ - {18, 152, grp_0}, /* B0MGQ */ - {18, 152, grp_0}, /* B0HIQ */ + {18, 210, grp_0}, /* B0MGQ */ + {18, 210, grp_0}, /* B0HIQ */ {0, 0, grp_0}, /* B1MGQ */ {0, 0, grp_0}, /* B1HIQ */ {0, 0, 0} /* FWCMDQ */ }; static const struct rtw89_hfc_pub_cfg rtw8852b_hfc_pubcfg_usb = { - 152, /* Group 0 */ + 210, /* Group 0 */ 0, /* Group 1 */ - 152, /* Public Max */ + 210, /* Public Max */ 0 /* WP threshold */ }; @@ -109,10 +109,10 @@ static const struct rtw89_dle_mem rtw8852b_dle_mem_pcie[] = { }; static const struct rtw89_dle_mem rtw8852b_dle_mem_usb3[] = { - [RTW89_QTA_SCC] = {RTW89_QTA_SCC, &rtw89_mac_size.wde_size25, - &rtw89_mac_size.ple_size33, &rtw89_mac_size.wde_qt25, - &rtw89_mac_size.wde_qt25, &rtw89_mac_size.ple_qt74, - &rtw89_mac_size.ple_qt75}, + [RTW89_QTA_SCC] = {RTW89_QTA_SCC, &rtw89_mac_size.wde_size30, + &rtw89_mac_size.ple_size31, &rtw89_mac_size.wde_qt30, + &rtw89_mac_size.wde_qt30, &rtw89_mac_size.ple_qt27, + &rtw89_mac_size.ple_qt28}, [RTW89_QTA_DLFW] = {RTW89_QTA_DLFW, &rtw89_mac_size.wde_size9, &rtw89_mac_size.ple_size8, &rtw89_mac_size.wde_qt4, &rtw89_mac_size.wde_qt4, &rtw89_mac_size.ple_qt13, From 8341c989ac77d712c7d6e2bce29e8a4bcb2eeae4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 Feb 2026 15:11:20 +0000 Subject: [PATCH 0156/1561] net: remove addr_len argument of recvmsg() handlers Use msg->msg_namelen as a place holder instead of a temporary variable, notably in inet[6]_recvmsg(). This removes stack canaries and allows tail-calls. $ scripts/bloat-o-meter -t vmlinux.old vmlinux add/remove: 0/0 grow/shrink: 2/19 up/down: 26/-532 (-506) Function old new delta rawv6_recvmsg 744 767 +23 vsock_dgram_recvmsg 55 58 +3 vsock_connectible_recvmsg 50 47 -3 unix_stream_recvmsg 161 158 -3 unix_seqpacket_recvmsg 62 59 -3 unix_dgram_recvmsg 42 39 -3 tcp_recvmsg 546 543 -3 mptcp_recvmsg 1568 1565 -3 ping_recvmsg 806 800 -6 tcp_bpf_recvmsg_parser 983 974 -9 ip_recv_error 588 576 -12 ipv6_recv_rxpmtu 442 428 -14 udp_recvmsg 1243 1224 -19 ipv6_recv_error 1046 1024 -22 udpv6_recvmsg 1487 1461 -26 raw_recvmsg 465 437 -28 udp_bpf_recvmsg 1027 984 -43 sock_common_recvmsg 103 27 -76 inet_recvmsg 257 175 -82 inet6_recvmsg 257 175 -82 tcp_bpf_recvmsg 663 568 -95 Total: Before=25143834, After=25143328, chg -0.00% Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260227151120.1346573-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- .../chelsio/inline_crypto/chtls/chtls.h | 2 +- .../chelsio/inline_crypto/chtls/chtls_io.c | 8 ++++---- drivers/net/ovpn/tcp.c | 2 +- include/net/inet_common.h | 3 +-- include/net/ip.h | 2 +- include/net/ipv6.h | 6 ++---- include/net/ping.h | 5 ++--- include/net/sock.h | 2 +- include/net/tcp.h | 2 +- net/core/sock.c | 7 +------ net/ieee802154/socket.c | 6 +++--- net/ipv4/af_inet.c | 17 ++++++----------- net/ipv4/ip_sockglue.c | 4 ++-- net/ipv4/ping.c | 9 ++++----- net/ipv4/raw.c | 6 +++--- net/ipv4/tcp.c | 5 ++--- net/ipv4/tcp_bpf.c | 17 ++++++++--------- net/ipv4/udp.c | 9 ++++----- net/ipv4/udp_bpf.c | 16 ++++++++-------- net/ipv4/udp_impl.h | 3 +-- net/ipv6/af_inet6.c | 11 +++-------- net/ipv6/datagram.c | 9 ++++----- net/ipv6/ping.c | 3 +-- net/ipv6/raw.c | 8 ++++---- net/ipv6/udp.c | 10 +++++----- net/ipv6/udp_impl.h | 3 +-- net/l2tp/l2tp_ip.c | 4 ++-- net/l2tp/l2tp_ip6.c | 6 +++--- net/mptcp/protocol.c | 4 ++-- net/phonet/datagram.c | 4 ++-- net/phonet/pep.c | 2 +- net/sctp/socket.c | 12 ++++++------ net/tls/tls.h | 2 +- net/tls/tls_sw.c | 3 +-- net/unix/af_unix.c | 4 ++-- net/unix/unix_bpf.c | 2 +- net/vmw_vsock/af_vsock.c | 4 ++-- net/vmw_vsock/vsock_bpf.c | 2 +- net/xfrm/espintcp.c | 2 +- 39 files changed, 99 insertions(+), 127 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h index 21e0dfeff158..1de5744a49b0 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h @@ -567,7 +567,7 @@ void chtls_shutdown(struct sock *sk, int how); void chtls_destroy_sock(struct sock *sk); int chtls_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int chtls_recvmsg(struct sock *sk, struct msghdr *msg, - size_t len, int flags, int *addr_len); + size_t len, int flags); void chtls_splice_eof(struct socket *sock); int send_tx_flowc_wr(struct sock *sk, int compl, u32 snd_nxt, u32 rcv_nxt); diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c index ee19933e2cca..c8e99409a52a 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c @@ -1332,7 +1332,7 @@ static void chtls_cleanup_rbuf(struct sock *sk, int copied) } static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int flags, int *addr_len) + int flags) { struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); struct chtls_hws *hws = &csk->tlshws; @@ -1656,7 +1656,7 @@ found_ok_skb: } int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int flags, int *addr_len) + int flags) { struct tcp_sock *tp = tcp_sk(sk); struct chtls_sock *csk; @@ -1670,7 +1670,7 @@ int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, buffers_freed = 0; if (unlikely(flags & MSG_OOB)) - return tcp_prot.recvmsg(sk, msg, len, flags, addr_len); + return tcp_prot.recvmsg(sk, msg, len, flags); if (unlikely(flags & MSG_PEEK)) return peekmsg(sk, msg, len, flags); @@ -1684,7 +1684,7 @@ int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, csk = rcu_dereference_sk_user_data(sk); if (is_tls_rx(csk)) - return chtls_pt_recvmsg(sk, msg, len, flags, addr_len); + return chtls_pt_recvmsg(sk, msg, len, flags); timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); diff --git a/drivers/net/ovpn/tcp.c b/drivers/net/ovpn/tcp.c index 5499c1572f3e..65054cc84be5 100644 --- a/drivers/net/ovpn/tcp.c +++ b/drivers/net/ovpn/tcp.c @@ -158,7 +158,7 @@ err_nopeer: } static int ovpn_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int flags, int *addr_len) + int flags) { int err = 0, off, copied = 0, ret; struct ovpn_socket *sock; diff --git a/include/net/inet_common.h b/include/net/inet_common.h index 5dd2bf24449e..3d747896be30 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -59,8 +59,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int inet_ctl_sock_create(struct sock **sk, unsigned short family, unsigned short type, unsigned char protocol, struct net *net); -int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, - int *addr_len); +int inet_recv_error(struct sock *sk, struct msghdr *msg, int len); struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb); int inet_gro_complete(struct sk_buff *skb, int nhoff); diff --git a/include/net/ip.h b/include/net/ip.h index 69d5cef46004..52264c459357 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -812,7 +812,7 @@ int ip_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct sock *)); -int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len); +int ip_recv_error(struct sock *sk, struct msghdr *msg, int len); void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload); void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport, diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 53c5056508be..1c0ce5151275 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1129,10 +1129,8 @@ int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr_unsized *addr, int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr); void ip6_datagram_release_cb(struct sock *sk); -int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, - int *addr_len); -int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len, - int *addr_len); +int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len); +int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len); void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload); void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info); diff --git a/include/net/ping.h b/include/net/ping.h index 05bfd594a64c..bcbdb5a136e3 100644 --- a/include/net/ping.h +++ b/include/net/ping.h @@ -20,8 +20,7 @@ /* Compatibility glue so we can support IPv6 when it's compiled as a module */ struct pingv6_ops { - int (*ipv6_recv_error)(struct sock *sk, struct msghdr *msg, int len, - int *addr_len); + int (*ipv6_recv_error)(struct sock *sk, struct msghdr *msg, int len); void (*ip6_datagram_recv_common_ctl)(struct sock *sk, struct msghdr *msg, struct sk_buff *skb); @@ -64,7 +63,7 @@ int ping_getfrag(void *from, char *to, int offset, int fraglen, int odd, struct sk_buff *); int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int flags, int *addr_len); + int flags); int ping_common_sendmsg(int family, struct msghdr *msg, size_t len, void *user_icmph, size_t icmph_len); int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); diff --git a/include/net/sock.h b/include/net/sock.h index a7a8b31e9877..6c3f1340e8ef 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1321,7 +1321,7 @@ struct proto { int (*sendmsg)(struct sock *sk, struct msghdr *msg, size_t len); int (*recvmsg)(struct sock *sk, struct msghdr *msg, - size_t len, int flags, int *addr_len); + size_t len, int flags); void (*splice_eof)(struct socket *sock); int (*bind)(struct sock *sk, struct sockaddr_unsized *addr, int addr_len); diff --git a/include/net/tcp.h b/include/net/tcp.h index 9921c47491a5..9cf8785ef0b4 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -500,7 +500,7 @@ void tcp_reset_keepalive_timer(struct sock *sk, unsigned long timeout); void tcp_set_keepalive(struct sock *sk, int val); void tcp_syn_ack_timeout(const struct request_sock *req); int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int flags, int *addr_len); + int flags); int tcp_set_rcvlowat(struct sock *sk, int val); int tcp_set_window_clamp(struct sock *sk, int val); void tcp_update_recv_tstamps(struct sk_buff *skb, diff --git a/net/core/sock.c b/net/core/sock.c index fba4d5b8553c..f4e2ff23d60e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3967,13 +3967,8 @@ int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; - int addr_len = 0; - int err; - err = sk->sk_prot->recvmsg(sk, msg, size, flags, &addr_len); - if (err >= 0) - msg->msg_namelen = addr_len; - return err; + return sk->sk_prot->recvmsg(sk, msg, size, flags); } EXPORT_SYMBOL(sock_common_recvmsg); diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index e542fbe113e7..85dce296d751 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -313,7 +313,7 @@ out: } static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int flags, int *addr_len) + int flags) { size_t copied = 0; int err = -EOPNOTSUPP; @@ -703,7 +703,7 @@ out: } static int dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int flags, int *addr_len) + int flags) { size_t copied = 0; int err = -EOPNOTSUPP; @@ -737,7 +737,7 @@ static int dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, saddr->family = AF_IEEE802154; ieee802154_addr_to_sa(&saddr->addr, &mac_cb(skb)->source); - *addr_len = sizeof(*saddr); + msg->msg_namelen = sizeof(*saddr); } if (ro->want_lqi) { diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 8036e76aa1e4..babcd75a08e2 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -877,22 +877,17 @@ void inet_splice_eof(struct socket *sock) EXPORT_SYMBOL_GPL(inet_splice_eof); INDIRECT_CALLABLE_DECLARE(int udp_recvmsg(struct sock *, struct msghdr *, - size_t, int, int *)); + size_t, int)); int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; - int addr_len = 0; - int err; if (likely(!(flags & MSG_ERRQUEUE))) sock_rps_record_flow(sk); - err = INDIRECT_CALL_2(sk->sk_prot->recvmsg, tcp_recvmsg, udp_recvmsg, - sk, msg, size, flags, &addr_len); - if (err >= 0) - msg->msg_namelen = addr_len; - return err; + return INDIRECT_CALL_2(sk->sk_prot->recvmsg, tcp_recvmsg, udp_recvmsg, + sk, msg, size, flags); } EXPORT_SYMBOL(inet_recvmsg); @@ -1577,15 +1572,15 @@ __be32 inet_current_timestamp(void) } EXPORT_SYMBOL(inet_current_timestamp); -int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) +int inet_recv_error(struct sock *sk, struct msghdr *msg, int len) { unsigned int family = READ_ONCE(sk->sk_family); if (family == AF_INET) - return ip_recv_error(sk, msg, len, addr_len); + return ip_recv_error(sk, msg, len); #if IS_ENABLED(CONFIG_IPV6) if (family == AF_INET6) - return pingv6_ops.ipv6_recv_error(sk, msg, len, addr_len); + return pingv6_ops.ipv6_recv_error(sk, msg, len); #endif return -EINVAL; } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 697e18242d6c..a55ef327ec93 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -520,7 +520,7 @@ static bool ipv4_datagram_support_cmsg(const struct sock *sk, /* * Handle MSG_ERRQUEUE */ -int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) +int ip_recv_error(struct sock *sk, struct msghdr *msg, int len) { struct sock_exterr_skb *serr; struct sk_buff *skb; @@ -557,7 +557,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) serr->addr_offset); sin->sin_port = serr->port; memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); - *addr_len = sizeof(*sin); + msg->msg_namelen = sizeof(*sin); } memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 71d5e17719de..92ab0e0f6f71 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -848,8 +848,7 @@ do_confirm: goto out; } -int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, - int *addr_len) +int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags) { struct inet_sock *isk = inet_sk(sk); int family = sk->sk_family; @@ -864,7 +863,7 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, goto out; if (flags & MSG_ERRQUEUE) - return inet_recv_error(sk, msg, len, addr_len); + return inet_recv_error(sk, msg, len); skb = skb_recv_datagram(sk, flags, &err); if (!skb) @@ -892,7 +891,7 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, sin->sin_port = 0 /* skb->h.uh->source */; sin->sin_addr.s_addr = ip_hdr(skb)->saddr; memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - *addr_len = sizeof(*sin); + msg->msg_namelen = sizeof(*sin); } if (inet_cmsg_flags(isk)) @@ -913,7 +912,7 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, inet6_iif(skb)); - *addr_len = sizeof(*sin6); + msg->msg_namelen = sizeof(*sin6); } if (inet6_sk(sk)->rxopt.all) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index e20c41206e29..34859e537b49 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -739,7 +739,7 @@ out: */ static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int flags, int *addr_len) + int flags) { struct inet_sock *inet = inet_sk(sk); size_t copied = 0; @@ -751,7 +751,7 @@ static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, goto out; if (flags & MSG_ERRQUEUE) { - err = ip_recv_error(sk, msg, len, addr_len); + err = ip_recv_error(sk, msg, len); goto out; } @@ -777,7 +777,7 @@ static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, sin->sin_addr.s_addr = ip_hdr(skb)->saddr; sin->sin_port = 0; memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); - *addr_len = sizeof(*sin); + msg->msg_namelen = sizeof(*sin); } if (inet_cmsg_flags(inet)) ip_cmsg_recv(msg, skb); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f84d9a45cc9d..1790d2fa75ad 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2961,14 +2961,13 @@ recv_sndq: goto out; } -int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, - int *addr_len) +int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags) { int cmsg_flags = 0, ret; struct scm_timestamping_internal tss; if (unlikely(flags & MSG_ERRQUEUE)) - return inet_recv_error(sk, msg, len, addr_len); + return inet_recv_error(sk, msg, len); if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue) && diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index c449a044895e..a0215a2da324 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -221,8 +221,7 @@ static bool is_next_msg_fin(struct sk_psock *psock) static int tcp_bpf_recvmsg_parser(struct sock *sk, struct msghdr *msg, size_t len, - int flags, - int *addr_len) + int flags) { int peek = flags & MSG_PEEK; struct sk_psock *psock; @@ -232,14 +231,14 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk, u32 seq; if (unlikely(flags & MSG_ERRQUEUE)) - return inet_recv_error(sk, msg, len, addr_len); + return inet_recv_error(sk, msg, len); if (!len) return 0; psock = sk_psock_get(sk); if (unlikely(!psock)) - return tcp_recvmsg(sk, msg, len, flags, addr_len); + return tcp_recvmsg(sk, msg, len, flags); lock_sock(sk); tcp = tcp_sk(sk); @@ -352,24 +351,24 @@ static int tcp_bpf_ioctl(struct sock *sk, int cmd, int *karg) } static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int flags, int *addr_len) + int flags) { struct sk_psock *psock; int copied, ret; if (unlikely(flags & MSG_ERRQUEUE)) - return inet_recv_error(sk, msg, len, addr_len); + return inet_recv_error(sk, msg, len); if (!len) return 0; psock = sk_psock_get(sk); if (unlikely(!psock)) - return tcp_recvmsg(sk, msg, len, flags, addr_len); + return tcp_recvmsg(sk, msg, len, flags); if (!skb_queue_empty(&sk->sk_receive_queue) && sk_psock_queue_empty(psock)) { sk_psock_put(sk, psock); - return tcp_recvmsg(sk, msg, len, flags, addr_len); + return tcp_recvmsg(sk, msg, len, flags); } lock_sock(sk); msg_bytes_ready: @@ -389,7 +388,7 @@ msg_bytes_ready: goto msg_bytes_ready; release_sock(sk); sk_psock_put(sk, psock); - return tcp_recvmsg(sk, msg, len, flags, addr_len); + return tcp_recvmsg(sk, msg, len, flags); } copied = -EAGAIN; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 6c6b68a66dcd..27384024ebc0 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2070,8 +2070,7 @@ EXPORT_IPV6_MOD(udp_read_skb); * return it, otherwise we block. */ -int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, - int *addr_len) +int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags) { struct inet_sock *inet = inet_sk(sk); DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); @@ -2082,7 +2081,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, bool checksum_valid = false; if (flags & MSG_ERRQUEUE) - return ip_recv_error(sk, msg, len, addr_len); + return ip_recv_error(sk, msg, len); try_again: off = sk_peek_offset(sk, flags); @@ -2145,11 +2144,11 @@ try_again: sin->sin_port = udp_hdr(skb)->source; sin->sin_addr.s_addr = ip_hdr(skb)->saddr; memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - *addr_len = sizeof(*sin); + msg->msg_namelen = sizeof(*sin); BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, (struct sockaddr *)sin, - addr_len); + &msg->msg_namelen); } if (udp_test_bit(GRO_ENABLED, sk)) diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c index 91233e37cd97..912f0bfef4af 100644 --- a/net/ipv4/udp_bpf.c +++ b/net/ipv4/udp_bpf.c @@ -12,13 +12,13 @@ static struct proto *udpv6_prot_saved __read_mostly; static int sk_udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int flags, int *addr_len) + int flags) { #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) - return udpv6_prot_saved->recvmsg(sk, msg, len, flags, addr_len); + return udpv6_prot_saved->recvmsg(sk, msg, len, flags); #endif - return udp_prot.recvmsg(sk, msg, len, flags, addr_len); + return udp_prot.recvmsg(sk, msg, len, flags); } static bool udp_sk_has_data(struct sock *sk) @@ -61,23 +61,23 @@ static int udp_msg_wait_data(struct sock *sk, struct sk_psock *psock, } static int udp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int flags, int *addr_len) + int flags) { struct sk_psock *psock; int copied, ret; if (unlikely(flags & MSG_ERRQUEUE)) - return inet_recv_error(sk, msg, len, addr_len); + return inet_recv_error(sk, msg, len); if (!len) return 0; psock = sk_psock_get(sk); if (unlikely(!psock)) - return sk_udp_recvmsg(sk, msg, len, flags, addr_len); + return sk_udp_recvmsg(sk, msg, len, flags); if (!psock_has_data(psock)) { - ret = sk_udp_recvmsg(sk, msg, len, flags, addr_len); + ret = sk_udp_recvmsg(sk, msg, len, flags); goto out; } @@ -92,7 +92,7 @@ msg_bytes_ready: if (data) { if (psock_has_data(psock)) goto msg_bytes_ready; - ret = sk_udp_recvmsg(sk, msg, len, flags, addr_len); + ret = sk_udp_recvmsg(sk, msg, len, flags); goto out; } copied = -EAGAIN; diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index c7142213fc21..17a6fa8b1409 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h @@ -18,8 +18,7 @@ int udp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, int udp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); -int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, - int *addr_len); +int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags); void udp_destroy_sock(struct sock *sk); #ifdef CONFIG_PROC_FS diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 23cc9b4cb2f1..0b995a961359 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -661,25 +661,20 @@ int inet6_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) } INDIRECT_CALLABLE_DECLARE(int udpv6_recvmsg(struct sock *, struct msghdr *, - size_t, int, int *)); + size_t, int)); int inet6_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; const struct proto *prot; - int addr_len = 0; - int err; if (likely(!(flags & MSG_ERRQUEUE))) sock_rps_record_flow(sk); /* IPV6_ADDRFORM can change sk->sk_prot under us. */ prot = READ_ONCE(sk->sk_prot); - err = INDIRECT_CALL_2(prot->recvmsg, tcp_recvmsg, udpv6_recvmsg, - sk, msg, size, flags, &addr_len); - if (err >= 0) - msg->msg_namelen = addr_len; - return err; + return INDIRECT_CALL_2(prot->recvmsg, tcp_recvmsg, udpv6_recvmsg, + sk, msg, size, flags); } const struct proto_ops inet6_stream_ops = { diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index c564b68a0562..3cb84022a461 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -452,7 +452,7 @@ static bool ip6_datagram_support_cmsg(struct sk_buff *skb, /* * Handle MSG_ERRQUEUE */ -int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) +int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) { struct ipv6_pinfo *np = inet6_sk(sk); struct sock_exterr_skb *serr; @@ -503,7 +503,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) &sin->sin6_addr); sin->sin6_scope_id = 0; } - *addr_len = sizeof(*sin); + msg->msg_namelen = sizeof(*sin); } memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); @@ -545,8 +545,7 @@ EXPORT_SYMBOL_GPL(ipv6_recv_error); /* * Handle IPV6_RECVPATHMTU */ -int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len, - int *addr_len) +int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len) { struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *skb; @@ -579,7 +578,7 @@ int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len, sin->sin6_port = 0; sin->sin6_scope_id = mtu_info.ip6m_addr.sin6_scope_id; sin->sin6_addr = mtu_info.ip6m_addr.sin6_addr; - *addr_len = sizeof(*sin); + msg->msg_namelen = sizeof(*sin); } put_cmsg(msg, SOL_IPV6, IPV6_PATHMTU, sizeof(mtu_info), &mtu_info); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index e4afc651731a..6e90d0bf9f3d 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -24,8 +24,7 @@ #include /* Compatibility glue so we can support IPv6 when it's compiled as a module */ -static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, - int *addr_len) +static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) { return -EAFNOSUPPORT; } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 27a268059168..0ac704691100 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -432,7 +432,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) */ static int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int flags, int *addr_len) + int flags) { struct ipv6_pinfo *np = inet6_sk(sk); DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); @@ -444,10 +444,10 @@ static int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, return -EOPNOTSUPP; if (flags & MSG_ERRQUEUE) - return ipv6_recv_error(sk, msg, len, addr_len); + return ipv6_recv_error(sk, msg, len); if (np->rxopt.bits.rxpmtu && READ_ONCE(np->rxpmtu)) - return ipv6_recv_rxpmtu(sk, msg, len, addr_len); + return ipv6_recv_rxpmtu(sk, msg, len); skb = skb_recv_datagram(sk, flags, &err); if (!skb) @@ -481,7 +481,7 @@ static int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, sin6->sin6_flowinfo = 0; sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, inet6_iif(skb)); - *addr_len = sizeof(*sin6); + msg->msg_namelen = sizeof(*sin6); } sock_recv_cmsgs(msg, sk, skb); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 48f73401adf4..5a3984e59c90 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -465,7 +465,7 @@ static int udp6_skb_len(struct sk_buff *skb) */ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int flags, int *addr_len) + int flags) { struct ipv6_pinfo *np = inet6_sk(sk); struct inet_sock *inet = inet_sk(sk); @@ -478,10 +478,10 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int is_udp4; if (flags & MSG_ERRQUEUE) - return ipv6_recv_error(sk, msg, len, addr_len); + return ipv6_recv_error(sk, msg, len); if (np->rxopt.bits.rxpmtu && READ_ONCE(np->rxpmtu)) - return ipv6_recv_rxpmtu(sk, msg, len, addr_len); + return ipv6_recv_rxpmtu(sk, msg, len); try_again: off = sk_peek_offset(sk, flags); @@ -553,11 +553,11 @@ try_again: ipv6_iface_scope_id(&sin6->sin6_addr, inet6_iif(skb)); } - *addr_len = sizeof(*sin6); + msg->msg_namelen = sizeof(*sin6); BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, (struct sockaddr *)sin6, - addr_len); + &msg->msg_namelen); } if (udp_test_bit(GRO_ENABLED, sk)) diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index 8a406be25a3a..1bd4a573e1bb 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -22,8 +22,7 @@ int udpv6_getsockopt(struct sock *sk, int level, int optname, int udpv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); -int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, - int *addr_len); +int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags); void udpv6_destroy_sock(struct sock *sk); #ifdef CONFIG_PROC_FS diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index cac1ff59cb83..acb2570c11f6 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -537,7 +537,7 @@ no_route: } static int l2tp_ip_recvmsg(struct sock *sk, struct msghdr *msg, - size_t len, int flags, int *addr_len) + size_t len, int flags) { struct inet_sock *inet = inet_sk(sk); size_t copied = 0; @@ -570,7 +570,7 @@ static int l2tp_ip_recvmsg(struct sock *sk, struct msghdr *msg, sin->sin_addr.s_addr = ip_hdr(skb)->saddr; sin->sin_port = 0; memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); - *addr_len = sizeof(*sin); + msg->msg_namelen = sizeof(*sin); } if (inet_cmsg_flags(inet)) ip_cmsg_recv(msg, skb); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 05a396ba6a3e..bdaae1b64d25 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -679,7 +679,7 @@ do_confirm: } static int l2tp_ip6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int flags, int *addr_len) + int flags) { struct ipv6_pinfo *np = inet6_sk(sk); DECLARE_SOCKADDR(struct sockaddr_l2tpip6 *, lsa, msg->msg_name); @@ -691,7 +691,7 @@ static int l2tp_ip6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, goto out; if (flags & MSG_ERRQUEUE) - return ipv6_recv_error(sk, msg, len, addr_len); + return ipv6_recv_error(sk, msg, len); skb = skb_recv_datagram(sk, flags, &err); if (!skb) @@ -719,7 +719,7 @@ static int l2tp_ip6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, lsa->l2tp_conn_id = 0; if (ipv6_addr_type(&lsa->l2tp_addr) & IPV6_ADDR_LINKLOCAL) lsa->l2tp_scope_id = inet6_iif(skb); - *addr_len = sizeof(*lsa); + msg->msg_namelen = sizeof(*lsa); } if (np->rxopt.all) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index cf1852b99963..3da3da2c81b1 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2259,7 +2259,7 @@ static unsigned int mptcp_inq_hint(const struct sock *sk) } static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int flags, int *addr_len) + int flags) { struct mptcp_sock *msk = mptcp_sk(sk); struct scm_timestamping_internal tss; @@ -2269,7 +2269,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, /* MSG_ERRQUEUE is really a no-op till we support IP_RECVERR */ if (unlikely(flags & MSG_ERRQUEUE)) - return inet_recv_error(sk, msg, len, addr_len); + return inet_recv_error(sk, msg, len); lock_sock(sk); if (unlikely(sk->sk_state == TCP_LISTEN)) { diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c index 976fe250b509..22cf23f06832 100644 --- a/net/phonet/datagram.c +++ b/net/phonet/datagram.c @@ -109,7 +109,7 @@ static int pn_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) } static int pn_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int flags, int *addr_len) + int flags) { struct sk_buff *skb = NULL; struct sockaddr_pn sa; @@ -143,7 +143,7 @@ static int pn_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (msg->msg_name != NULL) { __sockaddr_check_size(sizeof(sa)); memcpy(msg->msg_name, &sa, sizeof(sa)); - *addr_len = sizeof(sa); + msg->msg_namelen = sizeof(sa); } out: diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 120e711ea78c..4dbf0914df7d 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -1262,7 +1262,7 @@ struct sk_buff *pep_read(struct sock *sk) } static int pep_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int flags, int *addr_len) + int flags) { struct sk_buff *skb; int err; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 05fb00c9c335..d2665bbd41a2 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2087,7 +2087,7 @@ static int sctp_skb_pull(struct sk_buff *skb, int len) * 5 for complete description of the flags. */ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int flags, int *addr_len) + int flags) { struct sctp_ulpevent *event = NULL; struct sctp_sock *sp = sctp_sk(sk); @@ -2096,11 +2096,11 @@ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int err = 0; int skb_len; - pr_debug("%s: sk:%p, msghdr:%p, len:%zd, flags:0x%x, addr_len:%p)\n", - __func__, sk, msg, len, flags, addr_len); + pr_debug("%s: sk:%p, msghdr:%p, len:%zd, flags:0x%x)\n", + __func__, sk, msg, len, flags); if (unlikely(flags & MSG_ERRQUEUE)) - return inet_recv_error(sk, msg, len, addr_len); + return inet_recv_error(sk, msg, len); if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue)) @@ -2141,9 +2141,9 @@ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, sock_recv_cmsgs(msg, sk, head_skb); if (sctp_ulpevent_is_notification(event)) { msg->msg_flags |= MSG_NOTIFICATION; - sp->pf->event_msgname(event, msg->msg_name, addr_len); + sp->pf->event_msgname(event, msg->msg_name, &msg->msg_namelen); } else { - sp->pf->skb_msgname(head_skb, msg->msg_name, addr_len); + sp->pf->skb_msgname(head_skb, msg->msg_name, &msg->msg_namelen); } /* Check if we allow SCTP_NXTINFO. */ diff --git a/net/tls/tls.h b/net/tls/tls.h index 2f86baeb71fc..e8f81a006520 100644 --- a/net/tls/tls.h +++ b/net/tls/tls.h @@ -161,7 +161,7 @@ void tls_sw_free_resources_rx(struct sock *sk); void tls_sw_release_resources_rx(struct sock *sk); void tls_sw_free_ctx_rx(struct tls_context *tls_ctx); int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int flags, int *addr_len); + int flags); bool tls_sw_sock_is_readable(struct sock *sk); ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, struct pipe_inode_info *pipe, diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 5fe07f110fe8..a656ce235758 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2031,8 +2031,7 @@ static void tls_rx_reader_unlock(struct sock *sk, struct tls_sw_context_rx *ctx) int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int flags, - int *addr_len) + int flags) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 3756a93dc63a..3a7e00c063c3 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2665,7 +2665,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t si const struct proto *prot = READ_ONCE(sk->sk_prot); if (prot != &unix_dgram_proto) - return prot->recvmsg(sk, msg, size, flags, NULL); + return prot->recvmsg(sk, msg, size, flags); #endif return __unix_dgram_recvmsg(sk, msg, size, flags); } @@ -3139,7 +3139,7 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, const struct proto *prot = READ_ONCE(sk->sk_prot); if (prot != &unix_stream_proto) - return prot->recvmsg(sk, msg, size, flags, NULL); + return prot->recvmsg(sk, msg, size, flags); #endif return unix_stream_read_generic(&state, true); } diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c index e0d30d6d22ac..d14cd5454a8d 100644 --- a/net/unix/unix_bpf.c +++ b/net/unix/unix_bpf.c @@ -49,7 +49,7 @@ static int __unix_recvmsg(struct sock *sk, struct msghdr *msg, } static int unix_bpf_recvmsg(struct sock *sk, struct msghdr *msg, - size_t len, int flags, int *addr_len) + size_t len, int flags) { struct unix_sock *u = unix_sk(sk); struct sk_psock *psock; diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 2f7d94d682cb..f0ab2f13e9db 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1502,7 +1502,7 @@ int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, prot = READ_ONCE(sk->sk_prot); if (prot != &vsock_proto) - return prot->recvmsg(sk, msg, len, flags, NULL); + return prot->recvmsg(sk, msg, len, flags); #endif return __vsock_dgram_recvmsg(sock, msg, len, flags); @@ -2575,7 +2575,7 @@ vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, prot = READ_ONCE(sk->sk_prot); if (prot != &vsock_proto) - return prot->recvmsg(sk, msg, len, flags, NULL); + return prot->recvmsg(sk, msg, len, flags); #endif return __vsock_connectible_recvmsg(sock, msg, len, flags); diff --git a/net/vmw_vsock/vsock_bpf.c b/net/vmw_vsock/vsock_bpf.c index 07b96d56f3a5..9049d2648646 100644 --- a/net/vmw_vsock/vsock_bpf.c +++ b/net/vmw_vsock/vsock_bpf.c @@ -74,7 +74,7 @@ static int __vsock_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int } static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg, - size_t len, int flags, int *addr_len) + size_t len, int flags) { struct sk_psock *psock; struct vsock_sock *vsk; diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c index e1b11ab59f6e..998832419097 100644 --- a/net/xfrm/espintcp.c +++ b/net/xfrm/espintcp.c @@ -133,7 +133,7 @@ static int espintcp_parse(struct strparser *strp, struct sk_buff *skb) } static int espintcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int flags, int *addr_len) + int flags) { struct espintcp_ctx *ctx = espintcp_getctx(sk); struct sk_buff *skb; From c95323ea9dfba8ab2bafbaee9da96c53f893873f Mon Sep 17 00:00:00 2001 From: Ching-Te Ku Date: Fri, 13 Feb 2026 13:34:45 +0800 Subject: [PATCH 0157/1561] wifi: rtw88: coex: Solve LE-HID lag & update coex version to 26020420 When Wi-Fi enters power save, the register value can not be read correctly. If mechanism take the wrong information to make decision, it will run with wrong parameters. It leads Bluetooth low-energy HID lag. Add logic to isolate the wrong register state. BTCOEX Version: 26020420-2020 Desired_BT_Coex_Ver: 0x20 Desired_WL_FW_Ver: 9.9.X Signed-off-by: Ching-Te Ku Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260213053445.19384-1-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw88/coex.c | 44 ++++++++++++------- drivers/net/wireless/realtek/rtw88/main.h | 1 + drivers/net/wireless/realtek/rtw88/rtw8703b.c | 5 +++ drivers/net/wireless/realtek/rtw88/rtw8723d.c | 5 +++ drivers/net/wireless/realtek/rtw88/rtw8821a.c | 7 ++- drivers/net/wireless/realtek/rtw88/rtw8821c.c | 7 ++- drivers/net/wireless/realtek/rtw88/rtw8822b.c | 5 +++ drivers/net/wireless/realtek/rtw88/rtw8822c.c | 5 ++- 8 files changed, 61 insertions(+), 18 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw88/coex.c b/drivers/net/wireless/realtek/rtw88/coex.c index b4dc6ff2c175..ee4007fe6c18 100644 --- a/drivers/net/wireless/realtek/rtw88/coex.c +++ b/drivers/net/wireless/realtek/rtw88/coex.c @@ -485,6 +485,13 @@ static void rtw_coex_monitor_bt_ctr(struct rtw_dev *rtwdev) "[BTCoex], Hi-Pri Rx/Tx: %d/%d, Lo-Pri Rx/Tx: %d/%d\n", coex_stat->hi_pri_rx, coex_stat->hi_pri_tx, coex_stat->lo_pri_rx, coex_stat->lo_pri_tx); + + if (coex_stat->wl_under_lps || coex_stat->wl_under_ips || + (coex_stat->hi_pri_rx > 60000 && coex_stat->hi_pri_tx == 60000 && + coex_stat->lo_pri_rx > 60000 && coex_stat->lo_pri_tx == 60000)) + coex_stat->bt_ctr_ok = false; + else + coex_stat->bt_ctr_ok = true; } static void rtw_coex_monitor_bt_enable(struct rtw_dev *rtwdev) @@ -1959,14 +1966,18 @@ static void rtw_coex_action_bt_hid(struct rtw_dev *rtwdev) struct rtw_coex *coex = &rtwdev->coex; struct rtw_coex_stat *coex_stat = &coex->stat; struct rtw_efuse *efuse = &rtwdev->efuse; + bool is_bt_ctr_hi = false, is_toggle_table = false; u8 table_case, tdma_case; u32 slot_type = 0; - bool bt_multi_link_remain = false, is_toggle_table = false; rtw_dbg(rtwdev, RTW_DBG_COEX, "[BTCoex], %s()\n", __func__); rtw_coex_set_ant_path(rtwdev, false, COEX_SET_ANT_2G); rtw_coex_set_rf_para(rtwdev, chip->wl_rf_para_rx[0]); + if (coex_stat->bt_ctr_ok && + coex_stat->lo_pri_rx + coex_stat->lo_pri_tx > 360) + is_bt_ctr_hi = true; + if (efuse->share_ant) { /* Shared-Ant */ if (coex_stat->bt_ble_exist) { @@ -1980,28 +1991,31 @@ static void rtw_coex_action_bt_hid(struct rtw_dev *rtwdev) } } else { /* Legacy HID */ - if (coex_stat->bt_profile_num == 1 && - (coex_stat->bt_multi_link || - (coex_stat->lo_pri_rx + - coex_stat->lo_pri_tx > 360) || - coex_stat->bt_slave || - bt_multi_link_remain)) { - slot_type = TDMA_4SLOT; - table_case = 12; - tdma_case = 20; - } else if (coex_stat->bt_a2dp_active) { + if (coex_stat->bt_a2dp_active) { table_case = 9; tdma_case = 18; + } else if (coex_stat->bt_profile_num == 1 && + (coex_stat->bt_multi_link && + (is_bt_ctr_hi || coex_stat->bt_slave || + coex_stat->bt_multi_link_remain))) { + if (coex_stat->wl_gl_busy && + (coex_stat->wl_rx_rate <= 3 || + coex_stat->wl_rts_rx_rate <= 3)) + table_case = 13; + else + table_case = 12; + + tdma_case = 26; } else if (coex_stat->bt_418_hid_exist && coex_stat->wl_gl_busy) { is_toggle_table = true; slot_type = TDMA_4SLOT; - table_case = 9; - tdma_case = 24; + table_case = 32; + tdma_case = 27; } else if (coex_stat->bt_ble_hid_exist && coex_stat->wl_gl_busy) { - table_case = 32; - tdma_case = 9; + table_case = 36; + tdma_case = 0; } else { table_case = 9; tdma_case = 9; diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h index 1ab70214ce36..1179231a672d 100644 --- a/drivers/net/wireless/realtek/rtw88/main.h +++ b/drivers/net/wireless/realtek/rtw88/main.h @@ -1475,6 +1475,7 @@ struct rtw_coex_stat { bool bt_game_hid_exist; bool bt_hid_handle_cnt; bool bt_mailbox_reply; + bool bt_ctr_ok; bool wl_under_lps; bool wl_under_ips; diff --git a/drivers/net/wireless/realtek/rtw88/rtw8703b.c b/drivers/net/wireless/realtek/rtw88/rtw8703b.c index 821c28d9cb5d..b5e7ae7ebd95 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8703b.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8703b.c @@ -1794,6 +1794,11 @@ static const struct coex_table_para table_sant_8703b[] = { {0x66556aaa, 0x6a5a6aaa}, /* case-30 */ {0xffffffff, 0x5aaa5aaa}, {0x56555555, 0x5a5a5aaa}, + {0xdaffdaff, 0xdaffdaff}, + {0xddffddff, 0xddffddff}, + {0xe5555555, 0xe5555555}, /* case-35 */ + {0xea5a5a5a, 0xea5a5a5a}, + {0xea6a6a6a, 0xea6a6a6a}, }; /* Shared-Antenna TDMA */ diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.c b/drivers/net/wireless/realtek/rtw88/rtw8723d.c index 8715e0435f17..a2b3e7a2ad99 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8723d.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.c @@ -1459,6 +1459,11 @@ static const struct coex_table_para table_sant_8723d[] = { {0x66556aaa, 0x6a5a6aaa}, /* case-30 */ {0xffffffff, 0x5aaa5aaa}, {0x56555555, 0x5a5a5aaa}, + {0xdaffdaff, 0xdaffdaff}, + {0xddffddff, 0xddffddff}, + {0xe5555555, 0xe5555555}, /* case-35 */ + {0xea5a5a5a, 0xea5a5a5a}, + {0xea6a6a6a, 0xea6a6a6a}, }; /* Non-Shared-Antenna Coex Table */ diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821a.c b/drivers/net/wireless/realtek/rtw88/rtw8821a.c index 414b77eef07c..cab85203b828 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8821a.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8821a.c @@ -998,7 +998,12 @@ static const struct coex_table_para table_sant_8821a[] = { {0x66556655, 0x66556655}, {0x66556aaa, 0x6a5a6aaa}, /* case-30 */ {0xffffffff, 0x5aaa5aaa}, - {0x56555555, 0x5a5a5aaa} + {0x56555555, 0x5a5a5aaa}, + {0xdaffdaff, 0xdaffdaff}, + {0xddffddff, 0xddffddff}, + {0xe5555555, 0xe5555555}, /* case-35 */ + {0xea5a5a5a, 0xea5a5a5a}, + {0xea6a6a6a, 0xea6a6a6a}, }; /* Non-Shared-Antenna Coex Table */ diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821c.c b/drivers/net/wireless/realtek/rtw88/rtw8821c.c index 2078b067562e..246046da4f13 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8821c.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8821c.c @@ -1727,7 +1727,12 @@ static const struct coex_table_para table_sant_8821c[] = { {0x66556655, 0x66556655}, {0x66556aaa, 0x6a5a6aaa}, /* case-30 */ {0xffffffff, 0x5aaa5aaa}, - {0x56555555, 0x5a5a5aaa} + {0x56555555, 0x5a5a5aaa}, + {0xdaffdaff, 0xdaffdaff}, + {0xddffddff, 0xddffddff}, + {0xe5555555, 0xe5555555}, /* case-35 */ + {0xea5a5a5a, 0xea5a5a5a}, + {0xea6a6a6a, 0xea6a6a6a}, }; /* Non-Shared-Antenna Coex Table */ diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822b.c b/drivers/net/wireless/realtek/rtw88/rtw8822b.c index 4d88cc2f4148..e9e8a7f3f382 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8822b.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8822b.c @@ -2217,6 +2217,11 @@ static const struct coex_table_para table_sant_8822b[] = { {0x66556aaa, 0x6a5a6aaa}, /* case-30 */ {0xffffffff, 0x5aaa5aaa}, {0x56555555, 0x5a5a5aaa}, + {0xdaffdaff, 0xdaffdaff}, + {0xddffddff, 0xddffddff}, + {0xe5555555, 0xe5555555}, /* case-35 */ + {0xea5a5a5a, 0xea5a5a5a}, + {0xea6a6a6a, 0xea6a6a6a}, }; /* Non-Shared-Antenna Coex Table */ diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.c b/drivers/net/wireless/realtek/rtw88/rtw8822c.c index 28c121cf1e68..244c8026479c 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8822c.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.c @@ -5035,6 +5035,9 @@ static const struct coex_table_para table_sant_8822c[] = { {0x56555555, 0x5a5a5aaa}, {0xdaffdaff, 0xdaffdaff}, {0xddffddff, 0xddffddff}, + {0xe5555555, 0xe5555555}, /* case-35 */ + {0xea5a5a5a, 0xea5a5a5a}, + {0xea6a6a6a, 0xea6a6a6a}, }; /* Non-Shared-Antenna Coex Table */ @@ -5401,7 +5404,7 @@ const struct rtw_chip_info rtw8822c_hw_spec = { .max_sched_scan_ssids = 4, #endif .max_scan_ie_len = (RTW_PROBE_PG_CNT - 1) * TX_PAGE_SIZE, - .coex_para_ver = 0x22020720, + .coex_para_ver = 0x26020420, .bt_desired_ver = 0x20, .scbd_support = true, .new_scbd10_def = true, From 27c4ab943882c8ca7d3c56241cd969da6b5e6727 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 27 Feb 2026 09:13:03 -0800 Subject: [PATCH 0158/1561] selftests: drv-net: iou-zcrx: wait for memory provider cleanup io_uring defers zcrx context teardown to the iou_exit workqueue. # ps aux | grep iou ... 07:58 0:00 [kworker/u19:0-iou_exit] ... 07:58 0:00 [kworker/u18:2-iou_exit] When the test's receiver process exits, bkg() returns but the memory provider may still be attached to the rx queue. The subsequent defer() that restores tcp-data-split then fails: # Exception while handling defer / cleanup (callback 3 of 3)! # Defer Exception| net.ynl.pyynl.lib.ynl.NlError: Netlink error: can't disable tcp-data-split while device has memory provider enabled: Invalid argument not ok 1 iou-zcrx.test_zcrx.single Add a helper that polls netdev queue-get until no rx queue reports the io-uring memory provider attribute. Register it as a defer() just before tcp-data-split is restored as a "barrier". Reviewed-by: Dragos Tatulea Link: https://patch.msgid.link/20260227171305.2848240-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/hw/iou-zcrx.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py index c63d6d6450d2..c27c2064701d 100755 --- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py +++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py @@ -2,14 +2,27 @@ # SPDX-License-Identifier: GPL-2.0 import re +import time from os import path from lib.py import ksft_run, ksft_exit, KsftSkipEx, ksft_variants, KsftNamedVariant from lib.py import NetDrvEpEnv from lib.py import bkg, cmd, defer, ethtool, rand_port, wait_port_listen -from lib.py import EthtoolFamily +from lib.py import EthtoolFamily, NetdevFamily SKIP_CODE = 42 + +def mp_clear_wait(cfg): + """Wait for io_uring memory providers to clear from all device queues.""" + deadline = time.time() + 5 + while time.time() < deadline: + queues = cfg.netnl.queue_get({'ifindex': cfg.ifindex}, dump=True) + if not any('io-uring' in q for q in queues): + return + time.sleep(0.1) + raise TimeoutError("Timed out waiting for memory provider to clear") + + def create_rss_ctx(cfg): output = ethtool(f"-X {cfg.ifname} context new start {cfg.target} equal 1").stdout values = re.search(r'New RSS context is (\d+)', output).group(1) @@ -46,6 +59,7 @@ def single(cfg): 'tcp-data-split': 'unknown', 'hds-thresh': hds_thresh, 'rx': rx_rings}) + defer(mp_clear_wait, cfg) cfg.target = channels - 1 ethtool(f"-X {cfg.ifname} equal {cfg.target}") @@ -73,6 +87,7 @@ def rss(cfg): 'tcp-data-split': 'unknown', 'hds-thresh': hds_thresh, 'rx': rx_rings}) + defer(mp_clear_wait, cfg) cfg.target = channels - 1 ethtool(f"-X {cfg.ifname} equal {cfg.target}") @@ -159,6 +174,7 @@ def main() -> None: cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) cfg.ethnl = EthtoolFamily() + cfg.netnl = NetdevFamily() cfg.port = rand_port() ksft_run(globs=globals(), cases=[test_zcrx, test_zcrx_oneshot], args=(cfg, )) ksft_exit() From 67792dde27a66fa6dc262d5e2ee8260096a9deb3 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 27 Feb 2026 09:13:04 -0800 Subject: [PATCH 0159/1561] selftests: drv-net: iou-zcrx: rework large chunks test to use common setup Commit a32bb32d0193 ("selftests: iou-zcrx: test large chunk sizes") and commit de7c600e2d5b ("selftests/net: parametrise iou-zcrx.py with ksft_variants") landed at similar time. The large chunks test was actually not included in the list of tests, so it never run. We haven't noticed that it uses the old-style helpers (_get_combined_channels, _get_current_settings, _set_flow_rule) that were removed by the other commit. Rework test_zcrx_large_chunks to reuse the single() setup function and add it to the ksft_run cases list so it actually gets executed. Link: https://patch.msgid.link/20260227171305.2848240-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/hw/iou-zcrx.py | 31 ++++--------------- 1 file changed, 6 insertions(+), 25 deletions(-) diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py index c27c2064701d..1649c23e05e2 100755 --- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py +++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py @@ -135,36 +135,16 @@ def test_zcrx_large_chunks(cfg) -> None: cfg.require_ipver('6') - combined_chans = _get_combined_channels(cfg) - if combined_chans < 2: - raise KsftSkipEx('at least 2 combined channels required') - (rx_ring, hds_thresh) = _get_current_settings(cfg) - port = rand_port() - - ethtool(f"-G {cfg.ifname} tcp-data-split on") - defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto") - - ethtool(f"-G {cfg.ifname} hds-thresh 0") - defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}") - - ethtool(f"-G {cfg.ifname} rx 64") - defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}") - - ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}") - defer(ethtool, f"-X {cfg.ifname} default") - - flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1) - defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}") - - rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1} -x 2" - tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 12840" + single(cfg) + rx_cmd = f"{cfg.bin_local} -s -p {cfg.port} -i {cfg.ifname} -q {cfg.target} -x 2" + tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {cfg.port} -l 12840" probe = cmd(rx_cmd + " -d", fail=False) if probe.ret == SKIP_CODE: raise KsftSkipEx(probe.stdout) with bkg(rx_cmd, exit_wait=True): - wait_port_listen(port, proto="tcp") + wait_port_listen(cfg.port, proto="tcp") cmd(tx_cmd, host=cfg.remote) @@ -176,7 +156,8 @@ def main() -> None: cfg.ethnl = EthtoolFamily() cfg.netnl = NetdevFamily() cfg.port = rand_port() - ksft_run(globs=globals(), cases=[test_zcrx, test_zcrx_oneshot], args=(cfg, )) + ksft_run(globs=globals(), cases=[test_zcrx, test_zcrx_oneshot, + test_zcrx_large_chunks], args=(cfg, )) ksft_exit() From c7b228418e8b34282d266ffeef59996434a73b32 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 27 Feb 2026 09:13:05 -0800 Subject: [PATCH 0160/1561] selftests: drv-net: iou-zcrx: allocate hugepages for large chunks test The large chunks test needs 2MB hugepages for its mmap allocation, but the test system may not have any pre-allocated. Ensure at least 64 hugepages are available before running the test, and restore the original value on cleanup. While at it strip the stdout, it has a trailing new line. Before: ok 5 iou-zcrx.test_zcrx_large_chunks # SKIP Can't allocate huge pages Link: https://patch.msgid.link/20260227171305.2848240-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/iou-zcrx.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py index 1649c23e05e2..66dd496ec5cf 100755 --- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py +++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py @@ -135,13 +135,21 @@ def test_zcrx_large_chunks(cfg) -> None: cfg.require_ipver('6') + hp_file = "/proc/sys/vm/nr_hugepages" + with open(hp_file, 'r+', encoding='utf-8') as f: + nr_hugepages = int(f.read().strip()) + if nr_hugepages < 64: + f.seek(0) + f.write("64") + defer(lambda: open(hp_file, 'w', encoding='utf-8').write(str(nr_hugepages))) + single(cfg) rx_cmd = f"{cfg.bin_local} -s -p {cfg.port} -i {cfg.ifname} -q {cfg.target} -x 2" tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {cfg.port} -l 12840" probe = cmd(rx_cmd + " -d", fail=False) if probe.ret == SKIP_CODE: - raise KsftSkipEx(probe.stdout) + raise KsftSkipEx(probe.stdout.strip()) with bkg(rx_cmd, exit_wait=True): wait_port_listen(cfg.port, proto="tcp") From abaa59fb24cad1bfb4aeb3b2a152b5b4eb90a2d3 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Fri, 13 Feb 2026 14:15:41 +0800 Subject: [PATCH 0161/1561] wifi: rtw89: fw: add fw_def struct to put firmware name and format version The RTL8922DE has a RTL8922DE-VS variant which uses different firmware name and format version, and the rule to select firmware type will be needed to extend. Prepare for coming patches. Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260213061552.29997-2-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/core.c | 3 ++- drivers/net/wireless/realtek/rtw89/core.h | 25 +++++++++++++++++-- drivers/net/wireless/realtek/rtw89/fw.c | 10 +++++--- drivers/net/wireless/realtek/rtw89/fw.h | 1 + drivers/net/wireless/realtek/rtw89/rtw8851b.c | 6 +++-- drivers/net/wireless/realtek/rtw89/rtw8852a.c | 6 +++-- drivers/net/wireless/realtek/rtw89/rtw8852b.c | 6 +++-- .../net/wireless/realtek/rtw89/rtw8852bt.c | 6 +++-- drivers/net/wireless/realtek/rtw89/rtw8852c.c | 6 +++-- drivers/net/wireless/realtek/rtw89/rtw8922a.c | 7 ++++-- 10 files changed, 57 insertions(+), 19 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/core.c b/drivers/net/wireless/realtek/rtw89/core.c index 36e988277b2b..9d9b91570989 100644 --- a/drivers/net/wireless/realtek/rtw89/core.c +++ b/drivers/net/wireless/realtek/rtw89/core.c @@ -6753,7 +6753,8 @@ struct rtw89_dev *rtw89_alloc_ieee80211_hw(struct device *device, bool support_mlo; bool no_chanctx; - firmware = rtw89_early_fw_feature_recognize(device, chip, &early_fw, &fw_format); + firmware = rtw89_early_fw_feature_recognize(device, chip, variant, + &early_fw, &fw_format); ops = kmemdup(&rtw89_ops, sizeof(rtw89_ops), GFP_KERNEL); if (!ops) diff --git a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h index 4778957d6b2d..5cb8aacf7644 100644 --- a/drivers/net/wireless/realtek/rtw89/core.h +++ b/drivers/net/wireless/realtek/rtw89/core.h @@ -4152,6 +4152,11 @@ struct rtw89_reg_imr { u32 set; }; +struct rtw89_fw_def { + const char *fw_basename; + u8 fw_format_max; +}; + struct rtw89_phy_table { const struct rtw89_reg2_def *regs; u32 n_regs; @@ -4494,8 +4499,7 @@ struct rtw89_chip_info { const struct rtw89_chip_ops *ops; const struct rtw89_mac_gen_def *mac_def; const struct rtw89_phy_gen_def *phy_def; - const char *fw_basename; - u8 fw_format_max; + struct rtw89_fw_def fw_def; bool try_ce_fw; u8 bbmcu_nr; u32 needed_fw_elms; @@ -4633,6 +4637,7 @@ struct rtw89_chip_info { struct rtw89_chip_variant { bool no_mcs_12_13: 1; u32 fw_min_ver_code; + const struct rtw89_fw_def *fw_def_override; }; union rtw89_bus_info { @@ -7379,6 +7384,22 @@ void rtw89_chip_calc_rx_gain_normal(struct rtw89_dev *rtwdev, chip->ops->calc_rx_gain_normal(rtwdev, chan, path, phy_idx, calc); } +static inline const struct rtw89_fw_def * +__rtw89_chip_get_fw_def(const struct rtw89_chip_info *chip, + const struct rtw89_chip_variant *variant) +{ + if (variant && variant->fw_def_override) + return variant->fw_def_override; + + return &chip->fw_def; +} + +static inline +const struct rtw89_fw_def *rtw89_chip_get_fw_def(struct rtw89_dev *rtwdev) +{ + return __rtw89_chip_get_fw_def(rtwdev->chip, rtwdev->variant); +} + static inline void rtw89_load_txpwr_table(struct rtw89_dev *rtwdev, const struct rtw89_txpwr_table *tbl) { diff --git a/drivers/net/wireless/realtek/rtw89/fw.c b/drivers/net/wireless/realtek/rtw89/fw.c index c52f9e11a8b2..aa42e5c60eb7 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.c +++ b/drivers/net/wireless/realtek/rtw89/fw.c @@ -965,18 +965,20 @@ static void rtw89_fw_recognize_features(struct rtw89_dev *rtwdev) const struct firmware * rtw89_early_fw_feature_recognize(struct device *device, const struct rtw89_chip_info *chip, + const struct rtw89_chip_variant *variant, struct rtw89_fw_info *early_fw, int *used_fw_format) { + const struct rtw89_fw_def *fw_def = __rtw89_chip_get_fw_def(chip, variant); const struct firmware *firmware; char fw_name[64]; int fw_format; u32 ver_code; int ret; - for (fw_format = chip->fw_format_max; fw_format >= 0; fw_format--) { + for (fw_format = fw_def->fw_format_max; fw_format >= 0; fw_format--) { rtw89_fw_get_filename(fw_name, sizeof(fw_name), - chip->fw_basename, fw_format); + fw_def->fw_basename, fw_format); ret = request_firmware(&firmware, fw_name, device); if (!ret) { @@ -2024,11 +2026,11 @@ void rtw89_load_firmware_work(struct work_struct *work) { struct rtw89_dev *rtwdev = container_of(work, struct rtw89_dev, load_firmware_work); - const struct rtw89_chip_info *chip = rtwdev->chip; + const struct rtw89_fw_def *fw_def = rtw89_chip_get_fw_def(rtwdev); char fw_name[64]; rtw89_fw_get_filename(fw_name, sizeof(fw_name), - chip->fw_basename, rtwdev->fw.fw_format); + fw_def->fw_basename, rtwdev->fw.fw_format); rtw89_load_firmware_req(rtwdev, &rtwdev->fw.req, fw_name, false); } diff --git a/drivers/net/wireless/realtek/rtw89/fw.h b/drivers/net/wireless/realtek/rtw89/fw.h index d45b6ea6ea1b..c60d419616d6 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.h +++ b/drivers/net/wireless/realtek/rtw89/fw.h @@ -5171,6 +5171,7 @@ int rtw89_fw_recognize_elements(struct rtw89_dev *rtwdev); const struct firmware * rtw89_early_fw_feature_recognize(struct device *device, const struct rtw89_chip_info *chip, + const struct rtw89_chip_variant *variant, struct rtw89_fw_info *early_fw, int *used_fw_format); int rtw89_fw_download(struct rtw89_dev *rtwdev, enum rtw89_fw_type type, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8851b.c b/drivers/net/wireless/realtek/rtw89/rtw8851b.c index b63c0e785209..fdb053fabae6 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8851b.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8851b.c @@ -2580,8 +2580,10 @@ const struct rtw89_chip_info rtw8851b_chip_info = { .ops = &rtw8851b_chip_ops, .mac_def = &rtw89_mac_gen_ax, .phy_def = &rtw89_phy_gen_ax, - .fw_basename = RTW8851B_FW_BASENAME, - .fw_format_max = RTW8851B_FW_FORMAT_MAX, + .fw_def = { + .fw_basename = RTW8851B_FW_BASENAME, + .fw_format_max = RTW8851B_FW_FORMAT_MAX, + }, .try_ce_fw = true, .bbmcu_nr = 0, .needed_fw_elms = 0, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852a.c b/drivers/net/wireless/realtek/rtw89/rtw8852a.c index 5ea7a36ab5ab..36d66f4e2dd0 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852a.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852a.c @@ -2265,8 +2265,10 @@ const struct rtw89_chip_info rtw8852a_chip_info = { .ops = &rtw8852a_chip_ops, .mac_def = &rtw89_mac_gen_ax, .phy_def = &rtw89_phy_gen_ax, - .fw_basename = RTW8852A_FW_BASENAME, - .fw_format_max = RTW8852A_FW_FORMAT_MAX, + .fw_def = { + .fw_basename = RTW8852A_FW_BASENAME, + .fw_format_max = RTW8852A_FW_FORMAT_MAX, + }, .try_ce_fw = false, .bbmcu_nr = 0, .needed_fw_elms = 0, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852b.c b/drivers/net/wireless/realtek/rtw89/rtw8852b.c index 393986b297ea..5a41ff1f18f4 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852b.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852b.c @@ -911,8 +911,10 @@ const struct rtw89_chip_info rtw8852b_chip_info = { .ops = &rtw8852b_chip_ops, .mac_def = &rtw89_mac_gen_ax, .phy_def = &rtw89_phy_gen_ax, - .fw_basename = RTW8852B_FW_BASENAME, - .fw_format_max = RTW8852B_FW_FORMAT_MAX, + .fw_def = { + .fw_basename = RTW8852B_FW_BASENAME, + .fw_format_max = RTW8852B_FW_FORMAT_MAX, + }, .try_ce_fw = true, .bbmcu_nr = 0, .needed_fw_elms = 0, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852bt.c b/drivers/net/wireless/realtek/rtw89/rtw8852bt.c index 92bbd6e5d699..514861e48c25 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852bt.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852bt.c @@ -757,8 +757,10 @@ const struct rtw89_chip_info rtw8852bt_chip_info = { .ops = &rtw8852bt_chip_ops, .mac_def = &rtw89_mac_gen_ax, .phy_def = &rtw89_phy_gen_ax, - .fw_basename = RTW8852BT_FW_BASENAME, - .fw_format_max = RTW8852BT_FW_FORMAT_MAX, + .fw_def = { + .fw_basename = RTW8852BT_FW_BASENAME, + .fw_format_max = RTW8852BT_FW_FORMAT_MAX, + }, .try_ce_fw = true, .bbmcu_nr = 0, .needed_fw_elms = RTW89_AX_GEN_DEF_NEEDED_FW_ELEMENTS_NO_6GHZ, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852c.c b/drivers/net/wireless/realtek/rtw89/rtw8852c.c index de5d343f80a5..70ca8c3950d9 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852c.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852c.c @@ -3106,8 +3106,10 @@ const struct rtw89_chip_info rtw8852c_chip_info = { .ops = &rtw8852c_chip_ops, .mac_def = &rtw89_mac_gen_ax, .phy_def = &rtw89_phy_gen_ax, - .fw_basename = RTW8852C_FW_BASENAME, - .fw_format_max = RTW8852C_FW_FORMAT_MAX, + .fw_def = { + .fw_basename = RTW8852C_FW_BASENAME, + .fw_format_max = RTW8852C_FW_FORMAT_MAX, + }, .try_ce_fw = false, .bbmcu_nr = 0, .needed_fw_elms = 0, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922a.c b/drivers/net/wireless/realtek/rtw89/rtw8922a.c index f41b66b362c4..fd1b171b106c 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8922a.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8922a.c @@ -2916,8 +2916,10 @@ const struct rtw89_chip_info rtw8922a_chip_info = { .ops = &rtw8922a_chip_ops, .mac_def = &rtw89_mac_gen_be, .phy_def = &rtw89_phy_gen_be, - .fw_basename = RTW8922A_FW_BASENAME, - .fw_format_max = RTW8922A_FW_FORMAT_MAX, + .fw_def = { + .fw_basename = RTW8922A_FW_BASENAME, + .fw_format_max = RTW8922A_FW_FORMAT_MAX, + }, .try_ce_fw = false, .bbmcu_nr = 1, .needed_fw_elms = RTW89_BE_GEN_DEF_NEEDED_FW_ELEMENTS, @@ -3057,6 +3059,7 @@ EXPORT_SYMBOL(rtw8922a_chip_info); const struct rtw89_chip_variant rtw8922ae_vs_variant = { .no_mcs_12_13 = true, .fw_min_ver_code = RTW89_FW_VER_CODE(0, 35, 54, 0), + .fw_def_override = NULL, }; EXPORT_SYMBOL(rtw8922ae_vs_variant); From fdb626b79477d40c858faf7dad7628704f7b7c3c Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Fri, 13 Feb 2026 14:15:42 +0800 Subject: [PATCH 0162/1561] wifi: rtw89: fw: recognize firmware type B by AID Select a firmware suit including normal and WoWLAN firmware by chip AID from multiple firmware, because coming RTL8922D has variant hardware with different chip AID. Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260213061552.29997-3-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/core.h | 4 ++++ drivers/net/wireless/realtek/rtw89/fw.c | 13 +++++++++++-- drivers/net/wireless/realtek/rtw89/rtw8851b.c | 1 + drivers/net/wireless/realtek/rtw89/rtw8852a.c | 1 + drivers/net/wireless/realtek/rtw89/rtw8852b.c | 1 + drivers/net/wireless/realtek/rtw89/rtw8852bt.c | 1 + drivers/net/wireless/realtek/rtw89/rtw8852c.c | 1 + drivers/net/wireless/realtek/rtw89/rtw8922a.c | 1 + 8 files changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h index 5cb8aacf7644..d1f58bf953a9 100644 --- a/drivers/net/wireless/realtek/rtw89/core.h +++ b/drivers/net/wireless/realtek/rtw89/core.h @@ -4155,6 +4155,7 @@ struct rtw89_reg_imr { struct rtw89_fw_def { const char *fw_basename; u8 fw_format_max; + u16 fw_b_aid; }; struct rtw89_phy_table { @@ -4729,6 +4730,8 @@ enum rtw89_fw_type { RTW89_FW_NORMAL = 1, RTW89_FW_WOWLAN = 3, RTW89_FW_NORMAL_CE = 5, + RTW89_FW_NORMAL_B = 14, + RTW89_FW_WOWLAN_B = 15, RTW89_FW_BBMCU0 = 64, RTW89_FW_BBMCU1 = 65, RTW89_FW_LOGFMT = 255, @@ -7562,6 +7565,7 @@ static inline struct rtw89_fw_suit *rtw89_fw_suit_get(struct rtw89_dev *rtwdev, switch (type) { case RTW89_FW_WOWLAN: + case RTW89_FW_WOWLAN_B: return &fw_info->wowlan; case RTW89_FW_LOGFMT: return &fw_info->log.suit; diff --git a/drivers/net/wireless/realtek/rtw89/fw.c b/drivers/net/wireless/realtek/rtw89/fw.c index aa42e5c60eb7..130ebe33cef0 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.c +++ b/drivers/net/wireless/realtek/rtw89/fw.c @@ -1027,16 +1027,25 @@ static int rtw89_fw_validate_ver_required(struct rtw89_dev *rtwdev) int rtw89_fw_recognize(struct rtw89_dev *rtwdev) { + const struct rtw89_fw_def *fw_def = rtw89_chip_get_fw_def(rtwdev); const struct rtw89_chip_info *chip = rtwdev->chip; + const struct rtw89_hal *hal = &rtwdev->hal; + enum rtw89_fw_type normal_fw_type = RTW89_FW_NORMAL; + enum rtw89_fw_type wowlan_fw_type = RTW89_FW_WOWLAN; int ret; + if (fw_def->fw_b_aid && fw_def->fw_b_aid == hal->aid) { + normal_fw_type = RTW89_FW_NORMAL_B; + wowlan_fw_type = RTW89_FW_WOWLAN_B; + } + if (chip->try_ce_fw) { ret = __rtw89_fw_recognize(rtwdev, RTW89_FW_NORMAL_CE, true); if (!ret) goto normal_done; } - ret = __rtw89_fw_recognize(rtwdev, RTW89_FW_NORMAL, false); + ret = __rtw89_fw_recognize(rtwdev, normal_fw_type, false); if (ret) return ret; @@ -1046,7 +1055,7 @@ normal_done: return ret; /* It still works if wowlan firmware isn't existing. */ - __rtw89_fw_recognize(rtwdev, RTW89_FW_WOWLAN, false); + __rtw89_fw_recognize(rtwdev, wowlan_fw_type, false); /* It still works if log format file isn't existing. */ __rtw89_fw_recognize(rtwdev, RTW89_FW_LOGFMT, true); diff --git a/drivers/net/wireless/realtek/rtw89/rtw8851b.c b/drivers/net/wireless/realtek/rtw89/rtw8851b.c index fdb053fabae6..8352380b4be5 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8851b.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8851b.c @@ -2583,6 +2583,7 @@ const struct rtw89_chip_info rtw8851b_chip_info = { .fw_def = { .fw_basename = RTW8851B_FW_BASENAME, .fw_format_max = RTW8851B_FW_FORMAT_MAX, + .fw_b_aid = 0, }, .try_ce_fw = true, .bbmcu_nr = 0, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852a.c b/drivers/net/wireless/realtek/rtw89/rtw8852a.c index 36d66f4e2dd0..32644c40ae28 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852a.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852a.c @@ -2268,6 +2268,7 @@ const struct rtw89_chip_info rtw8852a_chip_info = { .fw_def = { .fw_basename = RTW8852A_FW_BASENAME, .fw_format_max = RTW8852A_FW_FORMAT_MAX, + .fw_b_aid = 0, }, .try_ce_fw = false, .bbmcu_nr = 0, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852b.c b/drivers/net/wireless/realtek/rtw89/rtw8852b.c index 5a41ff1f18f4..66cd46fbee4a 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852b.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852b.c @@ -914,6 +914,7 @@ const struct rtw89_chip_info rtw8852b_chip_info = { .fw_def = { .fw_basename = RTW8852B_FW_BASENAME, .fw_format_max = RTW8852B_FW_FORMAT_MAX, + .fw_b_aid = 0, }, .try_ce_fw = true, .bbmcu_nr = 0, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852bt.c b/drivers/net/wireless/realtek/rtw89/rtw8852bt.c index 514861e48c25..0acdf64395f0 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852bt.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852bt.c @@ -760,6 +760,7 @@ const struct rtw89_chip_info rtw8852bt_chip_info = { .fw_def = { .fw_basename = RTW8852BT_FW_BASENAME, .fw_format_max = RTW8852BT_FW_FORMAT_MAX, + .fw_b_aid = 0, }, .try_ce_fw = true, .bbmcu_nr = 0, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852c.c b/drivers/net/wireless/realtek/rtw89/rtw8852c.c index 70ca8c3950d9..9f0dd6a3956f 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852c.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852c.c @@ -3109,6 +3109,7 @@ const struct rtw89_chip_info rtw8852c_chip_info = { .fw_def = { .fw_basename = RTW8852C_FW_BASENAME, .fw_format_max = RTW8852C_FW_FORMAT_MAX, + .fw_b_aid = 0, }, .try_ce_fw = false, .bbmcu_nr = 0, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922a.c b/drivers/net/wireless/realtek/rtw89/rtw8922a.c index fd1b171b106c..1c9db283cff9 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8922a.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8922a.c @@ -2919,6 +2919,7 @@ const struct rtw89_chip_info rtw8922a_chip_info = { .fw_def = { .fw_basename = RTW8922A_FW_BASENAME, .fw_format_max = RTW8922A_FW_FORMAT_MAX, + .fw_b_aid = 0, }, .try_ce_fw = false, .bbmcu_nr = 1, From 888226bd560a577be7f169a5f4a0b9e74f4cf039 Mon Sep 17 00:00:00 2001 From: Zong-Zhe Yang Date: Fri, 13 Feb 2026 14:15:43 +0800 Subject: [PATCH 0163/1561] wifi: rtw89: add general way to generate module firmware string Module firmware string depends on max format of the firmware. When max format is 0, the module firmware string will be like XYZ.bin. However, when max format N > 0, the module firmware string will become XYZ-N.bin. Originally, when one chip updated its max firmware format from 0 to 1, the module firmware string also needed to be changed. However, did not need to do for other cases, e.g. from 1 to 2. It's a bit inconvenient to always remember that from 0 to 1 is a special case. So, add a general macro to generate the corresponding module firmware string based on max firmware format. Signed-off-by: Zong-Zhe Yang Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260213061552.29997-4-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/core.h | 9 +++++++++ drivers/net/wireless/realtek/rtw89/rtw8851b.c | 2 +- drivers/net/wireless/realtek/rtw89/rtw8852a.c | 2 +- drivers/net/wireless/realtek/rtw89/rtw8852b.c | 2 +- drivers/net/wireless/realtek/rtw89/rtw8852bt.c | 2 +- drivers/net/wireless/realtek/rtw89/rtw8852c.c | 2 +- drivers/net/wireless/realtek/rtw89/rtw8922a.c | 2 +- 7 files changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h index d1f58bf953a9..1ad1fd2a1f6f 100644 --- a/drivers/net/wireless/realtek/rtw89/core.h +++ b/drivers/net/wireless/realtek/rtw89/core.h @@ -4152,6 +4152,15 @@ struct rtw89_reg_imr { u32 set; }; +#define RTW89_MODULE_FWNAME_PLACEHOLDER_0 0, +#define __RTW89_GEN_MODULE_FWNAME_FMT(placeholder_or_ignored, strfmt) \ + __take_second_arg(placeholder_or_ignored, strfmt) +#define RTW89_GEN_MODULE_FWNAME_FMT(maxfmt) \ + __RTW89_GEN_MODULE_FWNAME_FMT(RTW89_MODULE_FWNAME_PLACEHOLDER_ ## maxfmt, \ + "-" __stringify(maxfmt)) +#define RTW89_GEN_MODULE_FWNAME(basename, maxformat) \ + basename RTW89_GEN_MODULE_FWNAME_FMT(maxformat) ".bin" + struct rtw89_fw_def { const char *fw_basename; u8 fw_format_max; diff --git a/drivers/net/wireless/realtek/rtw89/rtw8851b.c b/drivers/net/wireless/realtek/rtw89/rtw8851b.c index 8352380b4be5..19141ecad4d7 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8851b.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8851b.c @@ -18,7 +18,7 @@ #define RTW8851B_FW_FORMAT_MAX 0 #define RTW8851B_FW_BASENAME "rtw89/rtw8851b_fw" #define RTW8851B_MODULE_FIRMWARE \ - RTW8851B_FW_BASENAME ".bin" + RTW89_GEN_MODULE_FWNAME(RTW8851B_FW_BASENAME, RTW8851B_FW_FORMAT_MAX) static const struct rtw89_hfc_ch_cfg rtw8851b_hfc_chcfg_pcie[] = { {5, 343, grp_0}, /* ACH 0 */ diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852a.c b/drivers/net/wireless/realtek/rtw89/rtw8852a.c index 32644c40ae28..0d5ec9755783 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852a.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852a.c @@ -15,7 +15,7 @@ #define RTW8852A_FW_FORMAT_MAX 0 #define RTW8852A_FW_BASENAME "rtw89/rtw8852a_fw" #define RTW8852A_MODULE_FIRMWARE \ - RTW8852A_FW_BASENAME ".bin" + RTW89_GEN_MODULE_FWNAME(RTW8852A_FW_BASENAME, RTW8852A_FW_FORMAT_MAX) static const struct rtw89_hfc_ch_cfg rtw8852a_hfc_chcfg_pcie[] = { {128, 1896, grp_0}, /* ACH 0 */ diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852b.c b/drivers/net/wireless/realtek/rtw89/rtw8852b.c index 66cd46fbee4a..7aa1cf184051 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852b.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852b.c @@ -16,7 +16,7 @@ #define RTW8852B_FW_FORMAT_MAX 1 #define RTW8852B_FW_BASENAME "rtw89/rtw8852b_fw" #define RTW8852B_MODULE_FIRMWARE \ - RTW8852B_FW_BASENAME "-" __stringify(RTW8852B_FW_FORMAT_MAX) ".bin" + RTW89_GEN_MODULE_FWNAME(RTW8852B_FW_BASENAME, RTW8852B_FW_FORMAT_MAX) static const struct rtw89_hfc_ch_cfg rtw8852b_hfc_chcfg_pcie[] = { {5, 341, grp_0}, /* ACH 0 */ diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852bt.c b/drivers/net/wireless/realtek/rtw89/rtw8852bt.c index 0acdf64395f0..c86b995a7cb1 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852bt.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852bt.c @@ -14,7 +14,7 @@ #define RTW8852BT_FW_FORMAT_MAX 0 #define RTW8852BT_FW_BASENAME "rtw89/rtw8852bt_fw" #define RTW8852BT_MODULE_FIRMWARE \ - RTW8852BT_FW_BASENAME ".bin" + RTW89_GEN_MODULE_FWNAME(RTW8852BT_FW_BASENAME, RTW8852BT_FW_FORMAT_MAX) static const struct rtw89_hfc_ch_cfg rtw8852bt_hfc_chcfg_pcie[] = { {16, 742, grp_0}, /* ACH 0 */ diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852c.c b/drivers/net/wireless/realtek/rtw89/rtw8852c.c index 9f0dd6a3956f..e62a7288c8aa 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852c.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852c.c @@ -18,7 +18,7 @@ #define RTW8852C_FW_FORMAT_MAX 2 #define RTW8852C_FW_BASENAME "rtw89/rtw8852c_fw" #define RTW8852C_MODULE_FIRMWARE \ - RTW8852C_FW_BASENAME "-" __stringify(RTW8852C_FW_FORMAT_MAX) ".bin" + RTW89_GEN_MODULE_FWNAME(RTW8852C_FW_BASENAME, RTW8852C_FW_FORMAT_MAX) static const struct rtw89_hfc_ch_cfg rtw8852c_hfc_chcfg_pcie[] = { {13, 1614, grp_0}, /* ACH 0 */ diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922a.c b/drivers/net/wireless/realtek/rtw89/rtw8922a.c index 1c9db283cff9..36ef36110602 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8922a.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8922a.c @@ -18,7 +18,7 @@ #define RTW8922A_FW_FORMAT_MAX 4 #define RTW8922A_FW_BASENAME "rtw89/rtw8922a_fw" #define RTW8922A_MODULE_FIRMWARE \ - RTW8922A_FW_BASENAME "-" __stringify(RTW8922A_FW_FORMAT_MAX) ".bin" + RTW89_GEN_MODULE_FWNAME(RTW8922A_FW_BASENAME, RTW8922A_FW_FORMAT_MAX) #define HE_N_USER_MAX_8922A 4 From 1d67f1f8e9a0d99ed1731601ea6bdf3c42570be9 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Fri, 13 Feb 2026 14:15:44 +0800 Subject: [PATCH 0164/1561] wifi: rtw89: 8852b: update supported firmware format to 2 After firmware version 0.29.29.15, more data are included in firmware file. Increase format to 2 to prevent old driver failed to load the new firmware. Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260213061552.29997-5-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/rtw8852b.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852b.c b/drivers/net/wireless/realtek/rtw89/rtw8852b.c index 7aa1cf184051..19a9f07e8714 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852b.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852b.c @@ -13,7 +13,7 @@ #include "rtw8852b_table.h" #include "txrx.h" -#define RTW8852B_FW_FORMAT_MAX 1 +#define RTW8852B_FW_FORMAT_MAX 2 #define RTW8852B_FW_BASENAME "rtw89/rtw8852b_fw" #define RTW8852B_MODULE_FIRMWARE \ RTW89_GEN_MODULE_FWNAME(RTW8852B_FW_BASENAME, RTW8852B_FW_FORMAT_MAX) From 5cfda90c638423aeab3f1e99f7606d6c6a2cf018 Mon Sep 17 00:00:00 2001 From: Zong-Zhe Yang Date: Fri, 13 Feb 2026 14:15:45 +0800 Subject: [PATCH 0165/1561] wifi: rtw89: 8852a: move DIG tables to rtw8852a.c Now, most of PHY parameter tables in driver can be loaded via FW elements. Plan to generate the corresponding FW elements for 8852A PHY tables. Then, after FW elements work for a enough time, rtw8852a_table.c can be cleaned up. However, DIG (dynamic initial gain) tables are legacy for 8852A only, so FW element doesn't support. Their sizes are not very big, so move them to rtw8852a.c and keep rtw8852a_table.c for PHY tables which are supported by FW elements. No logic is changed. Signed-off-by: Zong-Zhe Yang Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260213061552.29997-6-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/rtw8852a.c | 51 +++++++++++++++++++ .../wireless/realtek/rtw89/rtw8852a_table.c | 51 ------------------- .../wireless/realtek/rtw89/rtw8852a_table.h | 1 - 3 files changed, 51 insertions(+), 52 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852a.c b/drivers/net/wireless/realtek/rtw89/rtw8852a.c index 0d5ec9755783..6089cf9e1d54 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852a.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852a.c @@ -2179,6 +2179,57 @@ static void rtw8852a_query_ppdu(struct rtw89_dev *rtwdev, rtw8852a_fill_freq_with_ppdu(rtwdev, phy_ppdu, status); } +#define DECLARE_DIG_TABLE(name) \ +static const struct rtw89_phy_dig_gain_cfg name##_table = { \ + .table = name, \ + .size = ARRAY_SIZE(name) \ +} + +static const struct rtw89_reg_def rtw89_8852a_lna_gain_g[] = { + {R_PATH0_LNA_ERR1, B_PATH0_LNA_ERR_G0_G_MSK}, + {R_PATH0_LNA_ERR2, B_PATH0_LNA_ERR_G1_G_MSK}, + {R_PATH0_LNA_ERR2, B_PATH0_LNA_ERR_G2_G_MSK}, + {R_PATH0_LNA_ERR3, B_PATH0_LNA_ERR_G3_G_MSK}, + {R_PATH0_LNA_ERR3, B_PATH0_LNA_ERR_G4_G_MSK}, + {R_PATH0_LNA_ERR4, B_PATH0_LNA_ERR_G5_G_MSK}, + {R_PATH0_LNA_ERR5, B_PATH0_LNA_ERR_G6_G_MSK}, +}; + +DECLARE_DIG_TABLE(rtw89_8852a_lna_gain_g); + +static const struct rtw89_reg_def rtw89_8852a_tia_gain_g[] = { + {R_PATH0_TIA_ERR_G0, B_PATH0_TIA_ERR_G0_G_MSK}, + {R_PATH0_TIA_ERR_G1, B_PATH0_TIA_ERR_G1_G_MSK}, +}; + +DECLARE_DIG_TABLE(rtw89_8852a_tia_gain_g); + +static const struct rtw89_reg_def rtw89_8852a_lna_gain_a[] = { + {R_PATH0_LNA_ERR1, B_PATH0_LNA_ERR_G0_A_MSK}, + {R_PATH0_LNA_ERR1, B_PATH0_LNA_ERR_G1_A_MSK}, + {R_PATH0_LNA_ERR2, B_PATH0_LNA_ERR_G2_A_MSK}, + {R_PATH0_LNA_ERR3, B_PATH0_LNA_ERR_G3_A_MSK}, + {R_PATH0_LNA_ERR3, B_PATH0_LNA_ERR_G4_A_MSK}, + {R_PATH0_LNA_ERR4, B_PATH0_LNA_ERR_G5_A_MSK}, + {R_PATH0_LNA_ERR4, B_PATH0_LNA_ERR_G6_A_MSK}, +}; + +DECLARE_DIG_TABLE(rtw89_8852a_lna_gain_a); + +static const struct rtw89_reg_def rtw89_8852a_tia_gain_a[] = { + {R_PATH0_TIA_ERR_G0, B_PATH0_TIA_ERR_G0_A_MSK}, + {R_PATH0_TIA_ERR_G1, B_PATH0_TIA_ERR_G1_A_MSK}, +}; + +DECLARE_DIG_TABLE(rtw89_8852a_tia_gain_a); + +static const struct rtw89_phy_dig_gain_table rtw89_8852a_phy_dig_table = { + .cfg_lna_g = &rtw89_8852a_lna_gain_g_table, + .cfg_tia_g = &rtw89_8852a_tia_gain_g_table, + .cfg_lna_a = &rtw89_8852a_lna_gain_a_table, + .cfg_tia_a = &rtw89_8852a_tia_gain_a_table +}; + #ifdef CONFIG_PM static const struct wiphy_wowlan_support rtw_wowlan_stub_8852a = { .flags = WIPHY_WOWLAN_MAGIC_PKT | WIPHY_WOWLAN_DISCONNECT, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852a_table.c b/drivers/net/wireless/realtek/rtw89/rtw8852a_table.c index 495890c180ef..ffdeb3801991 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852a_table.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852a_table.c @@ -50952,50 +50952,6 @@ const s8 rtw89_8852a_txpwr_lmt_ru_5g[RTW89_RU_NUM][RTW89_NTX_NUM] [2][1][RTW89_UK][46] = 32, }; -#define DECLARE_DIG_TABLE(name) \ -static const struct rtw89_phy_dig_gain_cfg name##_table = { \ - .table = name, \ - .size = ARRAY_SIZE(name) \ -} - -static const struct rtw89_reg_def rtw89_8852a_lna_gain_g[] = { - {R_PATH0_LNA_ERR1, B_PATH0_LNA_ERR_G0_G_MSK}, - {R_PATH0_LNA_ERR2, B_PATH0_LNA_ERR_G1_G_MSK}, - {R_PATH0_LNA_ERR2, B_PATH0_LNA_ERR_G2_G_MSK}, - {R_PATH0_LNA_ERR3, B_PATH0_LNA_ERR_G3_G_MSK}, - {R_PATH0_LNA_ERR3, B_PATH0_LNA_ERR_G4_G_MSK}, - {R_PATH0_LNA_ERR4, B_PATH0_LNA_ERR_G5_G_MSK}, - {R_PATH0_LNA_ERR5, B_PATH0_LNA_ERR_G6_G_MSK}, -}; - -DECLARE_DIG_TABLE(rtw89_8852a_lna_gain_g); - -static const struct rtw89_reg_def rtw89_8852a_tia_gain_g[] = { - {R_PATH0_TIA_ERR_G0, B_PATH0_TIA_ERR_G0_G_MSK}, - {R_PATH0_TIA_ERR_G1, B_PATH0_TIA_ERR_G1_G_MSK}, -}; - -DECLARE_DIG_TABLE(rtw89_8852a_tia_gain_g); - -static const struct rtw89_reg_def rtw89_8852a_lna_gain_a[] = { - {R_PATH0_LNA_ERR1, B_PATH0_LNA_ERR_G0_A_MSK}, - {R_PATH0_LNA_ERR1, B_PATH0_LNA_ERR_G1_A_MSK}, - {R_PATH0_LNA_ERR2, B_PATH0_LNA_ERR_G2_A_MSK}, - {R_PATH0_LNA_ERR3, B_PATH0_LNA_ERR_G3_A_MSK}, - {R_PATH0_LNA_ERR3, B_PATH0_LNA_ERR_G4_A_MSK}, - {R_PATH0_LNA_ERR4, B_PATH0_LNA_ERR_G5_A_MSK}, - {R_PATH0_LNA_ERR4, B_PATH0_LNA_ERR_G6_A_MSK}, -}; - -DECLARE_DIG_TABLE(rtw89_8852a_lna_gain_a); - -static const struct rtw89_reg_def rtw89_8852a_tia_gain_a[] = { - {R_PATH0_TIA_ERR_G0, B_PATH0_TIA_ERR_G0_A_MSK}, - {R_PATH0_TIA_ERR_G1, B_PATH0_TIA_ERR_G1_A_MSK}, -}; - -DECLARE_DIG_TABLE(rtw89_8852a_tia_gain_a); - const struct rtw89_phy_table rtw89_8852a_phy_bb_table = { .regs = rtw89_8852a_phy_bb_regs, .n_regs = ARRAY_SIZE(rtw89_8852a_phy_bb_regs), @@ -51042,13 +50998,6 @@ const struct rtw89_txpwr_track_cfg rtw89_8852a_trk_cfg = { .delta_swingidx_2g_cck_a_p = _txpwr_track_delta_swingidx_2g_cck_a_p, }; -const struct rtw89_phy_dig_gain_table rtw89_8852a_phy_dig_table = { - .cfg_lna_g = &rtw89_8852a_lna_gain_g_table, - .cfg_tia_g = &rtw89_8852a_tia_gain_g_table, - .cfg_lna_a = &rtw89_8852a_lna_gain_a_table, - .cfg_tia_a = &rtw89_8852a_tia_gain_a_table -}; - const struct rtw89_rfe_parms rtw89_8852a_dflt_parms = { .byr_tbl = &rtw89_8852a_byr_table, .rule_2ghz = { diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852a_table.h b/drivers/net/wireless/realtek/rtw89/rtw8852a_table.h index 7463ae6ee3f9..58fe8575c1c9 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852a_table.h +++ b/drivers/net/wireless/realtek/rtw89/rtw8852a_table.h @@ -11,7 +11,6 @@ extern const struct rtw89_phy_table rtw89_8852a_phy_bb_table; extern const struct rtw89_phy_table rtw89_8852a_phy_radioa_table; extern const struct rtw89_phy_table rtw89_8852a_phy_radiob_table; extern const struct rtw89_phy_table rtw89_8852a_phy_nctl_table; -extern const struct rtw89_phy_dig_gain_table rtw89_8852a_phy_dig_table; extern const struct rtw89_txpwr_track_cfg rtw89_8852a_trk_cfg; extern const struct rtw89_rfe_parms rtw89_8852a_dflt_parms; From b60796c07a79178db32b63e5b75996551145fa5d Mon Sep 17 00:00:00 2001 From: Zong-Zhe Yang Date: Fri, 13 Feb 2026 14:15:46 +0800 Subject: [PATCH 0166/1561] wifi: rtw89: 8852a: update supported firmware format to 1 More data will be included in Firmware file and loaded via FW elements. Unlike other chips, for RF radio, RTL8852A uses rtw89_phy_config_rf_reg instead of v1, so update loading handling of corresponding FW element. And then, increase RTL8852A FW format to 1 to prevent old driver from misusing the data in FW elements. Signed-off-by: Zong-Zhe Yang Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260213061552.29997-7-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/fw.c | 5 ++++- drivers/net/wireless/realtek/rtw89/phy.c | 8 ++++---- drivers/net/wireless/realtek/rtw89/phy.h | 4 ++++ drivers/net/wireless/realtek/rtw89/rtw8852a.c | 2 +- 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/fw.c b/drivers/net/wireless/realtek/rtw89/fw.c index 130ebe33cef0..52f7e65fe6a5 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.c +++ b/drivers/net/wireless/realtek/rtw89/fw.c @@ -1073,6 +1073,7 @@ int rtw89_build_phy_tbl_from_elm(struct rtw89_dev *rtwdev, const union rtw89_fw_element_arg arg) { struct rtw89_fw_elm_info *elm_info = &rtwdev->fw.elm_info; + const struct rtw89_chip_info *chip = rtwdev->chip; struct rtw89_hal *hal = &rtwdev->hal; struct rtw89_phy_table *tbl, **pp; struct rtw89_reg2_def *regs; @@ -1129,7 +1130,9 @@ int rtw89_build_phy_tbl_from_elm(struct rtw89_dev *rtwdev, if (radio) { tbl->rf_path = arg.rf_path; - tbl->config = rtw89_phy_config_rf_reg_v1; + tbl->config = chip->chip_id == RTL8852A ? + rtw89_phy_config_rf_reg : + rtw89_phy_config_rf_reg_v1; } *pp = tbl; diff --git a/drivers/net/wireless/realtek/rtw89/phy.c b/drivers/net/wireless/realtek/rtw89/phy.c index ee6ab2136b9a..74f5d5562848 100644 --- a/drivers/net/wireless/realtek/rtw89/phy.c +++ b/drivers/net/wireless/realtek/rtw89/phy.c @@ -1659,10 +1659,10 @@ static void rtw89_phy_config_rf_reg_noio(struct rtw89_dev *rtwdev, (struct rtw89_fw_h2c_rf_reg_info *)extra_data); } -static void rtw89_phy_config_rf_reg(struct rtw89_dev *rtwdev, - const struct rtw89_reg2_def *reg, - enum rtw89_rf_path rf_path, - void *extra_data) +void rtw89_phy_config_rf_reg(struct rtw89_dev *rtwdev, + const struct rtw89_reg2_def *reg, + enum rtw89_rf_path rf_path, + void *extra_data) { if (reg->addr == 0xfe) { mdelay(50); diff --git a/drivers/net/wireless/realtek/rtw89/phy.h b/drivers/net/wireless/realtek/rtw89/phy.h index ab263738d212..094c7e45f254 100644 --- a/drivers/net/wireless/realtek/rtw89/phy.h +++ b/drivers/net/wireless/realtek/rtw89/phy.h @@ -852,6 +852,10 @@ bool rtw89_phy_write_rf_v3(struct rtw89_dev *rtwdev, enum rtw89_rf_path rf_path, void rtw89_phy_init_bb_reg(struct rtw89_dev *rtwdev); void rtw89_phy_init_bb_afe(struct rtw89_dev *rtwdev); void rtw89_phy_init_rf_reg(struct rtw89_dev *rtwdev, bool noio); +void rtw89_phy_config_rf_reg(struct rtw89_dev *rtwdev, + const struct rtw89_reg2_def *reg, + enum rtw89_rf_path rf_path, + void *extra_data); void rtw89_phy_config_rf_reg_v1(struct rtw89_dev *rtwdev, const struct rtw89_reg2_def *reg, enum rtw89_rf_path rf_path, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852a.c b/drivers/net/wireless/realtek/rtw89/rtw8852a.c index 6089cf9e1d54..898c534a5762 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852a.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852a.c @@ -12,7 +12,7 @@ #include "rtw8852a_table.h" #include "txrx.h" -#define RTW8852A_FW_FORMAT_MAX 0 +#define RTW8852A_FW_FORMAT_MAX 1 #define RTW8852A_FW_BASENAME "rtw89/rtw8852a_fw" #define RTW8852A_MODULE_FIRMWARE \ RTW89_GEN_MODULE_FWNAME(RTW8852A_FW_BASENAME, RTW8852A_FW_FORMAT_MAX) From 940ec40dd299bba05bc022cf8e951de8a0f8b124 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 27 Feb 2026 09:53:08 +0000 Subject: [PATCH 0167/1561] net: stmmac: clean up formatting in stmmac_mac_finish() Wrap the arguments for priv->plat->mac_finish() to avoid an overly long line. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vvuWy-0000000AvmY-3GWN@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 8772d735c577..5c144ac259af 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -968,7 +968,8 @@ static int stmmac_mac_finish(struct phylink_config *config, unsigned int mode, struct stmmac_priv *priv = netdev_priv(ndev); if (priv->plat->mac_finish) - priv->plat->mac_finish(ndev, priv->plat->bsp_priv, mode, interface); + priv->plat->mac_finish(ndev, priv->plat->bsp_priv, mode, + interface); return 0; } From 44a2ec96d374806ee74454ea915615536a76b152 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 27 Feb 2026 09:53:13 +0000 Subject: [PATCH 0168/1561] net: stmmac: remove plat_dat->port_node There are repeated instances of: fwnode = priv->plat->port_node; if (!fwnode) fwnode = dev_fwnode(priv->device); However, the only place that ->port_node is set is stmmac_probe_config_dt(): struct device_node *np = pdev->dev.of_node; ... /* PHYLINK automatically parses the phy-handle property */ plat->port_node = of_fwnode_handle(np); which is equivalent to dev_fwnode(&pdev->dev) and, as priv->device will be &pdev->dev, is also equivalent to dev_fwnode(priv->device). Thus, plat_dat->port_node doesn't provide any extra benefit over using dev_fwnode(priv->device) directly. There is one case where port_node is used directly, which can be found in stmmac_pcs_setup(). This may cause a change of behaviour as PCI drivers do not populate plat_dat->port_node, but dev_fwnode(priv->device) may be valid. PCI-based stmmac should be tested. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vvuX3-0000000Avme-3oej@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 13 +++---------- drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 7 ++----- .../net/ethernet/stmicro/stmmac/stmmac_platform.c | 3 --- include/linux/stmmac.h | 1 - 4 files changed, 5 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 5c144ac259af..4e788f54bbbc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1252,10 +1252,7 @@ static int stmmac_init_phy(struct net_device *dev) xpcs_get_an_mode(priv->hw->xpcs, mode) == DW_AN_C73) return 0; - fwnode = priv->plat->port_node; - if (!fwnode) - fwnode = dev_fwnode(priv->device); - + fwnode = dev_fwnode(priv->device); if (fwnode) phy_fwnode = fwnode_get_phy_node(fwnode); else @@ -1313,7 +1310,6 @@ static int stmmac_phylink_setup(struct stmmac_priv *priv) { struct stmmac_mdio_bus_data *mdio_bus_data; struct phylink_config *config; - struct fwnode_handle *fwnode; struct phylink_pcs *pcs; struct phylink *phylink; @@ -1400,11 +1396,8 @@ static int stmmac_phylink_setup(struct stmmac_priv *priv) config->wol_mac_support |= WAKE_MAGIC; } - fwnode = priv->plat->port_node; - if (!fwnode) - fwnode = dev_fwnode(priv->device); - - phylink = phylink_create(config, fwnode, priv->plat->phy_interface, + phylink = phylink_create(config, dev_fwnode(priv->device), + priv->plat->phy_interface, &stmmac_phylink_mac_ops); if (IS_ERR(phylink)) return PTR_ERR(phylink); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index a7c2496b39f2..485a0d790baa 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -430,7 +430,7 @@ int stmmac_pcs_setup(struct net_device *ndev) struct dw_xpcs *xpcs = NULL; int addr, ret; - devnode = priv->plat->port_node; + devnode = dev_fwnode(priv->device); if (priv->plat->pcs_init) { ret = priv->plat->pcs_init(priv); @@ -649,10 +649,7 @@ int stmmac_mdio_register(struct net_device *ndev) stmmac_xgmac2_mdio_read_c45(new_bus, 0, 0, 0); /* If fixed-link is set, skip PHY scanning */ - fwnode = priv->plat->port_node; - if (!fwnode) - fwnode = dev_fwnode(priv->device); - + fwnode = dev_fwnode(priv->device); if (fwnode) { fixed_node = fwnode_get_named_child_node(fwnode, "fixed-link"); if (fixed_node) { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 5c9fd91a1db9..c34998486293 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -446,9 +446,6 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) * they are not converted to phylink. */ plat->phy_node = of_parse_phandle(np, "phy-handle", 0); - /* PHYLINK automatically parses the phy-handle property */ - plat->port_node = of_fwnode_handle(np); - /* Get max speed of operation from device tree */ of_property_read_u32(np, "max-speed", &plat->max_speed); diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index b96ae9dadfab..77e51eaa5ec5 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -225,7 +225,6 @@ struct plat_stmmacenet_data { phy_interface_t phy_interface; struct stmmac_mdio_bus_data *mdio_bus_data; struct device_node *phy_node; - struct fwnode_handle *port_node; struct device_node *mdio_node; struct stmmac_dma_cfg *dma_cfg; struct stmmac_safety_feature_cfg *safety_feat_cfg; From d48ba98bbc8245a367ce6d30ed4de6323623ebdf Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 27 Feb 2026 09:53:19 +0000 Subject: [PATCH 0169/1561] net: stmmac: remove .get_tx_owner() No code calls stmmac_get_tx_owner(). Remove the macro, its associated function pointer, and all implementations. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vvuX9-0000000Avml-08Lo@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c | 6 ------ drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c | 6 ------ drivers/net/ethernet/stmicro/stmmac/enh_desc.c | 6 ------ drivers/net/ethernet/stmicro/stmmac/hwif.h | 3 --- drivers/net/ethernet/stmicro/stmmac/norm_desc.c | 6 ------ 5 files changed, 27 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index e226dc6a1b17..b8fe4ad883e5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -176,11 +176,6 @@ static int dwmac4_rd_get_tx_len(struct dma_desc *p) return (le32_to_cpu(p->des2) & TDES2_BUFFER1_SIZE_MASK); } -static int dwmac4_get_tx_owner(struct dma_desc *p) -{ - return (le32_to_cpu(p->des3) & TDES3_OWN) >> TDES3_OWN_SHIFT; -} - static void dwmac4_set_tx_owner(struct dma_desc *p) { p->des3 |= cpu_to_le32(TDES3_OWN); @@ -552,7 +547,6 @@ const struct stmmac_desc_ops dwmac4_desc_ops = { .tx_status = dwmac4_wrback_get_tx_status, .rx_status = dwmac4_wrback_get_rx_status, .get_tx_len = dwmac4_rd_get_tx_len, - .get_tx_owner = dwmac4_get_tx_owner, .set_tx_owner = dwmac4_set_tx_owner, .set_rx_owner = dwmac4_set_rx_owner, .get_tx_ls = dwmac4_get_tx_ls, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c index 41e5b420a215..8bf373513930 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c @@ -45,11 +45,6 @@ static int dwxgmac2_get_tx_len(struct dma_desc *p) return (le32_to_cpu(p->des2) & XGMAC_TDES2_B1L); } -static int dwxgmac2_get_tx_owner(struct dma_desc *p) -{ - return (le32_to_cpu(p->des3) & XGMAC_TDES3_OWN) > 0; -} - static void dwxgmac2_set_tx_owner(struct dma_desc *p) { p->des3 |= cpu_to_le32(XGMAC_TDES3_OWN); @@ -356,7 +351,6 @@ const struct stmmac_desc_ops dwxgmac210_desc_ops = { .tx_status = dwxgmac2_get_tx_status, .rx_status = dwxgmac2_get_rx_status, .get_tx_len = dwxgmac2_get_tx_len, - .get_tx_owner = dwxgmac2_get_tx_owner, .set_tx_owner = dwxgmac2_set_tx_owner, .set_rx_owner = dwxgmac2_set_rx_owner, .get_tx_ls = dwxgmac2_get_tx_ls, diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c index 8f6993c8bcae..77b2cb34aa0e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c @@ -277,11 +277,6 @@ static void enh_desc_init_tx_desc(struct dma_desc *p, int mode, int end) enh_desc_end_tx_desc_on_ring(p, end); } -static int enh_desc_get_tx_owner(struct dma_desc *p) -{ - return (le32_to_cpu(p->des0) & ETDES0_OWN) >> 31; -} - static void enh_desc_set_tx_owner(struct dma_desc *p) { p->des0 |= cpu_to_le32(ETDES0_OWN); @@ -448,7 +443,6 @@ const struct stmmac_desc_ops enh_desc_ops = { .get_tx_len = enh_desc_get_tx_len, .init_rx_desc = enh_desc_init_rx_desc, .init_tx_desc = enh_desc_init_tx_desc, - .get_tx_owner = enh_desc_get_tx_owner, .release_tx_desc = enh_desc_release_tx_desc, .prepare_tx_desc = enh_desc_prepare_tx_desc, .set_tx_ic = enh_desc_set_tx_ic, diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index 0db96a387259..50ca8dcea2fd 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -51,7 +51,6 @@ struct stmmac_desc_ops { unsigned int tcppayloadlen); /* Set/get the owner of the descriptor */ void (*set_tx_owner)(struct dma_desc *p); - int (*get_tx_owner)(struct dma_desc *p); /* Clean the tx descriptor as soon as the tx irq is received */ void (*release_tx_desc)(struct dma_desc *p, int mode); /* Clear interrupt on tx frame completion. When this bit is @@ -116,8 +115,6 @@ struct stmmac_desc_ops { stmmac_do_void_callback(__priv, desc, prepare_tso_tx_desc, __args) #define stmmac_set_tx_owner(__priv, __args...) \ stmmac_do_void_callback(__priv, desc, set_tx_owner, __args) -#define stmmac_get_tx_owner(__priv, __args...) \ - stmmac_do_callback(__priv, desc, get_tx_owner, __args) #define stmmac_release_tx_desc(__priv, __args...) \ stmmac_do_void_callback(__priv, desc, release_tx_desc, __args) #define stmmac_set_tx_ic(__priv, __args...) \ diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c index 859cb9242a52..e9face06b950 100644 --- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c @@ -141,11 +141,6 @@ static void ndesc_init_tx_desc(struct dma_desc *p, int mode, int end) ndesc_end_tx_desc_on_ring(p, end); } -static int ndesc_get_tx_owner(struct dma_desc *p) -{ - return (le32_to_cpu(p->des0) & TDES0_OWN) >> 31; -} - static void ndesc_set_tx_owner(struct dma_desc *p) { p->des0 |= cpu_to_le32(TDES0_OWN); @@ -294,7 +289,6 @@ const struct stmmac_desc_ops ndesc_ops = { .get_tx_len = ndesc_get_tx_len, .init_rx_desc = ndesc_init_rx_desc, .init_tx_desc = ndesc_init_tx_desc, - .get_tx_owner = ndesc_get_tx_owner, .release_tx_desc = ndesc_release_tx_desc, .prepare_tx_desc = ndesc_prepare_tx_desc, .set_tx_ic = ndesc_set_tx_ic, From 1fe444bdc58399118ef1e85e4160ece227278c89 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 27 Feb 2026 09:53:24 +0000 Subject: [PATCH 0170/1561] net: stmmac: remove .get_tx_ls() No code calls stmmac_get_tx_ls(). Remove this macro, its associated function pointer, and all implementations. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vvuXE-0000000Avmr-0eB0@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c | 7 ------- drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c | 6 ------ drivers/net/ethernet/stmicro/stmmac/enh_desc.c | 6 ------ drivers/net/ethernet/stmicro/stmmac/hwif.h | 4 ---- drivers/net/ethernet/stmicro/stmmac/norm_desc.c | 6 ------ 5 files changed, 29 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index b8fe4ad883e5..56a88b71def9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -191,12 +191,6 @@ static void dwmac4_set_rx_owner(struct dma_desc *p, int disable_rx_ic) p->des3 |= cpu_to_le32(flags); } -static int dwmac4_get_tx_ls(struct dma_desc *p) -{ - return (le32_to_cpu(p->des3) & TDES3_LAST_DESCRIPTOR) - >> TDES3_LAST_DESCRIPTOR_SHIFT; -} - static u16 dwmac4_wrback_get_rx_vlan_tci(struct dma_desc *p) { return (le32_to_cpu(p->des0) & RDES0_VLAN_TAG_MASK); @@ -549,7 +543,6 @@ const struct stmmac_desc_ops dwmac4_desc_ops = { .get_tx_len = dwmac4_rd_get_tx_len, .set_tx_owner = dwmac4_set_tx_owner, .set_rx_owner = dwmac4_set_rx_owner, - .get_tx_ls = dwmac4_get_tx_ls, .get_rx_vlan_tci = dwmac4_wrback_get_rx_vlan_tci, .get_rx_vlan_valid = dwmac4_wrback_get_rx_vlan_valid, .get_rx_frame_len = dwmac4_wrback_get_rx_frame_len, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c index 8bf373513930..f5deceb052d7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c @@ -60,11 +60,6 @@ static void dwxgmac2_set_rx_owner(struct dma_desc *p, int disable_rx_ic) p->des3 |= cpu_to_le32(flags); } -static int dwxgmac2_get_tx_ls(struct dma_desc *p) -{ - return (le32_to_cpu(p->des3) & XGMAC_RDES3_LD) > 0; -} - static u16 dwxgmac2_wrback_get_rx_vlan_tci(struct dma_desc *p) { return le32_to_cpu(p->des0) & XGMAC_RDES0_VLAN_TAG_MASK; @@ -353,7 +348,6 @@ const struct stmmac_desc_ops dwxgmac210_desc_ops = { .get_tx_len = dwxgmac2_get_tx_len, .set_tx_owner = dwxgmac2_set_tx_owner, .set_rx_owner = dwxgmac2_set_rx_owner, - .get_tx_ls = dwxgmac2_get_tx_ls, .get_rx_vlan_tci = dwxgmac2_wrback_get_rx_vlan_tci, .get_rx_vlan_valid = dwxgmac2_wrback_get_rx_vlan_valid, .get_rx_frame_len = dwxgmac2_get_rx_frame_len, diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c index 77b2cb34aa0e..3c241c5699ed 100644 --- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c @@ -287,11 +287,6 @@ static void enh_desc_set_rx_owner(struct dma_desc *p, int disable_rx_ic) p->des0 |= cpu_to_le32(RDES0_OWN); } -static int enh_desc_get_tx_ls(struct dma_desc *p) -{ - return (le32_to_cpu(p->des0) & ETDES0_LAST_SEGMENT) >> 29; -} - static void enh_desc_release_tx_desc(struct dma_desc *p, int mode) { int ter = (le32_to_cpu(p->des0) & ETDES0_END_RING) >> 21; @@ -446,7 +441,6 @@ const struct stmmac_desc_ops enh_desc_ops = { .release_tx_desc = enh_desc_release_tx_desc, .prepare_tx_desc = enh_desc_prepare_tx_desc, .set_tx_ic = enh_desc_set_tx_ic, - .get_tx_ls = enh_desc_get_tx_ls, .set_tx_owner = enh_desc_set_tx_owner, .set_rx_owner = enh_desc_set_rx_owner, .get_rx_frame_len = enh_desc_get_rx_frame_len, diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index 50ca8dcea2fd..e317f64a0fcc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -56,8 +56,6 @@ struct stmmac_desc_ops { /* Clear interrupt on tx frame completion. When this bit is * set an interrupt happens as soon as the frame is transmitted */ void (*set_tx_ic)(struct dma_desc *p); - /* Last tx segment reports the transmit status */ - int (*get_tx_ls)(struct dma_desc *p); /* Get the tag of the descriptor */ u16 (*get_rx_vlan_tci)(struct dma_desc *p); /* Get the valid status of descriptor */ @@ -119,8 +117,6 @@ struct stmmac_desc_ops { stmmac_do_void_callback(__priv, desc, release_tx_desc, __args) #define stmmac_set_tx_ic(__priv, __args...) \ stmmac_do_void_callback(__priv, desc, set_tx_ic, __args) -#define stmmac_get_tx_ls(__priv, __args...) \ - stmmac_do_callback(__priv, desc, get_tx_ls, __args) #define stmmac_get_rx_vlan_tci(__priv, __args...) \ stmmac_do_callback(__priv, desc, get_rx_vlan_tci, __args) #define stmmac_get_rx_vlan_valid(__priv, __args...) \ diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c index e9face06b950..621bc1deb4e5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c @@ -151,11 +151,6 @@ static void ndesc_set_rx_owner(struct dma_desc *p, int disable_rx_ic) p->des0 |= cpu_to_le32(RDES0_OWN); } -static int ndesc_get_tx_ls(struct dma_desc *p) -{ - return (le32_to_cpu(p->des1) & TDES1_LAST_SEGMENT) >> 30; -} - static void ndesc_release_tx_desc(struct dma_desc *p, int mode) { int ter = (le32_to_cpu(p->des1) & TDES1_END_RING) >> 25; @@ -292,7 +287,6 @@ const struct stmmac_desc_ops ndesc_ops = { .release_tx_desc = ndesc_release_tx_desc, .prepare_tx_desc = ndesc_prepare_tx_desc, .set_tx_ic = ndesc_set_tx_ic, - .get_tx_ls = ndesc_get_tx_ls, .set_tx_owner = ndesc_set_tx_owner, .set_rx_owner = ndesc_set_rx_owner, .get_rx_frame_len = ndesc_get_rx_frame_len, From 19f2d59c3c3af5092ccea8086b8e840980057b9c Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 27 Feb 2026 09:53:29 +0000 Subject: [PATCH 0171/1561] net: stmmac: remove .get_tx_len() No code calls stmmac_get_tx_len(). Remove this macro, its associated function pointer, and all implementations. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vvuXJ-0000000Avmx-1B8Y@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c | 6 ------ drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c | 6 ------ drivers/net/ethernet/stmicro/stmmac/enh_desc.c | 6 ------ drivers/net/ethernet/stmicro/stmmac/hwif.h | 4 ---- drivers/net/ethernet/stmicro/stmmac/norm_desc.c | 6 ------ 5 files changed, 28 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index 56a88b71def9..1bbf02504dad 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -171,11 +171,6 @@ static int dwmac4_wrback_get_rx_status(struct stmmac_extra_stats *x, return ret; } -static int dwmac4_rd_get_tx_len(struct dma_desc *p) -{ - return (le32_to_cpu(p->des2) & TDES2_BUFFER1_SIZE_MASK); -} - static void dwmac4_set_tx_owner(struct dma_desc *p) { p->des3 |= cpu_to_le32(TDES3_OWN); @@ -540,7 +535,6 @@ static void dwmac4_set_tbs(struct dma_edesc *p, u32 sec, u32 nsec) const struct stmmac_desc_ops dwmac4_desc_ops = { .tx_status = dwmac4_wrback_get_tx_status, .rx_status = dwmac4_wrback_get_rx_status, - .get_tx_len = dwmac4_rd_get_tx_len, .set_tx_owner = dwmac4_set_tx_owner, .set_rx_owner = dwmac4_set_rx_owner, .get_rx_vlan_tci = dwmac4_wrback_get_rx_vlan_tci, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c index f5deceb052d7..1009ef436a1e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c @@ -40,11 +40,6 @@ static int dwxgmac2_get_rx_status(struct stmmac_extra_stats *x, return good_frame; } -static int dwxgmac2_get_tx_len(struct dma_desc *p) -{ - return (le32_to_cpu(p->des2) & XGMAC_TDES2_B1L); -} - static void dwxgmac2_set_tx_owner(struct dma_desc *p) { p->des3 |= cpu_to_le32(XGMAC_TDES3_OWN); @@ -345,7 +340,6 @@ static void dwxgmac2_set_tbs(struct dma_edesc *p, u32 sec, u32 nsec) const struct stmmac_desc_ops dwxgmac210_desc_ops = { .tx_status = dwxgmac2_get_tx_status, .rx_status = dwxgmac2_get_rx_status, - .get_tx_len = dwxgmac2_get_tx_len, .set_tx_owner = dwxgmac2_set_tx_owner, .set_rx_owner = dwxgmac2_set_rx_owner, .get_rx_vlan_tci = dwxgmac2_wrback_get_rx_vlan_tci, diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c index 3c241c5699ed..ead468f4b645 100644 --- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c @@ -76,11 +76,6 @@ static int enh_desc_get_tx_status(struct stmmac_extra_stats *x, return ret; } -static int enh_desc_get_tx_len(struct dma_desc *p) -{ - return (le32_to_cpu(p->des1) & ETDES1_BUFFER1_SIZE_MASK); -} - static int enh_desc_coe_rdes0(int ipc_err, int type, int payload_err) { int ret = good_frame; @@ -435,7 +430,6 @@ static void enh_desc_clear(struct dma_desc *p) const struct stmmac_desc_ops enh_desc_ops = { .tx_status = enh_desc_get_tx_status, .rx_status = enh_desc_get_rx_status, - .get_tx_len = enh_desc_get_tx_len, .init_rx_desc = enh_desc_init_rx_desc, .init_tx_desc = enh_desc_init_tx_desc, .release_tx_desc = enh_desc_release_tx_desc, diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index e317f64a0fcc..374f326efa01 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -63,8 +63,6 @@ struct stmmac_desc_ops { /* Return the transmit status looking at the TDES1 */ int (*tx_status)(struct stmmac_extra_stats *x, struct dma_desc *p, void __iomem *ioaddr); - /* Get the buffer size from the descriptor */ - int (*get_tx_len)(struct dma_desc *p); /* Handle extra events on specific interrupts hw dependent */ void (*set_rx_owner)(struct dma_desc *p, int disable_rx_ic); /* Get the receive frame size */ @@ -123,8 +121,6 @@ struct stmmac_desc_ops { stmmac_do_callback(__priv, desc, get_rx_vlan_valid, __args) #define stmmac_tx_status(__priv, __args...) \ stmmac_do_callback(__priv, desc, tx_status, __args) -#define stmmac_get_tx_len(__priv, __args...) \ - stmmac_do_callback(__priv, desc, get_tx_len, __args) #define stmmac_set_rx_owner(__priv, __args...) \ stmmac_do_void_callback(__priv, desc, set_rx_owner, __args) #define stmmac_get_rx_frame_len(__priv, __args...) \ diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c index 621bc1deb4e5..7c3a818c33c1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c @@ -55,11 +55,6 @@ static int ndesc_get_tx_status(struct stmmac_extra_stats *x, return ret; } -static int ndesc_get_tx_len(struct dma_desc *p) -{ - return (le32_to_cpu(p->des1) & RDES1_BUFFER1_SIZE_MASK); -} - /* This function verifies if each incoming frame has some errors * and, if required, updates the multicast statistics. * In case of success, it returns good_frame because the GMAC device @@ -281,7 +276,6 @@ static void ndesc_clear(struct dma_desc *p) const struct stmmac_desc_ops ndesc_ops = { .tx_status = ndesc_get_tx_status, .rx_status = ndesc_get_rx_status, - .get_tx_len = ndesc_get_tx_len, .init_rx_desc = ndesc_init_rx_desc, .init_tx_desc = ndesc_init_tx_desc, .release_tx_desc = ndesc_release_tx_desc, From d1925291231e00f31f9711b40fcab3e139a2ca73 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 27 Feb 2026 09:53:34 +0000 Subject: [PATCH 0172/1561] net: stmmac: remove dwmac4 DMA_CHAN_INTR_DEFAULT_[TR]X* Remove the DMA_CHAN_INTR_DEFAULT_[TR]X* definitions, which are aliases of their respective DMA_CHAN_INTR_ENA_[TR]IE definitions. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vvuXO-0000000Avn3-1hhD@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h | 4 ---- drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c | 16 ++++++++-------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h index 9d9077a4ac9f..7fbd02a8119f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h @@ -111,8 +111,6 @@ static inline u32 dma_chanx_base_addr(const struct dwmac4_addrs *addrs, /* DMA default interrupt mask for 4.00 */ #define DMA_CHAN_INTR_DEFAULT_MASK (DMA_CHAN_INTR_NORMAL | \ DMA_CHAN_INTR_ABNORMAL) -#define DMA_CHAN_INTR_DEFAULT_RX (DMA_CHAN_INTR_ENA_RIE) -#define DMA_CHAN_INTR_DEFAULT_TX (DMA_CHAN_INTR_ENA_TIE) #define DMA_CHAN_INTR_NORMAL_4_10 (DMA_CHAN_INTR_ENA_NIE_4_10 | \ DMA_CHAN_INTR_ENA_RIE | \ @@ -123,8 +121,6 @@ static inline u32 dma_chanx_base_addr(const struct dwmac4_addrs *addrs, /* DMA default interrupt mask for 4.10a */ #define DMA_CHAN_INTR_DEFAULT_MASK_4_10 (DMA_CHAN_INTR_NORMAL_4_10 | \ DMA_CHAN_INTR_ABNORMAL_4_10) -#define DMA_CHAN_INTR_DEFAULT_RX_4_10 (DMA_CHAN_INTR_ENA_RIE) -#define DMA_CHAN_INTR_DEFAULT_TX_4_10 (DMA_CHAN_INTR_ENA_TIE) #define DMA_CHAN_RX_WATCHDOG(addrs, x) (dma_chanx_base_addr(addrs, x) + 0x38) #define DMA_CHAN_SLOT_CTRL_STATUS(addrs, x) (dma_chanx_base_addr(addrs, x) + 0x3c) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c index c098047a3bff..9217308bfd38 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c @@ -116,9 +116,9 @@ void dwmac4_enable_dma_irq(struct stmmac_priv *priv, void __iomem *ioaddr, u32 value = readl(ioaddr + DMA_CHAN_INTR_ENA(dwmac4_addrs, chan)); if (rx) - value |= DMA_CHAN_INTR_DEFAULT_RX; + value |= DMA_CHAN_INTR_ENA_RIE; if (tx) - value |= DMA_CHAN_INTR_DEFAULT_TX; + value |= DMA_CHAN_INTR_ENA_TIE; writel(value, ioaddr + DMA_CHAN_INTR_ENA(dwmac4_addrs, chan)); } @@ -130,9 +130,9 @@ void dwmac410_enable_dma_irq(struct stmmac_priv *priv, void __iomem *ioaddr, u32 value = readl(ioaddr + DMA_CHAN_INTR_ENA(dwmac4_addrs, chan)); if (rx) - value |= DMA_CHAN_INTR_DEFAULT_RX_4_10; + value |= DMA_CHAN_INTR_ENA_RIE; if (tx) - value |= DMA_CHAN_INTR_DEFAULT_TX_4_10; + value |= DMA_CHAN_INTR_ENA_TIE; writel(value, ioaddr + DMA_CHAN_INTR_ENA(dwmac4_addrs, chan)); } @@ -144,9 +144,9 @@ void dwmac4_disable_dma_irq(struct stmmac_priv *priv, void __iomem *ioaddr, u32 value = readl(ioaddr + DMA_CHAN_INTR_ENA(dwmac4_addrs, chan)); if (rx) - value &= ~DMA_CHAN_INTR_DEFAULT_RX; + value &= ~DMA_CHAN_INTR_ENA_RIE; if (tx) - value &= ~DMA_CHAN_INTR_DEFAULT_TX; + value &= ~DMA_CHAN_INTR_ENA_TIE; writel(value, ioaddr + DMA_CHAN_INTR_ENA(dwmac4_addrs, chan)); } @@ -158,9 +158,9 @@ void dwmac410_disable_dma_irq(struct stmmac_priv *priv, void __iomem *ioaddr, u32 value = readl(ioaddr + DMA_CHAN_INTR_ENA(dwmac4_addrs, chan)); if (rx) - value &= ~DMA_CHAN_INTR_DEFAULT_RX_4_10; + value &= ~DMA_CHAN_INTR_ENA_RIE; if (tx) - value &= ~DMA_CHAN_INTR_DEFAULT_TX_4_10; + value &= ~DMA_CHAN_INTR_ENA_TIE; writel(value, ioaddr + DMA_CHAN_INTR_ENA(dwmac4_addrs, chan)); } From 0e7cb34d0f6cc3ef56f2b0f6f637f501106ab8c5 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 27 Feb 2026 09:53:39 +0000 Subject: [PATCH 0173/1561] net: stmmac: remove dwmac410_(enable|disable)_dma_irq As a result of the previous cleanup, it is now obvious that there are no differences between the dwmac4 and dwmac410 versions of the DMA interrupt enable/disable functions. Moreover, dwmac410_disable_dma_irq() is completely unused; instead, dwmac4_disable_dma_irq() is used to disable the interrupts for v4.10a cores while dwmac410_enable_dma_irq() was being used to enable these same same interrupts. Remove the unnecessary v4.10a functions. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vvuXT-0000000Avn9-29US@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac4_dma.c | 2 +- .../net/ethernet/stmicro/stmmac/dwmac4_dma.h | 4 --- .../net/ethernet/stmicro/stmmac/dwmac4_lib.c | 28 ------------------- 3 files changed, 1 insertion(+), 33 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c index 60b880cdd9da..28728271fbc9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c @@ -582,7 +582,7 @@ const struct stmmac_dma_ops dwmac410_dma_ops = { .dump_regs = dwmac4_dump_dma_regs, .dma_rx_mode = dwmac4_dma_rx_chan_op_mode, .dma_tx_mode = dwmac4_dma_tx_chan_op_mode, - .enable_dma_irq = dwmac410_enable_dma_irq, + .enable_dma_irq = dwmac4_enable_dma_irq, .disable_dma_irq = dwmac4_disable_dma_irq, .start_tx = dwmac4_dma_start_tx, .stop_tx = dwmac4_dma_stop_tx, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h index 7fbd02a8119f..af6580332d49 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h @@ -170,12 +170,8 @@ static inline u32 dma_chanx_base_addr(const struct dwmac4_addrs *addrs, int dwmac4_dma_reset(void __iomem *ioaddr); void dwmac4_enable_dma_irq(struct stmmac_priv *priv, void __iomem *ioaddr, u32 chan, bool rx, bool tx); -void dwmac410_enable_dma_irq(struct stmmac_priv *priv, void __iomem *ioaddr, - u32 chan, bool rx, bool tx); void dwmac4_disable_dma_irq(struct stmmac_priv *priv, void __iomem *ioaddr, u32 chan, bool rx, bool tx); -void dwmac410_disable_dma_irq(struct stmmac_priv *priv, void __iomem *ioaddr, - u32 chan, bool rx, bool tx); void dwmac4_dma_start_tx(struct stmmac_priv *priv, void __iomem *ioaddr, u32 chan); void dwmac4_dma_stop_tx(struct stmmac_priv *priv, void __iomem *ioaddr, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c index 9217308bfd38..8c87a20880c4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c @@ -123,20 +123,6 @@ void dwmac4_enable_dma_irq(struct stmmac_priv *priv, void __iomem *ioaddr, writel(value, ioaddr + DMA_CHAN_INTR_ENA(dwmac4_addrs, chan)); } -void dwmac410_enable_dma_irq(struct stmmac_priv *priv, void __iomem *ioaddr, - u32 chan, bool rx, bool tx) -{ - const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs; - u32 value = readl(ioaddr + DMA_CHAN_INTR_ENA(dwmac4_addrs, chan)); - - if (rx) - value |= DMA_CHAN_INTR_ENA_RIE; - if (tx) - value |= DMA_CHAN_INTR_ENA_TIE; - - writel(value, ioaddr + DMA_CHAN_INTR_ENA(dwmac4_addrs, chan)); -} - void dwmac4_disable_dma_irq(struct stmmac_priv *priv, void __iomem *ioaddr, u32 chan, bool rx, bool tx) { @@ -151,20 +137,6 @@ void dwmac4_disable_dma_irq(struct stmmac_priv *priv, void __iomem *ioaddr, writel(value, ioaddr + DMA_CHAN_INTR_ENA(dwmac4_addrs, chan)); } -void dwmac410_disable_dma_irq(struct stmmac_priv *priv, void __iomem *ioaddr, - u32 chan, bool rx, bool tx) -{ - const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs; - u32 value = readl(ioaddr + DMA_CHAN_INTR_ENA(dwmac4_addrs, chan)); - - if (rx) - value &= ~DMA_CHAN_INTR_ENA_RIE; - if (tx) - value &= ~DMA_CHAN_INTR_ENA_TIE; - - writel(value, ioaddr + DMA_CHAN_INTR_ENA(dwmac4_addrs, chan)); -} - int dwmac4_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr, struct stmmac_extra_stats *x, u32 chan, u32 dir) { From 70bafb53b305801f21562c145d717e62dcf589e4 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 27 Feb 2026 09:53:44 +0000 Subject: [PATCH 0174/1561] net: stmmac: remove mac->xlgmac mac->xlgmac is only ever written to by the dwxlgmac2_quirk() function. Remove mac->xlgmac, and the quirk function that then becomes redundant. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vvuXY-0000000AvnF-2ccv@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/common.h | 1 - drivers/net/ethernet/stmicro/stmmac/hwif.c | 7 ------- 2 files changed, 8 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index d26e8a063022..42a48f655849 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -635,7 +635,6 @@ struct mac_device_info { unsigned int mcast_bits_log2; unsigned int rx_csum; unsigned int pcs; - unsigned int xlgmac; unsigned int num_vlan; u32 vlan_filter[32]; bool vlan_fail_q_en; diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c index 7e69ff4b9a98..1fcc91be7589 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.c +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c @@ -97,12 +97,6 @@ static int stmmac_dwmac4_quirks(struct stmmac_priv *priv) return 0; } -static int stmmac_dwxlgmac_quirks(struct stmmac_priv *priv) -{ - priv->hw->xlgmac = true; - return 0; -} - int stmmac_reset(struct stmmac_priv *priv) { struct plat_stmmacenet_data *plat = priv->plat; @@ -293,7 +287,6 @@ static const struct stmmac_hwif_entry { .mmc = &dwxgmac_mmc_ops, .est = &dwmac510_est_ops, .setup = dwxlgmac2_setup, - .quirks = stmmac_dwxlgmac_quirks, }, }; From ecb037f58da7db2eae199a5f40f4684005d8e57f Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 27 Feb 2026 09:53:49 +0000 Subject: [PATCH 0175/1561] net: stmmac: make extend_desc boolean extend_desc is a boolean, so make it so, and use "true" to assign it. Add a comment to describe what this member does. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vvuXd-0000000AvnL-36K3@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/hwif.c | 2 +- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c index 1fcc91be7589..71dac8c1a3ca 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.c +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c @@ -76,7 +76,7 @@ static int stmmac_dwmac1_quirks(struct stmmac_priv *priv) /* GMAC older than 3.50 has no extended descriptors */ if (priv->synopsys_id >= DWMAC_CORE_3_50) { dev_info(priv->device, "Enabled extended descriptors\n"); - priv->extend_desc = 1; + priv->extend_desc = true; } else { dev_warn(priv->device, "Extended descriptors not supported\n"); } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 51c96a738151..57ded8e5177f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -302,9 +302,13 @@ struct stmmac_priv { bool eee_active; bool eee_sw_timer_en; bool legacy_serdes_is_powered; + /* descriptor format: + * when clear: struct dma_desc or for tx TBS struct dma_edesc + * when set, struct dma_extended_desc + */ + bool extend_desc; unsigned int mode; unsigned int chain_mode; - int extend_desc; struct kernel_hwtstamp_config tstamp_config; struct ptp_clock *ptp_clock; struct ptp_clock_info ptp_clock_ops; From a2a3832ad76d357aadabd2e730cbe4b26725fc8a Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 27 Feb 2026 09:53:54 +0000 Subject: [PATCH 0176/1561] net: stmmac: make chain_mode a boolean priv->chain_mode is only tested for non-zero, so it can be a boolean. Change its type to boolean, and add a comment describing this member. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vvuXi-0000000AvnR-3btC@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 3 ++- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 57ded8e5177f..e98cb9f3a44c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -307,8 +307,9 @@ struct stmmac_priv { * when set, struct dma_extended_desc */ bool extend_desc; + /* chain_mode: requested descriptor mode */ + bool chain_mode; unsigned int mode; - unsigned int chain_mode; struct kernel_hwtstamp_config tstamp_config; struct ptp_clock *ptp_clock; struct ptp_clock_info ptp_clock_ops; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 4e788f54bbbc..1ab958769b96 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -7339,7 +7339,7 @@ static int stmmac_hw_init(struct stmmac_priv *priv) /* dwmac-sun8i only work in chain mode */ if (priv->plat->flags & STMMAC_FLAG_HAS_SUN8I) chain_mode = 1; - priv->chain_mode = chain_mode; + priv->chain_mode = !!chain_mode; /* Initialize HW Interface */ ret = stmmac_hwif_init(priv); From 1558705afbb293549fdedd539682bc5240e1964b Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 27 Feb 2026 09:53:59 +0000 Subject: [PATCH 0177/1561] net: stmmac: make dma_cfg mixed/fixed burst boolean struct stmmac_dma_cfg mixed_burst/fixed_burst members are both boolean in nature - of_property_read_bool() are used to read these from DT, and they are only tested for non-zero values. Use bool to avoid unnecessary padding in this structure. Update dwmac-intel to initialise these using true rather than '1', and remove the '0' initialisers as the struct is already zero initialised on allocation. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vvuXn-0000000AvnX-4A1u@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 4 +--- include/linux/stmmac.h | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index 92d77b0c2f54..ece2a0c38562 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -636,8 +636,6 @@ static int intel_mgbe_common_data(struct pci_dev *pdev, plat->dma_cfg->pbl = 32; plat->dma_cfg->pblx8 = true; - plat->dma_cfg->fixed_burst = 0; - plat->dma_cfg->mixed_burst = 0; plat->dma_cfg->aal = 0; plat->dma_cfg->dche = true; @@ -1106,7 +1104,7 @@ static int quark_default_data(struct pci_dev *pdev, plat->dma_cfg->pbl = 16; plat->dma_cfg->pblx8 = true; - plat->dma_cfg->fixed_burst = 1; + plat->dma_cfg->fixed_burst = true; /* AXI (TODO) */ return 0; diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 77e51eaa5ec5..2fc169c7117e 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -97,8 +97,8 @@ struct stmmac_dma_cfg { int txpbl; int rxpbl; bool pblx8; - int fixed_burst; - int mixed_burst; + bool fixed_burst; + bool mixed_burst; bool aal; bool eame; bool multi_msi_en; From 0835bc72510ffbd2dba1827bd17ac08e31dc2485 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 27 Feb 2026 09:54:05 +0000 Subject: [PATCH 0178/1561] net: stmmac: move initialisation of dma_cfg->atds Move the initialisation of priv->plat->dma_cfg->atds, which indicates that 8 32-bit word descriptors are being used for pre-v4.0 cores, after the call to stmmac_hwif_init(), which will initialise priv->extend_desc and priv->mode (the descriptor mode.) We don't need to re-evaluate this in stmmac_init_dma_engine() - as the state that it depends on only changes in stmmac_hwif_init() which is only called in the probe path. Also, once set, no code clears this flag. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vvuXt-0000000Avnc-0UYC@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 1ab958769b96..027b566bb9c0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3249,9 +3249,6 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv) return -EINVAL; } - if (priv->extend_desc && (priv->mode == STMMAC_RING_MODE)) - priv->plat->dma_cfg->atds = 1; - ret = stmmac_prereset_configure(priv); if (ret) return ret; @@ -7444,6 +7441,13 @@ static int stmmac_hw_init(struct stmmac_priv *priv) return ret; } + /* Set alternate descriptor size (which tells the hardware that + * descriptors are 8 32-bit words) when using extended descriptors + * with ring mode. Only applicable for pre-v4.0 cores. + */ + if (priv->extend_desc && priv->mode == STMMAC_RING_MODE) + priv->plat->dma_cfg->atds = 1; + /* Rx Watchdog is available in the COREs newer than the 3.40. * In some case, for example on bugged HW this feature * has to be disable and this can be done by passing the From 93cde989bd280b5930ed4852f832d87feb001533 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 27 Feb 2026 09:54:10 +0000 Subject: [PATCH 0179/1561] net: stmmac: simplify atds initialisation atds is boolean, and there is only one place that its value is changed. Simplify this to a boolean assignment. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vvuXy-0000000Avnk-10Q8@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 027b566bb9c0..1a054453d05e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -7443,10 +7443,11 @@ static int stmmac_hw_init(struct stmmac_priv *priv) /* Set alternate descriptor size (which tells the hardware that * descriptors are 8 32-bit words) when using extended descriptors - * with ring mode. Only applicable for pre-v4.0 cores. + * with ring mode. Only applicable for pre-v4.0 cores. Platform glue + * is not expected to change this. */ - if (priv->extend_desc && priv->mode == STMMAC_RING_MODE) - priv->plat->dma_cfg->atds = 1; + priv->plat->dma_cfg->atds = priv->extend_desc && + priv->mode == STMMAC_RING_MODE; /* Rx Watchdog is available in the COREs newer than the 3.40. * In some case, for example on bugged HW this feature From 07a8531d44272a1b9fd646f5fc09fc397e5b8a65 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 27 Feb 2026 09:54:15 +0000 Subject: [PATCH 0180/1561] net: stmmac: move DMA configuration validation to driver probe Move the DMA configuration validation from stmmac_init_dma_engine() to the start of the driver probe function. The platform glue is expected to supply the DMA configuration, and a non-zero programmable burst length (bpl). Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vvuY3-0000000Avnq-1Spv@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 1a054453d05e..029e9b518160 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3244,11 +3244,6 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv) u32 chan = 0; int ret = 0; - if (!priv->plat->dma_cfg || !priv->plat->dma_cfg->pbl) { - netdev_err(priv->dev, "Invalid DMA configuration\n"); - return -EINVAL; - } - ret = stmmac_prereset_configure(priv); if (ret) return ret; @@ -7728,6 +7723,11 @@ static int __stmmac_dvr_probe(struct device *device, u32 rxq; int i, ret = 0; + if (!plat_dat->dma_cfg || !plat_dat->dma_cfg->pbl) { + dev_err(device, "invalid DMA configuration\n"); + return -EINVAL; + } + ndev = devm_alloc_etherdev_mqs(device, sizeof(struct stmmac_priv), MTL_MAX_TX_QUEUES, MTL_MAX_RX_QUEUES); if (!ndev) From 6b4fe140874b9e8a1765fda9e9b11b55b79de64e Mon Sep 17 00:00:00 2001 From: Zong-Zhe Yang Date: Fri, 13 Feb 2026 14:15:47 +0800 Subject: [PATCH 0181/1561] wifi: rtw89: 8851b: update supported firmware format to 1 More data will be included in Firmware file and loaded via FW elements. Increase RTL8851B FW format to 1 to prevent old driver from failing to recognize FW with FW elements. Signed-off-by: Zong-Zhe Yang Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260213061552.29997-8-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/rtw8851b.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtw89/rtw8851b.c b/drivers/net/wireless/realtek/rtw89/rtw8851b.c index 19141ecad4d7..a3a5b6fbe46c 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8851b.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8851b.c @@ -15,7 +15,7 @@ #include "txrx.h" #include "util.h" -#define RTW8851B_FW_FORMAT_MAX 0 +#define RTW8851B_FW_FORMAT_MAX 1 #define RTW8851B_FW_BASENAME "rtw89/rtw8851b_fw" #define RTW8851B_MODULE_FIRMWARE \ RTW89_GEN_MODULE_FWNAME(RTW8851B_FW_BASENAME, RTW8851B_FW_FORMAT_MAX) From 79b3702bbc02bc6e65e89195d761caae51fa20c8 Mon Sep 17 00:00:00 2001 From: Kuan-Chung Chen Date: Fri, 13 Feb 2026 14:15:48 +0800 Subject: [PATCH 0182/1561] wifi: rtw89: add H2C command to protect TX/RX for unused PHY For BE chips, the unused PHY should pause transmissions and receptions. This ensures that no unexpected packets are routed to an inactive PHY, which could otherwise trigger SER L0 and lead to TX hang. Signed-off-by: Kuan-Chung Chen Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260213061552.29997-9-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/fw.c | 87 +++++++++++++++++++ drivers/net/wireless/realtek/rtw89/fw.h | 51 +++++++++++ drivers/net/wireless/realtek/rtw89/mac.c | 3 + drivers/net/wireless/realtek/rtw89/mac80211.c | 5 ++ drivers/net/wireless/realtek/rtw89/ps.c | 2 + 5 files changed, 148 insertions(+) diff --git a/drivers/net/wireless/realtek/rtw89/fw.c b/drivers/net/wireless/realtek/rtw89/fw.c index 52f7e65fe6a5..ffbd75cb5533 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.c +++ b/drivers/net/wireless/realtek/rtw89/fw.c @@ -6871,6 +6871,93 @@ flex_member: return 0; } +int rtw89_fw_h2c_trx_protect(struct rtw89_dev *rtwdev, + enum rtw89_phy_idx phy_idx, bool enable) +{ + struct rtw89_wait_info *wait = &rtwdev->mac.fw_ofld_wait; + const struct rtw89_chip_info *chip = rtwdev->chip; + struct rtw89_h2c_trx_protect *h2c; + u32 len = sizeof(*h2c); + struct sk_buff *skb; + int ret; + + if (chip->chip_gen != RTW89_CHIP_BE) + return 0; + + skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, len); + if (!skb) { + rtw89_err(rtwdev, "failed to alloc skb for h2c trx protect\n"); + return -ENOMEM; + } + + skb_put(skb, len); + h2c = (struct rtw89_h2c_trx_protect *)skb->data; + + h2c->c0 = le32_encode_bits(BIT(phy_idx), RTW89_H2C_TRX_PROTECT_C0_BAND_BITMAP) | + le32_encode_bits(0, RTW89_H2C_TRX_PROTECT_C0_OP_MODE); + h2c->c1 = le32_encode_bits(enable, RTW89_H2C_TRX_PROTECT_C1_RX_IN) | + le32_encode_bits(enable, RTW89_H2C_TRX_PROTECT_C1_PPDU_STS) | + le32_encode_bits(1, RTW89_H2C_TRX_PROTECT_C1_MSK_RX_IN) | + le32_encode_bits(1, RTW89_H2C_TRX_PROTECT_C1_MSK_PPDU_STS); + h2c->w0 = le32_encode_bits(enable, RTW89_H2C_TRX_PROTECT_W0_TXEN_BE0) | + le32_encode_bits(enable, RTW89_H2C_TRX_PROTECT_W0_TXEN_BK0) | + le32_encode_bits(enable, RTW89_H2C_TRX_PROTECT_W0_TXEN_VI0) | + le32_encode_bits(enable, RTW89_H2C_TRX_PROTECT_W0_TXEN_VO0) | + le32_encode_bits(enable, RTW89_H2C_TRX_PROTECT_W0_TXEN_BE1) | + le32_encode_bits(enable, RTW89_H2C_TRX_PROTECT_W0_TXEN_BK1) | + le32_encode_bits(enable, RTW89_H2C_TRX_PROTECT_W0_TXEN_VI1) | + le32_encode_bits(enable, RTW89_H2C_TRX_PROTECT_W0_TXEN_VO1) | + le32_encode_bits(enable, RTW89_H2C_TRX_PROTECT_W0_TXEN_MG0) | + le32_encode_bits(enable, RTW89_H2C_TRX_PROTECT_W0_TXEN_MG1) | + le32_encode_bits(enable, RTW89_H2C_TRX_PROTECT_W0_TXEN_MG2) | + le32_encode_bits(enable, RTW89_H2C_TRX_PROTECT_W0_TXEN_HI) | + le32_encode_bits(enable, RTW89_H2C_TRX_PROTECT_W0_TXEN_BCN) | + le32_encode_bits(enable, RTW89_H2C_TRX_PROTECT_W0_TXEN_UL) | + le32_encode_bits(enable, RTW89_H2C_TRX_PROTECT_W0_TXEN_TWT0) | + le32_encode_bits(enable, RTW89_H2C_TRX_PROTECT_W0_TXEN_TWT1) | + le32_encode_bits(enable, RTW89_H2C_TRX_PROTECT_W0_TXEN_TWT2) | + le32_encode_bits(enable, RTW89_H2C_TRX_PROTECT_W0_TXEN_TWT3) | + le32_encode_bits(enable, RTW89_H2C_TRX_PROTECT_W0_TXEN_SPEQ0) | + le32_encode_bits(enable, RTW89_H2C_TRX_PROTECT_W0_TXEN_SPEQ1); + h2c->m0 = cpu_to_le32(RTW89_H2C_TRX_PROTECT_W0_TXEN_BE0 | + RTW89_H2C_TRX_PROTECT_W0_TXEN_BK0 | + RTW89_H2C_TRX_PROTECT_W0_TXEN_VI0 | + RTW89_H2C_TRX_PROTECT_W0_TXEN_VO0 | + RTW89_H2C_TRX_PROTECT_W0_TXEN_BE1 | + RTW89_H2C_TRX_PROTECT_W0_TXEN_BK1 | + RTW89_H2C_TRX_PROTECT_W0_TXEN_VI1 | + RTW89_H2C_TRX_PROTECT_W0_TXEN_VO1 | + RTW89_H2C_TRX_PROTECT_W0_TXEN_MG0 | + RTW89_H2C_TRX_PROTECT_W0_TXEN_MG1 | + RTW89_H2C_TRX_PROTECT_W0_TXEN_MG2 | + RTW89_H2C_TRX_PROTECT_W0_TXEN_HI | + RTW89_H2C_TRX_PROTECT_W0_TXEN_BCN | + RTW89_H2C_TRX_PROTECT_W0_TXEN_UL | + RTW89_H2C_TRX_PROTECT_W0_TXEN_TWT0 | + RTW89_H2C_TRX_PROTECT_W0_TXEN_TWT1 | + RTW89_H2C_TRX_PROTECT_W0_TXEN_TWT2 | + RTW89_H2C_TRX_PROTECT_W0_TXEN_TWT3 | + RTW89_H2C_TRX_PROTECT_W0_TXEN_SPEQ0 | + RTW89_H2C_TRX_PROTECT_W0_TXEN_SPEQ1); + h2c->w1 = le32_encode_bits(enable, RTW89_H2C_TRX_PROTECT_W1_CHINFO_EN) | + le32_encode_bits(enable, RTW89_H2C_TRX_PROTECT_W1_DFS_EN); + h2c->m1 = cpu_to_le32(RTW89_H2C_TRX_PROTECT_W1_CHINFO_EN | + RTW89_H2C_TRX_PROTECT_W1_DFS_EN); + + rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C, + H2C_CAT_MAC, H2C_CL_MAC_FW_OFLD, + H2C_FUNC_TRX_PROTECT, 0, 1, len); + + ret = rtw89_h2c_tx_and_wait(rtwdev, skb, wait, + RTW89_FW_OFLD_WAIT_COND_TRX_PROTECT); + if (ret) { + rtw89_debug(rtwdev, RTW89_DBG_FW, "failed to trx protect\n"); + return ret; + } + + return 0; +} + int rtw89_fw_h2c_rf_reg(struct rtw89_dev *rtwdev, struct rtw89_fw_h2c_rf_reg_info *info, u16 len, u8 page) diff --git a/drivers/net/wireless/realtek/rtw89/fw.h b/drivers/net/wireless/realtek/rtw89/fw.h index c60d419616d6..80d260eb08cd 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.h +++ b/drivers/net/wireless/realtek/rtw89/fw.h @@ -3106,6 +3106,44 @@ struct rtw89_h2c_scanofld_be { #define RTW89_H2C_SCANOFLD_BE_W9_SIZE_MACC GENMASK(15, 8) #define RTW89_H2C_SCANOFLD_BE_W9_SIZE_OP GENMASK(23, 16) +struct rtw89_h2c_trx_protect { + __le32 c0; + __le32 c1; + __le32 w0; + __le32 m0; + __le32 w1; + __le32 m1; +} __packed; + +#define RTW89_H2C_TRX_PROTECT_C0_BAND_BITMAP GENMASK(2, 0) +#define RTW89_H2C_TRX_PROTECT_C0_OP_MODE GENMASK(4, 3) +#define RTW89_H2C_TRX_PROTECT_C1_RX_IN BIT(0) +#define RTW89_H2C_TRX_PROTECT_C1_PPDU_STS BIT(4) +#define RTW89_H2C_TRX_PROTECT_C1_MSK_RX_IN BIT(16) +#define RTW89_H2C_TRX_PROTECT_C1_MSK_PPDU_STS BIT(20) +#define RTW89_H2C_TRX_PROTECT_W0_TXEN_BE0 BIT(0) +#define RTW89_H2C_TRX_PROTECT_W0_TXEN_BK0 BIT(1) +#define RTW89_H2C_TRX_PROTECT_W0_TXEN_VI0 BIT(2) +#define RTW89_H2C_TRX_PROTECT_W0_TXEN_VO0 BIT(3) +#define RTW89_H2C_TRX_PROTECT_W0_TXEN_BE1 BIT(4) +#define RTW89_H2C_TRX_PROTECT_W0_TXEN_BK1 BIT(5) +#define RTW89_H2C_TRX_PROTECT_W0_TXEN_VI1 BIT(6) +#define RTW89_H2C_TRX_PROTECT_W0_TXEN_VO1 BIT(7) +#define RTW89_H2C_TRX_PROTECT_W0_TXEN_MG0 BIT(8) +#define RTW89_H2C_TRX_PROTECT_W0_TXEN_MG1 BIT(9) +#define RTW89_H2C_TRX_PROTECT_W0_TXEN_MG2 BIT(10) +#define RTW89_H2C_TRX_PROTECT_W0_TXEN_HI BIT(11) +#define RTW89_H2C_TRX_PROTECT_W0_TXEN_BCN BIT(12) +#define RTW89_H2C_TRX_PROTECT_W0_TXEN_UL BIT(13) +#define RTW89_H2C_TRX_PROTECT_W0_TXEN_TWT0 BIT(14) +#define RTW89_H2C_TRX_PROTECT_W0_TXEN_TWT1 BIT(15) +#define RTW89_H2C_TRX_PROTECT_W0_TXEN_TWT2 BIT(16) +#define RTW89_H2C_TRX_PROTECT_W0_TXEN_TWT3 BIT(17) +#define RTW89_H2C_TRX_PROTECT_W0_TXEN_SPEQ0 BIT(18) +#define RTW89_H2C_TRX_PROTECT_W0_TXEN_SPEQ1 BIT(19) +#define RTW89_H2C_TRX_PROTECT_W1_CHINFO_EN BIT(0) +#define RTW89_H2C_TRX_PROTECT_W1_DFS_EN BIT(1) + struct rtw89_h2c_fwips { __le32 w0; } __packed; @@ -4598,6 +4636,7 @@ enum rtw89_fw_ofld_h2c_func { H2C_FUNC_OFLD_TP = 0x20, H2C_FUNC_MAC_MACID_PAUSE_SLEEP = 0x28, H2C_FUNC_SCANOFLD_BE = 0x2c, + H2C_FUNC_TRX_PROTECT = 0x34, NUM_OF_RTW89_FW_OFLD_H2C_FUNC, }; @@ -4608,6 +4647,7 @@ enum rtw89_fw_ofld_h2c_func { #define RTW89_FW_OFLD_WAIT_COND_PKT_OFLD(pkt_id, pkt_op) \ RTW89_FW_OFLD_WAIT_COND(RTW89_PKT_OFLD_WAIT_TAG(pkt_id, pkt_op), \ H2C_FUNC_PACKET_OFLD) +#define RTW89_FW_OFLD_WAIT_COND_TRX_PROTECT RTW89_FW_OFLD_WAIT_COND(0, H2C_FUNC_TRX_PROTECT) #define RTW89_SCANOFLD_WAIT_COND_ADD_CH RTW89_FW_OFLD_WAIT_COND(0, H2C_FUNC_ADD_SCANOFLD_CH) @@ -5294,6 +5334,8 @@ int rtw89_fw_h2c_scan_offload_be(struct rtw89_dev *rtwdev, struct rtw89_scan_option *opt, struct rtw89_vif_link *vif, bool wowlan); +int rtw89_fw_h2c_trx_protect(struct rtw89_dev *rtwdev, + enum rtw89_phy_idx phy_idx, bool enable); int rtw89_fw_h2c_rf_reg(struct rtw89_dev *rtwdev, struct rtw89_fw_h2c_rf_reg_info *info, u16 len, u8 page); @@ -5469,6 +5511,15 @@ static inline void rtw89_fw_h2c_init_ba_cam(struct rtw89_dev *rtwdev) rtw89_fw_h2c_init_dynamic_ba_cam_v0_ext(rtwdev); } +static inline void rtw89_fw_h2c_init_trx_protect(struct rtw89_dev *rtwdev) +{ + u8 active_bands = rtw89_get_active_phy_bitmap(rtwdev); + int i; + + for (i = 0; i < RTW89_PHY_NUM; i++) + rtw89_fw_h2c_trx_protect(rtwdev, i, active_bands & BIT(i)); +} + static inline int rtw89_chip_h2c_default_cmac_tbl(struct rtw89_dev *rtwdev, struct rtw89_vif_link *rtwvif_link, struct rtw89_sta_link *rtwsta_link) diff --git a/drivers/net/wireless/realtek/rtw89/mac.c b/drivers/net/wireless/realtek/rtw89/mac.c index c98ca2a82194..04ef4bae1852 100644 --- a/drivers/net/wireless/realtek/rtw89/mac.c +++ b/drivers/net/wireless/realtek/rtw89/mac.c @@ -5411,6 +5411,9 @@ rtw89_mac_c2h_done_ack(struct rtw89_dev *rtwdev, struct sk_buff *skb_c2h, u32 le cond = RTW89_SCANOFLD_BE_WAIT_COND_START; h2c_return &= RTW89_C2H_SCAN_DONE_ACK_RETURN; break; + case H2C_FUNC_TRX_PROTECT: + cond = RTW89_FW_OFLD_WAIT_COND_TRX_PROTECT; + break; } data.err = !!h2c_return; diff --git a/drivers/net/wireless/realtek/rtw89/mac80211.c b/drivers/net/wireless/realtek/rtw89/mac80211.c index 0ea33743853e..1ef73bfc40d1 100644 --- a/drivers/net/wireless/realtek/rtw89/mac80211.c +++ b/drivers/net/wireless/realtek/rtw89/mac80211.c @@ -528,6 +528,8 @@ static int __rtw89_ops_sta_add(struct rtw89_dev *rtwdev, if (vif->type == NL80211_IFTYPE_AP || sta->tdls) rtw89_queue_chanctx_change(rtwdev, RTW89_CHANCTX_REMOTE_STA_CHANGE); + rtw89_fw_h2c_init_trx_protect(rtwdev); + return 0; unset_link: @@ -1584,6 +1586,8 @@ static void __rtw89_ops_clr_vif_links(struct rtw89_dev *rtwdev, if (unlikely(!rtwvif_link)) continue; + rtw89_fw_h2c_trx_protect(rtwdev, rtwvif_link->phy_idx, false); + __rtw89_ops_remove_iface_link(rtwdev, rtwvif_link); rtw89_vif_unset_link(rtwvif, link_id); @@ -1609,6 +1613,7 @@ static int __rtw89_ops_set_vif_links(struct rtw89_dev *rtwdev, __func__, link_id); return ret; } + rtw89_fw_h2c_trx_protect(rtwdev, rtwvif_link->phy_idx, true); } return 0; diff --git a/drivers/net/wireless/realtek/rtw89/ps.c b/drivers/net/wireless/realtek/rtw89/ps.c index aad2ee7926d6..125cf14fa581 100644 --- a/drivers/net/wireless/realtek/rtw89/ps.c +++ b/drivers/net/wireless/realtek/rtw89/ps.c @@ -226,6 +226,8 @@ void rtw89_leave_lps(struct rtw89_dev *rtwdev) rtw89_for_each_rtwvif(rtwdev, rtwvif) rtw89_vif_for_each_link(rtwvif, rtwvif_link, link_id) rtw89_leave_lps_vif(rtwdev, rtwvif_link); + + rtw89_fw_h2c_init_trx_protect(rtwdev); } void rtw89_enter_ips(struct rtw89_dev *rtwdev) From dcd2326db84399adbb0a6768d04a09f3078394c5 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Fri, 13 Feb 2026 14:15:49 +0800 Subject: [PATCH 0183/1561] wifi: rtw89: rfk: add hardware version to rtw89_fw_h2c_rf_pre_ntfy_mcc for new WiFi 7 firmware The RF calibration in firmware needs proper hardware version to select corresponding logic, so add the field to H2C command accordingly. Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260213061552.29997-10-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/fw.c | 1 + drivers/net/wireless/realtek/rtw89/fw.h | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/net/wireless/realtek/rtw89/fw.c b/drivers/net/wireless/realtek/rtw89/fw.c index ffbd75cb5533..99084a6e10a5 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.c +++ b/drivers/net/wireless/realtek/rtw89/fw.c @@ -7371,6 +7371,7 @@ v1: h2c = (struct rtw89_fw_h2c_rfk_pre_info_mcc *)skb->data; h2c->aid = cpu_to_le32(hal->aid); + h2c->acv = hal->acv; done: rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C, diff --git a/drivers/net/wireless/realtek/rtw89/fw.h b/drivers/net/wireless/realtek/rtw89/fw.h index 80d260eb08cd..57e3b464c0a2 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.h +++ b/drivers/net/wireless/realtek/rtw89/fw.h @@ -4881,6 +4881,8 @@ struct rtw89_fw_h2c_rfk_pre_info_mcc { struct rtw89_fw_h2c_rfk_pre_info_mcc_v1 base; u8 rsvd[2]; __le32 aid; + u8 acv; + u8 rsvd2[3]; } __packed; struct rtw89_h2c_rf_tssi { From a72de4d4599e8e31d43485f108bed477da139171 Mon Sep 17 00:00:00 2001 From: Zong-Zhe Yang Date: Fri, 13 Feb 2026 14:15:50 +0800 Subject: [PATCH 0184/1561] wifi: rtw89: debug: add SER SW counters to count simulation Some chipsets, e.g. Wi-Fi 7, will not record SER (system error recovery), which are triggered by simulations, in the HW counters. Their HW counters only record the SER happen in field. However for verification, it's still needed to check if simulations are triggered. So, add SW counters to count any causes that SER happen. But, SW can only count L1 and L2. SW does not involve L0 SER, so SW cannot count it. Signed-off-by: Zong-Zhe Yang Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260213061552.29997-11-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/core.h | 7 +++++++ drivers/net/wireless/realtek/rtw89/debug.c | 6 ++++++ drivers/net/wireless/realtek/rtw89/ser.c | 2 ++ 3 files changed, 15 insertions(+) diff --git a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h index 1ad1fd2a1f6f..5d1cec20bc80 100644 --- a/drivers/net/wireless/realtek/rtw89/core.h +++ b/drivers/net/wireless/realtek/rtw89/core.h @@ -5753,11 +5753,18 @@ enum rtw89_ser_rcvy_step { RTW89_NUM_OF_SER_FLAGS }; +struct rtw89_ser_count { + unsigned int l1; + unsigned int l2; +}; + struct rtw89_ser { u8 state; u8 alarm_event; bool prehandle_l1; + struct rtw89_ser_count sw_cnt; + struct work_struct ser_hdl_work; struct delayed_work ser_alarm_work; const struct state_ent *st_tbl; diff --git a/drivers/net/wireless/realtek/rtw89/debug.c b/drivers/net/wireless/realtek/rtw89/debug.c index 012ead92f5f2..3e16ed2c4570 100644 --- a/drivers/net/wireless/realtek/rtw89/debug.c +++ b/drivers/net/wireless/realtek/rtw89/debug.c @@ -3781,6 +3781,7 @@ static ssize_t rtw89_debug_priv_ser_counters_get(struct rtw89_dev *rtwdev, struct rtw89_debugfs_priv *debugfs_priv, char *buf, size_t bufsz) { + const struct rtw89_ser_count *sw_cnt = &rtwdev->ser.sw_cnt; const struct rtw89_chip_info *chip = rtwdev->chip; struct rtw89_dbg_ser_counters cnt = {}; char *p = buf, *end = buf + bufsz; @@ -3798,6 +3799,11 @@ static ssize_t rtw89_debug_priv_ser_counters_get(struct rtw89_dev *rtwdev, return -EOPNOTSUPP; } + p += scnprintf(p, end - p, "SER L1 SW Count: %u\n", sw_cnt->l1); + p += scnprintf(p, end - p, "SER L2 SW Count: %u\n", sw_cnt->l2); + + /* Some chipsets won't record SER simulation in HW cnt. */ + p += scnprintf(p, end - p, "---\n"); p += scnprintf(p, end - p, "SER L0 Count: %d\n", cnt.l0); p += scnprintf(p, end - p, "SER L1 Count: %d\n", cnt.l1); p += scnprintf(p, end - p, "SER L0 promote event: %d\n", cnt.l0_to_l1); diff --git a/drivers/net/wireless/realtek/rtw89/ser.c b/drivers/net/wireless/realtek/rtw89/ser.c index f91e66133b30..75220042a9a7 100644 --- a/drivers/net/wireless/realtek/rtw89/ser.c +++ b/drivers/net/wireless/realtek/rtw89/ser.c @@ -498,6 +498,7 @@ static void ser_reset_trx_st_hdl(struct rtw89_ser *ser, u8 evt) switch (evt) { case SER_EV_STATE_IN: wiphy_lock(wiphy); + ser->sw_cnt.l1++; wiphy_delayed_work_cancel(wiphy, &rtwdev->track_work); wiphy_delayed_work_cancel(wiphy, &rtwdev->track_ps_work); wiphy_unlock(wiphy); @@ -730,6 +731,7 @@ static void ser_l2_reset_st_hdl(struct rtw89_ser *ser, u8 evt) switch (evt) { case SER_EV_STATE_IN: wiphy_lock(rtwdev->hw->wiphy); + ser->sw_cnt.l2++; ser_l2_reset_st_pre_hdl(ser); wiphy_unlock(rtwdev->hw->wiphy); From 0cae26a78b14fe1292b0f50f28ebabe6801f3885 Mon Sep 17 00:00:00 2001 From: Zong-Zhe Yang Date: Fri, 13 Feb 2026 14:15:51 +0800 Subject: [PATCH 0185/1561] wifi: rtw89: ser: Wi-Fi 7 reset HALT C2H after reading it When a SER (system error recovery) interrupt happens, driver reads HALT C2H register to get the error status via MAC. For Wi-Fi 7 chipset, driver needs to reset HALT C2H register after reading it to make FW aware that. Signed-off-by: Zong-Zhe Yang Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260213061552.29997-12-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/mac.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtw89/mac.c b/drivers/net/wireless/realtek/rtw89/mac.c index 04ef4bae1852..e1cea0f88406 100644 --- a/drivers/net/wireless/realtek/rtw89/mac.c +++ b/drivers/net/wireless/realtek/rtw89/mac.c @@ -814,6 +814,7 @@ static bool rtw89_mac_suppress_log(struct rtw89_dev *rtwdev, u32 err) u32 rtw89_mac_get_err_status(struct rtw89_dev *rtwdev) { const struct rtw89_mac_gen_def *mac = rtwdev->chip->mac_def; + const struct rtw89_chip_info *chip = rtwdev->chip; u32 err, err_scnr; int ret; @@ -836,11 +837,15 @@ u32 rtw89_mac_get_err_status(struct rtw89_dev *rtwdev) err = MAC_AX_ERR_RXI300; if (rtw89_mac_suppress_log(rtwdev, err)) - return err; + goto bottom; rtw89_fw_st_dbg_dump(rtwdev); mac->dump_err_status(rtwdev, err); +bottom: + if (chip->chip_gen != RTW89_CHIP_AX) + rtw89_write32(rtwdev, R_AX_HALT_C2H, 0); + return err; } EXPORT_SYMBOL(rtw89_mac_get_err_status); From 3d90c421341ba6705f991ddaa350b585afefe078 Mon Sep 17 00:00:00 2001 From: Zong-Zhe Yang Date: Fri, 13 Feb 2026 14:15:52 +0800 Subject: [PATCH 0186/1561] wifi: rtw89: ser: post-recover DMAC state to prevent LPS If entering LPS during SER (system error recovery), IMR might fail to be re-enabled after SER. Then, the next SER would not be noticed well. After FW v0.35.100.0, Wi-Fi 7 chipsets adjust the order in which SER recovers DMAC state to prevent LPS from being in the middle of SER. Signed-off-by: Zong-Zhe Yang Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260213061552.29997-13-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/core.h | 1 + drivers/net/wireless/realtek/rtw89/fw.c | 1 + drivers/net/wireless/realtek/rtw89/mac.c | 7 ++++++- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h index 5d1cec20bc80..cf0cc718f41c 100644 --- a/drivers/net/wireless/realtek/rtw89/core.h +++ b/drivers/net/wireless/realtek/rtw89/core.h @@ -4796,6 +4796,7 @@ enum rtw89_fw_feature { RTW89_FW_FEATURE_SER_L1_BY_EVENT, RTW89_FW_FEATURE_SIM_SER_L0L1_BY_HALT_H2C, RTW89_FW_FEATURE_LPS_ML_INFO_V1, + RTW89_FW_FEATURE_SER_POST_RECOVER_DMAC, NUM_OF_RTW89_FW_FEATURES, }; diff --git a/drivers/net/wireless/realtek/rtw89/fw.c b/drivers/net/wireless/realtek/rtw89/fw.c index 99084a6e10a5..45d8c5e70084 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.c +++ b/drivers/net/wireless/realtek/rtw89/fw.c @@ -923,6 +923,7 @@ static const struct __fw_feat_cfg fw_feat_tbl[] = { __CFG_FW_FEAT(RTL8922A, ge, 0, 35, 84, 0, RFK_PRE_NOTIFY_MCC_V1), __CFG_FW_FEAT(RTL8922A, lt, 0, 35, 84, 0, ADDR_CAM_V0), __CFG_FW_FEAT(RTL8922A, ge, 0, 35, 97, 0, SIM_SER_L0L1_BY_HALT_H2C), + __CFG_FW_FEAT(RTL8922A, ge, 0, 35, 100, 0, SER_POST_RECOVER_DMAC), }; static void rtw89_fw_iterate_feature_cfg(struct rtw89_fw_info *fw, diff --git a/drivers/net/wireless/realtek/rtw89/mac.c b/drivers/net/wireless/realtek/rtw89/mac.c index e1cea0f88406..234928613ef4 100644 --- a/drivers/net/wireless/realtek/rtw89/mac.c +++ b/drivers/net/wireless/realtek/rtw89/mac.c @@ -826,7 +826,9 @@ u32 rtw89_mac_get_err_status(struct rtw89_dev *rtwdev) } err = rtw89_read32(rtwdev, R_AX_HALT_C2H); - rtw89_write32(rtwdev, R_AX_HALT_C2H_CTRL, 0); + + if (!RTW89_CHK_FW_FEATURE(SER_POST_RECOVER_DMAC, &rtwdev->fw)) + rtw89_write32(rtwdev, R_AX_HALT_C2H_CTRL, 0); err_scnr = RTW89_ERROR_SCENARIO(err); if (err_scnr == RTW89_WCPU_CPU_EXCEPTION) @@ -846,6 +848,9 @@ bottom: if (chip->chip_gen != RTW89_CHIP_AX) rtw89_write32(rtwdev, R_AX_HALT_C2H, 0); + if (RTW89_CHK_FW_FEATURE(SER_POST_RECOVER_DMAC, &rtwdev->fw)) + rtw89_write32(rtwdev, R_AX_HALT_C2H_CTRL, 0); + return err; } EXPORT_SYMBOL(rtw89_mac_get_err_status); From b8909aad5b8de0e2d7e27c0246119eb07f0caa3b Mon Sep 17 00:00:00 2001 From: Daniel Machon Date: Fri, 27 Feb 2026 15:56:39 +0100 Subject: [PATCH 0187/1561] net: sparx5: move netdev and notifier block registration to probe Move netdev registration and notifier block registration from sparx5_start() to probe(). This allows proper cleanup via goto-based error labels in probe(). Also, remove the sparx5_cleanup_ports() helper as its functionality is now split between sparx5_unregister_netdevs() and sparx5_destroy_netdevs() called at appropriate points. Signed-off-by: Daniel Machon Link: https://patch.msgid.link/20260227-sparx5-init-deinit-v2-1-10ba54ccf005@microchip.com Signed-off-by: Jakub Kicinski --- .../ethernet/microchip/sparx5/sparx5_main.c | 41 ++++++++++--------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index 9befd0719f74..e07bf414afdf 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -773,20 +773,11 @@ static int sparx5_start(struct sparx5 *sparx5) mutex_init(&sparx5->mdb_lock); INIT_LIST_HEAD(&sparx5->mdb_entries); - err = sparx5_register_netdevs(sparx5); - if (err) - return err; - sparx5_board_init(sparx5); - err = sparx5_register_notifier_blocks(sparx5); - if (err) - return err; err = sparx5_vcap_init(sparx5); - if (err) { - sparx5_unregister_notifier_blocks(sparx5); + if (err) return err; - } /* Start Frame DMA with fallback to register based INJ/XTR */ err = -ENXIO; @@ -835,12 +826,6 @@ static int sparx5_start(struct sparx5 *sparx5) return err; } -static void sparx5_cleanup_ports(struct sparx5 *sparx5) -{ - sparx5_unregister_netdevs(sparx5); - sparx5_destroy_netdevs(sparx5); -} - static int mchp_sparx5_probe(struct platform_device *pdev) { struct initial_port_config *configs, *config; @@ -1019,10 +1004,26 @@ static int mchp_sparx5_probe(struct platform_device *pdev) INIT_LIST_HEAD(&sparx5->mall_entries); + err = sparx5_register_netdevs(sparx5); + if (err) { + dev_err(sparx5->dev, "Failed to register net devices\n"); + goto cleanup_ptp; + } + + err = sparx5_register_notifier_blocks(sparx5); + if (err) { + dev_err(sparx5->dev, "Failed to register notifier blocks\n"); + goto cleanup_netdevs; + } + goto cleanup_config; +cleanup_netdevs: + sparx5_unregister_netdevs(sparx5); +cleanup_ptp: + sparx5_ptp_deinit(sparx5); cleanup_ports: - sparx5_cleanup_ports(sparx5); + sparx5_destroy_netdevs(sparx5); if (sparx5->mact_queue) destroy_workqueue(sparx5->mact_queue); cleanup_config: @@ -1046,12 +1047,12 @@ static void mchp_sparx5_remove(struct platform_device *pdev) disable_irq(sparx5->fdma_irq); sparx5->fdma_irq = -ENXIO; } + sparx5_unregister_notifier_blocks(sparx5); + sparx5_unregister_netdevs(sparx5); sparx5_ptp_deinit(sparx5); ops->fdma_deinit(sparx5); - sparx5_cleanup_ports(sparx5); sparx5_vcap_destroy(sparx5); - /* Unregister netdevs */ - sparx5_unregister_notifier_blocks(sparx5); + sparx5_destroy_netdevs(sparx5); destroy_workqueue(sparx5->mact_queue); } From 3a95973e7c79bbef04171240ecf8c42e28b4ea46 Mon Sep 17 00:00:00 2001 From: Daniel Machon Date: Fri, 27 Feb 2026 15:56:40 +0100 Subject: [PATCH 0188/1561] net: sparx5: move VCAP initialization to probe Move the VCAP initialization code from sparx5_start() to probe(). Add proper error handling with a cleanup_vcap label and sparx5_vcap_deinit() call. Also, rename sparx5_vcap_destroy() to sparx5_vcap_deinit() to stay consistent with the naming. Signed-off-by: Daniel Machon Link: https://patch.msgid.link/20260227-sparx5-init-deinit-v2-2-10ba54ccf005@microchip.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/microchip/sparx5/sparx5_main.c | 16 ++++++++++------ .../net/ethernet/microchip/sparx5/sparx5_main.h | 2 +- .../ethernet/microchip/sparx5/sparx5_vcap_impl.c | 2 +- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index e07bf414afdf..dd313b29b7aa 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -775,10 +775,6 @@ static int sparx5_start(struct sparx5 *sparx5) sparx5_board_init(sparx5); - err = sparx5_vcap_init(sparx5); - if (err) - return err; - /* Start Frame DMA with fallback to register based INJ/XTR */ err = -ENXIO; if (sparx5->fdma_irq >= 0) { @@ -1002,12 +998,18 @@ static int mchp_sparx5_probe(struct platform_device *pdev) goto cleanup_ports; } + err = sparx5_vcap_init(sparx5); + if (err) { + dev_err(sparx5->dev, "Failed to initialize VCAP\n"); + goto cleanup_ptp; + } + INIT_LIST_HEAD(&sparx5->mall_entries); err = sparx5_register_netdevs(sparx5); if (err) { dev_err(sparx5->dev, "Failed to register net devices\n"); - goto cleanup_ptp; + goto cleanup_vcap; } err = sparx5_register_notifier_blocks(sparx5); @@ -1020,6 +1022,8 @@ static int mchp_sparx5_probe(struct platform_device *pdev) cleanup_netdevs: sparx5_unregister_netdevs(sparx5); +cleanup_vcap: + sparx5_vcap_deinit(sparx5); cleanup_ptp: sparx5_ptp_deinit(sparx5); cleanup_ports: @@ -1049,9 +1053,9 @@ static void mchp_sparx5_remove(struct platform_device *pdev) } sparx5_unregister_notifier_blocks(sparx5); sparx5_unregister_netdevs(sparx5); + sparx5_vcap_deinit(sparx5); sparx5_ptp_deinit(sparx5); ops->fdma_deinit(sparx5); - sparx5_vcap_destroy(sparx5); sparx5_destroy_netdevs(sparx5); destroy_workqueue(sparx5->mact_queue); } diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h index fe7d8bcc0cd9..6a069434fca6 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h @@ -563,7 +563,7 @@ void sparx5_get_hwtimestamp(struct sparx5 *sparx5, /* sparx5_vcap_impl.c */ int sparx5_vcap_init(struct sparx5 *sparx5); -void sparx5_vcap_destroy(struct sparx5 *sparx5); +void sparx5_vcap_deinit(struct sparx5 *sparx5); /* sparx5_pgid.c */ enum sparx5_pgid_type { diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_impl.c b/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_impl.c index 3d64a0448d43..95b93e46a41d 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_impl.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_impl.c @@ -2083,7 +2083,7 @@ int sparx5_vcap_init(struct sparx5 *sparx5) return err; } -void sparx5_vcap_destroy(struct sparx5 *sparx5) +void sparx5_vcap_deinit(struct sparx5 *sparx5) { struct vcap_control *ctrl = sparx5->vcap_ctrl; struct vcap_admin *admin, *admin_next; From 13cb1b68842b010275974454d4c5dffaf427197f Mon Sep 17 00:00:00 2001 From: Daniel Machon Date: Fri, 27 Feb 2026 15:56:41 +0100 Subject: [PATCH 0189/1561] net: sparx5: move MAC table initialization and add deinit function Consolidate all MAC table initialization from sparx5_start() into sparx5_mact_init(), move it to probe(), and add a deinit function for proper teardown. Also, make sparx5_mact_pull_work() static since it is only used within sparx5_mactable.c. Signed-off-by: Daniel Machon Link: https://patch.msgid.link/20260227-sparx5-init-deinit-v2-3-10ba54ccf005@microchip.com Signed-off-by: Jakub Kicinski --- .../microchip/sparx5/sparx5_mactable.c | 34 +++++++++++++++- .../ethernet/microchip/sparx5/sparx5_main.c | 39 +++++-------------- .../ethernet/microchip/sparx5/sparx5_main.h | 4 +- 3 files changed, 44 insertions(+), 33 deletions(-) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c b/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c index f5584244612c..2bf9c5f64151 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c @@ -419,7 +419,7 @@ static void sparx5_mact_handle_entry(struct sparx5 *sparx5, true); } -void sparx5_mact_pull_work(struct work_struct *work) +static void sparx5_mact_pull_work(struct work_struct *work) { struct delayed_work *del_work = to_delayed_work(work); struct sparx5 *sparx5 = container_of(del_work, struct sparx5, @@ -489,8 +489,10 @@ void sparx5_set_ageing(struct sparx5 *sparx5, int msecs) LRN_AUTOAGE_CFG(0)); } -void sparx5_mact_init(struct sparx5 *sparx5) +int sparx5_mact_init(struct sparx5 *sparx5) { + char queue_name[32]; + mutex_init(&sparx5->lock); /* Flush MAC table */ @@ -502,4 +504,32 @@ void sparx5_mact_init(struct sparx5 *sparx5) dev_warn(sparx5->dev, "MAC flush error\n"); sparx5_set_ageing(sparx5, BR_DEFAULT_AGEING_TIME / HZ * 1000); + + /* Add host mode BC address (points only to CPU) */ + sparx5_mact_learn(sparx5, sparx5_get_pgid(sparx5, PGID_CPU), + (unsigned char[]){0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + NULL_VID); + + mutex_init(&sparx5->mdb_lock); + INIT_LIST_HEAD(&sparx5->mdb_entries); + mutex_init(&sparx5->mact_lock); + INIT_LIST_HEAD(&sparx5->mact_entries); + snprintf(queue_name, sizeof(queue_name), "%s-mact", + dev_name(sparx5->dev)); + sparx5->mact_queue = create_singlethread_workqueue(queue_name); + if (!sparx5->mact_queue) + return -ENOMEM; + + INIT_DELAYED_WORK(&sparx5->mact_work, sparx5_mact_pull_work); + queue_delayed_work(sparx5->mact_queue, &sparx5->mact_work, + SPX5_MACT_PULL_DELAY); + + return 0; +} + +void sparx5_mact_deinit(struct sparx5 *sparx5) +{ + cancel_delayed_work_sync(&sparx5->mact_work); + destroy_workqueue(sparx5->mact_queue); + mutex_destroy(&sparx5->mact_lock); } diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index dd313b29b7aa..e7281514666b 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -685,10 +685,8 @@ static void sparx5_board_init(struct sparx5 *sparx5) static int sparx5_start(struct sparx5 *sparx5) { - u8 broadcast[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; const struct sparx5_consts *consts = sparx5->data->consts; const struct sparx5_ops *ops = sparx5->data->ops; - char queue_name[32]; u32 idx; int err; @@ -728,19 +726,12 @@ static int sparx5_start(struct sparx5 *sparx5) ANA_CL_FILTER_CTRL_FORCE_FCS_UPDATE_ENA, sparx5, ANA_CL_FILTER_CTRL(idx)); - /* Init MAC table, ageing */ - sparx5_mact_init(sparx5); - /* Init PGID table arbitrator */ sparx5_pgid_init(sparx5); /* Setup VLANs */ sparx5_vlan_init(sparx5); - /* Add host mode BC address (points only to CPU) */ - sparx5_mact_learn(sparx5, sparx5_get_pgid(sparx5, PGID_CPU), broadcast, - NULL_VID); - /* Enable queue limitation watermarks */ sparx5_qlim_set(sparx5); @@ -757,22 +748,6 @@ static int sparx5_start(struct sparx5 *sparx5) if (err) return err; - /* Init mact_sw struct */ - mutex_init(&sparx5->mact_lock); - INIT_LIST_HEAD(&sparx5->mact_entries); - snprintf(queue_name, sizeof(queue_name), "%s-mact", - dev_name(sparx5->dev)); - sparx5->mact_queue = create_singlethread_workqueue(queue_name); - if (!sparx5->mact_queue) - return -ENOMEM; - - INIT_DELAYED_WORK(&sparx5->mact_work, sparx5_mact_pull_work); - queue_delayed_work(sparx5->mact_queue, &sparx5->mact_work, - SPX5_MACT_PULL_DELAY); - - mutex_init(&sparx5->mdb_lock); - INIT_LIST_HEAD(&sparx5->mdb_entries); - sparx5_board_init(sparx5); /* Start Frame DMA with fallback to register based INJ/XTR */ @@ -1004,12 +979,18 @@ static int mchp_sparx5_probe(struct platform_device *pdev) goto cleanup_ptp; } + err = sparx5_mact_init(sparx5); + if (err) { + dev_err(sparx5->dev, "Failed to initialize MAC table\n"); + goto cleanup_vcap; + } + INIT_LIST_HEAD(&sparx5->mall_entries); err = sparx5_register_netdevs(sparx5); if (err) { dev_err(sparx5->dev, "Failed to register net devices\n"); - goto cleanup_vcap; + goto cleanup_mact; } err = sparx5_register_notifier_blocks(sparx5); @@ -1022,14 +1003,14 @@ static int mchp_sparx5_probe(struct platform_device *pdev) cleanup_netdevs: sparx5_unregister_netdevs(sparx5); +cleanup_mact: + sparx5_mact_deinit(sparx5); cleanup_vcap: sparx5_vcap_deinit(sparx5); cleanup_ptp: sparx5_ptp_deinit(sparx5); cleanup_ports: sparx5_destroy_netdevs(sparx5); - if (sparx5->mact_queue) - destroy_workqueue(sparx5->mact_queue); cleanup_config: kfree(configs); cleanup_pnode: @@ -1053,11 +1034,11 @@ static void mchp_sparx5_remove(struct platform_device *pdev) } sparx5_unregister_notifier_blocks(sparx5); sparx5_unregister_netdevs(sparx5); + sparx5_mact_deinit(sparx5); sparx5_vcap_deinit(sparx5); sparx5_ptp_deinit(sparx5); ops->fdma_deinit(sparx5); sparx5_destroy_netdevs(sparx5); - destroy_workqueue(sparx5->mact_queue); } static const struct sparx5_regs sparx5_regs = { diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h index 6a069434fca6..e4c39cca7b26 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h @@ -470,7 +470,6 @@ void sparx5_fdma_reload(struct sparx5 *sparx5, struct fdma *fdma); void sparx5_fdma_injection_mode(struct sparx5 *sparx5); /* sparx5_mactable.c */ -void sparx5_mact_pull_work(struct work_struct *work); int sparx5_mact_learn(struct sparx5 *sparx5, int port, const unsigned char mac[ETH_ALEN], u16 vid); bool sparx5_mact_getnext(struct sparx5 *sparx5, @@ -489,7 +488,8 @@ int sparx5_del_mact_entry(struct sparx5 *sparx5, int sparx5_mc_sync(struct net_device *dev, const unsigned char *addr); int sparx5_mc_unsync(struct net_device *dev, const unsigned char *addr); void sparx5_set_ageing(struct sparx5 *sparx5, int msecs); -void sparx5_mact_init(struct sparx5 *sparx5); +int sparx5_mact_init(struct sparx5 *sparx5); +void sparx5_mact_deinit(struct sparx5 *sparx5); /* sparx5_vlan.c */ void sparx5_pgid_update_mask(struct sparx5_port *port, int pgid, bool enable); From e180067a03cad7043d951b0040f0169a267f0293 Mon Sep 17 00:00:00 2001 From: Daniel Machon Date: Fri, 27 Feb 2026 15:56:42 +0100 Subject: [PATCH 0190/1561] net: sparx5: move stats initialization and add deinit function The sparx5_stats_init() function starts a worker thread which needs to be cleaned up. Move the initialization code to probe() and add a deinit() function for proper teardown. Also, rename sparx_stats_init() to sparx5_stats_init() to match the driver naming convention. Signed-off-by: Daniel Machon Link: https://patch.msgid.link/20260227-sparx5-init-deinit-v2-4-10ba54ccf005@microchip.com Signed-off-by: Jakub Kicinski --- .../ethernet/microchip/sparx5/sparx5_ethtool.c | 9 ++++++++- .../net/ethernet/microchip/sparx5/sparx5_main.c | 16 ++++++++++------ .../net/ethernet/microchip/sparx5/sparx5_main.h | 3 ++- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c b/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c index 049541eeaae0..d42c57bead89 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c @@ -1244,7 +1244,7 @@ const struct ethtool_ops sparx5_ethtool_ops = { .set_pauseparam = sparx5_set_pauseparam, }; -int sparx_stats_init(struct sparx5 *sparx5) +int sparx5_stats_init(struct sparx5 *sparx5) { const struct sparx5_consts *consts = sparx5->data->consts; char queue_name[32]; @@ -1278,3 +1278,10 @@ int sparx_stats_init(struct sparx5 *sparx5) return 0; } + +void sparx5_stats_deinit(struct sparx5 *sparx5) +{ + cancel_delayed_work_sync(&sparx5->stats_work); + destroy_workqueue(sparx5->stats_queue); + mutex_destroy(&sparx5->queue_stats_lock); +} diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index e7281514666b..cc0c32d7e886 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -743,11 +743,6 @@ static int sparx5_start(struct sparx5 *sparx5) if (err) return err; - /* Init stats */ - err = sparx_stats_init(sparx5); - if (err) - return err; - sparx5_board_init(sparx5); /* Start Frame DMA with fallback to register based INJ/XTR */ @@ -985,12 +980,18 @@ static int mchp_sparx5_probe(struct platform_device *pdev) goto cleanup_vcap; } + err = sparx5_stats_init(sparx5); + if (err) { + dev_err(sparx5->dev, "Failed to initialize stats\n"); + goto cleanup_mact; + } + INIT_LIST_HEAD(&sparx5->mall_entries); err = sparx5_register_netdevs(sparx5); if (err) { dev_err(sparx5->dev, "Failed to register net devices\n"); - goto cleanup_mact; + goto cleanup_stats; } err = sparx5_register_notifier_blocks(sparx5); @@ -1003,6 +1004,8 @@ static int mchp_sparx5_probe(struct platform_device *pdev) cleanup_netdevs: sparx5_unregister_netdevs(sparx5); +cleanup_stats: + sparx5_stats_deinit(sparx5); cleanup_mact: sparx5_mact_deinit(sparx5); cleanup_vcap: @@ -1034,6 +1037,7 @@ static void mchp_sparx5_remove(struct platform_device *pdev) } sparx5_unregister_notifier_blocks(sparx5); sparx5_unregister_netdevs(sparx5); + sparx5_stats_deinit(sparx5); sparx5_mact_deinit(sparx5); sparx5_vcap_deinit(sparx5); sparx5_ptp_deinit(sparx5); diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h index e4c39cca7b26..97d53e229ad6 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h @@ -514,7 +514,8 @@ enum sparx5_cal_bw sparx5_get_port_cal_speed(struct sparx5 *sparx5, u32 portno); /* sparx5_ethtool.c */ void sparx5_get_stats64(struct net_device *ndev, struct rtnl_link_stats64 *stats); -int sparx_stats_init(struct sparx5 *sparx5); +int sparx5_stats_init(struct sparx5 *sparx5); +void sparx5_stats_deinit(struct sparx5 *sparx5); /* sparx5_dcb.c */ #ifdef CONFIG_SPARX5_DCB From 274182ff34fdd85f712c2b6e624e1c85c6696685 Mon Sep 17 00:00:00 2001 From: Daniel Machon Date: Fri, 27 Feb 2026 15:56:43 +0100 Subject: [PATCH 0191/1561] net: sparx5: move calendar initialization to probe Move the calendar initialization from sparx5_start() to probe() by creating a new sparx5_calendar_init() wrapper function that calls both sparx5_config_auto_calendar() and sparx5_config_dsm_calendar(). Calendar initialization does not require cleanup. Also, make the individual calendar config functions static since they are now only called from within sparx5_calendar.c. Signed-off-by: Daniel Machon Link: https://patch.msgid.link/20260227-sparx5-init-deinit-v2-5-10ba54ccf005@microchip.com Signed-off-by: Jakub Kicinski --- .../ethernet/microchip/sparx5/sparx5_calendar.c | 15 +++++++++++++-- .../net/ethernet/microchip/sparx5/sparx5_main.c | 14 ++++++-------- .../net/ethernet/microchip/sparx5/sparx5_main.h | 3 +-- 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_calendar.c b/drivers/net/ethernet/microchip/sparx5/sparx5_calendar.c index 8c67ff1d411b..35815fb58080 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_calendar.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_calendar.c @@ -151,7 +151,7 @@ enum sparx5_cal_bw sparx5_get_port_cal_speed(struct sparx5 *sparx5, u32 portno) } /* Auto configure the QSYS calendar based on port configuration */ -int sparx5_config_auto_calendar(struct sparx5 *sparx5) +static int sparx5_config_auto_calendar(struct sparx5 *sparx5) { const struct sparx5_consts *consts = sparx5->data->consts; u32 cal[7], value, idx, portno; @@ -578,7 +578,7 @@ update_err: } /* Configure the DSM calendar based on port configuration */ -int sparx5_config_dsm_calendar(struct sparx5 *sparx5) +static int sparx5_config_dsm_calendar(struct sparx5 *sparx5) { const struct sparx5_ops *ops = sparx5->data->ops; int taxi; @@ -610,3 +610,14 @@ cal_out: kfree(data); return err; } + +int sparx5_calendar_init(struct sparx5 *sparx5) +{ + int err; + + err = sparx5_config_auto_calendar(sparx5); + if (err) + return err; + + return sparx5_config_dsm_calendar(sparx5); +} diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index cc0c32d7e886..46ddd7a5fb61 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -735,14 +735,6 @@ static int sparx5_start(struct sparx5 *sparx5) /* Enable queue limitation watermarks */ sparx5_qlim_set(sparx5); - err = sparx5_config_auto_calendar(sparx5); - if (err) - return err; - - err = sparx5_config_dsm_calendar(sparx5); - if (err) - return err; - sparx5_board_init(sparx5); /* Start Frame DMA with fallback to register based INJ/XTR */ @@ -956,6 +948,12 @@ static int mchp_sparx5_probe(struct platform_device *pdev) goto cleanup_ports; } + err = sparx5_calendar_init(sparx5); + if (err) { + dev_err(sparx5->dev, "Failed to initialize calendar\n"); + goto cleanup_ports; + } + err = sparx5_qos_init(sparx5); if (err) { dev_err(sparx5->dev, "Failed to initialize QoS\n"); diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h index 97d53e229ad6..6a745bb71b5c 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h @@ -504,8 +504,7 @@ int sparx5_vlan_vid_del(struct sparx5_port *port, u16 vid); void sparx5_vlan_port_apply(struct sparx5 *sparx5, struct sparx5_port *port); /* sparx5_calendar.c */ -int sparx5_config_auto_calendar(struct sparx5 *sparx5); -int sparx5_config_dsm_calendar(struct sparx5 *sparx5); +int sparx5_calendar_init(struct sparx5 *sparx5); int sparx5_dsm_calendar_calc(struct sparx5 *sparx5, u32 taxi, struct sparx5_calendar_data *data); u32 sparx5_cal_speed_to_value(enum sparx5_cal_bw speed); From cdc374359fe8c99d4519ac331324a344ff89d44d Mon Sep 17 00:00:00 2001 From: Daniel Machon Date: Fri, 27 Feb 2026 15:56:44 +0100 Subject: [PATCH 0192/1561] net: sparx5: move remaining init functions from start() to probe() Move sparx5_pgid_init(), sparx5_vlan_init(), and sparx5_board_init() from sparx5_start() to probe(). These functions do not require cleanup. Signed-off-by: Daniel Machon Link: https://patch.msgid.link/20260227-sparx5-init-deinit-v2-6-10ba54ccf005@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/sparx5/sparx5_main.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index 46ddd7a5fb61..c2f8a6635715 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -726,17 +726,9 @@ static int sparx5_start(struct sparx5 *sparx5) ANA_CL_FILTER_CTRL_FORCE_FCS_UPDATE_ENA, sparx5, ANA_CL_FILTER_CTRL(idx)); - /* Init PGID table arbitrator */ - sparx5_pgid_init(sparx5); - - /* Setup VLANs */ - sparx5_vlan_init(sparx5); - /* Enable queue limitation watermarks */ sparx5_qlim_set(sparx5); - sparx5_board_init(sparx5); - /* Start Frame DMA with fallback to register based INJ/XTR */ err = -ENXIO; if (sparx5->fdma_irq >= 0) { @@ -942,6 +934,10 @@ static int mchp_sparx5_probe(struct platform_device *pdev) } } + sparx5_pgid_init(sparx5); + sparx5_vlan_init(sparx5); + sparx5_board_init(sparx5); + err = sparx5_start(sparx5); if (err) { dev_err(sparx5->dev, "Start failed\n"); From 0432c60112b4fe3ebaf5c2c6bd859b9666233a03 Mon Sep 17 00:00:00 2001 From: Daniel Machon Date: Fri, 27 Feb 2026 15:56:45 +0100 Subject: [PATCH 0193/1561] net: sparx5: move PTP IRQ handling out of sparx5_start() Move the PTP IRQ request into sparx5_ptp_init() so all PTP setup is done in one place. Also move the sparx5_ptp_init() call to right before sparx5_register_netdevs() and add a cleanup_ptp label. Update remove() to disable the PTP IRQ and reorder ptp_deinit accordingly. Signed-off-by: Daniel Machon Link: https://patch.msgid.link/20260227-sparx5-init-deinit-v2-7-10ba54ccf005@microchip.com Signed-off-by: Jakub Kicinski --- .../ethernet/microchip/sparx5/sparx5_main.c | 34 ++++++------------- .../ethernet/microchip/sparx5/sparx5_ptp.c | 18 ++++++++++ 2 files changed, 29 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index c2f8a6635715..bea3959be584 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -761,18 +761,6 @@ static int sparx5_start(struct sparx5 *sparx5) sparx5->xtr_irq = -ENXIO; } - if (sparx5->ptp_irq >= 0 && - sparx5_has_feature(sparx5, SPX5_FEATURE_PTP)) { - err = devm_request_threaded_irq(sparx5->dev, sparx5->ptp_irq, - NULL, ops->ptp_irq_handler, - IRQF_ONESHOT, "sparx5-ptp", - sparx5); - if (err) - sparx5->ptp_irq = -ENXIO; - - sparx5->ptp = 1; - } - return err; } @@ -956,16 +944,10 @@ static int mchp_sparx5_probe(struct platform_device *pdev) goto cleanup_ports; } - err = sparx5_ptp_init(sparx5); - if (err) { - dev_err(sparx5->dev, "PTP failed\n"); - goto cleanup_ports; - } - err = sparx5_vcap_init(sparx5); if (err) { dev_err(sparx5->dev, "Failed to initialize VCAP\n"); - goto cleanup_ptp; + goto cleanup_ports; } err = sparx5_mact_init(sparx5); @@ -982,10 +964,16 @@ static int mchp_sparx5_probe(struct platform_device *pdev) INIT_LIST_HEAD(&sparx5->mall_entries); + err = sparx5_ptp_init(sparx5); + if (err) { + dev_err(sparx5->dev, "Failed to initialize PTP\n"); + goto cleanup_stats; + } + err = sparx5_register_netdevs(sparx5); if (err) { dev_err(sparx5->dev, "Failed to register net devices\n"); - goto cleanup_stats; + goto cleanup_ptp; } err = sparx5_register_notifier_blocks(sparx5); @@ -998,14 +986,14 @@ static int mchp_sparx5_probe(struct platform_device *pdev) cleanup_netdevs: sparx5_unregister_netdevs(sparx5); +cleanup_ptp: + sparx5_ptp_deinit(sparx5); cleanup_stats: sparx5_stats_deinit(sparx5); cleanup_mact: sparx5_mact_deinit(sparx5); cleanup_vcap: sparx5_vcap_deinit(sparx5); -cleanup_ptp: - sparx5_ptp_deinit(sparx5); cleanup_ports: sparx5_destroy_netdevs(sparx5); cleanup_config: @@ -1031,10 +1019,10 @@ static void mchp_sparx5_remove(struct platform_device *pdev) } sparx5_unregister_notifier_blocks(sparx5); sparx5_unregister_netdevs(sparx5); + sparx5_ptp_deinit(sparx5); sparx5_stats_deinit(sparx5); sparx5_mact_deinit(sparx5); sparx5_vcap_deinit(sparx5); - sparx5_ptp_deinit(sparx5); ops->fdma_deinit(sparx5); sparx5_destroy_netdevs(sparx5); } diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_ptp.c b/drivers/net/ethernet/microchip/sparx5/sparx5_ptp.c index 8b2e07821a95..a16ec8136d6d 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_ptp.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_ptp.c @@ -606,9 +606,22 @@ static int sparx5_ptp_phc_init(struct sparx5 *sparx5, int sparx5_ptp_init(struct sparx5 *sparx5) { u64 tod_adj = sparx5_ptp_get_nominal_value(sparx5); + const struct sparx5_ops *ops = sparx5->data->ops; struct sparx5_port *port; int err, i; + if (sparx5->ptp_irq >= 0 && + sparx5_has_feature(sparx5, SPX5_FEATURE_PTP)) { + err = devm_request_threaded_irq(sparx5->dev, sparx5->ptp_irq, + NULL, ops->ptp_irq_handler, + IRQF_ONESHOT, "sparx5-ptp", + sparx5); + if (err) + sparx5->ptp_irq = -ENXIO; + + sparx5->ptp = 1; + } + if (!sparx5->ptp) return 0; @@ -660,6 +673,11 @@ void sparx5_ptp_deinit(struct sparx5 *sparx5) struct sparx5_port *port; int i; + if (sparx5->ptp_irq >= 0) { + disable_irq(sparx5->ptp_irq); + sparx5->ptp_irq = -ENXIO; + } + for (i = 0; i < sparx5->data->consts->n_ports; i++) { port = sparx5->ports[i]; if (!port) From 8b1e4a6747b8f1bfb835967f57435ebf5e928e37 Mon Sep 17 00:00:00 2001 From: Daniel Machon Date: Fri, 27 Feb 2026 15:56:46 +0100 Subject: [PATCH 0194/1561] net: sparx5: move FDMA/XTR initialization out of sparx5_start() Move the Frame DMA and register-based extraction initialization out of sparx5_start() and into a new sparx5_frame_io_init() function, called from probe(). Also, add sparx5_frame_io_deinit() for the cleanup path. Signed-off-by: Daniel Machon Link: https://patch.msgid.link/20260227-sparx5-init-deinit-v2-8-10ba54ccf005@microchip.com Signed-off-by: Jakub Kicinski --- .../ethernet/microchip/sparx5/sparx5_main.c | 109 ++++++++++-------- 1 file changed, 63 insertions(+), 46 deletions(-) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index bea3959be584..d86a5d1f495f 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -658,6 +658,58 @@ static int sparx5_qlim_set(struct sparx5 *sparx5) return 0; } +static int sparx5_frame_io_init(struct sparx5 *sparx5) +{ + const struct sparx5_ops *ops = sparx5->data->ops; + int err = -ENXIO; + + /* Start Frame DMA with fallback to register based INJ/XTR */ + if (sparx5->fdma_irq >= 0) { + if (GCB_CHIP_ID_REV_ID_GET(sparx5->chip_id) > 0 || + !is_sparx5(sparx5)) + err = devm_request_irq(sparx5->dev, + sparx5->fdma_irq, + sparx5_fdma_handler, + 0, + "sparx5-fdma", sparx5); + if (!err) { + err = ops->fdma_init(sparx5); + if (!err) + sparx5_fdma_start(sparx5); + } + if (err) + sparx5->fdma_irq = -ENXIO; + } else { + sparx5->fdma_irq = -ENXIO; + } + if (err && sparx5->xtr_irq >= 0) { + err = devm_request_irq(sparx5->dev, sparx5->xtr_irq, + sparx5_xtr_handler, IRQF_SHARED, + "sparx5-xtr", sparx5); + if (!err) + err = sparx5_manual_injection_mode(sparx5); + if (err) + sparx5->xtr_irq = -ENXIO; + } else { + sparx5->xtr_irq = -ENXIO; + } + + return err; +} + +static void sparx5_frame_io_deinit(struct sparx5 *sparx5) +{ + if (sparx5->xtr_irq >= 0) { + disable_irq(sparx5->xtr_irq); + sparx5->xtr_irq = -ENXIO; + } + if (sparx5->fdma_irq >= 0) { + disable_irq(sparx5->fdma_irq); + sparx5->data->ops->fdma_deinit(sparx5); + sparx5->fdma_irq = -ENXIO; + } +} + /* Some boards needs to map the SGPIO for signal detect explicitly to the * port module */ @@ -686,9 +738,7 @@ static void sparx5_board_init(struct sparx5 *sparx5) static int sparx5_start(struct sparx5 *sparx5) { const struct sparx5_consts *consts = sparx5->data->consts; - const struct sparx5_ops *ops = sparx5->data->ops; u32 idx; - int err; /* Setup own UPSIDs */ for (idx = 0; idx < consts->n_own_upsids; idx++) { @@ -729,39 +779,7 @@ static int sparx5_start(struct sparx5 *sparx5) /* Enable queue limitation watermarks */ sparx5_qlim_set(sparx5); - /* Start Frame DMA with fallback to register based INJ/XTR */ - err = -ENXIO; - if (sparx5->fdma_irq >= 0) { - if (GCB_CHIP_ID_REV_ID_GET(sparx5->chip_id) > 0 || - !is_sparx5(sparx5)) - err = devm_request_irq(sparx5->dev, - sparx5->fdma_irq, - sparx5_fdma_handler, - 0, - "sparx5-fdma", sparx5); - if (!err) { - err = ops->fdma_init(sparx5); - if (!err) - sparx5_fdma_start(sparx5); - } - if (err) - sparx5->fdma_irq = -ENXIO; - } else { - sparx5->fdma_irq = -ENXIO; - } - if (err && sparx5->xtr_irq >= 0) { - err = devm_request_irq(sparx5->dev, sparx5->xtr_irq, - sparx5_xtr_handler, IRQF_SHARED, - "sparx5-xtr", sparx5); - if (!err) - err = sparx5_manual_injection_mode(sparx5); - if (err) - sparx5->xtr_irq = -ENXIO; - } else { - sparx5->xtr_irq = -ENXIO; - } - - return err; + return 0; } static int mchp_sparx5_probe(struct platform_device *pdev) @@ -964,10 +982,16 @@ static int mchp_sparx5_probe(struct platform_device *pdev) INIT_LIST_HEAD(&sparx5->mall_entries); + err = sparx5_frame_io_init(sparx5); + if (err) { + dev_err(sparx5->dev, "Failed to initialize frame I/O\n"); + goto cleanup_stats; + } + err = sparx5_ptp_init(sparx5); if (err) { dev_err(sparx5->dev, "Failed to initialize PTP\n"); - goto cleanup_stats; + goto cleanup_frame_io; } err = sparx5_register_netdevs(sparx5); @@ -988,6 +1012,8 @@ cleanup_netdevs: sparx5_unregister_netdevs(sparx5); cleanup_ptp: sparx5_ptp_deinit(sparx5); +cleanup_frame_io: + sparx5_frame_io_deinit(sparx5); cleanup_stats: sparx5_stats_deinit(sparx5); cleanup_mact: @@ -1006,24 +1032,15 @@ cleanup_pnode: static void mchp_sparx5_remove(struct platform_device *pdev) { struct sparx5 *sparx5 = platform_get_drvdata(pdev); - const struct sparx5_ops *ops = sparx5->data->ops; debugfs_remove_recursive(sparx5->debugfs_root); - if (sparx5->xtr_irq) { - disable_irq(sparx5->xtr_irq); - sparx5->xtr_irq = -ENXIO; - } - if (sparx5->fdma_irq) { - disable_irq(sparx5->fdma_irq); - sparx5->fdma_irq = -ENXIO; - } sparx5_unregister_notifier_blocks(sparx5); sparx5_unregister_netdevs(sparx5); sparx5_ptp_deinit(sparx5); + sparx5_frame_io_deinit(sparx5); sparx5_stats_deinit(sparx5); sparx5_mact_deinit(sparx5); sparx5_vcap_deinit(sparx5); - ops->fdma_deinit(sparx5); sparx5_destroy_netdevs(sparx5); } From 1e540c4d8f3266fac261a5e9aaa423d2c2c1c5e5 Mon Sep 17 00:00:00 2001 From: Daniel Machon Date: Fri, 27 Feb 2026 15:56:47 +0100 Subject: [PATCH 0195/1561] net: sparx5: replace sparx5_start() with sparx5_forwarding_init() With all subsystem initializations moved out, sparx5_start() only sets up forwarding (UPSIDs, CPU ports, masks, PGIDs, FCS, watermarks). Rename it to sparx5_forwarding_init() and make it void since it cannot fail. This removes sparx5_start() entirely. Signed-off-by: Daniel Machon Link: https://patch.msgid.link/20260227-sparx5-init-deinit-v2-9-10ba54ccf005@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/sparx5/sparx5_main.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index d86a5d1f495f..dad713e9ddd5 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -735,7 +735,7 @@ static void sparx5_board_init(struct sparx5 *sparx5) GCB_HW_SGPIO_TO_SD_MAP_CFG(idx)); } -static int sparx5_start(struct sparx5 *sparx5) +static void sparx5_forwarding_init(struct sparx5 *sparx5) { const struct sparx5_consts *consts = sparx5->data->consts; u32 idx; @@ -779,7 +779,6 @@ static int sparx5_start(struct sparx5 *sparx5) /* Enable queue limitation watermarks */ sparx5_qlim_set(sparx5); - return 0; } static int mchp_sparx5_probe(struct platform_device *pdev) @@ -943,12 +942,7 @@ static int mchp_sparx5_probe(struct platform_device *pdev) sparx5_pgid_init(sparx5); sparx5_vlan_init(sparx5); sparx5_board_init(sparx5); - - err = sparx5_start(sparx5); - if (err) { - dev_err(sparx5->dev, "Start failed\n"); - goto cleanup_ports; - } + sparx5_forwarding_init(sparx5); err = sparx5_calendar_init(sparx5); if (err) { From a0e8c9a5060fbdb72fca767164467a3cf2b8fc30 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 28 Feb 2026 17:57:15 +0000 Subject: [PATCH 0196/1561] mpls: remove test against ipv6_stub ipv6_stub is never NULL, let's remove this test. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260228175715.1195536-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/mpls/af_mpls.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index ef9e749d5e08..ae85a7654b1f 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -640,9 +640,6 @@ static struct net_device *inet6_fib_lookup_dev(struct net *net, struct dst_entry *dst; struct flowi6 fl6; - if (!ipv6_stub) - return ERR_PTR(-EAFNOSUPPORT); - memset(&fl6, 0, sizeof(fl6)); memcpy(&fl6.daddr, addr, sizeof(struct in6_addr)); dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL); From 05068eaa67b29963c1249c3032658968f64993e6 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 28 Feb 2026 22:17:19 +0000 Subject: [PATCH 0197/1561] selftest: net: Add basic functionality tests for ipmr. The new test exercise paths, where RTNL is needed, to catch lockdep splat: setsockopt MRT_INIT / MRT_DONE MRT_ADD_VIF / MRT_DEL_VIF MRT_ADD_MFC / MRT_DEL_MFC / MRT_ADD_MFC_PROXY / MRT_DEL_MFC_PROXY MRT_TABLE MRT_FLUSH rtnetlink RTM_NEWROUTE RTM_DELROUTE NETDEV_UNREGISTER I will extend this to cover IPv6 setsockopt() later. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260228221800.1082070-2-kuniyu@google.com Signed-off-by: Jakub Kicinski --- .../selftests/net/forwarding/.gitignore | 1 + .../testing/selftests/net/forwarding/Makefile | 4 + tools/testing/selftests/net/forwarding/ipmr.c | 455 ++++++++++++++++++ 3 files changed, 460 insertions(+) create mode 100644 tools/testing/selftests/net/forwarding/ipmr.c diff --git a/tools/testing/selftests/net/forwarding/.gitignore b/tools/testing/selftests/net/forwarding/.gitignore index 2dea317f12e7..418ff96c52ef 100644 --- a/tools/testing/selftests/net/forwarding/.gitignore +++ b/tools/testing/selftests/net/forwarding/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only forwarding.config +ipmr diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index ff4a00d91a26..bbaf4d937dd8 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -133,6 +133,10 @@ TEST_FILES := \ tc_common.sh \ # end of TEST_FILES +TEST_GEN_PROGS := \ + ipmr +# end of TEST_GEN_PROGS + TEST_INCLUDES := \ $(wildcard ../lib/sh/*.sh) \ ../lib.sh \ diff --git a/tools/testing/selftests/net/forwarding/ipmr.c b/tools/testing/selftests/net/forwarding/ipmr.c new file mode 100644 index 000000000000..df870aad9ead --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ipmr.c @@ -0,0 +1,455 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2026 Google LLC */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kselftest_harness.h" + +FIXTURE(ipmr) +{ + int netlink_sk; + int raw_sk; + int veth_ifindex; +}; + +FIXTURE_VARIANT(ipmr) +{ + int family; + int protocol; + int level; + int opts[MRT_MAX - MRT_BASE + 1]; +}; + +FIXTURE_VARIANT_ADD(ipmr, ipv4) +{ + .family = AF_INET, + .protocol = IPPROTO_IGMP, + .level = IPPROTO_IP, + .opts = { + MRT_INIT, + MRT_DONE, + MRT_ADD_VIF, + MRT_DEL_VIF, + MRT_ADD_MFC, + MRT_DEL_MFC, + MRT_VERSION, + MRT_ASSERT, + MRT_PIM, + MRT_TABLE, + MRT_ADD_MFC_PROXY, + MRT_DEL_MFC_PROXY, + MRT_FLUSH, + }, +}; + +struct mfc_attr { + int table; + __u32 origin; + __u32 group; + int ifindex; + bool proxy; +}; + +static struct rtattr *nl_add_rtattr(struct nlmsghdr *nlmsg, struct rtattr *rta, + int type, const void *data, int len) +{ + int unused = 0; + + rta->rta_type = type; + rta->rta_len = RTA_LENGTH(len); + memcpy(RTA_DATA(rta), data, len); + + nlmsg->nlmsg_len += NLMSG_ALIGN(rta->rta_len); + + return RTA_NEXT(rta, unused); +} + +static int nl_sendmsg_mfc(struct __test_metadata *_metadata, FIXTURE_DATA(ipmr) *self, + __u16 nlmsg_type, struct mfc_attr *mfc_attr) +{ + struct { + struct nlmsghdr nlmsg; + struct rtmsg rtm; + char buf[4096]; + } req = { + .nlmsg = { + .nlmsg_len = NLMSG_LENGTH(sizeof(req.rtm)), + /* ipmr does not care about NLM_F_CREATE and NLM_F_EXCL ... */ + .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, + .nlmsg_type = nlmsg_type, + }, + .rtm = { + /* hard requirements in rtm_to_ipmr_mfcc() */ + .rtm_family = RTNL_FAMILY_IPMR, + .rtm_dst_len = 32, + .rtm_type = RTN_MULTICAST, + .rtm_scope = RT_SCOPE_UNIVERSE, + .rtm_protocol = RTPROT_MROUTED, + }, + }; + struct nlmsghdr *nlmsg = &req.nlmsg; + struct nlmsgerr *errmsg; + struct rtattr *rta; + int err; + + rta = (struct rtattr *)&req.buf; + rta = nl_add_rtattr(nlmsg, rta, RTA_TABLE, &mfc_attr->table, sizeof(mfc_attr->table)); + rta = nl_add_rtattr(nlmsg, rta, RTA_SRC, &mfc_attr->origin, sizeof(mfc_attr->origin)); + rta = nl_add_rtattr(nlmsg, rta, RTA_DST, &mfc_attr->group, sizeof(mfc_attr->group)); + if (mfc_attr->ifindex) + rta = nl_add_rtattr(nlmsg, rta, RTA_IIF, &mfc_attr->ifindex, sizeof(mfc_attr->ifindex)); + if (mfc_attr->proxy) + rta = nl_add_rtattr(nlmsg, rta, RTA_PREFSRC, NULL, 0); + + err = send(self->netlink_sk, &req, req.nlmsg.nlmsg_len, 0); + ASSERT_EQ(err, req.nlmsg.nlmsg_len); + + memset(&req, 0, sizeof(req)); + + err = recv(self->netlink_sk, &req, sizeof(req), 0); + ASSERT_TRUE(NLMSG_OK(nlmsg, err)); + ASSERT_EQ(NLMSG_ERROR, nlmsg->nlmsg_type); + + errmsg = (struct nlmsgerr *)NLMSG_DATA(nlmsg); + return errmsg->error; +} + +FIXTURE_SETUP(ipmr) +{ + struct ifreq ifr = { + .ifr_name = "veth0", + }; + int err; + + err = unshare(CLONE_NEWNET); + ASSERT_EQ(0, err); + + self->netlink_sk = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + ASSERT_LE(0, self->netlink_sk); + + self->raw_sk = socket(variant->family, SOCK_RAW, variant->protocol); + ASSERT_LT(0, self->raw_sk); + + err = system("ip link add veth0 type veth peer veth1"); + ASSERT_EQ(0, err); + + err = ioctl(self->raw_sk, SIOCGIFINDEX, &ifr); + ASSERT_EQ(0, err); + + self->veth_ifindex = ifr.ifr_ifindex; +} + +FIXTURE_TEARDOWN(ipmr) +{ + close(self->raw_sk); + close(self->netlink_sk); +} + +TEST_F(ipmr, mrt_init) +{ + int err, val = 0; /* any value is ok, but size must be int for MRT_INIT. */ + + err = setsockopt(self->raw_sk, + variant->level, variant->opts[MRT_INIT - MRT_BASE], + &val, sizeof(val)); + ASSERT_EQ(0, err); + + err = setsockopt(self->raw_sk, + variant->level, variant->opts[MRT_DONE - MRT_BASE], + &val, sizeof(val)); + ASSERT_EQ(0, err); +} + +TEST_F(ipmr, mrt_add_vif_register) +{ + struct vifctl vif = { + .vifc_vifi = 0, + .vifc_flags = VIFF_REGISTER, + }; + int err; + + err = setsockopt(self->raw_sk, + variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE], + &vif, sizeof(vif)); + ASSERT_EQ(0, err); + + err = system("cat /proc/net/ip_mr_vif | grep -q pimreg"); + ASSERT_EQ(0, err); + + err = setsockopt(self->raw_sk, + variant->level, variant->opts[MRT_DEL_VIF - MRT_BASE], + &vif, sizeof(vif)); + ASSERT_EQ(0, err); +} + +TEST_F(ipmr, mrt_del_vif_unreg) +{ + struct vifctl vif = { + .vifc_vifi = 0, + .vifc_flags = VIFF_USE_IFINDEX, + .vifc_lcl_ifindex = self->veth_ifindex, + }; + int err; + + err = setsockopt(self->raw_sk, + variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE], + &vif, sizeof(vif)); + ASSERT_EQ(0, err); + + err = system("cat /proc/net/ip_mr_vif | grep -q veth0"); + ASSERT_EQ(0, err); + + /* VIF is removed along with its device. */ + err = system("ip link del veth0"); + ASSERT_EQ(0, err); + + /* mrt->vif_table[veth_ifindex]->dev is NULL. */ + err = setsockopt(self->raw_sk, + variant->level, variant->opts[MRT_DEL_VIF - MRT_BASE], + &vif, sizeof(vif)); + ASSERT_EQ(-1, err); + ASSERT_EQ(EADDRNOTAVAIL, errno); +} + +TEST_F(ipmr, mrt_del_vif_netns_dismantle) +{ + struct vifctl vif = { + .vifc_vifi = 0, + .vifc_flags = VIFF_USE_IFINDEX, + .vifc_lcl_ifindex = self->veth_ifindex, + }; + int err; + + err = setsockopt(self->raw_sk, + variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE], + &vif, sizeof(vif)); + ASSERT_EQ(0, err); + + /* Let cleanup_net() remove veth0 and VIF. */ +} + +TEST_F(ipmr, mrt_add_mfc) +{ + struct mfcctl mfc = {}; + int err; + + /* MRT_ADD_MFC / MRT_ADD_MFC_PROXY does not need vif to exist (unlike netlink). */ + err = setsockopt(self->raw_sk, + variant->level, variant->opts[MRT_ADD_MFC - MRT_BASE], + &mfc, sizeof(mfc)); + ASSERT_EQ(0, err); + + /* (0.0.0.0 -> 0.0.0.0) */ + err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' "); + ASSERT_EQ(0, err); + + err = setsockopt(self->raw_sk, + variant->level, variant->opts[MRT_DEL_MFC - MRT_BASE], + &mfc, sizeof(mfc)); +} + +TEST_F(ipmr, mrt_add_mfc_proxy) +{ + struct mfcctl mfc = {}; + int err; + + err = setsockopt(self->raw_sk, + variant->level, variant->opts[MRT_ADD_MFC_PROXY - MRT_BASE], + &mfc, sizeof(mfc)); + ASSERT_EQ(0, err); + + err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' "); + ASSERT_EQ(0, err); + + err = setsockopt(self->raw_sk, + variant->level, variant->opts[MRT_DEL_MFC_PROXY - MRT_BASE], + &mfc, sizeof(mfc)); +} + +TEST_F(ipmr, mrt_add_mfc_netlink) +{ + struct vifctl vif = { + .vifc_vifi = 0, + .vifc_flags = VIFF_USE_IFINDEX, + .vifc_lcl_ifindex = self->veth_ifindex, + }; + struct mfc_attr mfc_attr = { + .table = RT_TABLE_DEFAULT, + .origin = 0, + .group = 0, + .ifindex = self->veth_ifindex, + .proxy = false, + }; + int err; + + err = setsockopt(self->raw_sk, + variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE], + &vif, sizeof(vif)); + ASSERT_EQ(0, err); + + err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr); + ASSERT_EQ(0, err); + + err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' "); + ASSERT_EQ(0, err); + + err = nl_sendmsg_mfc(_metadata, self, RTM_DELROUTE, &mfc_attr); + ASSERT_EQ(0, err); +} + +TEST_F(ipmr, mrt_add_mfc_netlink_proxy) +{ + struct vifctl vif = { + .vifc_vifi = 0, + .vifc_flags = VIFF_USE_IFINDEX, + .vifc_lcl_ifindex = self->veth_ifindex, + }; + struct mfc_attr mfc_attr = { + .table = RT_TABLE_DEFAULT, + .origin = 0, + .group = 0, + .ifindex = self->veth_ifindex, + .proxy = true, + }; + int err; + + err = setsockopt(self->raw_sk, + variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE], + &vif, sizeof(vif)); + ASSERT_EQ(0, err); + + err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr); + ASSERT_EQ(0, err); + + err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' "); + ASSERT_EQ(0, err); + + err = nl_sendmsg_mfc(_metadata, self, RTM_DELROUTE, &mfc_attr); + ASSERT_EQ(0, err); +} + +TEST_F(ipmr, mrt_add_mfc_netlink_no_vif) +{ + struct mfc_attr mfc_attr = { + .table = RT_TABLE_DEFAULT, + .origin = 0, + .group = 0, + .proxy = false, + }; + int err; + + /* netlink always requires RTA_IIF of an existing vif. */ + mfc_attr.ifindex = 0; + err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr); + ASSERT_EQ(-ENFILE, err); + + /* netlink always requires RTA_IIF of an existing vif. */ + mfc_attr.ifindex = self->veth_ifindex; + err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr); + ASSERT_EQ(-ENFILE, err); +} + +TEST_F(ipmr, mrt_del_mfc_netlink_netns_dismantle) +{ + struct vifctl vifs[2] = { + { + .vifc_vifi = 0, + .vifc_flags = VIFF_USE_IFINDEX, + .vifc_lcl_ifindex = self->veth_ifindex, + }, + { + .vifc_vifi = 1, + .vifc_flags = VIFF_REGISTER, + } + }; + struct mfc_attr mfc_attr = { + .table = RT_TABLE_DEFAULT, + .origin = 0, + .group = 0, + .ifindex = self->veth_ifindex, + .proxy = false, + }; + int i, err; + + for (i = 0; i < 2; i++) { + /* Create 2 VIFs just to avoid -ENFILE later. */ + err = setsockopt(self->raw_sk, + variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE], + &vifs[i], sizeof(vifs[i])); + ASSERT_EQ(0, err); + } + + /* Create a MFC for mrt->vif_table[0]. */ + err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr); + ASSERT_EQ(0, err); + + err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' "); + ASSERT_EQ(0, err); + + /* Remove mrt->vif_table[0]. */ + err = system("ip link del veth0"); + ASSERT_EQ(0, err); + + /* MFC entry is NOT removed even if the tied VIF is removed... */ + err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' "); + ASSERT_EQ(0, err); + + /* ... and netlink is not capable of removing such an entry + * because netlink always requires a valid RTA_IIF ... :/ + */ + err = nl_sendmsg_mfc(_metadata, self, RTM_DELROUTE, &mfc_attr); + ASSERT_EQ(-ENODEV, err); + + /* It can be removed by setsockopt(), but let cleanup_net() remove this time. */ +} + +TEST_F(ipmr, mrt_table_flush) +{ + struct vifctl vif = { + .vifc_vifi = 0, + .vifc_flags = VIFF_USE_IFINDEX, + .vifc_lcl_ifindex = self->veth_ifindex, + }; + struct mfc_attr mfc_attr = { + .origin = 0, + .group = 0, + .ifindex = self->veth_ifindex, + .proxy = false, + }; + int table_id = 92; + int err, flags; + + /* Set a random table id rather than RT_TABLE_DEFAULT. + * Note that /proc/net/ip_mr_{vif,cache} only supports RT_TABLE_DEFAULT. + */ + err = setsockopt(self->raw_sk, + variant->level, variant->opts[MRT_TABLE - MRT_BASE], + &table_id, sizeof(table_id)); + ASSERT_EQ(0, err); + + err = setsockopt(self->raw_sk, + variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE], + &vif, sizeof(vif)); + ASSERT_EQ(0, err); + + mfc_attr.table = table_id; + err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr); + ASSERT_EQ(0, err); + + /* Flush mrt->vif_table[] and all caches. */ + flags = MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC | + MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC; + err = setsockopt(self->raw_sk, + variant->level, variant->opts[MRT_FLUSH - MRT_BASE], + &flags, sizeof(flags)); + ASSERT_EQ(0, err); +} + +TEST_HARNESS_MAIN From 261950e0390b70f1f17947423a36b8d9baae80f2 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 28 Feb 2026 22:17:20 +0000 Subject: [PATCH 0198/1561] ipmr: Annotate access to mrt->mroute_do_{pim,assert,wrvifwhole}. These fields in struct mr_table are updated in ip_mroute_setsockopt() under RTNL: * mroute_do_pim * mroute_do_assert * mroute_do_wrvifwhole However, ip_mroute_getsockopt() does not hold RTNL and read the first two fields locklessly, and ip_mr_forward() reads all the three under RCU. pim_rcv_v1() also reads mroute_do_pim locklessly. Let's use WRITE_ONCE() and READ_ONCE() for them. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260228221800.1082070-3-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/ipmr.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 131382c388e9..970f173654c7 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1506,7 +1506,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval, ret = -EFAULT; break; } - mrt->mroute_do_assert = val; + WRITE_ONCE(mrt->mroute_do_assert, val); break; case MRT_PIM: if (!ipmr_pimsm_enabled()) { @@ -1525,9 +1525,9 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval, do_wrvifwhole = (val == IGMPMSG_WRVIFWHOLE); val = !!val; if (val != mrt->mroute_do_pim) { - mrt->mroute_do_pim = val; - mrt->mroute_do_assert = val; - mrt->mroute_do_wrvifwhole = do_wrvifwhole; + WRITE_ONCE(mrt->mroute_do_pim, val); + WRITE_ONCE(mrt->mroute_do_assert, val); + WRITE_ONCE(mrt->mroute_do_wrvifwhole, do_wrvifwhole); } break; case MRT_TABLE: @@ -1610,10 +1610,10 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval, case MRT_PIM: if (!ipmr_pimsm_enabled()) return -ENOPROTOOPT; - val = mrt->mroute_do_pim; + val = READ_ONCE(mrt->mroute_do_pim); break; case MRT_ASSERT: - val = mrt->mroute_do_assert; + val = READ_ONCE(mrt->mroute_do_assert); break; default: return -ENOPROTOOPT; @@ -2037,20 +2037,20 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt, atomic_long_inc(&c->_c.mfc_un.res.wrong_if); - if (true_vifi >= 0 && mrt->mroute_do_assert && + if (true_vifi >= 0 && READ_ONCE(mrt->mroute_do_assert) && /* pimsm uses asserts, when switching from RPT to SPT, * so that we cannot check that packet arrived on an oif. * It is bad, but otherwise we would need to move pretty * large chunk of pimd to kernel. Ough... --ANK */ - (mrt->mroute_do_pim || + (READ_ONCE(mrt->mroute_do_pim) || c->_c.mfc_un.res.ttls[true_vifi] < 255) && time_after(jiffies, c->_c.mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { c->_c.mfc_un.res.last_assert = jiffies; ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF); - if (mrt->mroute_do_wrvifwhole) + if (READ_ONCE(mrt->mroute_do_wrvifwhole)) ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRVIFWHOLE); } @@ -2358,7 +2358,7 @@ int pim_rcv_v1(struct sk_buff *skb) mrt = ipmr_rt_fib_lookup(net, skb); if (IS_ERR(mrt)) goto drop; - if (!mrt->mroute_do_pim || + if (!READ_ONCE(mrt->mroute_do_pim) || pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) goto drop; From 402a8111d7becb4220a94f5684edfbd5d4668ddb Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 28 Feb 2026 22:17:21 +0000 Subject: [PATCH 0199/1561] ipmr: Convert ipmr_rtm_dumplink() to RCU. net->ipv4.mr_tables is updated under RTNL and can be read safely under RCU. Once created, the multicast route tables are not removed until netns dismantle. ipmr_rtm_dumplink() does not need RTNL protection for ipmr_for_each_table() and ipmr_fill_table() if RCU is held. Even if mrt->maxvif changes concurrently, ipmr_fill_vif() returns true to continue dumping the next table. Let's convert it to RCU. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260228221800.1082070-4-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/ipmr.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 970f173654c7..eecc79a835d1 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2901,12 +2901,13 @@ static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb) if (nla_put_u32(skb, IPMRA_TABLE_ID, mrt->id) || nla_put_u32(skb, IPMRA_TABLE_CACHE_RES_QUEUE_LEN, queue_len) || nla_put_s32(skb, IPMRA_TABLE_MROUTE_REG_VIF_NUM, - mrt->mroute_reg_vif_num) || + READ_ONCE(mrt->mroute_reg_vif_num)) || nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_ASSERT, - mrt->mroute_do_assert) || - nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_PIM, mrt->mroute_do_pim) || + READ_ONCE(mrt->mroute_do_assert)) || + nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_PIM, + READ_ONCE(mrt->mroute_do_pim)) || nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_WRVIFWHOLE, - mrt->mroute_do_wrvifwhole)) + READ_ONCE(mrt->mroute_do_wrvifwhole))) return false; return true; @@ -2919,7 +2920,7 @@ static bool ipmr_fill_vif(struct mr_table *mrt, u32 vifid, struct sk_buff *skb) struct vif_device *vif; vif = &mrt->vif_table[vifid]; - vif_dev = rtnl_dereference(vif->dev); + vif_dev = vif_dev_read(vif); /* if the VIF doesn't exist just continue */ if (!vif_dev) return true; @@ -2928,16 +2929,16 @@ static bool ipmr_fill_vif(struct mr_table *mrt, u32 vifid, struct sk_buff *skb) if (!vif_nest) return false; - if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, vif_dev->ifindex) || + if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, READ_ONCE(vif_dev->ifindex)) || nla_put_u32(skb, IPMRA_VIFA_VIF_ID, vifid) || nla_put_u16(skb, IPMRA_VIFA_FLAGS, vif->flags) || - nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_IN, vif->bytes_in, + nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_IN, READ_ONCE(vif->bytes_in), IPMRA_VIFA_PAD) || - nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_OUT, vif->bytes_out, + nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_OUT, READ_ONCE(vif->bytes_out), IPMRA_VIFA_PAD) || - nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_IN, vif->pkt_in, + nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_IN, READ_ONCE(vif->pkt_in), IPMRA_VIFA_PAD) || - nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_OUT, vif->pkt_out, + nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_OUT, READ_ONCE(vif->pkt_out), IPMRA_VIFA_PAD) || nla_put_be32(skb, IPMRA_VIFA_LOCAL_ADDR, vif->local) || nla_put_be32(skb, IPMRA_VIFA_REMOTE_ADDR, vif->remote)) { @@ -2992,6 +2993,8 @@ static int ipmr_rtm_dumplink(struct sk_buff *skb, struct netlink_callback *cb) s_t = cb->args[0]; s_e = cb->args[1]; + rcu_read_lock(); + ipmr_for_each_table(mrt, net) { struct nlattr *vifs, *af; struct ifinfomsg *hdr; @@ -3026,7 +3029,7 @@ static int ipmr_rtm_dumplink(struct sk_buff *skb, struct netlink_callback *cb) nlmsg_end(skb, nlh); goto out; } - for (i = 0; i < mrt->maxvif; i++) { + for (i = 0; i < READ_ONCE(mrt->maxvif); i++) { if (e < s_e) goto skip_entry; if (!ipmr_fill_vif(mrt, i, skb)) { @@ -3048,6 +3051,8 @@ skip_table: } out: + rcu_read_unlock(); + cb->args[1] = e; cb->args[0] = t; @@ -3287,7 +3292,7 @@ static struct pernet_operations ipmr_net_ops = { static const struct rtnl_msg_handler ipmr_rtnl_msg_handlers[] __initconst = { {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_GETLINK, - .dumpit = ipmr_rtm_dumplink}, + .dumpit = ipmr_rtm_dumplink, .flags = RTNL_FLAG_DUMP_UNLOCKED}, {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_NEWROUTE, .doit = ipmr_rtm_route}, {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_DELROUTE, From 2bd6c9d600d66497f5293dbb8ec61a5e80f13e64 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 28 Feb 2026 22:17:22 +0000 Subject: [PATCH 0200/1561] ipmr: Use MAXVIFS in mroute_msgsize(). mroute_msgsize() calculates skb size needed for ipmr_fill_mroute(). The size differs based on mrt->maxvif. We will drop RTNL for ipmr_rtm_getroute() and mrt->maxvif may change under RCU. To avoid -EMSGSIZE, let's calculate the size with the maximum value of mrt->maxvif, MAXVIFS. struct rtnexthop is 8 bytes and MAXVIFS is 32, so the maximum delta is 256 bytes, which is small enough. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260228221800.1082070-5-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/ipmr.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index eecc79a835d1..9f2dd726affb 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2510,7 +2510,7 @@ static int _ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, cmd, flags); } -static size_t mroute_msgsize(bool unresolved, int maxvif) +static size_t mroute_msgsize(bool unresolved) { size_t len = NLMSG_ALIGN(sizeof(struct rtmsg)) @@ -2523,7 +2523,7 @@ static size_t mroute_msgsize(bool unresolved, int maxvif) len = len + nla_total_size(4) /* RTA_IIF */ + nla_total_size(0) /* RTA_MULTIPATH */ - + maxvif * NLA_ALIGN(sizeof(struct rtnexthop)) + + MAXVIFS * NLA_ALIGN(sizeof(struct rtnexthop)) /* RTA_MFC_STATS */ + nla_total_size_64bit(sizeof(struct rta_mfc_stats)) ; @@ -2538,8 +2538,7 @@ static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, struct sk_buff *skb; int err = -ENOBUFS; - skb = nlmsg_new(mroute_msgsize(mfc->_c.mfc_parent >= MAXVIFS, - mrt->maxvif), + skb = nlmsg_new(mroute_msgsize(mfc->_c.mfc_parent >= MAXVIFS), GFP_ATOMIC); if (!skb) goto errout; @@ -2711,7 +2710,7 @@ static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, goto errout_free; } - skb = nlmsg_new(mroute_msgsize(false, mrt->maxvif), GFP_KERNEL); + skb = nlmsg_new(mroute_msgsize(false), GFP_KERNEL); if (!skb) { err = -ENOBUFS; goto errout_free; From 295a17b3eae97910c2664e7905a903b483c4089c Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 28 Feb 2026 22:17:23 +0000 Subject: [PATCH 0201/1561] ipmr: Convert ipmr_rtm_getroute() to RCU. ipmr_rtm_getroute() calls __ipmr_get_table(), ipmr_cache_find(), and ipmr_fill_mroute(). The table is not removed until netns dismantle, and net->ipv4.mr_tables is managed with RCU list API, so __ipmr_get_table() is safe under RCU. struct mfc_cache is freed by mr_cache_put() after RCU grace period, so we can use ipmr_cache_find() under RCU. rcu_read_lock() around it was just to avoid lockdep splat for rhl_for_each_entry_rcu(). ipmr_fill_mroute() calls mr_fill_mroute(), which properly uses RCU. Let's drop RTNL for ipmr_rtm_getroute() and use RCU instead. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260228221800.1082070-6-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/ipmr.c | 44 +++++++++++++++++++++++--------------------- net/ipv4/ipmr_base.c | 4 ++-- 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9f2dd726affb..cb1a5e3a6296 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2680,9 +2680,9 @@ static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, { struct net *net = sock_net(in_skb->sk); struct nlattr *tb[RTA_MAX + 1]; - struct sk_buff *skb = NULL; struct mfc_cache *cache; struct mr_table *mrt; + struct sk_buff *skb; __be32 src, grp; u32 tableid; int err; @@ -2695,39 +2695,40 @@ static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, grp = nla_get_in_addr_default(tb[RTA_DST], 0); tableid = nla_get_u32_default(tb[RTA_TABLE], 0); - mrt = __ipmr_get_table(net, tableid ? tableid : RT_TABLE_DEFAULT); - if (!mrt) { - err = -ENOENT; - goto errout_free; - } - - /* entries are added/deleted only under RTNL */ - rcu_read_lock(); - cache = ipmr_cache_find(mrt, src, grp); - rcu_read_unlock(); - if (!cache) { - err = -ENOENT; - goto errout_free; - } - skb = nlmsg_new(mroute_msgsize(false), GFP_KERNEL); if (!skb) { err = -ENOBUFS; - goto errout_free; + goto errout; + } + + rcu_read_lock(); + + mrt = __ipmr_get_table(net, tableid ? tableid : RT_TABLE_DEFAULT); + if (!mrt) { + err = -ENOENT; + goto errout_unlock; + } + + cache = ipmr_cache_find(mrt, src, grp); + if (!cache) { + err = -ENOENT; + goto errout_unlock; } err = ipmr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, cache, RTM_NEWROUTE, 0); if (err < 0) - goto errout_free; + goto errout_unlock; + + rcu_read_unlock(); err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); - errout: return err; -errout_free: +errout_unlock: + rcu_read_unlock(); kfree_skb(skb); goto errout; } @@ -3297,7 +3298,8 @@ static const struct rtnl_msg_handler ipmr_rtnl_msg_handlers[] __initconst = { {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_DELROUTE, .doit = ipmr_rtm_route}, {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_GETROUTE, - .doit = ipmr_rtm_getroute, .dumpit = ipmr_rtm_dumproute}, + .doit = ipmr_rtm_getroute, .dumpit = ipmr_rtm_dumproute, + .flags = RTNL_FLAG_DOIT_UNLOCKED}, }; int __init ip_mr_init(void) diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c index 2d62526406ca..b0fd9ffa01a2 100644 --- a/net/ipv4/ipmr_base.c +++ b/net/ipv4/ipmr_base.c @@ -223,7 +223,7 @@ int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, rcu_read_lock(); vif_dev = rcu_dereference(mrt->vif_table[c->mfc_parent].dev); - if (vif_dev && nla_put_u32(skb, RTA_IIF, vif_dev->ifindex) < 0) { + if (vif_dev && nla_put_u32(skb, RTA_IIF, READ_ONCE(vif_dev->ifindex)) < 0) { rcu_read_unlock(); return -EMSGSIZE; } @@ -252,7 +252,7 @@ int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, nhp->rtnh_flags = 0; nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; - nhp->rtnh_ifindex = vif_dev->ifindex; + nhp->rtnh_ifindex = READ_ONCE(vif_dev->ifindex); nhp->rtnh_len = sizeof(*nhp); } } From 2c698bab294aa273dacd8d6b72db1d79ef994385 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 28 Feb 2026 22:17:24 +0000 Subject: [PATCH 0202/1561] ipmr: Convert ipmr_rtm_dumproute() to RCU. ipmr_rtm_dumproute() calls mr_table_dump() or mr_rtm_dumproute(), and mr_rtm_dumproute() finally calls mr_table_dump(). mr_table_dump() calls the passed function, _ipmr_fill_mroute(). _ipmr_fill_mroute() is a wrapper of ipmr_fill_mroute() to cast struct mr_mfc * to struct mfc_cache *. ipmr_fill_mroute() can be already called safely under RCU. Let's convert ipmr_rtm_dumproute() to RCU. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260228221800.1082070-7-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/ipmr.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index cb1a5e3a6296..5c8508788fb6 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2736,15 +2736,17 @@ errout_unlock: static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) { struct fib_dump_filter filter = { - .rtnl_held = true, + .rtnl_held = false, }; int err; + rcu_read_lock(); + if (cb->strict_check) { err = ip_valid_fib_dump_req(sock_net(skb->sk), cb->nlh, &filter, cb); if (err < 0) - return err; + goto out; } if (filter.table_id) { @@ -2752,19 +2754,28 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) mrt = __ipmr_get_table(sock_net(skb->sk), filter.table_id); if (!mrt) { - if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IPMR) - return skb->len; + if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IPMR) { + err = skb->len; + goto out; + } NL_SET_ERR_MSG(cb->extack, "ipv4: MR table does not exist"); - return -ENOENT; + err = -ENOENT; + goto out; } + err = mr_table_dump(mrt, skb, cb, _ipmr_fill_mroute, &mfc_unres_lock, &filter); - return skb->len ? : err; + err = skb->len ? : err; + goto out; } - return mr_rtm_dumproute(skb, cb, ipmr_mr_table_iter, - _ipmr_fill_mroute, &mfc_unres_lock, &filter); + err = mr_rtm_dumproute(skb, cb, ipmr_mr_table_iter, + _ipmr_fill_mroute, &mfc_unres_lock, &filter); +out: + rcu_read_unlock(); + + return err; } static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = { @@ -3299,7 +3310,7 @@ static const struct rtnl_msg_handler ipmr_rtnl_msg_handlers[] __initconst = { .doit = ipmr_rtm_route}, {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_GETROUTE, .doit = ipmr_rtm_getroute, .dumpit = ipmr_rtm_dumproute, - .flags = RTNL_FLAG_DOIT_UNLOCKED}, + .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED}, }; int __init ip_mr_init(void) From 3810f9529dc7f784b5b958b2a018bb6996cf9077 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 28 Feb 2026 22:17:25 +0000 Subject: [PATCH 0203/1561] ipmr: Move unregister_netdevice_many() out of mroute_clean_tables(). This is a prep commit to convert ipmr_net_exit_batch() to ->exit_rtnl(). Let's move unregister_netdevice_many() in mroute_clean_tables() to its callers. As a bonus, mrtsock_destruct() can do batching for all tables. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260228221800.1082070-8-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/ipmr.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 5c8508788fb6..d15e05662b09 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -112,7 +112,8 @@ static int ipmr_cache_report(const struct mr_table *mrt, static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, int cmd); static void igmpmsg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt); -static void mroute_clean_tables(struct mr_table *mrt, int flags); +static void mroute_clean_tables(struct mr_table *mrt, int flags, + struct list_head *dev_kill_list); static void ipmr_expire_process(struct timer_list *t); #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES @@ -427,12 +428,15 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) static void ipmr_free_table(struct mr_table *mrt) { struct net *net = read_pnet(&mrt->net); + LIST_HEAD(dev_kill_list); WARN_ON_ONCE(!mr_can_free_table(net)); timer_shutdown_sync(&mrt->ipmr_expire_timer); mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC | - MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC); + MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC, + &dev_kill_list); + unregister_netdevice_many(&dev_kill_list); rhltable_destroy(&mrt->mfc_hash); kfree(mrt); } @@ -1293,12 +1297,12 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, } /* Close the multicast socket, and clear the vif tables etc */ -static void mroute_clean_tables(struct mr_table *mrt, int flags) +static void mroute_clean_tables(struct mr_table *mrt, int flags, + struct list_head *dev_kill_list) { struct net *net = read_pnet(&mrt->net); - struct mr_mfc *c, *tmp; struct mfc_cache *cache; - LIST_HEAD(list); + struct mr_mfc *c, *tmp; int i; /* Shut down all active vif entries */ @@ -1308,9 +1312,8 @@ static void mroute_clean_tables(struct mr_table *mrt, int flags) !(flags & MRT_FLUSH_VIFS_STATIC)) || (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT_FLUSH_VIFS))) continue; - vif_delete(mrt, i, 0, &list); + vif_delete(mrt, i, 0, dev_kill_list); } - unregister_netdevice_many(&list); } /* Wipe the cache */ @@ -1349,9 +1352,11 @@ static void mroute_clean_tables(struct mr_table *mrt, int flags) static void mrtsock_destruct(struct sock *sk) { struct net *net = sock_net(sk); + LIST_HEAD(dev_kill_list); struct mr_table *mrt; rtnl_lock(); + ipmr_for_each_table(mrt, net) { if (sk == rtnl_dereference(mrt->mroute_sk)) { IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; @@ -1360,9 +1365,13 @@ static void mrtsock_destruct(struct sock *sk) NETCONFA_IFINDEX_ALL, net->ipv4.devconf_all); RCU_INIT_POINTER(mrt->mroute_sk, NULL); - mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_MFC); + mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_MFC, + &dev_kill_list); } } + + unregister_netdevice_many(&dev_kill_list); + rtnl_unlock(); } @@ -1485,7 +1494,9 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval, sk == rtnl_dereference(mrt->mroute_sk), parent); break; - case MRT_FLUSH: + case MRT_FLUSH: { + LIST_HEAD(dev_kill_list); + if (optlen != sizeof(val)) { ret = -EINVAL; break; @@ -1494,8 +1505,11 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval, ret = -EFAULT; break; } - mroute_clean_tables(mrt, val); + + mroute_clean_tables(mrt, val, &dev_kill_list); + unregister_netdevice_many(&dev_kill_list); break; + } /* Control PIM assert. */ case MRT_ASSERT: if (optlen != sizeof(val)) { From b7fdc3cfb60a4dd80bb71c818fe433d8b3449cf3 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 28 Feb 2026 22:17:26 +0000 Subject: [PATCH 0204/1561] ipmr: Move unregister_netdevice_many() out of ipmr_free_table(). This is a prep commit to convert ipmr_net_exit_batch() to ->exit_rtnl(). Let's move unregister_netdevice_many() in ipmr_free_table() to its callers. Now ipmr_rules_exit() can do batching all tables per netns. Note that later we will remove RTNL and unregister_netdevice_many() in ipmr_rules_init(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260228221800.1082070-9-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/ipmr.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index d15e05662b09..7e2aa2026f01 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -102,7 +102,8 @@ static DEFINE_SPINLOCK(mfc_unres_lock); static struct kmem_cache *mrt_cachep __ro_after_init; static struct mr_table *ipmr_new_table(struct net *net, u32 id); -static void ipmr_free_table(struct mr_table *mrt); +static void ipmr_free_table(struct mr_table *mrt, + struct list_head *dev_kill_list); static void ip_mr_forward(struct net *net, struct mr_table *mrt, struct net_device *dev, struct sk_buff *skb, @@ -251,6 +252,7 @@ static const struct fib_rules_ops __net_initconst ipmr_rules_ops_template = { static int __net_init ipmr_rules_init(struct net *net) { struct fib_rules_ops *ops; + LIST_HEAD(dev_kill_list); struct mr_table *mrt; int err; @@ -275,7 +277,8 @@ static int __net_init ipmr_rules_init(struct net *net) err2: rtnl_lock(); - ipmr_free_table(mrt); + ipmr_free_table(mrt, &dev_kill_list); + unregister_netdevice_many(&dev_kill_list); rtnl_unlock(); err1: fib_rules_unregister(ops); @@ -285,12 +288,15 @@ err1: static void __net_exit ipmr_rules_exit(struct net *net) { struct mr_table *mrt, *next; + LIST_HEAD(dev_kill_list); ASSERT_RTNL(); list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) { list_del(&mrt->list); - ipmr_free_table(mrt); + ipmr_free_table(mrt, &dev_kill_list); } + + unregister_netdevice_many(&dev_kill_list); fib_rules_unregister(net->ipv4.mr_rules_ops); } @@ -349,8 +355,13 @@ static int __net_init ipmr_rules_init(struct net *net) static void __net_exit ipmr_rules_exit(struct net *net) { + LIST_HEAD(dev_kill_list); + ASSERT_RTNL(); - ipmr_free_table(net->ipv4.mrt); + + ipmr_free_table(net->ipv4.mrt, &dev_kill_list); + unregister_netdevice_many(&dev_kill_list); + net->ipv4.mrt = NULL; } @@ -425,18 +436,16 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) ipmr_expire_process, ipmr_new_table_set); } -static void ipmr_free_table(struct mr_table *mrt) +static void ipmr_free_table(struct mr_table *mrt, struct list_head *dev_kill_list) { struct net *net = read_pnet(&mrt->net); - LIST_HEAD(dev_kill_list); WARN_ON_ONCE(!mr_can_free_table(net)); timer_shutdown_sync(&mrt->ipmr_expire_timer); mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC | MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC, - &dev_kill_list); - unregister_netdevice_many(&dev_kill_list); + dev_kill_list); rhltable_destroy(&mrt->mfc_hash); kfree(mrt); } From b22b01867406bcafbf61b61dccdf5b0afbd89fdc Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 28 Feb 2026 22:17:27 +0000 Subject: [PATCH 0205/1561] ipmr: Convert ipmr_net_exit_batch() to ->exit_rtnl(). ipmr_net_ops uses ->exit_batch() to acquire RTNL only once for dying network namespaces. ipmr does not depend on the ordering of ->exit_rtnl() and ->exit_batch() of other pernet_operations (unlike fib_net_ops). Once ipmr_free_table() is called and all devices are queued for destruction in ->exit_rtnl(), later during NETDEV_UNREGISTER, ipmr_device_event() will not see anything in vif table and just do nothing. Let's convert ipmr_net_exit_batch() to ->exit_rtnl(). Note that fib_rules_unregister() does not need RTNL and we will remove RTNL and unregister_netdevice_many() in ipmr_net_init(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260228221800.1082070-10-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/ipmr.c | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 7e2aa2026f01..72761c8b2930 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -285,18 +285,17 @@ err1: return err; } -static void __net_exit ipmr_rules_exit(struct net *net) +static void __net_exit ipmr_rules_exit_rtnl(struct net *net, + struct list_head *dev_kill_list) { struct mr_table *mrt, *next; - LIST_HEAD(dev_kill_list); ASSERT_RTNL(); list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) { list_del(&mrt->list); - ipmr_free_table(mrt, &dev_kill_list); + ipmr_free_table(mrt, dev_kill_list); } - unregister_netdevice_many(&dev_kill_list); fib_rules_unregister(net->ipv4.mr_rules_ops); } @@ -353,14 +352,12 @@ static int __net_init ipmr_rules_init(struct net *net) return 0; } -static void __net_exit ipmr_rules_exit(struct net *net) +static void __net_exit ipmr_rules_exit_rtnl(struct net *net, + struct list_head *dev_kill_list) { - LIST_HEAD(dev_kill_list); - ASSERT_RTNL(); - ipmr_free_table(net->ipv4.mrt, &dev_kill_list); - unregister_netdevice_many(&dev_kill_list); + ipmr_free_table(net->ipv4.mrt, dev_kill_list); net->ipv4.mrt = NULL; } @@ -3264,6 +3261,7 @@ static void __net_exit ipmr_notifier_exit(struct net *net) /* Setup for IP multicast routing */ static int __net_init ipmr_net_init(struct net *net) { + LIST_HEAD(dev_kill_list); int err; err = ipmr_notifier_init(net); @@ -3290,7 +3288,8 @@ proc_cache_fail: remove_proc_entry("ip_mr_vif", net->proc_net); proc_vif_fail: rtnl_lock(); - ipmr_rules_exit(net); + ipmr_rules_exit_rtnl(net, &dev_kill_list); + unregister_netdevice_many(&dev_kill_list); rtnl_unlock(); #endif ipmr_rules_fail: @@ -3308,20 +3307,16 @@ static void __net_exit ipmr_net_exit(struct net *net) ipmr_notifier_exit(net); } -static void __net_exit ipmr_net_exit_batch(struct list_head *net_list) +static void __net_exit ipmr_net_exit_rtnl(struct net *net, + struct list_head *dev_kill_list) { - struct net *net; - - rtnl_lock(); - list_for_each_entry(net, net_list, exit_list) - ipmr_rules_exit(net); - rtnl_unlock(); + ipmr_rules_exit_rtnl(net, dev_kill_list); } static struct pernet_operations ipmr_net_ops = { .init = ipmr_net_init, .exit = ipmr_net_exit, - .exit_batch = ipmr_net_exit_batch, + .exit_rtnl = ipmr_net_exit_rtnl, }; static const struct rtnl_msg_handler ipmr_rtnl_msg_handlers[] __initconst = { From 4a11adcd9eefb841d4595267bbd4df304a98ded6 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 28 Feb 2026 22:17:28 +0000 Subject: [PATCH 0206/1561] ipmr: Remove RTNL in ipmr_rules_init() and ipmr_net_init(). When ipmr_free_table() is called from ipmr_rules_init() or ipmr_net_init(), the netns is not yet published. Thus, no device should have been registered, and mroute_clean_tables() will not call vif_delete(), so unregister_netdevice_many() is unnecessary. unregister_netdevice_many() does nothing if the list is empty, but it requires RTNL due to the unconditional ASSERT_RTNL() at the entry of unregister_netdevice_many_notify(). Let's remove unnecessary RTNL and ASSERT_RTNL() and instead add WARN_ON_ONCE() in ipmr_free_table(). Note that we use a local list for the new WARN_ON_ONCE() because dev_kill_list passed from ipmr_rules_exit_rtnl() may have some devices when other ops->init() fails after ipmr durnig setup_net(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260228221800.1082070-11-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/ipmr.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 72761c8b2930..c22bcaead348 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -276,10 +276,7 @@ static int __net_init ipmr_rules_init(struct net *net) return 0; err2: - rtnl_lock(); ipmr_free_table(mrt, &dev_kill_list); - unregister_netdevice_many(&dev_kill_list); - rtnl_unlock(); err1: fib_rules_unregister(ops); return err; @@ -290,7 +287,6 @@ static void __net_exit ipmr_rules_exit_rtnl(struct net *net, { struct mr_table *mrt, *next; - ASSERT_RTNL(); list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) { list_del(&mrt->list); ipmr_free_table(mrt, dev_kill_list); @@ -355,8 +351,6 @@ static int __net_init ipmr_rules_init(struct net *net) static void __net_exit ipmr_rules_exit_rtnl(struct net *net, struct list_head *dev_kill_list) { - ASSERT_RTNL(); - ipmr_free_table(net->ipv4.mrt, dev_kill_list); net->ipv4.mrt = NULL; @@ -436,15 +430,19 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) static void ipmr_free_table(struct mr_table *mrt, struct list_head *dev_kill_list) { struct net *net = read_pnet(&mrt->net); + LIST_HEAD(ipmr_dev_kill_list); WARN_ON_ONCE(!mr_can_free_table(net)); timer_shutdown_sync(&mrt->ipmr_expire_timer); mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC | MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC, - dev_kill_list); + &ipmr_dev_kill_list); rhltable_destroy(&mrt->mfc_hash); kfree(mrt); + + WARN_ON_ONCE(!net_initialized(net) && !list_empty(&ipmr_dev_kill_list)); + list_splice(&ipmr_dev_kill_list, dev_kill_list); } /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ @@ -3287,10 +3285,7 @@ static int __net_init ipmr_net_init(struct net *net) proc_cache_fail: remove_proc_entry("ip_mr_vif", net->proc_net); proc_vif_fail: - rtnl_lock(); ipmr_rules_exit_rtnl(net, &dev_kill_list); - unregister_netdevice_many(&dev_kill_list); - rtnl_unlock(); #endif ipmr_rules_fail: ipmr_notifier_exit(net); From 478c2add78b13e36d781d6891d5861e6e1eecef4 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 28 Feb 2026 22:17:29 +0000 Subject: [PATCH 0207/1561] ipmr: Call fib_rules_unregister() without RTNL. fib_rules_unregister() removes ops from net->rules_ops under spinlock, calls ops->delete() for each rule, and frees the ops. ipmr_rules_ops_template does not have ->delete(), and any operation does not require RTNL there. Let's move fib_rules_unregister() from ipmr_rules_exit_rtnl() to ipmr_net_exit(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260228221800.1082070-12-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/ipmr.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index c22bcaead348..07f2d4f8dcbe 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -282,6 +282,11 @@ err1: return err; } +static void __net_exit ipmr_rules_exit(struct net *net) +{ + fib_rules_unregister(net->ipv4.mr_rules_ops); +} + static void __net_exit ipmr_rules_exit_rtnl(struct net *net, struct list_head *dev_kill_list) { @@ -291,8 +296,6 @@ static void __net_exit ipmr_rules_exit_rtnl(struct net *net, list_del(&mrt->list); ipmr_free_table(mrt, dev_kill_list); } - - fib_rules_unregister(net->ipv4.mr_rules_ops); } static int ipmr_rules_dump(struct net *net, struct notifier_block *nb, @@ -348,6 +351,10 @@ static int __net_init ipmr_rules_init(struct net *net) return 0; } +static void __net_exit ipmr_rules_exit(struct net *net) +{ +} + static void __net_exit ipmr_rules_exit_rtnl(struct net *net, struct list_head *dev_kill_list) { @@ -3286,6 +3293,7 @@ proc_cache_fail: remove_proc_entry("ip_mr_vif", net->proc_net); proc_vif_fail: ipmr_rules_exit_rtnl(net, &dev_kill_list); + ipmr_rules_exit(net); #endif ipmr_rules_fail: ipmr_notifier_exit(net); @@ -3299,6 +3307,7 @@ static void __net_exit ipmr_net_exit(struct net *net) remove_proc_entry("ip_mr_cache", net->proc_net); remove_proc_entry("ip_mr_vif", net->proc_net); #endif + ipmr_rules_exit(net); ipmr_notifier_exit(net); } From 1c36d186a0c81f3b55b2722736163233b05f8756 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 28 Feb 2026 22:17:30 +0000 Subject: [PATCH 0208/1561] ipmr: Define net->ipv4.{ipmr_notifier_ops,ipmr_seq} under CONFIG_IP_MROUTE. net->ipv4.ipmr_notifier_ops and net->ipv4.ipmr_seq are used only in net/ipv4/ipmr.c. Let's move these definitions under CONFIG_IP_MROUTE. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260228221800.1082070-13-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/netns/ipv4.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 8e971c7bf164..380ff34c0233 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -279,6 +279,8 @@ struct netns_ipv4 { struct list_head mr_tables; struct fib_rules_ops *mr_rules_ops; #endif + struct fib_notifier_ops *ipmr_notifier_ops; + unsigned int ipmr_seq; /* protected by rtnl_mutex */ #endif #ifdef CONFIG_IP_ROUTE_MULTIPATH struct sysctl_fib_multipath_hash_seed sysctl_fib_multipath_hash_seed; @@ -290,9 +292,6 @@ struct netns_ipv4 { struct fib_notifier_ops *notifier_ops; unsigned int fib_seq; /* writes protected by rtnl_mutex */ - struct fib_notifier_ops *ipmr_notifier_ops; - unsigned int ipmr_seq; /* protected by rtnl_mutex */ - atomic_t rt_genid; siphash_key_t ip_id_key; struct hlist_head *inet_addr_lst; From 4480d5fa1f6ebe7dfc546e14371d63c8b915a82d Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 28 Feb 2026 22:17:31 +0000 Subject: [PATCH 0209/1561] ipmr/ip6mr: Convert net->ipv[46].ipmr_seq to atomic_t. We will no longer hold RTNL for ipmr_mfc_add() and ipmr_mfc_delete(). MFC entry can be loosely connected with VIF by its index for mrt->vif_table[] (stored in mfc_parent), but the two tables are not synchronised. i.e. Even if VIF 1 is removed, MFC for VIF 1 is not automatically removed. The only field that the MFC/VIF interfaces share is net->ipv[46].ipmr_seq, which is protected by RTNL. Adding a new mutex for both just to protect a single field is overkill. Let's convert the field to atomic_t. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260228221800.1082070-14-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/linux/mroute_base.h | 8 ++++---- include/net/netns/ipv4.h | 2 +- include/net/netns/ipv6.h | 2 +- net/ipv4/ipmr.c | 4 ++-- net/ipv6/ip6mr.c | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index 0075f6e5c3da..0baa6f994da9 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -76,7 +76,7 @@ static inline int mr_call_vif_notifiers(struct net *net, struct vif_device *vif, struct net_device *vif_dev, unsigned short vif_index, u32 tb_id, - unsigned int *ipmr_seq) + atomic_t *ipmr_seq) { struct vif_entry_notifier_info info = { .info = { @@ -89,7 +89,7 @@ static inline int mr_call_vif_notifiers(struct net *net, }; ASSERT_RTNL(); - (*ipmr_seq)++; + atomic_inc(ipmr_seq); return call_fib_notifiers(net, event_type, &info.info); } @@ -198,7 +198,7 @@ static inline int mr_call_mfc_notifiers(struct net *net, unsigned short family, enum fib_event_type event_type, struct mr_mfc *mfc, u32 tb_id, - unsigned int *ipmr_seq) + atomic_t *ipmr_seq) { struct mfc_entry_notifier_info info = { .info = { @@ -209,7 +209,7 @@ static inline int mr_call_mfc_notifiers(struct net *net, }; ASSERT_RTNL(); - (*ipmr_seq)++; + atomic_inc(ipmr_seq); return call_fib_notifiers(net, event_type, &info.info); } diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 380ff34c0233..94dca64fec41 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -280,7 +280,7 @@ struct netns_ipv4 { struct fib_rules_ops *mr_rules_ops; #endif struct fib_notifier_ops *ipmr_notifier_ops; - unsigned int ipmr_seq; /* protected by rtnl_mutex */ + atomic_t ipmr_seq; #endif #ifdef CONFIG_IP_ROUTE_MULTIPATH struct sysctl_fib_multipath_hash_seed sysctl_fib_multipath_hash_seed; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 34bdb1308e8f..499e4288170f 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -118,7 +118,7 @@ struct netns_ipv6 { struct seg6_pernet_data *seg6_data; struct fib_notifier_ops *notifier_ops; struct fib_notifier_ops *ip6mr_notifier_ops; - unsigned int ipmr_seq; /* protected by rtnl_mutex */ + atomic_t ipmr_seq; struct { struct hlist_head head; spinlock_t lock; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 07f2d4f8dcbe..6ec73796d84d 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -3226,7 +3226,7 @@ static const struct net_protocol pim_protocol = { static unsigned int ipmr_seq_read(const struct net *net) { - return READ_ONCE(net->ipv4.ipmr_seq) + ipmr_rules_seq_read(net); + return atomic_read(&net->ipv4.ipmr_seq) + ipmr_rules_seq_read(net); } static int ipmr_dump(struct net *net, struct notifier_block *nb, @@ -3247,7 +3247,7 @@ static int __net_init ipmr_notifier_init(struct net *net) { struct fib_notifier_ops *ops; - net->ipv4.ipmr_seq = 0; + atomic_set(&net->ipv4.ipmr_seq, 0); ops = fib_notifier_ops_register(&ipmr_notifier_ops_template, net); if (IS_ERR(ops)) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index e047a4680ab0..85010ff21c98 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1280,7 +1280,7 @@ static int ip6mr_device_event(struct notifier_block *this, static unsigned int ip6mr_seq_read(const struct net *net) { - return READ_ONCE(net->ipv6.ipmr_seq) + ip6mr_rules_seq_read(net); + return atomic_read(&net->ipv6.ipmr_seq) + ip6mr_rules_seq_read(net); } static int ip6mr_dump(struct net *net, struct notifier_block *nb, @@ -1305,7 +1305,7 @@ static int __net_init ip6mr_notifier_init(struct net *net) { struct fib_notifier_ops *ops; - net->ipv6.ipmr_seq = 0; + atomic_set(&net->ipv6.ipmr_seq, 0); ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net); if (IS_ERR(ops)) From 3c1e53e55418d4ca4040e281501643a96e227974 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 28 Feb 2026 22:17:32 +0000 Subject: [PATCH 0210/1561] ipmr: Add dedicated mutex for mrt->{mfc_hash,mfc_cache_list}. We will no longer hold RTNL for ipmr_rtm_route() to modify the MFC hash table. Only __dev_get_by_index() in rtm_to_ipmr_mfcc() is the RTNL dependant, otherwise, we just need protection for mrt->mfc_hash and mrt->mfc_cache_list. Let's add a new mutex for ipmr_mfc_add(), ipmr_mfc_delete(), and mroute_clean_tables() (setsockopt(MRT_FLUSH or MRT_DONE)). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260228221800.1082070-15-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/netns/ipv4.h | 1 + net/ipv4/ipmr.c | 28 ++++++++++++++++++++++------ 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 94dca64fec41..4c249aeaf7f1 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -281,6 +281,7 @@ struct netns_ipv4 { #endif struct fib_notifier_ops *ipmr_notifier_ops; atomic_t ipmr_seq; + struct mutex mfc_mutex; #endif #ifdef CONFIG_IP_ROUTE_MULTIPATH struct sysctl_fib_multipath_hash_seed sysctl_fib_multipath_hash_seed; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 6ec73796d84d..d4983d8a9b2a 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1329,6 +1329,8 @@ static void mroute_clean_tables(struct mr_table *mrt, int flags, /* Wipe the cache */ if (flags & (MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC)) { + mutex_lock(&net->ipv4.mfc_mutex); + list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) { if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT_FLUSH_MFC_STATIC)) || (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT_FLUSH_MFC))) @@ -1341,6 +1343,8 @@ static void mroute_clean_tables(struct mr_table *mrt, int flags, mroute_netlink_event(mrt, cache, RTM_DELROUTE); mr_cache_put(c); } + + mutex_unlock(&net->ipv4.mfc_mutex); } if (flags & MRT_FLUSH_MFC) { @@ -1498,12 +1502,17 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval, } if (parent == 0) parent = mfc.mfcc_parent; + + mutex_lock(&net->ipv4.mfc_mutex); + if (optname == MRT_DEL_MFC || optname == MRT_DEL_MFC_PROXY) ret = ipmr_mfc_delete(mrt, &mfc, parent); else ret = ipmr_mfc_add(net, mrt, &mfc, sk == rtnl_dereference(mrt->mroute_sk), parent); + + mutex_unlock(&net->ipv4.mfc_mutex); break; case MRT_FLUSH: { LIST_HEAD(dev_kill_list); @@ -2913,21 +2922,26 @@ static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); - int ret, mrtsock, parent; - struct mr_table *tbl; + int ret, mrtsock = 0, parent; + struct mr_table *tbl = NULL; struct mfcctl mfcc; - mrtsock = 0; - tbl = NULL; ret = rtm_to_ipmr_mfcc(net, nlh, &mfcc, &mrtsock, &tbl, extack); if (ret < 0) return ret; parent = ret ? mfcc.mfcc_parent : -1; + + mutex_lock(&net->ipv4.mfc_mutex); + if (nlh->nlmsg_type == RTM_NEWROUTE) - return ipmr_mfc_add(net, tbl, &mfcc, mrtsock, parent); + ret = ipmr_mfc_add(net, tbl, &mfcc, mrtsock, parent); else - return ipmr_mfc_delete(tbl, &mfcc, parent); + ret = ipmr_mfc_delete(tbl, &mfcc, parent); + + mutex_unlock(&net->ipv4.mfc_mutex); + + return ret; } static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb) @@ -3269,6 +3283,8 @@ static int __net_init ipmr_net_init(struct net *net) LIST_HEAD(dev_kill_list); int err; + mutex_init(&net->ipv4.mfc_mutex); + err = ipmr_notifier_init(net); if (err) goto ipmr_notifier_fail; From bddafc06ca5ee1be4d10061f7954c6d6be5dc1d8 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 28 Feb 2026 22:17:33 +0000 Subject: [PATCH 0211/1561] ipmr: Don't hold RTNL for ipmr_rtm_route(). ipmr_mfc_add() and ipmr_mfc_delete() are already protected by a dedicated mutex. rtm_to_ipmr_mfcc() calls __ipmr_get_table(), __dev_get_by_index(), amd ipmr_find_vif(). Once __dev_get_by_index() is converted to dev_get_by_index_rcu(), we can move the other two functions under that same RCU section and drop RTNL for ipmr_rtm_route(). Let's do that conversion and drop ASSERT_RTNL() in mr_call_mfc_notifiers(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260228221800.1082070-16-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/linux/mroute_base.h | 1 - net/ipv4/ipmr.c | 34 +++++++++++++++++++++------------- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index 0baa6f994da9..cf3374580f74 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -208,7 +208,6 @@ static inline int mr_call_mfc_notifiers(struct net *net, .tb_id = tb_id }; - ASSERT_RTNL(); atomic_inc(ipmr_seq); return call_fib_notifiers(net, event_type, &info.info); } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index d4983d8a9b2a..8a08d09b4c30 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1211,7 +1211,6 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent) struct net *net = read_pnet(&mrt->net); struct mfc_cache *c; - /* The entries are added/deleted only under RTNL */ rcu_read_lock(); c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr, mfc->mfcc_mcastgrp.s_addr, parent); @@ -1238,7 +1237,6 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, if (mfc->mfcc_parent >= MAXVIFS) return -ENFILE; - /* The entries are added/deleted only under RTNL */ rcu_read_lock(); c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr, mfc->mfcc_mcastgrp.s_addr, parent); @@ -2853,10 +2851,10 @@ static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh, { struct net_device *dev = NULL; u32 tblid = RT_TABLE_DEFAULT; + int ret, rem, iif = 0; struct mr_table *mrt; struct nlattr *attr; struct rtmsg *rtm; - int ret, rem; ret = nlmsg_validate_deprecated(nlh, sizeof(*rtm), RTA_MAX, rtm_ipmr_policy, extack); @@ -2883,11 +2881,7 @@ static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh, mfcc->mfcc_mcastgrp.s_addr = nla_get_be32(attr); break; case RTA_IIF: - dev = __dev_get_by_index(net, nla_get_u32(attr)); - if (!dev) { - ret = -ENODEV; - goto out; - } + iif = nla_get_u32(attr); break; case RTA_MULTIPATH: if (ipmr_nla_get_ttls(attr, mfcc) < 0) { @@ -2903,16 +2897,30 @@ static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh, break; } } + + rcu_read_lock(); + mrt = __ipmr_get_table(net, tblid); if (!mrt) { ret = -ENOENT; - goto out; + goto unlock; } + + if (iif) { + dev = dev_get_by_index_rcu(net, iif); + if (!dev) { + ret = -ENODEV; + goto unlock; + } + + mfcc->mfcc_parent = ipmr_find_vif(mrt, dev); + } + *mrtret = mrt; *mrtsock = rtm->rtm_protocol == RTPROT_MROUTED ? 1 : 0; - if (dev) - mfcc->mfcc_parent = ipmr_find_vif(mrt, dev); +unlock: + rcu_read_unlock(); out: return ret; } @@ -3343,9 +3351,9 @@ static const struct rtnl_msg_handler ipmr_rtnl_msg_handlers[] __initconst = { {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_GETLINK, .dumpit = ipmr_rtm_dumplink, .flags = RTNL_FLAG_DUMP_UNLOCKED}, {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_NEWROUTE, - .doit = ipmr_rtm_route}, + .doit = ipmr_rtm_route, .flags = RTNL_FLAG_DOIT_UNLOCKED}, {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_DELROUTE, - .doit = ipmr_rtm_route}, + .doit = ipmr_rtm_route, .flags = RTNL_FLAG_DOIT_UNLOCKED}, {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_GETROUTE, .doit = ipmr_rtm_getroute, .dumpit = ipmr_rtm_dumproute, .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED}, From 425e080a1c34859395efcc377efead05dc6fae3b Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sun, 1 Mar 2026 06:37:55 +0000 Subject: [PATCH 0212/1561] dccp Remove inet_hashinfo2_init_mod(). Commit c92c81df93df ("net: dccp: fix kernel crash on module load") added inet_hashinfo2_init_mod() for DCCP. Commit 22d6c9eebf2e ("net: Unexport shared functions for DCCP.") removed EXPORT_SYMBOL_GPL() it but forgot to remove the function itself. Let's remove inet_hashinfo2_init_mod(). Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260301063756.1581685-1-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/inet_hashtables.h | 1 - net/ipv4/inet_hashtables.c | 34 ++++++++-------------------------- 2 files changed, 8 insertions(+), 27 deletions(-) diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index ac05a52d9e13..8bddf58b1a85 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -286,7 +286,6 @@ void inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, unsigned long numentries, int scale, unsigned long low_limit, unsigned long high_limit); -int inet_hashinfo2_init_mod(struct inet_hashinfo *h); bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk); bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index fca980772c81..52847950b28a 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -1246,22 +1246,13 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row, __inet_check_established); } -static void init_hashinfo_lhash2(struct inet_hashinfo *h) -{ - int i; - - for (i = 0; i <= h->lhash2_mask; i++) { - spin_lock_init(&h->lhash2[i].lock); - INIT_HLIST_NULLS_HEAD(&h->lhash2[i].nulls_head, - i + LISTENING_NULLS_BASE); - } -} - void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, unsigned long numentries, int scale, unsigned long low_limit, unsigned long high_limit) { + unsigned int i; + h->lhash2 = alloc_large_system_hash(name, sizeof(*h->lhash2), numentries, @@ -1271,7 +1262,12 @@ void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, &h->lhash2_mask, low_limit, high_limit); - init_hashinfo_lhash2(h); + + for (i = 0; i <= h->lhash2_mask; i++) { + spin_lock_init(&h->lhash2[i].lock); + INIT_HLIST_NULLS_HEAD(&h->lhash2[i].nulls_head, + i + LISTENING_NULLS_BASE); + } /* this one is used for source ports of outgoing connections */ table_perturb = alloc_large_system_hash("Table-perturb", @@ -1282,20 +1278,6 @@ void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, INET_TABLE_PERTURB_SIZE); } -int inet_hashinfo2_init_mod(struct inet_hashinfo *h) -{ - h->lhash2 = kmalloc_objs(*h->lhash2, INET_LHTABLE_SIZE); - if (!h->lhash2) - return -ENOMEM; - - h->lhash2_mask = INET_LHTABLE_SIZE - 1; - /* INET_LHTABLE_SIZE must be a power of 2 */ - BUG_ON(INET_LHTABLE_SIZE & h->lhash2_mask); - - init_hashinfo_lhash2(h); - return 0; -} - int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo) { unsigned int locksz = sizeof(spinlock_t); From eb101d2abdcccb514ca4fccd3b278dd8267374f6 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Fri, 20 Feb 2026 12:47:30 +0300 Subject: [PATCH 0213/1561] wifi: rtw88: check for PCI upstream bridge existence pci_upstream_bridge() returns NULL if the device is on a root bus. If 8821CE is installed in the system with such a PCI topology, the probing routine will crash. This has probably been unnoticed as 8821CE is mostly supplied in laptops where there is a PCI-to-PCI bridge located upstream from the device. However the card might be installed on a system with different configuration. Check if the bridge does exist for the specific workaround to be applied. Found by Linux Verification Center (linuxtesting.org) with Svace static analysis tool. Fixes: 24f5e38a13b5 ("rtw88: Disable PCIe ASPM while doing NAPI poll on 8821CE") Cc: stable@vger.kernel.org Signed-off-by: Fedor Pchelkin Acked-by: Ping-Ke Shih Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260220094730.49791-1-pchelkin@ispras.ru --- drivers/net/wireless/realtek/rtw88/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtw88/pci.c b/drivers/net/wireless/realtek/rtw88/pci.c index 56b16186d3aa..ec0a45bfb670 100644 --- a/drivers/net/wireless/realtek/rtw88/pci.c +++ b/drivers/net/wireless/realtek/rtw88/pci.c @@ -1804,7 +1804,8 @@ int rtw_pci_probe(struct pci_dev *pdev, } /* Disable PCIe ASPM L1 while doing NAPI poll for 8821CE */ - if (rtwdev->chip->id == RTW_CHIP_TYPE_8821C && bridge->vendor == PCI_VENDOR_ID_INTEL) + if (rtwdev->chip->id == RTW_CHIP_TYPE_8821C && + bridge && bridge->vendor == PCI_VENDOR_ID_INTEL) rtwpci->rx_no_aspm = true; rtw_pci_phy_cfg(rtwdev); From c69855ada28656fdd7e197b6e24cd40a04fe14d3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 28 Feb 2026 14:08:45 -0800 Subject: [PATCH 0214/1561] atm: atmdev: add function parameter names and description kernel-doc reports function parameters not described for parameters that are not named. Add parameter names for these functions and then describe the function parameters in kernel-doc format. Fixes these warnings: Warning: include/linux/atmdev.h:316 function parameter '' not described in 'register_atm_ioctl' Warning: include/linux/atmdev.h:321 function parameter '' not described in 'deregister_atm_ioctl' Signed-off-by: Randy Dunlap Link: https://patch.msgid.link/20260228220845.2978547-1-rdunlap@infradead.org Signed-off-by: Jakub Kicinski --- include/linux/atmdev.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index 70807c679f1a..82a32526df64 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -309,17 +309,19 @@ struct atm_ioctl { /** * register_atm_ioctl - register handler for ioctl operations + * @ioctl: ioctl handler to register * * Special (non-device) handlers of ioctl's should * register here. If you're a normal device, you should * set .ioctl in your atmdev_ops instead. */ -void register_atm_ioctl(struct atm_ioctl *); +void register_atm_ioctl(struct atm_ioctl *ioctl); /** * deregister_atm_ioctl - remove the ioctl handler + * @ioctl: ioctl handler to deregister */ -void deregister_atm_ioctl(struct atm_ioctl *); +void deregister_atm_ioctl(struct atm_ioctl *ioctl); /* register_atmdevice_notifier - register atm_dev notify events From 039cd522dc70151da13329a5e3ae19b1736f468a Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Mon, 23 Feb 2026 12:55:22 +0800 Subject: [PATCH 0215/1561] wifi: rtlwifi: pci: fix possible use-after-free caused by unfinished irq_prepare_bcn_tasklet The irq_prepare_bcn_tasklet is initialized in rtl_pci_init() and scheduled when RTL_IMR_BCNINT interrupt is triggered by hardware. But it is never killed in rtl_pci_deinit(). When the rtlwifi card probe fails or is being detached, the ieee80211_hw is deallocated. However, irq_prepare_bcn_tasklet may still be running or pending, leading to use-after-free when the freed ieee80211_hw is accessed in _rtl_pci_prepare_bcn_tasklet(). Similar to irq_tasklet, add tasklet_kill() in rtl_pci_deinit() to ensure that irq_prepare_bcn_tasklet is properly terminated before the ieee80211_hw is released. The issue was identified through static analysis. Fixes: 0c8173385e54 ("rtl8192ce: Add new driver") Signed-off-by: Duoming Zhou Acked-by: Ping-Ke Shih Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260223045522.48377-1-duoming@zju.edu.cn --- drivers/net/wireless/realtek/rtlwifi/pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c index d080469264cf..f0010336e78c 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.c +++ b/drivers/net/wireless/realtek/rtlwifi/pci.c @@ -1674,6 +1674,7 @@ static void rtl_pci_deinit(struct ieee80211_hw *hw) synchronize_irq(rtlpci->pdev->irq); tasklet_kill(&rtlpriv->works.irq_tasklet); + tasklet_kill(&rtlpriv->works.irq_prepare_bcn_tasklet); cancel_work_sync(&rtlpriv->works.lps_change_work); } From 2b12ffb669553972f5cd017c69a2b81593b09106 Mon Sep 17 00:00:00 2001 From: Dipayaan Roy Date: Fri, 27 Feb 2026 00:15:02 -0800 Subject: [PATCH 0216/1561] net: mana: Trigger VF reset/recovery on health check failure due to HWC timeout The GF stats periodic query is used as mechanism to monitor HWC health check. If this HWC command times out, it is a strong indication that the device/SoC is in a faulty state and requires recovery. Today, when a timeout is detected, the driver marks hwc_timeout_occurred, clears cached stats, and stops rescheduling the periodic work. However, the device itself is left in the same failing state. Extend the timeout handling path to trigger the existing MANA VF recovery service by queueing a GDMA_EQE_HWC_RESET_REQUEST work item. This is expected to initiate the appropriate recovery flow by suspende resume first and if it fails then trigger a bus rescan. This change is intentionally limited to HWC command timeouts and does not trigger recovery for errors reported by the SoC as a normal command response. Signed-off-by: Dipayaan Roy Reviewed-by: Haiyang Zhang Reviewed-by: Simon Horman Link: https://patch.msgid.link/aaFShvKnwR5FY8dH@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net Signed-off-by: Paolo Abeni --- .../net/ethernet/microsoft/mana/gdma_main.c | 65 ++++++++++--------- drivers/net/ethernet/microsoft/mana/mana_en.c | 9 ++- include/net/mana/gdma.h | 16 ++++- 3 files changed, 55 insertions(+), 35 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index 37d2f108a839..aef8612b73cb 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -490,15 +490,9 @@ static void mana_serv_reset(struct pci_dev *pdev) dev_info(&pdev->dev, "MANA reset cycle completed\n"); out: - gc->in_service = false; + clear_bit(GC_IN_SERVICE, &gc->flags); } -struct mana_serv_work { - struct work_struct serv_work; - struct pci_dev *pdev; - enum gdma_eqe_type type; -}; - static void mana_do_service(enum gdma_eqe_type type, struct pci_dev *pdev) { switch (type) { @@ -558,12 +552,42 @@ static void mana_serv_func(struct work_struct *w) module_put(THIS_MODULE); } +int mana_schedule_serv_work(struct gdma_context *gc, enum gdma_eqe_type type) +{ + struct mana_serv_work *mns_wk; + + if (test_and_set_bit(GC_IN_SERVICE, &gc->flags)) { + dev_info(gc->dev, "Already in service\n"); + return -EBUSY; + } + + if (!try_module_get(THIS_MODULE)) { + dev_info(gc->dev, "Module is unloading\n"); + clear_bit(GC_IN_SERVICE, &gc->flags); + return -ENODEV; + } + + mns_wk = kzalloc(sizeof(*mns_wk), GFP_ATOMIC); + if (!mns_wk) { + module_put(THIS_MODULE); + clear_bit(GC_IN_SERVICE, &gc->flags); + return -ENOMEM; + } + + dev_info(gc->dev, "Start MANA service type:%d\n", type); + mns_wk->pdev = to_pci_dev(gc->dev); + mns_wk->type = type; + pci_dev_get(mns_wk->pdev); + INIT_WORK(&mns_wk->serv_work, mana_serv_func); + schedule_work(&mns_wk->serv_work); + return 0; +} + static void mana_gd_process_eqe(struct gdma_queue *eq) { u32 head = eq->head % (eq->queue_size / GDMA_EQE_SIZE); struct gdma_context *gc = eq->gdma_dev->gdma_context; struct gdma_eqe *eq_eqe_ptr = eq->queue_mem_ptr; - struct mana_serv_work *mns_wk; union gdma_eqe_info eqe_info; enum gdma_eqe_type type; struct gdma_event event; @@ -623,30 +647,7 @@ static void mana_gd_process_eqe(struct gdma_queue *eq) "Service is to be processed in probe\n"); break; } - - if (gc->in_service) { - dev_info(gc->dev, "Already in service\n"); - break; - } - - if (!try_module_get(THIS_MODULE)) { - dev_info(gc->dev, "Module is unloading\n"); - break; - } - - mns_wk = kzalloc_obj(*mns_wk, GFP_ATOMIC); - if (!mns_wk) { - module_put(THIS_MODULE); - break; - } - - dev_info(gc->dev, "Start MANA service type:%d\n", type); - gc->in_service = true; - mns_wk->pdev = to_pci_dev(gc->dev); - mns_wk->type = type; - pci_dev_get(mns_wk->pdev); - INIT_WORK(&mns_wk->serv_work, mana_serv_func); - schedule_work(&mns_wk->serv_work); + mana_schedule_serv_work(gc, type); break; default: diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 933e9d681ded..56ee993e3a43 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -875,7 +875,7 @@ static void mana_tx_timeout(struct net_device *netdev, unsigned int txqueue) struct gdma_context *gc = ac->gdma_dev->gdma_context; /* Already in service, hence tx queue reset is not required.*/ - if (gc->in_service) + if (test_bit(GC_IN_SERVICE, &gc->flags)) return; /* Note: If there are pending queue reset work for this port(apc), @@ -3525,6 +3525,7 @@ static void mana_gf_stats_work_handler(struct work_struct *work) { struct mana_context *ac = container_of(to_delayed_work(work), struct mana_context, gf_stats_work); + struct gdma_context *gc = ac->gdma_dev->gdma_context; int err; err = mana_query_gf_stats(ac); @@ -3532,6 +3533,12 @@ static void mana_gf_stats_work_handler(struct work_struct *work) /* HWC timeout detected - reset stats and stop rescheduling */ ac->hwc_timeout_occurred = true; memset(&ac->hc_stats, 0, sizeof(ac->hc_stats)); + dev_warn(gc->dev, + "Gf stats wk handler: gf stats query timed out.\n"); + /* As HWC timed out, indicating a faulty HW state and needs a + * reset. + */ + mana_schedule_serv_work(gc, GDMA_EQE_HWC_RESET_REQUEST); return; } schedule_delayed_work(&ac->gf_stats_work, MANA_GF_STATS_PERIOD); diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index 766f4fb25e26..ec17004b10c0 100644 --- a/include/net/mana/gdma.h +++ b/include/net/mana/gdma.h @@ -215,6 +215,12 @@ enum gdma_page_type { #define GDMA_INVALID_DMA_REGION 0 +struct mana_serv_work { + struct work_struct serv_work; + struct pci_dev *pdev; + enum gdma_eqe_type type; +}; + struct gdma_mem_info { struct device *dev; @@ -386,6 +392,7 @@ struct gdma_irq_context { enum gdma_context_flags { GC_PROBE_SUCCEEDED = 0, + GC_IN_SERVICE = 1, }; struct gdma_context { @@ -411,7 +418,6 @@ struct gdma_context { u32 test_event_eq_id; bool is_pf; - bool in_service; phys_addr_t bar0_pa; void __iomem *bar0_va; @@ -473,6 +479,8 @@ int mana_gd_poll_cq(struct gdma_queue *cq, struct gdma_comp *comp, int num_cqe); void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit); +int mana_schedule_serv_work(struct gdma_context *gc, enum gdma_eqe_type type); + struct gdma_wqe { u32 reserved :24; u32 last_vbytes :8; @@ -615,6 +623,9 @@ enum { /* Driver can handle hardware recovery events during probe */ #define GDMA_DRV_CAP_FLAG_1_PROBE_RECOVERY BIT(22) +/* Driver supports self recovery on Hardware Channel timeouts */ +#define GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECOVERY BIT(25) + #define GDMA_DRV_CAP_FLAGS1 \ (GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \ GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \ @@ -628,7 +639,8 @@ enum { GDMA_DRV_CAP_FLAG_1_PERIODIC_STATS_QUERY | \ GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE | \ GDMA_DRV_CAP_FLAG_1_PROBE_RECOVERY | \ - GDMA_DRV_CAP_FLAG_1_HANDLE_STALL_SQ_RECOVERY) + GDMA_DRV_CAP_FLAG_1_HANDLE_STALL_SQ_RECOVERY | \ + GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECOVERY) #define GDMA_DRV_CAP_FLAGS2 0 From 5d048bbed1bb2bbef612dad0bb9c177c434e63a4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 28 Jan 2026 14:24:12 +0100 Subject: [PATCH 0217/1561] wifi: mac80211: give the AP more time for EPPKE as well EPPKE authentication can use SAE (via PASN), so give the AP more time to respond to EPPKE case just like for SAE. Link: https://patch.msgid.link/20260128132414.881741-2-johannes@sipsolutions.net Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 7957eacc5ab7..170330d924a3 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -8498,7 +8498,8 @@ static int ieee80211_auth(struct ieee80211_sub_if_data *sdata) return -ETIMEDOUT; } - if (auth_data->algorithm == WLAN_AUTH_SAE) + if (auth_data->algorithm == WLAN_AUTH_SAE || + auth_data->algorithm == WLAN_AUTH_EPPKE) info.duration = jiffies_to_msecs(IEEE80211_AUTH_TIMEOUT_SAE); info.link_id = auth_data->link_id; From 4845f2fff730f0cdf8f7fe6401c8b871891cf1cb Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Fri, 27 Feb 2026 11:52:59 +0100 Subject: [PATCH 0218/1561] dpll: zl3073x: detect DPLL channel count from chip ID at runtime Replace the five per-variant zl3073x_chip_info structures and their exported symbol definitions with a single consolidated chip ID lookup table. The chip variant is now detected at runtime by reading the chip ID register from hardware and looking it up in the table, rather than being selected at compile time via the bus driver match data. Repurpose struct zl3073x_chip_info to hold a single chip ID, its channel count, and a flags field. Introduce enum zl3073x_flags with ZL3073X_FLAG_REF_PHASE_COMP_32 to replace the chip_id switch statement in zl3073x_dev_is_ref_phase_comp_32bit(). Store a pointer to the detected chip_info entry in struct zl3073x_dev for runtime access. This simplifies the bus drivers by removing per-variant .data and .driver_data references from the I2C/SPI match tables, and makes adding support for new chip variants a single-line table addition. Signed-off-by: Ivan Vecera Link: https://patch.msgid.link/20260227105300.710272-2-ivecera@redhat.com Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- drivers/dpll/zl3073x/core.c | 116 ++++++++++-------------------------- drivers/dpll/zl3073x/core.h | 57 +++++++++--------- drivers/dpll/zl3073x/i2c.c | 37 ++++-------- drivers/dpll/zl3073x/spi.c | 37 ++++-------- 4 files changed, 81 insertions(+), 166 deletions(-) diff --git a/drivers/dpll/zl3073x/core.c b/drivers/dpll/zl3073x/core.c index 37f3c33570ee..c8af34301045 100644 --- a/drivers/dpll/zl3073x/core.c +++ b/drivers/dpll/zl3073x/core.c @@ -20,78 +20,29 @@ #include "dpll.h" #include "regs.h" -/* Chip IDs for zl30731 */ -static const u16 zl30731_ids[] = { - 0x0E93, - 0x1E93, - 0x2E93, -}; +#define ZL_CHIP_INFO(_id, _nchannels, _flags) \ + { .id = (_id), .num_channels = (_nchannels), .flags = (_flags) } -const struct zl3073x_chip_info zl30731_chip_info = { - .ids = zl30731_ids, - .num_ids = ARRAY_SIZE(zl30731_ids), - .num_channels = 1, +static const struct zl3073x_chip_info zl3073x_chip_ids[] = { + ZL_CHIP_INFO(0x0E30, 2, ZL3073X_FLAG_REF_PHASE_COMP_32), + ZL_CHIP_INFO(0x0E93, 1, ZL3073X_FLAG_REF_PHASE_COMP_32), + ZL_CHIP_INFO(0x0E94, 2, ZL3073X_FLAG_REF_PHASE_COMP_32), + ZL_CHIP_INFO(0x0E95, 3, ZL3073X_FLAG_REF_PHASE_COMP_32), + ZL_CHIP_INFO(0x0E96, 4, ZL3073X_FLAG_REF_PHASE_COMP_32), + ZL_CHIP_INFO(0x0E97, 5, ZL3073X_FLAG_REF_PHASE_COMP_32), + ZL_CHIP_INFO(0x1E93, 1, 0), + ZL_CHIP_INFO(0x1E94, 2, 0), + ZL_CHIP_INFO(0x1E95, 3, 0), + ZL_CHIP_INFO(0x1E96, 4, 0), + ZL_CHIP_INFO(0x1E97, 5, 0), + ZL_CHIP_INFO(0x1F60, 2, ZL3073X_FLAG_REF_PHASE_COMP_32), + ZL_CHIP_INFO(0x2E93, 1, 0), + ZL_CHIP_INFO(0x2E94, 2, 0), + ZL_CHIP_INFO(0x2E95, 3, 0), + ZL_CHIP_INFO(0x2E96, 4, 0), + ZL_CHIP_INFO(0x2E97, 5, 0), + ZL_CHIP_INFO(0x3FC4, 2, 0), }; -EXPORT_SYMBOL_NS_GPL(zl30731_chip_info, "ZL3073X"); - -/* Chip IDs for zl30732 */ -static const u16 zl30732_ids[] = { - 0x0E30, - 0x0E94, - 0x1E94, - 0x1F60, - 0x2E94, - 0x3FC4, -}; - -const struct zl3073x_chip_info zl30732_chip_info = { - .ids = zl30732_ids, - .num_ids = ARRAY_SIZE(zl30732_ids), - .num_channels = 2, -}; -EXPORT_SYMBOL_NS_GPL(zl30732_chip_info, "ZL3073X"); - -/* Chip IDs for zl30733 */ -static const u16 zl30733_ids[] = { - 0x0E95, - 0x1E95, - 0x2E95, -}; - -const struct zl3073x_chip_info zl30733_chip_info = { - .ids = zl30733_ids, - .num_ids = ARRAY_SIZE(zl30733_ids), - .num_channels = 3, -}; -EXPORT_SYMBOL_NS_GPL(zl30733_chip_info, "ZL3073X"); - -/* Chip IDs for zl30734 */ -static const u16 zl30734_ids[] = { - 0x0E96, - 0x1E96, - 0x2E96, -}; - -const struct zl3073x_chip_info zl30734_chip_info = { - .ids = zl30734_ids, - .num_ids = ARRAY_SIZE(zl30734_ids), - .num_channels = 4, -}; -EXPORT_SYMBOL_NS_GPL(zl30734_chip_info, "ZL3073X"); - -/* Chip IDs for zl30735 */ -static const u16 zl30735_ids[] = { - 0x0E97, - 0x1E97, - 0x2E97, -}; - -const struct zl3073x_chip_info zl30735_chip_info = { - .ids = zl30735_ids, - .num_ids = ARRAY_SIZE(zl30735_ids), - .num_channels = 5, -}; -EXPORT_SYMBOL_NS_GPL(zl30735_chip_info, "ZL3073X"); #define ZL_RANGE_OFFSET 0x80 #define ZL_PAGE_SIZE 0x80 @@ -942,7 +893,7 @@ static void zl3073x_dev_dpll_fini(void *ptr) } static int -zl3073x_devm_dpll_init(struct zl3073x_dev *zldev, u8 num_dplls) +zl3073x_devm_dpll_init(struct zl3073x_dev *zldev) { struct kthread_worker *kworker; struct zl3073x_dpll *zldpll; @@ -952,7 +903,7 @@ zl3073x_devm_dpll_init(struct zl3073x_dev *zldev, u8 num_dplls) INIT_LIST_HEAD(&zldev->dplls); /* Allocate all DPLLs */ - for (i = 0; i < num_dplls; i++) { + for (i = 0; i < zldev->info->num_channels; i++) { zldpll = zl3073x_dpll_alloc(zldev, i); if (IS_ERR(zldpll)) { dev_err_probe(zldev->dev, PTR_ERR(zldpll), @@ -992,14 +943,12 @@ error: /** * zl3073x_dev_probe - initialize zl3073x device * @zldev: pointer to zl3073x device - * @chip_info: chip info based on compatible * * Common initialization of zl3073x device structure. * * Returns: 0 on success, <0 on error */ -int zl3073x_dev_probe(struct zl3073x_dev *zldev, - const struct zl3073x_chip_info *chip_info) +int zl3073x_dev_probe(struct zl3073x_dev *zldev) { u16 id, revision, fw_ver; unsigned int i; @@ -1011,18 +960,17 @@ int zl3073x_dev_probe(struct zl3073x_dev *zldev, if (rc) return rc; - /* Check it matches */ - for (i = 0; i < chip_info->num_ids; i++) { - if (id == chip_info->ids[i]) + /* Detect chip variant */ + for (i = 0; i < ARRAY_SIZE(zl3073x_chip_ids); i++) { + if (zl3073x_chip_ids[i].id == id) break; } - if (i == chip_info->num_ids) { + if (i == ARRAY_SIZE(zl3073x_chip_ids)) return dev_err_probe(zldev->dev, -ENODEV, - "Unknown or non-match chip ID: 0x%0x\n", - id); - } - zldev->chip_id = id; + "Unknown chip ID: 0x%04x\n", id); + + zldev->info = &zl3073x_chip_ids[i]; /* Read revision, firmware version and custom config version */ rc = zl3073x_read_u16(zldev, ZL_REG_REVISION, &revision); @@ -1061,7 +1009,7 @@ int zl3073x_dev_probe(struct zl3073x_dev *zldev, "Failed to initialize mutex\n"); /* Register DPLL channels */ - rc = zl3073x_devm_dpll_init(zldev, chip_info->num_channels); + rc = zl3073x_devm_dpll_init(zldev); if (rc) return rc; diff --git a/drivers/dpll/zl3073x/core.h b/drivers/dpll/zl3073x/core.h index fd2af3c62a7d..fde5c8371fbd 100644 --- a/drivers/dpll/zl3073x/core.h +++ b/drivers/dpll/zl3073x/core.h @@ -30,12 +30,32 @@ struct zl3073x_dpll; #define ZL3073X_NUM_PINS (ZL3073X_NUM_INPUT_PINS + \ ZL3073X_NUM_OUTPUT_PINS) +enum zl3073x_flags { + ZL3073X_FLAG_REF_PHASE_COMP_32_BIT, + ZL3073X_FLAGS_NBITS /* must be last */ +}; + +#define __ZL3073X_FLAG(name) BIT(ZL3073X_FLAG_ ## name ## _BIT) +#define ZL3073X_FLAG_REF_PHASE_COMP_32 __ZL3073X_FLAG(REF_PHASE_COMP_32) + +/** + * struct zl3073x_chip_info - chip variant identification + * @id: chip ID + * @num_channels: number of DPLL channels supported by this variant + * @flags: chip variant flags + */ +struct zl3073x_chip_info { + u16 id; + u8 num_channels; + unsigned long flags; +}; + /** * struct zl3073x_dev - zl3073x device * @dev: pointer to device * @regmap: regmap to access device registers + * @info: detected chip info * @multiop_lock: to serialize multiple register operations - * @chip_id: chip ID read from hardware * @ref: array of input references' invariants * @out: array of outs' invariants * @synth: array of synths' invariants @@ -46,10 +66,10 @@ struct zl3073x_dpll; * @phase_avg_factor: phase offset measurement averaging factor */ struct zl3073x_dev { - struct device *dev; - struct regmap *regmap; - struct mutex multiop_lock; - u16 chip_id; + struct device *dev; + struct regmap *regmap; + const struct zl3073x_chip_info *info; + struct mutex multiop_lock; /* Invariants */ struct zl3073x_ref ref[ZL3073X_NUM_REFS]; @@ -68,22 +88,10 @@ struct zl3073x_dev { u8 phase_avg_factor; }; -struct zl3073x_chip_info { - const u16 *ids; - size_t num_ids; - int num_channels; -}; - -extern const struct zl3073x_chip_info zl30731_chip_info; -extern const struct zl3073x_chip_info zl30732_chip_info; -extern const struct zl3073x_chip_info zl30733_chip_info; -extern const struct zl3073x_chip_info zl30734_chip_info; -extern const struct zl3073x_chip_info zl30735_chip_info; extern const struct regmap_config zl3073x_regmap_config; struct zl3073x_dev *zl3073x_devm_alloc(struct device *dev); -int zl3073x_dev_probe(struct zl3073x_dev *zldev, - const struct zl3073x_chip_info *chip_info); +int zl3073x_dev_probe(struct zl3073x_dev *zldev); int zl3073x_dev_start(struct zl3073x_dev *zldev, bool full); void zl3073x_dev_stop(struct zl3073x_dev *zldev); @@ -158,18 +166,7 @@ int zl3073x_ref_phase_offsets_update(struct zl3073x_dev *zldev, int channel); static inline bool zl3073x_dev_is_ref_phase_comp_32bit(struct zl3073x_dev *zldev) { - switch (zldev->chip_id) { - case 0x0E30: - case 0x0E93: - case 0x0E94: - case 0x0E95: - case 0x0E96: - case 0x0E97: - case 0x1F60: - return true; - default: - return false; - } + return zldev->info->flags & ZL3073X_FLAG_REF_PHASE_COMP_32; } static inline bool diff --git a/drivers/dpll/zl3073x/i2c.c b/drivers/dpll/zl3073x/i2c.c index 7bbfdd4ed867..979df85826ab 100644 --- a/drivers/dpll/zl3073x/i2c.c +++ b/drivers/dpll/zl3073x/i2c.c @@ -22,40 +22,25 @@ static int zl3073x_i2c_probe(struct i2c_client *client) return dev_err_probe(dev, PTR_ERR(zldev->regmap), "Failed to initialize regmap\n"); - return zl3073x_dev_probe(zldev, i2c_get_match_data(client)); + return zl3073x_dev_probe(zldev); } static const struct i2c_device_id zl3073x_i2c_id[] = { - { - .name = "zl30731", - .driver_data = (kernel_ulong_t)&zl30731_chip_info, - }, - { - .name = "zl30732", - .driver_data = (kernel_ulong_t)&zl30732_chip_info, - }, - { - .name = "zl30733", - .driver_data = (kernel_ulong_t)&zl30733_chip_info, - }, - { - .name = "zl30734", - .driver_data = (kernel_ulong_t)&zl30734_chip_info, - }, - { - .name = "zl30735", - .driver_data = (kernel_ulong_t)&zl30735_chip_info, - }, + { "zl30731" }, + { "zl30732" }, + { "zl30733" }, + { "zl30734" }, + { "zl30735" }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(i2c, zl3073x_i2c_id); static const struct of_device_id zl3073x_i2c_of_match[] = { - { .compatible = "microchip,zl30731", .data = &zl30731_chip_info }, - { .compatible = "microchip,zl30732", .data = &zl30732_chip_info }, - { .compatible = "microchip,zl30733", .data = &zl30733_chip_info }, - { .compatible = "microchip,zl30734", .data = &zl30734_chip_info }, - { .compatible = "microchip,zl30735", .data = &zl30735_chip_info }, + { .compatible = "microchip,zl30731" }, + { .compatible = "microchip,zl30732" }, + { .compatible = "microchip,zl30733" }, + { .compatible = "microchip,zl30734" }, + { .compatible = "microchip,zl30735" }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, zl3073x_i2c_of_match); diff --git a/drivers/dpll/zl3073x/spi.c b/drivers/dpll/zl3073x/spi.c index af901b4d6dda..f024f42b78d0 100644 --- a/drivers/dpll/zl3073x/spi.c +++ b/drivers/dpll/zl3073x/spi.c @@ -22,40 +22,25 @@ static int zl3073x_spi_probe(struct spi_device *spi) return dev_err_probe(dev, PTR_ERR(zldev->regmap), "Failed to initialize regmap\n"); - return zl3073x_dev_probe(zldev, spi_get_device_match_data(spi)); + return zl3073x_dev_probe(zldev); } static const struct spi_device_id zl3073x_spi_id[] = { - { - .name = "zl30731", - .driver_data = (kernel_ulong_t)&zl30731_chip_info - }, - { - .name = "zl30732", - .driver_data = (kernel_ulong_t)&zl30732_chip_info, - }, - { - .name = "zl30733", - .driver_data = (kernel_ulong_t)&zl30733_chip_info, - }, - { - .name = "zl30734", - .driver_data = (kernel_ulong_t)&zl30734_chip_info, - }, - { - .name = "zl30735", - .driver_data = (kernel_ulong_t)&zl30735_chip_info, - }, + { "zl30731" }, + { "zl30732" }, + { "zl30733" }, + { "zl30734" }, + { "zl30735" }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(spi, zl3073x_spi_id); static const struct of_device_id zl3073x_spi_of_match[] = { - { .compatible = "microchip,zl30731", .data = &zl30731_chip_info }, - { .compatible = "microchip,zl30732", .data = &zl30732_chip_info }, - { .compatible = "microchip,zl30733", .data = &zl30733_chip_info }, - { .compatible = "microchip,zl30734", .data = &zl30734_chip_info }, - { .compatible = "microchip,zl30735", .data = &zl30735_chip_info }, + { .compatible = "microchip,zl30731" }, + { .compatible = "microchip,zl30732" }, + { .compatible = "microchip,zl30733" }, + { .compatible = "microchip,zl30734" }, + { .compatible = "microchip,zl30735" }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, zl3073x_spi_of_match); From 3a97e02b3e91e4d40095ad9bb6e466d8d7c1a1bc Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Fri, 27 Feb 2026 11:53:00 +0100 Subject: [PATCH 0219/1561] dpll: zl3073x: add die temperature reporting for supported chips Some zl3073x chip variants (0x1Exx, 0x2Exx and 0x3FC4) provide a die temperature status register with 0.1 C resolution. Add a ZL3073X_FLAG_DIE_TEMP chip flag to identify these variants and implement zl3073x_dpll_temp_get() as the dpll_device_ops.temp_get callback. The register value is converted from 0.1 C units to millidegrees as expected by the DPLL subsystem. To support per-instance ops selection, copy the base dpll_device_ops into struct zl3073x_dpll and conditionally set .temp_get during device registration based on the chip flag. Signed-off-by: Ivan Vecera Link: https://patch.msgid.link/20260227105300.710272-3-ivecera@redhat.com Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- drivers/dpll/zl3073x/core.c | 22 +++++++++++----------- drivers/dpll/zl3073x/core.h | 2 ++ drivers/dpll/zl3073x/dpll.c | 28 +++++++++++++++++++++++++--- drivers/dpll/zl3073x/dpll.h | 2 ++ drivers/dpll/zl3073x/regs.h | 2 ++ 5 files changed, 42 insertions(+), 14 deletions(-) diff --git a/drivers/dpll/zl3073x/core.c b/drivers/dpll/zl3073x/core.c index c8af34301045..10e036ccf08f 100644 --- a/drivers/dpll/zl3073x/core.c +++ b/drivers/dpll/zl3073x/core.c @@ -30,18 +30,18 @@ static const struct zl3073x_chip_info zl3073x_chip_ids[] = { ZL_CHIP_INFO(0x0E95, 3, ZL3073X_FLAG_REF_PHASE_COMP_32), ZL_CHIP_INFO(0x0E96, 4, ZL3073X_FLAG_REF_PHASE_COMP_32), ZL_CHIP_INFO(0x0E97, 5, ZL3073X_FLAG_REF_PHASE_COMP_32), - ZL_CHIP_INFO(0x1E93, 1, 0), - ZL_CHIP_INFO(0x1E94, 2, 0), - ZL_CHIP_INFO(0x1E95, 3, 0), - ZL_CHIP_INFO(0x1E96, 4, 0), - ZL_CHIP_INFO(0x1E97, 5, 0), + ZL_CHIP_INFO(0x1E93, 1, ZL3073X_FLAG_DIE_TEMP), + ZL_CHIP_INFO(0x1E94, 2, ZL3073X_FLAG_DIE_TEMP), + ZL_CHIP_INFO(0x1E95, 3, ZL3073X_FLAG_DIE_TEMP), + ZL_CHIP_INFO(0x1E96, 4, ZL3073X_FLAG_DIE_TEMP), + ZL_CHIP_INFO(0x1E97, 5, ZL3073X_FLAG_DIE_TEMP), ZL_CHIP_INFO(0x1F60, 2, ZL3073X_FLAG_REF_PHASE_COMP_32), - ZL_CHIP_INFO(0x2E93, 1, 0), - ZL_CHIP_INFO(0x2E94, 2, 0), - ZL_CHIP_INFO(0x2E95, 3, 0), - ZL_CHIP_INFO(0x2E96, 4, 0), - ZL_CHIP_INFO(0x2E97, 5, 0), - ZL_CHIP_INFO(0x3FC4, 2, 0), + ZL_CHIP_INFO(0x2E93, 1, ZL3073X_FLAG_DIE_TEMP), + ZL_CHIP_INFO(0x2E94, 2, ZL3073X_FLAG_DIE_TEMP), + ZL_CHIP_INFO(0x2E95, 3, ZL3073X_FLAG_DIE_TEMP), + ZL_CHIP_INFO(0x2E96, 4, ZL3073X_FLAG_DIE_TEMP), + ZL_CHIP_INFO(0x2E97, 5, ZL3073X_FLAG_DIE_TEMP), + ZL_CHIP_INFO(0x3FC4, 2, ZL3073X_FLAG_DIE_TEMP), }; #define ZL_RANGE_OFFSET 0x80 diff --git a/drivers/dpll/zl3073x/core.h b/drivers/dpll/zl3073x/core.h index fde5c8371fbd..b6f22ee1c0bd 100644 --- a/drivers/dpll/zl3073x/core.h +++ b/drivers/dpll/zl3073x/core.h @@ -32,11 +32,13 @@ struct zl3073x_dpll; enum zl3073x_flags { ZL3073X_FLAG_REF_PHASE_COMP_32_BIT, + ZL3073X_FLAG_DIE_TEMP_BIT, ZL3073X_FLAGS_NBITS /* must be last */ }; #define __ZL3073X_FLAG(name) BIT(ZL3073X_FLAG_ ## name ## _BIT) #define ZL3073X_FLAG_REF_PHASE_COMP_32 __ZL3073X_FLAG(REF_PHASE_COMP_32) +#define ZL3073X_FLAG_DIE_TEMP __ZL3073X_FLAG(DIE_TEMP) /** * struct zl3073x_chip_info - chip variant identification diff --git a/drivers/dpll/zl3073x/dpll.c b/drivers/dpll/zl3073x/dpll.c index aaa14ea5e670..c201c974a7f9 100644 --- a/drivers/dpll/zl3073x/dpll.c +++ b/drivers/dpll/zl3073x/dpll.c @@ -1065,6 +1065,25 @@ zl3073x_dpll_output_pin_state_on_dpll_get(const struct dpll_pin *dpll_pin, return 0; } +static int +zl3073x_dpll_temp_get(const struct dpll_device *dpll, void *dpll_priv, + s32 *temp, struct netlink_ext_ack *extack) +{ + struct zl3073x_dpll *zldpll = dpll_priv; + struct zl3073x_dev *zldev = zldpll->dev; + u16 val; + int rc; + + rc = zl3073x_read_u16(zldev, ZL_REG_DIE_TEMP_STATUS, &val); + if (rc) + return rc; + + /* Register value is in units of 0.1 C, convert to millidegrees */ + *temp = (s16)val * 100; + + return 0; +} + static int zl3073x_dpll_lock_status_get(const struct dpll_device *dpll, void *dpll_priv, enum dpll_lock_status *status, @@ -1671,6 +1690,10 @@ zl3073x_dpll_device_register(struct zl3073x_dpll *zldpll) zldpll->forced_ref = FIELD_GET(ZL_DPLL_MODE_REFSEL_REF, dpll_mode_refsel); + zldpll->ops = zl3073x_dpll_device_ops; + if (zldev->info->flags & ZL3073X_FLAG_DIE_TEMP) + zldpll->ops.temp_get = zl3073x_dpll_temp_get; + zldpll->dpll_dev = dpll_device_get(zldev->clock_id, zldpll->id, THIS_MODULE, &zldpll->tracker); if (IS_ERR(zldpll->dpll_dev)) { @@ -1682,7 +1705,7 @@ zl3073x_dpll_device_register(struct zl3073x_dpll *zldpll) rc = dpll_device_register(zldpll->dpll_dev, zl3073x_prop_dpll_type_get(zldev, zldpll->id), - &zl3073x_dpll_device_ops, zldpll); + &zldpll->ops, zldpll); if (rc) { dpll_device_put(zldpll->dpll_dev, &zldpll->tracker); zldpll->dpll_dev = NULL; @@ -1705,8 +1728,7 @@ zl3073x_dpll_device_unregister(struct zl3073x_dpll *zldpll) cancel_work_sync(&zldpll->change_work); - dpll_device_unregister(zldpll->dpll_dev, &zl3073x_dpll_device_ops, - zldpll); + dpll_device_unregister(zldpll->dpll_dev, &zldpll->ops, zldpll); dpll_device_put(zldpll->dpll_dev, &zldpll->tracker); zldpll->dpll_dev = NULL; } diff --git a/drivers/dpll/zl3073x/dpll.h b/drivers/dpll/zl3073x/dpll.h index c65c798c3792..278a24f357c9 100644 --- a/drivers/dpll/zl3073x/dpll.h +++ b/drivers/dpll/zl3073x/dpll.h @@ -17,6 +17,7 @@ * @forced_ref: selected reference in forced reference lock mode * @check_count: periodic check counter * @phase_monitor: is phase offset monitor enabled + * @ops: DPLL device operations for this instance * @dpll_dev: pointer to registered DPLL device * @tracker: tracking object for the acquired reference * @lock_status: last saved DPLL lock status @@ -31,6 +32,7 @@ struct zl3073x_dpll { u8 forced_ref; u8 check_count; bool phase_monitor; + struct dpll_device_ops ops; struct dpll_device *dpll_dev; dpll_tracker tracker; enum dpll_lock_status lock_status; diff --git a/drivers/dpll/zl3073x/regs.h b/drivers/dpll/zl3073x/regs.h index 5573d7188406..19c598daa784 100644 --- a/drivers/dpll/zl3073x/regs.h +++ b/drivers/dpll/zl3073x/regs.h @@ -78,6 +78,8 @@ #define ZL_REG_RESET_STATUS ZL_REG(0, 0x18, 1) #define ZL_REG_RESET_STATUS_RESET BIT(0) +#define ZL_REG_DIE_TEMP_STATUS ZL_REG(0, 0x44, 2) + /************************* * Register Page 2, Status *************************/ From f56438a74d8809fa0c28811cd782a74477994fdd Mon Sep 17 00:00:00 2001 From: MD Danish Anwar Date: Fri, 27 Feb 2026 23:12:54 +0530 Subject: [PATCH 0220/1561] net: ti: icssg: Add HSR/PRP protocol frame filtering Add support for HSR and PRP protocol frame filtering in the ICSSG classifier by configuring filter table 3 (FT3) to detect PTP frames (EtherType 0x88F7) in HSR/PRP tagged packets. Also add rx_class_or_base to miig_rt_offsets structure to support RX_CLASS_OR register access, and fix typos in FT1_N_REG and FT3_N_REG macros (slize -> slice). Signed-off-by: MD Danish Anwar Link: https://patch.msgid.link/20260227174254.3821443-1-danishanwar@ti.com Signed-off-by: Paolo Abeni --- .../net/ethernet/ti/icssg/icssg_classifier.c | 69 ++++++++++++++++++- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 6 ++ drivers/net/ethernet/ti/icssg/icssg_prueth.h | 2 + 3 files changed, 75 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_classifier.c b/drivers/net/ethernet/ti/icssg/icssg_classifier.c index 833ca86d0b71..cde53d9aaa7b 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_classifier.c +++ b/drivers/net/ethernet/ti/icssg/icssg_classifier.c @@ -27,7 +27,7 @@ #define FT1_DA0_MASK 0x8 #define FT1_DA1_MASK 0xc -#define FT1_N_REG(slize, n, reg) \ +#define FT1_N_REG(slice, n, reg) \ (offs[slice].ft1_slot_base + FT1_SLOT_SIZE * (n) + (reg)) #define FT1_LEN_MASK GENMASK(19, 16) @@ -62,7 +62,7 @@ enum ft1_cfg_type { #define FT3_T 0x18 #define FT3_T_MASK 0x1c -#define FT3_N_REG(slize, n, reg) \ +#define FT3_N_REG(slice, n, reg) \ (offs[slice].ft3_slot_base + FT3_SLOT_SIZE * (n) + (reg)) /* offsets from rx_class n's base */ @@ -74,6 +74,9 @@ enum ft1_cfg_type { #define RX_CLASS_N_REG(slice, n, reg) \ (offs[slice].rx_class_base + RX_CLASS_EN_SIZE * (n) + (reg)) +#define RX_CLASS_OR_REG(slice, n, reg) \ + (offs[slice].rx_class_or_base + RX_CLASS_EN_SIZE * (n) + (reg)) + /* RX Class Gates */ #define RX_CLASS_GATES_SIZE 0x4 /* bytes */ @@ -102,6 +105,22 @@ enum ft1_cfg_type { ((BIT(slot) << RX_CLASS_FT_FT1_MATCH_SHIFT) & \ RX_CLASS_FT_FT1_MATCH_MASK) +/* HSR/PRP protocol frame filtering */ +#define HSR_PTP_ETHERTYPE_OFFSET 0x12 +#define PRP_PTP_ETHERTYPE_OFFSET 0x0C +#define FT3_PTP_PATTERN 0xF788 +#define FT3_ETHERTYPE_MASK 0xFFFF0000U +#define FT3_VLAN_MODE_BOTH 0x1 +#define RX_CLASS_OR_DUP_PTP 0x4200 +#define RX_CLASS_OR_HSR_TAG 0x4000 +#define RX_CLASS_GATE_PTP 0x50 +#define RX_CLASS_DISABLE 0x0 + +/* HSR/PRP classifier indices */ +#define CLASSIFIER_PTP_DUP 10 +#define CLASSIFIER_HSR_TAG 11 +#define FT3_PTP_SLOT 14 + /* RX class type */ enum rx_class_sel_type { RX_CLASS_SEL_TYPE_OR = 0, @@ -133,6 +152,7 @@ struct miig_rt_offsets { u32 ft3_p_base; u32 ft_rx_ptr; u32 rx_class_base; + u32 rx_class_or_base; u32 rx_class_cfg1; u32 rx_class_cfg2; u32 rx_class_gates_base; @@ -161,6 +181,7 @@ static const struct miig_rt_offsets offs[] = { 0x308, 0x408, 0x40c, + 0x410, 0x48c, 0x490, 0x494, @@ -186,6 +207,7 @@ static const struct miig_rt_offsets offs[] = { 0x8d4, 0x9d4, 0x9d8, + 0x9dc, 0xa58, 0xa5c, 0xa60, @@ -467,3 +489,46 @@ void icssg_ft1_set_mac_addr(struct regmap *miig_rt, int slice, u8 *mac_addr) rx_class_ft1_cfg_set_type(miig_rt, slice, 0, FT1_CFG_TYPE_EQ); } EXPORT_SYMBOL_GPL(icssg_ft1_set_mac_addr); + +/** + * icssg_ft3_hsr_configurations - Configure filter table for HSR/PRP protocol frames + * @miig_rt: Pointer to the MII-G register map + * @slice: ICSSG slice number (0 or 1) + * @prueth: Pointer to prueth structure to determine HSR/PRP mode + * + * Configures FT3 to detect PTP frames (EtherType 0x88F7) in HSR/PRP tagged packets. + * HSR frames have a 6-byte tag, while PRP has no tag offset for EtherType detection. + */ +void icssg_ft3_hsr_configurations(struct regmap *miig_rt, int slice, + struct prueth *prueth) +{ + u8 offset = (prueth->hsr_prp_version == PRP_V1) ? + PRP_PTP_ETHERTYPE_OFFSET : HSR_PTP_ETHERTYPE_OFFSET; + + regmap_write(miig_rt, FT3_N_REG(slice, FT3_PTP_SLOT, FT3_START), offset); + regmap_write(miig_rt, FT3_N_REG(slice, FT3_PTP_SLOT, FT3_START_AUTO), 0); + regmap_write(miig_rt, FT3_N_REG(slice, FT3_PTP_SLOT, FT3_START_OFFSET), 0); + regmap_write(miig_rt, FT3_N_REG(slice, FT3_PTP_SLOT, FT3_JUMP_OFFSET), 0); + regmap_write(miig_rt, FT3_N_REG(slice, FT3_PTP_SLOT, FT3_LEN), 0); + regmap_write(miig_rt, FT3_N_REG(slice, FT3_PTP_SLOT, FT3_CFG), FT3_VLAN_MODE_BOTH); + regmap_write(miig_rt, FT3_N_REG(slice, FT3_PTP_SLOT, FT3_T), FT3_PTP_PATTERN); + regmap_write(miig_rt, FT3_N_REG(slice, FT3_PTP_SLOT, FT3_T_MASK), + FT3_ETHERTYPE_MASK); + + regmap_write(miig_rt, RX_CLASS_N_REG(slice, CLASSIFIER_PTP_DUP, RX_CLASS_AND_EN), + RX_CLASS_DISABLE); + regmap_write(miig_rt, RX_CLASS_OR_REG(slice, CLASSIFIER_PTP_DUP, RX_CLASS_OR_EN), + RX_CLASS_OR_DUP_PTP); + regmap_write(miig_rt, RX_CLASS_GATES_N_REG(slice, CLASSIFIER_PTP_DUP), + RX_CLASS_GATE_PTP); + + if (prueth->hsr_prp_version != PRP_V1) { + regmap_write(miig_rt, RX_CLASS_N_REG(slice, CLASSIFIER_HSR_TAG, RX_CLASS_AND_EN), + RX_CLASS_DISABLE); + regmap_write(miig_rt, RX_CLASS_OR_REG(slice, CLASSIFIER_HSR_TAG, RX_CLASS_OR_EN), + RX_CLASS_OR_HSR_TAG); + regmap_write(miig_rt, RX_CLASS_GATES_N_REG(slice, CLASSIFIER_HSR_TAG), + RX_CLASS_GATE_PTP); + } +} +EXPORT_SYMBOL_GPL(icssg_ft3_hsr_configurations); diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index 0939994c932f..9efa39069c8c 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -260,6 +260,12 @@ static int prueth_emac_common_start(struct prueth *prueth) icssg_class_default(prueth->miig_rt, ICSS_SLICE0, 0, false); icssg_class_default(prueth->miig_rt, ICSS_SLICE1, 0, false); + /* Configure HSR/PRP protocol filtering if in HSR offload mode */ + if (prueth->is_hsr_offload_mode) { + icssg_ft3_hsr_configurations(prueth->miig_rt, ICSS_SLICE0, prueth); + icssg_ft3_hsr_configurations(prueth->miig_rt, ICSS_SLICE1, prueth); + } + if (prueth->is_switch_mode || prueth->is_hsr_offload_mode) icssg_init_fw_offload_mode(prueth); else diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h b/drivers/net/ethernet/ti/icssg/icssg_prueth.h index 3d94fa5a7ac1..df93d15c5b78 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.h @@ -423,6 +423,8 @@ void icssg_class_promiscuous_sr1(struct regmap *miig_rt, int slice); void icssg_class_add_mcast_sr1(struct regmap *miig_rt, int slice, struct net_device *ndev); void icssg_ft1_set_mac_addr(struct regmap *miig_rt, int slice, u8 *mac_addr); +void icssg_ft3_hsr_configurations(struct regmap *miig_rt, int slice, + struct prueth *prueth); /* config helpers */ void icssg_config_ipg(struct prueth_emac *emac); From e07bd1f7161fd0918e644d97e11608854c3bf14b Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 27 Feb 2026 21:52:16 +0100 Subject: [PATCH 0221/1561] net: ti: davinci_emac: stop using bus type mdio_bus_type This driver is the only user of mdio_bus_type outside phylib. Using mdio_bus_type isn't strictly needed here, so use an alternative approach. This will allow to make mdio_bus_type private to phylib in a follow-up series. Compile-tested only. Note: Devices supported by this driver are OF-only, therefore the string comparison in match_first_device() isn't needed any longer. Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/cc8e83aa-48c3-4497-b6ad-760a7f9e25dc@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/davinci_emac.c | 39 ++++++++++---------------- 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index ed8116fb05e9..3802122d0f1b 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -1389,15 +1389,6 @@ static int emac_devioctl(struct net_device *ndev, struct ifreq *ifrq, int cmd) return -EOPNOTSUPP; } -static int match_first_device(struct device *dev, const void *data) -{ - if (dev->parent && dev->parent->of_node) - return of_device_is_compatible(dev->parent->of_node, - "ti,davinci_mdio"); - - return !strncmp(dev_name(dev), "davinci_mdio", 12); -} - /** * emac_dev_open - EMAC device open * @ndev: The DaVinci EMAC network adapter @@ -1417,7 +1408,6 @@ static int emac_dev_open(struct net_device *ndev) int i = 0; struct emac_priv *priv = netdev_priv(ndev); struct phy_device *phydev = NULL; - struct device *phy = NULL; ret = pm_runtime_resume_and_get(&priv->pdev->dev); if (ret < 0) { @@ -1502,20 +1492,22 @@ static int emac_dev_open(struct net_device *ndev) } } - /* use the first phy on the bus if pdata did not give us a phy id */ + /* if no phy-handle and no fixed link, use the first phy on the bus */ if (!phydev && !priv->phy_id) { - /* NOTE: we can't use bus_find_device_by_name() here because - * the device name is not guaranteed to be 'davinci_mdio'. On - * some systems it can be 'davinci_mdio.0' so we need to use - * strncmp() against the first part of the string to correctly - * match it. - */ - phy = bus_find_device(&mdio_bus_type, NULL, NULL, - match_first_device); - if (phy) { - priv->phy_id = dev_name(phy); - if (!priv->phy_id || !*priv->phy_id) - put_device(phy); + struct device_node *np; + + np = of_find_compatible_node(NULL, NULL, "ti,davinci_mdio"); + if (np) { + struct mii_bus *bus = of_mdio_find_bus(np); + + if (bus) { + struct phy_device *phy = phy_find_first(bus); + + if (phy) + priv->phy_id = phydev_name(phy); + put_device(&bus->dev); /* of_mdio_find_bus */ + } + of_node_put(np); /* of_find_compatible_node */ } } @@ -1523,7 +1515,6 @@ static int emac_dev_open(struct net_device *ndev) phydev = phy_connect(ndev, priv->phy_id, &emac_adjust_link, PHY_INTERFACE_MODE_MII); - put_device(phy); /* reference taken by bus_find_device */ if (IS_ERR(phydev)) { dev_err(emac_dev, "could not connect to phy %s\n", priv->phy_id); From 5c494f404a3fa5181f038b8c328a3e0dc4cc2e18 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 27 Feb 2026 23:05:18 +0100 Subject: [PATCH 0222/1561] net: mdio: extend struct mdio_bus_stat_attr instead of using dev_ext_attribute Currently the var member of struct dev_ext_attribute is used in a very ugly way. Extend struct mdio_bus_stat_attr instead, what allows to simplify the code and also slightly reduces memory footprint. Note: Member addr is renamed to avoid a conflict in macro MDIO_BUS_STATS_ADDR_ATTR_DECL. Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/ce9f85d2-4f72-4b15-b868-210a8ced662d@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/phy/mdio_bus.c | 45 +++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index afdf1ad6c0e6..4fe9859369b1 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -124,10 +124,16 @@ static void mdiobus_release(struct device *d) } struct mdio_bus_stat_attr { - int addr; + struct device_attribute attr; + int address; unsigned int field_offset; }; +static struct mdio_bus_stat_attr *to_sattr(struct device_attribute *attr) +{ + return container_of(attr, struct mdio_bus_stat_attr, attr); +} + static u64 mdio_bus_get_stat(struct mdio_bus_stats *s, unsigned int offset) { const char *p = (const char *)s + offset; @@ -157,18 +163,14 @@ static ssize_t mdio_bus_stat_field_show(struct device *dev, struct device_attribute *attr, char *buf) { + struct mdio_bus_stat_attr *sattr = to_sattr(attr); struct mii_bus *bus = to_mii_bus(dev); - struct mdio_bus_stat_attr *sattr; - struct dev_ext_attribute *eattr; u64 val; - eattr = container_of(attr, struct dev_ext_attribute, attr); - sattr = eattr->var; - - if (sattr->addr < 0) + if (sattr->address < 0) val = mdio_bus_get_global_stat(bus, sattr->field_offset); else - val = mdio_bus_get_stat(&bus->stats[sattr->addr], + val = mdio_bus_get_stat(&bus->stats[sattr->address], sattr->field_offset); return sysfs_emit(buf, "%llu\n", val); @@ -178,37 +180,31 @@ static ssize_t mdio_bus_device_stat_field_show(struct device *dev, struct device_attribute *attr, char *buf) { + struct mdio_bus_stat_attr *sattr = to_sattr(attr); struct mdio_device *mdiodev = to_mdio_device(dev); struct mii_bus *bus = mdiodev->bus; - struct mdio_bus_stat_attr *sattr; - struct dev_ext_attribute *eattr; int addr = mdiodev->addr; u64 val; - eattr = container_of(attr, struct dev_ext_attribute, attr); - sattr = eattr->var; - val = mdio_bus_get_stat(&bus->stats[addr], sattr->field_offset); return sysfs_emit(buf, "%llu\n", val); } #define MDIO_BUS_STATS_ATTR_DECL(field, file) \ -static struct dev_ext_attribute dev_attr_mdio_bus_##field = { \ +static struct mdio_bus_stat_attr dev_attr_mdio_bus_##field = { \ .attr = { .attr = { .name = file, .mode = 0444 }, \ .show = mdio_bus_stat_field_show, \ }, \ - .var = &((struct mdio_bus_stat_attr) { \ - -1, offsetof(struct mdio_bus_stats, field) \ - }), \ + .address = -1, \ + .field_offset = offsetof(struct mdio_bus_stats, field), \ }; \ -static struct dev_ext_attribute dev_attr_mdio_bus_device_##field = { \ +static struct mdio_bus_stat_attr dev_attr_mdio_bus_device_##field = { \ .attr = { .attr = { .name = file, .mode = 0444 }, \ .show = mdio_bus_device_stat_field_show, \ }, \ - .var = &((struct mdio_bus_stat_attr) { \ - -1, offsetof(struct mdio_bus_stats, field) \ - }), \ + .address = -1, \ + .field_offset = offsetof(struct mdio_bus_stats, field), \ }; #define MDIO_BUS_STATS_ATTR(field) \ @@ -220,13 +216,12 @@ MDIO_BUS_STATS_ATTR(writes); MDIO_BUS_STATS_ATTR(reads); #define MDIO_BUS_STATS_ADDR_ATTR_DECL(field, addr, file) \ -static struct dev_ext_attribute dev_attr_mdio_bus_addr_##field##_##addr = { \ +static struct mdio_bus_stat_attr dev_attr_mdio_bus_addr_##field##_##addr = { \ .attr = { .attr = { .name = file, .mode = 0444 }, \ .show = mdio_bus_stat_field_show, \ }, \ - .var = &((struct mdio_bus_stat_attr) { \ - addr, offsetof(struct mdio_bus_stats, field) \ - }), \ + .address = addr, \ + .field_offset = offsetof(struct mdio_bus_stats, field), \ } #define MDIO_BUS_STATS_ADDR_ATTR(field, addr) \ From 807d8addc3ec6e3b89523dc65096594e9e549c1a Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 27 Feb 2026 23:06:28 +0100 Subject: [PATCH 0223/1561] net: mdio: use macro __ATTR to simplify the code Use macro __ATTR to simplify the code. Note that __ATTR can't be used in MDIO_BUS_STATS_ADDR_ATTR_DECL because the included stringification would conflict with how argument file is passed. Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/4877a4dc-247c-4453-b281-20a8d969b15b@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/phy/mdio_bus.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 4fe9859369b1..0c0cf7a7296a 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -191,24 +191,17 @@ static ssize_t mdio_bus_device_stat_field_show(struct device *dev, return sysfs_emit(buf, "%llu\n", val); } -#define MDIO_BUS_STATS_ATTR_DECL(field, file) \ +#define MDIO_BUS_STATS_ATTR(field) \ static struct mdio_bus_stat_attr dev_attr_mdio_bus_##field = { \ - .attr = { .attr = { .name = file, .mode = 0444 }, \ - .show = mdio_bus_stat_field_show, \ - }, \ + .attr = __ATTR(field, 0444, mdio_bus_stat_field_show, NULL), \ .address = -1, \ .field_offset = offsetof(struct mdio_bus_stats, field), \ }; \ static struct mdio_bus_stat_attr dev_attr_mdio_bus_device_##field = { \ - .attr = { .attr = { .name = file, .mode = 0444 }, \ - .show = mdio_bus_device_stat_field_show, \ - }, \ + .attr = __ATTR(field, 0444, mdio_bus_device_stat_field_show, NULL), \ .address = -1, \ .field_offset = offsetof(struct mdio_bus_stats, field), \ -}; - -#define MDIO_BUS_STATS_ATTR(field) \ - MDIO_BUS_STATS_ATTR_DECL(field, __stringify(field)) +} MDIO_BUS_STATS_ATTR(transfers); MDIO_BUS_STATS_ATTR(errors); From 8068acaff1250d836a97ebef4659548b435858f2 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 27 Feb 2026 23:07:16 +0100 Subject: [PATCH 0224/1561] net: phy: consider that mdio_bus_device_stat_field_show doesn't use member address mdio_bus_device_stat_field_show() doesn't use the address member, so we don't have to initialize it. Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/03a812a7-6871-4cc0-b5bf-ee80c6d6b5fd@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/phy/mdio_bus.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 0c0cf7a7296a..f189e9fbcbd9 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -199,7 +199,6 @@ static struct mdio_bus_stat_attr dev_attr_mdio_bus_##field = { \ }; \ static struct mdio_bus_stat_attr dev_attr_mdio_bus_device_##field = { \ .attr = __ATTR(field, 0444, mdio_bus_device_stat_field_show, NULL), \ - .address = -1, \ .field_offset = offsetof(struct mdio_bus_stats, field), \ } From c599649d05a072a6ff7c4012e57cbabab7a2d57e Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 27 Feb 2026 23:07:56 +0100 Subject: [PATCH 0225/1561] net: phy: avoid extra casting in mdio_bus_get_stat Using void * instead of char * allows to remove one cast. Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/054bbf60-d8ac-45ce-8b80-9c396469b7f9@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/phy/mdio_bus.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index f189e9fbcbd9..fb2bcc69c8da 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -136,13 +136,13 @@ static struct mdio_bus_stat_attr *to_sattr(struct device_attribute *attr) static u64 mdio_bus_get_stat(struct mdio_bus_stats *s, unsigned int offset) { - const char *p = (const char *)s + offset; + const u64_stats_t *stats = (const void *)s + offset; unsigned int start; u64 val = 0; do { start = u64_stats_fetch_begin(&s->syncp); - val = u64_stats_read((const u64_stats_t *)p); + val = u64_stats_read(stats); } while (u64_stats_fetch_retry(&s->syncp, start)); return val; From a4c08b701559c5a3039f3bdf0b5286ca02877985 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 27 Feb 2026 23:08:58 +0100 Subject: [PATCH 0226/1561] net: mdio: constify attributes and attribute arrays Constify attributes and attribute arrays, using new member attrs_const of struct attribute_group. Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/c20f17bb-3489-42b5-b8fe-457245ac6cb3@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/phy/mdio_bus.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index fb2bcc69c8da..750b9165086f 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -192,12 +192,12 @@ static ssize_t mdio_bus_device_stat_field_show(struct device *dev, } #define MDIO_BUS_STATS_ATTR(field) \ -static struct mdio_bus_stat_attr dev_attr_mdio_bus_##field = { \ +static const struct mdio_bus_stat_attr dev_attr_mdio_bus_##field = { \ .attr = __ATTR(field, 0444, mdio_bus_stat_field_show, NULL), \ .address = -1, \ .field_offset = offsetof(struct mdio_bus_stats, field), \ }; \ -static struct mdio_bus_stat_attr dev_attr_mdio_bus_device_##field = { \ +static const struct mdio_bus_stat_attr dev_attr_mdio_bus_device_##field = { \ .attr = __ATTR(field, 0444, mdio_bus_device_stat_field_show, NULL), \ .field_offset = offsetof(struct mdio_bus_stats, field), \ } @@ -208,7 +208,8 @@ MDIO_BUS_STATS_ATTR(writes); MDIO_BUS_STATS_ATTR(reads); #define MDIO_BUS_STATS_ADDR_ATTR_DECL(field, addr, file) \ -static struct mdio_bus_stat_attr dev_attr_mdio_bus_addr_##field##_##addr = { \ +static const struct mdio_bus_stat_attr \ +dev_attr_mdio_bus_addr_##field##_##addr = { \ .attr = { .attr = { .name = file, .mode = 0444 }, \ .show = mdio_bus_stat_field_show, \ }, \ @@ -265,7 +266,7 @@ MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(31); &dev_attr_mdio_bus_addr_writes_##addr.attr.attr, \ &dev_attr_mdio_bus_addr_reads_##addr.attr.attr \ -static struct attribute *mdio_bus_statistics_attrs[] = { +static const struct attribute *const mdio_bus_statistics_attrs[] = { &dev_attr_mdio_bus_transfers.attr.attr, &dev_attr_mdio_bus_errors.attr.attr, &dev_attr_mdio_bus_writes.attr.attr, @@ -306,8 +307,8 @@ static struct attribute *mdio_bus_statistics_attrs[] = { }; static const struct attribute_group mdio_bus_statistics_group = { - .name = "statistics", - .attrs = mdio_bus_statistics_attrs, + .name = "statistics", + .attrs_const = mdio_bus_statistics_attrs, }; static const struct attribute_group *mdio_bus_groups[] = { @@ -973,7 +974,7 @@ static int mdio_uevent(const struct device *dev, struct kobj_uevent_env *env) return 0; } -static struct attribute *mdio_bus_device_statistics_attrs[] = { +static const struct attribute *const mdio_bus_device_statistics_attrs[] = { &dev_attr_mdio_bus_device_transfers.attr.attr, &dev_attr_mdio_bus_device_errors.attr.attr, &dev_attr_mdio_bus_device_writes.attr.attr, @@ -982,8 +983,8 @@ static struct attribute *mdio_bus_device_statistics_attrs[] = { }; static const struct attribute_group mdio_bus_device_statistics_group = { - .name = "statistics", - .attrs = mdio_bus_device_statistics_attrs, + .name = "statistics", + .attrs_const = mdio_bus_device_statistics_attrs, }; static const struct attribute_group *mdio_bus_dev_groups[] = { From 8e0bdf30be75e7af8dc8349205b100c7a9889ff2 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 27 Feb 2026 23:09:35 +0100 Subject: [PATCH 0227/1561] net: mdio: use macro __ATTRIBUTE_GROUPS Use macro __ATTRIBUTE_GROUPS() to simplify the code. Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/260fb184-c662-415c-b288-e1423097f2b9@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/phy/mdio_bus.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 750b9165086f..0e2820ab8cbe 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -310,16 +310,12 @@ static const struct attribute_group mdio_bus_statistics_group = { .name = "statistics", .attrs_const = mdio_bus_statistics_attrs, }; - -static const struct attribute_group *mdio_bus_groups[] = { - &mdio_bus_statistics_group, - NULL, -}; +__ATTRIBUTE_GROUPS(mdio_bus_statistics); const struct class mdio_bus_class = { .name = "mdio_bus", .dev_release = mdiobus_release, - .dev_groups = mdio_bus_groups, + .dev_groups = mdio_bus_statistics_groups, }; EXPORT_SYMBOL_GPL(mdio_bus_class); @@ -986,15 +982,11 @@ static const struct attribute_group mdio_bus_device_statistics_group = { .name = "statistics", .attrs_const = mdio_bus_device_statistics_attrs, }; - -static const struct attribute_group *mdio_bus_dev_groups[] = { - &mdio_bus_device_statistics_group, - NULL, -}; +__ATTRIBUTE_GROUPS(mdio_bus_device_statistics); const struct bus_type mdio_bus_type = { .name = "mdio_bus", - .dev_groups = mdio_bus_dev_groups, + .dev_groups = mdio_bus_device_statistics_groups, .match = mdio_bus_match, .uevent = mdio_uevent, }; From 7f97ca5f9858629f618d7f3b5a16c0d6e48e4353 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 27 Feb 2026 23:10:17 +0100 Subject: [PATCH 0228/1561] net: phy: inline helper mdio_bus_get_global_stat mdio_bus_get_global_stat() has only one user. Inline it to simplify the code. Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/7876625a-bd6f-42b4-8eb3-420f39d2f59a@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/phy/mdio_bus.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 0e2820ab8cbe..48c0447e6a8f 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -148,30 +148,23 @@ static u64 mdio_bus_get_stat(struct mdio_bus_stats *s, unsigned int offset) return val; } -static u64 mdio_bus_get_global_stat(struct mii_bus *bus, unsigned int offset) -{ - unsigned int i; - u64 val = 0; - - for (i = 0; i < PHY_MAX_ADDR; i++) - val += mdio_bus_get_stat(&bus->stats[i], offset); - - return val; -} - static ssize_t mdio_bus_stat_field_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mdio_bus_stat_attr *sattr = to_sattr(attr); struct mii_bus *bus = to_mii_bus(dev); - u64 val; + u64 val = 0; - if (sattr->address < 0) - val = mdio_bus_get_global_stat(bus, sattr->field_offset); - else + if (sattr->address < 0) { + /* get global stats */ + for (int i = 0; i < PHY_MAX_ADDR; i++) + val += mdio_bus_get_stat(&bus->stats[i], + sattr->field_offset); + } else { val = mdio_bus_get_stat(&bus->stats[sattr->address], sattr->field_offset); + } return sysfs_emit(buf, "%llu\n", val); } From 1afccc5a201ec7c9023370958bae1312369b64da Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 27 Feb 2026 23:11:02 +0100 Subject: [PATCH 0229/1561] net: phy: improve mdiobus_stats_acct - Remove duplicated preempt disable. Disabling preemption has been added to functions like u64_stats_update_begin() in the meantime. - Simplify branch structure Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/2ceeb542-986a-404e-ad0f-62e0a938ce7c@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/phy/mdio_bus.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 48c0447e6a8f..b32a369cd25f 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -358,22 +358,17 @@ EXPORT_SYMBOL(of_mdio_find_bus); static void mdiobus_stats_acct(struct mdio_bus_stats *stats, bool op, int ret) { - preempt_disable(); u64_stats_update_begin(&stats->syncp); u64_stats_inc(&stats->transfers); - if (ret < 0) { + if (ret < 0) u64_stats_inc(&stats->errors); - goto out; - } - - if (op) + else if (op) u64_stats_inc(&stats->reads); else u64_stats_inc(&stats->writes); -out: + u64_stats_update_end(&stats->syncp); - preempt_enable(); } /** From dc0cdb7ff3b1e1ff2faf594640724c2771a8b2c6 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 27 Jan 2026 16:16:55 +0000 Subject: [PATCH 0230/1561] ice: Make name member of struct ice_cgu_pin_desc const The name member of struct ice_cgu_pin_desc never modified. Make it const. Found by inspection. Compile tested only. Signed-off-by: Simon Horman Reviewed-by: Paul Menzel Reviewed-by: Aleksandr Loktionov Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ptp_hw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h index 5896b346e579..9bfd3e79c580 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h @@ -258,7 +258,7 @@ enum ice_si_cgu_out_pins { }; struct ice_cgu_pin_desc { - char *name; + const char *name; u8 index; enum dpll_pin_type type; u32 freq_supp_num; From 1f3d49734820fdc30655b9ee4e51aede2bf86d45 Mon Sep 17 00:00:00 2001 From: "Yury Norov (NVIDIA)" Date: Wed, 17 Dec 2025 20:57:57 -0500 Subject: [PATCH 0231/1561] i40e: drop useless bitmap_weight() call in i40e_set_rxfh_fields() bitmap_weight() is O(N) and useless here, because the following for_each_set_bit() returns immediately in case of empty flow_pctypes. Signed-off-by: Yury Norov (NVIDIA) Reviewed-by: Simon Horman Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- .../net/ethernet/intel/i40e/i40e_ethtool.c | 21 +++++++------------ 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 8c27f7ba4adc..3da9ec49cc74 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -3624,6 +3624,7 @@ static int i40e_set_rxfh_fields(struct net_device *netdev, ((u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1)) << 32); DECLARE_BITMAP(flow_pctypes, FLOW_PCTYPES_SIZE); u64 i_set, i_setc; + u8 flow_id; bitmap_zero(flow_pctypes, FLOW_PCTYPES_SIZE); @@ -3707,20 +3708,14 @@ static int i40e_set_rxfh_fields(struct net_device *netdev, return -EINVAL; } - if (bitmap_weight(flow_pctypes, FLOW_PCTYPES_SIZE)) { - u8 flow_id; + for_each_set_bit(flow_id, flow_pctypes, FLOW_PCTYPES_SIZE) { + i_setc = (u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(0, flow_id)) | + ((u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(1, flow_id)) << 32); + i_set = i40e_get_rss_hash_bits(&pf->hw, nfc, i_setc); - for_each_set_bit(flow_id, flow_pctypes, FLOW_PCTYPES_SIZE) { - i_setc = (u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(0, flow_id)) | - ((u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(1, flow_id)) << 32); - i_set = i40e_get_rss_hash_bits(&pf->hw, nfc, i_setc); - - i40e_write_rx_ctl(hw, I40E_GLQF_HASH_INSET(0, flow_id), - (u32)i_set); - i40e_write_rx_ctl(hw, I40E_GLQF_HASH_INSET(1, flow_id), - (u32)(i_set >> 32)); - hena |= BIT_ULL(flow_id); - } + i40e_write_rx_ctl(hw, I40E_GLQF_HASH_INSET(0, flow_id), (u32)i_set); + i40e_write_rx_ctl(hw, I40E_GLQF_HASH_INSET(1, flow_id), (u32)(i_set >> 32)); + hena |= BIT_ULL(flow_id); } i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (u32)hena); From b09621cc0dfd7d2ae8557ee9d0d7ca6dc75145e5 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 2 Mar 2026 10:18:31 +0100 Subject: [PATCH 0232/1561] i40e: Add missing wordpart.h header When cleaning up another header I have met this build error: drivers/net/ethernet/intel/i40e/i40e_hmc.h:105:22: error: implicit declaration of function 'upper_32_bits' [-Wimplicit-function-declaration] 105 | val1 = (u32)(upper_32_bits(pa)); \ This is due to missing header, add it to fix the possible issue. Reviewed-by: Paul Menzel Reviewed-by: Aleksandr Loktionov Signed-off-by: Andy Shevchenko Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_hmc.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_hmc.h b/drivers/net/ethernet/intel/i40e/i40e_hmc.h index 480e3a883cc7..967711405919 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_hmc.h +++ b/drivers/net/ethernet/intel/i40e/i40e_hmc.h @@ -4,6 +4,8 @@ #ifndef _I40E_HMC_H_ #define _I40E_HMC_H_ +#include + #include "i40e_alloc.h" #include "i40e_io.h" #include "i40e_register.h" From 246c5495c69fae1ec96531d79106c946ea904312 Mon Sep 17 00:00:00 2001 From: Aleksandr Loktionov Date: Thu, 22 Jan 2026 09:50:39 +0100 Subject: [PATCH 0233/1561] ixgbe: refactor: use DECLARE_BITMAP for ring state field Convert the ring state field from 'unsigned long' to a proper bitmap using DECLARE_BITMAP macro, aligning with the implementation pattern already used in the i40e driver. This change: - Adds __IXGBE_RING_STATE_NBITS as the bitmap size sentinel to enum ixgbe_ring_state_t (consistent with i40e's __I40E_RING_STATE_NBITS) - Changes 'unsigned long state' to 'DECLARE_BITMAP(state, __IXGBE_RING_STATE_NBITS)' in struct ixgbe_ring - Removes the address-of operator (&) when passing ring->state to bit manipulation functions, as bitmap arrays naturally decay to pointers The change maintains functional equivalence while using the more appropriate kernel bitmap API, consistent with other Intel Ethernet drivers. Signed-off-by: Aleksandr Loktionov Reviewed-by: Marcin Szycik Reviewed-by: Paul Menzel Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 27 ++++----- drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c | 4 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 56 +++++++++---------- drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 2 +- 4 files changed, 45 insertions(+), 44 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index dce4936708eb..59a1cee40b43 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -322,10 +322,11 @@ enum ixgbe_ring_state_t { __IXGBE_HANG_CHECK_ARMED, __IXGBE_TX_XDP_RING, __IXGBE_TX_DISABLED, + __IXGBE_RING_STATE_NBITS, /* must be last */ }; #define ring_uses_build_skb(ring) \ - test_bit(__IXGBE_RX_BUILD_SKB_ENABLED, &(ring)->state) + test_bit(__IXGBE_RX_BUILD_SKB_ENABLED, (ring)->state) struct ixgbe_fwd_adapter { unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; @@ -336,23 +337,23 @@ struct ixgbe_fwd_adapter { }; #define check_for_tx_hang(ring) \ - test_bit(__IXGBE_TX_DETECT_HANG, &(ring)->state) + test_bit(__IXGBE_TX_DETECT_HANG, (ring)->state) #define set_check_for_tx_hang(ring) \ - set_bit(__IXGBE_TX_DETECT_HANG, &(ring)->state) + set_bit(__IXGBE_TX_DETECT_HANG, (ring)->state) #define clear_check_for_tx_hang(ring) \ - clear_bit(__IXGBE_TX_DETECT_HANG, &(ring)->state) + clear_bit(__IXGBE_TX_DETECT_HANG, (ring)->state) #define ring_is_rsc_enabled(ring) \ - test_bit(__IXGBE_RX_RSC_ENABLED, &(ring)->state) + test_bit(__IXGBE_RX_RSC_ENABLED, (ring)->state) #define set_ring_rsc_enabled(ring) \ - set_bit(__IXGBE_RX_RSC_ENABLED, &(ring)->state) + set_bit(__IXGBE_RX_RSC_ENABLED, (ring)->state) #define clear_ring_rsc_enabled(ring) \ - clear_bit(__IXGBE_RX_RSC_ENABLED, &(ring)->state) + clear_bit(__IXGBE_RX_RSC_ENABLED, (ring)->state) #define ring_is_xdp(ring) \ - test_bit(__IXGBE_TX_XDP_RING, &(ring)->state) + test_bit(__IXGBE_TX_XDP_RING, (ring)->state) #define set_ring_xdp(ring) \ - set_bit(__IXGBE_TX_XDP_RING, &(ring)->state) + set_bit(__IXGBE_TX_XDP_RING, (ring)->state) #define clear_ring_xdp(ring) \ - clear_bit(__IXGBE_TX_XDP_RING, &(ring)->state) + clear_bit(__IXGBE_TX_XDP_RING, (ring)->state) struct ixgbe_ring { struct ixgbe_ring *next; /* pointer to next ring in q_vector */ struct ixgbe_q_vector *q_vector; /* backpointer to host q_vector */ @@ -364,7 +365,7 @@ struct ixgbe_ring { struct ixgbe_tx_buffer *tx_buffer_info; struct ixgbe_rx_buffer *rx_buffer_info; }; - unsigned long state; + DECLARE_BITMAP(state, __IXGBE_RING_STATE_NBITS); u8 __iomem *tail; dma_addr_t dma; /* phys. address of descriptor ring */ unsigned int size; /* length in bytes */ @@ -453,7 +454,7 @@ struct ixgbe_ring_feature { */ static inline unsigned int ixgbe_rx_bufsz(struct ixgbe_ring *ring) { - if (test_bit(__IXGBE_RX_3K_BUFFER, &ring->state)) + if (test_bit(__IXGBE_RX_3K_BUFFER, ring->state)) return IXGBE_RXBUFFER_3K; #if (PAGE_SIZE < 8192) if (ring_uses_build_skb(ring)) @@ -465,7 +466,7 @@ static inline unsigned int ixgbe_rx_bufsz(struct ixgbe_ring *ring) static inline unsigned int ixgbe_rx_pg_order(struct ixgbe_ring *ring) { #if (PAGE_SIZE < 8192) - if (test_bit(__IXGBE_RX_3K_BUFFER, &ring->state)) + if (test_bit(__IXGBE_RX_3K_BUFFER, ring->state)) return 1; #endif return 0; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c index 87abb36f50c1..1db4bd5cc2ba 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c @@ -976,7 +976,7 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter, * can be marked as checksum errors. */ if (adapter->hw.mac.type == ixgbe_mac_82599EB) - set_bit(__IXGBE_RX_CSUM_UDP_ZERO_ERR, &ring->state); + set_bit(__IXGBE_RX_CSUM_UDP_ZERO_ERR, ring->state); #ifdef IXGBE_FCOE if (adapter->netdev->fcoe_mtu) { @@ -984,7 +984,7 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter, f = &adapter->ring_feature[RING_F_FCOE]; if ((rxr_idx >= f->offset) && (rxr_idx < f->offset + f->indices)) - set_bit(__IXGBE_RX_FCOE, &ring->state); + set_bit(__IXGBE_RX_FCOE, ring->state); } #endif /* IXGBE_FCOE */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index e4101c59074d..0bc806aaed90 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -968,7 +968,7 @@ static void ixgbe_update_xoff_rx_lfc(struct ixgbe_adapter *adapter) for (i = 0; i < adapter->num_tx_queues; i++) clear_bit(__IXGBE_HANG_CHECK_ARMED, - &adapter->tx_ring[i]->state); + adapter->tx_ring[i]->state); } static void ixgbe_update_xoff_received(struct ixgbe_adapter *adapter) @@ -1011,7 +1011,7 @@ static void ixgbe_update_xoff_received(struct ixgbe_adapter *adapter) tc = tx_ring->dcb_tc; if (xoff[tc]) - clear_bit(__IXGBE_HANG_CHECK_ARMED, &tx_ring->state); + clear_bit(__IXGBE_HANG_CHECK_ARMED, tx_ring->state); } for (i = 0; i < adapter->num_xdp_queues; i++) { @@ -1019,7 +1019,7 @@ static void ixgbe_update_xoff_received(struct ixgbe_adapter *adapter) tc = xdp_ring->dcb_tc; if (xoff[tc]) - clear_bit(__IXGBE_HANG_CHECK_ARMED, &xdp_ring->state); + clear_bit(__IXGBE_HANG_CHECK_ARMED, xdp_ring->state); } } @@ -1103,11 +1103,11 @@ static bool ixgbe_check_tx_hang(struct ixgbe_ring *tx_ring) if (tx_done_old == tx_done && tx_pending) /* make sure it is true for two checks in a row */ return test_and_set_bit(__IXGBE_HANG_CHECK_ARMED, - &tx_ring->state); + tx_ring->state); /* update completed stats and continue */ tx_ring->tx_stats.tx_done_old = tx_done; /* reset the countdown */ - clear_bit(__IXGBE_HANG_CHECK_ARMED, &tx_ring->state); + clear_bit(__IXGBE_HANG_CHECK_ARMED, tx_ring->state); return false; } @@ -1660,7 +1660,7 @@ static inline bool ixgbe_rx_is_fcoe(struct ixgbe_ring *ring, { __le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info; - return test_bit(__IXGBE_RX_FCOE, &ring->state) && + return test_bit(__IXGBE_RX_FCOE, ring->state) && ((pkt_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_ETQF_MASK)) == (cpu_to_le16(IXGBE_ETQF_FILTER_FCOE << IXGBE_RXDADV_PKTTYPE_ETQF_SHIFT))); @@ -1708,7 +1708,7 @@ static inline void ixgbe_rx_checksum(struct ixgbe_ring *ring, * checksum errors. */ if ((pkt_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_UDP)) && - test_bit(__IXGBE_RX_CSUM_UDP_ZERO_ERR, &ring->state)) + test_bit(__IXGBE_RX_CSUM_UDP_ZERO_ERR, ring->state)) return; ring->rx_stats.csum_err++; @@ -3526,7 +3526,7 @@ static irqreturn_t ixgbe_msix_other(int irq, void *data) for (i = 0; i < adapter->num_tx_queues; i++) { struct ixgbe_ring *ring = adapter->tx_ring[i]; if (test_and_clear_bit(__IXGBE_TX_FDIR_INIT_DONE, - &ring->state)) + ring->state)) reinit_count++; } if (reinit_count) { @@ -3952,13 +3952,13 @@ void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter, if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) { ring->atr_sample_rate = adapter->atr_sample_rate; ring->atr_count = 0; - set_bit(__IXGBE_TX_FDIR_INIT_DONE, &ring->state); + set_bit(__IXGBE_TX_FDIR_INIT_DONE, ring->state); } else { ring->atr_sample_rate = 0; } /* initialize XPS */ - if (!test_and_set_bit(__IXGBE_TX_XPS_INIT_DONE, &ring->state)) { + if (!test_and_set_bit(__IXGBE_TX_XPS_INIT_DONE, ring->state)) { struct ixgbe_q_vector *q_vector = ring->q_vector; if (q_vector) @@ -3967,7 +3967,7 @@ void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter, ring->queue_index); } - clear_bit(__IXGBE_HANG_CHECK_ARMED, &ring->state); + clear_bit(__IXGBE_HANG_CHECK_ARMED, ring->state); /* reinitialize tx_buffer_info */ memset(ring->tx_buffer_info, 0, @@ -4173,7 +4173,7 @@ static void ixgbe_configure_srrctl(struct ixgbe_adapter *adapter, srrctl |= PAGE_SIZE >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; else srrctl |= xsk_buf_len >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; - } else if (test_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state)) { + } else if (test_bit(__IXGBE_RX_3K_BUFFER, rx_ring->state)) { srrctl |= IXGBE_RXBUFFER_3K >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; } else { srrctl |= IXGBE_RXBUFFER_2K >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; @@ -4558,7 +4558,7 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter, * higher than the MTU of the PF. */ if (ring_uses_build_skb(ring) && - !test_bit(__IXGBE_RX_3K_BUFFER, &ring->state)) + !test_bit(__IXGBE_RX_3K_BUFFER, ring->state)) rxdctl |= IXGBE_MAX_2K_FRAME_BUILD_SKB | IXGBE_RXDCTL_RLPML_EN; #endif @@ -4733,27 +4733,27 @@ static void ixgbe_set_rx_buffer_len(struct ixgbe_adapter *adapter) rx_ring = adapter->rx_ring[i]; clear_ring_rsc_enabled(rx_ring); - clear_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state); - clear_bit(__IXGBE_RX_BUILD_SKB_ENABLED, &rx_ring->state); + clear_bit(__IXGBE_RX_3K_BUFFER, rx_ring->state); + clear_bit(__IXGBE_RX_BUILD_SKB_ENABLED, rx_ring->state); if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) set_ring_rsc_enabled(rx_ring); - if (test_bit(__IXGBE_RX_FCOE, &rx_ring->state)) - set_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state); + if (test_bit(__IXGBE_RX_FCOE, rx_ring->state)) + set_bit(__IXGBE_RX_3K_BUFFER, rx_ring->state); if (adapter->flags2 & IXGBE_FLAG2_RX_LEGACY) continue; - set_bit(__IXGBE_RX_BUILD_SKB_ENABLED, &rx_ring->state); + set_bit(__IXGBE_RX_BUILD_SKB_ENABLED, rx_ring->state); #if (PAGE_SIZE < 8192) if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) - set_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state); + set_bit(__IXGBE_RX_3K_BUFFER, rx_ring->state); if (IXGBE_2K_TOO_SMALL_WITH_PADDING || (max_frame > (ETH_FRAME_LEN + ETH_FCS_LEN))) - set_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state); + set_bit(__IXGBE_RX_3K_BUFFER, rx_ring->state); #endif } } @@ -7944,10 +7944,10 @@ static void ixgbe_fdir_reinit_subtask(struct ixgbe_adapter *adapter) if (ixgbe_reinit_fdir_tables_82599(hw) == 0) { for (i = 0; i < adapter->num_tx_queues; i++) set_bit(__IXGBE_TX_FDIR_INIT_DONE, - &(adapter->tx_ring[i]->state)); + adapter->tx_ring[i]->state); for (i = 0; i < adapter->num_xdp_queues; i++) set_bit(__IXGBE_TX_FDIR_INIT_DONE, - &adapter->xdp_ring[i]->state); + adapter->xdp_ring[i]->state); /* re-enable flow director interrupts */ IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR); } else { @@ -9488,7 +9488,7 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb, ixgbe_tx_csum(tx_ring, first, &ipsec_tx); /* add the ATR filter if ATR is on */ - if (test_bit(__IXGBE_TX_FDIR_INIT_DONE, &tx_ring->state)) + if (test_bit(__IXGBE_TX_FDIR_INIT_DONE, tx_ring->state)) ixgbe_atr(tx_ring, first); #ifdef IXGBE_FCOE @@ -9528,7 +9528,7 @@ static netdev_tx_t __ixgbe_xmit_frame(struct sk_buff *skb, return NETDEV_TX_OK; tx_ring = ring ? ring : adapter->tx_ring[skb_get_queue_mapping(skb)]; - if (unlikely(test_bit(__IXGBE_TX_DISABLED, &tx_ring->state))) + if (unlikely(test_bit(__IXGBE_TX_DISABLED, tx_ring->state))) return NETDEV_TX_BUSY; return ixgbe_xmit_frame_ring(skb, adapter, tx_ring); @@ -11013,7 +11013,7 @@ static int ixgbe_xdp_xmit(struct net_device *dev, int n, if (unlikely(!ring)) return -ENXIO; - if (unlikely(test_bit(__IXGBE_TX_DISABLED, &ring->state))) + if (unlikely(test_bit(__IXGBE_TX_DISABLED, ring->state))) return -ENXIO; if (static_branch_unlikely(&ixgbe_xdp_locking_key)) @@ -11119,7 +11119,7 @@ static void ixgbe_disable_txr_hw(struct ixgbe_adapter *adapter, static void ixgbe_disable_txr(struct ixgbe_adapter *adapter, struct ixgbe_ring *tx_ring) { - set_bit(__IXGBE_TX_DISABLED, &tx_ring->state); + set_bit(__IXGBE_TX_DISABLED, tx_ring->state); ixgbe_disable_txr_hw(adapter, tx_ring); } @@ -11273,9 +11273,9 @@ void ixgbe_txrx_ring_enable(struct ixgbe_adapter *adapter, int ring) ixgbe_configure_tx_ring(adapter, xdp_ring); ixgbe_configure_rx_ring(adapter, rx_ring); - clear_bit(__IXGBE_TX_DISABLED, &tx_ring->state); + clear_bit(__IXGBE_TX_DISABLED, tx_ring->state); if (xdp_ring) - clear_bit(__IXGBE_TX_DISABLED, &xdp_ring->state); + clear_bit(__IXGBE_TX_DISABLED, xdp_ring->state); /* Rx/Tx/XDP Tx share the same napi context. */ napi_enable(&rx_ring->q_vector->napi); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index 7b941505a9d0..89f96c463f02 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -524,7 +524,7 @@ int ixgbe_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags) ring = adapter->xdp_ring[qid]; - if (test_bit(__IXGBE_TX_DISABLED, &ring->state)) + if (test_bit(__IXGBE_TX_DISABLED, ring->state)) return -ENETDOWN; if (!ring->xsk_pool) From e63f5918adb8efa7d9609585318db773ac7cd241 Mon Sep 17 00:00:00 2001 From: Tomasz Unger Date: Sun, 1 Mar 2026 13:12:54 +0100 Subject: [PATCH 0234/1561] NFC: pn544: i2c: Replace strcpy() with strscpy() Replace strcpy() with strscpy() which limits the copy to the size of the destination buffer. Since phy->firmware_name is an array, the two-argument variant of strscpy() is used - the compiler deduces the buffer size automatically. This is a defensive cleanup. As pointed out by Jakub Kicinski , firmware_name is already bounded to NFC_FIRMWARE_NAME_MAXSIZE via nla_strscpy() in net/nfc/netlink.c before reaching this driver, so no actual buffer overflow is possible. Signed-off-by: Tomasz Unger Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260301121254.174354-1-tomasz.unger@yahoo.pl Signed-off-by: Jakub Kicinski --- drivers/nfc/pn544/i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nfc/pn544/i2c.c b/drivers/nfc/pn544/i2c.c index a0dfb3f98d5a..b31b5bef7187 100644 --- a/drivers/nfc/pn544/i2c.c +++ b/drivers/nfc/pn544/i2c.c @@ -526,7 +526,7 @@ static int pn544_hci_i2c_fw_download(void *phy_id, const char *firmware_name, pr_info("Starting Firmware Download (%s)\n", firmware_name); - strcpy(phy->firmware_name, firmware_name); + strscpy(phy->firmware_name, firmware_name); phy->hw_variant = hw_variant; phy->fw_work_state = FW_WORK_STATE_START; From 66e807f96f4e895283ac27ebd0e2513d7c8da557 Mon Sep 17 00:00:00 2001 From: Tomasz Unger Date: Sun, 1 Mar 2026 14:56:33 +0100 Subject: [PATCH 0235/1561] NFC: nxp-nci: Replace strcpy() with strscpy() Replace strcpy() with strscpy() which limits the copy to the size of the destination buffer. Since fw_info->name is an array, the two-argument variant of strscpy() is used - the compiler deduces the buffer size automatically. This is a defensive cleanup replacing the deprecated strcpy() with the preferred strscpy(). Signed-off-by: Tomasz Unger Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260301135633.214497-1-tomasz.unger@yahoo.pl Signed-off-by: Jakub Kicinski --- drivers/nfc/nxp-nci/firmware.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nfc/nxp-nci/firmware.c b/drivers/nfc/nxp-nci/firmware.c index 381b5bb75477..a9533977aff8 100644 --- a/drivers/nfc/nxp-nci/firmware.c +++ b/drivers/nfc/nxp-nci/firmware.c @@ -211,7 +211,7 @@ int nxp_nci_fw_download(struct nci_dev *ndev, const char *firmware_name) goto fw_download_exit; } - strcpy(fw_info->name, firmware_name); + strscpy(fw_info->name, firmware_name); r = request_firmware(&fw_info->fw, firmware_name, ndev->nfc_dev->dev.parent); From c49a9eb650d5b6c9a19901e9055ad4e0a0d2386e Mon Sep 17 00:00:00 2001 From: Tomasz Unger Date: Sun, 1 Mar 2026 15:43:45 +0100 Subject: [PATCH 0236/1561] NFC: nfcmrvl: Replace strcpy() with strscpy() Replace strcpy() with strscpy() which limits the copy to the size of the destination buffer. Since fw_dnld->name is an array, the two-argument variant of strscpy() is used - the compiler deduces the buffer size automatically. This is a defensive cleanup replacing the deprecated strcpy() with the preferred strscpy(). Signed-off-by: Tomasz Unger Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260301144345.218628-1-tomasz.unger@yahoo.pl Signed-off-by: Jakub Kicinski --- drivers/nfc/nfcmrvl/fw_dnld.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nfc/nfcmrvl/fw_dnld.c b/drivers/nfc/nfcmrvl/fw_dnld.c index a9b03dcc4100..2b8f401d8fd7 100644 --- a/drivers/nfc/nfcmrvl/fw_dnld.c +++ b/drivers/nfc/nfcmrvl/fw_dnld.c @@ -492,7 +492,7 @@ int nfcmrvl_fw_dnld_start(struct nci_dev *ndev, const char *firmware_name) if (!firmware_name || !firmware_name[0]) return -EINVAL; - strcpy(fw_dnld->name, firmware_name); + strscpy(fw_dnld->name, firmware_name); /* * Retrieve FW binary file and parse it to initialize FW download From 8ce185c7e00dbddfdea026acd5dcf7b122af3db0 Mon Sep 17 00:00:00 2001 From: Tomasz Unger Date: Mon, 2 Mar 2026 11:09:08 +0100 Subject: [PATCH 0237/1561] NFC: s3fwrn5: Replace strcpy() with strscpy() Replace strcpy() with strscpy() which limits the copy to the size of the destination buffer. Since fw_info->fw_name is an array with a fixed, declared size, the two-argument variant of strscpy() is used - the compiler deduces the buffer size automatically. This is a defensive cleanup replacing the deprecated strcpy() with the preferred strscpy(). Signed-off-by: Tomasz Unger Reviewed-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20260302100908.26399-1-tomasz.unger@yahoo.pl Signed-off-by: Jakub Kicinski --- drivers/nfc/s3fwrn5/firmware.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nfc/s3fwrn5/firmware.c b/drivers/nfc/s3fwrn5/firmware.c index 64d61b2a715a..9145deec7f6c 100644 --- a/drivers/nfc/s3fwrn5/firmware.c +++ b/drivers/nfc/s3fwrn5/firmware.c @@ -454,7 +454,7 @@ void s3fwrn5_fw_init(struct s3fwrn5_fw_info *fw_info, const char *fw_name) fw_info->parity = 0x00; fw_info->rsp = NULL; fw_info->fw.fw = NULL; - strcpy(fw_info->fw_name, fw_name); + strscpy(fw_info->fw_name, fw_name); init_completion(&fw_info->completion); } From 39feb171f361f887dad8504dc5822b852871ac21 Mon Sep 17 00:00:00 2001 From: Kohei Enju Date: Fri, 20 Feb 2026 11:09:17 +0000 Subject: [PATCH 0238/1561] net: core: allow netdev_upper_get_next_dev_rcu from bh context Since XDP programs are called from a NAPI poll context, the RCU reference liveness is ensured by local_bh_disable(). Commit aeea1b86f936 ("bpf, devmap: Exclude XDP broadcast to master device") started to call netdev_upper_get_next_dev_rcu() from this context, but missed adding rcu_read_lock_bh_held() as a condition to the RCU checks. While both bh_disabled and rcu_read_lock() provide RCU protection, lockdep complains since the check condition is insufficient [1]. Add rcu_read_lock_bh_held() as condition to help lockdep to understand the dereference is safe, in the same way as commit 694cea395fde ("bpf: Allow RCU-protected lookups to happen from bh context"). [1] WARNING: net/core/dev.c:8099 at netdev_upper_get_next_dev_rcu+0x96/0xd0, CPU#0: swapper/0/0 ... RIP: 0010:netdev_upper_get_next_dev_rcu+0x96/0xd0 ... dev_map_enqueue_multi+0x411/0x970 xdp_do_redirect+0xdf2/0x1030 __igc_xdp_run_prog+0x6a0/0xc80 igc_poll+0x34b0/0x70b0 __napi_poll.constprop.0+0x98/0x490 net_rx_action+0x8f2/0xfa0 handle_softirqs+0x1c7/0x710 __irq_exit_rcu+0xb1/0xf0 irq_exit_rcu+0x9/0x20 common_interrupt+0x7f/0x90 Signed-off-by: Kohei Enju Acked-by: Martin KaFai Lau Link: https://patch.msgid.link/20260220110922.94781-1-kohei@enjuk.jp Signed-off-by: Jakub Kicinski --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 1cf3ad840697..19b84eaa2643 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8113,7 +8113,8 @@ struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, { struct netdev_adjacent *upper; - WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held()); + WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held() && + !lockdep_rtnl_is_held()); upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); From acd338ba2f3a33d68aa05f406407fbdf6adccfee Mon Sep 17 00:00:00 2001 From: Sean Chang Date: Mon, 2 Mar 2026 22:29:31 +0800 Subject: [PATCH 0239/1561] net: macb: use ethtool_sprintf to fill ethtool stats strings The RISC-V toolchain triggers a stringop-truncation warning when using snprintf() with a fixed ETH_GSTRING_LEN (32 bytes) buffer. Convert the driver to use the modern ethtool_sprintf() API from linux/ethtool.h. This removes the need for manual snprintf() and memcpy() calls, handles the 32-byte padding automatically, and simplifies the logic by removing manual pointer arithmetic. Suggested-by: Andrew Lunn Reviewed-by: Andrew Lunn Reviewed-by: Andy Shevchenko Signed-off-by: Sean Chang Link: https://patch.msgid.link/20260302142931.49108-1-seanwascoding@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 02eab26fd98b..17f0ad3d7a09 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -3161,7 +3161,6 @@ static int gem_get_sset_count(struct net_device *dev, int sset) static void gem_get_ethtool_strings(struct net_device *dev, u32 sset, u8 *p) { - char stat_string[ETH_GSTRING_LEN]; struct macb *bp = netdev_priv(dev); struct macb_queue *queue; unsigned int i; @@ -3174,11 +3173,8 @@ static void gem_get_ethtool_strings(struct net_device *dev, u32 sset, u8 *p) ETH_GSTRING_LEN); for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { - for (i = 0; i < QUEUE_STATS_LEN; i++, p += ETH_GSTRING_LEN) { - snprintf(stat_string, ETH_GSTRING_LEN, "q%d_%s", - q, queue_statistics[i].stat_string); - memcpy(p, stat_string, ETH_GSTRING_LEN); - } + for (i = 0; i < QUEUE_STATS_LEN; i++) + ethtool_sprintf(&p, "q%u_%s", q, queue_statistics[i].stat_string); } break; } From ca4c7771a059fb2665fecc7d5954672d09475089 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Mon, 2 Mar 2026 16:58:43 +0100 Subject: [PATCH 0240/1561] dt-bindings: sram: qcom,imem: Allow modem-tables subnode The IP Accelerator hardware/firmware owns a sizeable region within the IMEM, named 'modem-tables', containing various packet processing configuration data. It's not actually accessed by the OS, although we have to IOMMU-map it with the IPA device, so that presumably the firmware can act upon it. Allow it as a subnode of IMEM. Reviewed-by: Krzysztof Kozlowski Reviewed-by: Alex Elder Signed-off-by: Konrad Dybcio Link: https://patch.msgid.link/20260302-topic-ipa_imem-v6-1-c0ebbf3eae9f@oss.qualcomm.com Signed-off-by: Jakub Kicinski --- .../devicetree/bindings/sram/qcom,imem.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Documentation/devicetree/bindings/sram/qcom,imem.yaml b/Documentation/devicetree/bindings/sram/qcom,imem.yaml index 6a627c57ae2f..c63026904061 100644 --- a/Documentation/devicetree/bindings/sram/qcom,imem.yaml +++ b/Documentation/devicetree/bindings/sram/qcom,imem.yaml @@ -67,6 +67,20 @@ properties: $ref: /schemas/power/reset/syscon-reboot-mode.yaml# patternProperties: + "^modem-tables@[0-9a-f]+$": + type: object + description: + Region containing packet processing configuration for the IP Accelerator. + + properties: + reg: + maxItems: 1 + + required: + - reg + + additionalProperties: false + "^pil-reloc@[0-9a-f]+$": $ref: /schemas/remoteproc/qcom,pil-info.yaml# description: Peripheral image loader relocation region From f5a598abfdd9dc73a9537b9628d4f26a3069c707 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Mon, 2 Mar 2026 16:58:44 +0100 Subject: [PATCH 0241/1561] dt-bindings: net: qcom,ipa: Add sram property for describing IMEM slice The IPA driver currently grabs a slice of IMEM through hardcoded addresses. Not only is that ugly and against the principles of DT, but it also creates a situation where two distinct platforms implementing the same version of IPA would need to be hardcoded together and matched at runtime. Instead, do the sane thing and accept a handle to said region directly. Don't make it required on purpose, as it's not there on ancient implementations (currently unsupported) and we're not yet done with filling the data across al DTs. Reviewed-by: Alex Elder Reviewed-by: Krzysztof Kozlowski Signed-off-by: Konrad Dybcio Link: https://patch.msgid.link/20260302-topic-ipa_imem-v6-2-c0ebbf3eae9f@oss.qualcomm.com Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/net/qcom,ipa.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Documentation/devicetree/bindings/net/qcom,ipa.yaml b/Documentation/devicetree/bindings/net/qcom,ipa.yaml index c7f5f2ef7452..4237e74041ef 100644 --- a/Documentation/devicetree/bindings/net/qcom,ipa.yaml +++ b/Documentation/devicetree/bindings/net/qcom,ipa.yaml @@ -165,6 +165,13 @@ properties: initializing IPA hardware. Optional, and only used when Trust Zone performs early initialization. + sram: + maxItems: 1 + description: + A reference to an additional region residing in IMEM (special + on-chip SRAM), which is accessed by the IPA firmware and needs + to be IOMMU-mapped from the OS. + required: - compatible - iommus From 6f82cb4ecdb4f23b81d7f71e31d17a55857c8d74 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Mon, 2 Mar 2026 16:58:45 +0100 Subject: [PATCH 0242/1561] net: ipa: Grab IMEM slice base/size from DTS This is a detail that differ per chip, and not per IPA version (and there are cases of the same IPA versions being implemented across very very very different SoCs). This region isn't actually used by the driver, but we most definitely want to iommu-map it, so that IPA can poke at the data within. Reviewed-by: Alex Elder Acked-by: Dmitry Baryshkov Reviewed-by: Simon Horman Signed-off-by: Konrad Dybcio Link: https://patch.msgid.link/20260302-topic-ipa_imem-v6-3-c0ebbf3eae9f@oss.qualcomm.com Signed-off-by: Jakub Kicinski --- drivers/net/ipa/ipa_data.h | 9 +++++++-- drivers/net/ipa/ipa_mem.c | 24 +++++++++++++++++++++++- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/drivers/net/ipa/ipa_data.h b/drivers/net/ipa/ipa_data.h index 2fd03f0799b2..f3bdc64cef05 100644 --- a/drivers/net/ipa/ipa_data.h +++ b/drivers/net/ipa/ipa_data.h @@ -185,8 +185,13 @@ struct ipa_resource_data { struct ipa_mem_data { u32 local_count; const struct ipa_mem *local; - u32 imem_addr; - u32 imem_size; + + /* These values are now passed via DT, but to support + * older systems we must allow this to be specified here. + */ + u32 imem_addr; /* DEPRECATED */ + u32 imem_size; /* DEPRECATED */ + u32 smem_size; }; diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c index 835a3c9c1fd4..078d32a18dbf 100644 --- a/drivers/net/ipa/ipa_mem.c +++ b/drivers/net/ipa/ipa_mem.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -617,7 +618,9 @@ static void ipa_smem_exit(struct ipa *ipa) int ipa_mem_init(struct ipa *ipa, struct platform_device *pdev, const struct ipa_mem_data *mem_data) { + struct device_node *ipa_slice_np; struct device *dev = &pdev->dev; + u32 imem_base, imem_size; struct resource *res; int ret; @@ -656,7 +659,26 @@ int ipa_mem_init(struct ipa *ipa, struct platform_device *pdev, ipa->mem_addr = res->start; ipa->mem_size = resource_size(res); - ret = ipa_imem_init(ipa, mem_data->imem_addr, mem_data->imem_size); + ipa_slice_np = of_parse_phandle(dev->of_node, "sram", 0); + if (ipa_slice_np) { + struct resource sram_res; + + ret = of_address_to_resource(ipa_slice_np, 0, &sram_res); + of_node_put(ipa_slice_np); + if (ret) + goto err_unmap; + + imem_base = sram_res.start; + imem_size = resource_size(&sram_res); + } else { + /* Backwards compatibility for DTs lacking + * an explicit reference + */ + imem_base = mem_data->imem_addr; + imem_size = mem_data->imem_size; + } + + ret = ipa_imem_init(ipa, imem_base, imem_size); if (ret) goto err_unmap; From dfa77c0dd4ab267d3f1160617c95eab80181a76f Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 2 Mar 2026 06:40:39 -0800 Subject: [PATCH 0243/1561] selftests: netconsole: print diagnostic on busywait timeout in netcons_basic The script uses set -euo pipefail, so when busywait times out waiting for the netconsole message to arrive, it returns 1 and the script exits immediately without printing any error message. As reported by Jakub, this makes failures hard to diagnose since the test reports exit=1 with no explanation. Handle the busywait failure explicitly so that a FAIL message is printed before exiting. This is how it looks like now: Running with target mode: basic (ipv6) [ 167.452561] netconsole selftest: netcons_QdMay FAIL: Timed out waiting (20000 ms) for netconsole message in /tmp/netcons_QdMay The remaining silent failures under set -e can only happen during the setup phase (netdevsim creation, interface configuration, configfs writes). So, it is not expected to have any silent failure once the test starts. Note that this issue might be less frequent now, since commit a68a9bd086c28 ("selftests: netconsole: Increase port listening timeout") increased the timeout that _might_ have been the root cause of these random failures in NIPA. Signed-off-by: Breno Leitao Link: https://patch.msgid.link/20260302-netconsole_test_verbose-v1-1-b1be5d30cd7d@debian.org Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/netconsole/netcons_basic.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/netconsole/netcons_basic.sh b/tools/testing/selftests/drivers/net/netconsole/netcons_basic.sh index 59cf10013ecd..7976206523b2 100755 --- a/tools/testing/selftests/drivers/net/netconsole/netcons_basic.sh +++ b/tools/testing/selftests/drivers/net/netconsole/netcons_basic.sh @@ -58,7 +58,11 @@ do # Send the message echo "${MSG}: ${TARGET}" > /dev/kmsg # Wait until socat saves the file to disk - busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" + if ! busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" + then + echo "FAIL: Timed out waiting (${BUSYWAIT_TIMEOUT} ms) for netconsole message in ${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi # Make sure the message was received in the dst part # and exit From d6ca199568c53ece99aeeae1022d04f2fd8cde7f Mon Sep 17 00:00:00 2001 From: Zeeshan Ahmad Date: Mon, 2 Mar 2026 11:43:17 +0500 Subject: [PATCH 0244/1561] net: core: failover: enforce mandatory ops and clean up redundant checks The failover framework requires 'ops' to be functional. Currently, failover_register() allows an instance to be registered with NULL ops, which leads to inconsistent NULL checks and potential NULL pointer dereferences in the slave registration paths. Harden the entry point by requiring non-NULL ops in failover_register(). This ensures the 'fops' pointer is guaranteed to be valid for any successfully registered failover instance. Consequently, remove the now redundant NULL checks for 'fops' throughout the module to simplify the logic. Signed-off-by: Zeeshan Ahmad Link: https://patch.msgid.link/20260302064317.9964-1-zeeshanahmad022019@gmail.com Signed-off-by: Jakub Kicinski --- net/core/failover.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/core/failover.c b/net/core/failover.c index 0eb2e0ec875b..11bb183c7a1b 100644 --- a/net/core/failover.c +++ b/net/core/failover.c @@ -59,7 +59,7 @@ static int failover_slave_register(struct net_device *slave_dev) if (!failover_dev) goto done; - if (fops && fops->slave_pre_register && + if (fops->slave_pre_register && fops->slave_pre_register(slave_dev, failover_dev)) goto done; @@ -82,7 +82,7 @@ static int failover_slave_register(struct net_device *slave_dev) slave_dev->priv_flags |= (IFF_FAILOVER_SLAVE | IFF_NO_ADDRCONF); - if (fops && fops->slave_register && + if (fops->slave_register && !fops->slave_register(slave_dev, failover_dev)) return NOTIFY_OK; @@ -115,7 +115,7 @@ int failover_slave_unregister(struct net_device *slave_dev) if (!failover_dev) goto done; - if (fops && fops->slave_pre_unregister && + if (fops->slave_pre_unregister && fops->slave_pre_unregister(slave_dev, failover_dev)) goto done; @@ -123,7 +123,7 @@ int failover_slave_unregister(struct net_device *slave_dev) netdev_upper_dev_unlink(slave_dev, failover_dev); slave_dev->priv_flags &= ~(IFF_FAILOVER_SLAVE | IFF_NO_ADDRCONF); - if (fops && fops->slave_unregister && + if (fops->slave_unregister && !fops->slave_unregister(slave_dev, failover_dev)) return NOTIFY_OK; @@ -149,7 +149,7 @@ static int failover_slave_link_change(struct net_device *slave_dev) if (!netif_running(failover_dev)) goto done; - if (fops && fops->slave_link_change && + if (fops->slave_link_change && !fops->slave_link_change(slave_dev, failover_dev)) return NOTIFY_OK; @@ -174,7 +174,7 @@ static int failover_slave_name_change(struct net_device *slave_dev) if (!netif_running(failover_dev)) goto done; - if (fops && fops->slave_name_change && + if (fops->slave_name_change && !fops->slave_name_change(slave_dev, failover_dev)) return NOTIFY_OK; @@ -244,7 +244,7 @@ struct failover *failover_register(struct net_device *dev, { struct failover *failover; - if (dev->type != ARPHRD_ETHER) + if (dev->type != ARPHRD_ETHER || !ops) return ERR_PTR(-EINVAL); failover = kzalloc_obj(*failover); From b52363f706e53101d9f8c5ba5e3854d6be8f122c Mon Sep 17 00:00:00 2001 From: Kibaek Yoo Date: Sat, 28 Feb 2026 16:16:12 +0900 Subject: [PATCH 0245/1561] net: macvlan: support multicast rx for bridge ports with shared source MAC Macvlan bridge mode currently does not handle the case where an external source shares its MAC address with a local macvlan interface. When such a frame arrives, macvlan_hash_lookup() matches the source MAC to the local macvlan, and macvlan_multicast_rx() assumes bridge ports already received the frame during local transmission. Since the frame actually originated externally, bridge ports never saw it. This situation arises with protocols like VRRP, where multiple hosts use the same virtual MAC address. Support this by passing NULL as the source device and including MACVLAN_MODE_BRIDGE in the mode mask for the else branch of macvlan_multicast_rx(). This ensures all VEPA and bridge mode macvlan interfaces receive incoming multicast regardless of source MAC matching. The trade-off is that looped-back locally-originated multicasts may be delivered to bridge ports a second time, but multicast consumers already handle duplicate frames. Signed-off-by: Kibaek Yoo Link: https://patch.msgid.link/20260228071613.4360-1-psykibaek@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/macvlan.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index a71f058eceef..ea22909cb09d 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -313,11 +313,15 @@ static void macvlan_multicast_rx(const struct macvlan_port *port, MACVLAN_MODE_BRIDGE); else /* - * flood only to VEPA ports, bridge ports - * already saw the frame on the way out. + * Flood to VEPA and bridge ports. We cannot distinguish + * a looped-back locally-originated multicast from one + * sent by an external source sharing the same source MAC + * (e.g., VRRP virtual MAC), so deliver to bridge ports + * as well to ensure correct reception in all cases. */ - macvlan_broadcast(skb, port, src->dev, - MACVLAN_MODE_VEPA); + macvlan_broadcast(skb, port, NULL, + MACVLAN_MODE_VEPA | + MACVLAN_MODE_BRIDGE); } static void macvlan_process_broadcast(struct work_struct *w) From 4ad96a7c9e2cebbbdc68369438a736a133539f1d Mon Sep 17 00:00:00 2001 From: Kibaek Yoo Date: Sat, 28 Feb 2026 16:16:13 +0900 Subject: [PATCH 0246/1561] selftests: net: add macvlan multicast test for shared source MAC Add a selftest that verifies multicast delivery to a macvlan bridge port when the source MAC of the incoming frame matches the macvlan's own MAC address. This scenario occurs with protocols like VRRP where multiple hosts share the same virtual MAC address. Without the corresponding kernel change, macvlan bridge mode does not handle this case and the multicast frame is not delivered. Signed-off-by: Kibaek Yoo Link: https://patch.msgid.link/20260228071613.4360-2-psykibaek@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 1 + .../selftests/net/macvlan_mcast_shared_mac.sh | 93 +++++++++++++++++++ 2 files changed, 94 insertions(+) create mode 100755 tools/testing/selftests/net/macvlan_mcast_shared_mac.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index ede107fa6ea8..997056697ec8 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -55,6 +55,7 @@ TEST_PROGS := \ l2tp.sh \ link_netns.py \ lwt_dst_cache_ref_loop.sh \ + macvlan_mcast_shared_mac.sh \ msg_zerocopy.sh \ nat6to4.sh \ ndisc_unsolicited_na_test.sh \ diff --git a/tools/testing/selftests/net/macvlan_mcast_shared_mac.sh b/tools/testing/selftests/net/macvlan_mcast_shared_mac.sh new file mode 100755 index 000000000000..ff5b89347247 --- /dev/null +++ b/tools/testing/selftests/net/macvlan_mcast_shared_mac.sh @@ -0,0 +1,93 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test multicast delivery to macvlan bridge ports when the source MAC +# matches the macvlan's own MAC address (e.g., VRRP virtual MAC shared +# across multiple hosts). +# +# Topology: +# +# NS_SRC NS_BRIDGE +# veth_src (SHARED_MAC) <-----> veth_dst +# | +# +-- macvlan0 (bridge mode, SHARED_MAC) +# +# A multicast packet sent from NS_SRC with source MAC equal to +# macvlan0's MAC must still be delivered to macvlan0. + +source lib.sh + +SHARED_MAC="00:00:5e:00:01:01" +MCAST_ADDR="239.0.0.1" + +setup() { + setup_ns NS_SRC NS_BRIDGE + + ip -net "${NS_BRIDGE}" link add veth_dst type veth \ + peer name veth_src netns "${NS_SRC}" + + ip -net "${NS_SRC}" link set veth_src address "${SHARED_MAC}" + ip -net "${NS_SRC}" link set veth_src up + ip -net "${NS_SRC}" addr add 192.168.1.1/24 dev veth_src + + ip -net "${NS_BRIDGE}" link set veth_dst up + + ip -net "${NS_BRIDGE}" link add macvlan0 link veth_dst \ + type macvlan mode bridge + ip -net "${NS_BRIDGE}" link set macvlan0 address "${SHARED_MAC}" + ip -net "${NS_BRIDGE}" link set macvlan0 up + ip -net "${NS_BRIDGE}" addr add 192.168.1.2/24 dev macvlan0 + + # Accept all multicast so the mc_filter passes for any group. + ip -net "${NS_BRIDGE}" link set macvlan0 allmulticast on +} + +cleanup() { + rm -f "${CAPFILE}" "${CAPOUT}" + cleanup_ns "${NS_SRC}" "${NS_BRIDGE}" +} + +test_macvlan_mcast_shared_mac() { + CAPFILE=$(mktemp) + CAPOUT=$(mktemp) + + echo "Testing multicast delivery to macvlan with shared source MAC" + + # Listen for one ICMP packet on macvlan0. + timeout 5s ip netns exec "${NS_BRIDGE}" \ + tcpdump -i macvlan0 -c 1 -w "${CAPFILE}" icmp &> "${CAPOUT}" & + local pid=$! + if ! slowwait 1 grep -qs "listening" "${CAPOUT}"; then + echo "[FAIL] tcpdump did not start listening" + return "${ksft_fail}" + fi + + # Send multicast ping from NS_SRC; source MAC equals macvlan0's MAC. + ip netns exec "${NS_SRC}" \ + ping -W 0.1 -c 3 -I veth_src "${MCAST_ADDR}" &> /dev/null + + wait "${pid}" + + local count + count=$(tcpdump -r "${CAPFILE}" 2>/dev/null | wc -l) + if [[ "${count}" -ge 1 ]]; then + echo "[ OK ]" + return "${ksft_pass}" + else + echo "[FAIL] expected at least 1 ICMP packet on macvlan0," \ + "got ${count}" + return "${ksft_fail}" + fi +} + +if [ ! -x "$(command -v tcpdump)" ]; then + echo "SKIP: Could not run test without tcpdump tool" + exit "${ksft_skip}" +fi + +trap cleanup EXIT + +setup +test_macvlan_mcast_shared_mac + +exit $? From 08d7d4cf2570428c695d63c018d3be897caf5be7 Mon Sep 17 00:00:00 2001 From: Jori Koolstra Date: Tue, 3 Mar 2026 17:59:37 +0100 Subject: [PATCH 0247/1561] wifi: mac80211_hwsim: change hwsim_class to a const struct The class_create() call has been deprecated in favor of class_register() as the driver core now allows for a struct class to be in read-only memory. Change hwsim_class to be a const struct class and drop the class_create() call. Suggested-by: Greg Kroah-Hartman Signed-off-by: Jori Koolstra Link: https://patch.msgid.link/20260303165938.3773998-1-jkoolstra@xs4all.nl Signed-off-by: Johannes Berg --- drivers/net/wireless/virtual/mac80211_hwsim.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/virtual/mac80211_hwsim.c b/drivers/net/wireless/virtual/mac80211_hwsim.c index 475918ee8132..32897d88bda3 100644 --- a/drivers/net/wireless/virtual/mac80211_hwsim.c +++ b/drivers/net/wireless/virtual/mac80211_hwsim.c @@ -337,7 +337,9 @@ static inline void hwsim_net_set_wmediumd(struct net *net, u32 portid) hwsim_net->wmediumd = portid; } -static struct class *hwsim_class; +static const struct class hwsim_class = { + .name = "mac80211_hwsim" +}; static struct net_device *hwsim_mon; /* global monitor netdev */ @@ -5524,7 +5526,7 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, data = hw->priv; data->hw = hw; - data->dev = device_create(hwsim_class, NULL, 0, hw, "hwsim%d", idx); + data->dev = device_create(&hwsim_class, NULL, 0, hw, "hwsim%d", idx); if (IS_ERR(data->dev)) { printk(KERN_DEBUG "mac80211_hwsim: device_create failed (%ld)\n", @@ -6097,7 +6099,7 @@ static void mac80211_hwsim_free(void) spin_lock_bh(&hwsim_radio_lock); } spin_unlock_bh(&hwsim_radio_lock); - class_destroy(hwsim_class); + class_unregister(&hwsim_class); } static const struct net_device_ops hwsim_netdev_ops = { @@ -7205,11 +7207,9 @@ static int __init init_mac80211_hwsim(void) if (err) goto out_exit_netlink; - hwsim_class = class_create("mac80211_hwsim"); - if (IS_ERR(hwsim_class)) { - err = PTR_ERR(hwsim_class); + err = class_register(&hwsim_class); + if (err) goto out_exit_virtio; - } hwsim_init_s1g_channels(hwsim_channels_s1g); From 7a135bf9903fe599aec1825ebc2c5b026fe1e38c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 1 Dec 2025 19:47:13 +0100 Subject: [PATCH 0248/1561] ipv6: export fib6_lookup for nft_fib_ipv6 Upcoming patch will call fib6_lookup from nft_fib_ipv6. The EXPORT_SYMBOL is added twice because there are two implementations of the function, one is a small stub for MULTIPLE_TABLES=n, only one is compiled into the kernel depending on .config settings. Alternative to EXPORT_SYMBOL is to use an indirect call via the ipv6_stub->fib6_lookup() indirection, but thats more expensive than the direct call. Also, nft_fib_ipv6 cannot be builtin if ipv6 is a module. Signed-off-by: Florian Westphal --- net/ipv6/fib6_rules.c | 3 +++ net/ipv6/ip6_fib.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index fd5f7112a51f..e1b2b4fa6e18 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -92,6 +92,9 @@ int fib6_lookup(struct net *net, int oif, struct flowi6 *fl6, return err; } +#if IS_MODULE(CONFIG_NFT_FIB_IPV6) +EXPORT_SYMBOL_GPL(fib6_lookup); +#endif struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, const struct sk_buff *skb, diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 9058e71241dc..a6e58a435735 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -342,6 +342,9 @@ int fib6_lookup(struct net *net, int oif, struct flowi6 *fl6, return fib6_table_lookup(net, net->ipv6.fib6_main_tbl, oif, fl6, res, flags); } +#if IS_MODULE(CONFIG_NFT_FIB_IPV6) +EXPORT_SYMBOL_GPL(fib6_lookup); +#endif static void __net_init fib6_tables_init(struct net *net) { From 831fb31b76aea1453229dfd7cbd1946ffe1c03b5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 24 Feb 2026 21:09:31 +0100 Subject: [PATCH 0249/1561] ipv6: make ipv6_anycast_destination logic usable without dst_entry nft_fib_ipv6 uses ipv6_anycast_destination(), but upcoming patch removes the dst_entry usage in favor of fib6_result. Move the 'plen > 127' logic to a new helper and call it from the existing one. Signed-off-by: Florian Westphal --- include/net/ip6_route.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index a55f9bf95fe3..0c8eeb6abe7a 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -252,15 +252,22 @@ static inline bool ipv6_unicast_destination(const struct sk_buff *skb) return rt->rt6i_flags & RTF_LOCAL; } +static inline bool __ipv6_anycast_destination(const struct rt6key *rt6i_dst, + u32 rt6i_flags, + const struct in6_addr *daddr) +{ + return rt6i_flags & RTF_ANYCAST || + (rt6i_dst->plen < 127 && + !(rt6i_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) && + ipv6_addr_equal(&rt6i_dst->addr, daddr)); +} + static inline bool ipv6_anycast_destination(const struct dst_entry *dst, const struct in6_addr *daddr) { const struct rt6_info *rt = dst_rt6_info(dst); - return rt->rt6i_flags & RTF_ANYCAST || - (rt->rt6i_dst.plen < 127 && - !(rt->rt6i_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) && - ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)); + return __ipv6_anycast_destination(&rt->rt6i_dst, rt->rt6i_flags, daddr); } int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, From 1c32b24c234ba88a969e6bd9c385e66212a4af15 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 22 Nov 2025 19:24:49 +0100 Subject: [PATCH 0250/1561] netfilter: nft_fib_ipv6: switch to fib6_lookup Existing code works but it requires a temporary dst object that is released again right away. Switch to fib6_lookup + RT6_LOOKUP_F_DST_NOREF: no need for temporary dst objects and refcount overhead anymore. Provides ~13% improvement in match performance. Signed-off-by: Florian Westphal --- net/ipv6/netfilter/nft_fib_ipv6.c | 79 +++++++++++++++++++------------ 1 file changed, 49 insertions(+), 30 deletions(-) diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index dc375b725b28..8b2dba88ee96 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -52,7 +52,13 @@ static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv, fl6->flowlabel = (*(__be32 *)iph) & IPV6_FLOWINFO_MASK; fl6->flowi6_l3mdev = nft_fib_l3mdev_master_ifindex_rcu(pkt, dev); - return lookup_flags; + return lookup_flags | RT6_LOOKUP_F_DST_NOREF; +} + +static int nft_fib6_lookup(struct net *net, struct flowi6 *fl6, + struct fib6_result *res, int flags) +{ + return fib6_lookup(net, fl6->flowi6_oif, fl6, res, flags); } static u32 __nft_fib6_eval_type(const struct nft_fib *priv, @@ -60,13 +66,14 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv, struct ipv6hdr *iph) { const struct net_device *dev = NULL; + struct fib6_result res = {}; int route_err, addrtype; - struct rt6_info *rt; struct flowi6 fl6 = { .flowi6_iif = LOOPBACK_IFINDEX, .flowi6_proto = pkt->tprot, .flowi6_uid = sock_net_uid(nft_net(pkt), NULL), }; + int lookup_flags; u32 ret = 0; if (priv->flags & NFTA_FIB_F_IIF) @@ -74,29 +81,23 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv, else if (priv->flags & NFTA_FIB_F_OIF) dev = nft_out(pkt); - nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph); + lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph); if (dev && nf_ipv6_chk_addr(nft_net(pkt), &fl6.daddr, dev, true)) ret = RTN_LOCAL; - route_err = nf_ip6_route(nft_net(pkt), (struct dst_entry **)&rt, - flowi6_to_flowi(&fl6), false); + route_err = nft_fib6_lookup(nft_net(pkt), &fl6, &res, lookup_flags); if (route_err) goto err; - if (rt->rt6i_flags & RTF_REJECT) { - route_err = rt->dst.error; - dst_release(&rt->dst); - goto err; - } + if (res.fib6_flags & RTF_REJECT) + return res.fib6_type; - if (ipv6_anycast_destination((struct dst_entry *)rt, &fl6.daddr)) + if (__ipv6_anycast_destination(&res.f6i->fib6_dst, res.fib6_flags, &fl6.daddr)) ret = RTN_ANYCAST; - else if (!dev && rt->rt6i_flags & RTF_LOCAL) + else if (!dev && res.fib6_flags & RTF_LOCAL) ret = RTN_LOCAL; - dst_release(&rt->dst); - if (ret) return ret; @@ -152,6 +153,33 @@ static bool nft_fib_v6_skip_icmpv6(const struct sk_buff *skb, u8 next, const str return ipv6_addr_type(&iph->daddr) & IPV6_ADDR_LINKLOCAL; } +static bool nft_fib6_info_nh_dev_match(const struct net_device *nh_dev, + const struct net_device *dev) +{ + return nh_dev == dev || + l3mdev_master_ifindex_rcu(nh_dev) == dev->ifindex; +} + +static bool nft_fib6_info_nh_uses_dev(struct fib6_info *rt, + const struct net_device *dev) +{ + const struct net_device *nh_dev; + struct fib6_info *iter; + + nh_dev = fib6_info_nh_dev(rt); + if (nft_fib6_info_nh_dev_match(nh_dev, dev)) + return true; + + list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) { + nh_dev = fib6_info_nh_dev(iter); + + if (nft_fib6_info_nh_dev_match(nh_dev, dev)) + return true; + } + + return false; +} + void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { @@ -160,14 +188,14 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct net_device *found = NULL; const struct net_device *oif = NULL; u32 *dest = ®s->data[priv->dreg]; + struct fib6_result res = {}; struct ipv6hdr *iph, _iph; struct flowi6 fl6 = { .flowi6_iif = LOOPBACK_IFINDEX, .flowi6_proto = pkt->tprot, .flowi6_uid = sock_net_uid(nft_net(pkt), NULL), }; - struct rt6_info *rt; - int lookup_flags; + int lookup_flags, ret; if (nft_fib_can_skip(pkt)) { nft_fib_store_result(dest, priv, nft_in(pkt)); @@ -193,26 +221,17 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif, iph); *dest = 0; - rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, pkt->skb, - lookup_flags); - if (rt->dst.error) - goto put_rt_err; - - /* Should not see RTF_LOCAL here */ - if (rt->rt6i_flags & (RTF_REJECT | RTF_ANYCAST | RTF_LOCAL)) - goto put_rt_err; + ret = nft_fib6_lookup(nft_net(pkt), &fl6, &res, lookup_flags); + if (ret || res.fib6_flags & (RTF_REJECT | RTF_ANYCAST | RTF_LOCAL)) + return; if (!oif) { - found = rt->rt6i_idev->dev; + found = fib6_info_nh_dev(res.f6i); } else { - if (oif == rt->rt6i_idev->dev || - l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) == oif->ifindex) + if (nft_fib6_info_nh_uses_dev(res.f6i, oif)) found = oif; } - nft_fib_store_result(dest, priv, found); - put_rt_err: - ip6_rt_put(rt); } EXPORT_SYMBOL_GPL(nft_fib6_eval); From 5663ac3e548163183c4354e75f728f7b34ffe040 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 25 Feb 2026 13:20:19 +0000 Subject: [PATCH 0251/1561] netfilter: nf_log_syslog: no longer acquire sk_callback_lock in nf_log_dump_sk_uid_gid() After commit 983512f3a87f ("net: Drop the lock in skb_may_tx_timestamp()") from Sebastian Andrzej Siewior, apply the same logic in nf_log_dump_sk_uid_gid() to avoid touching sk_callback_lock. Signed-off-by: Eric Dumazet Signed-off-by: Florian Westphal --- net/netfilter/nf_log_syslog.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_log_syslog.c b/net/netfilter/nf_log_syslog.c index 41503847d9d7..0507d67cad27 100644 --- a/net/netfilter/nf_log_syslog.c +++ b/net/netfilter/nf_log_syslog.c @@ -165,18 +165,26 @@ static struct nf_logger nf_arp_logger __read_mostly = { static void nf_log_dump_sk_uid_gid(struct net *net, struct nf_log_buf *m, struct sock *sk) { + const struct socket *sock; + const struct file *file; + if (!sk || !sk_fullsock(sk) || !net_eq(net, sock_net(sk))) return; - read_lock_bh(&sk->sk_callback_lock); - if (sk->sk_socket && sk->sk_socket->file) { - const struct cred *cred = sk->sk_socket->file->f_cred; + /* The sk pointer remains valid as long as the skb is. The sk_socket and + * file pointer may become NULL if the socket is closed. Both structures + * (including file->cred) are RCU freed which means they can be accessed + * within a RCU read section. + */ + sock = READ_ONCE(sk->sk_socket); + file = sock ? READ_ONCE(sock->file) : NULL; + if (file) { + const struct cred *cred = file->f_cred; nf_log_buf_add(m, "UID=%u GID=%u ", from_kuid_munged(&init_user_ns, cred->fsuid), from_kgid_munged(&init_user_ns, cred->fsgid)); } - read_unlock_bh(&sk->sk_callback_lock); } static noinline_for_stack int From cdec942ac2006d74b2219c5f950402e5bd1f6c7b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 25 Feb 2026 13:23:19 +0000 Subject: [PATCH 0252/1561] netfilter: xt_owner: no longer acquire sk_callback_lock in mt_owner() After commit 983512f3a87f ("net: Drop the lock in skb_may_tx_timestamp()") from Sebastian Andrzej Siewior, apply the same logic in mt_owner() to avoid touching sk_callback_lock. Signed-off-by: Eric Dumazet Signed-off-by: Florian Westphal --- net/netfilter/xt_owner.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c index 50332888c8d2..5bfb4843df66 100644 --- a/net/netfilter/xt_owner.c +++ b/net/netfilter/xt_owner.c @@ -63,11 +63,12 @@ static bool owner_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_owner_match_info *info = par->matchinfo; - const struct file *filp; struct sock *sk = skb_to_full_sk(skb); struct net *net = xt_net(par); + const struct socket *sock; + const struct file *filp; - if (!sk || !sk->sk_socket || !net_eq(net, sock_net(sk))) + if (!sk || !READ_ONCE(sk->sk_socket) || !net_eq(net, sock_net(sk))) return (info->match ^ info->invert) == 0; else if (info->match & info->invert & XT_OWNER_SOCKET) /* @@ -76,23 +77,25 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) */ return false; - read_lock_bh(&sk->sk_callback_lock); - filp = sk->sk_socket ? sk->sk_socket->file : NULL; - if (filp == NULL) { - read_unlock_bh(&sk->sk_callback_lock); + /* The sk pointer remains valid as long as the skb is. The sk_socket and + * file pointer may become NULL if the socket is closed. Both structures + * (including file->cred) are RCU freed which means they can be accessed + * within a RCU read section. + */ + sock = READ_ONCE(sk->sk_socket); + filp = sock ? READ_ONCE(sock->file) : NULL; + if (filp == NULL) return ((info->match ^ info->invert) & (XT_OWNER_UID | XT_OWNER_GID)) == 0; - } if (info->match & XT_OWNER_UID) { kuid_t uid_min = make_kuid(net->user_ns, info->uid_min); kuid_t uid_max = make_kuid(net->user_ns, info->uid_max); + if ((uid_gte(filp->f_cred->fsuid, uid_min) && uid_lte(filp->f_cred->fsuid, uid_max)) ^ - !(info->invert & XT_OWNER_UID)) { - read_unlock_bh(&sk->sk_callback_lock); + !(info->invert & XT_OWNER_UID)) return false; - } } if (info->match & XT_OWNER_GID) { @@ -117,13 +120,10 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) } } - if (match ^ !(info->invert & XT_OWNER_GID)) { - read_unlock_bh(&sk->sk_callback_lock); + if (match ^ !(info->invert & XT_OWNER_GID)) return false; - } } - read_unlock_bh(&sk->sk_callback_lock); return true; } From afc2125de7414be3a31f5c09724c0375a57f5eae Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Feb 2026 08:29:22 +0000 Subject: [PATCH 0253/1561] netfilter: nft_meta: no longer acquire sk_callback_lock in nft_meta_get_eval_skugid() After commit 983512f3a87f ("net: Drop the lock in skb_may_tx_timestamp()") from Sebastian Andrzej Siewior, apply the same logic in nft_meta_get_eval_skugid() to avoid touching sk->sk_callback_lock. Signed-off-by: Eric Dumazet Signed-off-by: Florian Westphal --- net/netfilter/nft_meta.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 983158274c68..d0df6cf374d1 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -131,33 +131,36 @@ nft_meta_get_eval_skugid(enum nft_meta_keys key, u32 *dest, const struct nft_pktinfo *pkt) { - struct sock *sk = skb_to_full_sk(pkt->skb); - struct socket *sock; + const struct sock *sk = skb_to_full_sk(pkt->skb); + const struct socket *sock; + const struct file *file; if (!sk || !sk_fullsock(sk) || !net_eq(nft_net(pkt), sock_net(sk))) return false; - read_lock_bh(&sk->sk_callback_lock); - sock = sk->sk_socket; - if (!sock || !sock->file) { - read_unlock_bh(&sk->sk_callback_lock); + /* The sk pointer remains valid as long as the skb is. The sk_socket and + * file pointer may become NULL if the socket is closed. Both structures + * (including file->cred) are RCU freed which means they can be accessed + * within a RCU read section. + */ + sock = READ_ONCE(sk->sk_socket); + file = sock ? READ_ONCE(sock->file) : NULL; + if (!file) return false; - } switch (key) { case NFT_META_SKUID: *dest = from_kuid_munged(sock_net(sk)->user_ns, - sock->file->f_cred->fsuid); + file->f_cred->fsuid); break; case NFT_META_SKGID: *dest = from_kgid_munged(sock_net(sk)->user_ns, - sock->file->f_cred->fsgid); + file->f_cred->fsgid); break; default: break; } - read_unlock_bh(&sk->sk_callback_lock); return true; } From b297aaefc648be6bd6f3028f0b6161707c7bf632 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Feb 2026 08:58:16 +0000 Subject: [PATCH 0254/1561] netfilter: nfnetlink_log: no longer acquire sk_callback_lock After commit 983512f3a87f ("net: Drop the lock in skb_may_tx_timestamp()") from Sebastian Andrzej Siewior, apply the same logic in __build_packet_message() to avoid touching sk->sk_callback_lock. Signed-off-by: Eric Dumazet Signed-off-by: Florian Westphal --- net/netfilter/nfnetlink_log.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index b35a90955e2e..730c2541a849 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -611,19 +611,26 @@ __build_packet_message(struct nfnl_log_net *log, /* UID */ sk = skb->sk; if (sk && sk_fullsock(sk)) { - read_lock_bh(&sk->sk_callback_lock); - if (sk->sk_socket && sk->sk_socket->file) { - struct file *file = sk->sk_socket->file; + const struct socket *sock; + const struct file *file; + + /* The sk pointer remains valid as long as the skb is. + * The sk_socket and file pointer may become NULL + * if the socket is closed. + * Both structures (including file->cred) are RCU freed + * which means they can be accessed within a RCU read section. + */ + sock = READ_ONCE(sk->sk_socket); + file = sock ? READ_ONCE(sock->file) : NULL; + if (file) { const struct cred *cred = file->f_cred; struct user_namespace *user_ns = inst->peer_user_ns; __be32 uid = htonl(from_kuid_munged(user_ns, cred->fsuid)); __be32 gid = htonl(from_kgid_munged(user_ns, cred->fsgid)); - read_unlock_bh(&sk->sk_callback_lock); if (nla_put_be32(inst->skb, NFULA_UID, uid) || nla_put_be32(inst->skb, NFULA_GID, gid)) goto nla_put_failure; - } else - read_unlock_bh(&sk->sk_callback_lock); + } } /* local sequence number */ From 013e2f91d0a4cac7396ac121ecdbc7bb71ca5cef Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Feb 2026 09:40:36 +0000 Subject: [PATCH 0255/1561] netfilter: nfnetlink_queue: no longer acquire sk_callback_lock After commit 983512f3a87f ("net: Drop the lock in skb_may_tx_timestamp()") from Sebastian Andrzej Siewior, apply the same logic in nfqnl_put_sk_uidgid() to avoid touching sk->sk_callback_lock. Signed-off-by: Eric Dumazet Signed-off-by: Florian Westphal --- net/netfilter/nfnetlink_queue.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 7f5248b5f1ee..27300d3663da 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -545,14 +545,23 @@ nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet, static int nfqnl_put_sk_uidgid(struct sk_buff *skb, struct sock *sk) { + const struct socket *sock; + const struct file *file; const struct cred *cred; if (!sk_fullsock(sk)) return 0; - read_lock_bh(&sk->sk_callback_lock); - if (sk->sk_socket && sk->sk_socket->file) { - cred = sk->sk_socket->file->f_cred; + /* The sk pointer remains valid as long as the skb is. + * The sk_socket and file pointer may become NULL + * if the socket is closed. + * Both structures (including file->cred) are RCU freed + * which means they can be accessed within a RCU read section. + */ + sock = READ_ONCE(sk->sk_socket); + file = sock ? READ_ONCE(sock->file) : NULL; + if (file) { + cred = file->f_cred; if (nla_put_be32(skb, NFQA_UID, htonl(from_kuid_munged(&init_user_ns, cred->fsuid)))) goto nla_put_failure; @@ -560,11 +569,9 @@ static int nfqnl_put_sk_uidgid(struct sk_buff *skb, struct sock *sk) htonl(from_kgid_munged(&init_user_ns, cred->fsgid)))) goto nla_put_failure; } - read_unlock_bh(&sk->sk_callback_lock); return 0; nla_put_failure: - read_unlock_bh(&sk->sk_callback_lock); return -1; } From 34a6a003d4e493133c4dc81c055324646bb7ebef Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 26 Feb 2026 11:12:21 +0100 Subject: [PATCH 0256/1561] netfilter: nfnetlink_queue: remove locking in nfqnl_get_sk_secctx We don't need the cb lock here. Also, if skb was NULL we'd have crashed already. Signed-off-by: Florian Westphal --- net/netfilter/nfnetlink_queue.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 27300d3663da..5379d8ff39c0 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -592,15 +592,8 @@ static int nfqnl_get_sk_secctx(struct sk_buff *skb, struct lsm_context *ctx) { int seclen = 0; #if IS_ENABLED(CONFIG_NETWORK_SECMARK) - - if (!skb || !sk_fullsock(skb->sk)) - return 0; - - read_lock_bh(&skb->sk->sk_callback_lock); - if (skb->secmark) seclen = security_secid_to_secctx(skb->secmark, ctx); - read_unlock_bh(&skb->sk->sk_callback_lock); #endif return seclen; } From 1ac252ad036cdb18f5fb7f76bb6061adfed9cedf Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Tue, 3 Mar 2026 23:04:04 +0200 Subject: [PATCH 0257/1561] rculist_bl: add hlist_bl_for_each_entry_continue_rcu Change the old hlist_bl_first_rcu to hlist_bl_first_rcu_dereference to indicate that it is a RCU dereference. Add hlist_bl_next_rcu and hlist_bl_first_rcu to use RCU pointers and use them to fix sparse warnings. Add hlist_bl_for_each_entry_continue_rcu. Signed-off-by: Julian Anastasov Signed-off-by: Florian Westphal --- include/linux/rculist_bl.h | 49 +++++++++++++++++++++++++++++++------- 1 file changed, 40 insertions(+), 9 deletions(-) diff --git a/include/linux/rculist_bl.h b/include/linux/rculist_bl.h index 0b952d06eb0b..36363b876e53 100644 --- a/include/linux/rculist_bl.h +++ b/include/linux/rculist_bl.h @@ -8,21 +8,31 @@ #include #include +/* return the first ptr or next element in an RCU protected list */ +#define hlist_bl_first_rcu(head) \ + (*((struct hlist_bl_node __rcu **)(&(head)->first))) +#define hlist_bl_next_rcu(node) \ + (*((struct hlist_bl_node __rcu **)(&(node)->next))) + static inline void hlist_bl_set_first_rcu(struct hlist_bl_head *h, struct hlist_bl_node *n) { LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK); LIST_BL_BUG_ON(((unsigned long)h->first & LIST_BL_LOCKMASK) != LIST_BL_LOCKMASK); - rcu_assign_pointer(h->first, + rcu_assign_pointer(hlist_bl_first_rcu(h), (struct hlist_bl_node *)((unsigned long)n | LIST_BL_LOCKMASK)); } -static inline struct hlist_bl_node *hlist_bl_first_rcu(struct hlist_bl_head *h) -{ - return (struct hlist_bl_node *) - ((unsigned long)rcu_dereference_check(h->first, hlist_bl_is_locked(h)) & ~LIST_BL_LOCKMASK); -} +#define hlist_bl_first_rcu_dereference(head) \ +({ \ + struct hlist_bl_head *__head = (head); \ + \ + (struct hlist_bl_node *) \ + ((unsigned long)rcu_dereference_check(hlist_bl_first_rcu(__head), \ + hlist_bl_is_locked(__head)) & \ + ~LIST_BL_LOCKMASK); \ +}) /** * hlist_bl_del_rcu - deletes entry from hash list without re-initialization @@ -73,7 +83,7 @@ static inline void hlist_bl_add_head_rcu(struct hlist_bl_node *n, { struct hlist_bl_node *first; - /* don't need hlist_bl_first_rcu because we're under lock */ + /* don't need hlist_bl_first_rcu* because we're under lock */ first = hlist_bl_first(h); n->next = first; @@ -93,9 +103,30 @@ static inline void hlist_bl_add_head_rcu(struct hlist_bl_node *n, * */ #define hlist_bl_for_each_entry_rcu(tpos, pos, head, member) \ - for (pos = hlist_bl_first_rcu(head); \ + for (pos = hlist_bl_first_rcu_dereference(head); \ pos && \ ({ tpos = hlist_bl_entry(pos, typeof(*tpos), member); 1; }); \ - pos = rcu_dereference_raw(pos->next)) + pos = rcu_dereference_raw(hlist_bl_next_rcu(pos))) + +/** + * hlist_bl_for_each_entry_continue_rcu - continue iteration over list of given + * type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_bl_node to use as a loop cursor. + * @member: the name of the hlist_bl_node within the struct. + * + * Continue to iterate over list of given type, continuing after + * the current position which must have been in the list when the RCU read + * lock was taken. + * This would typically require either that you obtained the node from a + * previous walk of the list in the same RCU read-side critical section, or + * that you held some sort of non-RCU reference (such as a reference count) + * to keep the node alive *and* in the list. + */ +#define hlist_bl_for_each_entry_continue_rcu(tpos, pos, member) \ + for (pos = rcu_dereference_raw(hlist_bl_next_rcu(&(tpos)->member)); \ + pos && \ + ({ tpos = hlist_bl_entry(pos, typeof(*tpos), member); 1; }); \ + pos = rcu_dereference_raw(hlist_bl_next_rcu(pos))) #endif From b655388111cf7e43f70e49db64bdaa42bcb8a038 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Tue, 3 Mar 2026 23:04:05 +0200 Subject: [PATCH 0258/1561] ipvs: add resizable hash tables Add infrastructure for resizable hash tables based on hlist_bl which we will use in followup patches. The tables allow RCU lookups during resizing, bucket modifications are protected with per-bucket bit lock and additional custom locking, the tables are resized when load reaches thresholds determined based on load factor parameter. Compared to other implementations we rely on: * fast entry removal by using node unlinking without pre-lookup * entry rehashing when hash key changes * entries can contain multiple hash nodes * custom locking depending on different contexts * adjustable load factor to customize the grow/shrink process Signed-off-by: Julian Anastasov Signed-off-by: Florian Westphal --- include/net/ip_vs.h | 198 ++++++++++++++++++++++++++++++++ net/netfilter/ipvs/ip_vs_conn.c | 5 - net/netfilter/ipvs/ip_vs_core.c | 179 +++++++++++++++++++++++++++++ 3 files changed, 377 insertions(+), 5 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index ad8a16146ac5..c373fbdd2d0f 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -11,6 +11,7 @@ #include /* for __uXX types */ #include /* for struct list_head */ +#include /* for struct hlist_bl_head */ #include /* for struct rwlock_t */ #include /* for struct atomic_t */ #include /* for struct refcount_t */ @@ -30,6 +31,7 @@ #endif #include /* Netw namespace */ #include +#include #define IP_VS_HDR_INVERSE 1 #define IP_VS_HDR_ICMP 2 @@ -271,6 +273,10 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len, pr_err(msg, ##__VA_ARGS__); \ } while (0) +struct ip_vs_aligned_lock { + spinlock_t l; /* Protect buckets */ +} ____cacheline_aligned_in_smp; + /* For arrays per family */ enum { IP_VS_AF_INET, @@ -484,6 +490,198 @@ struct ip_vs_est_kt_data { int est_row; /* estimated row */ }; +/* IPVS resizable hash tables */ +struct ip_vs_rht { + struct hlist_bl_head *buckets; + struct ip_vs_rht __rcu *new_tbl; /* New/Same table */ + seqcount_t *seqc; /* Protects moves */ + struct ip_vs_aligned_lock *lock; /* Protect seqc */ + int mask; /* Buckets mask */ + int size; /* Buckets */ + int seqc_mask; /* seqc mask */ + int lock_mask; /* lock mask */ + u32 table_id; + int u_thresh; /* upper threshold */ + int l_thresh; /* lower threshold */ + int lfactor; /* Load Factor (shift)*/ + int bits; /* size = 1 << bits */ + siphash_key_t hash_key; + struct rcu_head rcu_head; +}; + +/** + * ip_vs_rht_for_each_table() - Walk the hash tables + * @table: struct ip_vs_rht __rcu *table + * @t: current table, used as cursor, struct ip_vs_rht *var + * @p: previous table, temp struct ip_vs_rht *var + * + * Walk tables assuming others can not change the installed tables + */ +#define ip_vs_rht_for_each_table(table, t, p) \ + for (p = NULL, t = rcu_dereference_protected(table, 1); \ + t != p; \ + p = t, t = rcu_dereference_protected(t->new_tbl, 1)) + +/** + * ip_vs_rht_for_each_table_rcu() - Walk the hash tables under RCU reader lock + * @table: struct ip_vs_rht __rcu *table + * @t: current table, used as cursor, struct ip_vs_rht *var + * @p: previous table, temp struct ip_vs_rht *var + * + * We usually search in one table and also in second table on resizing + */ +#define ip_vs_rht_for_each_table_rcu(table, t, p) \ + for (p = NULL, t = rcu_dereference(table); \ + t != p; \ + p = t, t = rcu_dereference(t->new_tbl)) + +/** + * ip_vs_rht_for_each_bucket() - Walk all table buckets + * @t: current table, used as cursor, struct ip_vs_rht *var + * @bucket: bucket index, used as cursor, u32 var + * @head: bucket address, used as cursor, struct hlist_bl_head *var + */ +#define ip_vs_rht_for_each_bucket(t, bucket, head) \ + for (bucket = 0, head = (t)->buckets; \ + bucket < t->size; bucket++, head++) + +/** + * ip_vs_rht_for_bucket_retry() - Retry bucket if entries are moved + * @t: current table, used as cursor, struct ip_vs_rht *var + * @bucket: index of current bucket or hash key + * @sc: temp seqcount_t *var + * @seq: temp unsigned int var for sequence count + * @retry: temp int var + */ +#define ip_vs_rht_for_bucket_retry(t, bucket, sc, seq, retry) \ + for (retry = 1, sc = &(t)->seqc[(bucket) & (t)->seqc_mask]; \ + retry && ({ seq = read_seqcount_begin(sc); 1; }); \ + retry = read_seqcount_retry(sc, seq)) + +/** + * DECLARE_IP_VS_RHT_WALK_BUCKETS_RCU() - Declare variables + * + * Variables for ip_vs_rht_walk_buckets_rcu + */ +#define DECLARE_IP_VS_RHT_WALK_BUCKETS_RCU() \ + struct ip_vs_rht *_t, *_p; \ + unsigned int _seq; \ + seqcount_t *_sc; \ + u32 _bucket; \ + int _retry +/** + * ip_vs_rht_walk_buckets_rcu() - Walk all buckets under RCU read lock + * @table: struct ip_vs_rht __rcu *table + * @head: bucket address, used as cursor, struct hlist_bl_head *var + * + * Can be used while others add/delete/move entries + * Not suitable if duplicates are not desired + * Possible cases for reader that uses cond_resched_rcu() in the loop: + * - new table can not be installed, no need to repeat + * - new table can be installed => check and repeat if new table is + * installed, needed for !PREEMPT_RCU + */ +#define ip_vs_rht_walk_buckets_rcu(table, head) \ + ip_vs_rht_for_each_table_rcu(table, _t, _p) \ + ip_vs_rht_for_each_bucket(_t, _bucket, head) \ + ip_vs_rht_for_bucket_retry(_t, _bucket, _sc, \ + _seq, _retry) + +/** + * DECLARE_IP_VS_RHT_WALK_BUCKET_RCU() - Declare variables + * + * Variables for ip_vs_rht_walk_bucket_rcu + */ +#define DECLARE_IP_VS_RHT_WALK_BUCKET_RCU() \ + unsigned int _seq; \ + seqcount_t *_sc; \ + int _retry +/** + * ip_vs_rht_walk_bucket_rcu() - Walk bucket under RCU read lock + * @t: current table, struct ip_vs_rht *var + * @bucket: index of current bucket or hash key + * @head: bucket address, used as cursor, struct hlist_bl_head *var + * + * Can be used while others add/delete/move entries + * Not suitable if duplicates are not desired + * Possible cases for reader that uses cond_resched_rcu() in the loop: + * - new table can not be installed, no need to repeat + * - new table can be installed => check and repeat if new table is + * installed, needed for !PREEMPT_RCU + */ +#define ip_vs_rht_walk_bucket_rcu(t, bucket, head) \ + if (({ head = (t)->buckets + ((bucket) & (t)->mask); 0; })) \ + {} \ + else \ + ip_vs_rht_for_bucket_retry(t, (bucket), _sc, _seq, _retry) + +/** + * DECLARE_IP_VS_RHT_WALK_BUCKETS_SAFE_RCU() - Declare variables + * + * Variables for ip_vs_rht_walk_buckets_safe_rcu + */ +#define DECLARE_IP_VS_RHT_WALK_BUCKETS_SAFE_RCU() \ + struct ip_vs_rht *_t, *_p; \ + u32 _bucket +/** + * ip_vs_rht_walk_buckets_safe_rcu() - Walk all buckets under RCU read lock + * @table: struct ip_vs_rht __rcu *table + * @head: bucket address, used as cursor, struct hlist_bl_head *var + * + * Can be used while others add/delete entries but moving is disabled + * Using cond_resched_rcu() should be safe if tables do not change + */ +#define ip_vs_rht_walk_buckets_safe_rcu(table, head) \ + ip_vs_rht_for_each_table_rcu(table, _t, _p) \ + ip_vs_rht_for_each_bucket(_t, _bucket, head) + +/** + * DECLARE_IP_VS_RHT_WALK_BUCKETS() - Declare variables + * + * Variables for ip_vs_rht_walk_buckets + */ +#define DECLARE_IP_VS_RHT_WALK_BUCKETS() \ + struct ip_vs_rht *_t, *_p; \ + u32 _bucket + +/** + * ip_vs_rht_walk_buckets() - Walk all buckets + * @table: struct ip_vs_rht __rcu *table + * @head: bucket address, used as cursor, struct hlist_bl_head *var + * + * Use if others can not add/delete/move entries + */ +#define ip_vs_rht_walk_buckets(table, head) \ + ip_vs_rht_for_each_table(table, _t, _p) \ + ip_vs_rht_for_each_bucket(_t, _bucket, head) + +/* Entries can be in one of two tables, so we flip bit when new table is + * created and store it as highest bit in hash keys + */ +#define IP_VS_RHT_TABLE_ID_MASK BIT(31) + +/* Check if hash key is from this table */ +static inline bool ip_vs_rht_same_table(struct ip_vs_rht *t, u32 hash_key) +{ + return !((t->table_id ^ hash_key) & IP_VS_RHT_TABLE_ID_MASK); +} + +/* Build per-table hash key from hash value */ +static inline u32 ip_vs_rht_build_hash_key(struct ip_vs_rht *t, u32 hash) +{ + return t->table_id | (hash & ~IP_VS_RHT_TABLE_ID_MASK); +} + +void ip_vs_rht_free(struct ip_vs_rht *t); +void ip_vs_rht_rcu_free(struct rcu_head *head); +struct ip_vs_rht *ip_vs_rht_alloc(int buckets, int scounts, int locks); +int ip_vs_rht_desired_size(struct netns_ipvs *ipvs, struct ip_vs_rht *t, int n, + int lfactor, int min_bits, int max_bits); +void ip_vs_rht_set_thresholds(struct ip_vs_rht *t, int size, int lfactor, + int min_bits, int max_bits); +u32 ip_vs_rht_hash_linfo(struct ip_vs_rht *t, int af, + const union nf_inet_addr *addr, u32 v1, u32 v2); + struct dst_entry; struct iphdr; struct ip_vs_conn; diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index deaf16a90e2f..a6fd3b64428f 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -76,11 +76,6 @@ static unsigned int ip_vs_conn_rnd __read_mostly; #define IP_VS_ADDRSTRLEN (8+1) #endif -struct ip_vs_aligned_lock -{ - spinlock_t l; -} __attribute__((__aligned__(SMP_CACHE_BYTES))); - /* lock array for conn table */ static struct ip_vs_aligned_lock __ip_vs_conntbl_lock_array[CT_LOCKARRAY_SIZE] __cacheline_aligned; diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 869f18e0e835..f5b7a2047291 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -117,6 +117,185 @@ void ip_vs_init_hash_table(struct list_head *table, int rows) INIT_LIST_HEAD(&table[rows]); } +/* IPVS Resizable Hash Tables: + * - list_bl buckets with bit lock + * + * Goals: + * - RCU lookup for entry can run in parallel with add/del/move operations + * - hash keys can be on non-contiguous memory + * - support entries with duplicate keys + * - unlink entries without lookup, use the saved table and bucket id + * - resizing can trigger on load change or depending on key refresh period + * - customizable load factor to balance between speed and memory usage + * - add/del/move operations should be allowed for any context + * + * Resizing: + * - new table is attached to the current table and all entries are moved + * with new hash key. Finally, the new table is installed as current one and + * the old table is released after RCU grace period. + * - RCU read-side critical sections will walk two tables while resizing is + * in progress + * - new entries are added to the new table + * - entries will be deleted from the old or from the new table, the table_id + * can be saved into entry as part of the hash key to know where the entry is + * hashed + * - move operations may delay readers or to cause retry for the modified + * bucket. As result, searched entry will be found but walkers that operate + * on multiple entries may see same entry twice if bucket walking is retried. + * - for fast path the number of entries (load) can be compared to u_thresh + * and l_thresh to decide when to trigger table growing/shrinking. They + * are calculated based on load factor (shift count), negative value allows + * load to be below 100% to reduce collisions by maintaining larger table + * while positive value tolerates collisions by using smaller table and load + * above 100%: u_thresh(load) = size * (2 ^ lfactor) + * + * Locking: + * - lock: protect seqc if other context except resizer can move entries + * - seqc: seqcount_t, delay/retry readers while entries are moved to + * new table on resizing + * - bit lock: serialize bucket modifications + * - writers may use other locking mechanisms to serialize operations for + * resizing, moving and installing new tables + */ + +void ip_vs_rht_free(struct ip_vs_rht *t) +{ + kvfree(t->buckets); + kvfree(t->seqc); + kvfree(t->lock); + kfree(t); +} + +void ip_vs_rht_rcu_free(struct rcu_head *head) +{ + struct ip_vs_rht *t; + + t = container_of(head, struct ip_vs_rht, rcu_head); + ip_vs_rht_free(t); +} + +struct ip_vs_rht *ip_vs_rht_alloc(int buckets, int scounts, int locks) +{ + struct ip_vs_rht *t = kzalloc(sizeof(*t), GFP_KERNEL); + int i; + + if (!t) + return NULL; + if (scounts) { + int ml = roundup_pow_of_two(nr_cpu_ids); + + scounts = min(scounts, buckets); + scounts = min(scounts, ml); + t->seqc = kvmalloc_array(scounts, sizeof(*t->seqc), GFP_KERNEL); + if (!t->seqc) + goto err; + for (i = 0; i < scounts; i++) + seqcount_init(&t->seqc[i]); + + if (locks) { + locks = min(locks, scounts); + t->lock = kvmalloc_array(locks, sizeof(*t->lock), + GFP_KERNEL); + if (!t->lock) + goto err; + for (i = 0; i < locks; i++) + spin_lock_init(&t->lock[i].l); + } + } + + t->buckets = kvmalloc_array(buckets, sizeof(*t->buckets), GFP_KERNEL); + if (!t->buckets) + goto err; + for (i = 0; i < buckets; i++) + INIT_HLIST_BL_HEAD(&t->buckets[i]); + t->mask = buckets - 1; + t->size = buckets; + t->seqc_mask = scounts - 1; + t->lock_mask = locks - 1; + t->u_thresh = buckets; + t->l_thresh = buckets >> 4; + t->bits = order_base_2(buckets); + /* new_tbl points to self if no new table is filled */ + RCU_INIT_POINTER(t->new_tbl, t); + get_random_bytes(&t->hash_key, sizeof(t->hash_key)); + return t; + +err: + ip_vs_rht_free(t); + return NULL; +} + +/* Get the desired table size for n entries based on current table size and + * by using the formula size = n / (2^lfactor) + * lfactor: shift value for the load factor: + * - >0: u_thresh=size << lfactor, for load factor above 100% + * - <0: u_thresh=size >> -lfactor, for load factor below 100% + * - 0: for load factor of 100% + */ +int ip_vs_rht_desired_size(struct netns_ipvs *ipvs, struct ip_vs_rht *t, int n, + int lfactor, int min_bits, int max_bits) +{ + if (!t) + return 1 << min_bits; + n = roundup_pow_of_two(n); + if (lfactor < 0) { + int factor = min(-lfactor, max_bits); + + n = min(n, 1 << (max_bits - factor)); + n <<= factor; + } else { + n = min(n >> lfactor, 1 << max_bits); + } + if (lfactor != t->lfactor) + return clamp(n, 1 << min_bits, 1 << max_bits); + if (n > t->size) + return n; + if (n > t->size >> 4) + return t->size; + /* Shrink but keep it n * 2 to prevent frequent resizing */ + return clamp(n << 1, 1 << min_bits, 1 << max_bits); +} + +/* Set thresholds based on table size and load factor: + * u_thresh = size * (2^lfactor) + * l_thresh = u_thresh / 16 + * u_thresh/l_thresh can be used to check if load triggers a table grow/shrink + */ +void ip_vs_rht_set_thresholds(struct ip_vs_rht *t, int size, int lfactor, + int min_bits, int max_bits) +{ + if (size >= 1 << max_bits) + t->u_thresh = INT_MAX; /* stop growing */ + else if (lfactor <= 0) + t->u_thresh = size >> min(-lfactor, max_bits); + else + t->u_thresh = min(size, 1 << (30 - lfactor)) << lfactor; + + /* l_thresh: shrink when load is 16 times lower, can be 0 */ + if (size >= 1 << max_bits) + t->l_thresh = (1 << max_bits) >> 4; + else if (size > 1 << min_bits) + t->l_thresh = t->u_thresh >> 4; + else + t->l_thresh = 0; /* stop shrinking */ +} + +/* Return hash value for local info (fast, insecure) */ +u32 ip_vs_rht_hash_linfo(struct ip_vs_rht *t, int af, + const union nf_inet_addr *addr, u32 v1, u32 v2) +{ + u32 v3; + +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + v3 = ipv6_addr_hash(&addr->in6); + else +#endif + v3 = addr->all[0]; + + return jhash_3words(v1, v2, v3, (u32)t->hash_key.key[0]); +} + static inline void ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) { From 840aac3d900d09ec8fb8efe41bd7d09f9eb15538 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Tue, 3 Mar 2026 23:04:06 +0200 Subject: [PATCH 0259/1561] ipvs: use resizable hash table for services Make the hash table for services resizable in the bit range of 4-20. Table is attached only while services are present. Resizing is done by delayed work based on load (the number of hashed services). Table grows when load increases 2+ times (above 12.5% with lfactor=-3) and shrinks 8+ times when load decreases 16+ times (below 0.78%). Switch to jhash hashing to reduce the collisions for multiple services. Add a hash_key field into the service to store the table ID in the highest bit and the entry's hash value in the lowest bits. The lowest part of the hash value is used as bucket ID, the remaining part is used to filter the entries in the bucket before matching the keys and as result, helps the lookup operation to access only one cache line. By knowing the table ID and bucket ID for entry, we can unlink it without calculating the hash value and doing lookup by keys. We need only to validate the saved hash_key under lock. Signed-off-by: Julian Anastasov Signed-off-by: Florian Westphal --- include/net/ip_vs.h | 49 ++- net/netfilter/ipvs/ip_vs_ctl.c | 663 +++++++++++++++++++++++++++------ 2 files changed, 588 insertions(+), 124 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index c373fbdd2d0f..663ad6ad9518 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -35,12 +35,10 @@ #define IP_VS_HDR_INVERSE 1 #define IP_VS_HDR_ICMP 2 -/* - * Hash table: for virtual service lookups - */ -#define IP_VS_SVC_TAB_BITS 8 -#define IP_VS_SVC_TAB_SIZE BIT(IP_VS_SVC_TAB_BITS) -#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1) + +/* svc_table limits */ +#define IP_VS_SVC_TAB_MIN_BITS 4 +#define IP_VS_SVC_TAB_MAX_BITS 20 /* Generic access of ipvs struct */ static inline struct netns_ipvs *net_ipvs(struct net* net) @@ -51,8 +49,6 @@ static inline struct netns_ipvs *net_ipvs(struct net* net) /* Connections' size value needed by ip_vs_ctl.c */ extern int ip_vs_conn_tab_size; -extern struct mutex __ip_vs_mutex; - struct ip_vs_iphdr { int hdr_flags; /* ipvs flags */ __u32 off; /* Where IP or IPv4 header starts */ @@ -289,6 +285,12 @@ static inline int ip_vs_af_index(int af) return af == AF_INET6 ? IP_VS_AF_INET6 : IP_VS_AF_INET; } +/* work_flags */ +enum { + IP_VS_WORK_SVC_RESIZE, /* Schedule svc_resize_work */ + IP_VS_WORK_SVC_NORESIZE, /* Stopping svc_resize_work */ +}; + /* The port number of FTP service (in network order). */ #define FTPPORT cpu_to_be16(21) #define FTPDATA cpu_to_be16(20) @@ -889,14 +891,15 @@ struct ip_vs_dest_user_kern { * forwarding entries. */ struct ip_vs_service { - struct hlist_node s_list; /* node in service table */ - atomic_t refcnt; /* reference counter */ - + struct hlist_bl_node s_list; /* node in service table */ + u32 hash_key; /* Key for the hash table */ u16 af; /* address family */ __u16 protocol; /* which protocol (TCP/UDP) */ + union nf_inet_addr addr; /* IP address for virtual service */ - __be16 port; /* port number for the service */ __u32 fwmark; /* firewall mark of the service */ + atomic_t refcnt; /* reference counter */ + __be16 port; /* port number for the service */ unsigned int flags; /* service status flags */ unsigned int timeout; /* persistent timeout in ticks */ __be32 netmask; /* grouping granularity, mask/plen */ @@ -1155,6 +1158,10 @@ struct netns_ipvs { struct list_head dest_trash; spinlock_t dest_trash_lock; struct timer_list dest_trash_timer; /* expiration timer */ + struct mutex service_mutex; /* service reconfig */ + struct rw_semaphore svc_resize_sem; /* svc_table resizing */ + struct delayed_work svc_resize_work; /* resize svc_table */ + atomic_t svc_table_changes;/* ++ on new table */ /* Service counters */ atomic_t num_services[IP_VS_AF_MAX]; /* Services */ atomic_t fwm_services[IP_VS_AF_MAX]; /* Services */ @@ -1219,6 +1226,7 @@ struct netns_ipvs { int sysctl_est_nice; /* kthread nice */ int est_stopped; /* stop tasks */ #endif + int sysctl_svc_lfactor; /* ip_vs_lblc */ int sysctl_lblc_expiration; @@ -1228,6 +1236,7 @@ struct netns_ipvs { int sysctl_lblcr_expiration; struct ctl_table_header *lblcr_ctl_header; struct ctl_table *lblcr_ctl_table; + unsigned long work_flags; /* IP_VS_WORK_* flags */ /* ip_vs_est */ struct delayed_work est_reload_work;/* Reload kthread tasks */ struct mutex est_mutex; /* protect kthread tasks */ @@ -1259,9 +1268,7 @@ struct netns_ipvs { unsigned int mixed_address_family_dests; unsigned int hooks_afmask; /* &1=AF_INET, &2=AF_INET6 */ - /* the service mutex that protect svc_table and svc_fwm_table */ - struct mutex service_mutex; - struct hlist_head svc_table[IP_VS_SVC_TAB_SIZE]; /* Services */ + struct ip_vs_rht __rcu *svc_table; /* Services */ }; #define DEFAULT_SYNC_THRESHOLD 3 @@ -1511,6 +1518,18 @@ static inline int sysctl_est_nice(struct netns_ipvs *ipvs) #endif +/* Get load factor to map num_services/u_thresh to t->size + * Smaller value decreases u_thresh to reduce collisions but increases + * the table size + * Returns factor where: + * - <0: u_thresh = size >> -factor, eg. lfactor -2 = 25% load + * - >=0: u_thresh = size << factor, eg. lfactor 1 = 200% load + */ +static inline int sysctl_svc_lfactor(struct netns_ipvs *ipvs) +{ + return READ_ONCE(ipvs->sysctl_svc_lfactor); +} + /* IPVS core functions * (from ip_vs_core.c) */ diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index f7d454df2b58..2baef945c56f 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -293,47 +294,59 @@ ip_vs_use_count_dec(void) } - +/* Service hashing: + * Operation Locking order + * --------------------------------------------------------------------------- + * add table service_mutex, svc_resize_sem(W) + * del table service_mutex + * move between tables svc_resize_sem(W), seqcount_t(W), bit lock + * add/del service service_mutex, bit lock + * find service RCU, seqcount_t(R) + * walk services(blocking) service_mutex, svc_resize_sem(R) + * walk services(non-blocking) RCU, seqcount_t(R) + * + * - new tables are linked/unlinked under service_mutex and svc_resize_sem + * - new table is linked on resizing and all operations can run in parallel + * in 2 tables until the new table is registered as current one + * - two contexts can modify buckets: config and table resize, both in + * process context + * - only table resizer can move entries, so we do not protect t->seqc[] + * items with t->lock[] + * - lookups occur under RCU lock and seqcount reader lock to detect if + * services are moved to new table + * - move operations may disturb readers: find operation will not miss entries + * but walkers may see same entry twice if they are forced to retry chains + * - walkers using cond_resched_rcu() on !PREEMPT_RCU may need to hold + * service_mutex to disallow new tables to be installed or to check + * svc_table_changes and repeat the RCU read section if new table is installed + */ /* * Returns hash value for virtual service */ -static inline unsigned int -ip_vs_svc_hashkey(struct netns_ipvs *ipvs, int af, unsigned int proto, +static inline u32 +ip_vs_svc_hashval(struct ip_vs_rht *t, int af, unsigned int proto, const union nf_inet_addr *addr, __be16 port) { - unsigned int porth = ntohs(port); - __be32 addr_fold = addr->ip; - __u32 ahash; - -#ifdef CONFIG_IP_VS_IPV6 - if (af == AF_INET6) - addr_fold = addr->ip6[0]^addr->ip6[1]^ - addr->ip6[2]^addr->ip6[3]; -#endif - ahash = ntohl(addr_fold); - ahash ^= ((size_t) ipvs >> 8); - - return (proto ^ ahash ^ (porth >> IP_VS_SVC_TAB_BITS) ^ porth) & - IP_VS_SVC_TAB_MASK; + return ip_vs_rht_hash_linfo(t, af, addr, ntohs(port), proto); } /* * Returns hash value of fwmark for virtual service lookup */ -static inline unsigned int ip_vs_svc_fwm_hashkey(struct netns_ipvs *ipvs, __u32 fwmark) +static inline u32 ip_vs_svc_fwm_hashval(struct ip_vs_rht *t, int af, + __u32 fwmark) { - return (((size_t)ipvs>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK; + return jhash_2words(fwmark, af, (u32)t->hash_key.key[0]); } -/* - * Hashes a service in the svc_table by - * or by fwmark. - * Should be called with locked tables. - */ +/* Hashes a service in the svc_table by or by fwmark */ static int ip_vs_svc_hash(struct ip_vs_service *svc) { - unsigned int hash; + struct netns_ipvs *ipvs = svc->ipvs; + struct hlist_bl_head *head; + struct ip_vs_rht *t; + u32 hash; if (svc->flags & IP_VS_SVC_F_HASHED) { pr_err("%s(): request for already hashed, called from %pS\n", @@ -341,23 +354,32 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) return 0; } + /* increase its refcnt because it is referenced by the svc table */ + atomic_inc(&svc->refcnt); + + /* New entries go into recent table */ + t = rcu_dereference_protected(ipvs->svc_table, 1); + t = rcu_dereference_protected(t->new_tbl, 1); + if (svc->fwmark == 0) { /* - * Hash it by + * Hash it by */ - hash = ip_vs_svc_hashkey(svc->ipvs, svc->af, svc->protocol, + hash = ip_vs_svc_hashval(t, svc->af, svc->protocol, &svc->addr, svc->port); } else { /* * Hash it by fwmark */ - hash = ip_vs_svc_fwm_hashkey(svc->ipvs, svc->fwmark); + hash = ip_vs_svc_fwm_hashval(t, svc->af, svc->fwmark); } - hlist_add_head_rcu(&svc->s_list, &svc->ipvs->svc_table[hash]); - + head = t->buckets + (hash & t->mask); + hlist_bl_lock(head); + WRITE_ONCE(svc->hash_key, ip_vs_rht_build_hash_key(t, hash)); svc->flags |= IP_VS_SVC_F_HASHED; - /* increase its refcnt because it is referenced by the svc table */ - atomic_inc(&svc->refcnt); + hlist_bl_add_head_rcu(&svc->s_list, head); + hlist_bl_unlock(head); + return 1; } @@ -368,17 +390,45 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) */ static int ip_vs_svc_unhash(struct ip_vs_service *svc) { + struct netns_ipvs *ipvs = svc->ipvs; + struct hlist_bl_head *head; + struct ip_vs_rht *t; + u32 hash_key2; + u32 hash_key; + if (!(svc->flags & IP_VS_SVC_F_HASHED)) { pr_err("%s(): request for unhash flagged, called from %pS\n", __func__, __builtin_return_address(0)); return 0; } + t = rcu_dereference_protected(ipvs->svc_table, 1); + hash_key = READ_ONCE(svc->hash_key); + /* We need to lock the bucket in the right table */ + if (ip_vs_rht_same_table(t, hash_key)) { + head = t->buckets + (hash_key & t->mask); + hlist_bl_lock(head); + /* Ensure hash_key is read under lock */ + hash_key2 = READ_ONCE(svc->hash_key); + /* Moved to new table ? */ + if (hash_key != hash_key2) { + hlist_bl_unlock(head); + t = rcu_dereference_protected(t->new_tbl, 1); + head = t->buckets + (hash_key2 & t->mask); + hlist_bl_lock(head); + } + } else { + /* It is already moved to new table */ + t = rcu_dereference_protected(t->new_tbl, 1); + head = t->buckets + (hash_key & t->mask); + hlist_bl_lock(head); + } /* Remove it from svc_table */ - hlist_del_rcu(&svc->s_list); + hlist_bl_del_rcu(&svc->s_list); svc->flags &= ~IP_VS_SVC_F_HASHED; atomic_dec(&svc->refcnt); + hlist_bl_unlock(head); return 1; } @@ -390,18 +440,29 @@ static inline struct ip_vs_service * __ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u16 protocol, const union nf_inet_addr *vaddr, __be16 vport) { - unsigned int hash; + DECLARE_IP_VS_RHT_WALK_BUCKET_RCU(); + struct hlist_bl_head *head; struct ip_vs_service *svc; + struct ip_vs_rht *t, *p; + struct hlist_bl_node *e; + u32 hash, hash_key; - /* Check for "full" addressed entries */ - hash = ip_vs_svc_hashkey(ipvs, af, protocol, vaddr, vport); + ip_vs_rht_for_each_table_rcu(ipvs->svc_table, t, p) { + /* Check for "full" addressed entries */ + hash = ip_vs_svc_hashval(t, af, protocol, vaddr, vport); - hlist_for_each_entry_rcu(svc, &ipvs->svc_table[hash], s_list) { - if (svc->af == af && ip_vs_addr_equal(af, &svc->addr, vaddr) && - svc->port == vport && svc->protocol == protocol && - !svc->fwmark) { - /* HIT */ - return svc; + hash_key = ip_vs_rht_build_hash_key(t, hash); + ip_vs_rht_walk_bucket_rcu(t, hash_key, head) { + hlist_bl_for_each_entry_rcu(svc, e, head, s_list) { + if (READ_ONCE(svc->hash_key) == hash_key && + svc->af == af && + ip_vs_addr_equal(af, &svc->addr, vaddr) && + svc->port == vport && + svc->protocol == protocol && !svc->fwmark) { + /* HIT */ + return svc; + } + } } } @@ -415,16 +476,26 @@ __ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u16 protocol, static inline struct ip_vs_service * __ip_vs_svc_fwm_find(struct netns_ipvs *ipvs, int af, __u32 fwmark) { - unsigned int hash; + DECLARE_IP_VS_RHT_WALK_BUCKET_RCU(); + struct hlist_bl_head *head; struct ip_vs_service *svc; + struct ip_vs_rht *t, *p; + struct hlist_bl_node *e; + u32 hash, hash_key; - /* Check for fwmark addressed entries */ - hash = ip_vs_svc_fwm_hashkey(ipvs, fwmark); + ip_vs_rht_for_each_table_rcu(ipvs->svc_table, t, p) { + /* Check for fwmark addressed entries */ + hash = ip_vs_svc_fwm_hashval(t, af, fwmark); - hlist_for_each_entry_rcu(svc, &ipvs->svc_table[hash], s_list) { - if (svc->fwmark == fwmark && svc->af == af) { - /* HIT */ - return svc; + hash_key = ip_vs_rht_build_hash_key(t, hash); + ip_vs_rht_walk_bucket_rcu(t, hash_key, head) { + hlist_bl_for_each_entry_rcu(svc, e, head, s_list) { + if (READ_ONCE(svc->hash_key) == hash_key && + svc->fwmark == fwmark && svc->af == af) { + /* HIT */ + return svc; + } + } } } @@ -487,6 +558,220 @@ ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u32 fwmark, __u16 protocol return svc; } +/* Return the number of registered services */ +static int ip_vs_get_num_services(struct netns_ipvs *ipvs) +{ + int ns = 0, ni = IP_VS_AF_MAX; + + while (--ni >= 0) + ns += atomic_read(&ipvs->num_services[ni]); + return ns; +} + +/* Get default load factor to map num_services/u_thresh to t->size */ +static int ip_vs_svc_default_load_factor(struct netns_ipvs *ipvs) +{ + int factor; + + if (net_eq(ipvs->net, &init_net)) + factor = -3; /* grow if load is above 12.5% */ + else + factor = -2; /* grow if load is above 25% */ + return factor; +} + +/* Get the desired svc_table size */ +static int ip_vs_svc_desired_size(struct netns_ipvs *ipvs, struct ip_vs_rht *t, + int lfactor) +{ + return ip_vs_rht_desired_size(ipvs, t, ip_vs_get_num_services(ipvs), + lfactor, IP_VS_SVC_TAB_MIN_BITS, + IP_VS_SVC_TAB_MAX_BITS); +} + +/* Allocate svc_table */ +static struct ip_vs_rht *ip_vs_svc_table_alloc(struct netns_ipvs *ipvs, + int buckets, int lfactor) +{ + struct ip_vs_rht *t; + int scounts, locks; + + /* No frequent lookups to race with resizing, so use max of 64 + * seqcounts. Only resizer moves entries, so use 0 locks. + */ + scounts = clamp(buckets >> 4, 1, 64); + locks = 0; + + t = ip_vs_rht_alloc(buckets, scounts, locks); + if (!t) + return NULL; + t->lfactor = lfactor; + ip_vs_rht_set_thresholds(t, t->size, lfactor, IP_VS_SVC_TAB_MIN_BITS, + IP_VS_SVC_TAB_MAX_BITS); + return t; +} + +/* svc_table resizer work */ +static void svc_resize_work_handler(struct work_struct *work) +{ + struct hlist_bl_head *head, *head2; + struct ip_vs_rht *t_free = NULL; + unsigned int resched_score = 0; + struct hlist_bl_node *cn, *nn; + struct ip_vs_rht *t, *t_new; + struct ip_vs_service *svc; + struct netns_ipvs *ipvs; + bool more_work = true; + seqcount_t *sc; + int limit = 0; + int new_size; + int lfactor; + u32 bucket; + + ipvs = container_of(work, struct netns_ipvs, svc_resize_work.work); + + if (!down_write_trylock(&ipvs->svc_resize_sem)) + goto out; + if (!mutex_trylock(&ipvs->service_mutex)) + goto unlock_sem; + more_work = false; + clear_bit(IP_VS_WORK_SVC_RESIZE, &ipvs->work_flags); + if (!READ_ONCE(ipvs->enable) || + test_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags)) + goto unlock_m; + t = rcu_dereference_protected(ipvs->svc_table, 1); + /* Do nothing if table is removed */ + if (!t) + goto unlock_m; + /* New table needs to be registered? BUG! */ + if (t != rcu_dereference_protected(t->new_tbl, 1)) + goto unlock_m; + + lfactor = sysctl_svc_lfactor(ipvs); + /* Should we resize ? */ + new_size = ip_vs_svc_desired_size(ipvs, t, lfactor); + if (new_size == t->size && lfactor == t->lfactor) + goto unlock_m; + + t_new = ip_vs_svc_table_alloc(ipvs, new_size, lfactor); + if (!t_new) { + more_work = true; + goto unlock_m; + } + /* Flip the table_id */ + t_new->table_id = t->table_id ^ IP_VS_RHT_TABLE_ID_MASK; + + rcu_assign_pointer(t->new_tbl, t_new); + /* Allow add/del to new_tbl while moving from old table */ + mutex_unlock(&ipvs->service_mutex); + + ip_vs_rht_for_each_bucket(t, bucket, head) { +same_bucket: + if (++limit >= 16) { + if (!READ_ONCE(ipvs->enable) || + test_bit(IP_VS_WORK_SVC_NORESIZE, + &ipvs->work_flags)) + goto unlock_sem; + if (resched_score >= 100) { + resched_score = 0; + cond_resched(); + } + limit = 0; + } + if (hlist_bl_empty(head)) { + resched_score++; + continue; + } + /* Preemption calls ahead... */ + resched_score = 0; + + sc = &t->seqc[bucket & t->seqc_mask]; + /* seqcount_t usage considering PREEMPT_RT rules: + * - we are the only writer => preemption can be allowed + * - readers (SoftIRQ) => disable BHs + * - readers (processes) => preemption should be disabled + */ + local_bh_disable(); + preempt_disable_nested(); + write_seqcount_begin(sc); + hlist_bl_lock(head); + + hlist_bl_for_each_entry_safe(svc, cn, nn, head, s_list) { + u32 hash; + + /* New hash for the new table */ + if (svc->fwmark == 0) { + /* Hash it by */ + hash = ip_vs_svc_hashval(t_new, svc->af, + svc->protocol, + &svc->addr, svc->port); + } else { + /* Hash it by fwmark */ + hash = ip_vs_svc_fwm_hashval(t_new, svc->af, + svc->fwmark); + } + hlist_bl_del_rcu(&svc->s_list); + head2 = t_new->buckets + (hash & t_new->mask); + + hlist_bl_lock(head2); + WRITE_ONCE(svc->hash_key, + ip_vs_rht_build_hash_key(t_new, hash)); + /* t_new->seqc are not used at this stage, we race + * only with add/del, so only lock the bucket. + */ + hlist_bl_add_head_rcu(&svc->s_list, head2); + hlist_bl_unlock(head2); + /* Too long chain? Do it in steps */ + if (++limit >= 64) + break; + } + + hlist_bl_unlock(head); + write_seqcount_end(sc); + preempt_enable_nested(); + local_bh_enable(); + if (limit >= 64) + goto same_bucket; + } + + /* Tables can be switched only under service_mutex */ + while (!mutex_trylock(&ipvs->service_mutex)) { + cond_resched(); + if (!READ_ONCE(ipvs->enable) || + test_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags)) + goto unlock_sem; + } + if (!READ_ONCE(ipvs->enable) || + test_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags)) + goto unlock_m; + + rcu_assign_pointer(ipvs->svc_table, t_new); + /* Inform readers that new table is installed */ + smp_mb__before_atomic(); + atomic_inc(&ipvs->svc_table_changes); + t_free = t; + +unlock_m: + mutex_unlock(&ipvs->service_mutex); + +unlock_sem: + up_write(&ipvs->svc_resize_sem); + + if (t_free) { + /* RCU readers should not see more than two tables in chain. + * To prevent new table to be attached wait here instead of + * freeing the old table in RCU callback. + */ + synchronize_rcu(); + ip_vs_rht_free(t_free); + } + +out: + if (!READ_ONCE(ipvs->enable) || !more_work || + test_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags)) + return; + queue_delayed_work(system_unbound_wq, &ipvs->svc_resize_work, 1); +} static inline void __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc) @@ -1357,12 +1642,13 @@ static int ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, struct ip_vs_service **svc_p) { - int ret = 0; struct ip_vs_scheduler *sched = NULL; + struct ip_vs_rht *t, *t_new = NULL; int af_id = ip_vs_af_index(u->af); - struct ip_vs_pe *pe = NULL; struct ip_vs_service *svc = NULL; + struct ip_vs_pe *pe = NULL; int ret_hooks = -1; + int ret = 0; /* increase the module use count */ if (!ip_vs_use_count_inc()) @@ -1404,6 +1690,18 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, } #endif + t = rcu_dereference_protected(ipvs->svc_table, 1); + if (!t) { + int lfactor = sysctl_svc_lfactor(ipvs); + int new_size = ip_vs_svc_desired_size(ipvs, NULL, lfactor); + + t_new = ip_vs_svc_table_alloc(ipvs, new_size, lfactor); + if (!t_new) { + ret = -ENOMEM; + goto out_err; + } + } + if (!atomic_read(&ipvs->num_services[af_id])) { ret = ip_vs_register_hooks(ipvs, u->af); if (ret < 0) @@ -1449,6 +1747,12 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, if (ret < 0) goto out_err; + if (t_new) { + clear_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags); + rcu_assign_pointer(ipvs->svc_table, t_new); + t_new = NULL; + } + /* Update the virtual service counters */ if (svc->port == FTPPORT) atomic_inc(&ipvs->ftpsvc_counter[af_id]); @@ -1470,6 +1774,12 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, /* Hash the service into the service table */ ip_vs_svc_hash(svc); + /* Schedule resize work */ + if (t && ip_vs_get_num_services(ipvs) > t->u_thresh && + !test_and_set_bit(IP_VS_WORK_SVC_RESIZE, &ipvs->work_flags)) + queue_delayed_work(system_unbound_wq, &ipvs->svc_resize_work, + 1); + *svc_p = svc; if (!READ_ONCE(ipvs->enable)) { @@ -1484,6 +1794,8 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, out_err: + if (t_new) + ip_vs_rht_free(t_new); if (ret_hooks >= 0) ip_vs_unregister_hooks(ipvs, u->af); if (svc != NULL) { @@ -1671,10 +1983,38 @@ static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup) */ static int ip_vs_del_service(struct ip_vs_service *svc) { + struct netns_ipvs *ipvs; + struct ip_vs_rht *t, *p; + int ns; + if (svc == NULL) return -EEXIST; + ipvs = svc->ipvs; ip_vs_unlink_service(svc, false); + t = rcu_dereference_protected(ipvs->svc_table, 1); + /* Drop the table if no more services */ + ns = ip_vs_get_num_services(ipvs); + if (!ns) { + /* Stop the resizer and drop the tables */ + set_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags); + cancel_delayed_work_sync(&ipvs->svc_resize_work); + if (t) { + rcu_assign_pointer(ipvs->svc_table, NULL); + while (1) { + p = rcu_dereference_protected(t->new_tbl, 1); + call_rcu(&t->rcu_head, ip_vs_rht_rcu_free); + if (p == t) + break; + t = p; + } + } + } else if (ns <= t->l_thresh && + !test_and_set_bit(IP_VS_WORK_SVC_RESIZE, + &ipvs->work_flags)) { + queue_delayed_work(system_unbound_wq, &ipvs->svc_resize_work, + 1); + } return 0; } @@ -1684,14 +2024,36 @@ static int ip_vs_del_service(struct ip_vs_service *svc) */ static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup) { - int idx; + DECLARE_IP_VS_RHT_WALK_BUCKETS(); + struct hlist_bl_head *head; struct ip_vs_service *svc; - struct hlist_node *n; + struct hlist_bl_node *ne; + struct hlist_bl_node *e; + struct ip_vs_rht *t, *p; - for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - hlist_for_each_entry_safe(svc, n, &ipvs->svc_table[idx], - s_list) - ip_vs_unlink_service(svc, cleanup); + /* Stop the resizer and drop the tables */ + if (!test_and_set_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags)) + cancel_delayed_work_sync(&ipvs->svc_resize_work); + /* No resizer, so now we have exclusive write access */ + + if (ip_vs_get_num_services(ipvs)) { + ip_vs_rht_walk_buckets(ipvs->svc_table, head) { + hlist_bl_for_each_entry_safe(svc, e, ne, head, s_list) + ip_vs_unlink_service(svc, cleanup); + } + } + + /* Unregister the hash table and release it after RCU grace period */ + t = rcu_dereference_protected(ipvs->svc_table, 1); + if (t) { + rcu_assign_pointer(ipvs->svc_table, NULL); + while (1) { + p = rcu_dereference_protected(t->new_tbl, 1); + call_rcu(&t->rcu_head, ip_vs_rht_rcu_free); + if (p == t) + break; + t = p; + } } return 0; } @@ -1742,19 +2104,44 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); struct netns_ipvs *ipvs = net_ipvs(net); + DECLARE_IP_VS_RHT_WALK_BUCKETS_RCU(); + unsigned int resched_score = 0; + struct hlist_bl_head *head; struct ip_vs_service *svc; + struct hlist_bl_node *e; struct ip_vs_dest *dest; - unsigned int idx; + int old_gen, new_gen; if (event != NETDEV_DOWN || !ipvs) return NOTIFY_DONE; IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name); + + old_gen = atomic_read(&ipvs->svc_table_changes); + rcu_read_lock(); - for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - hlist_for_each_entry_rcu(svc, &ipvs->svc_table[idx], s_list) + +repeat: + smp_rmb(); /* ipvs->svc_table and svc_table_changes */ + ip_vs_rht_walk_buckets_rcu(ipvs->svc_table, head) { + hlist_bl_for_each_entry_rcu(svc, e, head, s_list) { list_for_each_entry_rcu(dest, &svc->destinations, - n_list) + n_list) { ip_vs_forget_dev(dest, dev); + resched_score += 10; + } + resched_score++; + } + resched_score++; + if (resched_score >= 100) { + resched_score = 0; + cond_resched_rcu(); + new_gen = atomic_read(&ipvs->svc_table_changes); + /* New table installed ? */ + if (old_gen != new_gen) { + old_gen = new_gen; + goto repeat; + } + } } rcu_read_unlock(); @@ -1777,14 +2164,28 @@ static int ip_vs_zero_service(struct ip_vs_service *svc) static int ip_vs_zero_all(struct netns_ipvs *ipvs) { - int idx; + DECLARE_IP_VS_RHT_WALK_BUCKETS_RCU(); + unsigned int resched_score = 0; + struct hlist_bl_head *head; struct ip_vs_service *svc; + struct hlist_bl_node *e; - for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - hlist_for_each_entry(svc, &ipvs->svc_table[idx], s_list) + rcu_read_lock(); + + ip_vs_rht_walk_buckets_rcu(ipvs->svc_table, head) { + hlist_bl_for_each_entry_rcu(svc, e, head, s_list) { ip_vs_zero_service(svc); + resched_score += 10; + } + resched_score++; + if (resched_score >= 100) { + resched_score = 0; + cond_resched_rcu(); + } } + rcu_read_unlock(); + ip_vs_zero_stats(&ipvs->tot_stats->s); return 0; } @@ -2218,7 +2619,8 @@ static struct ctl_table vs_vars[] = { struct ip_vs_iter { struct seq_net_private p; /* Do not move this, netns depends upon it*/ - int bucket; + struct ip_vs_rht *t; + u32 bucket; }; /* @@ -2239,17 +2641,23 @@ static inline const char *ip_vs_fwd_name(unsigned int flags) } } - +/* Do not expect consistent view during add, del and move(table resize). + * We may miss entries and even show duplicates. + */ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) { - struct net *net = seq_file_net(seq); - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_iter *iter = seq->private; - int idx; + struct ip_vs_rht *t = iter->t; struct ip_vs_service *svc; + struct hlist_bl_node *e; + int idx; - for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - hlist_for_each_entry_rcu(svc, &ipvs->svc_table[idx], s_list) { + if (!t) + return NULL; + for (idx = 0; idx < t->size; idx++) { + hlist_bl_for_each_entry_rcu(svc, e, &t->buckets[idx], s_list) { + if (!ip_vs_rht_same_table(t, READ_ONCE(svc->hash_key))) + break; if (pos-- == 0) { iter->bucket = idx; return svc; @@ -2262,18 +2670,22 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { + struct ip_vs_iter *iter = seq->private; + struct net *net = seq_file_net(seq); + struct netns_ipvs *ipvs = net_ipvs(net); + rcu_read_lock(); + iter->t = rcu_dereference(ipvs->svc_table); return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN; } static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct hlist_node *e; - struct ip_vs_iter *iter; struct ip_vs_service *svc; - struct net *net = seq_file_net(seq); - struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_iter *iter; + struct hlist_bl_node *e; + struct ip_vs_rht *t; ++*pos; if (v == SEQ_START_TOKEN) @@ -2281,15 +2693,22 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) svc = v; iter = seq->private; + t = iter->t; + if (!t) + return NULL; - e = rcu_dereference(hlist_next_rcu(&svc->s_list)); - if (e) - return hlist_entry(e, struct ip_vs_service, s_list); + hlist_bl_for_each_entry_continue_rcu(svc, e, s_list) { + /* Our cursor was moved to new table ? */ + if (!ip_vs_rht_same_table(t, READ_ONCE(svc->hash_key))) + break; + return svc; + } - while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { - hlist_for_each_entry_rcu(svc, - &ipvs->svc_table[iter->bucket], - s_list) { + while (++iter->bucket < t->size) { + hlist_bl_for_each_entry_rcu(svc, e, &t->buckets[iter->bucket], + s_list) { + if (!ip_vs_rht_same_table(t, READ_ONCE(svc->hash_key))) + break; return svc; } } @@ -2770,13 +3189,18 @@ __ip_vs_get_service_entries(struct netns_ipvs *ipvs, const struct ip_vs_get_services *get, struct ip_vs_get_services __user *uptr) { - int idx, count=0; - struct ip_vs_service *svc; struct ip_vs_service_entry entry; + DECLARE_IP_VS_RHT_WALK_BUCKETS(); + struct hlist_bl_head *head; + struct ip_vs_service *svc; + struct hlist_bl_node *e; + int count = 0; int ret = 0; - for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - hlist_for_each_entry(svc, &ipvs->svc_table[idx], s_list) { + lockdep_assert_held(&ipvs->svc_resize_sem); + /* All service modifications are disabled, go ahead */ + ip_vs_rht_walk_buckets(ipvs->svc_table, head) { + hlist_bl_for_each_entry(svc, e, head, s_list) { /* Only expose IPv4 entries to old interface */ if (svc->af != AF_INET) continue; @@ -2948,6 +3372,35 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) return ret; } + if (cmd == IP_VS_SO_GET_SERVICES) { + struct ip_vs_get_services *get; + size_t size; + + get = (struct ip_vs_get_services *)arg; + size = struct_size(get, entrytable, get->num_services); + if (*len != size) { + pr_err("length: %u != %zu\n", *len, size); + return -EINVAL; + } + /* Protect against table resizer moving the entries. + * Try reverse locking, so that we do not hold the mutex + * while waiting for semaphore. + */ + while (1) { + ret = down_read_killable(&ipvs->svc_resize_sem); + if (ret < 0) + return ret; + if (mutex_trylock(&ipvs->service_mutex)) + break; + up_read(&ipvs->svc_resize_sem); + cond_resched(); + } + ret = __ip_vs_get_service_entries(ipvs, get, user); + up_read(&ipvs->svc_resize_sem); + mutex_unlock(&ipvs->service_mutex); + return ret; + } + mutex_lock(&ipvs->service_mutex); switch (cmd) { case IP_VS_SO_GET_VERSION: @@ -2976,22 +3429,6 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) } break; - case IP_VS_SO_GET_SERVICES: - { - struct ip_vs_get_services *get; - size_t size; - - get = (struct ip_vs_get_services *)arg; - size = struct_size(get, entrytable, get->num_services); - if (*len != size) { - pr_err("length: %u != %zu\n", *len, size); - ret = -EINVAL; - goto out; - } - ret = __ip_vs_get_service_entries(ipvs, get, user); - } - break; - case IP_VS_SO_GET_SERVICE: { struct ip_vs_service_entry *entry; @@ -3277,15 +3714,19 @@ nla_put_failure: static int ip_vs_genl_dump_services(struct sk_buff *skb, struct netlink_callback *cb) { - int idx = 0, i; - int start = cb->args[0]; - struct ip_vs_service *svc; + DECLARE_IP_VS_RHT_WALK_BUCKETS_SAFE_RCU(); struct net *net = sock_net(skb->sk); struct netns_ipvs *ipvs = net_ipvs(net); + struct hlist_bl_head *head; + struct ip_vs_service *svc; + struct hlist_bl_node *e; + int start = cb->args[0]; + int idx = 0; + down_read(&ipvs->svc_resize_sem); rcu_read_lock(); - for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { - hlist_for_each_entry_rcu(svc, &ipvs->svc_table[i], s_list) { + ip_vs_rht_walk_buckets_safe_rcu(ipvs->svc_table, head) { + hlist_bl_for_each_entry_rcu(svc, e, head, s_list) { if (++idx <= start) continue; if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { @@ -3297,6 +3738,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, nla_put_failure: rcu_read_unlock(); + up_read(&ipvs->svc_resize_sem); cb->args[0] = idx; return skb->len; @@ -4306,8 +4748,10 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) /* Initialize service_mutex, svc_table per netns */ __mutex_init(&ipvs->service_mutex, "ipvs->service_mutex", &__ipvs_service_key); - for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) - INIT_HLIST_HEAD(&ipvs->svc_table[idx]); + init_rwsem(&ipvs->svc_resize_sem); + INIT_DELAYED_WORK(&ipvs->svc_resize_work, svc_resize_work_handler); + atomic_set(&ipvs->svc_table_changes, 0); + RCU_INIT_POINTER(ipvs->svc_table, NULL); /* Initialize rs_table */ for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) @@ -4326,6 +4770,7 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) } INIT_DELAYED_WORK(&ipvs->est_reload_work, est_reload_work_handler); + ipvs->sysctl_svc_lfactor = ip_vs_svc_default_load_factor(ipvs); /* procfs stats */ ipvs->tot_stats = kzalloc_obj(*ipvs->tot_stats); From 2fa7cc9c70254d42a82bf82827d8d20cafe975d2 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Tue, 3 Mar 2026 23:04:07 +0200 Subject: [PATCH 0260/1561] ipvs: switch to per-net connection table Use per-net resizable hash table for connections. The global table is slow to walk when using many namespaces. The table can be resized in the range of [256 - ip_vs_conn_tab_size]. Table is attached only while services are present. Resizing is done by delayed work based on load (the number of connections). Add a hash_key field into the connection to store the table ID in the highest bit and the entry's hash value in the lowest bits. The lowest part of the hash value is used as bucket ID, the remaining part is used to filter the entries in the bucket before matching the keys and as result, helps the lookup operation to access only one cache line. By knowing the table ID and bucket ID for entry, we can unlink it without calculating the hash value and doing lookup by keys. We need only to validate the saved hash_key under lock. For better security switch from jhash to siphash for the default connection hashing but the persistence engines may use their own function. Keeping the hash table loaded with entries below the size (12%) allows to avoid collision for 96+% of the conns. ip_vs_conn_fill_cport() now will rehash the connection with proper locking because unhash+hash is not safe for RCU readers. To invalidate the templates setting just dport to 0xffff is enough, no need to rehash them. As result, ip_vs_conn_unhash() is now unused and removed. Signed-off-by: Julian Anastasov Signed-off-by: Florian Westphal --- include/net/ip_vs.h | 34 +- net/netfilter/ipvs/ip_vs_conn.c | 851 +++++++++++++++++++++--------- net/netfilter/ipvs/ip_vs_ctl.c | 18 + net/netfilter/ipvs/ip_vs_pe_sip.c | 4 +- net/netfilter/ipvs/ip_vs_sync.c | 23 + 5 files changed, 668 insertions(+), 262 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 663ad6ad9518..3d595bd99eb3 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -36,6 +36,14 @@ #define IP_VS_HDR_INVERSE 1 #define IP_VS_HDR_ICMP 2 +/* conn_tab limits (as per Kconfig) */ +#define IP_VS_CONN_TAB_MIN_BITS 8 +#if BITS_PER_LONG > 32 +#define IP_VS_CONN_TAB_MAX_BITS 27 +#else +#define IP_VS_CONN_TAB_MAX_BITS 20 +#endif + /* svc_table limits */ #define IP_VS_SVC_TAB_MIN_BITS 4 #define IP_VS_SVC_TAB_MAX_BITS 20 @@ -289,6 +297,7 @@ static inline int ip_vs_af_index(int af) enum { IP_VS_WORK_SVC_RESIZE, /* Schedule svc_resize_work */ IP_VS_WORK_SVC_NORESIZE, /* Stopping svc_resize_work */ + IP_VS_WORK_CONN_RESIZE, /* Schedule conn_resize_work */ }; /* The port number of FTP service (in network order). */ @@ -779,18 +788,19 @@ struct ip_vs_conn_param { /* IP_VS structure allocated for each dynamically scheduled connection */ struct ip_vs_conn { - struct hlist_node c_list; /* hashed list heads */ + struct hlist_bl_node c_list; /* node in conn_tab */ + __u32 hash_key; /* Key for the hash table */ /* Protocol, addresses and port numbers */ __be16 cport; __be16 dport; __be16 vport; u16 af; /* address family */ + __u16 protocol; /* Which protocol (TCP/UDP) */ + __u16 daf; /* Address family of the dest */ union nf_inet_addr caddr; /* client address */ union nf_inet_addr vaddr; /* virtual address */ union nf_inet_addr daddr; /* destination address */ volatile __u32 flags; /* status flags */ - __u16 protocol; /* Which protocol (TCP/UDP) */ - __u16 daf; /* Address family of the dest */ struct netns_ipvs *ipvs; /* counter and timer */ @@ -1009,8 +1019,8 @@ struct ip_vs_pe { int (*fill_param)(struct ip_vs_conn_param *p, struct sk_buff *skb); bool (*ct_match)(const struct ip_vs_conn_param *p, struct ip_vs_conn *ct); - u32 (*hashkey_raw)(const struct ip_vs_conn_param *p, u32 initval, - bool inverse); + u32 (*hashkey_raw)(const struct ip_vs_conn_param *p, + struct ip_vs_rht *t, bool inverse); int (*show_pe_data)(const struct ip_vs_conn *cp, char *buf); /* create connections for real-server outgoing packets */ struct ip_vs_conn* (*conn_out)(struct ip_vs_service *svc, @@ -1150,6 +1160,7 @@ struct netns_ipvs { /* ip_vs_conn */ atomic_t conn_count; /* connection counter */ atomic_t no_cport_conns[IP_VS_AF_MAX]; + struct delayed_work conn_resize_work;/* resize conn_tab */ /* ip_vs_ctl */ struct ip_vs_stats_rcu *tot_stats; /* Statistics & est. */ @@ -1226,6 +1237,7 @@ struct netns_ipvs { int sysctl_est_nice; /* kthread nice */ int est_stopped; /* stop tasks */ #endif + int sysctl_conn_lfactor; int sysctl_svc_lfactor; /* ip_vs_lblc */ @@ -1269,6 +1281,8 @@ struct netns_ipvs { unsigned int hooks_afmask; /* &1=AF_INET, &2=AF_INET6 */ struct ip_vs_rht __rcu *svc_table; /* Services */ + struct ip_vs_rht __rcu *conn_tab; /* Connections */ + atomic_t conn_tab_changes;/* ++ on new table */ }; #define DEFAULT_SYNC_THRESHOLD 3 @@ -1518,6 +1532,12 @@ static inline int sysctl_est_nice(struct netns_ipvs *ipvs) #endif +/* Get load factor to map conn_count/u_thresh to t->size */ +static inline int sysctl_conn_lfactor(struct netns_ipvs *ipvs) +{ + return READ_ONCE(ipvs->sysctl_conn_lfactor); +} + /* Get load factor to map num_services/u_thresh to t->size * Smaller value decreases u_thresh to reduce collisions but increases * the table size @@ -1603,6 +1623,10 @@ static inline void __ip_vs_conn_put(struct ip_vs_conn *cp) } void ip_vs_conn_put(struct ip_vs_conn *cp); void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport); +int ip_vs_conn_desired_size(struct netns_ipvs *ipvs, struct ip_vs_rht *t, + int lfactor); +struct ip_vs_rht *ip_vs_conn_tab_alloc(struct netns_ipvs *ipvs, int buckets, + int lfactor); struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af, const union nf_inet_addr *daddr, diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index a6fd3b64428f..07a47e525f01 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -47,28 +47,12 @@ static int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS; module_param_named(conn_tab_bits, ip_vs_conn_tab_bits, int, 0444); MODULE_PARM_DESC(conn_tab_bits, "Set connections' hash size"); -/* size and mask values */ +/* Max table size */ int ip_vs_conn_tab_size __read_mostly; -static int ip_vs_conn_tab_mask __read_mostly; - -/* - * Connection hash table: for input and output packets lookups of IPVS - */ -static struct hlist_head *ip_vs_conn_tab __read_mostly; /* SLAB cache for IPVS connections */ static struct kmem_cache *ip_vs_conn_cachep __read_mostly; -/* random value for IPVS connection hash */ -static unsigned int ip_vs_conn_rnd __read_mostly; - -/* - * Fine locking granularity for big connection hash table - */ -#define CT_LOCKARRAY_BITS 5 -#define CT_LOCKARRAY_SIZE (1<lock protects conn fields like cp->flags, cp->dest + */ -static inline void ct_write_lock_bh(unsigned int key) +/* Lock conn_tab bucket for conn hash/unhash, not for rehash */ +static __always_inline void +conn_tab_lock(struct ip_vs_rht *t, struct ip_vs_conn *cp, u32 hash_key, + bool new_hash, struct hlist_bl_head **head_ret) { - spin_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); + struct hlist_bl_head *head; + u32 hash_key_new; + + if (!new_hash) { + /* We need to lock the bucket in the right table */ + +retry: + if (!ip_vs_rht_same_table(t, hash_key)) { + /* It is already moved to new table */ + t = rcu_dereference(t->new_tbl); + } + } + + head = t->buckets + (hash_key & t->mask); + + local_bh_disable(); + /* Do not touch seqcount, this is a safe operation */ + + hlist_bl_lock(head); + if (!new_hash) { + /* Ensure hash_key is read under lock */ + hash_key_new = READ_ONCE(cp->hash_key); + /* Hash changed ? */ + if (hash_key != hash_key_new) { + hlist_bl_unlock(head); + local_bh_enable(); + hash_key = hash_key_new; + goto retry; + } + } + *head_ret = head; } -static inline void ct_write_unlock_bh(unsigned int key) +static inline void conn_tab_unlock(struct hlist_bl_head *head) { - spin_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); + hlist_bl_unlock(head); + local_bh_enable(); } static void ip_vs_conn_expire(struct timer_list *t); @@ -95,30 +122,31 @@ static void ip_vs_conn_expire(struct timer_list *t); /* * Returns hash value for IPVS connection entry */ -static unsigned int ip_vs_conn_hashkey(struct netns_ipvs *ipvs, int af, unsigned int proto, - const union nf_inet_addr *addr, - __be16 port) +static u32 ip_vs_conn_hashkey(struct ip_vs_rht *t, int af, unsigned int proto, + const union nf_inet_addr *addr, __be16 port) { + u64 a = (u32)proto << 16 | (__force u32)port; + #ifdef CONFIG_IP_VS_IPV6 - if (af == AF_INET6) - return (jhash_3words(jhash(addr, 16, ip_vs_conn_rnd), - (__force u32)port, proto, ip_vs_conn_rnd) ^ - ((size_t)ipvs>>8)) & ip_vs_conn_tab_mask; + if (af == AF_INET6) { + u64 b = (u64)addr->all[0] << 32 | addr->all[1]; + u64 c = (u64)addr->all[2] << 32 | addr->all[3]; + + return (u32)siphash_3u64(a, b, c, &t->hash_key); + } #endif - return (jhash_3words((__force u32)addr->ip, (__force u32)port, proto, - ip_vs_conn_rnd) ^ - ((size_t)ipvs>>8)) & ip_vs_conn_tab_mask; + a |= (u64)addr->all[0] << 32; + return (u32)siphash_1u64(a, &t->hash_key); } static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p, - bool inverse) + struct ip_vs_rht *t, bool inverse) { const union nf_inet_addr *addr; __be16 port; if (p->pe_data && p->pe->hashkey_raw) - return p->pe->hashkey_raw(p, ip_vs_conn_rnd, inverse) & - ip_vs_conn_tab_mask; + return p->pe->hashkey_raw(p, t, inverse); if (likely(!inverse)) { addr = p->caddr; @@ -128,10 +156,11 @@ static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p, port = p->vport; } - return ip_vs_conn_hashkey(p->ipvs, p->af, p->protocol, addr, port); + return ip_vs_conn_hashkey(t, p->af, p->protocol, addr, port); } -static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp) +static unsigned int ip_vs_conn_hashkey_conn(struct ip_vs_rht *t, + const struct ip_vs_conn *cp) { struct ip_vs_conn_param p; @@ -144,31 +173,36 @@ static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp) p.pe_data_len = cp->pe_data_len; } - return ip_vs_conn_hashkey_param(&p, false); + return ip_vs_conn_hashkey_param(&p, t, false); } -/* - * Hashes ip_vs_conn in ip_vs_conn_tab by netns,proto,addr,port. +/* Hashes ip_vs_conn in conn_tab * returns bool success. */ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) { - unsigned int hash; + struct netns_ipvs *ipvs = cp->ipvs; + struct hlist_bl_head *head; + struct ip_vs_rht *t; + u32 hash_key; int ret; if (cp->flags & IP_VS_CONN_F_ONE_PACKET) return 0; - /* Hash by protocol, client address and port */ - hash = ip_vs_conn_hashkey_conn(cp); + /* New entries go into recent table */ + t = rcu_dereference(ipvs->conn_tab); + t = rcu_dereference(t->new_tbl); - ct_write_lock_bh(hash); + hash_key = ip_vs_rht_build_hash_key(t, ip_vs_conn_hashkey_conn(t, cp)); + conn_tab_lock(t, cp, hash_key, true /* new_hash */, &head); spin_lock(&cp->lock); if (!(cp->flags & IP_VS_CONN_F_HASHED)) { cp->flags |= IP_VS_CONN_F_HASHED; + WRITE_ONCE(cp->hash_key, hash_key); refcount_inc(&cp->refcnt); - hlist_add_head_rcu(&cp->c_list, &ip_vs_conn_tab[hash]); + hlist_bl_add_head_rcu(&cp->c_list, head); ret = 1; } else { pr_err("%s(): request for already hashed, called from %pS\n", @@ -177,75 +211,58 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) } spin_unlock(&cp->lock); - ct_write_unlock_bh(hash); + conn_tab_unlock(head); + + /* Schedule resizing if load increases */ + if (atomic_read(&ipvs->conn_count) > t->u_thresh && + !test_and_set_bit(IP_VS_WORK_CONN_RESIZE, &ipvs->work_flags)) + mod_delayed_work(system_unbound_wq, &ipvs->conn_resize_work, 0); return ret; } - -/* - * UNhashes ip_vs_conn from ip_vs_conn_tab. - * returns bool success. Caller should hold conn reference. - */ -static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) -{ - unsigned int hash; - int ret; - - /* unhash it and decrease its reference counter */ - hash = ip_vs_conn_hashkey_conn(cp); - - ct_write_lock_bh(hash); - spin_lock(&cp->lock); - - if (cp->flags & IP_VS_CONN_F_HASHED) { - hlist_del_rcu(&cp->c_list); - cp->flags &= ~IP_VS_CONN_F_HASHED; - refcount_dec(&cp->refcnt); - ret = 1; - } else - ret = 0; - - spin_unlock(&cp->lock); - ct_write_unlock_bh(hash); - - return ret; -} - -/* Try to unlink ip_vs_conn from ip_vs_conn_tab. +/* Try to unlink ip_vs_conn from conn_tab. * returns bool success. */ static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp) { - unsigned int hash; + struct netns_ipvs *ipvs = cp->ipvs; + struct hlist_bl_head *head; + struct ip_vs_rht *t; bool ret = false; + u32 hash_key; if (cp->flags & IP_VS_CONN_F_ONE_PACKET) return refcount_dec_if_one(&cp->refcnt); - hash = ip_vs_conn_hashkey_conn(cp); + rcu_read_lock(); - ct_write_lock_bh(hash); + t = rcu_dereference(ipvs->conn_tab); + hash_key = READ_ONCE(cp->hash_key); + + conn_tab_lock(t, cp, hash_key, false /* new_hash */, &head); spin_lock(&cp->lock); if (cp->flags & IP_VS_CONN_F_HASHED) { /* Decrease refcnt and unlink conn only if we are last user */ if (refcount_dec_if_one(&cp->refcnt)) { - hlist_del_rcu(&cp->c_list); + hlist_bl_del_rcu(&cp->c_list); cp->flags &= ~IP_VS_CONN_F_HASHED; ret = true; } } spin_unlock(&cp->lock); - ct_write_unlock_bh(hash); + conn_tab_unlock(head); + + rcu_read_unlock(); return ret; } /* - * Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab. + * Gets ip_vs_conn associated with supplied parameters in the conn_tab. * Called for pkts coming from OUTside-to-INside. * p->caddr, p->cport: pkt source address (foreign host) * p->vaddr, p->vport: pkt dest address (load balancer) @@ -253,26 +270,38 @@ static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp) static inline struct ip_vs_conn * __ip_vs_conn_in_get(const struct ip_vs_conn_param *p) { - unsigned int hash; + DECLARE_IP_VS_RHT_WALK_BUCKET_RCU(); + struct netns_ipvs *ipvs = p->ipvs; + struct hlist_bl_head *head; + struct ip_vs_rht *t, *pt; + struct hlist_bl_node *e; struct ip_vs_conn *cp; - - hash = ip_vs_conn_hashkey_param(p, false); + u32 hash, hash_key; rcu_read_lock(); - hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { - if (p->cport == cp->cport && p->vport == cp->vport && - cp->af == p->af && - ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) && - ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) && - ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && - p->protocol == cp->protocol && - cp->ipvs == p->ipvs) { - if (!__ip_vs_conn_get(cp)) - continue; - /* HIT */ - rcu_read_unlock(); - return cp; + ip_vs_rht_for_each_table_rcu(ipvs->conn_tab, t, pt) { + hash = ip_vs_conn_hashkey_param(p, t, false); + hash_key = ip_vs_rht_build_hash_key(t, hash); + ip_vs_rht_walk_bucket_rcu(t, hash_key, head) { + hlist_bl_for_each_entry_rcu(cp, e, head, c_list) { + if (READ_ONCE(cp->hash_key) == hash_key && + p->cport == cp->cport && + p->vport == cp->vport && cp->af == p->af && + ip_vs_addr_equal(p->af, p->caddr, + &cp->caddr) && + ip_vs_addr_equal(p->af, p->vaddr, + &cp->vaddr) && + (!p->cport ^ + (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && + p->protocol == cp->protocol) { + if (__ip_vs_conn_get(cp)) { + /* HIT */ + rcu_read_unlock(); + return cp; + } + } + } } } @@ -345,37 +374,50 @@ EXPORT_SYMBOL_GPL(ip_vs_conn_in_get_proto); /* Get reference to connection template */ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) { - unsigned int hash; + DECLARE_IP_VS_RHT_WALK_BUCKET_RCU(); + struct netns_ipvs *ipvs = p->ipvs; + struct hlist_bl_head *head; + struct ip_vs_rht *t, *pt; + struct hlist_bl_node *e; struct ip_vs_conn *cp; - - hash = ip_vs_conn_hashkey_param(p, false); + u32 hash, hash_key; rcu_read_lock(); - hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { - if (unlikely(p->pe_data && p->pe->ct_match)) { - if (cp->ipvs != p->ipvs) - continue; - if (p->pe == cp->pe && p->pe->ct_match(p, cp)) { - if (__ip_vs_conn_get(cp)) - goto out; + ip_vs_rht_for_each_table_rcu(ipvs->conn_tab, t, pt) { + hash = ip_vs_conn_hashkey_param(p, t, false); + hash_key = ip_vs_rht_build_hash_key(t, hash); + ip_vs_rht_walk_bucket_rcu(t, hash_key, head) { + hlist_bl_for_each_entry_rcu(cp, e, head, c_list) { + if (READ_ONCE(cp->hash_key) != hash_key) + continue; + if (unlikely(p->pe_data && p->pe->ct_match)) { + if (p->pe == cp->pe && + p->pe->ct_match(p, cp) && + __ip_vs_conn_get(cp)) + goto out; + continue; + } + if (cp->af == p->af && + ip_vs_addr_equal(p->af, p->caddr, + &cp->caddr) && + /* protocol should only be IPPROTO_IP if + * p->vaddr is a fwmark + */ + ip_vs_addr_equal(p->protocol == IPPROTO_IP ? + AF_UNSPEC : p->af, + p->vaddr, &cp->vaddr) && + p->vport == cp->vport && + p->cport == cp->cport && + cp->flags & IP_VS_CONN_F_TEMPLATE && + p->protocol == cp->protocol && + cp->dport != htons(0xffff)) { + if (__ip_vs_conn_get(cp)) + goto out; + } } - continue; } - if (cp->af == p->af && - ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) && - /* protocol should only be IPPROTO_IP if - * p->vaddr is a fwmark */ - ip_vs_addr_equal(p->protocol == IPPROTO_IP ? AF_UNSPEC : - p->af, p->vaddr, &cp->vaddr) && - p->vport == cp->vport && p->cport == cp->cport && - cp->flags & IP_VS_CONN_F_TEMPLATE && - p->protocol == cp->protocol && - cp->ipvs == p->ipvs) { - if (__ip_vs_conn_get(cp)) - goto out; - } } cp = NULL; @@ -391,58 +433,64 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) return cp; } -/* Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab. +/* Gets ip_vs_conn associated with supplied parameters in the conn_tab. * Called for pkts coming from inside-to-OUTside. * p->caddr, p->cport: pkt source address (inside host) * p->vaddr, p->vport: pkt dest address (foreign host) */ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) { - unsigned int hash; - struct ip_vs_conn *cp, *ret=NULL; + DECLARE_IP_VS_RHT_WALK_BUCKET_RCU(); + struct netns_ipvs *ipvs = p->ipvs; const union nf_inet_addr *saddr; + struct hlist_bl_head *head; + struct ip_vs_rht *t, *pt; + struct hlist_bl_node *e; + struct ip_vs_conn *cp; + u32 hash, hash_key; __be16 sport; - /* - * Check for "full" addressed entries - */ - hash = ip_vs_conn_hashkey_param(p, true); - rcu_read_lock(); - hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { - if (p->vport != cp->cport) - continue; + ip_vs_rht_for_each_table_rcu(ipvs->conn_tab, t, pt) { + hash = ip_vs_conn_hashkey_param(p, t, true); + hash_key = ip_vs_rht_build_hash_key(t, hash); + ip_vs_rht_walk_bucket_rcu(t, hash_key, head) { + hlist_bl_for_each_entry_rcu(cp, e, head, c_list) { + if (READ_ONCE(cp->hash_key) != hash_key || + p->vport != cp->cport) + continue; - if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { - sport = cp->vport; - saddr = &cp->vaddr; - } else { - sport = cp->dport; - saddr = &cp->daddr; - } + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { + sport = cp->vport; + saddr = &cp->vaddr; + } else { + sport = cp->dport; + saddr = &cp->daddr; + } - if (p->cport == sport && cp->af == p->af && - ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && - ip_vs_addr_equal(p->af, p->caddr, saddr) && - p->protocol == cp->protocol && - cp->ipvs == p->ipvs) { - if (!__ip_vs_conn_get(cp)) - continue; - /* HIT */ - ret = cp; - break; + if (p->cport == sport && cp->af == p->af && + ip_vs_addr_equal(p->af, p->vaddr, + &cp->caddr) && + ip_vs_addr_equal(p->af, p->caddr, saddr) && + p->protocol == cp->protocol) { + if (__ip_vs_conn_get(cp)) + goto out; + } + } } } + cp = NULL; +out: rcu_read_unlock(); IP_VS_DBG_BUF(9, "lookup/out %s %s:%d->%s:%d %s\n", ip_vs_proto_name(p->protocol), IP_VS_DBG_ADDR(p->af, p->caddr), ntohs(p->cport), IP_VS_DBG_ADDR(p->af, p->vaddr), ntohs(p->vport), - ret ? "hit" : "not hit"); + cp ? "hit" : "not hit"); - return ret; + return cp; } struct ip_vs_conn * @@ -487,23 +535,261 @@ void ip_vs_conn_put(struct ip_vs_conn *cp) */ void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport) { - if (ip_vs_conn_unhash(cp)) { - struct netns_ipvs *ipvs = cp->ipvs; - int af_id = ip_vs_af_index(cp->af); + struct hlist_bl_head *head, *head2, *head_new; + struct netns_ipvs *ipvs = cp->ipvs; + int af_id = ip_vs_af_index(cp->af); + u32 hash_r = 0, hash_key_r = 0; + struct ip_vs_rht *t, *tp, *t2; + u32 hash_key, hash_key_new; + struct ip_vs_conn_param p; + int ntbl; - spin_lock_bh(&cp->lock); - if (cp->flags & IP_VS_CONN_F_NO_CPORT) { - atomic_dec(&ipvs->no_cport_conns[af_id]); - cp->flags &= ~IP_VS_CONN_F_NO_CPORT; - cp->cport = cport; - } - spin_unlock_bh(&cp->lock); + ip_vs_conn_fill_param(ipvs, cp->af, cp->protocol, &cp->caddr, + cport, &cp->vaddr, cp->vport, &p); + ntbl = 0; - /* hash on new dport */ - ip_vs_conn_hash(cp); + /* Attempt to rehash cp safely, by informing seqcount readers */ + t = rcu_dereference(ipvs->conn_tab); + hash_key = READ_ONCE(cp->hash_key); + tp = NULL; + +retry: + /* Moved to new table ? */ + if (!ip_vs_rht_same_table(t, hash_key)) { + t = rcu_dereference(t->new_tbl); + ntbl++; + /* We are lost? */ + if (ntbl >= 2) + return; } + + /* Rehashing during resize? Use the recent table for adds */ + t2 = rcu_dereference(t->new_tbl); + /* Calc new hash once per table */ + if (tp != t2) { + hash_r = ip_vs_conn_hashkey_param(&p, t2, false); + hash_key_r = ip_vs_rht_build_hash_key(t2, hash_r); + tp = t2; + } + head = t->buckets + (hash_key & t->mask); + head2 = t2->buckets + (hash_key_r & t2->mask); + head_new = head2; + + if (head > head2 && t == t2) + swap(head, head2); + + /* Lock seqcount only for the old bucket, even if we are on new table + * because it affects the del operation, not the adding. + */ + spin_lock_bh(&t->lock[hash_key & t->lock_mask].l); + preempt_disable_nested(); + write_seqcount_begin(&t->seqc[hash_key & t->seqc_mask]); + + /* Lock buckets in same (increasing) order */ + hlist_bl_lock(head); + if (head != head2) + hlist_bl_lock(head2); + + /* Ensure hash_key is read under lock */ + hash_key_new = READ_ONCE(cp->hash_key); + /* Racing with another rehashing ? */ + if (unlikely(hash_key != hash_key_new)) { + if (head != head2) + hlist_bl_unlock(head2); + hlist_bl_unlock(head); + write_seqcount_end(&t->seqc[hash_key & t->seqc_mask]); + preempt_enable_nested(); + spin_unlock_bh(&t->lock[hash_key & t->lock_mask].l); + hash_key = hash_key_new; + goto retry; + } + + spin_lock(&cp->lock); + if ((cp->flags & IP_VS_CONN_F_NO_CPORT) && + (cp->flags & IP_VS_CONN_F_HASHED)) { + /* We do not recalc hash_key_r under lock, we assume the + * parameters in cp do not change, i.e. cport is + * the only possible change. + */ + WRITE_ONCE(cp->hash_key, hash_key_r); + if (head != head2) { + hlist_bl_del_rcu(&cp->c_list); + hlist_bl_add_head_rcu(&cp->c_list, head_new); + } + atomic_dec(&ipvs->no_cport_conns[af_id]); + cp->flags &= ~IP_VS_CONN_F_NO_CPORT; + cp->cport = cport; + } + spin_unlock(&cp->lock); + + if (head != head2) + hlist_bl_unlock(head2); + hlist_bl_unlock(head); + write_seqcount_end(&t->seqc[hash_key & t->seqc_mask]); + preempt_enable_nested(); + spin_unlock_bh(&t->lock[hash_key & t->lock_mask].l); } +/* Get default load factor to map conn_count/u_thresh to t->size */ +static int ip_vs_conn_default_load_factor(struct netns_ipvs *ipvs) +{ + int factor; + + if (net_eq(ipvs->net, &init_net)) + factor = -3; + else + factor = -1; + return factor; +} + +/* Get the desired conn_tab size */ +int ip_vs_conn_desired_size(struct netns_ipvs *ipvs, struct ip_vs_rht *t, + int lfactor) +{ + return ip_vs_rht_desired_size(ipvs, t, atomic_read(&ipvs->conn_count), + lfactor, IP_VS_CONN_TAB_MIN_BITS, + ip_vs_conn_tab_bits); +} + +/* Allocate conn_tab */ +struct ip_vs_rht *ip_vs_conn_tab_alloc(struct netns_ipvs *ipvs, int buckets, + int lfactor) +{ + struct ip_vs_rht *t; + int scounts, locks; + + /* scounts: affects readers during resize */ + scounts = clamp(buckets >> 6, 1, 256); + /* locks: based on parallel IP_VS_CONN_F_NO_CPORT operations + resize */ + locks = clamp(8, 1, scounts); + + t = ip_vs_rht_alloc(buckets, scounts, locks); + if (!t) + return NULL; + t->lfactor = lfactor; + ip_vs_rht_set_thresholds(t, t->size, lfactor, IP_VS_CONN_TAB_MIN_BITS, + ip_vs_conn_tab_bits); + return t; +} + +/* conn_tab resizer work */ +static void conn_resize_work_handler(struct work_struct *work) +{ + struct hlist_bl_head *head, *head2; + unsigned int resched_score = 0; + struct hlist_bl_node *cn, *nn; + struct ip_vs_rht *t, *t_new; + struct netns_ipvs *ipvs; + struct ip_vs_conn *cp; + bool more_work = false; + u32 hash, hash_key; + int limit = 0; + int new_size; + int lfactor; + u32 bucket; + + ipvs = container_of(work, struct netns_ipvs, conn_resize_work.work); + + /* Allow work to be queued again */ + clear_bit(IP_VS_WORK_CONN_RESIZE, &ipvs->work_flags); + t = rcu_dereference_protected(ipvs->conn_tab, 1); + /* Do nothing if table is removed */ + if (!t) + goto out; + /* New table needs to be registered? BUG! */ + if (t != rcu_dereference_protected(t->new_tbl, 1)) + goto out; + + lfactor = sysctl_conn_lfactor(ipvs); + /* Should we resize ? */ + new_size = ip_vs_conn_desired_size(ipvs, t, lfactor); + if (new_size == t->size && lfactor == t->lfactor) + goto out; + + t_new = ip_vs_conn_tab_alloc(ipvs, new_size, lfactor); + if (!t_new) { + more_work = true; + goto out; + } + /* Flip the table_id */ + t_new->table_id = t->table_id ^ IP_VS_RHT_TABLE_ID_MASK; + + rcu_assign_pointer(t->new_tbl, t_new); + + /* Wait RCU readers to see the new table, we do not want new + * conns to go into old table and to be left there. + */ + synchronize_rcu(); + + ip_vs_rht_for_each_bucket(t, bucket, head) { +same_bucket: + if (++limit >= 16) { + if (resched_score >= 100) { + resched_score = 0; + cond_resched(); + } + limit = 0; + } + if (hlist_bl_empty(head)) { + resched_score++; + continue; + } + /* Preemption calls ahead... */ + resched_score = 0; + + /* seqcount_t usage considering PREEMPT_RT rules: + * - other writers (SoftIRQ) => serialize with spin_lock_bh + * - readers (SoftIRQ) => disable BHs + * - readers (processes) => preemption should be disabled + */ + spin_lock_bh(&t->lock[bucket & t->lock_mask].l); + preempt_disable_nested(); + write_seqcount_begin(&t->seqc[bucket & t->seqc_mask]); + hlist_bl_lock(head); + + hlist_bl_for_each_entry_safe(cp, cn, nn, head, c_list) { + hash = ip_vs_conn_hashkey_conn(t_new, cp); + hash_key = ip_vs_rht_build_hash_key(t_new, hash); + + head2 = t_new->buckets + (hash & t_new->mask); + hlist_bl_lock(head2); + /* t_new->seqc are not used at this stage, we race + * only with add/del, so only lock the bucket. + */ + hlist_bl_del_rcu(&cp->c_list); + WRITE_ONCE(cp->hash_key, hash_key); + hlist_bl_add_head_rcu(&cp->c_list, head2); + hlist_bl_unlock(head2); + /* Too long chain? Do it in steps */ + if (++limit >= 64) + break; + } + + hlist_bl_unlock(head); + write_seqcount_end(&t->seqc[bucket & t->seqc_mask]); + preempt_enable_nested(); + spin_unlock_bh(&t->lock[bucket & t->lock_mask].l); + if (limit >= 64) + goto same_bucket; + } + + rcu_assign_pointer(ipvs->conn_tab, t_new); + /* Inform readers that new table is installed */ + smp_mb__before_atomic(); + atomic_inc(&ipvs->conn_tab_changes); + + /* RCU readers should not see more than two tables in chain. + * To prevent new table to be attached wait here instead of + * freeing the old table in RCU callback. + */ + synchronize_rcu(); + ip_vs_rht_free(t); + +out: + /* Monitor if we need to shrink table */ + queue_delayed_work(system_unbound_wq, &ipvs->conn_resize_work, + more_work ? 1 : 2 * HZ); +} /* * Bind a connection entry with the corresponding packet_xmit. @@ -787,17 +1073,11 @@ int ip_vs_check_template(struct ip_vs_conn *ct, struct ip_vs_dest *cdest) IP_VS_DBG_ADDR(ct->daf, &ct->daddr), ntohs(ct->dport)); - /* - * Invalidate the connection template + /* Invalidate the connection template. Prefer to avoid + * rehashing, it will move it as first in chain, so use + * only dport as indication, it is not a hash key. */ - if (ct->vport != htons(0xffff)) { - if (ip_vs_conn_unhash(ct)) { - ct->dport = htons(0xffff); - ct->vport = htons(0xffff); - ct->cport = 0; - ip_vs_conn_hash(ct); - } - } + ct->dport = htons(0xffff); /* * Simply decrease the refcnt of the template, @@ -938,7 +1218,7 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp) /* - * Create a new connection entry and hash it into the ip_vs_conn_tab + * Create a new connection entry and hash it into the conn_tab */ struct ip_vs_conn * ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af, @@ -956,7 +1236,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af, return NULL; } - INIT_HLIST_NODE(&cp->c_list); + INIT_HLIST_BL_NODE(&cp->c_list); timer_setup(&cp->timer, ip_vs_conn_expire, 0); cp->ipvs = ipvs; cp->af = p->af; @@ -1040,7 +1320,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af, if (ip_vs_conntrack_enabled(ipvs)) cp->flags |= IP_VS_CONN_F_NFCT; - /* Hash it in the ip_vs_conn_tab finally */ + /* Hash it in the conn_tab finally */ ip_vs_conn_hash(cp); return cp; @@ -1052,22 +1332,33 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af, #ifdef CONFIG_PROC_FS struct ip_vs_iter_state { struct seq_net_private p; - unsigned int bucket; + struct ip_vs_rht *t; + int gen; + u32 bucket; unsigned int skip_elems; }; -static void *ip_vs_conn_array(struct ip_vs_iter_state *iter) +static void *ip_vs_conn_array(struct seq_file *seq) { - int idx; + struct ip_vs_iter_state *iter = seq->private; + struct net *net = seq_file_net(seq); + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_rht *t = iter->t; + struct hlist_bl_node *e; struct ip_vs_conn *cp; + int idx; - for (idx = iter->bucket; idx < ip_vs_conn_tab_size; idx++) { + if (!t) + return NULL; + for (idx = iter->bucket; idx < t->size; idx++) { unsigned int skip = 0; - hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) { + hlist_bl_for_each_entry_rcu(cp, e, &t->buckets[idx], c_list) { /* __ip_vs_conn_get() is not needed by * ip_vs_conn_seq_show and ip_vs_conn_sync_seq_show */ + if (!ip_vs_rht_same_table(t, READ_ONCE(cp->hash_key))) + break; if (skip >= iter->skip_elems) { iter->bucket = idx; return cp; @@ -1076,8 +1367,13 @@ static void *ip_vs_conn_array(struct ip_vs_iter_state *iter) ++skip; } + if (!(idx & 31)) { + cond_resched_rcu(); + /* New table installed ? */ + if (iter->gen != atomic_read(&ipvs->conn_tab_changes)) + break; + } iter->skip_elems = 0; - cond_resched_rcu(); } iter->bucket = idx; @@ -1088,38 +1384,50 @@ static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { struct ip_vs_iter_state *iter = seq->private; + struct net *net = seq_file_net(seq); + struct netns_ipvs *ipvs = net_ipvs(net); rcu_read_lock(); + iter->gen = atomic_read(&ipvs->conn_tab_changes); + smp_rmb(); /* ipvs->conn_tab and conn_tab_changes */ + iter->t = rcu_dereference(ipvs->conn_tab); if (*pos == 0) { iter->skip_elems = 0; iter->bucket = 0; return SEQ_START_TOKEN; } - return ip_vs_conn_array(iter); + return ip_vs_conn_array(seq); } static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct ip_vs_conn *cp = v; struct ip_vs_iter_state *iter = seq->private; - struct hlist_node *e; + struct ip_vs_conn *cp = v; + struct hlist_bl_node *e; + struct ip_vs_rht *t; ++*pos; if (v == SEQ_START_TOKEN) - return ip_vs_conn_array(iter); + return ip_vs_conn_array(seq); + + t = iter->t; + if (!t) + return NULL; /* more on same hash chain? */ - e = rcu_dereference(hlist_next_rcu(&cp->c_list)); - if (e) { + hlist_bl_for_each_entry_continue_rcu(cp, e, c_list) { + /* Our cursor was moved to new table ? */ + if (!ip_vs_rht_same_table(t, READ_ONCE(cp->hash_key))) + break; iter->skip_elems++; - return hlist_entry(e, struct ip_vs_conn, c_list); + return cp; } iter->skip_elems = 0; iter->bucket++; - return ip_vs_conn_array(iter); + return ip_vs_conn_array(seq); } static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v) @@ -1136,13 +1444,10 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v) "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Expires PEName PEData\n"); else { const struct ip_vs_conn *cp = v; - struct net *net = seq_file_net(seq); char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3]; size_t len = 0; char dbuf[IP_VS_ADDRSTRLEN]; - if (!net_eq(cp->ipvs->net, net)) - return 0; if (cp->pe_data) { pe_data[0] = ' '; len = strlen(cp->pe->name); @@ -1214,10 +1519,6 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v) "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Origin Expires\n"); else { const struct ip_vs_conn *cp = v; - struct net *net = seq_file_net(seq); - - if (!net_eq(cp->ipvs->net, net)) - return 0; #ifdef CONFIG_IP_VS_IPV6 if (cp->daf == AF_INET6) @@ -1307,22 +1608,29 @@ static inline bool ip_vs_conn_ops_mode(struct ip_vs_conn *cp) return svc && (svc->flags & IP_VS_SVC_F_ONEPACKET); } -/* Called from keventd and must protect itself from softirqs */ void ip_vs_random_dropentry(struct netns_ipvs *ipvs) { - int idx; + struct hlist_bl_node *e; struct ip_vs_conn *cp; + struct ip_vs_rht *t; + unsigned int r; + int idx; + r = get_random_u32(); rcu_read_lock(); + t = rcu_dereference(ipvs->conn_tab); + if (!t) + goto out; /* * Randomly scan 1/32 of the whole table every second */ - for (idx = 0; idx < (ip_vs_conn_tab_size>>5); idx++) { - unsigned int hash = get_random_u32() & ip_vs_conn_tab_mask; + for (idx = 0; idx < (t->size >> 5); idx++) { + unsigned int hash = (r + idx) & t->mask; - hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { - if (cp->ipvs != ipvs) - continue; + /* Don't care if due to moved entry we jump to another bucket + * and even to new table + */ + hlist_bl_for_each_entry_rcu(cp, e, &t->buckets[hash], c_list) { if (atomic_read(&cp->n_control)) continue; if (cp->flags & IP_VS_CONN_F_TEMPLATE) { @@ -1369,27 +1677,39 @@ drop: IP_VS_DBG(4, "drop connection\n"); ip_vs_conn_del(cp); } - cond_resched_rcu(); + if (!(idx & 31)) { + cond_resched_rcu(); + t = rcu_dereference(ipvs->conn_tab); + if (!t) + goto out; + } } + +out: rcu_read_unlock(); } #endif -/* - * Flush all the connection entries in the ip_vs_conn_tab - */ +/* Flush all the connection entries in the conn_tab */ static void ip_vs_conn_flush(struct netns_ipvs *ipvs) { - int idx; + DECLARE_IP_VS_RHT_WALK_BUCKETS_SAFE_RCU(); struct ip_vs_conn *cp, *cp_c; + struct hlist_bl_head *head; + struct ip_vs_rht *t, *p; + struct hlist_bl_node *e; + + if (!rcu_dereference_protected(ipvs->conn_tab, 1)) + return; + cancel_delayed_work_sync(&ipvs->conn_resize_work); + if (!atomic_read(&ipvs->conn_count)) + goto unreg; flush_again: + /* Rely on RCU grace period while accessing cp after ip_vs_conn_del */ rcu_read_lock(); - for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { - - hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) { - if (cp->ipvs != ipvs) - continue; + ip_vs_rht_walk_buckets_safe_rcu(ipvs->conn_tab, head) { + hlist_bl_for_each_entry_rcu(cp, e, head, c_list) { if (atomic_read(&cp->n_control)) continue; cp_c = cp->control; @@ -1410,21 +1730,47 @@ flush_again: schedule(); goto flush_again; } + +unreg: + /* Unregister the hash table and release it after RCU grace period. + * This is needed because other works may not be stopped yet and + * they may walk the tables. + */ + t = rcu_dereference_protected(ipvs->conn_tab, 1); + rcu_assign_pointer(ipvs->conn_tab, NULL); + /* Inform readers that conn_tab is changed */ + smp_mb__before_atomic(); + atomic_inc(&ipvs->conn_tab_changes); + while (1) { + p = rcu_dereference_protected(t->new_tbl, 1); + call_rcu(&t->rcu_head, ip_vs_rht_rcu_free); + if (p == t) + break; + t = p; + } } #ifdef CONFIG_SYSCTL void ip_vs_expire_nodest_conn_flush(struct netns_ipvs *ipvs) { - int idx; + DECLARE_IP_VS_RHT_WALK_BUCKETS_RCU(); + unsigned int resched_score = 0; struct ip_vs_conn *cp, *cp_c; + struct hlist_bl_head *head; struct ip_vs_dest *dest; + struct hlist_bl_node *e; + int old_gen, new_gen; + if (!atomic_read(&ipvs->conn_count)) + return; + old_gen = atomic_read(&ipvs->conn_tab_changes); rcu_read_lock(); - for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { - hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) { - if (cp->ipvs != ipvs) - continue; +repeat: + smp_rmb(); /* ipvs->conn_tab and conn_tab_changes */ + ip_vs_rht_walk_buckets_rcu(ipvs->conn_tab, head) { + hlist_bl_for_each_entry_rcu(cp, e, head, c_list) { + resched_score++; dest = cp->dest; if (!dest || (dest->flags & IP_VS_DEST_F_AVAILABLE)) continue; @@ -1439,13 +1785,25 @@ void ip_vs_expire_nodest_conn_flush(struct netns_ipvs *ipvs) IP_VS_DBG(4, "del controlling connection\n"); ip_vs_conn_del(cp_c); } + resched_score += 10; + } + resched_score++; + if (resched_score >= 100) { + resched_score = 0; + cond_resched_rcu(); + /* netns clean up started, abort delayed work */ + if (!READ_ONCE(ipvs->enable)) + goto out; + new_gen = atomic_read(&ipvs->conn_tab_changes); + /* New table installed ? */ + if (old_gen != new_gen) { + old_gen = new_gen; + goto repeat; + } } - cond_resched_rcu(); - - /* netns clean up started, abort delayed work */ - if (!READ_ONCE(ipvs->enable)) - break; } + +out: rcu_read_unlock(); } #endif @@ -1460,6 +1818,10 @@ int __net_init ip_vs_conn_net_init(struct netns_ipvs *ipvs) atomic_set(&ipvs->conn_count, 0); for (idx = 0; idx < IP_VS_AF_MAX; idx++) atomic_set(&ipvs->no_cport_conns[idx], 0); + INIT_DELAYED_WORK(&ipvs->conn_resize_work, conn_resize_work_handler); + RCU_INIT_POINTER(ipvs->conn_tab, NULL); + atomic_set(&ipvs->conn_tab_changes, 0); + ipvs->sysctl_conn_lfactor = ip_vs_conn_default_load_factor(ipvs); #ifdef CONFIG_PROC_FS if (!proc_create_net("ip_vs_conn", 0, ipvs->net->proc_net, @@ -1495,56 +1857,36 @@ void __net_exit ip_vs_conn_net_cleanup(struct netns_ipvs *ipvs) int __init ip_vs_conn_init(void) { + int min = IP_VS_CONN_TAB_MIN_BITS; + int max = IP_VS_CONN_TAB_MAX_BITS; size_t tab_array_size; int max_avail; -#if BITS_PER_LONG > 32 - int max = 27; -#else - int max = 20; -#endif - int min = 8; - int idx; max_avail = order_base_2(totalram_pages()) + PAGE_SHIFT; - max_avail -= 2; /* ~4 in hash row */ + /* 64-bit: 27 bits at 64GB, 32-bit: 20 bits at 512MB */ + max_avail += 1; /* hash table loaded at 50% */ max_avail -= 1; /* IPVS up to 1/2 of mem */ max_avail -= order_base_2(sizeof(struct ip_vs_conn)); max = clamp(max_avail, min, max); ip_vs_conn_tab_bits = clamp(ip_vs_conn_tab_bits, min, max); ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits; - ip_vs_conn_tab_mask = ip_vs_conn_tab_size - 1; /* * Allocate the connection hash table and initialize its list heads */ tab_array_size = array_size(ip_vs_conn_tab_size, - sizeof(*ip_vs_conn_tab)); - ip_vs_conn_tab = kvmalloc_objs(*ip_vs_conn_tab, ip_vs_conn_tab_size); - if (!ip_vs_conn_tab) - return -ENOMEM; + sizeof(struct hlist_bl_head)); /* Allocate ip_vs_conn slab cache */ ip_vs_conn_cachep = KMEM_CACHE(ip_vs_conn, SLAB_HWCACHE_ALIGN); - if (!ip_vs_conn_cachep) { - kvfree(ip_vs_conn_tab); + if (!ip_vs_conn_cachep) return -ENOMEM; - } pr_info("Connection hash table configured (size=%d, memory=%zdKbytes)\n", ip_vs_conn_tab_size, tab_array_size / 1024); IP_VS_DBG(0, "Each connection entry needs %zd bytes at least\n", sizeof(struct ip_vs_conn)); - for (idx = 0; idx < ip_vs_conn_tab_size; idx++) - INIT_HLIST_HEAD(&ip_vs_conn_tab[idx]); - - for (idx = 0; idx < CT_LOCKARRAY_SIZE; idx++) { - spin_lock_init(&__ip_vs_conntbl_lock_array[idx].l); - } - - /* calculate the random value for connection hash */ - get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd)); - return 0; } @@ -1554,5 +1896,4 @@ void ip_vs_conn_cleanup(void) rcu_barrier(); /* Release the empty cache */ kmem_cache_destroy(ip_vs_conn_cachep); - kvfree(ip_vs_conn_tab); } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 2baef945c56f..032425025d88 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1643,6 +1643,7 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, struct ip_vs_service **svc_p) { struct ip_vs_scheduler *sched = NULL; + struct ip_vs_rht *tc_new = NULL; struct ip_vs_rht *t, *t_new = NULL; int af_id = ip_vs_af_index(u->af); struct ip_vs_service *svc = NULL; @@ -1702,6 +1703,17 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, } } + if (!rcu_dereference_protected(ipvs->conn_tab, 1)) { + int lfactor = sysctl_conn_lfactor(ipvs); + int new_size = ip_vs_conn_desired_size(ipvs, NULL, lfactor); + + tc_new = ip_vs_conn_tab_alloc(ipvs, new_size, lfactor); + if (!tc_new) { + ret = -ENOMEM; + goto out_err; + } + } + if (!atomic_read(&ipvs->num_services[af_id])) { ret = ip_vs_register_hooks(ipvs, u->af); if (ret < 0) @@ -1752,6 +1764,10 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, rcu_assign_pointer(ipvs->svc_table, t_new); t_new = NULL; } + if (tc_new) { + rcu_assign_pointer(ipvs->conn_tab, tc_new); + tc_new = NULL; + } /* Update the virtual service counters */ if (svc->port == FTPPORT) @@ -1794,6 +1810,8 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, out_err: + if (tc_new) + ip_vs_rht_free(tc_new); if (t_new) ip_vs_rht_free(t_new); if (ret_hooks >= 0) diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c index 85f31d71e29a..0c83c7b69581 100644 --- a/net/netfilter/ipvs/ip_vs_pe_sip.c +++ b/net/netfilter/ipvs/ip_vs_pe_sip.c @@ -132,9 +132,9 @@ static bool ip_vs_sip_ct_match(const struct ip_vs_conn_param *p, } static u32 ip_vs_sip_hashkey_raw(const struct ip_vs_conn_param *p, - u32 initval, bool inverse) + struct ip_vs_rht *t, bool inverse) { - return jhash(p->pe_data, p->pe_data_len, initval); + return jhash(p->pe_data, p->pe_data_len, (u32)t->hash_key.key[0]); } static int ip_vs_sip_show_pe_data(const struct ip_vs_conn *cp, char *buf) diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index b2ba3befbd55..93038abbf5e0 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1755,6 +1755,28 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, if (!ip_vs_use_count_inc()) return -ENOPROTOOPT; + /* Backup server can be started without services just to sync conns, + * make sure conn_tab is created even if ipvs->enable is 0. + */ + if (state == IP_VS_STATE_BACKUP) { + mutex_lock(&ipvs->service_mutex); + if (!rcu_dereference_protected(ipvs->conn_tab, 1)) { + int lfactor = sysctl_conn_lfactor(ipvs); + int new_size = ip_vs_conn_desired_size(ipvs, NULL, + lfactor); + struct ip_vs_rht *tc_new; + + tc_new = ip_vs_conn_tab_alloc(ipvs, new_size, lfactor); + if (!tc_new) { + mutex_unlock(&ipvs->service_mutex); + result = -ENOMEM; + goto out_module; + } + rcu_assign_pointer(ipvs->conn_tab, tc_new); + } + mutex_unlock(&ipvs->service_mutex); + } + /* Do not hold one mutex and then to block on another */ for (;;) { rtnl_lock(); @@ -1922,6 +1944,7 @@ out_early: mutex_unlock(&ipvs->sync_mutex); rtnl_unlock(); +out_module: /* decrease the module use count */ ip_vs_use_count_dec(); return result; From f20c73b0460d15301cf1bddf0f85d060a38a75df Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Tue, 3 Mar 2026 23:04:08 +0200 Subject: [PATCH 0261/1561] ipvs: use more keys for connection hashing Simon Kirby reported long time ago that IPVS connection hashing based only on the client address/port (caddr, cport) as hash keys is not suitable for setups that accept traffic on multiple virtual IPs and ports. It can happen for multiple VIP:VPORT services, for single or many fwmark service(s) that match multiple virtual IPs and ports or even for passive FTP with peristence in DR/TUN mode where we expect traffic on multiple ports for the virtual IP. Fix it by adding virtual addresses and ports to the hash function. This causes the traffic from NAT real servers to clients to use second hashing for the in->out direction. As result: - the IN direction from client will use hash node hn0 where the source/dest addresses and ports used by client will be used as hash keys - the OUT direction from NAT real servers will use hash node hn1 for the traffic from real server to client - the persistence templates are hashed only with parameters based on the IN direction, so they now will also use the virtual address, port and fwmark from the service. OLD: - all methods: c_list node: proto, caddr:cport - persistence templates: c_list node: proto, caddr_net:0 - persistence engine templates: c_list node: per-PE, PE-SIP uses jhash NEW: - all methods: hn0 node (dir 0): proto, caddr:cport -> vaddr:vport - MASQ method: hn1 node (dir 1): proto, daddr:dport -> caddr:cport - persistence templates: hn0 node (dir 0): proto, caddr_net:0 -> vaddr:vport_or_0 proto, caddr_net:0 -> fwmark:0 - persistence engine templates: hn0 node (dir 0): as before Also reorder the ip_vs_conn fields, so that hash nodes are on same read-mostly cache line while write-mostly fields are on separate cache line. Reported-by: Simon Kirby Signed-off-by: Julian Anastasov Signed-off-by: Florian Westphal --- include/net/ip_vs.h | 112 ++++++++----- net/netfilter/ipvs/ip_vs_conn.c | 271 ++++++++++++++++++++++++-------- 2 files changed, 279 insertions(+), 104 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 3d595bd99eb3..72d325c81313 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -786,43 +786,72 @@ struct ip_vs_conn_param { __u8 pe_data_len; }; +/* Hash node in conn_tab */ +struct ip_vs_conn_hnode { + struct hlist_bl_node node; /* node in conn_tab */ + u32 hash_key; /* Key for the hash table */ + u8 dir; /* 0=out->in, 1=in->out */ +} __packed; + /* IP_VS structure allocated for each dynamically scheduled connection */ struct ip_vs_conn { - struct hlist_bl_node c_list; /* node in conn_tab */ - __u32 hash_key; /* Key for the hash table */ - /* Protocol, addresses and port numbers */ + /* Cacheline for hash table nodes - rarely modified */ + + struct ip_vs_conn_hnode hn0; /* Original direction */ + u8 af; /* address family */ __be16 cport; + struct ip_vs_conn_hnode hn1; /* Reply direction */ + u8 daf; /* Address family of the dest */ __be16 dport; - __be16 vport; - u16 af; /* address family */ - __u16 protocol; /* Which protocol (TCP/UDP) */ - __u16 daf; /* Address family of the dest */ - union nf_inet_addr caddr; /* client address */ - union nf_inet_addr vaddr; /* virtual address */ - union nf_inet_addr daddr; /* destination address */ + struct ip_vs_dest *dest; /* real server */ + atomic_t n_control; /* Number of controlled ones */ volatile __u32 flags; /* status flags */ - struct netns_ipvs *ipvs; + /* 44/64 */ - /* counter and timer */ - refcount_t refcnt; /* reference count */ - struct timer_list timer; /* Expiration timer */ - volatile unsigned long timeout; /* timeout */ - - /* Flags and state transition */ - spinlock_t lock; /* lock for state transition */ + struct ip_vs_conn *control; /* Master control connection */ + const struct ip_vs_pe *pe; + char *pe_data; + __u8 pe_data_len; volatile __u16 state; /* state info */ volatile __u16 old_state; /* old state, to be used for * state transition triggered * synchronization */ - __u32 fwmark; /* Fire wall mark from skb */ - unsigned long sync_endtime; /* jiffies + sent_retries */ + /* 2-byte hole */ + /* 64/96 */ - /* Control members */ - struct ip_vs_conn *control; /* Master control connection */ - atomic_t n_control; /* Number of controlled ones */ - struct ip_vs_dest *dest; /* real server */ + union nf_inet_addr caddr; /* client address */ + union nf_inet_addr vaddr; /* virtual address */ + /* 96/128 */ + + union nf_inet_addr daddr; /* destination address */ + __u32 fwmark; /* Fire wall mark from skb */ + __be16 vport; + __u16 protocol; /* Which protocol (TCP/UDP) */ + + /* Note: we can group the following members into a structure, + * in order to save more space, and the following members are + * only used in VS/NAT anyway + */ + struct ip_vs_app *app; /* bound ip_vs_app object */ + void *app_data; /* Application private data */ + /* 128/168 */ + struct_group(sync_conn_opt, + struct ip_vs_seq in_seq; /* incoming seq. struct */ + struct ip_vs_seq out_seq; /* outgoing seq. struct */ + ); + /* 152/192 */ + + struct timer_list timer; /* Expiration timer */ + volatile unsigned long timeout; /* timeout */ + spinlock_t lock; /* lock for state transition */ + refcount_t refcnt; /* reference count */ atomic_t in_pkts; /* incoming packet counter */ + /* 64-bit: 4-byte gap */ + + /* 188/256 */ + unsigned long sync_endtime; /* jiffies + sent_retries */ + struct netns_ipvs *ipvs; /* Packet transmitter for different forwarding methods. If it * mangles the packet, it must return NF_DROP or better NF_STOLEN, @@ -832,21 +861,6 @@ struct ip_vs_conn { int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph); - /* Note: we can group the following members into a structure, - * in order to save more space, and the following members are - * only used in VS/NAT anyway - */ - struct ip_vs_app *app; /* bound ip_vs_app object */ - void *app_data; /* Application private data */ - struct_group(sync_conn_opt, - struct ip_vs_seq in_seq; /* incoming seq. struct */ - struct ip_vs_seq out_seq; /* outgoing seq. struct */ - ); - - const struct ip_vs_pe *pe; - char *pe_data; - __u8 pe_data_len; - struct rcu_head rcu_head; }; @@ -1628,6 +1642,19 @@ int ip_vs_conn_desired_size(struct netns_ipvs *ipvs, struct ip_vs_rht *t, struct ip_vs_rht *ip_vs_conn_tab_alloc(struct netns_ipvs *ipvs, int buckets, int lfactor); +static inline struct ip_vs_conn * +ip_vs_hn0_to_conn(struct ip_vs_conn_hnode *hn) +{ + return container_of(hn, struct ip_vs_conn, hn0); +} + +static inline struct ip_vs_conn * +ip_vs_hn_to_conn(struct ip_vs_conn_hnode *hn) +{ + return hn->dir ? container_of(hn, struct ip_vs_conn, hn1) : + container_of(hn, struct ip_vs_conn, hn0); +} + struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af, const union nf_inet_addr *daddr, __be16 dport, unsigned int flags, @@ -1980,6 +2007,13 @@ static inline char ip_vs_fwd_tag(struct ip_vs_conn *cp) return fwd; } +/* Check if connection uses double hashing */ +static inline bool ip_vs_conn_use_hash2(struct ip_vs_conn *cp) +{ + return IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ && + !(cp->flags & IP_VS_CONN_F_TEMPLATE); +} + void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp, int dir); diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 07a47e525f01..2082bfb2d93c 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -76,11 +76,19 @@ static struct kmem_cache *ip_vs_conn_cachep __read_mostly; /* Lock conn_tab bucket for conn hash/unhash, not for rehash */ static __always_inline void conn_tab_lock(struct ip_vs_rht *t, struct ip_vs_conn *cp, u32 hash_key, - bool new_hash, struct hlist_bl_head **head_ret) + u32 hash_key2, bool use2, bool new_hash, + struct hlist_bl_head **head_ret, struct hlist_bl_head **head2_ret) { - struct hlist_bl_head *head; - u32 hash_key_new; + struct hlist_bl_head *head, *head2; + u32 hash_key_new, hash_key_new2; + struct ip_vs_rht *t2 = t; + u32 idx, idx2; + idx = hash_key & t->mask; + if (use2) + idx2 = hash_key2 & t->mask; + else + idx2 = idx; if (!new_hash) { /* We need to lock the bucket in the right table */ @@ -88,31 +96,64 @@ retry: if (!ip_vs_rht_same_table(t, hash_key)) { /* It is already moved to new table */ t = rcu_dereference(t->new_tbl); + /* Rehashing works in two steps and we may detect + * both nodes in different tables, use idx/idx2 + * for proper lock ordering for heads. + */ + idx = hash_key & t->mask; + idx |= IP_VS_RHT_TABLE_ID_MASK; + } + if (use2) { + if (!ip_vs_rht_same_table(t2, hash_key2)) { + /* It is already moved to new table */ + t2 = rcu_dereference(t2->new_tbl); + idx2 = hash_key2 & t2->mask; + idx2 |= IP_VS_RHT_TABLE_ID_MASK; + } + } else { + idx2 = idx; } } head = t->buckets + (hash_key & t->mask); + head2 = use2 ? t2->buckets + (hash_key2 & t2->mask) : head; local_bh_disable(); /* Do not touch seqcount, this is a safe operation */ - hlist_bl_lock(head); + if (idx <= idx2) { + hlist_bl_lock(head); + if (head != head2) + hlist_bl_lock(head2); + } else { + hlist_bl_lock(head2); + hlist_bl_lock(head); + } if (!new_hash) { /* Ensure hash_key is read under lock */ - hash_key_new = READ_ONCE(cp->hash_key); + hash_key_new = READ_ONCE(cp->hn0.hash_key); + hash_key_new2 = READ_ONCE(cp->hn1.hash_key); /* Hash changed ? */ - if (hash_key != hash_key_new) { + if (hash_key != hash_key_new || + (hash_key2 != hash_key_new2 && use2)) { + if (head != head2) + hlist_bl_unlock(head2); hlist_bl_unlock(head); local_bh_enable(); hash_key = hash_key_new; + hash_key2 = hash_key_new2; goto retry; } } *head_ret = head; + *head2_ret = head2; } -static inline void conn_tab_unlock(struct hlist_bl_head *head) +static inline void conn_tab_unlock(struct hlist_bl_head *head, + struct hlist_bl_head *head2) { + if (head != head2) + hlist_bl_unlock(head2); hlist_bl_unlock(head); local_bh_enable(); } @@ -123,26 +164,34 @@ static void ip_vs_conn_expire(struct timer_list *t); * Returns hash value for IPVS connection entry */ static u32 ip_vs_conn_hashkey(struct ip_vs_rht *t, int af, unsigned int proto, - const union nf_inet_addr *addr, __be16 port) + const union nf_inet_addr *addr, __be16 port, + const union nf_inet_addr *laddr, __be16 lport) { u64 a = (u32)proto << 16 | (__force u32)port; + u64 d; #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { u64 b = (u64)addr->all[0] << 32 | addr->all[1]; u64 c = (u64)addr->all[2] << 32 | addr->all[3]; - return (u32)siphash_3u64(a, b, c, &t->hash_key); + a |= (u64)laddr->all[2] << 32 ^ (__force u32)lport; + c ^= laddr->all[1]; + d = (u64)laddr->all[0] << 32 | laddr->all[3]; + return (u32)siphash_4u64(a, b, c, d, &t->hash_key); } #endif a |= (u64)addr->all[0] << 32; - return (u32)siphash_1u64(a, &t->hash_key); + d = (u64)laddr->all[0] << 32 | (__force u32)lport; + return (u32)siphash_2u64(a, d, &t->hash_key); } static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p, struct ip_vs_rht *t, bool inverse) { + const union nf_inet_addr *laddr; const union nf_inet_addr *addr; + __be16 lport; __be16 port; if (p->pe_data && p->pe->hashkey_raw) @@ -151,21 +200,33 @@ static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p, if (likely(!inverse)) { addr = p->caddr; port = p->cport; + laddr = p->vaddr; + lport = p->vport; } else { addr = p->vaddr; port = p->vport; + laddr = p->caddr; + lport = p->cport; } - return ip_vs_conn_hashkey(t, p->af, p->protocol, addr, port); + return ip_vs_conn_hashkey(t, p->af, p->protocol, addr, port, laddr, + lport); } static unsigned int ip_vs_conn_hashkey_conn(struct ip_vs_rht *t, - const struct ip_vs_conn *cp) + const struct ip_vs_conn *cp, + bool out) { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(cp->ipvs, cp->af, cp->protocol, - &cp->caddr, cp->cport, NULL, 0, &p); + if (!out) + ip_vs_conn_fill_param(cp->ipvs, cp->af, cp->protocol, + &cp->caddr, cp->cport, &cp->vaddr, + cp->vport, &p); + else + ip_vs_conn_fill_param(cp->ipvs, cp->af, cp->protocol, + &cp->daddr, cp->dport, &cp->caddr, + cp->cport, &p); if (cp->pe) { p.pe = cp->pe; @@ -173,7 +234,7 @@ static unsigned int ip_vs_conn_hashkey_conn(struct ip_vs_rht *t, p.pe_data_len = cp->pe_data_len; } - return ip_vs_conn_hashkey_param(&p, t, false); + return ip_vs_conn_hashkey_param(&p, t, out); } /* Hashes ip_vs_conn in conn_tab @@ -182,9 +243,11 @@ static unsigned int ip_vs_conn_hashkey_conn(struct ip_vs_rht *t, static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) { struct netns_ipvs *ipvs = cp->ipvs; - struct hlist_bl_head *head; + struct hlist_bl_head *head, *head2; + u32 hash_key, hash_key2; struct ip_vs_rht *t; - u32 hash_key; + u32 hash, hash2; + bool use2; int ret; if (cp->flags & IP_VS_CONN_F_ONE_PACKET) @@ -194,15 +257,28 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) t = rcu_dereference(ipvs->conn_tab); t = rcu_dereference(t->new_tbl); - hash_key = ip_vs_rht_build_hash_key(t, ip_vs_conn_hashkey_conn(t, cp)); - conn_tab_lock(t, cp, hash_key, true /* new_hash */, &head); + hash = ip_vs_conn_hashkey_conn(t, cp, false); + hash_key = ip_vs_rht_build_hash_key(t, hash); + if (ip_vs_conn_use_hash2(cp)) { + hash2 = ip_vs_conn_hashkey_conn(t, cp, true); + hash_key2 = ip_vs_rht_build_hash_key(t, hash2); + use2 = true; + } else { + hash_key2 = hash_key; + use2 = false; + } + conn_tab_lock(t, cp, hash_key, hash_key2, use2, true /* new_hash */, + &head, &head2); spin_lock(&cp->lock); if (!(cp->flags & IP_VS_CONN_F_HASHED)) { cp->flags |= IP_VS_CONN_F_HASHED; - WRITE_ONCE(cp->hash_key, hash_key); + WRITE_ONCE(cp->hn0.hash_key, hash_key); + WRITE_ONCE(cp->hn1.hash_key, hash_key2); refcount_inc(&cp->refcnt); - hlist_bl_add_head_rcu(&cp->c_list, head); + hlist_bl_add_head_rcu(&cp->hn0.node, head); + if (use2) + hlist_bl_add_head_rcu(&cp->hn1.node, head2); ret = 1; } else { pr_err("%s(): request for already hashed, called from %pS\n", @@ -211,7 +287,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) } spin_unlock(&cp->lock); - conn_tab_unlock(head); + conn_tab_unlock(head, head2); /* Schedule resizing if load increases */ if (atomic_read(&ipvs->conn_count) > t->u_thresh && @@ -227,10 +303,11 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp) { struct netns_ipvs *ipvs = cp->ipvs; - struct hlist_bl_head *head; + struct hlist_bl_head *head, *head2; + u32 hash_key, hash_key2; struct ip_vs_rht *t; bool ret = false; - u32 hash_key; + bool use2; if (cp->flags & IP_VS_CONN_F_ONE_PACKET) return refcount_dec_if_one(&cp->refcnt); @@ -238,22 +315,27 @@ static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp) rcu_read_lock(); t = rcu_dereference(ipvs->conn_tab); - hash_key = READ_ONCE(cp->hash_key); + hash_key = READ_ONCE(cp->hn0.hash_key); + hash_key2 = READ_ONCE(cp->hn1.hash_key); + use2 = ip_vs_conn_use_hash2(cp); - conn_tab_lock(t, cp, hash_key, false /* new_hash */, &head); + conn_tab_lock(t, cp, hash_key, hash_key2, use2, false /* new_hash */, + &head, &head2); spin_lock(&cp->lock); if (cp->flags & IP_VS_CONN_F_HASHED) { /* Decrease refcnt and unlink conn only if we are last user */ if (refcount_dec_if_one(&cp->refcnt)) { - hlist_bl_del_rcu(&cp->c_list); + hlist_bl_del_rcu(&cp->hn0.node); + if (use2) + hlist_bl_del_rcu(&cp->hn1.node); cp->flags &= ~IP_VS_CONN_F_HASHED; ret = true; } } spin_unlock(&cp->lock); - conn_tab_unlock(head); + conn_tab_unlock(head, head2); rcu_read_unlock(); @@ -272,6 +354,7 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p) { DECLARE_IP_VS_RHT_WALK_BUCKET_RCU(); struct netns_ipvs *ipvs = p->ipvs; + struct ip_vs_conn_hnode *hn; struct hlist_bl_head *head; struct ip_vs_rht *t, *pt; struct hlist_bl_node *e; @@ -284,9 +367,12 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p) hash = ip_vs_conn_hashkey_param(p, t, false); hash_key = ip_vs_rht_build_hash_key(t, hash); ip_vs_rht_walk_bucket_rcu(t, hash_key, head) { - hlist_bl_for_each_entry_rcu(cp, e, head, c_list) { - if (READ_ONCE(cp->hash_key) == hash_key && - p->cport == cp->cport && + hlist_bl_for_each_entry_rcu(hn, e, head, node) { + if (READ_ONCE(hn->hash_key) != hash_key || + hn->dir != 0) + continue; + cp = ip_vs_hn0_to_conn(hn); + if (p->cport == cp->cport && p->vport == cp->vport && cp->af == p->af && ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) && @@ -376,6 +462,7 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) { DECLARE_IP_VS_RHT_WALK_BUCKET_RCU(); struct netns_ipvs *ipvs = p->ipvs; + struct ip_vs_conn_hnode *hn; struct hlist_bl_head *head; struct ip_vs_rht *t, *pt; struct hlist_bl_node *e; @@ -388,9 +475,11 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) hash = ip_vs_conn_hashkey_param(p, t, false); hash_key = ip_vs_rht_build_hash_key(t, hash); ip_vs_rht_walk_bucket_rcu(t, hash_key, head) { - hlist_bl_for_each_entry_rcu(cp, e, head, c_list) { - if (READ_ONCE(cp->hash_key) != hash_key) + hlist_bl_for_each_entry_rcu(hn, e, head, node) { + if (READ_ONCE(hn->hash_key) != hash_key || + hn->dir != 0) continue; + cp = ip_vs_hn0_to_conn(hn); if (unlikely(p->pe_data && p->pe->ct_match)) { if (p->pe == cp->pe && p->pe->ct_match(p, cp) && @@ -442,6 +531,7 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) DECLARE_IP_VS_RHT_WALK_BUCKET_RCU(); struct netns_ipvs *ipvs = p->ipvs; const union nf_inet_addr *saddr; + struct ip_vs_conn_hnode *hn; struct hlist_bl_head *head; struct ip_vs_rht *t, *pt; struct hlist_bl_node *e; @@ -455,9 +545,12 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) hash = ip_vs_conn_hashkey_param(p, t, true); hash_key = ip_vs_rht_build_hash_key(t, hash); ip_vs_rht_walk_bucket_rcu(t, hash_key, head) { - hlist_bl_for_each_entry_rcu(cp, e, head, c_list) { - if (READ_ONCE(cp->hash_key) != hash_key || - p->vport != cp->cport) + hlist_bl_for_each_entry_rcu(hn, e, head, node) { + /* dir can be 0 for DR/TUN */ + if (READ_ONCE(hn->hash_key) != hash_key) + continue; + cp = ip_vs_hn_to_conn(hn); + if (p->vport != cp->cport) continue; if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { @@ -536,21 +629,33 @@ void ip_vs_conn_put(struct ip_vs_conn *cp) void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport) { struct hlist_bl_head *head, *head2, *head_new; + bool use2 = ip_vs_conn_use_hash2(cp); struct netns_ipvs *ipvs = cp->ipvs; int af_id = ip_vs_af_index(cp->af); u32 hash_r = 0, hash_key_r = 0; struct ip_vs_rht *t, *tp, *t2; + struct ip_vs_conn_hnode *hn; u32 hash_key, hash_key_new; struct ip_vs_conn_param p; int ntbl; + int dir; - ip_vs_conn_fill_param(ipvs, cp->af, cp->protocol, &cp->caddr, - cport, &cp->vaddr, cp->vport, &p); + /* No packets from inside, so we can do it in 2 steps. */ + dir = use2 ? 1 : 0; + +next_dir: + if (dir) + ip_vs_conn_fill_param(ipvs, cp->af, cp->protocol, &cp->daddr, + cp->dport, &cp->caddr, cport, &p); + else + ip_vs_conn_fill_param(ipvs, cp->af, cp->protocol, &cp->caddr, + cport, &cp->vaddr, cp->vport, &p); + hn = dir ? &cp->hn1 : &cp->hn0; ntbl = 0; /* Attempt to rehash cp safely, by informing seqcount readers */ t = rcu_dereference(ipvs->conn_tab); - hash_key = READ_ONCE(cp->hash_key); + hash_key = READ_ONCE(hn->hash_key); tp = NULL; retry: @@ -567,7 +672,7 @@ retry: t2 = rcu_dereference(t->new_tbl); /* Calc new hash once per table */ if (tp != t2) { - hash_r = ip_vs_conn_hashkey_param(&p, t2, false); + hash_r = ip_vs_conn_hashkey_param(&p, t2, dir); hash_key_r = ip_vs_rht_build_hash_key(t2, hash_r); tp = t2; } @@ -591,7 +696,7 @@ retry: hlist_bl_lock(head2); /* Ensure hash_key is read under lock */ - hash_key_new = READ_ONCE(cp->hash_key); + hash_key_new = READ_ONCE(hn->hash_key); /* Racing with another rehashing ? */ if (unlikely(hash_key != hash_key_new)) { if (head != head2) @@ -611,14 +716,21 @@ retry: * parameters in cp do not change, i.e. cport is * the only possible change. */ - WRITE_ONCE(cp->hash_key, hash_key_r); + WRITE_ONCE(hn->hash_key, hash_key_r); + if (!use2) + WRITE_ONCE(cp->hn1.hash_key, hash_key_r); + /* For dir=1 we do not check in flags if hn is already + * rehashed but this check will do it. + */ if (head != head2) { - hlist_bl_del_rcu(&cp->c_list); - hlist_bl_add_head_rcu(&cp->c_list, head_new); + hlist_bl_del_rcu(&hn->node); + hlist_bl_add_head_rcu(&hn->node, head_new); + } + if (!dir) { + atomic_dec(&ipvs->no_cport_conns[af_id]); + cp->flags &= ~IP_VS_CONN_F_NO_CPORT; + cp->cport = cport; } - atomic_dec(&ipvs->no_cport_conns[af_id]); - cp->flags &= ~IP_VS_CONN_F_NO_CPORT; - cp->cport = cport; } spin_unlock(&cp->lock); @@ -628,6 +740,8 @@ retry: write_seqcount_end(&t->seqc[hash_key & t->seqc_mask]); preempt_enable_nested(); spin_unlock_bh(&t->lock[hash_key & t->lock_mask].l); + if (dir--) + goto next_dir; } /* Get default load factor to map conn_count/u_thresh to t->size */ @@ -639,6 +753,8 @@ static int ip_vs_conn_default_load_factor(struct netns_ipvs *ipvs) factor = -3; else factor = -1; + /* Double hashing adds twice more nodes for NAT */ + factor--; return factor; } @@ -679,6 +795,7 @@ static void conn_resize_work_handler(struct work_struct *work) unsigned int resched_score = 0; struct hlist_bl_node *cn, *nn; struct ip_vs_rht *t, *t_new; + struct ip_vs_conn_hnode *hn; struct netns_ipvs *ipvs; struct ip_vs_conn *cp; bool more_work = false; @@ -747,8 +864,9 @@ same_bucket: write_seqcount_begin(&t->seqc[bucket & t->seqc_mask]); hlist_bl_lock(head); - hlist_bl_for_each_entry_safe(cp, cn, nn, head, c_list) { - hash = ip_vs_conn_hashkey_conn(t_new, cp); + hlist_bl_for_each_entry_safe(hn, cn, nn, head, node) { + cp = ip_vs_hn_to_conn(hn); + hash = ip_vs_conn_hashkey_conn(t_new, cp, hn->dir); hash_key = ip_vs_rht_build_hash_key(t_new, hash); head2 = t_new->buckets + (hash & t_new->mask); @@ -756,9 +874,12 @@ same_bucket: /* t_new->seqc are not used at this stage, we race * only with add/del, so only lock the bucket. */ - hlist_bl_del_rcu(&cp->c_list); - WRITE_ONCE(cp->hash_key, hash_key); - hlist_bl_add_head_rcu(&cp->c_list, head2); + hlist_bl_del_rcu(&hn->node); + WRITE_ONCE(hn->hash_key, hash_key); + /* Keep both hash keys in sync if no double hashing */ + if (!ip_vs_conn_use_hash2(cp)) + WRITE_ONCE(cp->hn1.hash_key, hash_key); + hlist_bl_add_head_rcu(&hn->node, head2); hlist_bl_unlock(head2); /* Too long chain? Do it in steps */ if (++limit >= 64) @@ -1236,10 +1357,13 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af, return NULL; } - INIT_HLIST_BL_NODE(&cp->c_list); + INIT_HLIST_BL_NODE(&cp->hn0.node); + INIT_HLIST_BL_NODE(&cp->hn1.node); timer_setup(&cp->timer, ip_vs_conn_expire, 0); cp->ipvs = ipvs; + cp->hn0.dir = 0; cp->af = p->af; + cp->hn1.dir = 1; cp->daf = dest_af; cp->protocol = p->protocol; ip_vs_addr_set(p->af, &cp->caddr, p->caddr); @@ -1344,8 +1468,8 @@ static void *ip_vs_conn_array(struct seq_file *seq) struct net *net = seq_file_net(seq); struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_rht *t = iter->t; + struct ip_vs_conn_hnode *hn; struct hlist_bl_node *e; - struct ip_vs_conn *cp; int idx; if (!t) @@ -1353,15 +1477,17 @@ static void *ip_vs_conn_array(struct seq_file *seq) for (idx = iter->bucket; idx < t->size; idx++) { unsigned int skip = 0; - hlist_bl_for_each_entry_rcu(cp, e, &t->buckets[idx], c_list) { + hlist_bl_for_each_entry_rcu(hn, e, &t->buckets[idx], node) { /* __ip_vs_conn_get() is not needed by * ip_vs_conn_seq_show and ip_vs_conn_sync_seq_show */ - if (!ip_vs_rht_same_table(t, READ_ONCE(cp->hash_key))) + if (!ip_vs_rht_same_table(t, READ_ONCE(hn->hash_key))) break; + if (hn->dir != 0) + continue; if (skip >= iter->skip_elems) { iter->bucket = idx; - return cp; + return hn; } ++skip; @@ -1403,7 +1529,7 @@ static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos) static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct ip_vs_iter_state *iter = seq->private; - struct ip_vs_conn *cp = v; + struct ip_vs_conn_hnode *hn = v; struct hlist_bl_node *e; struct ip_vs_rht *t; @@ -1416,12 +1542,14 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) return NULL; /* more on same hash chain? */ - hlist_bl_for_each_entry_continue_rcu(cp, e, c_list) { + hlist_bl_for_each_entry_continue_rcu(hn, e, node) { /* Our cursor was moved to new table ? */ - if (!ip_vs_rht_same_table(t, READ_ONCE(cp->hash_key))) + if (!ip_vs_rht_same_table(t, READ_ONCE(hn->hash_key))) break; + if (hn->dir != 0) + continue; iter->skip_elems++; - return cp; + return hn; } iter->skip_elems = 0; @@ -1443,7 +1571,8 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Expires PEName PEData\n"); else { - const struct ip_vs_conn *cp = v; + struct ip_vs_conn_hnode *hn = v; + const struct ip_vs_conn *cp = ip_vs_hn0_to_conn(hn); char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3]; size_t len = 0; char dbuf[IP_VS_ADDRSTRLEN]; @@ -1610,6 +1739,7 @@ static inline bool ip_vs_conn_ops_mode(struct ip_vs_conn *cp) void ip_vs_random_dropentry(struct netns_ipvs *ipvs) { + struct ip_vs_conn_hnode *hn; struct hlist_bl_node *e; struct ip_vs_conn *cp; struct ip_vs_rht *t; @@ -1630,7 +1760,10 @@ void ip_vs_random_dropentry(struct netns_ipvs *ipvs) /* Don't care if due to moved entry we jump to another bucket * and even to new table */ - hlist_bl_for_each_entry_rcu(cp, e, &t->buckets[hash], c_list) { + hlist_bl_for_each_entry_rcu(hn, e, &t->buckets[hash], node) { + if (hn->dir != 0) + continue; + cp = ip_vs_hn0_to_conn(hn); if (atomic_read(&cp->n_control)) continue; if (cp->flags & IP_VS_CONN_F_TEMPLATE) { @@ -1695,6 +1828,7 @@ static void ip_vs_conn_flush(struct netns_ipvs *ipvs) { DECLARE_IP_VS_RHT_WALK_BUCKETS_SAFE_RCU(); struct ip_vs_conn *cp, *cp_c; + struct ip_vs_conn_hnode *hn; struct hlist_bl_head *head; struct ip_vs_rht *t, *p; struct hlist_bl_node *e; @@ -1709,7 +1843,10 @@ flush_again: /* Rely on RCU grace period while accessing cp after ip_vs_conn_del */ rcu_read_lock(); ip_vs_rht_walk_buckets_safe_rcu(ipvs->conn_tab, head) { - hlist_bl_for_each_entry_rcu(cp, e, head, c_list) { + hlist_bl_for_each_entry_rcu(hn, e, head, node) { + if (hn->dir != 0) + continue; + cp = ip_vs_hn0_to_conn(hn); if (atomic_read(&cp->n_control)) continue; cp_c = cp->control; @@ -1756,6 +1893,7 @@ void ip_vs_expire_nodest_conn_flush(struct netns_ipvs *ipvs) DECLARE_IP_VS_RHT_WALK_BUCKETS_RCU(); unsigned int resched_score = 0; struct ip_vs_conn *cp, *cp_c; + struct ip_vs_conn_hnode *hn; struct hlist_bl_head *head; struct ip_vs_dest *dest; struct hlist_bl_node *e; @@ -1769,7 +1907,10 @@ void ip_vs_expire_nodest_conn_flush(struct netns_ipvs *ipvs) repeat: smp_rmb(); /* ipvs->conn_tab and conn_tab_changes */ ip_vs_rht_walk_buckets_rcu(ipvs->conn_tab, head) { - hlist_bl_for_each_entry_rcu(cp, e, head, c_list) { + hlist_bl_for_each_entry_rcu(hn, e, head, node) { + if (hn->dir != 0) + continue; + cp = ip_vs_hn0_to_conn(hn); resched_score++; dest = cp->dest; if (!dest || (dest->flags & IP_VS_DEST_F_AVAILABLE)) From 44d93cf1abb6a85d65c3b4b027c82d44263de6a5 Mon Sep 17 00:00:00 2001 From: Karthikeyan Kathirvel Date: Wed, 4 Mar 2026 14:23:42 +0530 Subject: [PATCH 0262/1561] wifi: UHR: define DPS/DBE/P-EDCA elements and fix size parsing Add UHR Operation and Capability definitions and parsing helpers: - Define ieee80211_uhr_dps_info, ieee80211_uhr_dbe_info, ieee80211_uhr_p_edca_info with masks. - Update ieee80211_uhr_oper_size_ok() to account for optional DPS/DBE/P-EDCA blocks. - Move NPCA pointer position after DPS Operation Parameter if it is present in ieee80211_uhr_oper_size_ok(). - Move NPCA pointer position after DPS info if it is present in ieee80211_uhr_npca_info(). Signed-off-by: Karthikeyan Kathirvel Link: https://patch.msgid.link/20260304085343.1093993-2-karthikeyan.kathirvel@oss.qualcomm.com Signed-off-by: Johannes Berg --- include/linux/ieee80211-uhr.h | 271 +++++++++++++++++++++++++++++++++- 1 file changed, 265 insertions(+), 6 deletions(-) diff --git a/include/linux/ieee80211-uhr.h b/include/linux/ieee80211-uhr.h index 132acced7d79..9729d23e4766 100644 --- a/include/linux/ieee80211-uhr.h +++ b/include/linux/ieee80211-uhr.h @@ -29,11 +29,216 @@ struct ieee80211_uhr_operation { #define IEEE80211_UHR_NPCA_PARAMS_MOPLEN 0x00400000 #define IEEE80211_UHR_NPCA_PARAMS_DIS_SUBCH_BMAP_PRES 0x00800000 +/** + * struct ieee80211_uhr_npca_info - npca operation information + * + * This structure is the "NPCA Operation Parameters field format" of "UHR + * Operation Element" fields as described in P802.11bn_D1.3 + * subclause 9.4.2.353. See Figure 9-aa4. + * + * Refer to IEEE80211_UHR_NPCA* + * @params: + * NPCA Primary Channel - NPCA primary channel + * NPCA_Min Duration Threshold - Minimum duration of inter-BSS activity + * NPCA Switching Delay - + * Time needed by an NPCA AP to switch from the + * BSS primary channel to the NPCA primary channel + * in the unit of 4 µs. + * NPCA Switching Back Delay - + * Time to switch from the NPCA primary channel + * to the BSS primary channel in the unit of 4 µs. + * NPCA Initial QSRC - + * Initialize the EDCAF QSRC[AC] variables + * when an NPCA STA in the BSS + * switches to NPCA operation. + * NPCA MOPLEN - + * Indicates which conditions can be used to + * initiate an NPCA operation, + * 1 -> both PHYLEN NPCA operation and MOPLEN + * NPCA operation are + * permitted in the BSS + * 0 -> only PHYLEN NPCA operation is allowed in the BSS. + * NPCA Disabled Subchannel Bitmap Present - + * Indicates whether the NPCA Disabled Subchannel + * Bitmap field is present. A 1 in this field indicates that + * the NPCA Disabled Subchannel Bitmap field is present + * @dis_subch_bmap: + * A bit in the bitmap that lies within the BSS bandwidth is set + * to 1 to indicate that the corresponding 20 MHz subchannel is + * punctured and is set to 0 to indicate that the corresponding + * 20 MHz subchannel is not punctured. A bit in the bitmap that + * falls outside of the BSS bandwidth is reserved. This field is + * present when the value of the NPCA Disabled Subchannel Bitmap + * Field Present field is equal to 1, and not present, otherwise + */ struct ieee80211_uhr_npca_info { __le32 params; __le16 dis_subch_bmap[]; } __packed; +#define IEEE80211_UHR_DPS_PADDING_DELAY 0x0000003F +#define IEEE80211_UHR_DPS_TRANSITION_DELAY 0x00003F00 +#define IEEE80211_UHR_DPS_ICF_REQUIRED 0x00010000 +#define IEEE80211_UHR_DPS_PARAMETERIZED_FLAG 0x00020000 +#define IEEE80211_UHR_DPS_LC_MODE_BW 0x001C0000 +#define IEEE80211_UHR_DPS_LC_MODE_NSS 0x01E00000 +#define IEEE80211_UHR_DPS_LC_MODE_MCS 0x1E000000 +#define IEEE80211_UHR_DPS_MOBILE_AP_DPS_STATIC_HCM 0x20000000 + +/** + * struct ieee80211_uhr_dps_info - DPS operation information + * + * This structure is the "DPS Operation Parameter field" of "UHR + * Operation Element" fields as described in P802.11bn_D1.3 + * subclause 9.4.1.87. See Figure 9-207u. + * + * Refer to IEEE80211_UHR_DPS* + * @params: + * DPS Padding Delay - + * Indicates the minimum MAC padding + * duration that is required by a DPS STA + * in an ICF to cause the STA to transition + * from the lower capability mode to the + * higher capability mode. The DPS Padding + * Delay field is in units of 4 µs. + * DPS Transition Delay - + * Indicates the amount of time required by a + * DPS STA to transition from the higher + * capability mode to the lower capability + * mode. The DPS Transition Delay field is in + * units of 4 µs. + * ICF Required - + * Indicates when the DPS assisting STA needs + * to transmit an ICF frame to the peer DPS STA + * before performing the frame exchanges with + * the peer DPS STA in a TXOP. + * 1 -> indicates that the transmission of the + * ICF frame to the peer DPS STA prior to + * any frame exchange is needed. + * 0 -> ICF transmission before the frame + * exchanges with the peer DPS STA is only + * needed if the frame exchange is performed + * in the HC mode. + * Parameterized Flag - + * 0 -> indicates that only 20 MHz, 1 SS, + * non-HT PPDU format with the data + * rate of 6, 12, and 24 Mb/s as the + * default mode are supported by the + * DPS STA in the LC mode + * 1 -> indicates that a bandwidth up to the + * bandwidth indicated in the LC Mode + * Bandwidth field, a number of spatial + * streams up to the NSS indicated in + * the LC Mode Nss field, and an MCS up + * to the MCS indicated in the LC Mode + * MCS fields are supported by the DPS + * STA in the LC mode as the + * parameterized mode. + * LC Mode Bandwidth - + * Indicates the maximum bandwidth supported + * by the STA in the LC mode. + * LC Mode NSS - + * Indicates the maximum number of the spatial + * streams supported by the STA in the LC mode. + * LC Mode MCS - + * Indicates the highest MCS supported by the STA + * in the LC mode. + * Mobile AP DPS Static HCM - + * 1 -> indicates that it will remain in the DPS high + * capability mode until the next TBTT on that + * link. + * 0 -> otherwise. + */ +struct ieee80211_uhr_dps_info { + __le32 params; +} __packed; + +#define IEEE80211_UHR_DBE_OPER_BANDWIDTH 0x07 +#define IEEE80211_UHR_DBE_OPER_DIS_SUBCHANNEL_BITMAP_PRES 0x08 + +/** + * enum ieee80211_uhr_dbe_oper_bw - DBE Operational Bandwidth + * + * Encoding for the DBE Operational Bandwidth field in the UHR Operation + * element (DBE Operation Parameters). + * + * @IEEE80211_UHR_DBE_OPER_BW_40: 40 MHz operational DBE bandwidth + * @IEEE80211_UHR_DBE_OPER_BW_80: 80 MHz operational DBE bandwidth + * @IEEE80211_UHR_DBE_OPER_BW_160: 160 MHz operational DBE bandwidth + * @IEEE80211_UHR_DBE_OPER_BW_320_1: 320-1 MHz operational DBE bandwidth + * @IEEE80211_UHR_DBE_OPER_BW_320_2: 320-2 MHz operational DBE bandwidth + */ +enum ieee80211_uhr_dbe_oper_bw { + IEEE80211_UHR_DBE_OPER_BW_40 = 1, + IEEE80211_UHR_DBE_OPER_BW_80 = 2, + IEEE80211_UHR_DBE_OPER_BW_160 = 3, + IEEE80211_UHR_DBE_OPER_BW_320_1 = 4, + IEEE80211_UHR_DBE_OPER_BW_320_2 = 5, +}; + +/** + * struct ieee80211_uhr_dbe_info - DBE operation information + * + * This structure is the "DBE Operation Parameters field" of + * "UHR Operation Element" fields as described in P802.11bn_D1.3 + * subclause 9.4.2.353. See Figure 9-aa6. + * + * Refer to IEEE80211_UHR_DBE_OPER* + * @params: + * B0-B2 - DBE Operational Bandwidth field, see + * "enum ieee80211_uhr_dbe_oper_bw" for values. + * Value 0 is reserved. + * Value 1 indicates 40 MHz operational DBE bandwidth. + * Value 2 indicates 80 MHz operational DBE bandwidth. + * Value 3 indicates 160 MHz operational DBE bandwidth. + * Value 4 indicates 320-1 MHz operational DBE bandwidth. + * Value 5 indicates 320-2 MHz operational DBE bandwidth. + * Values 6 to 7 are reserved. + * B3 - DBE Disabled Subchannel Bitmap Present. + * @dis_subch_bmap: DBE Disabled Subchannel Bitmap field is set to indicate + * disabled 20 MHz subchannels within the DBE Bandwidth. + */ +struct ieee80211_uhr_dbe_info { + u8 params; + __le16 dis_subch_bmap[]; +} __packed; + +#define IEEE80211_UHR_P_EDCA_ECWMIN 0x0F +#define IEEE80211_UHR_P_EDCA_ECWMAX 0xF0 +#define IEEE80211_UHR_P_EDCA_AIFSN 0x000F +#define IEEE80211_UHR_P_EDCA_CW_DS 0x0030 +#define IEEE80211_UHR_P_EDCA_PSRC_THRESHOLD 0x01C0 +#define IEEE80211_UHR_P_EDCA_QSRC_THRESHOLD 0x0600 + +/** + * struct ieee80211_uhr_p_edca_info - P-EDCA operation information + * + * This structure is the "P-EDCA Operation Parameters field" of + * "UHR Operation Element" fields as described in P802.11bn_D1.3 + * subclause 9.4.2.353. See Figure 9-aa5. + * + * Refer to IEEE80211_UHR_P_EDCA* + * @p_edca_ec: P-EDCA ECWmin and ECWmax. + * These fields indicate the CWmin and CWmax values used by a + * P-EDCA STA during P-EDCA contention. + * @params: AIFSN, CW DS, PSRC threshold, and QSRC threshold. + * - The AIFSN field indicates the AIFSN value used by a P-EDCA STA + * during P-EDCA contention. + * - The CW DS field indicates the value used for randomization of the + * transmission slot of the DS-CTS frame. The value 3 is reserved. + * The value 0 indicates that randomization is not enabled. + * - The P-EDCA PSRC threshold field indicates the maximum number of + * allowed consecutive DS-CTS transmissions. The value 0 and values + * greater than 4 are reserved. + * - The P-EDCA QSRC threshold field indicates the value of the + * QSRC[AC_VO] counter required to start P-EDCA contention. The + * value 0 is reserved. + */ +struct ieee80211_uhr_p_edca_info { + u8 p_edca_ec; + __le16 params; +} __packed; + static inline bool ieee80211_uhr_oper_size_ok(const u8 *data, u8 len, bool beacon) { @@ -47,19 +252,52 @@ static inline bool ieee80211_uhr_oper_size_ok(const u8 *data, u8 len, if (beacon) return true; - /* FIXME: DPS, DBE, P-EDCA (consider order, also relative to NPCA) */ + /* DPS Operation Parameters (fixed 4 bytes) */ + if (oper->params & cpu_to_le16(IEEE80211_UHR_OPER_PARAMS_DPS_ENA)) { + needed += sizeof(struct ieee80211_uhr_dps_info); + if (len < needed) + return false; + } + /* NPCA Operation Parameters (fixed 4 bytes + optional 2 bytes) */ if (oper->params & cpu_to_le16(IEEE80211_UHR_OPER_PARAMS_NPCA_ENA)) { const struct ieee80211_uhr_npca_info *npca = - (const void *)oper->variable; + (const void *)(data + needed); needed += sizeof(*npca); - if (len < needed) return false; - if (npca->params & cpu_to_le32(IEEE80211_UHR_NPCA_PARAMS_DIS_SUBCH_BMAP_PRES)) + if (npca->params & + cpu_to_le32(IEEE80211_UHR_NPCA_PARAMS_DIS_SUBCH_BMAP_PRES)) { needed += sizeof(npca->dis_subch_bmap[0]); + if (len < needed) + return false; + } + } + + /* P-EDCA Operation Parameters (fixed 3 bytes) */ + if (oper->params & cpu_to_le16(IEEE80211_UHR_OPER_PARAMS_PEDCA_ENA)) { + needed += sizeof(struct ieee80211_uhr_p_edca_info); + if (len < needed) + return false; + } + + /* DBE Operation Parameters (fixed 1 byte + optional 2 bytes) */ + if (oper->params & cpu_to_le16(IEEE80211_UHR_OPER_PARAMS_DBE_ENA)) { + const struct ieee80211_uhr_dbe_info *dbe = + (const void *)(data + needed); + + needed += sizeof(*dbe); + if (len < needed) + return false; + + if (dbe->params & + IEEE80211_UHR_DBE_OPER_DIS_SUBCHANNEL_BITMAP_PRES) { + needed += sizeof(dbe->dis_subch_bmap[0]); + if (len < needed) + return false; + } } return len >= needed; @@ -72,12 +310,15 @@ static inline bool ieee80211_uhr_oper_size_ok(const u8 *data, u8 len, static inline const struct ieee80211_uhr_npca_info * ieee80211_uhr_npca_info(const struct ieee80211_uhr_operation *oper) { + const u8 *pos = oper->variable; + if (!(oper->params & cpu_to_le16(IEEE80211_UHR_OPER_PARAMS_NPCA_ENA))) return NULL; - /* FIXME: DPS */ + if (oper->params & cpu_to_le16(IEEE80211_UHR_OPER_PARAMS_DPS_ENA)) + pos += sizeof(struct ieee80211_uhr_dps_info); - return (const void *)oper->variable; + return (const void *)pos; } static inline const __le16 * @@ -131,6 +372,24 @@ ieee80211_uhr_npca_dis_subch_bitmap(const struct ieee80211_uhr_operation *oper) #define IEEE80211_UHR_MAC_CAP_DBE_EHT_MCS_MAP_160_PRES 0x08 #define IEEE80211_UHR_MAC_CAP_DBE_EHT_MCS_MAP_320_PRES 0x10 +/** + * enum ieee80211_uhr_dbe_max_supported_bw - DBE Maximum Supported Bandwidth + * + * As per spec P802.11bn_D1.3 "Table 9-bb5—Encoding of the DBE Maximum + * Supported Bandwidth field". + * + * @IEEE80211_UHR_DBE_MAX_BW_40: Indicates 40 MHz DBE max supported bw + * @IEEE80211_UHR_DBE_MAX_BW_80: Indicates 80 MHz DBE max supported bw + * @IEEE80211_UHR_DBE_MAX_BW_160: Indicates 160 MHz DBE max supported bw + * @IEEE80211_UHR_DBE_MAX_BW_320: Indicates 320 MHz DBE max supported bw + */ +enum ieee80211_uhr_dbe_max_supported_bw { + IEEE80211_UHR_DBE_MAX_BW_40 = 1, + IEEE80211_UHR_DBE_MAX_BW_80 = 2, + IEEE80211_UHR_DBE_MAX_BW_160 = 3, + IEEE80211_UHR_DBE_MAX_BW_320 = 4, +}; + struct ieee80211_uhr_cap_mac { u8 mac_cap[5]; } __packed; From 8838bb185ef3c801ea4641a95bdace6210cd4b4e Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Tue, 3 Mar 2026 03:16:23 +0000 Subject: [PATCH 0263/1561] dt-bindings: net: dsa: maxlinear,mxl862xx: remove port label The ports in the example device tree should not have a 'label' property. Labels for all user ports have been removed from an earlier submission, but this was overlooked in the case of the CPU port. Remove 'cpu' port label from the example. Suggested-by: Vladimir Oltean Signed-off-by: Daniel Golle Reviewed-by: Andrew Lunn Acked-by: Conor Dooley Link: https://patch.msgid.link/61579de297eb636ec5f1e6c97d453e26abb0625d.1772507210.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- .../devicetree/bindings/net/dsa/maxlinear,mxl862xx.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/dsa/maxlinear,mxl862xx.yaml b/Documentation/devicetree/bindings/net/dsa/maxlinear,mxl862xx.yaml index f1d667f7a055..2f19c19c60f3 100644 --- a/Documentation/devicetree/bindings/net/dsa/maxlinear,mxl862xx.yaml +++ b/Documentation/devicetree/bindings/net/dsa/maxlinear,mxl862xx.yaml @@ -110,7 +110,6 @@ examples: port@9 { reg = <9>; - label = "cpu"; ethernet = <&gmac0>; phy-mode = "usxgmii"; From aefa52a28a36cf4c9063c095a8191c951e07eb4e Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Tue, 3 Mar 2026 03:17:43 +0000 Subject: [PATCH 0264/1561] net: dsa: mxl862xx: rename MDIO op arguments The use of the 'port' argument name for functions implementing the MDIO bus operations is misleading as the port address isn't equal to the PHY address. Rename the MDIO operation argument name to match the prototypes of mdiobus_write, mdiobus_read, mdiobus_c45_read and mdiobus_c45_write. Suggested-by: Vladimir Oltean Signed-off-by: Daniel Golle Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/e1f4cb3bcffc7df9af0f2c9b673b14c7e1201c9a.1772507674.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mxl862xx/mxl862xx.c | 32 ++++++++++++++--------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/net/dsa/mxl862xx/mxl862xx.c b/drivers/net/dsa/mxl862xx/mxl862xx.c index b1e2094b5816..8281c749967a 100644 --- a/drivers/net/dsa/mxl862xx/mxl862xx.c +++ b/drivers/net/dsa/mxl862xx/mxl862xx.c @@ -44,13 +44,13 @@ static enum dsa_tag_protocol mxl862xx_get_tag_protocol(struct dsa_switch *ds, } /* PHY access via firmware relay */ -static int mxl862xx_phy_read_mmd(struct mxl862xx_priv *priv, int port, - int devadd, int reg) +static int mxl862xx_phy_read_mmd(struct mxl862xx_priv *priv, int addr, + int devadd, int regnum) { struct mdio_relay_data param = { - .phy = port, + .phy = addr, .mmd = devadd, - .reg = cpu_to_le16(reg), + .reg = cpu_to_le16(regnum), }; int ret; @@ -61,40 +61,40 @@ static int mxl862xx_phy_read_mmd(struct mxl862xx_priv *priv, int port, return le16_to_cpu(param.data); } -static int mxl862xx_phy_write_mmd(struct mxl862xx_priv *priv, int port, - int devadd, int reg, u16 data) +static int mxl862xx_phy_write_mmd(struct mxl862xx_priv *priv, int addr, + int devadd, int regnum, u16 data) { struct mdio_relay_data param = { - .phy = port, + .phy = addr, .mmd = devadd, - .reg = cpu_to_le16(reg), + .reg = cpu_to_le16(regnum), .data = cpu_to_le16(data), }; return MXL862XX_API_WRITE(priv, INT_GPHY_WRITE, param); } -static int mxl862xx_phy_read_mii_bus(struct mii_bus *bus, int port, int regnum) +static int mxl862xx_phy_read_mii_bus(struct mii_bus *bus, int addr, int regnum) { - return mxl862xx_phy_read_mmd(bus->priv, port, 0, regnum); + return mxl862xx_phy_read_mmd(bus->priv, addr, 0, regnum); } -static int mxl862xx_phy_write_mii_bus(struct mii_bus *bus, int port, +static int mxl862xx_phy_write_mii_bus(struct mii_bus *bus, int addr, int regnum, u16 val) { - return mxl862xx_phy_write_mmd(bus->priv, port, 0, regnum, val); + return mxl862xx_phy_write_mmd(bus->priv, addr, 0, regnum, val); } -static int mxl862xx_phy_read_c45_mii_bus(struct mii_bus *bus, int port, +static int mxl862xx_phy_read_c45_mii_bus(struct mii_bus *bus, int addr, int devadd, int regnum) { - return mxl862xx_phy_read_mmd(bus->priv, port, devadd, regnum); + return mxl862xx_phy_read_mmd(bus->priv, addr, devadd, regnum); } -static int mxl862xx_phy_write_c45_mii_bus(struct mii_bus *bus, int port, +static int mxl862xx_phy_write_c45_mii_bus(struct mii_bus *bus, int addr, int devadd, int regnum, u16 val) { - return mxl862xx_phy_write_mmd(bus->priv, port, devadd, regnum, val); + return mxl862xx_phy_write_mmd(bus->priv, addr, devadd, regnum, val); } static int mxl862xx_wait_ready(struct dsa_switch *ds) From d86670b837fbe1994e8b1ce9d1f56f1272038ad1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 3 Mar 2026 08:35:01 -0800 Subject: [PATCH 0265/1561] tools: ynl: rename TESTS variable to TEST_PROGS Use the standard kselftest variable naming for tests in the Makefile. NIPA depends on being able to selectively target tests by setting those variables on the CLI. Acked-by: Matthieu Baerts (NGI0) Reviewed-by: Hangbin Liu Reviewed-by: Donald Hunter Link: https://patch.msgid.link/20260303163504.2084981-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/tests/Makefile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/net/ynl/tests/Makefile b/tools/net/ynl/tests/Makefile index c1df2e001255..ee4362ca286d 100644 --- a/tools/net/ynl/tests/Makefile +++ b/tools/net/ynl/tests/Makefile @@ -1,23 +1,23 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for YNL tests -TESTS := \ +TEST_PROGS := \ test_ynl_cli.sh \ test_ynl_ethtool.sh \ -# end of TESTS +# end of TEST_PROGS -all: $(TESTS) +all: $(TEST_PROGS) run_tests: - @for test in $(TESTS); do \ + @for test in $(TEST_PROGS); do \ ./$$test; \ done -install: $(TESTS) +install: $(TEST_PROGS) @mkdir -p $(DESTDIR)/usr/bin @mkdir -p $(DESTDIR)/usr/share/kselftest @cp ../../../testing/selftests/kselftest/ktap_helpers.sh $(DESTDIR)/usr/share/kselftest/ - @for test in $(TESTS); do \ + @for test in $(TEST_PROGS); do \ name=$$(basename $$test .sh); \ sed -e 's|^ynl=.*|ynl="ynl"|' \ -e 's|^ynl_ethtool=.*|ynl_ethtool="ynl-ethtool"|' \ From 3e90e00da96b6eddc91b5fffb19089d35af48695 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 3 Mar 2026 08:35:02 -0800 Subject: [PATCH 0266/1561] tools: ynl: don't install tests in /usr/bin/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Until commit 790792ebc960 ("tools: ynl: don't install tests") YNL selftests were installed with all the other YNL outputs. That's no longer the case, as tests are not really production artifacts. Let's not install them in /usr/bin at all, and mirror kselftest format more closely: For: make -C tools/net/ynl/tests/ install DESTDIR=tmp tmp/usr/share/kselftest ├── ktap_helpers.sh └── ynl ├── test_ynl_cli.sh └── test_ynl_ethtool.sh Acked-by: Matthieu Baerts (NGI0) Reviewed-by: Hangbin Liu Reviewed-by: Donald Hunter Link: https://patch.msgid.link/20260303163504.2084981-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/tests/Makefile | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tools/net/ynl/tests/Makefile b/tools/net/ynl/tests/Makefile index ee4362ca286d..5045341300e2 100644 --- a/tools/net/ynl/tests/Makefile +++ b/tools/net/ynl/tests/Makefile @@ -14,16 +14,15 @@ run_tests: done install: $(TEST_PROGS) - @mkdir -p $(DESTDIR)/usr/bin - @mkdir -p $(DESTDIR)/usr/share/kselftest + @mkdir -p $(DESTDIR)/usr/share/kselftest/ynl @cp ../../../testing/selftests/kselftest/ktap_helpers.sh $(DESTDIR)/usr/share/kselftest/ @for test in $(TEST_PROGS); do \ - name=$$(basename $$test .sh); \ + name=$$(basename $$test); \ sed -e 's|^ynl=.*|ynl="ynl"|' \ -e 's|^ynl_ethtool=.*|ynl_ethtool="ynl-ethtool"|' \ -e 's|KSELFTEST_KTAP_HELPERS=.*|KSELFTEST_KTAP_HELPERS="/usr/share/kselftest/ktap_helpers.sh"|' \ - $$test > $(DESTDIR)/usr/bin/$$name; \ - chmod +x $(DESTDIR)/usr/bin/$$name; \ + $$test > $(DESTDIR)/usr/share/kselftest/ynl/$$name; \ + chmod +x $(DESTDIR)/usr/share/kselftest/ynl/$$name; \ done clean distclean: From 2bfc36f5ea163b9ea620c99cac9582245e3681ef Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 3 Mar 2026 08:35:03 -0800 Subject: [PATCH 0267/1561] tools: ynl: support INSTALL_PATH in the tests Makefile We have modelled the YNL tests after ksft to be able to reuse the NIPA wrappers. Make sure YNL honors INSTALL_PATH not just DESTDIR, ksft uses INSTALL_PATH. Acked-by: Matthieu Baerts (NGI0) Reviewed-by: Hangbin Liu Reviewed-by: Donald Hunter Link: https://patch.msgid.link/20260303163504.2084981-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/tests/Makefile | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tools/net/ynl/tests/Makefile b/tools/net/ynl/tests/Makefile index 5045341300e2..ee717db12dff 100644 --- a/tools/net/ynl/tests/Makefile +++ b/tools/net/ynl/tests/Makefile @@ -6,6 +6,8 @@ TEST_PROGS := \ test_ynl_ethtool.sh \ # end of TEST_PROGS +INSTALL_PATH ?= $(DESTDIR)/usr/share/kselftest + all: $(TEST_PROGS) run_tests: @@ -14,15 +16,15 @@ run_tests: done install: $(TEST_PROGS) - @mkdir -p $(DESTDIR)/usr/share/kselftest/ynl - @cp ../../../testing/selftests/kselftest/ktap_helpers.sh $(DESTDIR)/usr/share/kselftest/ + @mkdir -p $(INSTALL_PATH)/ynl + @cp ../../../testing/selftests/kselftest/ktap_helpers.sh $(INSTALL_PATH)/ @for test in $(TEST_PROGS); do \ name=$$(basename $$test); \ sed -e 's|^ynl=.*|ynl="ynl"|' \ -e 's|^ynl_ethtool=.*|ynl_ethtool="ynl-ethtool"|' \ - -e 's|KSELFTEST_KTAP_HELPERS=.*|KSELFTEST_KTAP_HELPERS="/usr/share/kselftest/ktap_helpers.sh"|' \ - $$test > $(DESTDIR)/usr/share/kselftest/ynl/$$name; \ - chmod +x $(DESTDIR)/usr/share/kselftest/ynl/$$name; \ + -e 's|KSELFTEST_KTAP_HELPERS=.*|KSELFTEST_KTAP_HELPERS="$(INSTALL_PATH)/ktap_helpers.sh"|' \ + $$test > $(INSTALL_PATH)/ynl/$$name; \ + chmod +x $(INSTALL_PATH)/ynl/$$name; \ done clean distclean: From 32d6fd5832ad8773fa19192a3e6695cac5cd4379 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 3 Mar 2026 08:35:04 -0800 Subject: [PATCH 0268/1561] tools: ynl: produce kselftest-list.txt from tests Executors will need kselftest-list.txt so create it when tests are installed. Acked-by: Matthieu Baerts (NGI0) Reviewed-by: Hangbin Liu Reviewed-by: Donald Hunter Link: https://patch.msgid.link/20260303163504.2084981-5-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/tests/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/net/ynl/tests/Makefile b/tools/net/ynl/tests/Makefile index ee717db12dff..eb166c9550db 100644 --- a/tools/net/ynl/tests/Makefile +++ b/tools/net/ynl/tests/Makefile @@ -26,6 +26,9 @@ install: $(TEST_PROGS) $$test > $(INSTALL_PATH)/ynl/$$name; \ chmod +x $(INSTALL_PATH)/ynl/$$name; \ done + @for test in $(TEST_PROGS); do \ + echo "ynl:$$test"; \ + done > $(INSTALL_PATH)/kselftest-list.txt clean distclean: @# Nothing to clean From c26b8c4e291c55c7b2138d7bcb27348ca3a5ae59 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 2 Mar 2026 16:39:33 +0000 Subject: [PATCH 0269/1561] net: fix off-by-one in udp_flow_src_port() / psp_write_headers() udp_flow_src_port() and psp_write_headers() use ip_local_port_range. ip_local_port_range is inclusive : all ports between min and max can be used. Before this patch, if ip_local_port_range was set to 40000-40001 40001 would not be used as a source port. Use reciprocal_scale() to help code readability. Not tagged for stable trees, as this change could break user expectations. Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260302163933.1754393-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/udp.h | 3 ++- net/psp/psp_main.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/net/udp.h b/include/net/udp.h index da68702ddf6e..b648003e5792 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -29,6 +29,7 @@ #include #include #include +#include /** * struct udp_skb_cb - UDP(-Lite) private variables @@ -376,7 +377,7 @@ static inline __be16 udp_flow_src_port(struct net *net, struct sk_buff *skb, */ hash ^= hash << 16; - return htons((((u64) hash * (max - min)) >> 32) + min); + return htons(reciprocal_scale(hash, max - min + 1) + min); } static inline int udp_rqueue_get(struct sock *sk) diff --git a/net/psp/psp_main.c b/net/psp/psp_main.c index d4c04c923c5a..9508b6c38003 100644 --- a/net/psp/psp_main.c +++ b/net/psp/psp_main.c @@ -202,7 +202,7 @@ static void psp_write_headers(struct net *net, struct sk_buff *skb, __be32 spi, * reciprocal divide. */ hash ^= hash << 16; - uh->source = htons((((u64)hash * (max - min)) >> 32) + min); + uh->source = htons(reciprocal_scale(hash, max - min + 1) + min); } else { uh->source = udp_flow_src_port(net, skb, 0, 0, false); } From 42a101775bc515a77ac0c39de6cef42aa7abb3a7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 2 Mar 2026 18:14:26 +0000 Subject: [PATCH 0270/1561] net: add rps_tag_ptr type and helpers Add a new rps_tag_ptr type to encode a pointer and a size to a power-of-two table. Three helpers are added converting an rps_tag_ptr to: 1) A log of the size. 2) A mask : (size - 1). 3) A pointer to the array. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260302181432.1836150-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/rps-types.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 include/net/rps-types.h diff --git a/include/net/rps-types.h b/include/net/rps-types.h new file mode 100644 index 000000000000..6b90a66866c1 --- /dev/null +++ b/include/net/rps-types.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _NET_RPS_TYPES_H +#define _NET_RPS_TYPES_H + +/* Define a rps_tag_ptr: + * Low order 5 bits are used to store the ilog2(size) of an RPS table. + */ +typedef unsigned long rps_tag_ptr; + +static inline u8 rps_tag_to_log(rps_tag_ptr tag_ptr) +{ + return tag_ptr & 31U; +} + +static inline u32 rps_tag_to_mask(rps_tag_ptr tag_ptr) +{ + return (1U << rps_tag_to_log(tag_ptr)) - 1; +} + +static inline void *rps_tag_to_table(rps_tag_ptr tag_ptr) +{ + return (void *)(tag_ptr & ~31UL); +} +#endif /* _NET_RPS_TYPES_H */ From 61753849b8bc6420cc5834fb3de331ce1134060d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 2 Mar 2026 18:14:27 +0000 Subject: [PATCH 0271/1561] net-sysfs: remove rcu field from 'struct rps_sock_flow_table' Removing rcu_head (and @mask in a following patch) will allow a power-of-two allocation and thus high-order allocation for better performance. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260302181432.1836150-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/rps.h | 1 - net/core/sysctl_net_core.c | 4 +++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/net/rps.h b/include/net/rps.h index f1794cd2e7fb..32cfa250d9f9 100644 --- a/include/net/rps.h +++ b/include/net/rps.h @@ -60,7 +60,6 @@ struct rps_dev_flow_table { * meaning we use 32-6=26 bits for the hash. */ struct rps_sock_flow_table { - struct rcu_head rcu; u32 mask; u32 ents[] ____cacheline_aligned_in_smp; diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 03aea10073f0..0b659c932cff 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -147,6 +147,7 @@ static int rps_sock_flow_sysctl(const struct ctl_table *table, int write, }; struct rps_sock_flow_table *orig_sock_table, *sock_table; static DEFINE_MUTEX(sock_flow_mutex); + void *tofree = NULL; mutex_lock(&sock_flow_mutex); @@ -193,13 +194,14 @@ static int rps_sock_flow_sysctl(const struct ctl_table *table, int write, if (orig_sock_table) { static_branch_dec(&rps_needed); static_branch_dec(&rfs_needed); - kvfree_rcu(orig_sock_table, rcu); + tofree = orig_sock_table; } } } mutex_unlock(&sock_flow_mutex); + kvfree_rcu_mightsleep(tofree); return ret; } #endif /* CONFIG_RPS */ From 9cde131cdd888873363b5d9dfd8d4d4c1fae6986 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 2 Mar 2026 18:14:28 +0000 Subject: [PATCH 0272/1561] net-sysfs: add rps_sock_flow_table_mask() helper In preparation of the following patch, abstract access to the @mask field in 'struct rps_sock_flow_table'. Also cleanup rps_sock_flow_sysctl() a bit : - Rename orig_sock_table to o_sock_table. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260302181432.1836150-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/rps.h | 11 ++++++++--- net/core/dev.c | 4 +++- net/core/sysctl_net_core.c | 19 ++++++++++--------- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/include/net/rps.h b/include/net/rps.h index 32cfa250d9f9..82cdffdf3e6b 100644 --- a/include/net/rps.h +++ b/include/net/rps.h @@ -60,18 +60,23 @@ struct rps_dev_flow_table { * meaning we use 32-6=26 bits for the hash. */ struct rps_sock_flow_table { - u32 mask; + u32 _mask; u32 ents[] ____cacheline_aligned_in_smp; }; #define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num])) +static inline u32 rps_sock_flow_table_mask(const struct rps_sock_flow_table *table) +{ + return table->_mask; +} + #define RPS_NO_CPU 0xffff static inline void rps_record_sock_flow(struct rps_sock_flow_table *table, u32 hash) { - unsigned int index = hash & table->mask; + unsigned int index = hash & rps_sock_flow_table_mask(table); u32 val = hash & ~net_hotdata.rps_cpu_mask; /* We only give a hint, preemption can change CPU under us */ @@ -129,7 +134,7 @@ static inline void _sock_rps_delete_flow(const struct sock *sk) rcu_read_lock(); table = rcu_dereference(net_hotdata.rps_sock_flow_table); if (table) { - index = hash & table->mask; + index = hash & rps_sock_flow_table_mask(table); if (READ_ONCE(table->ents[index]) != RPS_NO_CPU) WRITE_ONCE(table->ents[index], RPS_NO_CPU); } diff --git a/net/core/dev.c b/net/core/dev.c index 19b84eaa2643..92f8eeac8de3 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5112,12 +5112,14 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, if (flow_table && sock_flow_table) { struct rps_dev_flow *rflow; u32 next_cpu; + u32 flow_id; u32 ident; /* First check into global flow table if there is a match. * This READ_ONCE() pairs with WRITE_ONCE() from rps_record_sock_flow(). */ - ident = READ_ONCE(sock_flow_table->ents[hash & sock_flow_table->mask]); + flow_id = hash & rps_sock_flow_table_mask(sock_flow_table); + ident = READ_ONCE(sock_flow_table->ents[flow_id]); if ((ident ^ hash) & ~net_hotdata.rps_cpu_mask) goto try_rps; diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 0b659c932cff..cfbe798493b5 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -145,16 +145,17 @@ static int rps_sock_flow_sysctl(const struct ctl_table *table, int write, .maxlen = sizeof(size), .mode = table->mode }; - struct rps_sock_flow_table *orig_sock_table, *sock_table; + struct rps_sock_flow_table *o_sock_table, *sock_table; static DEFINE_MUTEX(sock_flow_mutex); void *tofree = NULL; mutex_lock(&sock_flow_mutex); - orig_sock_table = rcu_dereference_protected( + o_sock_table = rcu_dereference_protected( net_hotdata.rps_sock_flow_table, lockdep_is_held(&sock_flow_mutex)); - size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0; + size = o_sock_table ? rps_sock_flow_table_mask(o_sock_table) + 1 : 0; + orig_size = size; ret = proc_dointvec(&tmp, write, buffer, lenp, ppos); @@ -165,6 +166,7 @@ static int rps_sock_flow_sysctl(const struct ctl_table *table, int write, mutex_unlock(&sock_flow_mutex); return -EINVAL; } + sock_table = o_sock_table; size = roundup_pow_of_two(size); if (size != orig_size) { sock_table = @@ -175,26 +177,25 @@ static int rps_sock_flow_sysctl(const struct ctl_table *table, int write, } net_hotdata.rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1; - sock_table->mask = size - 1; - } else - sock_table = orig_sock_table; + sock_table->_mask = size - 1; + } for (i = 0; i < size; i++) sock_table->ents[i] = RPS_NO_CPU; } else sock_table = NULL; - if (sock_table != orig_sock_table) { + if (sock_table != o_sock_table) { rcu_assign_pointer(net_hotdata.rps_sock_flow_table, sock_table); if (sock_table) { static_branch_inc(&rps_needed); static_branch_inc(&rfs_needed); } - if (orig_sock_table) { + if (o_sock_table) { static_branch_dec(&rps_needed); static_branch_dec(&rfs_needed); - tofree = orig_sock_table; + tofree = o_sock_table; } } } From dd378109d20ff6789091fa3558607c1d242d80ad Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 2 Mar 2026 18:14:29 +0000 Subject: [PATCH 0273/1561] net-sysfs: use rps_tag_ptr and remove metadata from rps_sock_flow_table Instead of storing the @mask at the beginning of rps_sock_flow_table, use 5 low order bits of the rps_tag_ptr to store the log of the size. This removes a potential cache line miss to fetch @mask. More importantly, we can switch to vmalloc_huge() without wasting memory. Tested with: numactl --interleave=all bash -c "echo 4194304 >/proc/sys/net/core/rps_sock_flow_entries" Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260302181432.1836150-5-edumazet@google.com Signed-off-by: Jakub Kicinski --- Documentation/networking/scaling.rst | 13 ++-- include/net/hotdata.h | 5 +- include/net/rps.h | 42 ++++++------- net/core/dev.c | 12 ++-- net/core/sysctl_net_core.c | 89 +++++++++++++++------------- 5 files changed, 86 insertions(+), 75 deletions(-) diff --git a/Documentation/networking/scaling.rst b/Documentation/networking/scaling.rst index 0023afa530ec..6c261eb48845 100644 --- a/Documentation/networking/scaling.rst +++ b/Documentation/networking/scaling.rst @@ -403,16 +403,21 @@ Both of these need to be set before RFS is enabled for a receive queue. Values for both are rounded up to the nearest power of two. The suggested flow count depends on the expected number of active connections at any given time, which may be significantly less than the number of open -connections. We have found that a value of 32768 for rps_sock_flow_entries -works fairly well on a moderately loaded server. +connections. We have found that a value of 65536 for rps_sock_flow_entries +works fairly well on a moderately loaded server. Big servers might +need 1048576 or even higher values. + +On a NUMA host it is advisable to spread rps_sock_flow_entries on all nodes. + +numactl --interleave=all bash -c "echo 1048576 >/proc/sys/net/core/rps_sock_flow_entries" For a single queue device, the rps_flow_cnt value for the single queue would normally be configured to the same value as rps_sock_flow_entries. For a multi-queue device, the rps_flow_cnt for each queue might be configured as rps_sock_flow_entries / N, where N is the number of -queues. So for instance, if rps_sock_flow_entries is set to 32768 and there +queues. So for instance, if rps_sock_flow_entries is set to 131072 and there are 16 configured receive queues, rps_flow_cnt for each queue might be -configured as 2048. +configured as 8192. Accelerated RFS diff --git a/include/net/hotdata.h b/include/net/hotdata.h index 6632b1aa7584..62534d1f3c70 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -6,6 +6,9 @@ #include #include #include +#ifdef CONFIG_RPS +#include +#endif struct skb_defer_node { struct llist_head defer_list; @@ -33,7 +36,7 @@ struct net_hotdata { struct kmem_cache *skbuff_fclone_cache; struct kmem_cache *skb_small_head_cache; #ifdef CONFIG_RPS - struct rps_sock_flow_table __rcu *rps_sock_flow_table; + rps_tag_ptr rps_sock_flow_table; u32 rps_cpu_mask; #endif struct skb_defer_node __percpu *skb_defer_nodes; diff --git a/include/net/rps.h b/include/net/rps.h index 82cdffdf3e6b..dee930d9dd38 100644 --- a/include/net/rps.h +++ b/include/net/rps.h @@ -8,6 +8,7 @@ #include #ifdef CONFIG_RPS +#include extern struct static_key_false rps_needed; extern struct static_key_false rfs_needed; @@ -60,45 +61,38 @@ struct rps_dev_flow_table { * meaning we use 32-6=26 bits for the hash. */ struct rps_sock_flow_table { - u32 _mask; - - u32 ents[] ____cacheline_aligned_in_smp; + u32 ent; }; -#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num])) - -static inline u32 rps_sock_flow_table_mask(const struct rps_sock_flow_table *table) -{ - return table->_mask; -} #define RPS_NO_CPU 0xffff -static inline void rps_record_sock_flow(struct rps_sock_flow_table *table, - u32 hash) +static inline void rps_record_sock_flow(rps_tag_ptr tag_ptr, u32 hash) { - unsigned int index = hash & rps_sock_flow_table_mask(table); + unsigned int index = hash & rps_tag_to_mask(tag_ptr); u32 val = hash & ~net_hotdata.rps_cpu_mask; + struct rps_sock_flow_table *table; /* We only give a hint, preemption can change CPU under us */ val |= raw_smp_processor_id(); + table = rps_tag_to_table(tag_ptr); /* The following WRITE_ONCE() is paired with the READ_ONCE() * here, and another one in get_rps_cpu(). */ - if (READ_ONCE(table->ents[index]) != val) - WRITE_ONCE(table->ents[index], val); + if (READ_ONCE(table[index].ent) != val) + WRITE_ONCE(table[index].ent, val); } static inline void _sock_rps_record_flow_hash(__u32 hash) { - struct rps_sock_flow_table *sock_flow_table; + rps_tag_ptr tag_ptr; if (!hash) return; rcu_read_lock(); - sock_flow_table = rcu_dereference(net_hotdata.rps_sock_flow_table); - if (sock_flow_table) - rps_record_sock_flow(sock_flow_table, hash); + tag_ptr = READ_ONCE(net_hotdata.rps_sock_flow_table); + if (tag_ptr) + rps_record_sock_flow(tag_ptr, hash); rcu_read_unlock(); } @@ -125,6 +119,7 @@ static inline void _sock_rps_record_flow(const struct sock *sk) static inline void _sock_rps_delete_flow(const struct sock *sk) { struct rps_sock_flow_table *table; + rps_tag_ptr tag_ptr; u32 hash, index; hash = READ_ONCE(sk->sk_rxhash); @@ -132,11 +127,12 @@ static inline void _sock_rps_delete_flow(const struct sock *sk) return; rcu_read_lock(); - table = rcu_dereference(net_hotdata.rps_sock_flow_table); - if (table) { - index = hash & rps_sock_flow_table_mask(table); - if (READ_ONCE(table->ents[index]) != RPS_NO_CPU) - WRITE_ONCE(table->ents[index], RPS_NO_CPU); + tag_ptr = READ_ONCE(net_hotdata.rps_sock_flow_table); + if (tag_ptr) { + index = hash & rps_tag_to_mask(tag_ptr); + table = rps_tag_to_table(tag_ptr); + if (READ_ONCE(table[index].ent) != RPS_NO_CPU) + WRITE_ONCE(table[index].ent, RPS_NO_CPU); } rcu_read_unlock(); } diff --git a/net/core/dev.c b/net/core/dev.c index 92f8eeac8de3..7ae87be81afc 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5075,9 +5075,9 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb, static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, struct rps_dev_flow **rflowp) { - const struct rps_sock_flow_table *sock_flow_table; struct netdev_rx_queue *rxqueue = dev->_rx; struct rps_dev_flow_table *flow_table; + rps_tag_ptr global_tag_ptr; struct rps_map *map; int cpu = -1; u32 tcpu; @@ -5108,8 +5108,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, if (!hash) goto done; - sock_flow_table = rcu_dereference(net_hotdata.rps_sock_flow_table); - if (flow_table && sock_flow_table) { + global_tag_ptr = READ_ONCE(net_hotdata.rps_sock_flow_table); + if (flow_table && global_tag_ptr) { + struct rps_sock_flow_table *sock_flow_table; struct rps_dev_flow *rflow; u32 next_cpu; u32 flow_id; @@ -5118,8 +5119,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, /* First check into global flow table if there is a match. * This READ_ONCE() pairs with WRITE_ONCE() from rps_record_sock_flow(). */ - flow_id = hash & rps_sock_flow_table_mask(sock_flow_table); - ident = READ_ONCE(sock_flow_table->ents[flow_id]); + flow_id = hash & rps_tag_to_mask(global_tag_ptr); + sock_flow_table = rps_tag_to_table(global_tag_ptr); + ident = READ_ONCE(sock_flow_table[flow_id].ent); if ((ident ^ hash) & ~net_hotdata.rps_cpu_mask) goto try_rps; diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index cfbe798493b5..502705e04649 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -138,68 +138,73 @@ done: static int rps_sock_flow_sysctl(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { + struct rps_sock_flow_table *o_sock_table, *sock_table; + static DEFINE_MUTEX(sock_flow_mutex); + rps_tag_ptr o_tag_ptr, tag_ptr; unsigned int orig_size, size; - int ret, i; struct ctl_table tmp = { .data = &size, .maxlen = sizeof(size), .mode = table->mode }; - struct rps_sock_flow_table *o_sock_table, *sock_table; - static DEFINE_MUTEX(sock_flow_mutex); void *tofree = NULL; + int ret, i; + u8 log; mutex_lock(&sock_flow_mutex); - o_sock_table = rcu_dereference_protected( - net_hotdata.rps_sock_flow_table, - lockdep_is_held(&sock_flow_mutex)); - size = o_sock_table ? rps_sock_flow_table_mask(o_sock_table) + 1 : 0; + o_tag_ptr = tag_ptr = net_hotdata.rps_sock_flow_table; + + size = o_tag_ptr ? rps_tag_to_mask(o_tag_ptr) + 1 : 0; + o_sock_table = rps_tag_to_table(o_tag_ptr); orig_size = size; ret = proc_dointvec(&tmp, write, buffer, lenp, ppos); - if (write) { - if (size) { - if (size > 1<<29) { - /* Enforce limit to prevent overflow */ + if (!write) + goto unlock; + + if (size) { + if (size > 1<<29) { + /* Enforce limit to prevent overflow */ + mutex_unlock(&sock_flow_mutex); + return -EINVAL; + } + sock_table = o_sock_table; + size = roundup_pow_of_two(size); + if (size != orig_size) { + sock_table = vmalloc_huge(size * sizeof(*sock_table), + GFP_KERNEL); + if (!sock_table) { mutex_unlock(&sock_flow_mutex); - return -EINVAL; - } - sock_table = o_sock_table; - size = roundup_pow_of_two(size); - if (size != orig_size) { - sock_table = - vmalloc(RPS_SOCK_FLOW_TABLE_SIZE(size)); - if (!sock_table) { - mutex_unlock(&sock_flow_mutex); - return -ENOMEM; - } - net_hotdata.rps_cpu_mask = - roundup_pow_of_two(nr_cpu_ids) - 1; - sock_table->_mask = size - 1; + return -ENOMEM; } + net_hotdata.rps_cpu_mask = + roundup_pow_of_two(nr_cpu_ids) - 1; + log = ilog2(size); + tag_ptr = (rps_tag_ptr)sock_table | log; + } - for (i = 0; i < size; i++) - sock_table->ents[i] = RPS_NO_CPU; - } else - sock_table = NULL; - - if (sock_table != o_sock_table) { - rcu_assign_pointer(net_hotdata.rps_sock_flow_table, - sock_table); - if (sock_table) { - static_branch_inc(&rps_needed); - static_branch_inc(&rfs_needed); - } - if (o_sock_table) { - static_branch_dec(&rps_needed); - static_branch_dec(&rfs_needed); - tofree = o_sock_table; - } + for (i = 0; i < size; i++) + sock_table[i].ent = RPS_NO_CPU; + } else { + sock_table = NULL; + tag_ptr = 0UL; + } + if (tag_ptr != o_tag_ptr) { + smp_store_release(&net_hotdata.rps_sock_flow_table, tag_ptr); + if (sock_table) { + static_branch_inc(&rps_needed); + static_branch_inc(&rfs_needed); + } + if (o_sock_table) { + static_branch_dec(&rps_needed); + static_branch_dec(&rfs_needed); + tofree = o_sock_table; } } +unlock: mutex_unlock(&sock_flow_mutex); kvfree_rcu_mightsleep(tofree); From 68b6394a220b782586e30d0efb94633ccaef9c8d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 2 Mar 2026 18:14:30 +0000 Subject: [PATCH 0274/1561] net-sysfs: get rid of rps_dev_flow_lock Use unrcu_pointer() and xchg() in store_rps_dev_flow_table_cnt() instead of a dedicated spinlock. Make a similar change in rx_queue_release(), so that both functions use a similar construct and synchronization. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260302181432.1836150-6-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/net-sysfs.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 07624b682b08..52fcf7fa58a8 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1084,7 +1084,6 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, { unsigned long mask, count; struct rps_dev_flow_table *table, *old_table; - static DEFINE_SPINLOCK(rps_dev_flow_lock); int rc; if (!capable(CAP_NET_ADMIN)) @@ -1128,11 +1127,8 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, table = NULL; } - spin_lock(&rps_dev_flow_lock); - old_table = rcu_dereference_protected(queue->rps_flow_table, - lockdep_is_held(&rps_dev_flow_lock)); - rcu_assign_pointer(queue->rps_flow_table, table); - spin_unlock(&rps_dev_flow_lock); + old_table = unrcu_pointer(xchg(&queue->rps_flow_table, + RCU_INITIALIZER(table))); if (old_table) call_rcu(&old_table->rcu, rps_dev_flow_table_release); @@ -1161,8 +1157,8 @@ static void rx_queue_release(struct kobject *kobj) { struct netdev_rx_queue *queue = to_rx_queue(kobj); #ifdef CONFIG_RPS + struct rps_dev_flow_table *old_table; struct rps_map *map; - struct rps_dev_flow_table *flow_table; map = rcu_dereference_protected(queue->rps_map, 1); if (map) { @@ -1170,11 +1166,9 @@ static void rx_queue_release(struct kobject *kobj) kfree_rcu(map, rcu); } - flow_table = rcu_dereference_protected(queue->rps_flow_table, 1); - if (flow_table) { - RCU_INIT_POINTER(queue->rps_flow_table, NULL); - call_rcu(&flow_table->rcu, rps_dev_flow_table_release); - } + old_table = unrcu_pointer(xchg(&queue->rps_flow_table, NULL)); + if (old_table) + call_rcu(&old_table->rcu, rps_dev_flow_table_release); #endif memset(kobj, 0, sizeof(*kobj)); From b2cc61857e3cf7e103089dd54c0548d54a6ae381 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 2 Mar 2026 18:14:31 +0000 Subject: [PATCH 0275/1561] net-sysfs: remove rcu field from 'struct rps_dev_flow_table' Remove rps_dev_flow_table_release() in favor of kvfree_rcu_mightsleep(). In the following pach, we will remove "u8 @log" field and 'struct rps_dev_flow_table' size will be a power-of-two. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260302181432.1836150-7-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/rps.h | 1 - net/core/net-sysfs.c | 11 ++--------- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/include/net/rps.h b/include/net/rps.h index dee930d9dd38..e900480e828b 100644 --- a/include/net/rps.h +++ b/include/net/rps.h @@ -44,7 +44,6 @@ struct rps_dev_flow { */ struct rps_dev_flow_table { u8 log; - struct rcu_head rcu; struct rps_dev_flow flows[]; }; #define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \ diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 52fcf7fa58a8..fd6f81930bc6 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1072,13 +1072,6 @@ static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, return sysfs_emit(buf, "%lu\n", val); } -static void rps_dev_flow_table_release(struct rcu_head *rcu) -{ - struct rps_dev_flow_table *table = container_of(rcu, - struct rps_dev_flow_table, rcu); - vfree(table); -} - static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, const char *buf, size_t len) { @@ -1131,7 +1124,7 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, RCU_INITIALIZER(table))); if (old_table) - call_rcu(&old_table->rcu, rps_dev_flow_table_release); + kvfree_rcu_mightsleep(old_table); return len; } @@ -1168,7 +1161,7 @@ static void rx_queue_release(struct kobject *kobj) old_table = unrcu_pointer(xchg(&queue->rps_flow_table, NULL)); if (old_table) - call_rcu(&old_table->rcu, rps_dev_flow_table_release); + kvfree_rcu_mightsleep(old_table); #endif memset(kobj, 0, sizeof(*kobj)); From a435163d3100b044d620990772a5ce1684ff02ca Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 2 Mar 2026 18:14:32 +0000 Subject: [PATCH 0276/1561] net-sysfs: use rps_tag_ptr and remove metadata from rps_dev_flow_table Instead of storing the @log at the beginning of rps_dev_flow_table use 5 low order bits of the rps_tag_ptr to store the log of the size. This removes a potential cache line miss (for light traffic). This allows us to switch to one high-order allocation instead of vmalloc() when CONFIG_RFS_ACCEL is not set. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260302181432.1836150-8-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/netdev_rx_queue.h | 3 +- include/net/rps.h | 10 ----- net/core/dev.c | 53 +++++++++++++++----------- net/core/net-sysfs.c | 72 +++++++++++++++++------------------ 4 files changed, 68 insertions(+), 70 deletions(-) diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h index cfa72c485387..08f81329fc11 100644 --- a/include/net/netdev_rx_queue.h +++ b/include/net/netdev_rx_queue.h @@ -8,13 +8,14 @@ #include #include #include +#include /* This structure contains an instance of an RX queue. */ struct netdev_rx_queue { struct xdp_rxq_info xdp_rxq; #ifdef CONFIG_RPS struct rps_map __rcu *rps_map; - struct rps_dev_flow_table __rcu *rps_flow_table; + rps_tag_ptr rps_flow_table; #endif struct kobject kobj; const struct attribute_group **groups; diff --git a/include/net/rps.h b/include/net/rps.h index e900480e828b..e33c6a2fa8bb 100644 --- a/include/net/rps.h +++ b/include/net/rps.h @@ -39,16 +39,6 @@ struct rps_dev_flow { }; #define RPS_NO_FILTER 0xffff -/* - * The rps_dev_flow_table structure contains a table of flow mappings. - */ -struct rps_dev_flow_table { - u8 log; - struct rps_dev_flow flows[]; -}; -#define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \ - ((_num) * sizeof(struct rps_dev_flow))) - /* * The rps_sock_flow_table contains mappings of flows to the last CPU * on which they were processed by the application (set in recvmsg). diff --git a/net/core/dev.c b/net/core/dev.c index 7ae87be81afc..b470487788a2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4968,16 +4968,16 @@ EXPORT_SYMBOL(rps_needed); struct static_key_false rfs_needed __read_mostly; EXPORT_SYMBOL(rfs_needed); -static u32 rfs_slot(u32 hash, const struct rps_dev_flow_table *flow_table) +static u32 rfs_slot(u32 hash, rps_tag_ptr tag_ptr) { - return hash_32(hash, flow_table->log); + return hash_32(hash, rps_tag_to_log(tag_ptr)); } #ifdef CONFIG_RFS_ACCEL /** * rps_flow_is_active - check whether the flow is recently active. * @rflow: Specific flow to check activity. - * @flow_table: per-queue flowtable that @rflow belongs to. + * @log: ilog2(hashsize). * @cpu: CPU saved in @rflow. * * If the CPU has processed many packets since the flow's last activity @@ -4986,7 +4986,7 @@ static u32 rfs_slot(u32 hash, const struct rps_dev_flow_table *flow_table) * Return: true if flow was recently active. */ static bool rps_flow_is_active(struct rps_dev_flow *rflow, - struct rps_dev_flow_table *flow_table, + u8 log, unsigned int cpu) { unsigned int flow_last_active; @@ -4999,7 +4999,7 @@ static bool rps_flow_is_active(struct rps_dev_flow *rflow, flow_last_active = READ_ONCE(rflow->last_qtail); return (int)(sd_input_head - flow_last_active) < - (int)(10 << flow_table->log); + (int)(10 << log); } #endif @@ -5011,9 +5011,10 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb, u32 head; #ifdef CONFIG_RFS_ACCEL struct netdev_rx_queue *rxqueue; - struct rps_dev_flow_table *flow_table; + struct rps_dev_flow *flow_table; struct rps_dev_flow *old_rflow; struct rps_dev_flow *tmp_rflow; + rps_tag_ptr q_tag_ptr; unsigned int tmp_cpu; u16 rxq_index; u32 flow_id; @@ -5028,16 +5029,18 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb, goto out; rxqueue = dev->_rx + rxq_index; - flow_table = rcu_dereference(rxqueue->rps_flow_table); - if (!flow_table) + q_tag_ptr = READ_ONCE(rxqueue->rps_flow_table); + if (!q_tag_ptr) goto out; - flow_id = rfs_slot(hash, flow_table); - tmp_rflow = &flow_table->flows[flow_id]; + flow_id = rfs_slot(hash, q_tag_ptr); + flow_table = rps_tag_to_table(q_tag_ptr); + tmp_rflow = flow_table + flow_id; tmp_cpu = READ_ONCE(tmp_rflow->cpu); if (READ_ONCE(tmp_rflow->filter) != RPS_NO_FILTER) { - if (rps_flow_is_active(tmp_rflow, flow_table, + if (rps_flow_is_active(tmp_rflow, + rps_tag_to_log(q_tag_ptr), tmp_cpu)) { if (hash != READ_ONCE(tmp_rflow->hash) || next_cpu == tmp_cpu) @@ -5076,8 +5079,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, struct rps_dev_flow **rflowp) { struct netdev_rx_queue *rxqueue = dev->_rx; - struct rps_dev_flow_table *flow_table; - rps_tag_ptr global_tag_ptr; + rps_tag_ptr global_tag_ptr, q_tag_ptr; struct rps_map *map; int cpu = -1; u32 tcpu; @@ -5098,9 +5100,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, /* Avoid computing hash if RFS/RPS is not active for this rxqueue */ - flow_table = rcu_dereference(rxqueue->rps_flow_table); + q_tag_ptr = READ_ONCE(rxqueue->rps_flow_table); map = rcu_dereference(rxqueue->rps_map); - if (!flow_table && !map) + if (!q_tag_ptr && !map) goto done; skb_reset_network_header(skb); @@ -5109,8 +5111,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, goto done; global_tag_ptr = READ_ONCE(net_hotdata.rps_sock_flow_table); - if (flow_table && global_tag_ptr) { + if (q_tag_ptr && global_tag_ptr) { struct rps_sock_flow_table *sock_flow_table; + struct rps_dev_flow *flow_table; struct rps_dev_flow *rflow; u32 next_cpu; u32 flow_id; @@ -5130,7 +5133,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, /* OK, now we know there is a match, * we can look at the local (per receive queue) flow table */ - rflow = &flow_table->flows[rfs_slot(hash, flow_table)]; + flow_id = rfs_slot(hash, q_tag_ptr); + flow_table = rps_tag_to_table(q_tag_ptr); + rflow = flow_table + flow_id; tcpu = rflow->cpu; /* @@ -5190,19 +5195,23 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, u32 flow_id, u16 filter_id) { struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index; - struct rps_dev_flow_table *flow_table; + struct rps_dev_flow *flow_table; struct rps_dev_flow *rflow; + rps_tag_ptr q_tag_ptr; bool expire = true; + u8 log; rcu_read_lock(); - flow_table = rcu_dereference(rxqueue->rps_flow_table); - if (flow_table && flow_id < (1UL << flow_table->log)) { + q_tag_ptr = READ_ONCE(rxqueue->rps_flow_table); + log = rps_tag_to_log(q_tag_ptr); + if (q_tag_ptr && flow_id < (1UL << log)) { unsigned int cpu; - rflow = &flow_table->flows[flow_id]; + flow_table = rps_tag_to_table(q_tag_ptr); + rflow = flow_table + flow_id; cpu = READ_ONCE(rflow->cpu); if (READ_ONCE(rflow->filter) == filter_id && - rps_flow_is_active(rflow, flow_table, cpu)) + rps_flow_is_active(rflow, log, cpu)) expire = false; } rcu_read_unlock(); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index fd6f81930bc6..2ce011fae249 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1060,14 +1060,12 @@ out: static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, char *buf) { - struct rps_dev_flow_table *flow_table; unsigned long val = 0; + rps_tag_ptr tag_ptr; - rcu_read_lock(); - flow_table = rcu_dereference(queue->rps_flow_table); - if (flow_table) - val = 1UL << flow_table->log; - rcu_read_unlock(); + tag_ptr = READ_ONCE(queue->rps_flow_table); + if (tag_ptr) + val = 1UL << rps_tag_to_log(tag_ptr); return sysfs_emit(buf, "%lu\n", val); } @@ -1075,8 +1073,10 @@ static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, const char *buf, size_t len) { + rps_tag_ptr otag, tag_ptr = 0UL; + struct rps_dev_flow *table; unsigned long mask, count; - struct rps_dev_flow_table *table, *old_table; + size_t sz; int rc; if (!capable(CAP_NET_ADMIN)) @@ -1093,38 +1093,36 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, */ while ((mask | (mask >> 1)) != mask) mask |= (mask >> 1); - /* On 64 bit arches, must check mask fits in table->mask (u32), - * and on 32bit arches, must check - * RPS_DEV_FLOW_TABLE_SIZE(mask + 1) doesn't overflow. - */ -#if BITS_PER_LONG > 32 - if (mask > (unsigned long)(u32)mask) + + /* Do not accept too large tables. */ + if (mask > (INT_MAX / sizeof(*table) - 1)) return -EINVAL; -#else - if (mask > (ULONG_MAX - RPS_DEV_FLOW_TABLE_SIZE(1)) - / sizeof(struct rps_dev_flow)) { - /* Enforce a limit to prevent overflow */ - return -EINVAL; - } -#endif - table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(mask + 1)); + + sz = max_t(size_t, sizeof(*table) * (mask + 1), + PAGE_SIZE); + if (sz <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER) || + is_power_of_2(sizeof(*table))) + table = kvmalloc(sz, GFP_KERNEL); + else + table = vmalloc(sz); if (!table) return -ENOMEM; - - table->log = ilog2(mask) + 1; - for (count = 0; count <= mask; count++) { - table->flows[count].cpu = RPS_NO_CPU; - table->flows[count].filter = RPS_NO_FILTER; + tag_ptr = (rps_tag_ptr)table; + if (rps_tag_to_log(tag_ptr)) { + pr_err_once("store_rps_dev_flow_table_cnt() got a non page aligned allocation.\n"); + kvfree(table); + return -ENOMEM; + } + tag_ptr |= (ilog2(mask) + 1); + for (count = 0; count <= mask; count++) { + table[count].cpu = RPS_NO_CPU; + table[count].filter = RPS_NO_FILTER; } - } else { - table = NULL; } - old_table = unrcu_pointer(xchg(&queue->rps_flow_table, - RCU_INITIALIZER(table))); - - if (old_table) - kvfree_rcu_mightsleep(old_table); + otag = xchg(&queue->rps_flow_table, tag_ptr); + if (otag) + kvfree_rcu_mightsleep(rps_tag_to_table(otag)); return len; } @@ -1150,7 +1148,7 @@ static void rx_queue_release(struct kobject *kobj) { struct netdev_rx_queue *queue = to_rx_queue(kobj); #ifdef CONFIG_RPS - struct rps_dev_flow_table *old_table; + rps_tag_ptr tag_ptr; struct rps_map *map; map = rcu_dereference_protected(queue->rps_map, 1); @@ -1159,9 +1157,9 @@ static void rx_queue_release(struct kobject *kobj) kfree_rcu(map, rcu); } - old_table = unrcu_pointer(xchg(&queue->rps_flow_table, NULL)); - if (old_table) - kvfree_rcu_mightsleep(old_table); + tag_ptr = xchg(&queue->rps_flow_table, 0UL); + if (tag_ptr) + kvfree_rcu_mightsleep(rps_tag_to_table(tag_ptr)); #endif memset(kobj, 0, sizeof(*kobj)); From 1d88db16156e02989087eb6287662827c10ada13 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 3 Mar 2026 19:12:43 +0000 Subject: [PATCH 0277/1561] tcp: move tcp_do_parse_auth_options() to net/ipv4/tcp.c tcp_do_parse_auth_options() fast path user is tcp_inbound_hash(). Move tcp_do_parse_auth_options() right before tcp_inbound_hash() so that it can be (auto)inlined by the compiler. As a bonus, stack canary is removed from tcp_inbound_hash(). Also use EXPORT_IPV6_MOD(tcp_do_parse_auth_options). $ scripts/bloat-o-meter -t vmlinux.0 vmlinux add/remove: 0/0 grow/shrink: 1/0 up/down: 131/0 (131) Function old new delta tcp_inbound_hash 565 696 +131 Total: Before=25223788, After=25223919, chg +0.00% Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260303191243.557245-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp.c | 54 ++++++++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp_input.c | 54 -------------------------------------------- 2 files changed, 54 insertions(+), 54 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1790d2fa75ad..5997e0fb7a45 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4987,6 +4987,60 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, #endif +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) +/* + * Parse Signature options + */ +int tcp_do_parse_auth_options(const struct tcphdr *th, + const u8 **md5_hash, const u8 **ao_hash) +{ + int length = (th->doff << 2) - sizeof(*th); + const u8 *ptr = (const u8 *)(th + 1); + unsigned int minlen = TCPOLEN_MD5SIG; + + if (IS_ENABLED(CONFIG_TCP_AO)) + minlen = sizeof(struct tcp_ao_hdr) + 1; + + *md5_hash = NULL; + *ao_hash = NULL; + + /* If not enough data remaining, we can short cut */ + while (length >= minlen) { + int opcode = *ptr++; + int opsize; + + switch (opcode) { + case TCPOPT_EOL: + return 0; + case TCPOPT_NOP: + length--; + continue; + default: + opsize = *ptr++; + if (opsize < 2 || opsize > length) + return -EINVAL; + if (opcode == TCPOPT_MD5SIG) { + if (opsize != TCPOLEN_MD5SIG) + return -EINVAL; + if (unlikely(*md5_hash || *ao_hash)) + return -EEXIST; + *md5_hash = ptr; + } else if (opcode == TCPOPT_AO) { + if (opsize <= sizeof(struct tcp_ao_hdr)) + return -EINVAL; + if (unlikely(*md5_hash || *ao_hash)) + return -EEXIST; + *ao_hash = ptr; + } + } + ptr += opsize - 2; + length -= opsize; + } + return 0; +} +EXPORT_IPV6_MOD(tcp_do_parse_auth_options); +#endif + /* Called with rcu_read_lock() */ enum skb_drop_reason tcp_inbound_hash(struct sock *sk, const struct request_sock *req, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9079bc963f81..c27d11c3470b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4714,60 +4714,6 @@ static bool tcp_fast_parse_options(const struct net *net, return true; } -#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) -/* - * Parse Signature options - */ -int tcp_do_parse_auth_options(const struct tcphdr *th, - const u8 **md5_hash, const u8 **ao_hash) -{ - int length = (th->doff << 2) - sizeof(*th); - const u8 *ptr = (const u8 *)(th + 1); - unsigned int minlen = TCPOLEN_MD5SIG; - - if (IS_ENABLED(CONFIG_TCP_AO)) - minlen = sizeof(struct tcp_ao_hdr) + 1; - - *md5_hash = NULL; - *ao_hash = NULL; - - /* If not enough data remaining, we can short cut */ - while (length >= minlen) { - int opcode = *ptr++; - int opsize; - - switch (opcode) { - case TCPOPT_EOL: - return 0; - case TCPOPT_NOP: - length--; - continue; - default: - opsize = *ptr++; - if (opsize < 2 || opsize > length) - return -EINVAL; - if (opcode == TCPOPT_MD5SIG) { - if (opsize != TCPOLEN_MD5SIG) - return -EINVAL; - if (unlikely(*md5_hash || *ao_hash)) - return -EEXIST; - *md5_hash = ptr; - } else if (opcode == TCPOPT_AO) { - if (opsize <= sizeof(struct tcp_ao_hdr)) - return -EINVAL; - if (unlikely(*md5_hash || *ao_hash)) - return -EEXIST; - *ao_hash = ptr; - } - } - ptr += opsize - 2; - length -= opsize; - } - return 0; -} -EXPORT_SYMBOL(tcp_do_parse_auth_options); -#endif - /* Sorry, PAWS as specified is broken wrt. pure-ACKs -DaveM * * It is not fatal. If this ACK does _not_ change critical state (seqs, window) From 39ae83b0f557969c461d93c608545443a2f5c307 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 3 Mar 2026 17:24:37 -0800 Subject: [PATCH 0278/1561] net: openvswitch: clean up some kernel-doc warnings Fix some kernel-doc warnings in openvswitch.h: Mark enum placeholders that are not used as "private" so that kernel-doc comments are not needed for them. Correct names for 2 enum values: Warning: include/uapi/linux/openvswitch.h:300 Excess enum value '@OVS_VPORT_UPCALL_SUCCESS' description in 'ovs_vport_upcall_attr' Warning: include/uapi/linux/openvswitch.h:300 Excess enum value '@OVS_VPORT_UPCALL_FAIL' description in 'ovs_vport_upcall_attr' Convert one comment from "/**" kernel-doc to a plain C "/*" comment: Warning: include/uapi/linux/openvswitch.h:638 This comment starts with '/**', but isn't a kernel-doc comment. * Omit attributes for notifications. Add more kernel-doc: - add kernel-doc for kernel-only enums; - add missing kernel-doc for enum ovs_datapath_attr; - add missing kernel-doc for enum ovs_flow_attr; - add missing kernel-doc for enum ovs_sample_attr; - add kernel-doc for enum ovs_check_pkt_len_attr; - add kernel-doc for enum ovs_action_attr; - add kernel-doc for enum ovs_action_push_eth; - add kernel-doc for enum ovs_vport_attr; Signed-off-by: Randy Dunlap Acked-by: Ilya Maximets Link: https://patch.msgid.link/20260304012437.469151-1-rdunlap@infradead.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/openvswitch.h | 76 ++++++++++++++++++++++++++++---- 1 file changed, 68 insertions(+), 8 deletions(-) diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 3092c2c6f1d2..aa2acdbda8f8 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -70,12 +70,15 @@ enum ovs_datapath_cmd { * set on the datapath port (for OVS_ACTION_ATTR_MISS). Only valid on * %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should * not be sent. + * @OVS_DP_ATTR_MASKS_CACHE_SIZE: Number of the entries in the flow table + * masks cache. * @OVS_DP_ATTR_PER_CPU_PIDS: Per-cpu array of PIDs for upcalls when * OVS_DP_F_DISPATCH_UPCALL_PER_CPU feature is set. * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the * datapath. Always present in notifications. * @OVS_DP_ATTR_MEGAFLOW_STATS: Statistics about mega flow masks usage for the * datapath. Always present in notifications. + * @OVS_DP_ATTR_USER_FEATURES: OVS_DP_F_* flags. * @OVS_DP_ATTR_IFINDEX: Interface index for a new datapath netdev. Only * valid for %OVS_DP_CMD_NEW requests. * @@ -83,18 +86,23 @@ enum ovs_datapath_cmd { * payload for %OVS_DP_* commands. */ enum ovs_datapath_attr { + /* private: */ OVS_DP_ATTR_UNSPEC, + /* public: */ OVS_DP_ATTR_NAME, /* name of dp_ifindex netdev */ OVS_DP_ATTR_UPCALL_PID, /* Netlink PID to receive upcalls */ OVS_DP_ATTR_STATS, /* struct ovs_dp_stats */ OVS_DP_ATTR_MEGAFLOW_STATS, /* struct ovs_dp_megaflow_stats */ OVS_DP_ATTR_USER_FEATURES, /* OVS_DP_F_* */ + /* private: */ OVS_DP_ATTR_PAD, + /* public: */ OVS_DP_ATTR_MASKS_CACHE_SIZE, OVS_DP_ATTR_PER_CPU_PIDS, /* Netlink PIDS to receive upcalls in * per-cpu dispatch mode */ OVS_DP_ATTR_IFINDEX, + /* private: */ __OVS_DP_ATTR_MAX }; @@ -181,6 +189,7 @@ enum ovs_packet_cmd { * %OVS_USERSPACE_ATTR_EGRESS_TUN_PORT attribute, which is sent only if the * output port is actually a tunnel port. Contains the output tunnel key * extracted from the packet as nested %OVS_TUNNEL_KEY_ATTR_* attributes. + * @OVS_PACKET_ATTR_PROBE: Packet operation is a feature probe. * @OVS_PACKET_ATTR_MRU: Present for an %OVS_PACKET_CMD_ACTION and * @OVS_PACKET_ATTR_LEN: Packet size before truncation. * %OVS_PACKET_ATTR_USERSPACE action specify the Maximum received fragment @@ -196,21 +205,26 @@ enum ovs_packet_cmd { * payload for %OVS_PACKET_* commands. */ enum ovs_packet_attr { + /* private: */ OVS_PACKET_ATTR_UNSPEC, + /* public: */ OVS_PACKET_ATTR_PACKET, /* Packet data. */ OVS_PACKET_ATTR_KEY, /* Nested OVS_KEY_ATTR_* attributes. */ OVS_PACKET_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ OVS_PACKET_ATTR_USERDATA, /* OVS_ACTION_ATTR_USERSPACE arg. */ OVS_PACKET_ATTR_EGRESS_TUN_KEY, /* Nested OVS_TUNNEL_KEY_ATTR_* attributes. */ + /* private: */ OVS_PACKET_ATTR_UNUSED1, OVS_PACKET_ATTR_UNUSED2, + /* public: */ OVS_PACKET_ATTR_PROBE, /* Packet operation is a feature probe, error logging should be suppressed. */ OVS_PACKET_ATTR_MRU, /* Maximum received IP fragment size. */ OVS_PACKET_ATTR_LEN, /* Packet size before truncation. */ OVS_PACKET_ATTR_HASH, /* Packet hash. */ OVS_PACKET_ATTR_UPCALL_PID, /* u32 Netlink PID. */ + /* private: */ __OVS_PACKET_ATTR_MAX }; @@ -257,6 +271,11 @@ enum ovs_vport_type { * upcalls should not be sent. * @OVS_VPORT_ATTR_STATS: A &struct ovs_vport_stats giving statistics for * packets sent or received through the vport. + * @OVS_VPORT_ATTR_IFINDEX: Provides the ifindex of a vport, or sets the desired + * ifindex while creating a new vport with type %OVS_VPORT_TYPE_INTERNAL. + * @OVS_VPORT_ATTR_NETNSID: Provides the netns id of the vport if it's not local. + * @OVS_VPORT_ATTR_UPCALL_STATS: Provides upcall statistics for a vport. + * Contains nested %OVS_VPORT_UPCALL_ATTR_* attributes. * * These attributes follow the &struct ovs_header within the Generic Netlink * payload for %OVS_VPORT_* commands. @@ -272,7 +291,9 @@ enum ovs_vport_type { * ovs_header plus %OVS_VPORT_ATTR_PORT_NO determine the vport. */ enum ovs_vport_attr { + /* private: */ OVS_VPORT_ATTR_UNSPEC, + /* public: */ OVS_VPORT_ATTR_PORT_NO, /* u32 port number within datapath */ OVS_VPORT_ATTR_TYPE, /* u32 OVS_VPORT_TYPE_* constant. */ OVS_VPORT_ATTR_NAME, /* string name, up to IFNAMSIZ bytes long */ @@ -280,23 +301,27 @@ enum ovs_vport_attr { OVS_VPORT_ATTR_UPCALL_PID, /* array of u32 Netlink socket PIDs for */ /* receiving upcalls */ OVS_VPORT_ATTR_STATS, /* struct ovs_vport_stats */ + /* private: */ OVS_VPORT_ATTR_PAD, + /* public: */ OVS_VPORT_ATTR_IFINDEX, OVS_VPORT_ATTR_NETNSID, OVS_VPORT_ATTR_UPCALL_STATS, + /* private: */ __OVS_VPORT_ATTR_MAX }; #define OVS_VPORT_ATTR_MAX (__OVS_VPORT_ATTR_MAX - 1) /** - * enum ovs_vport_upcall_attr - attributes for %OVS_VPORT_UPCALL* commands - * @OVS_VPORT_UPCALL_SUCCESS: 64-bit upcall success packets. - * @OVS_VPORT_UPCALL_FAIL: 64-bit upcall fail packets. + * enum ovs_vport_upcall_attr - attributes for %OVS_VPORT_ATTR_UPCALL_STATS + * @OVS_VPORT_UPCALL_ATTR_SUCCESS: 64-bit upcall success packets. + * @OVS_VPORT_UPCALL_ATTR_FAIL: 64-bit upcall fail packets. */ enum ovs_vport_upcall_attr { OVS_VPORT_UPCALL_ATTR_SUCCESS, OVS_VPORT_UPCALL_ATTR_FAIL, + /* private: */ __OVS_VPORT_UPCALL_ATTR_MAX }; @@ -431,6 +456,7 @@ enum ovs_frag_type { OVS_FRAG_TYPE_NONE, OVS_FRAG_TYPE_FIRST, OVS_FRAG_TYPE_LATER, + /* private: */ __OVS_FRAG_TYPE_MAX }; @@ -604,6 +630,8 @@ struct ovs_nsh_key_md1 { * a wildcarded match. Omitting attribute is treated as wildcarding all * corresponding fields. Optional for all requests. If not present, * all flow key bits are exact match bits. + * @OVS_FLOW_ATTR_PROBE: Flow operation is a feature probe, error logging + * should be suppressed. * @OVS_FLOW_ATTR_UFID: A value between 1-16 octets specifying a unique * identifier for the flow. Causes the flow to be indexed by this value rather * than the value of the %OVS_FLOW_ATTR_KEY attribute. Optional for all @@ -617,7 +645,9 @@ struct ovs_nsh_key_md1 { * payload for %OVS_FLOW_* commands. */ enum ovs_flow_attr { + /* private: */ OVS_FLOW_ATTR_UNSPEC, + /* public: */ OVS_FLOW_ATTR_KEY, /* Sequence of OVS_KEY_ATTR_* attributes. */ OVS_FLOW_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ OVS_FLOW_ATTR_STATS, /* struct ovs_flow_stats. */ @@ -629,13 +659,14 @@ enum ovs_flow_attr { * logging should be suppressed. */ OVS_FLOW_ATTR_UFID, /* Variable length unique flow identifier. */ OVS_FLOW_ATTR_UFID_FLAGS,/* u32 of OVS_UFID_F_*. */ + /* private: */ OVS_FLOW_ATTR_PAD, __OVS_FLOW_ATTR_MAX }; #define OVS_FLOW_ATTR_MAX (__OVS_FLOW_ATTR_MAX - 1) -/** +/* * Omit attributes for notifications. * * If a datapath request contains an %OVS_UFID_F_OMIT_* flag, then the datapath @@ -653,17 +684,23 @@ enum ovs_flow_attr { * fractions of packets. * @OVS_SAMPLE_ATTR_ACTIONS: Set of actions to execute in sampling event. * Actions are passed as nested attributes. + * @OVS_SAMPLE_ATTR_ARG: For in-kernel use, passing &struct sample_arg + * derived from other attributes. * * Executes the specified actions with the given probability on a per-packet * basis. Nested actions will be able to access the probability value of the * parent @OVS_ACTION_ATTR_SAMPLE. */ enum ovs_sample_attr { + /* private: */ OVS_SAMPLE_ATTR_UNSPEC, + /* public: */ OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */ OVS_SAMPLE_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ + /* private: */ __OVS_SAMPLE_ATTR_MAX, + /* public: */ #ifdef __KERNEL__ OVS_SAMPLE_ATTR_ARG /* struct sample_arg */ #endif @@ -693,12 +730,15 @@ struct sample_arg { * @OVS_USERSPACE_ATTR_ACTIONS: If present, send actions with upcall. */ enum ovs_userspace_attr { + /* private: */ OVS_USERSPACE_ATTR_UNSPEC, + /* public: */ OVS_USERSPACE_ATTR_PID, /* u32 Netlink PID to receive upcalls. */ OVS_USERSPACE_ATTR_USERDATA, /* Optional user-specified cookie. */ OVS_USERSPACE_ATTR_EGRESS_TUN_PORT, /* Optional, u32 output port * to get tunnel info. */ OVS_USERSPACE_ATTR_ACTIONS, /* Optional flag to get actions. */ + /* private: */ __OVS_USERSPACE_ATTR_MAX }; @@ -819,7 +859,9 @@ struct ovs_action_hash { * @OVS_CT_ATTR_TIMEOUT: Variable length string defining conntrack timeout. */ enum ovs_ct_attr { + /* private: */ OVS_CT_ATTR_UNSPEC, + /* public: */ OVS_CT_ATTR_COMMIT, /* No argument, commits connection. */ OVS_CT_ATTR_ZONE, /* u16 zone id. */ OVS_CT_ATTR_MARK, /* mark to associate with this connection. */ @@ -831,6 +873,7 @@ enum ovs_ct_attr { OVS_CT_ATTR_EVENTMASK, /* u32 mask of IPCT_* events. */ OVS_CT_ATTR_TIMEOUT, /* Associate timeout with this connection for * fine-grain timeout tuning. */ + /* private: */ __OVS_CT_ATTR_MAX }; @@ -859,7 +902,9 @@ enum ovs_ct_attr { * @OVS_NAT_ATTR_PROTO_RANDOM: Flag for fully randomized L4 port mapping */ enum ovs_nat_attr { + /* private: */ OVS_NAT_ATTR_UNSPEC, + /* public: */ OVS_NAT_ATTR_SRC, OVS_NAT_ATTR_DST, OVS_NAT_ATTR_IP_MIN, @@ -869,38 +914,44 @@ enum ovs_nat_attr { OVS_NAT_ATTR_PERSISTENT, OVS_NAT_ATTR_PROTO_HASH, OVS_NAT_ATTR_PROTO_RANDOM, + /* private: */ __OVS_NAT_ATTR_MAX, }; #define OVS_NAT_ATTR_MAX (__OVS_NAT_ATTR_MAX - 1) -/* +/** * struct ovs_action_push_eth - %OVS_ACTION_ATTR_PUSH_ETH action argument. * @addresses: Source and destination MAC addresses. - * @eth_type: Ethernet type */ struct ovs_action_push_eth { struct ovs_key_ethernet addresses; }; -/* +/** * enum ovs_check_pkt_len_attr - Attributes for %OVS_ACTION_ATTR_CHECK_PKT_LEN. * * @OVS_CHECK_PKT_LEN_ATTR_PKT_LEN: u16 Packet length to check for. * @OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER: Nested OVS_ACTION_ATTR_* * actions to apply if the packer length is greater than the specified * length in the attr - OVS_CHECK_PKT_LEN_ATTR_PKT_LEN. - * @OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL - Nested OVS_ACTION_ATTR_* + * @OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL: Nested OVS_ACTION_ATTR_* * actions to apply if the packer length is lesser or equal to the specified * length in the attr - OVS_CHECK_PKT_LEN_ATTR_PKT_LEN. + * @OVS_CHECK_PKT_LEN_ATTR_ARG: For in-kernel use, passing &struct + * check_pkt_len_arg derived from other attributes. */ enum ovs_check_pkt_len_attr { + /* private: */ OVS_CHECK_PKT_LEN_ATTR_UNSPEC, + /* public: */ OVS_CHECK_PKT_LEN_ATTR_PKT_LEN, OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER, OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL, + /* private: */ __OVS_CHECK_PKT_LEN_ATTR_MAX, + /* public: */ #ifdef __KERNEL__ OVS_CHECK_PKT_LEN_ATTR_ARG /* struct check_pkt_len_arg */ #endif @@ -968,6 +1019,9 @@ enum ovs_psample_attr { * from the packet. * @OVS_ACTION_ATTR_SAMPLE: Probabilitically executes actions, as specified in * the nested %OVS_SAMPLE_ATTR_* attributes. + * @OVS_ACTION_ATTR_RECIRC: Recirculate the clone of the packet through the + * datapath with the new id (u32 recirc_id). + * @OVS_ACTION_ATTR_HASH: Compute the packet hash, using &struct ovs_action_hash. * @OVS_ACTION_ATTR_PUSH_MPLS: Push a new MPLS label stack entry onto the * top of the packets MPLS label stack. Set the ethertype of the * encapsulating frame to either %ETH_P_MPLS_UC or %ETH_P_MPLS_MC to @@ -997,6 +1051,8 @@ enum ovs_psample_attr { * start of the packet or at the start of the l3 header depending on the value * of l3 tunnel flag in the tun_flags field of OVS_ACTION_ATTR_ADD_MPLS * argument. + * @OVS_ACTION_ATTR_DEC_TTL: Decrement TTL or hop limit of the packet. Execute + * nested %OVS_DEC_TTL_ATTR_* actions if the value is less or equal to 1. * @OVS_ACTION_ATTR_DROP: Explicit drop action. * @OVS_ACTION_ATTR_PSAMPLE: Send a sample of the packet to external observers * via psample. @@ -1010,7 +1066,9 @@ enum ovs_psample_attr { */ enum ovs_action_attr { + /* private: */ OVS_ACTION_ATTR_UNSPEC, + /* public: */ OVS_ACTION_ATTR_OUTPUT, /* u32 port number. */ OVS_ACTION_ATTR_USERSPACE, /* Nested OVS_USERSPACE_ATTR_*. */ OVS_ACTION_ATTR_SET, /* One nested OVS_KEY_ATTR_*. */ @@ -1040,9 +1098,11 @@ enum ovs_action_attr { OVS_ACTION_ATTR_DROP, /* u32 error code. */ OVS_ACTION_ATTR_PSAMPLE, /* Nested OVS_PSAMPLE_ATTR_*. */ + /* private: */ __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted * from userspace. */ + /* public: */ #ifdef __KERNEL__ OVS_ACTION_ATTR_SET_TO_MASKED, /* Kernel module internal masked * set action converted from From c66e0f453d1afa82534383c58d503238a43fa76c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 4 Mar 2026 01:27:47 +0000 Subject: [PATCH 0279/1561] net: use ktime_t in struct scm_timestamping_internal Instead of using struct timespec64 in scm_timestamping_internal, use ktime_t, saving 24 bytes in kernel stack. This makes tcp_update_recv_tstamps() small enough to be inlined. The ktime_t -> timespec64 conversions happen after socket lock has been released in tcp_recvmsg(), and only if the application requested them. $ scripts/bloat-o-meter -t vmlinux.0 vmlinux add/remove: 0/2 grow/shrink: 5/4 up/down: 146/-277 (-131) Function old new delta tcp_zerocopy_receive 2383 2425 +42 mptcp_recvmsg 1565 1607 +42 tcp_recvmsg_locked 3797 3823 +26 put_cmsg_scm_timestamping64 131 149 +18 put_cmsg_scm_timestamping 131 149 +18 __pfx_tcp_update_recv_tstamps 16 - -16 do_tcp_getsockopt 4024 4006 -18 tcp_recv_timestamp 474 430 -44 tcp_zc_handle_leftover 417 371 -46 __sock_recv_timestamp 1087 1031 -56 tcp_update_recv_tstamps 97 - -97 Total: Before=25223788, After=25223657, chg -0.00% Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Reviewed-by: Jason Xing Link: https://patch.msgid.link/20260304012747.881644-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/socket.h | 2 +- include/net/tcp.h | 11 ++++++-- net/core/scm.c | 12 ++++++--- net/ipv4/tcp.c | 61 +++++++++++++++--------------------------- net/socket.c | 23 ++++++++-------- 5 files changed, 51 insertions(+), 58 deletions(-) diff --git a/include/linux/socket.h b/include/linux/socket.h index ec715ad4bf25..ec4a0a025793 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -415,7 +415,7 @@ struct __kernel_timespec; struct old_timespec32; struct scm_timestamping_internal { - struct timespec64 ts[3]; + ktime_t ts[3]; }; extern void put_cmsg_scm_timestamping64(struct msghdr *msg, struct scm_timestamping_internal *tss); diff --git a/include/net/tcp.h b/include/net/tcp.h index 9cf8785ef0b4..fea6081cf6c7 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -503,8 +503,15 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags); int tcp_set_rcvlowat(struct sock *sk, int val); int tcp_set_window_clamp(struct sock *sk, int val); -void tcp_update_recv_tstamps(struct sk_buff *skb, - struct scm_timestamping_internal *tss); + +static inline void +tcp_update_recv_tstamps(struct sk_buff *skb, + struct scm_timestamping_internal *tss) +{ + tss->ts[0] = skb->tstamp; + tss->ts[2] = skb_hwtstamps(skb)->hwtstamp; +} + void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk, struct scm_timestamping_internal *tss); void tcp_data_ready(struct sock *sk); diff --git a/net/core/scm.c b/net/core/scm.c index a29aa8fb8065..eec13f50ecaf 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -318,8 +318,10 @@ void put_cmsg_scm_timestamping64(struct msghdr *msg, struct scm_timestamping_int int i; for (i = 0; i < ARRAY_SIZE(tss.ts); i++) { - tss.ts[i].tv_sec = tss_internal->ts[i].tv_sec; - tss.ts[i].tv_nsec = tss_internal->ts[i].tv_nsec; + struct timespec64 tv = ktime_to_timespec64(tss_internal->ts[i]); + + tss.ts[i].tv_sec = tv.tv_sec; + tss.ts[i].tv_nsec = tv.tv_nsec; } put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPING_NEW, sizeof(tss), &tss); @@ -332,8 +334,10 @@ void put_cmsg_scm_timestamping(struct msghdr *msg, struct scm_timestamping_inter int i; for (i = 0; i < ARRAY_SIZE(tss.ts); i++) { - tss.ts[i].tv_sec = tss_internal->ts[i].tv_sec; - tss.ts[i].tv_nsec = tss_internal->ts[i].tv_nsec; + struct timespec64 tv = ktime_to_timespec64(tss_internal->ts[i]); + + tss.ts[i].tv_sec = tv.tv_sec; + tss.ts[i].tv_nsec = tv.tv_nsec; } put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPING_OLD, sizeof(tss), &tss); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 5997e0fb7a45..1c8be22a361e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1871,20 +1871,6 @@ int tcp_set_rcvlowat(struct sock *sk, int val) } EXPORT_IPV6_MOD(tcp_set_rcvlowat); -void tcp_update_recv_tstamps(struct sk_buff *skb, - struct scm_timestamping_internal *tss) -{ - if (skb->tstamp) - tss->ts[0] = ktime_to_timespec64(skb->tstamp); - else - tss->ts[0] = (struct timespec64) {0}; - - if (skb_hwtstamps(skb)->hwtstamp) - tss->ts[2] = ktime_to_timespec64(skb_hwtstamps(skb)->hwtstamp); - else - tss->ts[2] = (struct timespec64) {0}; -} - #ifdef CONFIG_MMU static const struct vm_operations_struct tcp_vm_ops = { }; @@ -2376,22 +2362,23 @@ void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk, { int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW); u32 tsflags = READ_ONCE(sk->sk_tsflags); - bool has_timestamping = false; - if (tss->ts[0].tv_sec || tss->ts[0].tv_nsec) { + if (tss->ts[0]) { if (sock_flag(sk, SOCK_RCVTSTAMP)) { + struct timespec64 tv = ktime_to_timespec64(tss->ts[0]); + if (sock_flag(sk, SOCK_RCVTSTAMPNS)) { if (new_tstamp) { struct __kernel_timespec kts = { - .tv_sec = tss->ts[0].tv_sec, - .tv_nsec = tss->ts[0].tv_nsec, + .tv_sec = tv.tv_sec, + .tv_nsec = tv.tv_nsec, }; put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW, sizeof(kts), &kts); } else { struct __kernel_old_timespec ts_old = { - .tv_sec = tss->ts[0].tv_sec, - .tv_nsec = tss->ts[0].tv_nsec, + .tv_sec = tv.tv_sec, + .tv_nsec = tv.tv_nsec, }; put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD, sizeof(ts_old), &ts_old); @@ -2399,41 +2386,37 @@ void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk, } else { if (new_tstamp) { struct __kernel_sock_timeval stv = { - .tv_sec = tss->ts[0].tv_sec, - .tv_usec = tss->ts[0].tv_nsec / 1000, + .tv_sec = tv.tv_sec, + .tv_usec = tv.tv_nsec / 1000, }; put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW, sizeof(stv), &stv); } else { - struct __kernel_old_timeval tv = { - .tv_sec = tss->ts[0].tv_sec, - .tv_usec = tss->ts[0].tv_nsec / 1000, + struct __kernel_old_timeval otv = { + .tv_sec = tv.tv_sec, + .tv_usec = tv.tv_nsec / 1000, }; put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD, - sizeof(tv), &tv); + sizeof(otv), &otv); } } } - if (tsflags & SOF_TIMESTAMPING_SOFTWARE && + if (!(tsflags & SOF_TIMESTAMPING_SOFTWARE && (tsflags & SOF_TIMESTAMPING_RX_SOFTWARE || - !(tsflags & SOF_TIMESTAMPING_OPT_RX_FILTER))) - has_timestamping = true; - else - tss->ts[0] = (struct timespec64) {0}; + !(tsflags & SOF_TIMESTAMPING_OPT_RX_FILTER)))) + tss->ts[0] = 0; } - if (tss->ts[2].tv_sec || tss->ts[2].tv_nsec) { - if (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE && + if (tss->ts[2]) { + if (!(tsflags & SOF_TIMESTAMPING_RAW_HARDWARE && (tsflags & SOF_TIMESTAMPING_RX_HARDWARE || - !(tsflags & SOF_TIMESTAMPING_OPT_RX_FILTER))) - has_timestamping = true; - else - tss->ts[2] = (struct timespec64) {0}; + !(tsflags & SOF_TIMESTAMPING_OPT_RX_FILTER)))) + tss->ts[2] = 0; } - if (has_timestamping) { - tss->ts[1] = (struct timespec64) {0}; + if (tss->ts[0] | tss->ts[2]) { + tss->ts[1] = 0; if (sock_flag(sk, SOCK_TSTAMP_NEW)) put_cmsg_scm_timestamping64(msg, tss); else diff --git a/net/socket.c b/net/socket.c index 05952188127f..68829d09bcf1 100644 --- a/net/socket.c +++ b/net/socket.c @@ -912,11 +912,10 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, { int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP); int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW); - struct scm_timestamping_internal tss; - int empty = 1, false_tstamp = 0; struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); - int if_index; + struct scm_timestamping_internal tss; + int if_index, false_tstamp = 0; ktime_t hwtstamp; u32 tsflags; @@ -961,12 +960,12 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, memset(&tss, 0, sizeof(tss)); tsflags = READ_ONCE(sk->sk_tsflags); - if ((tsflags & SOF_TIMESTAMPING_SOFTWARE && - (tsflags & SOF_TIMESTAMPING_RX_SOFTWARE || - skb_is_err_queue(skb) || - !(tsflags & SOF_TIMESTAMPING_OPT_RX_FILTER))) && - ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0)) - empty = 0; + if (tsflags & SOF_TIMESTAMPING_SOFTWARE && + (tsflags & SOF_TIMESTAMPING_RX_SOFTWARE || + skb_is_err_queue(skb) || + !(tsflags & SOF_TIMESTAMPING_OPT_RX_FILTER))) + tss.ts[0] = skb->tstamp; + if (shhwtstamps && (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE && (tsflags & SOF_TIMESTAMPING_RX_HARDWARE || @@ -983,15 +982,15 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, hwtstamp = ptp_convert_timestamp(&hwtstamp, READ_ONCE(sk->sk_bind_phc)); - if (ktime_to_timespec64_cond(hwtstamp, tss.ts + 2)) { - empty = 0; + if (hwtstamp) { + tss.ts[2] = hwtstamp; if ((tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) && !skb_is_err_queue(skb)) put_ts_pktinfo(msg, skb, if_index); } } - if (!empty) { + if (tss.ts[0] | tss.ts[2]) { if (sock_flag(sk, SOCK_TSTAMP_NEW)) put_cmsg_scm_timestamping64(msg, &tss); else From f85db97bc5d4d2048016cc0cfb5a8d80cab24e9a Mon Sep 17 00:00:00 2001 From: Dimitri Daskalakis Date: Tue, 3 Mar 2026 12:22:58 -0800 Subject: [PATCH 0280/1561] selftests: drv-net: rss: Fix error calculation in test_hitless_key_update This test verifies there are no errors when a devices RSS key is updated while traffic is flowing. The current check is a no-op since the last sample was subtracted from itself. Signed-off-by: Dimitri Daskalakis Link: https://patch.msgid.link/20260303202258.1595661-1-dimitri.daskalakis1@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/rss_ctx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py index 97246eab863a..d7cb30306368 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -357,7 +357,7 @@ def test_hitless_key_update(cfg): tgen.wait_pkts_and_stop(5000) ksft_lt((t1 - t0).total_seconds(), 0.15) - ksft_eq(errors1 - errors1, 0) + ksft_eq(errors1 - errors0, 0) ksft_eq(carrier1 - carrier0, 0) From d37f53822c4c96b3fbcd487b0da4f2232c5863e6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 3 Mar 2026 13:36:25 -0800 Subject: [PATCH 0281/1561] selftests: drv-net: update the README I have added some instructions for driver authors on the NIPA wiki: https://github.com/linux-netdev/nipa/wiki/Guidance-for-test-authors last year. Given the increasingly common use of LLMs let's add those in tree as well. Hopefully this will decrease the number of review comments we have to give to AI-assisted noobs. While at it sync the overall instructions with what's on the GitHub as well. Link: https://patch.msgid.link/20260303213626.2320308-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- .../testing/selftests/drivers/net/README.rst | 90 ++++++++++++++++++- 1 file changed, 88 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/drivers/net/README.rst b/tools/testing/selftests/drivers/net/README.rst index eb838ae94844..dc70a69ee885 100644 --- a/tools/testing/selftests/drivers/net/README.rst +++ b/tools/testing/selftests/drivers/net/README.rst @@ -47,6 +47,10 @@ or:: # Variable set in a file NETIF=eth0 +Please note that the config parser is very simple, if there are +any non-alphanumeric characters in the value it needs to be in +double quotes. + Local test (which don't require endpoint for sending / receiving traffic) need only the ``NETIF`` variable. Remaining variables define the endpoint and communication method. @@ -107,7 +111,7 @@ On the target machine, running the tests will use netdevsim by default:: 1..1 # timeout set to 45 # selftests: drivers/net: ping.py - # TAP version 13 + # KTAP version 1 # 1..3 # ok 1 ping.test_v4 # ok 2 ping.test_v6 @@ -128,9 +132,91 @@ Create a config with remote info:: Run the test:: [/root] # ./ksft-net-drv/drivers/net/ping.py - TAP version 13 + KTAP version 1 1..3 ok 1 ping.test_v4 ok 2 ping.test_v6 # SKIP Test requires IPv6 connectivity ok 3 ping.test_tcp # Totals: pass:2 fail:0 xfail:0 xpass:0 skip:1 error:0 + +Dependencies +~~~~~~~~~~~~ + +The tests have a handful of dependencies. For Fedora / CentOS:: + + dnf -y install netsniff-ng python-yaml socat iperf3 + +Guidance for test authors +========================= + +This section mostly applies to Python tests but some of the guidance +may be more broadly applicable. + +Kernel config +~~~~~~~~~~~~~ + +Each test directory has a ``config`` file listing which kernel +configuration options the tests depend on. This file must be kept +up to date, the CIs build minimal kernels for each test group. + +Adding checks inside the tests to validate that the necessary kernel +configs are enabled is discouraged. The test author may include such +checks, but standalone patches to make tests compatible e.g. with +distro kernel configs are unlikely to be accepted. + +Avoid libraries and frameworks +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Test files should be relatively self contained. The libraries should +only include very core or non-trivial code. +It may be tempting to "factor out" the common code, but fight that urge. +Library code increases the barrier of entry, and complexity in general. + +Avoid mixing test code and boilerplate +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In Python, try to avoid adding code in the ``main()`` function which +instantiates ``NetDrvEnv()`` and calls ``ksft_run()``. It's okay to +set up global resources (e.g. open an RtNetlink socket used by multiple +tests), but any complex logic, test-specific environment configuration +and validation should be done in the tests (even if it means it has to +be repeated). + +Local host is the DUT +~~~~~~~~~~~~~~~~~~~~~ + +Dual-host tests (tests with an endpoint) should be written from the DUT +perspective. IOW the local machine should be the one tested, remote is +just for traffic generation. + +Avoid modifying remote +~~~~~~~~~~~~~~~~~~~~~~ + +Avoid making configuration changes to the remote system as much as possible. +Remote system may be used concurrently by multiple DUTs. + +defer() +~~~~~~~ + +The env must be clean after test exits. Register a ``defer()`` for any +action that needs an "undo" as soon as possible. If you need to run +the cancel action as part of the test - ``defer()`` returns an object +you can ``.exec()``-ute. + +ksft_pr() +~~~~~~~~~ + +Use ``ksft_pr()`` instead of ``print()`` to avoid breaking TAP format. + +ksft_disruptive +~~~~~~~~~~~~~~~ + +By default the tests are expected to be able to run on +single-interface systems. All tests which may disconnect ``NETIF`` +must be annotated with ``@ksft_disruptive``. + +Running tests CI-style +====================== + +See https://github.com/linux-netdev/nipa/wiki for instructions on how +to easily run the tests using ``virtme-ng``. From ad3dfa80be765757f612da04318248f6d20e4f71 Mon Sep 17 00:00:00 2001 From: Jori Koolstra Date: Tue, 3 Mar 2026 17:31:04 +0100 Subject: [PATCH 0282/1561] dibs: change dibs_class to a const struct The class_create() call has been deprecated in favor of class_register() as the driver core now allows for a struct class to be in read-only memory. Change dibs_class to be a const struct class and drop the class_create() call. Link: https://lore.kernel.org/all/2023040244-duffel-pushpin-f738@gregkh/ Suggested-by: Greg Kroah-Hartman Signed-off-by: Jori Koolstra Reviewed-by: Greg Kroah-Hartman Reviewed-by: Alexandra Winter Link: https://patch.msgid.link/20260303163104.3749311-1-jkoolstra@xs4all.nl Signed-off-by: Jakub Kicinski --- drivers/dibs/dibs_main.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/dibs/dibs_main.c b/drivers/dibs/dibs_main.c index f1816361b74d..14c3e2d84902 100644 --- a/drivers/dibs/dibs_main.c +++ b/drivers/dibs/dibs_main.c @@ -19,7 +19,9 @@ MODULE_DESCRIPTION("Direct Internal Buffer Sharing class"); MODULE_LICENSE("GPL"); -static struct class *dibs_class; +static const struct class dibs_class = { + .name = "dibs", +}; /* use an array rather a list for fast mapping: */ static struct dibs_client *clients[MAX_DIBS_CLIENTS]; @@ -137,7 +139,7 @@ struct dibs_dev *dibs_dev_alloc(void) if (!dibs) return dibs; dibs->dev.release = dibs_dev_release; - dibs->dev.class = dibs_class; + dibs->dev.class = &dibs_class; device_initialize(&dibs->dev); return dibs; @@ -253,9 +255,9 @@ static int __init dibs_init(void) { int rc; - dibs_class = class_create("dibs"); - if (IS_ERR(dibs_class)) - return PTR_ERR(dibs_class); + rc = class_register(&dibs_class); + if (rc) + return rc; rc = dibs_loopback_init(); if (rc) @@ -267,7 +269,7 @@ static int __init dibs_init(void) static void __exit dibs_exit(void) { dibs_loopback_exit(); - class_destroy(dibs_class); + class_unregister(&dibs_class); } subsys_initcall(dibs_init); From 718e1f6dd06e8055ee9ca297a3f2219168bda19f Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Mon, 2 Mar 2026 10:16:33 +0530 Subject: [PATCH 0283/1561] amd-xgbe: define macros for MAC versions and speed select values Define symbolic constants for MAC hardware version numbers and speed select register values to improve code readability and maintainability. This replaces magic numbers like 0x30, 0x33, 0x07, 0x06, etc. with descriptive macro names that indicate their purpose: MAC versions: - XGBE_MAC_VER_30: Baseline version supporting Rx adaptation - XGBE_MAC_VER_33: P100a platform version Speed select values for MAC_TCR_SS register: - XGBE_MAC_SS_10G: 10Gbps XGMII mode - XGBE_MAC_SS_2_5G_GMII: 2.5Gbps GMII mode (older platforms) - XGBE_MAC_SS_2_5G_XGMII: 2.5Gbps XGMII mode (P100a) - XGBE_MAC_SS_1G: 1Gbps mode - XGBE_MAC_SS_100M: 100Mbps mode - XGBE_MAC_SS_10M: 10Mbps mode No functional changes. Signed-off-by: Raju Rangoju Link: https://patch.msgid.link/20260302044634.1388661-1-Raju.Rangoju@amd.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/amd/xgbe/xgbe-dev.c | 8 ++++---- drivers/net/ethernet/amd/xgbe/xgbe.h | 19 +++++++++++++++++++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index c04a9c76bd40..1a91ec455bee 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -722,16 +722,16 @@ static int xgbe_set_speed(struct xgbe_prv_data *pdata, int speed) switch (speed) { case SPEED_10: - ss = 0x07; + ss = XGBE_MAC_SS_10M; break; case SPEED_1000: - ss = 0x03; + ss = XGBE_MAC_SS_1G; break; case SPEED_2500: - ss = 0x02; + ss = XGBE_MAC_SS_2_5G_GMII; break; case SPEED_10000: - ss = 0x00; + ss = XGBE_MAC_SS_10G; break; default: return -EINVAL; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index 1269b8ce9249..32cab96e013c 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -267,6 +267,25 @@ #define XGBE_GEN_HI_MASK GENMASK(31, 16) #define XGBE_GEN_LO_MASK GENMASK(15, 0) +/* MAC hardware version numbers (SNPSVER field in MAC_VR register) */ +#define XGBE_MAC_VER_30 0x30 /* Baseline Rx adaptation support */ +#define XGBE_MAC_VER_33 0x33 /* P100a platform */ + +/* MAC Speed Select (SS) values for MAC_TCR register + * These values are written to the SS field to configure link speed. + * Note: P100a uses XGMII mode (0x06) for 2.5G instead of GMII (0x02) + */ +/* Note: 100M and 2.5G GMII share the same value (0x02) but are + * differentiated by the mode/interface type at the PHY level + */ + +#define XGBE_MAC_SS_10G 0x00 /* 10Gbps - XGMII mode */ +#define XGBE_MAC_SS_2_5G_GMII 0x02 /* 2.5Gbps - GMII mode (YC) */ +#define XGBE_MAC_SS_2_5G_XGMII 0x06 /* 2.5Gbps - XGMII mode (P100a) */ +#define XGBE_MAC_SS_1G 0x03 /* 1Gbps */ +#define XGBE_MAC_SS_100M 0x02 /* 100Mbps */ +#define XGBE_MAC_SS_10M 0x07 /* 10Mbps */ + struct xgbe_prv_data; struct xgbe_packet_data { From ea274bf8529a70c987c7314b31176fc15f030dae Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Mon, 2 Mar 2026 10:16:34 +0530 Subject: [PATCH 0284/1561] amd-xgbe: add support for P100a platform Add hardware support for the AMD P100a platform featuring the ethernet controller PCI device ID 0x1122. Platform-specific changes include: 1. PCI device ID and register configuration: - Add XGBE_P100a_PCI_DEVICE_ID (0x1122) for recognition - Configure platform-specific XPCS window registers - Disable CDR workaround and RRC for this platform 2. XPCS window offset calculation fix: The P100a platform uses a different memory mapping scheme for XPCS register access. The offset calculation differs between platforms: - Older platforms (YC): offset = base + (addr & mask) The address is masked first, then added to the window base. - P100a: offset = (base + addr) & mask The full address is added to base first, then masked. This is critical because using the wrong calculation causes register reads/writes to access incorrect addresses, leading to incorrect behaviour. 3. 2.5G speed mode handling: P100a uses XGMII mode (ss=0x06) for 2.5G instead of GMII mode (ss=0x02) used by older platforms. The MAC version check determines which mode to use. 4. Port speed bits extended: Extend XP_PROP_0_PORT_SPEEDS from 5 bits to 6 bits to support the additional 5G speed capability. 5. Rx adaptation disabled: Rx adaptation is disabled for P100a (MAC version 0x33) as this feature requires further development for this platform. 6. Rate change command for 2.5G: Use XGBE_MB_SUBCMD_2_5G_KX subcommand for 2.5G mode on P100a instead of XGBE_MB_SUBCMD_NONE used on older platforms. Signed-off-by: Raju Rangoju Link: https://patch.msgid.link/20260302044634.1388661-2-Raju.Rangoju@amd.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/amd/xgbe/xgbe-common.h | 4 ++- drivers/net/ethernet/amd/xgbe/xgbe-dev.c | 33 +++++++++++++++++++-- drivers/net/ethernet/amd/xgbe/xgbe-pci.c | 8 +++++ drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c | 22 ++++++++++++-- drivers/net/ethernet/amd/xgbe/xgbe.h | 6 +++- 5 files changed, 65 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h index 711f295eb777..57cd8d2e1ad7 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h @@ -832,6 +832,8 @@ #define PCS_V2_YC_WINDOW_SELECT 0x18064 #define PCS_V3_RN_WINDOW_DEF 0xf8078 #define PCS_V3_RN_WINDOW_SELECT 0xf807c +#define PCS_P100a_WINDOW_DEF 0x8060 +#define PCS_P100a_WINDOW_SELECT 0x8080 #define PCS_RN_SMN_BASE_ADDR 0x11e00000 #define PCS_RN_PORT_ADDR_SIZE 0x100000 @@ -968,7 +970,7 @@ #define XP_PROP_0_PORT_MODE_INDEX 8 #define XP_PROP_0_PORT_MODE_WIDTH 4 #define XP_PROP_0_PORT_SPEEDS_INDEX 22 -#define XP_PROP_0_PORT_SPEEDS_WIDTH 5 +#define XP_PROP_0_PORT_SPEEDS_WIDTH 6 #define XP_PROP_1_MAX_RX_DMA_INDEX 24 #define XP_PROP_1_MAX_RX_DMA_WIDTH 5 #define XP_PROP_1_MAX_RX_QUEUES_INDEX 8 diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index 1a91ec455bee..f1357619097e 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -718,7 +718,7 @@ static void xgbe_disable_ecc_sec(struct xgbe_prv_data *pdata, static int xgbe_set_speed(struct xgbe_prv_data *pdata, int speed) { - unsigned int ss; + unsigned int ss, ver; switch (speed) { case SPEED_10: @@ -728,7 +728,12 @@ static int xgbe_set_speed(struct xgbe_prv_data *pdata, int speed) ss = XGBE_MAC_SS_1G; break; case SPEED_2500: - ss = XGBE_MAC_SS_2_5G_GMII; + /* P100a uses XGMII mode for 2.5G, older platforms use GMII */ + ver = XGMAC_GET_BITS(pdata->hw_feat.version, MAC_VR, SNPSVER); + if (ver == XGBE_MAC_VER_33) + ss = XGBE_MAC_SS_2_5G_XGMII; + else + ss = XGBE_MAC_SS_2_5G_GMII; break; case SPEED_10000: ss = XGBE_MAC_SS_10G; @@ -1070,6 +1075,8 @@ static void xgbe_get_pcs_index_and_offset(struct xgbe_prv_data *pdata, unsigned int *index, unsigned int *offset) { + unsigned int ver; + /* The PCS registers are accessed using mmio. The underlying * management interface uses indirect addressing to access the MMD * register sets. This requires accessing of the PCS register in two @@ -1081,7 +1088,27 @@ static void xgbe_get_pcs_index_and_offset(struct xgbe_prv_data *pdata, */ mmd_address <<= 1; *index = mmd_address & ~pdata->xpcs_window_mask; - *offset = pdata->xpcs_window + (mmd_address & pdata->xpcs_window_mask); + + ver = XGMAC_GET_BITS(pdata->hw_feat.version, MAC_VR, SNPSVER); + + /* The P100a platform uses a different memory mapping scheme for XPCS + * register access. The offset calculation differs between platforms: + * + * For P100a platforms: The offset is calculated by adding the + * mmd_address to the xpcs_window and then applying the xpcs_window_mask + * For older platforms: The offset is calculated by applying the + * xpcs_window_mask to the mmd_address and then adding it to the + * xpcs_window. + * + * This is critical because using the wrong calculation causes register + * accesses to target the wrong registers, leading to incorrect behavior + */ + if (ver == XGBE_MAC_VER_33) + *offset = (pdata->xpcs_window + mmd_address) & + pdata->xpcs_window_mask; + else + *offset = pdata->xpcs_window + + (mmd_address & pdata->xpcs_window_mask); } static int xgbe_read_mmd_regs_v3(struct xgbe_prv_data *pdata, int prtad, diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c index e3e1dca9856a..f54a5040a493 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c @@ -168,6 +168,14 @@ static int xgbe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) rdev = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0, 0)); if (rdev && rdev->vendor == PCI_VENDOR_ID_AMD) { switch (rdev->device) { + case XGBE_P100a_PCI_DEVICE_ID: + pdata->xpcs_window_def_reg = PCS_P100a_WINDOW_DEF; + pdata->xpcs_window_sel_reg = PCS_P100a_WINDOW_SELECT; + + /* P100a devices do not need rrc and cdr workaround */ + pdata->vdata->an_cdr_workaround = 0; + pdata->vdata->enable_rrc = 0; + break; case XGBE_RV_PCI_DEVICE_ID: pdata->xpcs_window_def_reg = PCS_V2_RV_WINDOW_DEF; pdata->xpcs_window_sel_reg = PCS_V2_RV_WINDOW_SELECT; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c index c63ddb12237e..1dbfa9d4360d 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c @@ -2132,7 +2132,11 @@ static bool enable_rx_adap(struct xgbe_prv_data *pdata, enum xgbe_mode mode) /* Rx-Adaptation is not supported on older platforms(< 0x30H) */ ver = XGMAC_GET_BITS(pdata->hw_feat.version, MAC_VR, SNPSVER); - if (ver < 0x30) + if (ver < XGBE_MAC_VER_30) + return false; + + /* Rx adaptation not yet supported on P100a */ + if (ver == XGBE_MAC_VER_33) return false; /* Re-driver models 4223 && 4227 do not support Rx-Adaptation */ @@ -2258,12 +2262,24 @@ static void xgbe_phy_kr_mode(struct xgbe_prv_data *pdata) static void xgbe_phy_kx_2500_mode(struct xgbe_prv_data *pdata) { + unsigned int ver; + struct xgbe_phy_data *phy_data = pdata->phy_data; xgbe_phy_set_redrv_mode(pdata); - /* 2.5G/KX */ - xgbe_phy_perform_ratechange(pdata, XGBE_MB_CMD_SET_2_5G, XGBE_MB_SUBCMD_NONE); + ver = XGMAC_GET_BITS(pdata->hw_feat.version, MAC_VR, SNPSVER); + + /* + * P100a uses XGMII mode for 2.5G which requires the 2.5G_KX subcommand. + * Older platforms use GMII mode. + */ + if (ver == XGBE_MAC_VER_33) + xgbe_phy_perform_ratechange(pdata, XGBE_MB_CMD_SET_2_5G, + XGBE_MB_SUBCMD_2_5G_KX); + else + xgbe_phy_perform_ratechange(pdata, XGBE_MB_CMD_SET_2_5G, + XGBE_MB_SUBCMD_NONE); phy_data->cur_mode = XGBE_MODE_KX_2500; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index 32cab96e013c..f1b2954017e6 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -262,6 +262,7 @@ #define XGBE_RV_PCI_DEVICE_ID 0x15d0 #define XGBE_YC_PCI_DEVICE_ID 0x14b5 #define XGBE_RN_PCI_DEVICE_ID 0x1630 +#define XGBE_P100a_PCI_DEVICE_ID 0x1122 /* Generic low and high masks */ #define XGBE_GEN_HI_MASK GENMASK(31, 16) @@ -577,7 +578,10 @@ enum xgbe_mb_subcmd { XGBE_MB_SUBCMD_10MBITS = 0, XGBE_MB_SUBCMD_100MBITS, XGBE_MB_SUBCMD_1G_SGMII, - XGBE_MB_SUBCMD_1G_KX + XGBE_MB_SUBCMD_1G_KX, + + /* 2.5GbE Mode subcommands */ + XGBE_MB_SUBCMD_2_5G_KX = 1 }; struct xgbe_phy { From 01b7768578a68abe597cfb36ebe0fc47c9305f88 Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Wed, 25 Feb 2026 16:19:31 +0200 Subject: [PATCH 0285/1561] net/mlx5: Add TLP emulation device capabilities Introduce the hardware structures and definitions needed for the driver support of TLP emulation in mlx5_ifc. Signed-off-by: Maher Sanalla Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 775cb0c56865..a3948b36820d 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1389,6 +1389,26 @@ struct mlx5_ifc_virtio_emulation_cap_bits { u8 reserved_at_1c0[0x640]; }; +struct mlx5_ifc_tlp_dev_emu_capabilities_bits { + u8 reserved_at_0[0x20]; + + u8 reserved_at_20[0x13]; + u8 log_tlp_rsp_gw_page_stride[0x5]; + u8 reserved_at_38[0x8]; + + u8 reserved_at_40[0xc0]; + + u8 reserved_at_100[0xc]; + u8 tlp_rsp_gw_num_pages[0x4]; + u8 reserved_at_110[0x10]; + + u8 reserved_at_120[0xa0]; + + u8 tlp_rsp_gw_pages_bar_offset[0x40]; + + u8 reserved_at_200[0x600]; +}; + enum { MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_1_BYTE = 0x0, MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_2_BYTES = 0x2, @@ -1961,7 +1981,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 log_max_rqt[0x5]; u8 reserved_at_390[0x3]; u8 log_max_rqt_size[0x5]; - u8 reserved_at_398[0x1]; + u8 tlp_device_emulation_manager[0x1]; u8 vnic_env_cnt_bar_uar_access[0x1]; u8 vnic_env_cnt_odp_page_fault[0x1]; u8 log_max_tis_per_sq[0x5]; @@ -3830,6 +3850,7 @@ union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_tls_cap_bits tls_cap; struct mlx5_ifc_device_mem_cap_bits device_mem_cap; struct mlx5_ifc_virtio_emulation_cap_bits virtio_emulation_cap; + struct mlx5_ifc_tlp_dev_emu_capabilities_bits tlp_dev_emu_capabilities; struct mlx5_ifc_macsec_cap_bits macsec_cap; struct mlx5_ifc_crypto_cap_bits crypto_cap; struct mlx5_ifc_ipsec_cap_bits ipsec_cap; From 385a06f74ff7a03e3fb0b15fb87cfeb052d75073 Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Wed, 25 Feb 2026 16:19:32 +0200 Subject: [PATCH 0286/1561] net/mlx5: Expose TLP emulation capabilities Expose and query TLP device emulation caps on driver load. Signed-off-by: Maher Sanalla Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/fw.c | 6 ++++++ drivers/net/ethernet/mellanox/mlx5/core/main.c | 1 + include/linux/mlx5/device.h | 9 +++++++++ 3 files changed, 16 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index eeb4437975f2..55249f405841 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -255,6 +255,12 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) return err; } + if (MLX5_CAP_GEN(dev, tlp_device_emulation_manager)) { + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_TLP_EMULATION, HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + } + if (MLX5_CAP_GEN(dev, ipsec_offload)) { err = mlx5_core_get_caps_mode(dev, MLX5_CAP_IPSEC, HCA_CAP_OPMOD_GET_CUR); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index fdc3ba20912e..b0bc4a7d4a93 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1772,6 +1772,7 @@ static const int types[] = { MLX5_CAP_CRYPTO, MLX5_CAP_SHAMPO, MLX5_CAP_ADV_RDMA, + MLX5_CAP_TLP_EMULATION, }; static void mlx5_hca_caps_free(struct mlx5_core_dev *dev) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index b37fe39cef27..25c6b42140b2 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1259,6 +1259,7 @@ enum mlx5_cap_type { MLX5_CAP_PORT_SELECTION = 0x25, MLX5_CAP_ADV_VIRTUALIZATION = 0x26, MLX5_CAP_ADV_RDMA = 0x28, + MLX5_CAP_TLP_EMULATION = 0x2a, /* NUM OF CAP Types */ MLX5_CAP_NUM }; @@ -1481,6 +1482,14 @@ enum mlx5_qcam_feature_groups { MLX5_GET64(virtio_emulation_cap, \ (mdev)->caps.hca[MLX5_CAP_VDPA_EMULATION]->cur, cap) +#define MLX5_CAP_DEV_TLP_EMULATION(mdev, cap)\ + MLX5_GET(tlp_dev_emu_capabilities, \ + (mdev)->caps.hca[MLX5_CAP_TLP_EMULATION]->cur, cap) + +#define MLX5_CAP64_DEV_TLP_EMULATION(mdev, cap)\ + MLX5_GET64(tlp_dev_emu_capabilities, \ + (mdev)->caps.hca[MLX5_CAP_TLP_EMULATION]->cur, cap) + #define MLX5_CAP_IPSEC(mdev, cap)\ MLX5_GET(ipsec_cap, (mdev)->caps.hca[MLX5_CAP_IPSEC]->cur, cap) From 0172f8d80220d2255cae00eb5131a864047433f7 Mon Sep 17 00:00:00 2001 From: Erni Sri Satya Vennela Date: Mon, 2 Mar 2026 09:41:52 -0800 Subject: [PATCH 0287/1561] net: mana: Add MAC address to vPort logs and clarify error messages Add MAC address to vPort configuration success message and update error message to be more specific about HWC message errors in mana_send_request. Signed-off-by: Erni Sri Satya Vennela Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260302174204.234837-1-ernis@linux.microsoft.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/microsoft/mana/hw_channel.c | 12 +++++++----- drivers/net/ethernet/microsoft/mana/mana_en.c | 8 ++++---- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c index ba3467f1e2ea..91975bdb5686 100644 --- a/drivers/net/ethernet/microsoft/mana/hw_channel.c +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c @@ -853,6 +853,7 @@ int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len, struct hwc_caller_ctx *ctx; u32 dest_vrcq = 0; u32 dest_vrq = 0; + u32 command; u16 msg_id; int err; @@ -878,6 +879,7 @@ int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len, req_msg->req.hwc_msg_id = msg_id; tx_wr->msg_size = req_len; + command = req_msg->req.msg_type; if (gc->is_pf) { dest_vrq = hwc->pf_dest_vrq_id; @@ -893,8 +895,8 @@ int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len, if (!wait_for_completion_timeout(&ctx->comp_event, (msecs_to_jiffies(hwc->hwc_timeout)))) { if (hwc->hwc_timeout != 0) - dev_err(hwc->dev, "HWC: Request timed out: %u ms\n", - hwc->hwc_timeout); + dev_err(hwc->dev, "Command 0x%x timed out: %u ms\n", + command, hwc->hwc_timeout); /* Reduce further waiting if HWC no response */ if (hwc->hwc_timeout > 1) @@ -914,9 +916,9 @@ int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len, err = -EOPNOTSUPP; goto out; } - if (req_msg->req.msg_type != MANA_QUERY_PHY_STAT) - dev_err(hwc->dev, "HWC: Failed hw_channel req: 0x%x\n", - ctx->status_code); + if (command != MANA_QUERY_PHY_STAT) + dev_err(hwc->dev, "Command 0x%x failed with status: 0x%x\n", + command, ctx->status_code); err = -EPROTO; goto out; } diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 56ee993e3a43..a868c28c8280 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -1021,8 +1021,8 @@ static int mana_send_request(struct mana_context *ac, void *in_buf, if (req->req.msg_type != MANA_QUERY_PHY_STAT && mana_need_log(gc, err)) - dev_err(dev, "Failed to send mana message: %d, 0x%x\n", - err, resp->status); + dev_err(dev, "Command 0x%x failed with status: 0x%x, err: %d\n", + req->req.msg_type, resp->status, err); return err ? err : -EPROTO; } @@ -1335,8 +1335,8 @@ int mana_cfg_vport(struct mana_port_context *apc, u32 protection_dom_id, apc->tx_shortform_allowed = resp.short_form_allowed; apc->tx_vp_offset = resp.tx_vport_offset; - netdev_info(apc->ndev, "Configured vPort %llu PD %u DB %u\n", - apc->port_handle, protection_dom_id, doorbell_pg_id); + netdev_info(apc->ndev, "Enabled vPort %llu PD %u DB %u MAC %pM\n", + apc->port_handle, protection_dom_id, doorbell_pg_id, apc->mac_addr); out: if (err) mana_uncfg_vport(apc); From 70836c8d0fe046400de8cdcf0613b2f1f6bddde3 Mon Sep 17 00:00:00 2001 From: Qingfang Deng Date: Tue, 3 Mar 2026 17:32:19 +0800 Subject: [PATCH 0288/1561] ppp: don't store tx skb in the fastpath Currently, ppp->xmit_pending is used in ppp_send_frame() to pass a skb to ppp_push(), and holds the skb when a PPP channel cannot immediately transmit it. This state is redundant because the transmit queue (ppp->file.xq) can already handle the backlog. Furthermore, during normal operation, an skb is queued in file.xq only to be immediately dequeued, causing unnecessary overhead. Refactor the transmit path to avoid stashing the skb when possible: - Remove ppp->xmit_pending. - Rename ppp_send_frame() to ppp_prepare_tx_skb(), and don't call ppp_push() in it. It returns 1 if the skb is consumed (dropped/handled) or 0 if it can be passed to ppp_push(). - Update ppp_push() to accept the skb. It returns 1 if the skb is consumed, or 0 if the channel is busy. - Optimize __ppp_xmit_process(): - Fastpath: If the queue is empty, attempt to send the skb directly via ppp_push(). If busy, queue it. - Slowpath: If the queue is not empty, process the backlog in file.xq. Split dequeuing loop into a separate function ppp_xmit_flush() so ppp_channel_push() uses that directly instead of passing a NULL skb to __ppp_xmit_process(). This simplifies the states and reduces locking in the fastpath. Signed-off-by: Qingfang Deng Link: https://patch.msgid.link/20260303093219.234403-1-dqfext@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ppp/ppp_generic.c | 113 ++++++++++++++++++++-------------- 1 file changed, 66 insertions(+), 47 deletions(-) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index e9b41777be80..2081da6c2144 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -134,7 +134,6 @@ struct ppp { int debug; /* debug flags 70 */ struct slcompress *vj; /* state for VJ header compression */ enum NPmode npmode[NUM_NP]; /* what to do with each net proto 78 */ - struct sk_buff *xmit_pending; /* a packet ready to go out 88 */ struct compressor *xcomp; /* transmit packet compressor 8c */ void *xc_state; /* its internal state 90 */ struct compressor *rcomp; /* receive decompressor 94 */ @@ -264,8 +263,8 @@ struct ppp_net { static int ppp_unattached_ioctl(struct net *net, struct ppp_file *pf, struct file *file, unsigned int cmd, unsigned long arg); static void ppp_xmit_process(struct ppp *ppp, struct sk_buff *skb); -static void ppp_send_frame(struct ppp *ppp, struct sk_buff *skb); -static void ppp_push(struct ppp *ppp); +static int ppp_prepare_tx_skb(struct ppp *ppp, struct sk_buff **pskb); +static int ppp_push(struct ppp *ppp, struct sk_buff *skb); static void ppp_channel_push(struct channel *pch); static void ppp_receive_frame(struct ppp *ppp, struct sk_buff *skb, struct channel *pch); @@ -1651,26 +1650,44 @@ static void ppp_setup(struct net_device *dev) */ /* Called to do any work queued up on the transmit side that can now be done */ +static void ppp_xmit_flush(struct ppp *ppp) +{ + struct sk_buff *skb; + + while ((skb = skb_dequeue(&ppp->file.xq))) { + if (unlikely(!ppp_push(ppp, skb))) { + skb_queue_head(&ppp->file.xq, skb); + return; + } + } + /* If there's no work left to do, tell the core net code that we can + * accept some more. + */ + netif_wake_queue(ppp->dev); +} + static void __ppp_xmit_process(struct ppp *ppp, struct sk_buff *skb) { ppp_xmit_lock(ppp); - if (!ppp->closing) { - ppp_push(ppp); - - if (skb) - skb_queue_tail(&ppp->file.xq, skb); - while (!ppp->xmit_pending && - (skb = skb_dequeue(&ppp->file.xq))) - ppp_send_frame(ppp, skb); - /* If there's no work left to do, tell the core net - code that we can accept some more. */ - if (!ppp->xmit_pending && !skb_peek(&ppp->file.xq)) - netif_wake_queue(ppp->dev); - else - netif_stop_queue(ppp->dev); - } else { + if (unlikely(ppp->closing)) { kfree_skb(skb); + goto out; } + if (unlikely(ppp_prepare_tx_skb(ppp, &skb))) + goto out; + /* Fastpath: No backlog, just send the new skb. */ + if (likely(skb_queue_empty(&ppp->file.xq))) { + if (unlikely(!ppp_push(ppp, skb))) { + skb_queue_tail(&ppp->file.xq, skb); + netif_stop_queue(ppp->dev); + } + goto out; + } + + /* Slowpath: Enqueue the new skb and process backlog */ + skb_queue_tail(&ppp->file.xq, skb); + ppp_xmit_flush(ppp); +out: ppp_xmit_unlock(ppp); } @@ -1757,13 +1774,15 @@ pad_compress_skb(struct ppp *ppp, struct sk_buff *skb) } /* - * Compress and send a frame. - * The caller should have locked the xmit path, - * and xmit_pending should be 0. + * Compress and prepare to send a frame. + * The caller should have locked the xmit path. + * Returns 1 if the skb was consumed, 0 if it can be passed to ppp_push(). + * @pskb is updated if a compressor is in use. */ -static void -ppp_send_frame(struct ppp *ppp, struct sk_buff *skb) +static int +ppp_prepare_tx_skb(struct ppp *ppp, struct sk_buff **pskb) { + struct sk_buff *skb = *pskb; int proto = PPP_PROTO(skb); struct sk_buff *new_skb; int len; @@ -1784,7 +1803,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb) "PPP: outbound frame " "not passed\n"); kfree_skb(skb); - return; + return 1; } /* if this packet passes the active filter, record the time */ if (!(ppp->active_filter && @@ -1832,6 +1851,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb) } consume_skb(skb); skb = new_skb; + *pskb = skb; cp = skb_put(skb, len + 2); cp[0] = 0; cp[1] = proto; @@ -1858,6 +1878,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb) if (!new_skb) goto drop; skb = new_skb; + *pskb = skb; } /* @@ -1869,42 +1890,38 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb) goto drop; skb_queue_tail(&ppp->file.rq, skb); wake_up_interruptible(&ppp->file.rwait); - return; + return 1; } - ppp->xmit_pending = skb; - ppp_push(ppp); - return; + return 0; drop: kfree_skb(skb); ++ppp->dev->stats.tx_errors; + return 1; } /* - * Try to send the frame in xmit_pending. + * Try to send the frame. * The caller should have the xmit path locked. + * Returns 1 if the skb was consumed, 0 if not. */ -static void -ppp_push(struct ppp *ppp) +static int +ppp_push(struct ppp *ppp, struct sk_buff *skb) { struct list_head *list; struct channel *pch; - struct sk_buff *skb = ppp->xmit_pending; - - if (!skb) - return; list = &ppp->channels; if (list_empty(list)) { /* nowhere to send the packet, just drop it */ - ppp->xmit_pending = NULL; kfree_skb(skb); - return; + return 1; } if ((ppp->flags & SC_MULTILINK) == 0) { struct ppp_channel *chan; + int ret; /* not doing multilink: send it down the first channel */ list = list->next; pch = list_entry(list, struct channel, clist); @@ -1916,27 +1933,26 @@ ppp_push(struct ppp *ppp) * skb but linearization failed */ kfree_skb(skb); - ppp->xmit_pending = NULL; + ret = 1; goto out; } - if (chan->ops->start_xmit(chan, skb)) - ppp->xmit_pending = NULL; + ret = chan->ops->start_xmit(chan, skb); out: spin_unlock(&pch->downl); - return; + return ret; } #ifdef CONFIG_PPP_MULTILINK /* Multilink: fragment the packet over as many links as can take the packet at the moment. */ if (!ppp_mp_explode(ppp, skb)) - return; + return 0; #endif /* CONFIG_PPP_MULTILINK */ - ppp->xmit_pending = NULL; kfree_skb(skb); + return 1; } #ifdef CONFIG_PPP_MULTILINK @@ -2005,7 +2021,7 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb) * performance if we have a lot of channels. */ if (nfree == 0 || nfree < navail / 2) - return 0; /* can't take now, leave it in xmit_pending */ + return 0; /* can't take now, leave it in transmit queue */ /* Do protocol field compression */ if (skb_linearize(skb)) @@ -2199,8 +2215,12 @@ static void __ppp_channel_push(struct channel *pch, struct ppp *ppp) spin_unlock(&pch->downl); /* see if there is anything from the attached unit to be sent */ if (skb_queue_empty(&pch->file.xq)) { - if (ppp) - __ppp_xmit_process(ppp, NULL); + if (ppp) { + ppp_xmit_lock(ppp); + if (!ppp->closing) + ppp_xmit_flush(ppp); + ppp_xmit_unlock(ppp); + } } } @@ -3460,7 +3480,6 @@ static void ppp_destroy_interface(struct ppp *ppp) } #endif /* CONFIG_PPP_FILTER */ - kfree_skb(ppp->xmit_pending); free_percpu(ppp->xmit_recursion); free_netdev(ppp->dev); From a90e3029f20d8a8c2c1337436a313d29acde5b93 Mon Sep 17 00:00:00 2001 From: Javen Xu Date: Tue, 3 Mar 2026 17:46:11 +0800 Subject: [PATCH 0289/1561] r8169: add support for RTL8125cp This patch adds support for chip RTL8125cp. Its XID is 0x708. We apply different configuration and firmware for RTL8125cp. Signed-off-by: Javen Xu Link: https://patch.msgid.link/20260303094611.450-1-javen_xu@realsil.com.cn [pabeni@redhat.com: changelog cleanup] Signed-off-by: Paolo Abeni --- drivers/net/ethernet/realtek/r8169.h | 1 + drivers/net/ethernet/realtek/r8169_main.c | 7 +++++ .../net/ethernet/realtek/r8169_phy_config.c | 26 +++++++++++++++++++ 3 files changed, 34 insertions(+) diff --git a/drivers/net/ethernet/realtek/r8169.h b/drivers/net/ethernet/realtek/r8169.h index aed4cf852091..0b9c1d4eb48b 100644 --- a/drivers/net/ethernet/realtek/r8169.h +++ b/drivers/net/ethernet/realtek/r8169.h @@ -68,6 +68,7 @@ enum mac_version { RTL_GIGA_MAC_VER_61, RTL_GIGA_MAC_VER_63, RTL_GIGA_MAC_VER_64, + RTL_GIGA_MAC_VER_65, RTL_GIGA_MAC_VER_66, RTL_GIGA_MAC_VER_70, RTL_GIGA_MAC_VER_80, diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 58788d196c57..791277e750ba 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -60,6 +60,7 @@ #define FIRMWARE_8125D_2 "rtl_nic/rtl8125d-2.fw" #define FIRMWARE_8125K_1 "rtl_nic/rtl8125k-1.fw" #define FIRMWARE_8125BP_2 "rtl_nic/rtl8125bp-2.fw" +#define FIRMWARE_8125CP_1 "rtl_nic/rtl8125cp-1.fw" #define FIRMWARE_9151A_1 "rtl_nic/rtl9151a-1.fw" #define FIRMWARE_8126A_2 "rtl_nic/rtl8126a-2.fw" #define FIRMWARE_8126A_3 "rtl_nic/rtl8126a-3.fw" @@ -112,6 +113,9 @@ static const struct rtl_chip_info { /* 8125BP family. */ { 0x7cf, 0x681, RTL_GIGA_MAC_VER_66, "RTL8125BP", FIRMWARE_8125BP_2 }, + /* 8125CP family*/ + { 0x7cf, 0x708, RTL_GIGA_MAC_VER_65, "RTL8125CP", FIRMWARE_8125CP_1 }, + /* 8125D family. */ { 0x7cf, 0x68b, RTL_GIGA_MAC_VER_64, "RTL9151A", FIRMWARE_9151A_1 }, { 0x7cf, 0x68a, RTL_GIGA_MAC_VER_64, "RTL8125K", FIRMWARE_8125K_1 }, @@ -802,6 +806,7 @@ MODULE_FIRMWARE(FIRMWARE_8125D_1); MODULE_FIRMWARE(FIRMWARE_8125D_2); MODULE_FIRMWARE(FIRMWARE_8125K_1); MODULE_FIRMWARE(FIRMWARE_8125BP_2); +MODULE_FIRMWARE(FIRMWARE_8125CP_1); MODULE_FIRMWARE(FIRMWARE_9151A_1); MODULE_FIRMWARE(FIRMWARE_8126A_2); MODULE_FIRMWARE(FIRMWARE_8126A_3); @@ -4021,6 +4026,7 @@ static void rtl_hw_config(struct rtl8169_private *tp) [RTL_GIGA_MAC_VER_61] = rtl_hw_start_8125a_2, [RTL_GIGA_MAC_VER_63] = rtl_hw_start_8125b, [RTL_GIGA_MAC_VER_64] = rtl_hw_start_8125d, + [RTL_GIGA_MAC_VER_65] = rtl_hw_start_8125d, [RTL_GIGA_MAC_VER_66] = rtl_hw_start_8125d, [RTL_GIGA_MAC_VER_70] = rtl_hw_start_8126a, [RTL_GIGA_MAC_VER_80] = rtl_hw_start_8127a, @@ -4040,6 +4046,7 @@ static void rtl_hw_start_8125(struct rtl8169_private *tp) switch (tp->mac_version) { case RTL_GIGA_MAC_VER_61: case RTL_GIGA_MAC_VER_64: + case RTL_GIGA_MAC_VER_65: case RTL_GIGA_MAC_VER_66: case RTL_GIGA_MAC_VER_80: for (i = 0xa00; i < 0xb00; i += 4) diff --git a/drivers/net/ethernet/realtek/r8169_phy_config.c b/drivers/net/ethernet/realtek/r8169_phy_config.c index 032d9d2cfa2a..4f1d447136ec 100644 --- a/drivers/net/ethernet/realtek/r8169_phy_config.c +++ b/drivers/net/ethernet/realtek/r8169_phy_config.c @@ -14,6 +14,9 @@ #include "r8169.h" +#define LINK_SPEED_10M_PLL_OFF BIT(0) +#define ALDPS_PLL_OFF BIT(1) + typedef void (*rtl_phy_cfg_fct)(struct rtl8169_private *tp, struct phy_device *phydev); @@ -1102,6 +1105,28 @@ static void rtl8125d_hw_phy_config(struct rtl8169_private *tp, rtl8125_config_eee_phy(phydev); } +static void rtl8125cp_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + r8169_apply_firmware(tp); + rtl8168g_enable_gphy_10m(phydev); + + phy_modify_paged(phydev, 0xad0, 0x17, 0x007f, 0x000b); + phy_modify_paged(phydev, 0xad7, 0x14, 0x0000, BIT(4)); + rtl8125_phy_param(phydev, 0x807f, 0xff00, 0x5300); + r8168g_phy_param(phydev, 0x81b8, 0xffff, 0x00b4); + r8168g_phy_param(phydev, 0x81ba, 0xffff, 0x00e4); + r8168g_phy_param(phydev, 0x81c5, 0xffff, 0x0104); + r8168g_phy_param(phydev, 0x81d0, 0xffff, 0x054d); + phy_modify_paged(phydev, 0x0a43, 0x10, 0x0000, + LINK_SPEED_10M_PLL_OFF | ALDPS_PLL_OFF); + phy_modify_paged(phydev, 0x0a44, 0x11, 0x0000, BIT(7)); + + rtl8125_legacy_force_mode(phydev); + rtl8168g_disable_aldps(phydev); + rtl8125_config_eee_phy(phydev); +} + static void rtl8125bp_hw_phy_config(struct rtl8169_private *tp, struct phy_device *phydev) { @@ -1344,6 +1369,7 @@ void r8169_hw_phy_config(struct rtl8169_private *tp, struct phy_device *phydev, [RTL_GIGA_MAC_VER_61] = rtl8125a_2_hw_phy_config, [RTL_GIGA_MAC_VER_63] = rtl8125b_hw_phy_config, [RTL_GIGA_MAC_VER_64] = rtl8125d_hw_phy_config, + [RTL_GIGA_MAC_VER_65] = rtl8125cp_hw_phy_config, [RTL_GIGA_MAC_VER_66] = rtl8125bp_hw_phy_config, [RTL_GIGA_MAC_VER_70] = rtl8126a_hw_phy_config, [RTL_GIGA_MAC_VER_80] = rtl8127a_1_hw_phy_config, From e637c244b954426b84340cbc551ca0e2a32058ce Mon Sep 17 00:00:00 2001 From: Ankit Garg Date: Tue, 3 Mar 2026 11:55:46 -0800 Subject: [PATCH 0290/1561] gve: Advertise NETIF_F_GRO_HW instead of NETIF_F_LRO The device behind DQO format has always coalesced packets per stricter hardware GRO spec even though it was being advertised as LRO. Update advertised capability to match device behavior. Signed-off-by: Ankit Garg Reviewed-by: Willem de Bruijn Reviewed-by: Harshitha Ramamurthy Signed-off-by: Joshua Washington Link: https://patch.msgid.link/20260303195549.2679070-2-joshwash@google.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/google/gve/gve_adminq.c | 6 +++--- drivers/net/ethernet/google/gve/gve_main.c | 15 ++++++++------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c index b5f105709e49..8bd0ceadd927 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.c +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -791,7 +791,7 @@ static void gve_adminq_get_create_rx_queue_cmd(struct gve_priv *priv, cmd->create_rx_queue.rx_buff_ring_size = cpu_to_be16(priv->rx_desc_cnt); cmd->create_rx_queue.enable_rsc = - !!(priv->dev->features & NETIF_F_LRO); + !!(priv->dev->features & NETIF_F_GRO_HW); if (priv->header_split_enabled) cmd->create_rx_queue.header_buffer_size = cpu_to_be16(priv->header_buf_size); @@ -1117,9 +1117,9 @@ int gve_adminq_describe_device(struct gve_priv *priv) gve_set_default_rss_sizes(priv); - /* DQO supports LRO. */ + /* DQO supports HW-GRO. */ if (!gve_is_gqi(priv)) - priv->dev->hw_features |= NETIF_F_LRO; + priv->dev->hw_features |= NETIF_F_GRO_HW; priv->max_registered_pages = be64_to_cpu(descriptor->max_registered_pages); diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index c654cf503c1a..424d973c97f2 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -1758,9 +1758,9 @@ static int gve_verify_xdp_configuration(struct net_device *dev, struct gve_priv *priv = netdev_priv(dev); u16 max_xdp_mtu; - if (dev->features & NETIF_F_LRO) { + if (dev->features & NETIF_F_GRO_HW) { NL_SET_ERR_MSG_MOD(extack, - "XDP is not supported when LRO is on."); + "XDP is not supported when HW-GRO is on."); return -EOPNOTSUPP; } @@ -2177,12 +2177,13 @@ static int gve_set_features(struct net_device *netdev, gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); - if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) { - netdev->features ^= NETIF_F_LRO; - if (priv->xdp_prog && (netdev->features & NETIF_F_LRO)) { + if ((netdev->features & NETIF_F_GRO_HW) != + (features & NETIF_F_GRO_HW)) { + netdev->features ^= NETIF_F_GRO_HW; + if (priv->xdp_prog && (netdev->features & NETIF_F_GRO_HW)) { netdev_warn(netdev, - "XDP is not supported when LRO is on.\n"); - err = -EOPNOTSUPP; + "HW-GRO is not supported when XDP is on."); + err = -EOPNOTSUPP; goto revert_features; } if (netif_running(netdev)) { From ea4c1176871fd70a06eadcbd7c828f6cb9a1b0cd Mon Sep 17 00:00:00 2001 From: Ankit Garg Date: Tue, 3 Mar 2026 11:55:47 -0800 Subject: [PATCH 0291/1561] gve: fix SW coalescing when hw-GRO is used Leaving gso_segs unpopulated on hardware GRO packet prevents further coalescing by software stack because the kernel's GRO logic marks the SKB for flush because the expected length of all segments doesn't match actual payload length. Setting gso_segs correctly results in significantly more segments being coalesced as measured by the result of dev_gro_receive(). gso_segs are derived from payload length. When header-split is enabled, payload is in the non-linear portion of skb. And when header-split is disabled, we have to parse the headers to determine payload length. Signed-off-by: Ankit Garg Reviewed-by: Eric Dumazet Reviewed-by: Jordan Rhee Reviewed-by: Harshitha Ramamurthy Signed-off-by: Joshua Washington Link: https://patch.msgid.link/20260303195549.2679070-3-joshwash@google.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/google/gve/gve_rx_dqo.c | 23 ++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index 3b10139941ea..27885ccf5226 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -942,11 +942,16 @@ static int gve_rx_complete_rsc(struct sk_buff *skb, struct gve_ptype ptype) { struct skb_shared_info *shinfo = skb_shinfo(skb); + int rsc_segments, rsc_seg_len, hdr_len; - /* Only TCP is supported right now. */ + /* HW-GRO only coalesces TCP. */ if (ptype.l4_type != GVE_L4_TYPE_TCP) return -EINVAL; + rsc_seg_len = le16_to_cpu(desc->rsc_seg_len); + if (!rsc_seg_len) + return 0; + switch (ptype.l3_type) { case GVE_L3_TYPE_IPV4: shinfo->gso_type = SKB_GSO_TCPV4; @@ -958,7 +963,21 @@ static int gve_rx_complete_rsc(struct sk_buff *skb, return -EINVAL; } - shinfo->gso_size = le16_to_cpu(desc->rsc_seg_len); + if (skb_headlen(skb)) { + /* With header-split, payload is in the non-linear part */ + rsc_segments = DIV_ROUND_UP(skb->data_len, rsc_seg_len); + } else { + /* HW-GRO packets are guaranteed to have complete TCP/IP + * headers in frag[0] when header-split is not enabled. + */ + hdr_len = eth_get_headlen(skb->dev, + skb_frag_address(&shinfo->frags[0]), + skb_frag_size(&shinfo->frags[0])); + rsc_segments = DIV_ROUND_UP(skb->len - hdr_len, rsc_seg_len); + } + shinfo->gso_size = rsc_seg_len; + shinfo->gso_segs = rsc_segments; + return 0; } From 0c7025fd24db5b2f8cbd2e1f0050c033b923fd48 Mon Sep 17 00:00:00 2001 From: Ankit Garg Date: Tue, 3 Mar 2026 11:55:48 -0800 Subject: [PATCH 0292/1561] gve: pull network headers into skb linear part Currently, in DQO mode with hw-gro enabled, entire received packet is placed into skb fragments when header-split is disabled. This leaves the skb linear part empty, forcing the networking stack to do multiple small memory copies to access eth, IP and TCP headers. This patch adds a single memcpy to put all headers into linear portion before packet reaches the SW GRO stack; thus eliminating multiple smaller memcpy calls. Additionally, the criteria for calling napi_gro_frags() was updated. Since skb->head is now populated, we instead check if the SKB is the cached NAPI scratchpad to ensure we continue using the zero-allocation path. Signed-off-by: Ankit Garg Reviewed-by: Eric Dumazet Reviewed-by: Harshitha Ramamurthy Signed-off-by: Joshua Washington Link: https://patch.msgid.link/20260303195549.2679070-4-joshwash@google.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/google/gve/gve_rx_dqo.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index 27885ccf5226..7924dce719e2 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -943,6 +943,8 @@ static int gve_rx_complete_rsc(struct sk_buff *skb, { struct skb_shared_info *shinfo = skb_shinfo(skb); int rsc_segments, rsc_seg_len, hdr_len; + skb_frag_t *frag; + void *va; /* HW-GRO only coalesces TCP. */ if (ptype.l4_type != GVE_L4_TYPE_TCP) @@ -970,10 +972,20 @@ static int gve_rx_complete_rsc(struct sk_buff *skb, /* HW-GRO packets are guaranteed to have complete TCP/IP * headers in frag[0] when header-split is not enabled. */ - hdr_len = eth_get_headlen(skb->dev, - skb_frag_address(&shinfo->frags[0]), - skb_frag_size(&shinfo->frags[0])); + frag = &skb_shinfo(skb)->frags[0]; + va = skb_frag_address(frag); + hdr_len = + eth_get_headlen(skb->dev, va, skb_frag_size(frag)); rsc_segments = DIV_ROUND_UP(skb->len - hdr_len, rsc_seg_len); + skb_copy_to_linear_data(skb, va, hdr_len); + skb_frag_size_sub(frag, hdr_len); + /* Verify we didn't empty the fragment completely as that could + * otherwise lead to page leaks. + */ + DEBUG_NET_WARN_ON_ONCE(!skb_frag_size(frag)); + skb_frag_off_add(frag, hdr_len); + skb->data_len -= hdr_len; + skb->tail += hdr_len; } shinfo->gso_size = rsc_seg_len; shinfo->gso_segs = rsc_segments; @@ -1010,7 +1022,7 @@ static int gve_rx_complete_skb(struct gve_rx_ring *rx, struct napi_struct *napi, return err; } - if (skb_headlen(rx->ctx.skb_head) == 0) + if (rx->ctx.skb_head == napi->skb) napi_gro_frags(napi); else napi_gro_receive(napi, rx->ctx.skb_head); From 3c398063ef01b02d7efd31662154fe70fd28ace6 Mon Sep 17 00:00:00 2001 From: Ankit Garg Date: Tue, 3 Mar 2026 11:55:49 -0800 Subject: [PATCH 0293/1561] gve: Enable hw-gro by default if device supported Change the driver's default behavior to enable hw-gro whenever supported for device. Performance observations: - We observed ~10% improvement in RX single stream throughput across various MTU sizes. - No change in TCP_RR/TCP_CRR latencies Signed-off-by: Ankit Garg Reviewed-by: Willem de Bruijn Reviewed-by: Harshitha Ramamurthy Signed-off-by: Joshua Washington Link: https://patch.msgid.link/20260303195549.2679070-5-joshwash@google.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/google/gve/gve_adminq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c index 8bd0ceadd927..129f3e11a442 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.c +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -1118,8 +1118,10 @@ int gve_adminq_describe_device(struct gve_priv *priv) gve_set_default_rss_sizes(priv); /* DQO supports HW-GRO. */ - if (!gve_is_gqi(priv)) + if (gve_is_dqo(priv)) { priv->dev->hw_features |= NETIF_F_GRO_HW; + priv->dev->features |= NETIF_F_GRO_HW; + } priv->max_registered_pages = be64_to_cpu(descriptor->max_registered_pages); From cc39325f927850473d3a84b029ae6f9b508e9bd1 Mon Sep 17 00:00:00 2001 From: Mohsin Bashir Date: Mon, 2 Mar 2026 15:01:45 -0800 Subject: [PATCH 0294/1561] net: ethtool: Track pause storm events With TX pause enabled, if a device is unable to pass packets up to the stack (e.g., CPU is hanged), the device can cause pause storm. Given that devices can have native support to protect the neighbor from such flooding, such events need some tracking. This support is to track TX pause storm events for better observability. Reviewed-by: Oleksij Rempel Signed-off-by: Jakub Kicinski Signed-off-by: Mohsin Bashir Link: https://patch.msgid.link/20260302230149.1580195-2-mohsin.bashr@gmail.com Signed-off-by: Paolo Abeni --- Documentation/netlink/specs/ethtool.yaml | 13 +++++++++++++ include/linux/ethtool.h | 2 ++ include/uapi/linux/ethtool_netlink_generated.h | 1 + net/ethtool/pause.c | 4 +++- 4 files changed, 19 insertions(+), 1 deletion(-) diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 0a2d2343f79a..4707063af3b4 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -879,6 +879,19 @@ attribute-sets: - name: rx-frames type: u64 + - + name: tx-pause-storm-events + type: u64 + doc: >- + TX pause storm event count. Increments each time device + detects that its pause assertion condition has been true + for too long for normal operation. As a result, the device + has temporarily disabled its own Pause TX function to + protect the network from itself. + This counter should never increment under normal overload + conditions; it indicates catastrophic failure like an OS + crash. The rate of incrementing is implementation specific. + - name: pause attr-cnt-name: __ethtool-a-pause-cnt diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 798abec67a1b..83c375840835 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -512,12 +512,14 @@ struct ethtool_eth_ctrl_stats { * * Equivalent to `30.3.4.3 aPAUSEMACCtrlFramesReceived` * from the standard. + * @tx_pause_storm_events: TX pause storm event count (see ethtool.yaml). */ struct ethtool_pause_stats { enum ethtool_mac_stats_src src; struct_group(stats, u64 tx_pause_frames; u64 rx_pause_frames; + u64 tx_pause_storm_events; ); }; diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index 556a0c834df5..114b83017297 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -381,6 +381,7 @@ enum { ETHTOOL_A_PAUSE_STAT_PAD, ETHTOOL_A_PAUSE_STAT_TX_FRAMES, ETHTOOL_A_PAUSE_STAT_RX_FRAMES, + ETHTOOL_A_PAUSE_STAT_TX_PAUSE_STORM_EVENTS, __ETHTOOL_A_PAUSE_STAT_CNT, ETHTOOL_A_PAUSE_STAT_MAX = (__ETHTOOL_A_PAUSE_STAT_CNT - 1) diff --git a/net/ethtool/pause.c b/net/ethtool/pause.c index 0f9af1e66548..5d28f642764c 100644 --- a/net/ethtool/pause.c +++ b/net/ethtool/pause.c @@ -130,7 +130,9 @@ static int pause_put_stats(struct sk_buff *skb, if (ethtool_put_stat(skb, pause_stats->tx_pause_frames, ETHTOOL_A_PAUSE_STAT_TX_FRAMES, pad) || ethtool_put_stat(skb, pause_stats->rx_pause_frames, - ETHTOOL_A_PAUSE_STAT_RX_FRAMES, pad)) + ETHTOOL_A_PAUSE_STAT_RX_FRAMES, pad) || + ethtool_put_stat(skb, pause_stats->tx_pause_storm_events, + ETHTOOL_A_PAUSE_STAT_TX_PAUSE_STORM_EVENTS, pad)) goto err_cancel; nla_nest_end(skb, nest); From 817de93c348a3086ecca6e03ff459138832157cc Mon Sep 17 00:00:00 2001 From: Mohsin Bashir Date: Mon, 2 Mar 2026 15:01:46 -0800 Subject: [PATCH 0295/1561] net: ethtool: Update doc for tunable ETHTOOL_PFC_PREVENTION_TOUT enables the configuration of timeout value for PFC storm prevention. This can also be used to configure storm detection timeout for global pause settings. In fact some existing drivers are already using it for the said purpose. Highlight that the knob can formally be used to configure timeout value for pause storm prevention mechanism. The update to the ethtool man page will follow afterwards. Link: https://lore.kernel.org/aa5f189a-ac62-4633-97b5-ebf939e9c535@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Mohsin Bashir Link: https://patch.msgid.link/20260302230149.1580195-3-mohsin.bashr@gmail.com Signed-off-by: Paolo Abeni --- include/uapi/linux/ethtool.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index b74b80508553..1cdfb8341df2 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -225,7 +225,7 @@ enum tunable_id { ETHTOOL_ID_UNSPEC, ETHTOOL_RX_COPYBREAK, ETHTOOL_TX_COPYBREAK, - ETHTOOL_PFC_PREVENTION_TOUT, /* timeout in msecs */ + ETHTOOL_PFC_PREVENTION_TOUT, /* both pause and pfc, see man ethtool */ ETHTOOL_TX_COPYBREAK_BUF_SIZE, /* * Add your fresh new tunable attribute above and remember to update From 9b7c8728f53a5652a5184651c7b78ed1587542a4 Mon Sep 17 00:00:00 2001 From: Mohsin Bashir Date: Mon, 2 Mar 2026 15:01:47 -0800 Subject: [PATCH 0296/1561] eth: fbnic: Add protection against pause storm Add protection against TX pause storms. A pause storm occurs when a device fails to send received packets up to the stack. When a pause storm is detected (pause state persists beyond the configured timeout), the device stops sending the pause frames and begins dropping packets instead of back-pressuring. The timeout is configurable via ethtool tunable (pfc-prevention-tout) with a maximum value of 10485ms, and the default value of 500ms. Once the device transitions to the storm-detected state, the service task periodically attempts recovery, returning the device to normal operation to handle any subsequent pause storm episodes. Signed-off-by: Jakub Kicinski Signed-off-by: Mohsin Bashir Link: https://patch.msgid.link/20260302230149.1580195-4-mohsin.bashr@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/meta/fbnic/fbnic.h | 3 + drivers/net/ethernet/meta/fbnic/fbnic_csr.h | 10 ++ .../net/ethernet/meta/fbnic/fbnic_ethtool.c | 43 +++++++++ drivers/net/ethernet/meta/fbnic/fbnic_irq.c | 2 + drivers/net/ethernet/meta/fbnic/fbnic_mac.c | 96 +++++++++++++++++++ drivers/net/ethernet/meta/fbnic/fbnic_mac.h | 27 ++++++ drivers/net/ethernet/meta/fbnic/fbnic_pci.c | 5 + 7 files changed, 186 insertions(+) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic.h b/drivers/net/ethernet/meta/fbnic/fbnic.h index 779a083b9215..a760a27b1516 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic.h @@ -98,6 +98,9 @@ struct fbnic_dev { /* MDIO bus for PHYs */ struct mii_bus *mdio_bus; + + /* In units of ms since API supports values in ms */ + u16 ps_timeout; }; /* Reserve entry 0 in the MSI-X "others" array until we have filled all diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h index b717db879cd3..e68c56237b61 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h @@ -230,6 +230,7 @@ enum { #define FBNIC_INTR_MSIX_CTRL_VECTOR_MASK CSR_GENMASK(7, 0) #define FBNIC_INTR_MSIX_CTRL_ENABLE CSR_BIT(31) enum { + FBNIC_INTR_MSIX_CTRL_RXB_IDX = 7, FBNIC_INTR_MSIX_CTRL_PCS_IDX = 34, }; @@ -560,6 +561,11 @@ enum { #define FBNIC_RXB_DROP_THLD_CNT 8 #define FBNIC_RXB_DROP_THLD_ON CSR_GENMASK(12, 0) #define FBNIC_RXB_DROP_THLD_OFF CSR_GENMASK(25, 13) +#define FBNIC_RXB_PAUSE_STORM(n) (0x08019 + (n)) /* 0x20064 + 4*n */ +#define FBNIC_RXB_PAUSE_STORM_CNT 4 +#define FBNIC_RXB_PAUSE_STORM_FORCE_NORMAL CSR_BIT(20) +#define FBNIC_RXB_PAUSE_STORM_THLD_TIME CSR_GENMASK(19, 0) +#define FBNIC_RXB_PAUSE_STORM_UNIT_WR 0x0801d /* 0x20074 */ #define FBNIC_RXB_ECN_THLD(n) (0x0801e + (n)) /* 0x20078 + 4*n */ #define FBNIC_RXB_ECN_THLD_CNT 8 #define FBNIC_RXB_ECN_THLD_ON CSR_GENMASK(12, 0) @@ -596,6 +602,9 @@ enum { #define FBNIC_RXB_INTF_CREDIT_MASK2 CSR_GENMASK(11, 8) #define FBNIC_RXB_INTF_CREDIT_MASK3 CSR_GENMASK(15, 12) +#define FBNIC_RXB_ERR_INTR_STS 0x08050 /* 0x20140 */ +#define FBNIC_RXB_ERR_INTR_STS_PS CSR_GENMASK(15, 12) +#define FBNIC_RXB_ERR_INTR_MASK 0x08052 /* 0x20148 */ #define FBNIC_RXB_PAUSE_EVENT_CNT(n) (0x08053 + (n)) /* 0x2014c + 4*n */ #define FBNIC_RXB_DROP_FRMS_STS(n) (0x08057 + (n)) /* 0x2015c + 4*n */ #define FBNIC_RXB_DROP_BYTES_STS_L(n) \ @@ -636,6 +645,7 @@ enum { #define FBNIC_RXB_PBUF_FIFO_LEVEL(n) (0x0811d + (n)) /* 0x20474 + 4*n */ +#define FBNIC_RXB_PAUSE_STORM_UNIT_RD 0x08125 /* 0x20494 */ #define FBNIC_RXB_INTEGRITY_ERR(n) (0x0812f + (n)) /* 0x204bc + 4*n */ #define FBNIC_RXB_MAC_ERR(n) (0x08133 + (n)) /* 0x204cc + 4*n */ #define FBNIC_RXB_PARSER_ERR(n) (0x08137 + (n)) /* 0x204dc + 4*n */ diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c index 401c2196b9ff..ade9e667640f 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c @@ -1641,6 +1641,47 @@ static void fbnic_get_ts_stats(struct net_device *netdev, } } +static int fbnic_get_tunable(struct net_device *netdev, + const struct ethtool_tunable *tun, + void *data) +{ + struct fbnic_net *fbn = netdev_priv(netdev); + int err = 0; + + switch (tun->id) { + case ETHTOOL_PFC_PREVENTION_TOUT: + *(u16 *)data = fbn->fbd->ps_timeout; + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +static int fbnic_set_tunable(struct net_device *netdev, + const struct ethtool_tunable *tun, + const void *data) +{ + struct fbnic_net *fbn = netdev_priv(netdev); + int err; + + switch (tun->id) { + case ETHTOOL_PFC_PREVENTION_TOUT: { + u16 ps_timeout = *(u16 *)data; + + err = fbnic_mac_ps_protect_to_config(fbn->fbd, ps_timeout); + break; + } + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + static int fbnic_get_module_eeprom_by_page(struct net_device *netdev, const struct ethtool_module_eeprom *page_data, @@ -1915,6 +1956,8 @@ static const struct ethtool_ops fbnic_ethtool_ops = { .set_channels = fbnic_set_channels, .get_ts_info = fbnic_get_ts_info, .get_ts_stats = fbnic_get_ts_stats, + .get_tunable = fbnic_get_tunable, + .set_tunable = fbnic_set_tunable, .get_link_ksettings = fbnic_phylink_ethtool_ksettings_get, .get_fec_stats = fbnic_get_fec_stats, .get_fecparam = fbnic_phylink_get_fecparam, diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_irq.c b/drivers/net/ethernet/meta/fbnic/fbnic_irq.c index 02e8b0b257fe..1e6a8fd6f702 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_irq.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_irq.c @@ -170,6 +170,8 @@ int fbnic_mac_request_irq(struct fbnic_dev *fbd) fbnic_wr32(fbd, FBNIC_INTR_MSIX_CTRL(FBNIC_INTR_MSIX_CTRL_PCS_IDX), FBNIC_PCS_MSIX_ENTRY | FBNIC_INTR_MSIX_CTRL_ENABLE); + fbnic_wr32(fbd, FBNIC_INTR_MSIX_CTRL(FBNIC_INTR_MSIX_CTRL_RXB_IDX), 0); + fbd->mac_msix_vector = vector; return 0; diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_mac.c b/drivers/net/ethernet/meta/fbnic/fbnic_mac.c index 9d0e4b2cc9ac..805107ba3b10 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_mac.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_mac.c @@ -143,6 +143,7 @@ static void fbnic_mac_init_qm(struct fbnic_dev *fbd) #define FBNIC_DROP_EN_MASK 0x7d #define FBNIC_PAUSE_EN_MASK 0x14 #define FBNIC_ECN_EN_MASK 0x10 +#define FBNIC_PS_EN_MASK 0x01 struct fbnic_fifo_config { unsigned int addr; @@ -420,6 +421,14 @@ static void __fbnic_mac_stat_rd64(struct fbnic_dev *fbd, bool reset, u32 reg, #define fbnic_mac_stat_rd64(fbd, reset, __stat, __CSR) \ __fbnic_mac_stat_rd64(fbd, reset, FBNIC_##__CSR##_L, &(__stat)) +bool fbnic_mac_check_tx_pause(struct fbnic_dev *fbd) +{ + u32 command_config; + + command_config = rd32(fbd, FBNIC_MAC_COMMAND_CONFIG); + return !(command_config & FBNIC_MAC_COMMAND_CONFIG_TX_PAUSE_DIS); +} + static void fbnic_mac_tx_pause_config(struct fbnic_dev *fbd, bool tx_pause) { u32 rxb_pause_ctrl; @@ -434,6 +443,49 @@ static void fbnic_mac_tx_pause_config(struct fbnic_dev *fbd, bool tx_pause) wr32(fbd, FBNIC_RXB_PAUSE_DROP_CTRL, rxb_pause_ctrl); } +static void +fbnic_mac_ps_protect_to_reset(struct fbnic_dev *fbd, u16 timeout_ms) +{ + wr32(fbd, FBNIC_RXB_PAUSE_STORM_UNIT_WR, FBNIC_RXB_PS_CLK_DIV); + + wr32(fbd, FBNIC_RXB_PAUSE_STORM(FBNIC_RXB_INTF_NET), + FIELD_PREP(FBNIC_RXB_PAUSE_STORM_THLD_TIME, + FBNIC_MAC_RXB_PS_TO(timeout_ms)) | + FBNIC_RXB_PAUSE_STORM_FORCE_NORMAL); + wrfl(fbd); + wr32(fbd, FBNIC_RXB_PAUSE_STORM(FBNIC_RXB_INTF_NET), + FIELD_PREP(FBNIC_RXB_PAUSE_STORM_THLD_TIME, + FBNIC_MAC_RXB_PS_TO(timeout_ms))); +} + +static void +fbnic_mac_ps_protect_config(struct fbnic_dev *fbd, bool ps_protect) +{ + u16 timeout; + u32 reg; + + ps_protect = ps_protect && fbd->ps_timeout; + timeout = ps_protect ? fbd->ps_timeout : FBNIC_MAC_PS_TO_DEFAULT_MS; + + fbnic_mac_ps_protect_to_reset(fbd, timeout); + + reg = rd32(fbd, FBNIC_RXB_PAUSE_DROP_CTRL); + reg &= ~FBNIC_RXB_PAUSE_DROP_CTRL_PS_ENABLE; + reg |= FIELD_PREP(FBNIC_RXB_PAUSE_DROP_CTRL_PS_ENABLE, ps_protect); + wr32(fbd, FBNIC_RXB_PAUSE_DROP_CTRL, reg); + + /* Clear any pending interrupt status first */ + wr32(fbd, FBNIC_RXB_ERR_INTR_STS, + FIELD_PREP(FBNIC_RXB_ERR_INTR_STS_PS, FBNIC_PS_EN_MASK)); + + /* Unmask the Network to Host PS interrupt if tx_pause is on */ + reg = rd32(fbd, FBNIC_RXB_ERR_INTR_MASK); + reg |= FBNIC_RXB_ERR_INTR_STS_PS; + if (ps_protect) + reg &= ~FBNIC_RXB_ERR_INTR_STS_PS; + wr32(fbd, FBNIC_RXB_ERR_INTR_MASK, reg); +} + static int fbnic_mac_get_link_event(struct fbnic_dev *fbd) { u32 intr_mask = rd32(fbd, FBNIC_SIG_PCS_INTR_STS); @@ -658,6 +710,7 @@ static void fbnic_mac_link_up_asic(struct fbnic_dev *fbd, u32 cmd_cfg, mac_ctrl; fbnic_mac_tx_pause_config(fbd, tx_pause); + fbnic_mac_ps_protect_config(fbd, tx_pause); cmd_cfg = __fbnic_mac_cmd_config_asic(fbd, tx_pause, rx_pause); mac_ctrl = rd32(fbd, FBNIC_SIG_MAC_IN0); @@ -918,3 +971,46 @@ int fbnic_mac_init(struct fbnic_dev *fbd) return 0; } + +int fbnic_mac_ps_protect_to_config(struct fbnic_dev *fbd, u16 timeout_ms) +{ + u16 old_timeout_ms = fbd->ps_timeout; + + if (timeout_ms == old_timeout_ms) + return 0; + + if (timeout_ms == PFC_STORM_PREVENTION_AUTO) + timeout_ms = FBNIC_MAC_PS_TO_DEFAULT_MS; + + if (timeout_ms > FBNIC_MAC_PS_TO_MAX_MS) + return -EINVAL; + + fbd->ps_timeout = timeout_ms; + + if (!fbnic_mac_check_tx_pause(fbd)) + return 0; + + if (timeout_ms == 0) + fbnic_mac_ps_protect_config(fbd, false); + else if (old_timeout_ms == 0) + fbnic_mac_ps_protect_config(fbd, true); + else + fbnic_mac_ps_protect_to_reset(fbd, fbd->ps_timeout); + + return 0; +} + +void fbnic_mac_ps_protect_handler(struct fbnic_dev *fbd) +{ + u32 rxb_err_sts = rd32(fbd, FBNIC_RXB_ERR_INTR_STS); + + /* Check if pause storm interrupt for network was triggered */ + if (rxb_err_sts & FIELD_PREP(FBNIC_RXB_ERR_INTR_STS_PS, + FBNIC_PS_EN_MASK)) { + /* Write 1 to clear the interrupt status first */ + wr32(fbd, FBNIC_RXB_ERR_INTR_STS, + FIELD_PREP(FBNIC_RXB_ERR_INTR_STS_PS, FBNIC_PS_EN_MASK)); + + fbnic_mac_ps_protect_to_reset(fbd, fbd->ps_timeout); + } +} diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_mac.h b/drivers/net/ethernet/meta/fbnic/fbnic_mac.h index f08fe8b7c497..10f30e0e8f69 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_mac.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_mac.h @@ -8,6 +8,30 @@ struct fbnic_dev; +/* The RXB clock runs at 600 MHZ in the ASIC and the PAUSE_STORM_UNIT_WR + * is 10us granularity, so set the clock to 6000 (0x1770) + */ +#define FBNIC_RXB_PS_CLK_DIV 0x1770 + +/* Convert milliseconds to pause storm timeout units (10us granularity) */ +#define FBNIC_MAC_RXB_PS_TO(ms) ((ms) * 100) + +/* Convert pause storm timeout units (10us granularity) to milliseconds */ +#define FBNIC_MAC_RXB_PS_TO_MS(ps) ((ps) / 100) + +/* Set the default timer to 500ms, which should be longer than any + * reasonable period of continuous pausing. The service task, which runs + * once per second, periodically resets the pause storm trigger. + * + * As a result, on a functioning system, if pause continues, we enforce + * a duty cycle determined by the configured pause storm timeout (50% + * default). A crashed system will not have the service task and therefore + * pause will remain disabled until reboot recovery. + */ +#define FBNIC_MAC_PS_TO_DEFAULT_MS 500 +#define FBNIC_MAC_PS_TO_MAX_MS \ + FBNIC_MAC_RXB_PS_TO_MS(FIELD_MAX(FBNIC_RXB_PAUSE_STORM_THLD_TIME)) + #define FBNIC_MAX_JUMBO_FRAME_SIZE 9742 /* States loosely based on section 136.8.11.7.5 of IEEE 802.3-2022 Ethernet @@ -119,4 +143,7 @@ struct fbnic_mac { int fbnic_mac_init(struct fbnic_dev *fbd); void fbnic_mac_get_fw_settings(struct fbnic_dev *fbd, u8 *aui, u8 *fec); +int fbnic_mac_ps_protect_to_config(struct fbnic_dev *fbd, u16 timeout); +void fbnic_mac_ps_protect_handler(struct fbnic_dev *fbd); +bool fbnic_mac_check_tx_pause(struct fbnic_dev *fbd); #endif /* _FBNIC_MAC_H_ */ diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c index 3fa9d1910daa..e3aebbe3656d 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c @@ -220,6 +220,9 @@ static void fbnic_service_task(struct work_struct *work) fbnic_get_hw_stats32(fbd); + if (fbd->ps_timeout && fbnic_mac_check_tx_pause(fbd)) + fbnic_mac_ps_protect_handler(fbd); + fbnic_fw_check_heartbeat(fbd); fbnic_health_check(fbd); @@ -296,6 +299,8 @@ static int fbnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Populate driver with hardware-specific info and handlers */ fbd->max_num_queues = info->max_num_queues; + fbd->ps_timeout = FBNIC_MAC_PS_TO_DEFAULT_MS; + pci_set_master(pdev); pci_save_state(pdev); From 8d282b680c729203d04d4eee396f3216f29b35aa Mon Sep 17 00:00:00 2001 From: Mohsin Bashir Date: Mon, 2 Mar 2026 15:01:48 -0800 Subject: [PATCH 0297/1561] eth: fbnic: Fetch TX pause storm stats With pause storm protection in place, track the occurrence of pause storm events. Since there is a one-to-one mapping between pause storm interrupts and events, use the interrupt count to track this metric. ./ethtool -I -a eth0 Pause parameters for eth0: Autonegotiate: off RX: off TX: on Statistics: tx_pause_frames: 759657 rx_pause_frames: 0 tx_pause_storm_events: 219 Signed-off-by: Jakub Kicinski Signed-off-by: Mohsin Bashir Link: https://patch.msgid.link/20260302230149.1580195-5-mohsin.bashr@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/meta/fbnic/fbnic_csr.h | 1 + drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c | 3 +++ drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.h | 1 + drivers/net/ethernet/meta/fbnic/fbnic_mac.c | 15 +++++++++++++++ 4 files changed, 20 insertions(+) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h index e68c56237b61..72eb22a52572 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h @@ -627,6 +627,7 @@ enum { FBNIC_RXB_ENQUEUE_INDICES = 4 }; +#define FBNIC_RXB_INTR_PS_COUNT(n) (0x080e9 + (n)) /* 0x203a4 + 4*n */ #define FBNIC_RXB_DRBO_FRM_CNT_SRC(n) (0x080f9 + (n)) /* 0x203e4 + 4*n */ #define FBNIC_RXB_DRBO_BYTE_CNT_SRC_L(n) \ (0x080fd + (n)) /* 0x203f4 + 4*n */ diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c index ade9e667640f..70c995b8d1bd 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c @@ -1754,6 +1754,7 @@ fbnic_get_pause_stats(struct net_device *netdev, struct fbnic_net *fbn = netdev_priv(netdev); struct fbnic_mac_stats *mac_stats; struct fbnic_dev *fbd = fbn->fbd; + u64 tx_ps_events; mac_stats = &fbd->hw_stats.mac; @@ -1761,6 +1762,8 @@ fbnic_get_pause_stats(struct net_device *netdev, pause_stats->tx_pause_frames = mac_stats->pause.tx_pause_frames.value; pause_stats->rx_pause_frames = mac_stats->pause.rx_pause_frames.value; + tx_ps_events = mac_stats->pause.tx_pause_storm_events.value; + pause_stats->tx_pause_storm_events = tx_ps_events; } static void diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.h b/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.h index aa3f429a9aed..caea4be46762 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.h @@ -54,6 +54,7 @@ struct fbnic_rmon_stats { struct fbnic_pause_stats { struct fbnic_stat_counter tx_pause_frames; struct fbnic_stat_counter rx_pause_frames; + struct fbnic_stat_counter tx_pause_storm_events; }; struct fbnic_eth_mac_stats { diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_mac.c b/drivers/net/ethernet/meta/fbnic/fbnic_mac.c index 805107ba3b10..53b7a938b4c2 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_mac.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_mac.c @@ -418,6 +418,18 @@ static void __fbnic_mac_stat_rd64(struct fbnic_dev *fbd, bool reset, u32 reg, stat->reported = true; } +static void fbnic_mac_stat_rd32(struct fbnic_dev *fbd, bool reset, u32 reg, + struct fbnic_stat_counter *stat) +{ + u32 new_reg_value; + + new_reg_value = rd32(fbd, reg); + if (!reset) + stat->value += new_reg_value - stat->u.old_reg_value_32; + stat->u.old_reg_value_32 = new_reg_value; + stat->reported = true; +} + #define fbnic_mac_stat_rd64(fbd, reset, __stat, __CSR) \ __fbnic_mac_stat_rd64(fbd, reset, FBNIC_##__CSR##_L, &(__stat)) @@ -812,6 +824,9 @@ fbnic_mac_get_pause_stats(struct fbnic_dev *fbd, bool reset, MAC_STAT_TX_XOFF_STB); fbnic_mac_stat_rd64(fbd, reset, pause_stats->rx_pause_frames, MAC_STAT_RX_XOFF_STB); + fbnic_mac_stat_rd32(fbd, reset, + FBNIC_RXB_INTR_PS_COUNT(FBNIC_RXB_INTF_NET), + &pause_stats->tx_pause_storm_events); } static void From cc663d3fed062fb41b59df953a2cb9df5f56f943 Mon Sep 17 00:00:00 2001 From: Mohsin Bashir Date: Mon, 2 Mar 2026 15:01:49 -0800 Subject: [PATCH 0298/1561] eth: mlx5: Move pause storm errors to pause stats Report device_stall_critical_watermark_cnt as tx_pause_storm_events in the ethtool_pause_stats struct. This counter tracks pause storm error events which indicate the NIC has been sending pause frames for an extended period due to a stall. The ethtool_pause_stats struct reports these stalls as a single value, whereas the device supports tracking them per priority. Aggregate the counter across all priority classes to capture stalls on all priorities. Note that the stats are fetched from the device for each priority via mlx5_core_access_reg(). Signed-off-by: Jakub Kicinski Signed-off-by: Mohsin Bashir Link: https://patch.msgid.link/20260302230149.1580195-6-mohsin.bashr@gmail.com Signed-off-by: Paolo Abeni --- .../ethernet/mellanox/mlx5/core/en_stats.c | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index a8af84fc9763..1a3ecf073913 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -916,11 +916,30 @@ static int mlx5e_stats_get_ieee(struct mlx5_core_dev *mdev, sz, MLX5_REG_PPCNT, 0, 0); } +static int mlx5e_stats_get_per_prio(struct mlx5_core_dev *mdev, + u32 *ppcnt_per_prio, int prio) +{ + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + + if (!(MLX5_CAP_PCAM_FEATURE(mdev, pfcc_mask) && + MLX5_CAP_DEBUG(mdev, stall_detect))) + return -EOPNOTSUPP; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP); + MLX5_SET(ppcnt_reg, in, prio_tc, prio); + return mlx5_core_access_reg(mdev, in, sz, ppcnt_per_prio, sz, + MLX5_REG_PPCNT, 0, 0); +} + void mlx5e_stats_pause_get(struct mlx5e_priv *priv, struct ethtool_pause_stats *pause_stats) { u32 ppcnt_ieee_802_3[MLX5_ST_SZ_DW(ppcnt_reg)]; struct mlx5_core_dev *mdev = priv->mdev; + u64 ps_stats = 0; + int prio; if (mlx5e_stats_get_ieee(mdev, ppcnt_ieee_802_3)) return; @@ -933,6 +952,17 @@ void mlx5e_stats_pause_get(struct mlx5e_priv *priv, MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3, eth_802_3_cntrs_grp_data_layout, a_pause_mac_ctrl_frames_received); + + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + if (mlx5e_stats_get_per_prio(mdev, ppcnt_ieee_802_3, prio)) + return; + + ps_stats += MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3, + eth_per_prio_grp_data_layout, + device_stall_critical_watermark_cnt); + } + + pause_stats->tx_pause_storm_events = ps_stats; } void mlx5e_stats_eth_phy_get(struct mlx5e_priv *priv, From 752941e3faf6be26c6b5a118e37bdbaea2b97171 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kry=C5=A1tof=20=C4=8Cern=C3=BD?= Date: Wed, 4 Mar 2026 13:03:10 +0100 Subject: [PATCH 0299/1561] net: phy: realtek: Add support for PHY LEDs on RTL8211F-VD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Realtek RTL8211F-VD has the same LED configuration and registers as RTL8211F. Use the existing LED related functions for this chip, so it is possible to also use the netdev trigger. Tested on ROCK Pi E. Signed-off-by: Kryštof Černý Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20260304-rtl8211fvd-add-leds-v2-1-d50bd8a50f08@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/realtek/realtek_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/phy/realtek/realtek_main.c b/drivers/net/phy/realtek/realtek_main.c index 75565fbdbf6d..530b4e26d16e 100644 --- a/drivers/net/phy/realtek/realtek_main.c +++ b/drivers/net/phy/realtek/realtek_main.c @@ -2243,6 +2243,9 @@ static struct phy_driver realtek_drvs[] = { .read_page = rtl821x_read_page, .write_page = rtl821x_write_page, .flags = PHY_ALWAYS_CALL_SUSPEND, + .led_hw_is_supported = rtl8211x_led_hw_is_supported, + .led_hw_control_get = rtl8211f_led_hw_control_get, + .led_hw_control_set = rtl8211f_led_hw_control_set, }, { .name = "Generic FE-GE Realtek PHY", .match_phy_device = rtlgen_match_phy_device, From cfcceb7a39fc10a6f896af8229bf81d96acb22cc Mon Sep 17 00:00:00 2001 From: Keita Morisaki Date: Wed, 4 Mar 2026 20:15:17 +0900 Subject: [PATCH 0300/1561] tcp: shrink per-packet memset in __tcp_transmit_skb() Use struct_group() to group the three fields in tcp_out_options that are read unconditionally by tcp_options_write() and bpf_skops_write_hdr_opt() (mss, bpf_opt_len, num_sack_blocks), then replace the full-struct memset with a targeted memset of only that group. struct tcp_out_options is 40 bytes without MPTCP and 96 bytes with CONFIG_MPTCP=y (typical distro config). Every remaining field is either assigned before first use by tcp_established_options()/tcp_syn_options(), or gated behind its OPTION_* flag in tcp_options_write(). This memset runs on every transmitted TCP packet, so shrinking it from 96 (or 40) bytes to 4 bytes reduces per-packet overhead on the hot path. Assembly comparison (x86-64, GCC 13, CONFIG_MPTCP=y): Before: rep stos zeroing 96 bytes (5 instructions, 12 8-byte stores) After: movl $0x0 zeroing 4 bytes (1 instruction, 1 store) Also add opts->options = 0 at the top of tcp_syn_options(), which already used |= without a prior clear. tcp_established_options() already clears opts->options at its top. Reviewed-by: Jakub Sitnicki Signed-off-by: Keita Morisaki Acked-by: Matthieu Baerts (NGI0) Reviewed-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260304111517.2088694-1-kmta1236@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_output.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 46bd48cf776a..f0ebcc7e2871 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -429,14 +429,19 @@ static void smc_options_write(__be32 *ptr, u16 *options) } struct tcp_out_options { + /* Following group is cleared in __tcp_transmit_skb() */ + struct_group(cleared, + u16 mss; /* 0 to disable */ + u8 bpf_opt_len; /* length of BPF hdr option */ + u8 num_sack_blocks; /* number of SACK blocks to include */ + ); + + /* Caution: following fields are not cleared in __tcp_transmit_skb() */ u16 options; /* bit field of OPTION_* */ - u16 mss; /* 0 to disable */ u8 ws; /* window scale, 0 to disable */ - u8 num_sack_blocks; /* number of SACK blocks to include */ u8 num_accecn_fields:7, /* number of AccECN fields needed */ use_synack_ecn_bytes:1; /* Use synack_ecn_bytes or not */ u8 hash_size; /* bytes in hash_location */ - u8 bpf_opt_len; /* length of BPF hdr option */ __u8 *hash_location; /* temporary pointer, overloaded */ __u32 tsval, tsecr; /* need to include OPTION_TS */ struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */ @@ -965,6 +970,8 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, struct tcp_fastopen_request *fastopen = tp->fastopen_req; bool timestamps; + opts->options = 0; + /* Better than switch (key.type) as it has static branches */ if (tcp_key_is_md5(key)) { timestamps = false; @@ -1565,7 +1572,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, inet = inet_sk(sk); tcb = TCP_SKB_CB(skb); - memset(&opts, 0, sizeof(opts)); + memset(&opts.cleared, 0, sizeof(opts.cleared)); tcp_get_current_key(sk, &key); if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) { From 58a4c3e8006504bf18e0ddd2dcba6f414bed7680 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Wed, 4 Mar 2026 11:39:48 -0800 Subject: [PATCH 0301/1561] octeontx2-af: make PF_FUNC comparison consistent in NIX XOFF handling nix_smq_flush_enadis_xoff() compares PF_FUNC values with the FUNC bits masked off, but one operand applied the mask before extracting PF_FUNC via TXSCH_MAP_FUNC(). Apply RVU_PFVF_FUNC_MASK after TXSCH_MAP_FUNC() for the TL2 scheduler queue operand, matching the existing handling of the other operand and making the comparison consistent and clearer. No functional change intended. Signed-off-by: Alok Tiwari Reviewed-by: Subbaraya Sundeep Link: https://patch.msgid.link/20260304193950.2467391-1-alok.a.tiwari@oracle.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index badfa1d64252..ef5b081162eb 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -2391,8 +2391,8 @@ static void nix_smq_flush_enadis_xoff(struct rvu *rvu, int blkaddr, continue; /* skip if PF_FUNC doesn't match */ if ((TXSCH_MAP_FUNC(txsch->pfvf_map[tl2]) & ~RVU_PFVF_FUNC_MASK) != - (TXSCH_MAP_FUNC(txsch->pfvf_map[tl2_schq] & - ~RVU_PFVF_FUNC_MASK))) + (TXSCH_MAP_FUNC(txsch->pfvf_map[tl2_schq]) & + ~RVU_PFVF_FUNC_MASK)) continue; /* enable/disable XOFF */ regoff = NIX_AF_TL2X_SW_XOFF(tl2); From 54f5a89da9e083322a0af171126020d509593414 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Wed, 4 Mar 2026 11:57:36 -0800 Subject: [PATCH 0302/1561] net: mdio: xgene: Fix misleading err message in xgene mdio read xgene_xfi_mdio_read() prints "write failed" when the MDIO management interface remains busy and the read times out. Update the message to "read failed" to match the operation. Signed-off-by: Alok Tiwari Reviewed-by: Andrew Lunn Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20260304195755.2468204-1-alok.a.tiwari@oracle.com Signed-off-by: Jakub Kicinski --- drivers/net/mdio/mdio-xgene.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/mdio/mdio-xgene.c b/drivers/net/mdio/mdio-xgene.c index a8f91a4b7fed..ede6b9ddc426 100644 --- a/drivers/net/mdio/mdio-xgene.c +++ b/drivers/net/mdio/mdio-xgene.c @@ -250,7 +250,7 @@ static int xgene_xfi_mdio_read(struct mii_bus *bus, int phy_id, int reg) } while ((status & BUSY_MASK) && timeout--); if (status & BUSY_MASK) { - pr_err("XGENET_MII_MGMT write failed\n"); + pr_err("XGENET_MII_MGMT read failed\n"); return -EBUSY; } From 46cb1fcdb75b2dab2f3ed62caad04fe939549943 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 4 Mar 2026 02:27:06 +0000 Subject: [PATCH 0303/1561] tcp: move tcp_v6_early_demux() to net/ipv6/ip6_input.c tcp_v6_early_demux() has a single caller : ip6_rcv_finish_core(). Move it to net/ipv6/ip6_input.c and mark it static, for possible compiler/linker optimizations. Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260304022706.1062459-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 1 - net/ipv6/ip6_input.c | 40 ++++++++++++++++++++++++++++++++++++++++ net/ipv6/tcp_ipv6.c | 38 -------------------------------------- 3 files changed, 40 insertions(+), 39 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 3f1fe954e6aa..a64641423806 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1142,7 +1142,6 @@ static inline int tcp_v6_sdif(const struct sk_buff *skb) extern const struct inet_connection_sock_af_ops ipv6_specific; INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *skb)); -void tcp_v6_early_demux(struct sk_buff *skb); #endif diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 2bcb981c91aa..967b07aeb683 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -44,6 +44,46 @@ #include #include #include +#include + +static void tcp_v6_early_demux(struct sk_buff *skb) +{ + struct net *net = dev_net_rcu(skb->dev); + const struct ipv6hdr *hdr; + const struct tcphdr *th; + struct sock *sk; + + if (skb->pkt_type != PACKET_HOST) + return; + + if (!pskb_may_pull(skb, skb_transport_offset(skb) + + sizeof(struct tcphdr))) + return; + + hdr = ipv6_hdr(skb); + th = tcp_hdr(skb); + + if (th->doff < sizeof(struct tcphdr) / 4) + return; + + /* Note : We use inet6_iif() here, not tcp_v6_iif() */ + sk = __inet6_lookup_established(net, &hdr->saddr, th->source, + &hdr->daddr, ntohs(th->dest), + inet6_iif(skb), inet6_sdif(skb)); + if (sk) { + skb->sk = sk; + skb->destructor = sock_edemux; + if (sk_fullsock(sk)) { + struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst); + + if (dst) + dst = dst_check(dst, sk->sk_rx_dst_cookie); + if (dst && + sk->sk_rx_dst_ifindex == skb->skb_iif) + skb_dst_set_noref(skb, dst); + } + } +} static void ip6_rcv_finish_core(struct net *net, struct sock *sk, struct sk_buff *skb) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index a62dd2999aec..164dceb842af 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1972,44 +1972,6 @@ do_time_wait: goto discard_it; } -void tcp_v6_early_demux(struct sk_buff *skb) -{ - struct net *net = dev_net_rcu(skb->dev); - const struct ipv6hdr *hdr; - const struct tcphdr *th; - struct sock *sk; - - if (skb->pkt_type != PACKET_HOST) - return; - - if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr))) - return; - - hdr = ipv6_hdr(skb); - th = tcp_hdr(skb); - - if (th->doff < sizeof(struct tcphdr) / 4) - return; - - /* Note : We use inet6_iif() here, not tcp_v6_iif() */ - sk = __inet6_lookup_established(net, &hdr->saddr, th->source, - &hdr->daddr, ntohs(th->dest), - inet6_iif(skb), inet6_sdif(skb)); - if (sk) { - skb->sk = sk; - skb->destructor = sock_edemux; - if (sk_fullsock(sk)) { - struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst); - - if (dst) - dst = dst_check(dst, sk->sk_rx_dst_cookie); - if (dst && - sk->sk_rx_dst_ifindex == skb->skb_iif) - skb_dst_set_noref(skb, dst); - } - } -} - static struct timewait_sock_ops tcp6_timewait_sock_ops = { .twsk_obj_size = sizeof(struct tcp6_timewait_sock), }; From fc8ca5da896e22d29837f40032c08c486dcda76a Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 3 Mar 2026 15:53:35 +0000 Subject: [PATCH 0304/1561] net: stmmac: qcom-ethqos: move ethqos_set_serdes_speed() Combine ethqos_set_serdes_speed() with ethqos_mac_finish_serdes() to simplify the code. Reviewed-by: Mohd Ayaan Anwar Tested-by: Mohd Ayaan Anwar Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vxS3z-0000000BQXO-2WpU@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../stmicro/stmmac/dwmac-qcom-ethqos.c | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index ad5b5d950fff..57cbe800f652 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -591,14 +591,6 @@ static void ethqos_configure_rgmii(struct qcom_ethqos *ethqos, ethqos_rgmii_macro_init(ethqos, speed); } -static void ethqos_set_serdes_speed(struct qcom_ethqos *ethqos, int speed) -{ - if (ethqos->serdes_speed != speed) { - phy_set_speed(ethqos->serdes_phy, speed); - ethqos->serdes_speed = speed; - } -} - static void ethqos_pcs_set_inband(struct stmmac_priv *priv, bool enable) { stmmac_pcs_ctrl_ane(priv, enable, 0); @@ -683,15 +675,23 @@ static int ethqos_mac_finish_serdes(struct net_device *ndev, void *priv, phy_interface_t interface) { struct qcom_ethqos *ethqos = priv; + int speed, ret = 0; qcom_ethqos_set_sgmii_loopback(ethqos, false); + speed = SPEED_UNKNOWN; if (interface == PHY_INTERFACE_MODE_SGMII) - ethqos_set_serdes_speed(ethqos, SPEED_1000); + speed = SPEED_1000; else if (interface == PHY_INTERFACE_MODE_2500BASEX) - ethqos_set_serdes_speed(ethqos, SPEED_2500); + speed = SPEED_2500; - return 0; + if (speed != SPEED_UNKNOWN && speed != ethqos->serdes_speed) { + ret = phy_set_speed(ethqos->serdes_phy, speed); + if (ret == 0) + ethqos->serdes_speed = speed; + } + + return ret; } static int ethqos_clks_config(void *priv, bool enabled) From 4999e0a2ab3464eb9e9aef141298123630d689e8 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 3 Mar 2026 15:53:40 +0000 Subject: [PATCH 0305/1561] net: stmmac: qcom-ethqos: convert to use phy_set_mode_ext() qcom-sgmii-eth now accepts the phy_set_mode*() calls to configure the SerDes, taking a PHY interface mode rather than a speed. This allows the elimination of the interface mode to speed conversion in ethqos_mac_finish_serdes(). Tested-by: Mohd Ayaan Anwar Signed-off-by: Russell King (Oracle) Reviewed-by: Mohd Ayaan Anwar Link: https://patch.msgid.link/E1vxS44-0000000BQXU-38lG@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../stmicro/stmmac/dwmac-qcom-ethqos.c | 23 ++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 57cbe800f652..8913f6f02b9e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -105,7 +105,7 @@ struct qcom_ethqos { struct clk *link_clk; struct phy *serdes_phy; - int serdes_speed; + phy_interface_t serdes_mode; phy_interface_t phy_mode; const struct ethqos_emac_por *rgmii_por; @@ -653,7 +653,8 @@ static int qcom_ethqos_serdes_powerup(struct net_device *ndev, void *priv) return ret; } - ret = phy_set_speed(ethqos->serdes_phy, ethqos->serdes_speed); + ret = phy_set_mode_ext(ethqos->serdes_phy, PHY_MODE_ETHERNET, + ethqos->serdes_mode); if (ret) { phy_power_off(ethqos->serdes_phy); phy_exit(ethqos->serdes_phy); @@ -675,20 +676,16 @@ static int ethqos_mac_finish_serdes(struct net_device *ndev, void *priv, phy_interface_t interface) { struct qcom_ethqos *ethqos = priv; - int speed, ret = 0; + int ret = 0; qcom_ethqos_set_sgmii_loopback(ethqos, false); - speed = SPEED_UNKNOWN; - if (interface == PHY_INTERFACE_MODE_SGMII) - speed = SPEED_1000; - else if (interface == PHY_INTERFACE_MODE_2500BASEX) - speed = SPEED_2500; - - if (speed != SPEED_UNKNOWN && speed != ethqos->serdes_speed) { - ret = phy_set_speed(ethqos->serdes_phy, speed); + if (interface == PHY_INTERFACE_MODE_SGMII || + interface == PHY_INTERFACE_MODE_2500BASEX) { + ret = phy_set_mode_ext(ethqos->serdes_phy, PHY_MODE_ETHERNET, + interface); if (ret == 0) - ethqos->serdes_speed = speed; + ethqos->serdes_mode = interface; } return ret; @@ -819,7 +816,6 @@ static int qcom_ethqos_probe(struct platform_device *pdev) return dev_err_probe(dev, PTR_ERR(ethqos->serdes_phy), "Failed to get serdes phy\n"); - ethqos->serdes_speed = SPEED_1000; ethqos_set_clk_tx_rate(ethqos, NULL, plat_dat->phy_interface, SPEED_1000); @@ -843,6 +839,7 @@ static int qcom_ethqos_probe(struct platform_device *pdev) plat_dat->host_dma_width = data->dma_addr_width; if (ethqos->serdes_phy) { + ethqos->serdes_mode = PHY_INTERFACE_MODE_SGMII; plat_dat->serdes_powerup = qcom_ethqos_serdes_powerup; plat_dat->serdes_powerdown = qcom_ethqos_serdes_powerdown; } From b7721597547de9e5cfaf0fc3f2720891f207ec77 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 3 Mar 2026 15:53:45 +0000 Subject: [PATCH 0306/1561] phy: qcom-sgmii-eth: remove .set_speed() implementation Now that the qcom-ethqos driver has migrated to use phy_set_mode_ext() rather than phy_set_speed() to configure the SerDes, the support for phy_set_speed() is now obsolete. Remove support for this method. Using the MAC speed for the SerDes is never correct due to the PCS encoding. For SGMII and 2500BASE-X, the PCS uses 8B10B encoding, and so: MAC rate * PCS output bits / PCS input bits = SerDes rate 1000M * 10 / 8 = 1250M 2500M * 10 / 8 = 3125M Tested-by: Mohd Ayaan Anwar Acked-by: Vinod Koul Reviewed-by: Vladimir Oltean Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vxS49-0000000BQXa-3Zcg@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/phy/qualcomm/phy-qcom-sgmii-eth.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-sgmii-eth.c b/drivers/phy/qualcomm/phy-qcom-sgmii-eth.c index 4ea3dce7719f..dcfdb7d0e8ea 100644 --- a/drivers/phy/qualcomm/phy-qcom-sgmii-eth.c +++ b/drivers/phy/qualcomm/phy-qcom-sgmii-eth.c @@ -318,16 +318,6 @@ static int qcom_dwmac_sgmii_phy_set_mode(struct phy *phy, enum phy_mode mode, return qcom_dwmac_sgmii_phy_calibrate(phy); } -static int qcom_dwmac_sgmii_phy_set_speed(struct phy *phy, int speed) -{ - struct qcom_dwmac_sgmii_phy_data *data = phy_get_drvdata(phy); - - if (speed != data->speed) - data->speed = speed; - - return qcom_dwmac_sgmii_phy_calibrate(phy); -} - static int qcom_dwmac_sgmii_phy_validate(struct phy *phy, enum phy_mode mode, int submode, union phy_configure_opts *opts) @@ -341,7 +331,6 @@ static const struct phy_ops qcom_dwmac_sgmii_phy_ops = { .power_on = qcom_dwmac_sgmii_phy_power_on, .power_off = qcom_dwmac_sgmii_phy_power_off, .set_mode = qcom_dwmac_sgmii_phy_set_mode, - .set_speed = qcom_dwmac_sgmii_phy_set_speed, .validate = qcom_dwmac_sgmii_phy_validate, .calibrate = qcom_dwmac_sgmii_phy_calibrate, .owner = THIS_MODULE, From d2b20acdaed80a41fe191e326083fd71007f3456 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 3 Mar 2026 15:53:50 +0000 Subject: [PATCH 0307/1561] phy: qcom-sgmii-eth: use PHY interface mode for SerDes settings As established in the previous commit, using SPEED_1000 and SPEED_2500 does not make sense for a SerDes due to the PCS encoding that is used over the SerDes link, which inflates the data rate at the SerDes. Thus, the use of these constants in a SerDes driver is incorrect. Since qcom-sgmii-eth no longer implements phy_set_speed(), but instead uses the PHY interface mode passed via the .set_mode() method, convert the driver to use the PHY interface mode internally to decide whether to configure the SerDes for 1.25Gbps or 3.125Gbps mode. Tested-by: Mohd Ayaan Anwar Acked-by: Vinod Koul Reviewed-by: Vladimir Oltean Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vxS4E-0000000BQXg-46dJ@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/phy/qualcomm/phy-qcom-sgmii-eth.c | 41 ++++++++++++----------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-sgmii-eth.c b/drivers/phy/qualcomm/phy-qcom-sgmii-eth.c index dcfdb7d0e8ea..58ff15601206 100644 --- a/drivers/phy/qualcomm/phy-qcom-sgmii-eth.c +++ b/drivers/phy/qualcomm/phy-qcom-sgmii-eth.c @@ -29,7 +29,7 @@ struct qcom_dwmac_sgmii_phy_data { struct regmap *regmap; struct clk *refclk; - int speed; + phy_interface_t interface; }; static void qcom_dwmac_sgmii_phy_init_1g(struct regmap *regmap) @@ -223,15 +223,18 @@ static int qcom_dwmac_sgmii_phy_calibrate(struct phy *phy) struct qcom_dwmac_sgmii_phy_data *data = phy_get_drvdata(phy); struct device *dev = phy->dev.parent; - switch (data->speed) { - case SPEED_10: - case SPEED_100: - case SPEED_1000: + switch (data->interface) { + case PHY_INTERFACE_MODE_SGMII: + case PHY_INTERFACE_MODE_1000BASEX: + /* 1.25Gbps mode */ qcom_dwmac_sgmii_phy_init_1g(data->regmap); break; - case SPEED_2500: + case PHY_INTERFACE_MODE_2500BASEX: + /* 3.125Gbps mode */ qcom_dwmac_sgmii_phy_init_2p5g(data->regmap); break; + default: + return -EINVAL; } if (qcom_dwmac_sgmii_phy_poll_status(data->regmap, @@ -287,17 +290,15 @@ static int qcom_dwmac_sgmii_phy_power_off(struct phy *phy) return 0; } -static int qcom_dwmac_sgmii_phy_speed(enum phy_mode mode, int submode) +static int qcom_dwmac_sgmii_phy_interface(enum phy_mode mode, int submode) { if (mode != PHY_MODE_ETHERNET) return -EINVAL; if (submode == PHY_INTERFACE_MODE_SGMII || - submode == PHY_INTERFACE_MODE_1000BASEX) - return SPEED_1000; - - if (submode == PHY_INTERFACE_MODE_2500BASEX) - return SPEED_2500; + submode == PHY_INTERFACE_MODE_1000BASEX || + submode == PHY_INTERFACE_MODE_2500BASEX) + return submode; return -EINVAL; } @@ -306,14 +307,14 @@ static int qcom_dwmac_sgmii_phy_set_mode(struct phy *phy, enum phy_mode mode, int submode) { struct qcom_dwmac_sgmii_phy_data *data = phy_get_drvdata(phy); - int speed; + int interface; - speed = qcom_dwmac_sgmii_phy_speed(mode, submode); - if (speed < 0) - return speed; + interface = qcom_dwmac_sgmii_phy_interface(mode, submode); + if (interface < 0) + return interface; - if (speed != data->speed) - data->speed = speed; + if (interface != data->interface) + data->interface = interface; return qcom_dwmac_sgmii_phy_calibrate(phy); } @@ -322,7 +323,7 @@ static int qcom_dwmac_sgmii_phy_validate(struct phy *phy, enum phy_mode mode, int submode, union phy_configure_opts *opts) { - int ret = qcom_dwmac_sgmii_phy_speed(mode, submode); + int ret = qcom_dwmac_sgmii_phy_interface(mode, submode); return ret < 0 ? ret : 0; } @@ -356,7 +357,7 @@ static int qcom_dwmac_sgmii_phy_probe(struct platform_device *pdev) if (!data) return -ENOMEM; - data->speed = SPEED_10; + data->interface = PHY_INTERFACE_MODE_SGMII; base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(base)) From f82210ce8cb8015a06af48a2732ce17a2e903f71 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 3 Mar 2026 15:53:56 +0000 Subject: [PATCH 0308/1561] phy: qcom-sgmii-eth: remove qcom_dwmac_sgmii_phy_interface() Now that qcom_dwmac_sgmii_phy_interface() only serves to validate the passed interface mode, combine it with qcom_dwmac_sgmii_phy_validate(), and use qcom_dwmac_sgmii_phy_validate() to validate the mode in qcom_dwmac_sgmii_phy_set_mode(). Tested-by: Mohd Ayaan Anwar Acked-by: Vinod Koul Reviewed-by: Vladimir Oltean Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vxS4K-0000000BQXm-0OJL@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/phy/qualcomm/phy-qcom-sgmii-eth.c | 27 +++++++++-------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-sgmii-eth.c b/drivers/phy/qualcomm/phy-qcom-sgmii-eth.c index 58ff15601206..6332ff291fdf 100644 --- a/drivers/phy/qualcomm/phy-qcom-sgmii-eth.c +++ b/drivers/phy/qualcomm/phy-qcom-sgmii-eth.c @@ -290,7 +290,9 @@ static int qcom_dwmac_sgmii_phy_power_off(struct phy *phy) return 0; } -static int qcom_dwmac_sgmii_phy_interface(enum phy_mode mode, int submode) +static int qcom_dwmac_sgmii_phy_validate(struct phy *phy, enum phy_mode mode, + int submode, + union phy_configure_opts *opts) { if (mode != PHY_MODE_ETHERNET) return -EINVAL; @@ -298,7 +300,7 @@ static int qcom_dwmac_sgmii_phy_interface(enum phy_mode mode, int submode) if (submode == PHY_INTERFACE_MODE_SGMII || submode == PHY_INTERFACE_MODE_1000BASEX || submode == PHY_INTERFACE_MODE_2500BASEX) - return submode; + return 0; return -EINVAL; } @@ -307,27 +309,18 @@ static int qcom_dwmac_sgmii_phy_set_mode(struct phy *phy, enum phy_mode mode, int submode) { struct qcom_dwmac_sgmii_phy_data *data = phy_get_drvdata(phy); - int interface; + int ret; - interface = qcom_dwmac_sgmii_phy_interface(mode, submode); - if (interface < 0) - return interface; + ret = qcom_dwmac_sgmii_phy_validate(phy, mode, submode, NULL); + if (ret) + return ret; - if (interface != data->interface) - data->interface = interface; + if (submode != data->interface) + data->interface = submode; return qcom_dwmac_sgmii_phy_calibrate(phy); } -static int qcom_dwmac_sgmii_phy_validate(struct phy *phy, enum phy_mode mode, - int submode, - union phy_configure_opts *opts) -{ - int ret = qcom_dwmac_sgmii_phy_interface(mode, submode); - - return ret < 0 ? ret : 0; -} - static const struct phy_ops qcom_dwmac_sgmii_phy_ops = { .power_on = qcom_dwmac_sgmii_phy_power_on, .power_off = qcom_dwmac_sgmii_phy_power_off, From ebe8b48b88ad012cf6067226e184e9173b7ea9d6 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 3 Mar 2026 15:54:01 +0000 Subject: [PATCH 0309/1561] phy: qcom-sgmii-eth: relax order of .power_on() vs .set_mode*() Allow any order of the .power_on() and .set_mode*() methods as per the recent discussion. This means phy_power_on() with this SerDes will now restore the previous setup without requiring a subsequent phy_set_mode*() call. Tested-by: Mohd Ayaan Anwar Acked-by: Vinod Koul Reviewed-by: Vladimir Oltean Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vxS4P-0000000BQXs-0vGB@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/phy/qualcomm/phy-qcom-sgmii-eth.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-sgmii-eth.c b/drivers/phy/qualcomm/phy-qcom-sgmii-eth.c index 6332ff291fdf..f48faa2929a6 100644 --- a/drivers/phy/qualcomm/phy-qcom-sgmii-eth.c +++ b/drivers/phy/qualcomm/phy-qcom-sgmii-eth.c @@ -271,8 +271,17 @@ static int qcom_dwmac_sgmii_phy_calibrate(struct phy *phy) static int qcom_dwmac_sgmii_phy_power_on(struct phy *phy) { struct qcom_dwmac_sgmii_phy_data *data = phy_get_drvdata(phy); + int ret; - return clk_prepare_enable(data->refclk); + ret = clk_prepare_enable(data->refclk); + if (ret < 0) + return ret; + + ret = qcom_dwmac_sgmii_phy_calibrate(phy); + if (ret < 0) + clk_disable_unprepare(data->refclk); + + return ret; } static int qcom_dwmac_sgmii_phy_power_off(struct phy *phy) @@ -318,6 +327,9 @@ static int qcom_dwmac_sgmii_phy_set_mode(struct phy *phy, enum phy_mode mode, if (submode != data->interface) data->interface = submode; + if (phy->power_count == 0) + return 0; + return qcom_dwmac_sgmii_phy_calibrate(phy); } From 038a8e8eb90d4dd06ce6bda825970c71af09f4b0 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 3 Mar 2026 15:54:06 +0000 Subject: [PATCH 0310/1561] net: stmmac: qcom-ethqos: remove phy_set_mode_ext() after phy_power_on() The call to phy_set_mode_ext() after phy_power_on() was a work-around for the qcom-sgmii-eth SerDes driver that only re-enabled its clocks on phy_power_on() but did not configure the PHY. Now that the SerDes driver fully configures the SerDes at phy_power_on(), there is no need to call phy_set_mode_ext() immediately afterwards. This also means we no longer need to record the previous operating mode of the driver - this is up to the SerDes driver. In any case, the only thing that we care about is the SerDes provides the necessary clocks to the stmmac core to allow it to reset at this point. The actual mode is irrelevant at this point as the correct mode will be configured in ethqos_mac_finish_serdes() just before the network device is brought online. Reviewed-by: Mohd Ayaan Anwar Tested-by: Mohd Ayaan Anwar Reviewed-by: Vladimir Oltean Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vxS4U-0000000BQXy-1Q1v@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../stmicro/stmmac/dwmac-qcom-ethqos.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 8913f6f02b9e..cb1c074c2053 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -105,7 +105,6 @@ struct qcom_ethqos { struct clk *link_clk; struct phy *serdes_phy; - phy_interface_t serdes_mode; phy_interface_t phy_mode; const struct ethqos_emac_por *rgmii_por; @@ -648,17 +647,8 @@ static int qcom_ethqos_serdes_powerup(struct net_device *ndev, void *priv) return ret; ret = phy_power_on(ethqos->serdes_phy); - if (ret) { + if (ret) phy_exit(ethqos->serdes_phy); - return ret; - } - - ret = phy_set_mode_ext(ethqos->serdes_phy, PHY_MODE_ETHERNET, - ethqos->serdes_mode); - if (ret) { - phy_power_off(ethqos->serdes_phy); - phy_exit(ethqos->serdes_phy); - } return ret; } @@ -681,12 +671,9 @@ static int ethqos_mac_finish_serdes(struct net_device *ndev, void *priv, qcom_ethqos_set_sgmii_loopback(ethqos, false); if (interface == PHY_INTERFACE_MODE_SGMII || - interface == PHY_INTERFACE_MODE_2500BASEX) { + interface == PHY_INTERFACE_MODE_2500BASEX) ret = phy_set_mode_ext(ethqos->serdes_phy, PHY_MODE_ETHERNET, interface); - if (ret == 0) - ethqos->serdes_mode = interface; - } return ret; } @@ -839,7 +826,6 @@ static int qcom_ethqos_probe(struct platform_device *pdev) plat_dat->host_dma_width = data->dma_addr_width; if (ethqos->serdes_phy) { - ethqos->serdes_mode = PHY_INTERFACE_MODE_SGMII; plat_dat->serdes_powerup = qcom_ethqos_serdes_powerup; plat_dat->serdes_powerdown = qcom_ethqos_serdes_powerdown; } From bf5a54bc0e3d8962474fcd611f1266eba036232f Mon Sep 17 00:00:00 2001 From: "Remy D. Farley" Date: Tue, 3 Mar 2026 19:57:41 +0000 Subject: [PATCH 0311/1561] doc/netlink: netlink-raw: Add max check Add definitions for max check and len-or-limit type, the same as in other specifications. Suggested-by: Donald Hunter Reviewed-by: Donald Hunter Signed-off-by: Remy D. Farley Link: https://patch.msgid.link/20260303195638.381642-2-one-d-wide@protonmail.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/netlink-raw.yaml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/Documentation/netlink/netlink-raw.yaml b/Documentation/netlink/netlink-raw.yaml index 0166a7e4afbb..dd98dda55bd0 100644 --- a/Documentation/netlink/netlink-raw.yaml +++ b/Documentation/netlink/netlink-raw.yaml @@ -19,6 +19,12 @@ $defs: type: [ string, integer ] pattern: ^[0-9A-Za-z_-]+( - 1)?$ minimum: 0 + len-or-limit: + # literal int, const name, or limit based on fixed-width type + # e.g. u8-min, u16-max, etc. + type: [ string, integer ] + pattern: ^[0-9A-Za-z_-]+$ + minimum: 0 # Schema for specs title: Protocol @@ -270,7 +276,10 @@ properties: type: string min: description: Min value for an integer attribute. - type: integer + $ref: '#/$defs/len-or-limit' + max: + description: Max value for an integer attribute. + $ref: '#/$defs/len-or-limit' min-len: description: Min length for a binary attribute. $ref: '#/$defs/len-or-define' From a3a54ba4ef2b2f788c45caf02255efd457fbadd0 Mon Sep 17 00:00:00 2001 From: "Remy D. Farley" Date: Tue, 3 Mar 2026 19:58:13 +0000 Subject: [PATCH 0312/1561] doc/netlink: nftables: Add definitions New enums/flags: - payload-base - range-ops - registers - numgen-types - log-level - log-flags Added missing enumerations: - bitwise-ops Annotated doc comment or associated enum: - bitwise-ops Reviewed-by: Donald Hunter Signed-off-by: Remy D. Farley Link: https://patch.msgid.link/20260303195638.381642-3-one-d-wide@protonmail.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/nftables.yaml | 181 +++++++++++++++++++++- 1 file changed, 178 insertions(+), 3 deletions(-) diff --git a/Documentation/netlink/specs/nftables.yaml b/Documentation/netlink/specs/nftables.yaml index 17ad707fa0d5..f15f825cb3a1 100644 --- a/Documentation/netlink/specs/nftables.yaml +++ b/Documentation/netlink/specs/nftables.yaml @@ -66,9 +66,21 @@ definitions: name: bitwise-ops type: enum entries: - - bool - - lshift - - rshift + - + name: mask-xor # aka bool (old name) + doc: >- + mask-and-xor operation used to implement NOT, AND, OR and XOR boolean + operations + - + name: lshift + - + name: rshift + - + name: and + - + name: or + - + name: xor - name: cmp-ops type: enum @@ -132,6 +144,12 @@ definitions: - object - concat - expr + - + name: set-elem-flags + type: flags + entries: + - interval-end + - catchall - name: lookup-flags type: flags @@ -225,6 +243,147 @@ definitions: - icmp-unreach - tcp-rst - icmpx-unreach + - + name: reject-inet-code + doc: These codes are mapped to real ICMP and ICMPv6 codes. + type: enum + entries: + - icmpx-no-route + - icmpx-port-unreach + - icmpx-host-unreach + - icmpx-admin-prohibited + - + name: payload-base + type: enum + entries: + - link-layer-header + - network-header + - transport-header + - inner-header + - tun-header + - + name: range-ops + doc: Range operator + type: enum + entries: + - eq + - neq + - + name: registers + doc: | + nf_tables registers. + nf_tables used to have five registers: a verdict register and four data + registers of size 16. The data registers have been changed to 16 registers + of size 4. For compatibility reasons, the NFT_REG_[1-4] registers still + map to areas of size 16, the 4 byte registers are addressed using + NFT_REG32_00 - NFT_REG32_15. + type: enum + entries: + - + name: reg-verdict + - + name: reg-1 + - + name: reg-2 + - + name: reg-3 + - + name: reg-4 + - + name: reg32-00 + value: 8 + - + name: reg32-01 + - + name: reg32-02 + - + name: reg32-03 + - + name: reg32-04 + - + name: reg32-05 + - + name: reg32-06 + - + name: reg32-07 + - + name: reg32-08 + - + name: reg32-09 + - + name: reg32-10 + - + name: reg32-11 + - + name: reg32-12 + - + name: reg32-13 + - + name: reg32-14 + - + name: reg32-15 + - + name: numgen-types + type: enum + entries: + - incremental + - random + - + name: log-level + doc: nf_tables log levels + type: enum + entries: + - + name: emerg + doc: system is unusable + - + name: alert + doc: action must be taken immediately + - + name: crit + doc: critical conditions + - + name: err + doc: error conditions + - + name: warning + doc: warning conditions + - + name: notice + doc: normal but significant condition + - + name: info + doc: informational + - + name: debug + doc: debug-level messages + - + name: audit + doc: enabling audit logging + - + name: log-flags + doc: nf_tables log flags + header: linux/netfilter/nf_log.h + type: flags + entries: + - + name: tcpseq + doc: Log TCP sequence numbers + - + name: tcpopt + doc: Log TCP options + - + name: ipopt + doc: Log IP options + - + name: uid + doc: Log UID owning local socket + - + name: nflog + doc: Unsupported, don't reuse + - + name: macdecode + doc: Decode MAC header attribute-sets: - @@ -767,6 +926,22 @@ attribute-sets: nested-attributes: hook-dev-attrs - name: expr-bitwise-attrs + doc: | + The bitwise expression supports boolean and shift operations. It + implements the boolean operations by performing the following + operation:: + + dreg = (sreg & mask) ^ xor + + with these mask and xor values: + + op mask xor + ---- ---- --- + NOT: 1 1 + OR: ~x x + XOR: 1 x + AND: x 0 + attributes: - name: sreg From 482da27d5274dfe5f2828d150981b966b8f9f3b1 Mon Sep 17 00:00:00 2001 From: "Remy D. Farley" Date: Tue, 3 Mar 2026 19:58:52 +0000 Subject: [PATCH 0313/1561] doc/netlink: nftables: Update attribute sets New attribute sets: - log-attrs - numgen-attrs - range-attrs - compat-target-attrs - compat-match-attrs - compat-attrs Added missing attributes: - table-attrs (pad, owner) - set-attrs (type, count) Added missing checks: - range-attrs - expr-bitwise-attrs - compat-target-attrs - compat-match-attrs - compat-attrs Annotated doc comment or associated enum: - batch-attrs - verdict-attrs - expr-payload-attrs Fixed byte order: - nft-counter-attrs - expr-counter-attrs - rule-compat-attrs Reviewed-by: Donald Hunter Signed-off-by: Remy D. Farley Link: https://patch.msgid.link/20260303195638.381642-4-one-d-wide@protonmail.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/nftables.yaml | 210 +++++++++++++++++++++- 1 file changed, 205 insertions(+), 5 deletions(-) diff --git a/Documentation/netlink/specs/nftables.yaml b/Documentation/netlink/specs/nftables.yaml index f15f825cb3a1..ba62adfb4c63 100644 --- a/Documentation/netlink/specs/nftables.yaml +++ b/Documentation/netlink/specs/nftables.yaml @@ -387,16 +387,100 @@ definitions: attribute-sets: - - name: empty-attrs + name: log-attrs + doc: log expression netlink attributes + attributes: + # Mentioned in nft_log_init() + - + name: group + doc: netlink group to send messages to + type: u16 + byte-order: big-endian + - + name: prefix + doc: prefix to prepend to log messages + type: string + - + name: snaplen + doc: length of payload to include in netlink message + type: u32 + byte-order: big-endian + - + name: qthreshold + doc: queue threshold + type: u16 + byte-order: big-endian + - + name: level + doc: log level + type: u32 + enum: log-level + byte-order: big-endian + - + name: flags + doc: logging flags + type: u32 + enum: log-flags + byte-order: big-endian + - + name: numgen-attrs + doc: nf_tables number generator expression netlink attributes attributes: - - name: name - type: string + name: dreg + doc: destination register + type: u32 + enum: registers + - + name: modulus + doc: maximum counter value + type: u32 + byte-order: big-endian + - + name: type + doc: operation type + type: u32 + byte-order: big-endian + enum: numgen-types + - + name: offset + doc: offset to be added to the counter + type: u32 + byte-order: big-endian + - + name: range-attrs + attributes: + # Mentioned in net/netfilter/nft_range.c + - + name: sreg + doc: source register of data to compare + type: u32 + byte-order: big-endian + enum: registers + - + name: op + doc: cmp operation + type: u32 + byte-order: big-endian + enum: range-ops + checks: + max: 255 + - + name: from-data + doc: data range from + type: nest + nested-attributes: data-attrs + - + name: to-data + doc: data range to + type: nest + nested-attributes: data-attrs - name: batch-attrs attributes: - name: genid + doc: generation ID for this changeset type: u32 byte-order: big-endian - @@ -423,10 +507,18 @@ attribute-sets: type: u64 byte-order: big-endian doc: numeric handle of the table + - + name: pad + type: pad - name: userdata type: binary doc: user data + - + name: owner + type: u32 + byte-order: big-endian + doc: owner of this table through netlink portID - name: chain-attrs attributes: @@ -530,9 +622,11 @@ attribute-sets: - name: bytes type: u64 + byte-order: big-endian - name: packets type: u64 + byte-order: big-endian - name: rule-attrs attributes: @@ -602,15 +696,18 @@ attribute-sets: selector: name doc: type specific data - + # Mentioned in nft_parse_compat() in net/netfilter/nft_compat.c name: rule-compat-attrs attributes: - name: proto - type: binary + type: u32 + byte-order: big-endian doc: numeric value of the handled protocol - name: flags - type: binary + type: u32 + byte-order: big-endian doc: bitmask of flags - name: set-attrs @@ -699,6 +796,15 @@ attribute-sets: type: nest nested-attributes: set-list-attrs doc: list of expressions + - + name: type + type: string + doc: set backend type + - + name: count + type: u32 + byte-order: big-endian + doc: number of set elements - name: set-desc-attrs attributes: @@ -968,6 +1074,8 @@ attribute-sets: type: u32 byte-order: big-endian enum: bitwise-ops + checks: + max: 255 - name: data type: nest @@ -1004,25 +1112,31 @@ attribute-sets: attributes: - name: code + doc: nf_tables verdict type: u32 byte-order: big-endian enum: verdict-code - name: chain + doc: jump target chain name type: string - name: chain-id + doc: jump target chain ID type: u32 + byte-order: big-endian - name: expr-counter-attrs attributes: - name: bytes type: u64 + byte-order: big-endian doc: Number of bytes - name: packets type: u64 + byte-order: big-endian doc: Number of packets - name: pad @@ -1107,6 +1221,25 @@ attribute-sets: type: u32 byte-order: big-endian enum: lookup-flags + - + name: expr-masq-attrs + attributes: + - + name: flags + type: u32 + byte-order: big-endian + enum: nat-range-flags + enum-as-flags: true + - + name: reg-proto-min + type: u32 + byte-order: big-endian + enum: registers + - + name: reg-proto-max + type: u32 + byte-order: big-endian + enum: registers - name: expr-meta-attrs attributes: @@ -1158,37 +1291,49 @@ attribute-sets: enum-as-flags: true - name: expr-payload-attrs + doc: nf_tables payload expression netlink attributes attributes: - name: dreg + doc: destination register to load data into type: u32 byte-order: big-endian + enum: registers - name: base + doc: payload base type: u32 + enum: payload-base byte-order: big-endian - name: offset + doc: payload offset relative to base type: u32 byte-order: big-endian - name: len + doc: payload length type: u32 byte-order: big-endian - name: sreg + doc: source register to load data from type: u32 byte-order: big-endian + enum: registers - name: csum-type + doc: checksum type type: u32 byte-order: big-endian - name: csum-offset + doc: checksum offset relative to base type: u32 byte-order: big-endian - name: csum-flags + doc: checksum flags type: u32 byte-order: big-endian - @@ -1254,6 +1399,61 @@ attribute-sets: type: u32 byte-order: big-endian doc: id of object map + - + name: compat-target-attrs + header: linux/netfilter/nf_tables_compat.h + attributes: + - + name: name + type: string + checks: + max-len: 32 + - + name: rev + type: u32 + byte-order: big-endian + checks: + max: 255 + - + name: info + type: binary + - + name: compat-match-attrs + header: linux/netfilter/nf_tables_compat.h + attributes: + - + name: name + type: string + checks: + max-len: 32 + - + name: rev + type: u32 + byte-order: big-endian + checks: + max: 255 + - + name: info + type: binary + - + name: compat-attrs + header: linux/netfilter/nf_tables_compat.h + attributes: + - + name: name + type: string + checks: + max-len: 32 + - + name: rev + type: u32 + byte-order: big-endian + checks: + max: 255 + - + name: type + type: u32 + byte-order: big-endian sub-messages: - From 27c7ee6d26ddd1a769bdcfb22862ef443da461e8 Mon Sep 17 00:00:00 2001 From: "Remy D. Farley" Date: Tue, 3 Mar 2026 19:59:19 +0000 Subject: [PATCH 0314/1561] doc/netlink: nftables: Add sub-messages New sub-messsages: - log - match - numgen - range Reviewed-by: Donald Hunter Signed-off-by: Remy D. Farley Link: https://patch.msgid.link/20260303195638.381642-5-one-d-wide@protonmail.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/nftables.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Documentation/netlink/specs/nftables.yaml b/Documentation/netlink/specs/nftables.yaml index ba62adfb4c63..086b16b12b0f 100644 --- a/Documentation/netlink/specs/nftables.yaml +++ b/Documentation/netlink/specs/nftables.yaml @@ -1480,15 +1480,24 @@ sub-messages: - value: immediate attribute-set: expr-immediate-attrs + - + value: log + attribute-set: log-attrs - value: lookup attribute-set: expr-lookup-attrs + - + value: match + attribute-set: compat-match-attrs - value: meta attribute-set: expr-meta-attrs - value: nat attribute-set: expr-nat-attrs + - + value: numgen + attribute-set: numgen-attrs - value: objref attribute-set: expr-objref-attrs @@ -1498,6 +1507,9 @@ sub-messages: - value: quota attribute-set: quota-attrs + - + value: range + attribute-set: range-attrs - value: reject attribute-set: expr-reject-attrs @@ -1507,6 +1519,9 @@ sub-messages: - value: tproxy attribute-set: expr-tproxy-attrs + # There're more sub-messages to go: + # grep -A10 nft_expr_type + # and look for .name\s*=\s*"..." - name: obj-data formats: From 568b370f128ce328cf3750eda5a84080043f97d6 Mon Sep 17 00:00:00 2001 From: "Remy D. Farley" Date: Tue, 3 Mar 2026 20:00:12 +0000 Subject: [PATCH 0315/1561] doc/netlink: nftables: Fill out operation attributes Filled out operation attributes: - newtable - gettable - deltable - destroytable - newchain - getchain - delchain - destroychain - newrule - getrule - getrule-reset - delrule - destroyrule - newset - getset - delset - destroyset - newsetelem - getsetelem - getsetelem-reset - delsetelem - destroysetelem - getgen - newobj - getobj - delobj - destroyobj - newflowtable - getflowtable - delflowtable - destroyflowtable Signed-off-by: Remy D. Farley Link: https://patch.msgid.link/20260303195638.381642-6-one-d-wide@protonmail.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/nftables.yaml | 285 ++++++++++++++++++---- 1 file changed, 243 insertions(+), 42 deletions(-) diff --git a/Documentation/netlink/specs/nftables.yaml b/Documentation/netlink/specs/nftables.yaml index 086b16b12b0f..21edf3d25f34 100644 --- a/Documentation/netlink/specs/nftables.yaml +++ b/Documentation/netlink/specs/nftables.yaml @@ -1568,7 +1568,10 @@ operations: request: value: 0xa00 attributes: + # Mentioned in nf_tables_newtable() - name + - flags + - userdata - name: gettable doc: Get / dump tables. @@ -1578,11 +1581,21 @@ operations: request: value: 0xa01 attributes: + # Mentioned in nf_tables_gettable() - name reply: value: 0xa00 - attributes: + attributes: &get-table + # Mentioned in nf_tables_fill_table_info() - name + - use + - handle + - flags + - owner + - userdata + dump: + reply: + attributes: *get-table - name: deltable doc: Delete an existing table. @@ -1591,8 +1604,10 @@ operations: do: request: value: 0xa02 - attributes: + attributes: &del-table + # Mentioned in nf_tables_deltable() - name + - handle - name: destroytable doc: | @@ -1603,8 +1618,7 @@ operations: do: request: value: 0xa1a - attributes: - - name + attributes: *del-table - name: newchain doc: Create a new chain. @@ -1614,7 +1628,19 @@ operations: request: value: 0xa03 attributes: + # Mentioned in nf_tables_newchain() + - table + - handle + - policy + - flags + # Mentioned in nf_tables_updchain() + - hook - name + - counters + # Mentioned in nf_tables_addchain() + - userdata + # Mentioned in nft_chain_parse_hook() + - type - name: getchain doc: Get / dump chains. @@ -1624,11 +1650,27 @@ operations: request: value: 0xa04 attributes: + # Mentioned in nf_tables_getchain() + - table - name reply: value: 0xa03 - attributes: + attributes: &get-chain + # Mentioned in nf_tables_fill_chain_info() + - table - name + - handle + - hook + - policy + - type + - flags + - counters + - id + - use + - userdata + dump: + reply: + attributes: *get-chain - name: delchain doc: Delete an existing chain. @@ -1637,8 +1679,12 @@ operations: do: request: value: 0xa05 - attributes: + attributes: &del-chain + # Mentioned in nf_tables_delchain() + - table + - handle - name + - hook - name: destroychain doc: | @@ -1649,8 +1695,7 @@ operations: do: request: value: 0xa1b - attributes: - - name + attributes: *del-chain - name: newrule doc: Create a new rule. @@ -1660,7 +1705,16 @@ operations: request: value: 0xa06 attributes: - - name + # Mentioned in nf_tables_newrule() + - table + - chain + - chain-id + - handle + - position + - position-id + - expressions + - userdata + - compat - name: getrule doc: Get / dump rules. @@ -1669,12 +1723,30 @@ operations: do: request: value: 0xa07 - attributes: - - name + attributes: &get-rule-request + # Mentioned in nf_tables_getrule_single() + - table + - chain + - handle reply: value: 0xa06 + attributes: &get-rule + # Mentioned in nf_tables_fill_rule_info() + - table + - chain + - handle + - position + - expressions + - userdata + dump: + request: attributes: - - name + # Mentioned in nf_tables_dump_rules_start() + - table + - chain + reply: + attributes: *get-rule + - name: getrule-reset doc: Get / dump rules and reset stateful expressions. @@ -1683,12 +1755,15 @@ operations: do: request: value: 0xa19 - attributes: - - name + attributes: *get-rule-request reply: value: 0xa06 - attributes: - - name + attributes: *get-rule + dump: + request: + attributes: *get-rule-request + reply: + attributes: *get-rule - name: delrule doc: Delete an existing rule. @@ -1697,8 +1772,11 @@ operations: do: request: value: 0xa08 - attributes: - - name + attributes: &del-rule + - table + - chain + - handle + - id - name: destroyrule doc: | @@ -1708,8 +1786,7 @@ operations: do: request: value: 0xa1c - attributes: - - name + attributes: *del-rule - name: newset doc: Create a new set. @@ -1719,7 +1796,21 @@ operations: request: value: 0xa09 attributes: + # Mentioned in nf_tables_newset() + - table - name + - key-len + - id + - key-type + - flags + - data-type + - data-len + - obj-type + - timeout + - gc-interval + - policy + - desc + - userdata - name: getset doc: Get / dump sets. @@ -1729,11 +1820,35 @@ operations: request: value: 0xa0a attributes: + # Mentioned in nf_tables_getset() + - table - name reply: value: 0xa09 - attributes: + attributes: &get-set + # Mentioned in nf_tables_fill_set() + - table - name + - handle + - flags + - key-len + - key-type + - data-type + - data-len + - obj-type + - gc-interval + - policy + - userdata + - desc + - expr + - expressions + dump: + request: + attributes: + # Mentioned in nf_tables_getset() + - table + reply: + attributes: *get-set - name: delset doc: Delete an existing set. @@ -1742,7 +1857,10 @@ operations: do: request: value: 0xa0b - attributes: + attributes: &del-set + # Mentioned in nf_tables_delset() + - table + - handle - name - name: destroyset @@ -1753,8 +1871,7 @@ operations: do: request: value: 0xa1d - attributes: - - name + attributes: *del-set - name: newsetelem doc: Create a new set element. @@ -1764,7 +1881,11 @@ operations: request: value: 0xa0c attributes: - - name + # Mentioned in nf_tables_newsetelem() + - table + - set + - set-id + - elements - name: getsetelem doc: Get / dump set elements. @@ -1774,11 +1895,27 @@ operations: request: value: 0xa0d attributes: - - name + # Mentioned in nf_tables_getsetelem() + - table + - set + - elements reply: value: 0xa0c attributes: - - name + # Mentioned in nf_tables_fill_setelem_info() + - elements + dump: + request: + attributes: &dump-set-request + # Mentioned in nft_set_dump_ctx_init() + - table + - set + reply: + attributes: &dump-set + # Mentioned in nf_tables_dump_set() + - table + - set + - elements - name: getsetelem-reset doc: Get / dump set elements and reset stateful expressions. @@ -1788,11 +1925,20 @@ operations: request: value: 0xa21 attributes: - - name + # Mentioned in nf_tables_getsetelem_reset() + - elements reply: value: 0xa0c attributes: - - name + # Mentioned in nf_tables_dumpreset_set() + - table + - set + - elements + dump: + request: + attributes: *dump-set-request + reply: + attributes: *dump-set - name: delsetelem doc: Delete an existing set element. @@ -1801,8 +1947,11 @@ operations: do: request: value: 0xa0e - attributes: - - name + attributes: &del-setelem + # Mentioned in nf_tables_delsetelem() + - table + - set + - elements - name: destroysetelem doc: Delete an existing set element with destroy semantics. @@ -1811,8 +1960,7 @@ operations: do: request: value: 0xa1e - attributes: - - name + attributes: *del-setelem - name: getgen doc: Get / dump rule-set generation. @@ -1821,12 +1969,16 @@ operations: do: request: value: 0xa10 - attributes: - - name reply: value: 0xa0f - attributes: - - name + attributes: &get-gen + # Mentioned in nf_tables_fill_gen_info() + - id + - proc-pid + - proc-name + dump: + reply: + attributes: *get-gen - name: newobj doc: Create a new stateful object. @@ -1836,7 +1988,12 @@ operations: request: value: 0xa12 attributes: + # Mentioned in nf_tables_newobj() + - type - name + - data + - table + - userdata - name: getobj doc: Get / dump stateful objects. @@ -1846,11 +2003,29 @@ operations: request: value: 0xa13 attributes: + # Mentioned in nf_tables_getobj_single() - name + - type + - table reply: value: 0xa12 - attributes: + attributes: &obj-info + # Mentioned in nf_tables_fill_obj_info() + - table - name + - type + - handle + - use + - data + - userdata + dump: + request: + attributes: + # Mentioned in nf_tables_dump_obj_start() + - table + - type + reply: + attributes: *obj-info - name: delobj doc: Delete an existing stateful object. @@ -1860,7 +2035,11 @@ operations: request: value: 0xa14 attributes: + # Mentioned in nf_tables_delobj() + - table - name + - type + - handle - name: destroyobj doc: Delete an existing stateful object with destroy semantics. @@ -1870,7 +2049,11 @@ operations: request: value: 0xa1f attributes: + # Mentioned in nf_tables_delobj() + - table - name + - type + - handle - name: newflowtable doc: Create a new flow table. @@ -1880,7 +2063,11 @@ operations: request: value: 0xa16 attributes: + # Mentioned in nf_tables_newflowtable() + - table - name + - hook + - flags - name: getflowtable doc: Get / dump flow tables. @@ -1890,11 +2077,22 @@ operations: request: value: 0xa17 attributes: + # Mentioned in nf_tables_getflowtable() - name + - table reply: value: 0xa16 - attributes: + attributes: &flowtable-info + # Mentioned in nf_tables_fill_flowtable_info() + - table - name + - handle + - use + - flags + - hook + dump: + reply: + attributes: *flowtable-info - name: delflowtable doc: Delete an existing flow table. @@ -1903,8 +2101,12 @@ operations: do: request: value: 0xa18 - attributes: + attributes: &del-flowtable + # Mentioned in nf_tables_delflowtable() + - table - name + - handle + - hook - name: destroyflowtable doc: Delete an existing flow table with destroy semantics. @@ -1913,8 +2115,7 @@ operations: do: request: value: 0xa20 - attributes: - - name + attributes: *del-flowtable mcast-groups: list: From d4d8c6e6fd2a1c5144339884ca5f66e654ad54a5 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 3 Mar 2026 23:54:16 +0000 Subject: [PATCH 0316/1561] tcp: Initialise ehash secrets during connect() and listen(). inet_ehashfn() and inet6_ehashfn() initialise random secrets on the first call by net_get_random_once(). While the init part is patched out using static keys, with CONFIG_STACKPROTECTOR_STRONG=y, this causes a compiler to generate a stack canary due to an automatic variable, unsigned long ___flags, in the DO_ONCE() macro being passed to __do_once_start(). With FDO, this is visible in __inet_lookup_established() and __inet6_lookup_established() too. Let's initialise the secrets by get_random_sleepable_once() in the slow paths: inet_hash() for listen(), and inet_hash_connect() and inet6_hash_connect() for connect(). Note that IPv6 listener will initialise both IPv4 & IPv6 secrets in inet_hash() for IPv4-mapped IPv6 address. With the patch, the stack size is reduced by 16 bytes (___flags + a stack canary) and NOPs for the static key go away. Before: __inet6_lookup_established() ... push %rbx sub $0x38,%rsp # stack is 56 bytes mov %edx,%ebx # sport mov %gs:0x299419f(%rip),%rax # load stack canary mov %rax,0x30(%rsp) and store it onto stack mov 0x440(%rdi),%r15 # net->ipv4.tcp_death_row.hashinfo nop 32: mov %r8d,%ebp # hnum shl $0x10,%ebp # hnum << 16 nop 3d: mov 0x70(%rsp),%r14d # sdif or %ebx,%ebp # INET_COMBINED_PORTS(sport, hnum) mov 0x11a8382(%rip),%eax # inet6_ehashfn() ... After: __inet6_lookup_established() ... push %rbx sub $0x28,%rsp # stack is 40 bytes mov 0x60(%rsp),%ebp # sdif mov %r8d,%r14d # hnum shl $0x10,%r14d # hnum << 16 or %edx,%r14d # INET_COMBINED_PORTS(sport, hnum) mov 0x440(%rdi),%rax # net->ipv4.tcp_death_row.hashinfo mov 0x1194f09(%rip),%r10d # inet6_ehashfn() ... Suggested-by: Eric Dumazet Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260303235424.3877267-1-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/linux/net.h | 2 ++ include/net/inet6_hashtables.h | 2 ++ net/ipv4/inet_hashtables.c | 17 +++++++++++++++-- net/ipv6/inet6_hashtables.c | 13 ++++++++++--- 4 files changed, 29 insertions(+), 5 deletions(-) diff --git a/include/linux/net.h b/include/linux/net.h index f58b38ab37f8..a8e818de95b3 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -304,6 +304,8 @@ do { \ #define net_get_random_once(buf, nbytes) \ get_random_once((buf), (nbytes)) +#define net_get_random_sleepable_once(buf, nbytes) \ + get_random_sleepable_once((buf), (nbytes)) /* * E.g. XFS meta- & log-data is in slab pages, or bcache meta diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index c16de5b7963f..2cc5d416bbb5 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -24,6 +24,8 @@ struct inet_hashinfo; +void inet6_init_ehash_secret(void); + static inline unsigned int __inet6_ehashfn(const u32 lhash, const u16 lport, const u32 fhash, diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 61e654b395be..ac7b67c603b5 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -30,12 +30,16 @@ #include #include +static void inet_init_ehash_secret(void) +{ + net_get_random_sleepable_once(&inet_ehash_secret, + sizeof(inet_ehash_secret)); +} + u32 inet_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport, const __be32 faddr, const __be16 fport) { - net_get_random_once(&inet_ehash_secret, sizeof(inet_ehash_secret)); - return lport + __inet_ehashfn(laddr, 0, faddr, fport, inet_ehash_secret + net_hash_mix(net)); } @@ -793,6 +797,13 @@ int inet_hash(struct sock *sk) local_bh_enable(); return 0; } + +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) + inet6_init_ehash_secret(); +#endif + inet_init_ehash_secret(); + WARN_ON(!sk_unhashed(sk)); ilb2 = inet_lhash2_bucket_sk(hashinfo, sk); @@ -1239,6 +1250,8 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row, if (!inet_sk(sk)->inet_num) port_offset = inet_sk_port_offset(sk); + inet_init_ehash_secret(); + hash_port0 = inet_ehashfn(net, inet->inet_rcv_saddr, 0, inet->inet_daddr, inet->inet_dport); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 182d38e6d6d8..72bc68fef48a 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -23,15 +23,20 @@ #include #include +void inet6_init_ehash_secret(void) +{ + net_get_random_sleepable_once(&inet6_ehash_secret, + sizeof(inet6_ehash_secret)); + net_get_random_sleepable_once(&tcp_ipv6_hash_secret, + sizeof(tcp_ipv6_hash_secret)); +} + u32 inet6_ehashfn(const struct net *net, const struct in6_addr *laddr, const u16 lport, const struct in6_addr *faddr, const __be16 fport) { u32 lhash, fhash; - net_get_random_once(&inet6_ehash_secret, sizeof(inet6_ehash_secret)); - net_get_random_once(&tcp_ipv6_hash_secret, sizeof(tcp_ipv6_hash_secret)); - lhash = (__force u32)laddr->s6_addr32[3]; fhash = __ipv6_addr_jhash(faddr, tcp_ipv6_hash_secret); @@ -363,6 +368,8 @@ int inet6_hash_connect(struct inet_timewait_death_row *death_row, if (!inet_sk(sk)->inet_num) port_offset = inet6_sk_port_offset(sk); + inet6_init_ehash_secret(); + hash_port0 = inet6_ehashfn(net, daddr, 0, saddr, inet->inet_dport); return __inet_hash_connect(death_row, sk, port_offset, hash_port0, From 237577e603cedc8910fc61c774b47eb1fc18eef9 Mon Sep 17 00:00:00 2001 From: Nicolai Buchwitz Date: Wed, 4 Mar 2026 11:54:28 +0100 Subject: [PATCH 0317/1561] net: cadence: macb: add EEE LPI statistics counters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The GEM MAC provides four read-only, clear-on-read LPI statistics registers at offsets 0x270-0x27c: GEM_RXLPI (0x270): RX LPI transition count (16-bit) GEM_RXLPITIME (0x274): cumulative RX LPI time (24-bit) GEM_TXLPI (0x278): TX LPI transition count (16-bit) GEM_TXLPITIME (0x27c): cumulative TX LPI time (24-bit) Add register offset definitions, extend struct gem_stats with corresponding u64 software accumulators, and register the four counters in gem_statistics[] so they appear in ethtool -S output. Because the hardware counters clear on read, the existing macb_update_stats() path accumulates them into the u64 fields on every stats poll, preventing loss between userspace reads. These registers are present on SAMA5D2, SAME70, PIC32CZ, and RP1 variants of the Cadence GEM IP and have been confirmed on RP1 via devmem reads. Reviewed-by: Claudiu Beznea Reviewed-by: Théo Lebrun Signed-off-by: Nicolai Buchwitz Link: https://patch.msgid.link/20260304105432.631186-2-nb@tipi-net.de Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 87414a2ddf6e..19aa98d01c8c 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -170,6 +170,10 @@ #define GEM_PCSANNPTX 0x021c /* PCS AN Next Page TX */ #define GEM_PCSANNPLP 0x0220 /* PCS AN Next Page LP */ #define GEM_PCSANEXTSTS 0x023c /* PCS AN Extended Status */ +#define GEM_RXLPI 0x0270 /* RX LPI Transitions */ +#define GEM_RXLPITIME 0x0274 /* RX LPI Time */ +#define GEM_TXLPI 0x0278 /* TX LPI Transitions */ +#define GEM_TXLPITIME 0x027c /* TX LPI Time */ #define GEM_DCFG1 0x0280 /* Design Config 1 */ #define GEM_DCFG2 0x0284 /* Design Config 2 */ #define GEM_DCFG3 0x0288 /* Design Config 3 */ @@ -1043,6 +1047,10 @@ struct gem_stats { u64 rx_ip_header_checksum_errors; u64 rx_tcp_checksum_errors; u64 rx_udp_checksum_errors; + u64 rx_lpi_transitions; + u64 rx_lpi_time; + u64 tx_lpi_transitions; + u64 tx_lpi_time; }; /* Describes the name and offset of an individual statistic register, as @@ -1142,6 +1150,10 @@ static const struct gem_statistic gem_statistics[] = { GEM_BIT(NDS_RXERR)), GEM_STAT_TITLE_BITS(RXUDPCCNT, "rx_udp_checksum_errors", GEM_BIT(NDS_RXERR)), + GEM_STAT_TITLE(RXLPI, "rx_lpi_transitions"), + GEM_STAT_TITLE(RXLPITIME, "rx_lpi_time"), + GEM_STAT_TITLE(TXLPI, "tx_lpi_transitions"), + GEM_STAT_TITLE(TXLPITIME, "tx_lpi_time"), }; #define GEM_STATS_LEN ARRAY_SIZE(gem_statistics) From 0cc425f18f59f992c61c1802331d25ce689ff5d1 Mon Sep 17 00:00:00 2001 From: Nicolai Buchwitz Date: Wed, 4 Mar 2026 11:54:29 +0100 Subject: [PATCH 0318/1561] net: cadence: macb: implement EEE TX LPI support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The GEM MAC has hardware LPI registers (NCR bit 19: TXLPIEN) but no built-in idle timer, so asserting TXLPIEN blocks all TX immediately with no automatic wake. A software idle timer is required, as noted in Microchip documentation (section 40.6.19): "It is best to use firmware to control LPI." Implement phylink managed EEE using the mac_enable_tx_lpi and mac_disable_tx_lpi callbacks: - macb_tx_lpi_set(): sets or clears TXLPIEN; requires bp->lock to be held by the caller (asserted with lockdep_assert_held). Returns bool indicating whether the register actually changed, avoiding redundant writes and unnecessary udelay on the xmit fast path. - macb_tx_lpi_work_fn(): delayed_work handler that enters LPI if all TX queues are idle and EEE is still active. Takes bp->lock with irqsave before calling macb_tx_lpi_set(). - macb_tx_lpi_schedule(): arms the work timer using the LPI timer value provided by phylink (default 250 ms). Called from macb_tx_complete() after each TX drain so the idle countdown restarts whenever the ring goes quiet. - macb_tx_lpi_wake(): called from macb_start_xmit() under bp->lock, immediately before TSTART. Returns early if eee_active is false to avoid a register read on the common path when EEE is disabled. Clears TXLPIEN and applies a 50 us udelay for PHY wake (IEEE 802.3az Tw_sys_tx is 16.5 us for 1000BASE-T / 30 us for 100BASE-TX; GEM has no hardware enforcement). Only delays when TXLPIEN was actually set. The delay is placed after tx_head is advanced so the work_fn's queue-idle check sees a non-empty ring and cannot race back into LPI before the frame is transmitted. - mac_enable_tx_lpi: stores the timer and sets eee_active under bp->lock, then defers the first LPI entry by 1 second per IEEE 802.3az section 22.7a. - mac_disable_tx_lpi: cancels the work (sync, without the lock to avoid deadlock with the work_fn), then takes bp->lock to clear eee_active and deassert TXLPIEN. Populate phylink_config lpi_interfaces (MII, GMII, RGMII variants) and lpi_capabilities (MAC_100FD | MAC_1000FD) so phylink can negotiate EEE with the PHY and call the callbacks appropriately. Set lpi_timer_default to 250000 us and eee_enabled_default to true. Reviewed-by: Claudiu Beznea Reviewed-by: Théo Lebrun Signed-off-by: Nicolai Buchwitz Link: https://patch.msgid.link/20260304105432.631186-3-nb@tipi-net.de Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb.h | 8 ++ drivers/net/ethernet/cadence/macb_main.c | 124 +++++++++++++++++++++++ 2 files changed, 132 insertions(+) diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 19aa98d01c8c..c69828b27dae 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -309,6 +309,8 @@ #define MACB_IRXFCS_SIZE 1 /* GEM specific NCR bitfields. */ +#define GEM_TXLPIEN_OFFSET 19 +#define GEM_TXLPIEN_SIZE 1 #define GEM_ENABLE_HS_MAC_OFFSET 31 #define GEM_ENABLE_HS_MAC_SIZE 1 @@ -783,6 +785,7 @@ #define MACB_CAPS_DMA_PTP BIT(22) #define MACB_CAPS_RSC BIT(23) #define MACB_CAPS_NO_LSO BIT(24) +#define MACB_CAPS_EEE BIT(25) /* LSO settings */ #define MACB_LSO_UFO_ENABLE 0x01 @@ -1369,6 +1372,11 @@ struct macb { struct work_struct hresp_err_bh_work; + /* EEE / LPI state */ + bool eee_active; + struct delayed_work tx_lpi_work; + u32 tx_lpi_timer; + int rx_bd_rd_prefetch; int tx_bd_rd_prefetch; diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 17f0ad3d7a09..4e776c67f408 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -621,6 +622,107 @@ static const struct phylink_pcs_ops macb_phylink_pcs_ops = { .pcs_config = macb_pcs_config, }; +static bool macb_tx_lpi_set(struct macb *bp, bool enable) +{ + u32 old, ncr; + + lockdep_assert_held(&bp->lock); + + ncr = macb_readl(bp, NCR); + old = ncr; + if (enable) + ncr |= GEM_BIT(TXLPIEN); + else + ncr &= ~GEM_BIT(TXLPIEN); + if (old != ncr) + macb_writel(bp, NCR, ncr); + + return old != ncr; +} + +static bool macb_tx_all_queues_idle(struct macb *bp) +{ + struct macb_queue *queue; + unsigned int q; + + for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { + if (READ_ONCE(queue->tx_head) != READ_ONCE(queue->tx_tail)) + return false; + } + return true; +} + +static void macb_tx_lpi_work_fn(struct work_struct *work) +{ + struct macb *bp = container_of(work, struct macb, tx_lpi_work.work); + unsigned long flags; + + spin_lock_irqsave(&bp->lock, flags); + if (bp->eee_active && macb_tx_all_queues_idle(bp)) + macb_tx_lpi_set(bp, true); + spin_unlock_irqrestore(&bp->lock, flags); +} + +static void macb_tx_lpi_schedule(struct macb *bp) +{ + if (bp->eee_active) + mod_delayed_work(system_wq, &bp->tx_lpi_work, + usecs_to_jiffies(bp->tx_lpi_timer)); +} + +/* Wake from LPI before transmitting. The MAC must deassert TXLPIEN + * and wait for the PHY to exit LPI before any frame can be sent. + * IEEE 802.3az Tw_sys is ~17us for 1000BASE-T, ~30us for 100BASE-TX; + * we use a conservative 50us. + */ +static void macb_tx_lpi_wake(struct macb *bp) +{ + lockdep_assert_held(&bp->lock); + + if (!bp->eee_active) + return; + + if (!macb_tx_lpi_set(bp, false)) + return; + + cancel_delayed_work(&bp->tx_lpi_work); + udelay(50); +} + +static void macb_mac_disable_tx_lpi(struct phylink_config *config) +{ + struct net_device *ndev = to_net_dev(config->dev); + struct macb *bp = netdev_priv(ndev); + unsigned long flags; + + cancel_delayed_work_sync(&bp->tx_lpi_work); + + spin_lock_irqsave(&bp->lock, flags); + bp->eee_active = false; + macb_tx_lpi_set(bp, false); + spin_unlock_irqrestore(&bp->lock, flags); +} + +static int macb_mac_enable_tx_lpi(struct phylink_config *config, u32 timer, + bool tx_clk_stop) +{ + struct net_device *ndev = to_net_dev(config->dev); + struct macb *bp = netdev_priv(ndev); + unsigned long flags; + + spin_lock_irqsave(&bp->lock, flags); + bp->tx_lpi_timer = timer; + bp->eee_active = true; + spin_unlock_irqrestore(&bp->lock, flags); + + /* Defer initial LPI entry by 1 second after link-up per + * IEEE 802.3az section 22.7a. + */ + mod_delayed_work(system_wq, &bp->tx_lpi_work, msecs_to_jiffies(1000)); + + return 0; +} + static void macb_mac_config(struct phylink_config *config, unsigned int mode, const struct phylink_link_state *state) { @@ -769,6 +871,8 @@ static const struct phylink_mac_ops macb_phylink_ops = { .mac_config = macb_mac_config, .mac_link_down = macb_mac_link_down, .mac_link_up = macb_mac_link_up, + .mac_disable_tx_lpi = macb_mac_disable_tx_lpi, + .mac_enable_tx_lpi = macb_mac_enable_tx_lpi, }; static bool macb_phy_handle_exists(struct device_node *dn) @@ -864,6 +968,18 @@ static int macb_mii_probe(struct net_device *dev) } } + /* Configure EEE LPI if supported */ + if (bp->caps & MACB_CAPS_EEE) { + __set_bit(PHY_INTERFACE_MODE_MII, + bp->phylink_config.lpi_interfaces); + __set_bit(PHY_INTERFACE_MODE_GMII, + bp->phylink_config.lpi_interfaces); + phy_interface_set_rgmii(bp->phylink_config.lpi_interfaces); + bp->phylink_config.lpi_capabilities = MAC_100FD | MAC_1000FD; + bp->phylink_config.lpi_timer_default = 250000; + bp->phylink_config.eee_enabled_default = true; + } + bp->phylink = phylink_create(&bp->phylink_config, bp->pdev->dev.fwnode, bp->phy_interface, &macb_phylink_ops); if (IS_ERR(bp->phylink)) { @@ -1260,6 +1376,9 @@ static int macb_tx_complete(struct macb_queue *queue, int budget) netif_wake_subqueue(bp->dev, queue_index); spin_unlock_irqrestore(&queue->tx_ptr_lock, flags); + if (packets) + macb_tx_lpi_schedule(bp); + return packets; } @@ -2366,6 +2485,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev) skb->len); spin_lock(&bp->lock); + macb_tx_lpi_wake(bp); macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART)); spin_unlock(&bp->lock); @@ -3026,6 +3146,8 @@ static int macb_close(struct net_device *dev) netdev_tx_reset_queue(netdev_get_tx_queue(dev, q)); } + cancel_delayed_work_sync(&bp->tx_lpi_work); + phylink_stop(bp->phylink); phylink_disconnect_phy(bp->phylink); @@ -5629,6 +5751,7 @@ static int macb_probe(struct platform_device *pdev) } INIT_WORK(&bp->hresp_err_bh_work, macb_hresp_error_task); + INIT_DELAYED_WORK(&bp->tx_lpi_work, macb_tx_lpi_work_fn); netdev_info(dev, "Cadence %s rev 0x%08x at 0x%08lx irq %d (%pM)\n", macb_is_gem(bp) ? "GEM" : "MACB", macb_readl(bp, MID), @@ -5672,6 +5795,7 @@ static void macb_remove(struct platform_device *pdev) mdiobus_free(bp->mii_bus); device_set_wakeup_enable(&bp->pdev->dev, 0); + cancel_delayed_work_sync(&bp->tx_lpi_work); cancel_work_sync(&bp->hresp_err_bh_work); pm_runtime_disable(&pdev->dev); pm_runtime_dont_use_autosuspend(&pdev->dev); From 61332b78761cb1638e2bf29273050cf009fbbc0e Mon Sep 17 00:00:00 2001 From: Nicolai Buchwitz Date: Wed, 4 Mar 2026 11:54:30 +0100 Subject: [PATCH 0319/1561] net: cadence: macb: add ethtool EEE support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement get_eee and set_eee ethtool ops for GEM as simple passthroughs to phylink_ethtool_get_eee() and phylink_ethtool_set_eee(). No MACB_CAPS_EEE guard is needed: phylink returns -EOPNOTSUPP from both ops when mac_supports_eee is false, which is the case when lpi_capabilities and lpi_interfaces are not populated. Those fields are only set when MACB_CAPS_EEE is present (previous patch), so phylink already handles the unsupported case correctly. Reviewed-by: Claudiu Beznea Reviewed-by: Théo Lebrun Signed-off-by: Nicolai Buchwitz Link: https://patch.msgid.link/20260304105432.631186-4-nb@tipi-net.de Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 4e776c67f408..f01366394cff 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -4058,6 +4058,20 @@ static const struct ethtool_ops macb_ethtool_ops = { .set_ringparam = macb_set_ringparam, }; +static int macb_get_eee(struct net_device *dev, struct ethtool_keee *eee) +{ + struct macb *bp = netdev_priv(dev); + + return phylink_ethtool_get_eee(bp->phylink, eee); +} + +static int macb_set_eee(struct net_device *dev, struct ethtool_keee *eee) +{ + struct macb *bp = netdev_priv(dev); + + return phylink_ethtool_set_eee(bp->phylink, eee); +} + static const struct ethtool_ops gem_ethtool_ops = { .get_regs_len = macb_get_regs_len, .get_regs = macb_get_regs, @@ -4080,6 +4094,8 @@ static const struct ethtool_ops gem_ethtool_ops = { .set_rxnfc = gem_set_rxnfc, .get_rx_ring_count = gem_get_rx_ring_count, .nway_reset = phy_ethtool_nway_reset, + .get_eee = macb_get_eee, + .set_eee = macb_set_eee, }; static int macb_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) From 92ba3307431a1756a3c8094787527a38ce19623d Mon Sep 17 00:00:00 2001 From: Nicolai Buchwitz Date: Wed, 4 Mar 2026 11:54:31 +0100 Subject: [PATCH 0320/1561] net: cadence: macb: enable EEE for Raspberry Pi RP1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set MACB_CAPS_EEE for the Raspberry Pi 5 RP1 southbridge (Cadence GEM_GXL rev 0x00070109 paired with BCM54213PE PHY). EEE has been verified on RP1 hardware: the LPI counter registers at 0x270-0x27c return valid data, the TXLPIEN bit in NCR (bit 19) controls LPI transmission correctly, and ethtool --show-eee reports the negotiated state after link-up. Other GEM variants that share the same LPI register layout (SAMA5D2, SAME70, PIC32CZ) can be enabled by adding MACB_CAPS_EEE to their respective config entries once tested. Reviewed-by: Claudiu Beznea Reviewed-by: Théo Lebrun Signed-off-by: Nicolai Buchwitz Link: https://patch.msgid.link/20260304105432.631186-5-nb@tipi-net.de Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index f01366394cff..5933a5042bc8 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -5537,7 +5537,8 @@ static const struct macb_config eyeq5_config = { static const struct macb_config raspberrypi_rp1_config = { .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_CLK_HW_CHG | MACB_CAPS_JUMBO | - MACB_CAPS_GEM_HAS_PTP, + MACB_CAPS_GEM_HAS_PTP | + MACB_CAPS_EEE, .dma_burst_length = 16, .clk_init = macb_clk_init, .init = macb_init, From 48575b6e16d12365d6a69de13d061d9df2b775ec Mon Sep 17 00:00:00 2001 From: Nicolai Buchwitz Date: Wed, 4 Mar 2026 11:54:32 +0100 Subject: [PATCH 0321/1561] net: cadence: macb: enable EEE for Mobileye EyeQ5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set MACB_CAPS_EEE for the Mobileye EyeQ5 GEM instance. EEE has been verified on EyeQ5 hardware using a loopback setup with ethtool --show-eee confirming EEE active on both ends at 100baseT/Full and 1000baseT/Full. Tested-by: Théo Lebrun Signed-off-by: Nicolai Buchwitz Link: https://patch.msgid.link/20260304105432.631186-6-nb@tipi-net.de Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 5933a5042bc8..3dcae4d5f74c 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -5526,7 +5526,7 @@ static const struct macb_config versal_config = { static const struct macb_config eyeq5_config = { .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_JUMBO | MACB_CAPS_GEM_HAS_PTP | MACB_CAPS_QUEUE_DISABLE | - MACB_CAPS_NO_LSO, + MACB_CAPS_NO_LSO | MACB_CAPS_EEE, .dma_burst_length = 16, .clk_init = macb_clk_init, .init = eyeq5_init, From 7600fb3b41dd6ab65ed61169df1b6099044edf97 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 4 Mar 2026 11:56:47 +0100 Subject: [PATCH 0322/1561] net: airoha: Rely __field_prep for non-constant masks Rely on __field_prep macros for non-constant masks preparing the values for register updates instead of open-coding. Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260304-airoha-__field_prep-v1-1-b185facc4e2f@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 3779f93b47bc..09acbe669536 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -1727,7 +1727,7 @@ static int airhoha_set_gdm2_loopback(struct airoha_gdm_port *port) airoha_fe_rmw(eth, REG_SP_DFT_CPORT(src_port >> fls(SP_CPORT_DFT_MASK)), SP_CPORT_MASK(val), - FE_PSE_PORT_CDM2 << __ffs(SP_CPORT_MASK(val))); + __field_prep(SP_CPORT_MASK(val), FE_PSE_PORT_CDM2)); if (port->id != AIROHA_GDM3_IDX && airoha_is_7581(eth)) airoha_fe_rmw(eth, REG_SRC_PORT_FC_MAP6, @@ -1781,7 +1781,7 @@ static int airoha_dev_init(struct net_device *dev) ppe_id = pse_port == FE_PSE_PORT_PPE2 ? 1 : 0; airoha_fe_rmw(eth, REG_PPE_DFT_CPORT0(ppe_id), DFT_CPORT_MASK(port->id), - fe_cpu_port << __ffs(DFT_CPORT_MASK(port->id))); + __field_prep(DFT_CPORT_MASK(port->id), fe_cpu_port)); return 0; } @@ -2138,7 +2138,7 @@ static int airoha_qdma_set_chan_tx_sched(struct airoha_gdm_port *port, airoha_qdma_rmw(port->qdma, REG_CHAN_QOS_MODE(channel >> 3), CHAN_QOS_MODE_MASK(channel), - mode << __ffs(CHAN_QOS_MODE_MASK(channel))); + __field_prep(CHAN_QOS_MODE_MASK(channel), mode)); return 0; } From e5e09233e8a9179b260460fdb28df5c24bcfbed6 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Wed, 4 Mar 2026 15:10:09 +0100 Subject: [PATCH 0323/1561] tools: ynl: add uns-admin-perm to genetlink GENL_UNS_ADMIN_PERM may be required by protocols using the `genetlink` family, however, this flag is currently only allowed in `genetlink-legacy`. Add it to the list of possible values in genetlink.yaml too. Cc: Simon Horman Cc: Donald Hunter Link: https://github.com/OpenVPN/ovpn-net-next/issues/33 Suggested-by: Ralf Lici Signed-off-by: Antonio Quartulli Link: https://patch.msgid.link/20260304141020.23270-1-antonio@openvpn.net Signed-off-by: Jakub Kicinski --- Documentation/netlink/genetlink.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/netlink/genetlink.yaml b/Documentation/netlink/genetlink.yaml index b020a537d8ac..a1194d5d93fc 100644 --- a/Documentation/netlink/genetlink.yaml +++ b/Documentation/netlink/genetlink.yaml @@ -262,7 +262,7 @@ properties: description: Command flags. type: array items: - enum: [ admin-perm ] + enum: [ admin-perm, uns-admin-perm ] dont-validate: description: Kernel attribute validation flags. type: array From 08e6183ed2568e733e05e7e1c9de737d91c21155 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 26 Feb 2026 18:36:07 +0100 Subject: [PATCH 0324/1561] wifi: move action code from per-type frame structs The action code actually serves to identify the type of action frame, so it really isn't part of the per-type structure. Pull it out and have it in the general action frame format. In theory, whether or not the action code is present in this way is up to each category, but all categories that are defined right now all have that value. While at it, and since this change requires changing all users, remove the 'u' and make it an anonymous union in this case, so that all code using this changes. Change IEEE80211_MIN_ACTION_SIZE to take an argument which says how much of the frame is needed, e.g. category, action_code or the specific frame type that's defined in the union. Again this also ensures that all code is updated. In some cases, fix bugs where the SKB length was checked after having accessed beyond the checked length, in particular in FTM code, e.g. ieee80211_is_ftm(). Link: https://patch.msgid.link/20260226183607.67e71846b59e.I9a24328e3ffcaae179466a935f1c3345029f9961@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath11k/mac.c | 4 +- drivers/net/wireless/ath/ath12k/mac.c | 4 +- drivers/net/wireless/ath/ath12k/wifi7/hw.c | 2 +- .../wireless/intel/iwlwifi/mld/time_sync.c | 6 +- .../intel/iwlwifi/mvm/ftm-initiator.c | 7 +- .../wireless/intel/iwlwifi/mvm/time-sync.c | 6 +- drivers/net/wireless/marvell/mwifiex/tdls.c | 12 +- drivers/net/wireless/marvell/mwl8k.c | 4 +- .../wireless/mediatek/mt76/mt76_connac_mac.c | 6 +- .../net/wireless/mediatek/mt76/mt7925/mac.c | 4 +- .../net/wireless/mediatek/mt76/mt7996/mac.c | 4 +- drivers/net/wireless/realtek/rtl8xxxu/core.c | 14 +- drivers/net/wireless/realtek/rtlwifi/base.c | 28 ++-- drivers/net/wireless/realtek/rtlwifi/pci.c | 2 +- drivers/net/wireless/silabs/wfx/data_rx.c | 8 +- include/linux/ieee80211.h | 83 +++++------- net/mac80211/agg-rx.c | 27 ++-- net/mac80211/agg-tx.c | 28 ++-- net/mac80211/eht.c | 21 ++- net/mac80211/ht.c | 31 ++--- net/mac80211/ibss.c | 18 +-- net/mac80211/iface.c | 18 +-- net/mac80211/mesh.c | 14 +- net/mac80211/mesh_hwmp.c | 20 ++- net/mac80211/mesh_plink.c | 21 ++- net/mac80211/mlme.c | 82 +++++------- net/mac80211/rx.c | 123 ++++++++---------- net/mac80211/s1g.c | 28 ++-- net/mac80211/spectmgmt.c | 31 ++--- net/mac80211/tdls.c | 29 ++--- net/mac80211/util.c | 5 +- net/mac80211/vht.c | 10 +- 32 files changed, 308 insertions(+), 392 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index d1c121e943cb..a48b6bf1f29a 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -6288,10 +6288,10 @@ static int ath11k_mac_mgmt_action_frame_fill_elem_data(struct ath11k_vif *arvif, lockdep_assert_held(&ar->conf_mutex); /* make sure category field is present */ - if (skb->len < IEEE80211_MIN_ACTION_SIZE) + if (skb->len < IEEE80211_MIN_ACTION_SIZE(category)) return -EINVAL; - remaining_len = skb->len - IEEE80211_MIN_ACTION_SIZE; + remaining_len = skb->len - IEEE80211_MIN_ACTION_SIZE(category); has_protected = ieee80211_has_protected(hdr->frame_control); /* In case of SW crypto and hdr protected (PMF), packet will already be encrypted, diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index af7590639dbf..a03881c73d68 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -9119,10 +9119,10 @@ static int ath12k_mac_mgmt_action_frame_fill_elem_data(struct ath12k_link_vif *a lockdep_assert_wiphy(wiphy); /* make sure category field is present */ - if (skb->len < IEEE80211_MIN_ACTION_SIZE) + if (skb->len < IEEE80211_MIN_ACTION_SIZE(category)) return -EINVAL; - remaining_len = skb->len - IEEE80211_MIN_ACTION_SIZE; + remaining_len = skb->len - IEEE80211_MIN_ACTION_SIZE(category); has_protected = ieee80211_has_protected(hdr->frame_control); /* In case of SW crypto and hdr protected (PMF), packet will already be encrypted, diff --git a/drivers/net/wireless/ath/ath12k/wifi7/hw.c b/drivers/net/wireless/ath/ath12k/wifi7/hw.c index 27acdfc35459..ec6dba96640b 100644 --- a/drivers/net/wireless/ath/ath12k/wifi7/hw.c +++ b/drivers/net/wireless/ath/ath12k/wifi7/hw.c @@ -104,7 +104,7 @@ static bool ath12k_is_addba_resp_action_code(struct ieee80211_mgmt *mgmt) if (mgmt->u.action.category != WLAN_CATEGORY_BACK) return false; - if (mgmt->u.action.u.addba_resp.action_code != WLAN_ACTION_ADDBA_RESP) + if (mgmt->u.action.action_code != WLAN_ACTION_ADDBA_RESP) return false; return true; diff --git a/drivers/net/wireless/intel/iwlwifi/mld/time_sync.c b/drivers/net/wireless/intel/iwlwifi/mld/time_sync.c index 897ab65b71aa..474dd555e70b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/time_sync.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/time_sync.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2025 Intel Corporation + * Copyright (C) 2025-2026 Intel Corporation */ #include "mld.h" @@ -116,9 +116,9 @@ static bool iwl_mld_is_skb_match(struct sk_buff *skb, u8 *addr, u8 dialog_token) u8 skb_dialog_token; if (ieee80211_is_timing_measurement(skb)) - skb_dialog_token = mgmt->u.action.u.wnm_timing_msr.dialog_token; + skb_dialog_token = mgmt->u.action.wnm_timing_msr.dialog_token; else - skb_dialog_token = mgmt->u.action.u.ftm.dialog_token; + skb_dialog_token = mgmt->u.action.ftm.dialog_token; if ((ether_addr_equal(mgmt->sa, addr) || ether_addr_equal(mgmt->da, addr)) && diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c index ebc569e94f55..1b67836b1fac 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* * Copyright (C) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2025 Intel Corporation + * Copyright (C) 2018-2026 Intel Corporation */ #include #include @@ -1409,8 +1409,7 @@ void iwl_mvm_ftm_lc_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb) struct iwl_mvm_loc_entry *entry; const u8 *ies, *lci, *civic, *msr_ie; size_t ies_len, lci_len = 0, civic_len = 0; - size_t baselen = IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.ftm); + size_t baselen = IEEE80211_MIN_ACTION_SIZE(ftm); static const u8 rprt_type_lci = IEEE80211_SPCT_MSR_RPRT_TYPE_LCI; static const u8 rprt_type_civic = IEEE80211_SPCT_MSR_RPRT_TYPE_CIVIC; @@ -1419,7 +1418,7 @@ void iwl_mvm_ftm_lc_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb) lockdep_assert_held(&mvm->mutex); - ies = mgmt->u.action.u.ftm.variable; + ies = mgmt->u.action.ftm.variable; ies_len = len - baselen; msr_ie = cfg80211_find_ie_match(WLAN_EID_MEASURE_REPORT, ies, ies_len, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-sync.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-sync.c index edae3e24192b..039b4daac73f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/time-sync.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-sync.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2022 Intel Corporation + * Copyright (C) 2022, 2026 Intel Corporation */ #include "mvm.h" @@ -18,9 +18,9 @@ static bool iwl_mvm_is_skb_match(struct sk_buff *skb, u8 *addr, u8 dialog_token) u8 skb_dialog_token; if (ieee80211_is_timing_measurement(skb)) - skb_dialog_token = mgmt->u.action.u.wnm_timing_msr.dialog_token; + skb_dialog_token = mgmt->u.action.wnm_timing_msr.dialog_token; else - skb_dialog_token = mgmt->u.action.u.ftm.dialog_token; + skb_dialog_token = mgmt->u.action.ftm.dialog_token; if ((ether_addr_equal(mgmt->sa, addr) || ether_addr_equal(mgmt->da, addr)) && diff --git a/drivers/net/wireless/marvell/mwifiex/tdls.c b/drivers/net/wireless/marvell/mwifiex/tdls.c index a4cf323e704b..845f2a22e071 100644 --- a/drivers/net/wireless/marvell/mwifiex/tdls.c +++ b/drivers/net/wireless/marvell/mwifiex/tdls.c @@ -755,16 +755,12 @@ mwifiex_construct_tdls_action_frame(struct mwifiex_private *priv, switch (action_code) { case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: /* See the layout of 'struct ieee80211_mgmt'. */ - extra = sizeof(mgmt->u.action.u.tdls_discover_resp) + - sizeof(mgmt->u.action.category); + extra = IEEE80211_MIN_ACTION_SIZE(tdls_discover_resp) - 24; skb_put(skb, extra); mgmt->u.action.category = WLAN_CATEGORY_PUBLIC; - mgmt->u.action.u.tdls_discover_resp.action_code = - WLAN_PUB_ACTION_TDLS_DISCOVER_RES; - mgmt->u.action.u.tdls_discover_resp.dialog_token = - dialog_token; - mgmt->u.action.u.tdls_discover_resp.capability = - cpu_to_le16(capab); + mgmt->u.action.action_code = WLAN_PUB_ACTION_TDLS_DISCOVER_RES; + mgmt->u.action.tdls_discover_resp.dialog_token = dialog_token; + mgmt->u.action.tdls_discover_resp.capability = cpu_to_le16(capab); /* move back for addr4 */ memmove(pos + ETH_ALEN, &mgmt->u.action, extra); /* init address 4 */ diff --git a/drivers/net/wireless/marvell/mwl8k.c b/drivers/net/wireless/marvell/mwl8k.c index 99321d180f34..b1af02180341 100644 --- a/drivers/net/wireless/marvell/mwl8k.c +++ b/drivers/net/wireless/marvell/mwl8k.c @@ -1985,9 +1985,9 @@ mwl8k_txq_xmit(struct ieee80211_hw *hw, */ if (unlikely(ieee80211_is_action(wh->frame_control) && mgmt->u.action.category == WLAN_CATEGORY_BACK && - mgmt->u.action.u.addba_req.action_code == WLAN_ACTION_ADDBA_REQ && + mgmt->u.action.action_code == WLAN_ACTION_ADDBA_REQ && priv->ap_fw)) { - u16 capab = le16_to_cpu(mgmt->u.action.u.addba_req.capab); + u16 capab = le16_to_cpu(mgmt->u.action.addba_req.capab); tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; index = mwl8k_tid_queue_mapping(tid); } diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c index b41ca1410da9..f946ddc20a47 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c @@ -413,10 +413,10 @@ mt76_connac2_mac_write_txwi_80211(struct mt76_dev *dev, __le32 *txwi, u32 val; if (ieee80211_is_action(fc) && - skb->len >= IEEE80211_MIN_ACTION_SIZE + 1 + 1 + 2 && + skb->len >= IEEE80211_MIN_ACTION_SIZE(addba_req.capab) && mgmt->u.action.category == WLAN_CATEGORY_BACK && - mgmt->u.action.u.addba_req.action_code == WLAN_ACTION_ADDBA_REQ) { - u16 capab = le16_to_cpu(mgmt->u.action.u.addba_req.capab); + mgmt->u.action.action_code == WLAN_ACTION_ADDBA_REQ) { + u16 capab = le16_to_cpu(mgmt->u.action.addba_req.capab); txwi[5] |= cpu_to_le32(MT_TXD5_ADD_BA); tid = (capab >> 2) & IEEE80211_QOS_CTL_TID_MASK; diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mac.c b/drivers/net/wireless/mediatek/mt76/mt7925/mac.c index 0d9435900423..caaf71c31480 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mac.c @@ -668,9 +668,9 @@ mt7925_mac_write_txwi_80211(struct mt76_dev *dev, __le32 *txwi, u32 val; if (ieee80211_is_action(fc) && - skb->len >= IEEE80211_MIN_ACTION_SIZE + 1 && + skb->len >= IEEE80211_MIN_ACTION_SIZE(action_code) && mgmt->u.action.category == WLAN_CATEGORY_BACK && - mgmt->u.action.u.addba_req.action_code == WLAN_ACTION_ADDBA_REQ) + mgmt->u.action.action_code == WLAN_ACTION_ADDBA_REQ) tid = MT_TX_ADDBA; else if (ieee80211_is_mgmt(hdr->frame_control)) tid = MT_TX_NORMAL; diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c index d4f3ee943b47..84cbf36b493c 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c @@ -800,9 +800,9 @@ mt7996_mac_write_txwi_80211(struct mt7996_dev *dev, __le32 *txwi, u32 val; if (ieee80211_is_action(fc) && - skb->len >= IEEE80211_MIN_ACTION_SIZE + 1 && + skb->len >= IEEE80211_MIN_ACTION_SIZE(action_code) && mgmt->u.action.category == WLAN_CATEGORY_BACK && - mgmt->u.action.u.addba_req.action_code == WLAN_ACTION_ADDBA_REQ) { + mgmt->u.action.action_code == WLAN_ACTION_ADDBA_REQ) { if (is_mt7990(&dev->mt76)) txwi[6] |= cpu_to_le32(FIELD_PREP(MT_TXD6_TID_ADDBA, tid)); else diff --git a/drivers/net/wireless/realtek/rtl8xxxu/core.c b/drivers/net/wireless/realtek/rtl8xxxu/core.c index 794187d28caa..804fc604e5f8 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/core.c @@ -5146,10 +5146,10 @@ static void rtl8xxxu_dump_action(struct device *dev, if (!(rtl8xxxu_debug & RTL8XXXU_DEBUG_ACTION)) return; - switch (mgmt->u.action.u.addba_resp.action_code) { + switch (mgmt->u.action.action_code) { case WLAN_ACTION_ADDBA_RESP: - cap = le16_to_cpu(mgmt->u.action.u.addba_resp.capab); - timeout = le16_to_cpu(mgmt->u.action.u.addba_resp.timeout); + cap = le16_to_cpu(mgmt->u.action.addba_resp.capab); + timeout = le16_to_cpu(mgmt->u.action.addba_resp.timeout); dev_info(dev, "WLAN_ACTION_ADDBA_RESP: " "timeout %i, tid %02x, buf_size %02x, policy %02x, " "status %02x\n", @@ -5157,11 +5157,11 @@ static void rtl8xxxu_dump_action(struct device *dev, (cap & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2, (cap & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6, (cap >> 1) & 0x1, - le16_to_cpu(mgmt->u.action.u.addba_resp.status)); + le16_to_cpu(mgmt->u.action.addba_resp.status)); break; case WLAN_ACTION_ADDBA_REQ: - cap = le16_to_cpu(mgmt->u.action.u.addba_req.capab); - timeout = le16_to_cpu(mgmt->u.action.u.addba_req.timeout); + cap = le16_to_cpu(mgmt->u.action.addba_req.capab); + timeout = le16_to_cpu(mgmt->u.action.addba_req.timeout); dev_info(dev, "WLAN_ACTION_ADDBA_REQ: " "timeout %i, tid %02x, buf_size %02x, policy %02x\n", timeout, @@ -5171,7 +5171,7 @@ static void rtl8xxxu_dump_action(struct device *dev, break; default: dev_info(dev, "action frame %02x\n", - mgmt->u.action.u.addba_resp.action_code); + mgmt->u.action.action_code); break; } } diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c index 0ac9cf0937aa..aad377864e73 100644 --- a/drivers/net/wireless/realtek/rtlwifi/base.c +++ b/drivers/net/wireless/realtek/rtlwifi/base.c @@ -1409,7 +1409,7 @@ bool rtl_action_proc(struct ieee80211_hw *hw, struct sk_buff *skb, u8 is_tx) sta_entry = (struct rtl_sta_info *)sta->drv_priv; capab = - le16_to_cpu(mgmt->u.action.u.addba_req.capab); + le16_to_cpu(mgmt->u.action.addba_req.capab); tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; if (tid >= MAX_TID_COUNT) { @@ -2392,35 +2392,35 @@ static struct sk_buff *rtl_make_smps_action(struct ieee80211_hw *hw, struct sk_buff *skb; struct ieee80211_mgmt *action_frame; - /* 27 = header + category + action + smps mode */ - skb = dev_alloc_skb(27 + hw->extra_tx_headroom); + skb = dev_alloc_skb(IEEE80211_MIN_ACTION_SIZE(ht_smps) + + hw->extra_tx_headroom); if (!skb) return NULL; skb_reserve(skb, hw->extra_tx_headroom); - action_frame = skb_put_zero(skb, 27); + action_frame = skb_put_zero(skb, IEEE80211_MIN_ACTION_SIZE(ht_smps)); memcpy(action_frame->da, da, ETH_ALEN); memcpy(action_frame->sa, rtlefuse->dev_addr, ETH_ALEN); memcpy(action_frame->bssid, bssid, ETH_ALEN); action_frame->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); action_frame->u.action.category = WLAN_CATEGORY_HT; - action_frame->u.action.u.ht_smps.action = WLAN_HT_ACTION_SMPS; + action_frame->u.action.action_code = WLAN_HT_ACTION_SMPS; switch (smps) { case IEEE80211_SMPS_AUTOMATIC:/* 0 */ case IEEE80211_SMPS_NUM_MODES:/* 4 */ WARN_ON(1); fallthrough; case IEEE80211_SMPS_OFF:/* 1 */ /*MIMO_PS_NOLIMIT*/ - action_frame->u.action.u.ht_smps.smps_control = + action_frame->u.action.ht_smps.smps_control = WLAN_HT_SMPS_CONTROL_DISABLED;/* 0 */ break; case IEEE80211_SMPS_STATIC:/* 2 */ /*MIMO_PS_STATIC*/ - action_frame->u.action.u.ht_smps.smps_control = + action_frame->u.action.ht_smps.smps_control = WLAN_HT_SMPS_CONTROL_STATIC;/* 1 */ break; case IEEE80211_SMPS_DYNAMIC:/* 3 */ /*MIMO_PS_DYNAMIC*/ - action_frame->u.action.u.ht_smps.smps_control = + action_frame->u.action.ht_smps.smps_control = WLAN_HT_SMPS_CONTROL_DYNAMIC;/* 3 */ break; } @@ -2519,25 +2519,25 @@ struct sk_buff *rtl_make_del_ba(struct ieee80211_hw *hw, struct ieee80211_mgmt *action_frame; u16 params; - /* 27 = header + category + action + smps mode */ - skb = dev_alloc_skb(34 + hw->extra_tx_headroom); + skb = dev_alloc_skb(IEEE80211_MIN_ACTION_SIZE(delba) + + hw->extra_tx_headroom); if (!skb) return NULL; skb_reserve(skb, hw->extra_tx_headroom); - action_frame = skb_put_zero(skb, 34); + action_frame = skb_put_zero(skb, IEEE80211_MIN_ACTION_SIZE(delba)); memcpy(action_frame->sa, sa, ETH_ALEN); memcpy(action_frame->da, rtlefuse->dev_addr, ETH_ALEN); memcpy(action_frame->bssid, bssid, ETH_ALEN); action_frame->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); action_frame->u.action.category = WLAN_CATEGORY_BACK; - action_frame->u.action.u.delba.action_code = WLAN_ACTION_DELBA; + action_frame->u.action.action_code = WLAN_ACTION_DELBA; params = (u16)(1 << 11); /* bit 11 initiator */ params |= (u16)(tid << 12); /* bit 15:12 TID number */ - action_frame->u.action.u.delba.params = cpu_to_le16(params); - action_frame->u.action.u.delba.reason_code = + action_frame->u.action.delba.params = cpu_to_le16(params); + action_frame->u.action.delba.reason_code = cpu_to_le16(WLAN_REASON_QSTA_TIMEOUT); return skb; diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c index d080469264cf..19e2ff62d9f1 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.c +++ b/drivers/net/wireless/realtek/rtlwifi/pci.c @@ -507,7 +507,7 @@ static void _rtl_pci_tx_isr(struct ieee80211_hw *hw, int prio) if (ieee80211_is_action(fc)) { struct ieee80211_mgmt *action_frame = (struct ieee80211_mgmt *)skb->data; - if (action_frame->u.action.u.ht_smps.action == + if (action_frame->u.action.action_code == WLAN_HT_ACTION_SMPS) { dev_kfree_skb(skb); goto tx_status_ok; diff --git a/drivers/net/wireless/silabs/wfx/data_rx.c b/drivers/net/wireless/silabs/wfx/data_rx.c index e099a9e65bae..15c06b2b8633 100644 --- a/drivers/net/wireless/silabs/wfx/data_rx.c +++ b/drivers/net/wireless/silabs/wfx/data_rx.c @@ -21,14 +21,14 @@ static void wfx_rx_handle_ba(struct wfx_vif *wvif, struct ieee80211_mgmt *mgmt) if (wfx_api_older_than(wvif->wdev, 3, 6)) return; - switch (mgmt->u.action.u.addba_req.action_code) { + switch (mgmt->u.action.action_code) { case WLAN_ACTION_ADDBA_REQ: - params = le16_to_cpu(mgmt->u.action.u.addba_req.capab); + params = le16_to_cpu(mgmt->u.action.addba_req.capab); tid = (params & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; ieee80211_start_rx_ba_session_offl(vif, mgmt->sa, tid); break; case WLAN_ACTION_DELBA: - params = le16_to_cpu(mgmt->u.action.u.delba.params); + params = le16_to_cpu(mgmt->u.action.delba.params); tid = (params & IEEE80211_DELBA_PARAM_TID_MASK) >> 12; ieee80211_stop_rx_ba_session_offl(vif, mgmt->sa, tid); break; @@ -80,7 +80,7 @@ void wfx_rx_cb(struct wfx_vif *wvif, const struct wfx_hif_ind_rx *arg, struct sk */ if (ieee80211_is_action(frame->frame_control) && mgmt->u.action.category == WLAN_CATEGORY_BACK && - skb->len > IEEE80211_MIN_ACTION_SIZE) { + skb->len > IEEE80211_MIN_ACTION_SIZE(action_code)) { wfx_rx_handle_ba(wvif, mgmt); goto drop; } diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 3651b2e6c518..aea360e90cb1 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1046,31 +1046,28 @@ struct ieee80211_mgmt { } __packed probe_resp; struct { u8 category; + u8 action_code; union { struct { - u8 action_code; u8 dialog_token; u8 status_code; u8 variable[]; } __packed wme_action; struct{ - u8 action_code; + u8 no_fixed_fields[0]; u8 variable[]; } __packed chan_switch; struct{ - u8 action_code; struct ieee80211_ext_chansw_ie data; u8 variable[]; } __packed ext_chan_switch; struct{ - u8 action_code; u8 dialog_token; u8 element_id; u8 length; struct ieee80211_msrment_ie msr_elem; } __packed measurement; struct{ - u8 action_code; u8 dialog_token; __le16 capab; __le16 timeout; @@ -1079,7 +1076,6 @@ struct ieee80211_mgmt { u8 variable[]; } __packed addba_req; struct{ - u8 action_code; u8 dialog_token; __le16 status; __le16 capab; @@ -1088,54 +1084,45 @@ struct ieee80211_mgmt { u8 variable[]; } __packed addba_resp; struct{ - u8 action_code; __le16 params; __le16 reason_code; } __packed delba; struct { - u8 action_code; + u8 no_fixed_fields[0]; u8 variable[]; } __packed self_prot; struct{ - u8 action_code; + u8 no_fixed_fields[0]; u8 variable[]; } __packed mesh_action; struct { - u8 action; u8 trans_id[WLAN_SA_QUERY_TR_ID_LEN]; } __packed sa_query; struct { - u8 action; u8 smps_control; } __packed ht_smps; struct { - u8 action_code; u8 chanwidth; } __packed ht_notify_cw; struct { - u8 action_code; u8 dialog_token; __le16 capability; u8 variable[]; } __packed tdls_discover_resp; struct { - u8 action_code; u8 operating_mode; } __packed vht_opmode_notif; struct { - u8 action_code; u8 membership[WLAN_MEMBERSHIP_LEN]; u8 position[WLAN_USER_POSITION_LEN]; } __packed vht_group_notif; struct { - u8 action_code; u8 dialog_token; u8 tpc_elem_id; u8 tpc_elem_length; struct ieee80211_tpc_report_ie tpc; } __packed tpc_report; struct { - u8 action_code; u8 dialog_token; u8 follow_up; u8 tod[6]; @@ -1145,11 +1132,10 @@ struct ieee80211_mgmt { u8 variable[]; } __packed ftm; struct { - u8 action_code; + u8 no_fixed_fields[0]; u8 variable[]; } __packed s1g; struct { - u8 action_code; u8 dialog_token; u8 follow_up; u32 tod; @@ -1158,41 +1144,37 @@ struct ieee80211_mgmt { u8 max_toa_error; } __packed wnm_timing_msr; struct { - u8 action_code; u8 dialog_token; u8 variable[]; } __packed ttlm_req; struct { - u8 action_code; u8 dialog_token; __le16 status_code; u8 variable[]; } __packed ttlm_res; struct { - u8 action_code; + u8 no_fixed_fields[0]; + /* no variable fields either */ } __packed ttlm_tear_down; struct { - u8 action_code; u8 dialog_token; u8 variable[]; } __packed ml_reconf_req; struct { - u8 action_code; u8 dialog_token; u8 count; u8 variable[]; } __packed ml_reconf_resp; struct { - u8 action_code; + u8 no_fixed_fields[0]; u8 variable[]; } __packed epcs; struct { - u8 action_code; u8 dialog_token; u8 control; u8 variable[]; } __packed eml_omn; - } u; + }; } __packed action; DECLARE_FLEX_ARRAY(u8, body); /* Generic frame body */ } u; @@ -1210,8 +1192,7 @@ struct ieee80211_mgmt { #define BSS_MEMBERSHIP_SELECTOR_MIN BSS_MEMBERSHIP_SELECTOR_UHR_PHY -/* mgmt header + 1 byte category code */ -#define IEEE80211_MIN_ACTION_SIZE offsetof(struct ieee80211_mgmt, u.action.u) +#define IEEE80211_MIN_ACTION_SIZE(type) offsetofend(struct ieee80211_mgmt, u.action.type) /* Management MIC information element (IEEE 802.11w) for CMAC */ @@ -2391,7 +2372,7 @@ static inline bool ieee80211_is_bufferable_mmpdu(struct sk_buff *skb) if (!ieee80211_is_action(fc)) return false; - if (skb->len < offsetofend(typeof(*mgmt), u.action.u.ftm.action_code)) + if (skb->len < IEEE80211_MIN_ACTION_SIZE(action_code)) return true; /* action frame - additionally check for non-bufferable FTM */ @@ -2400,8 +2381,8 @@ static inline bool ieee80211_is_bufferable_mmpdu(struct sk_buff *skb) mgmt->u.action.category != WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION) return true; - if (mgmt->u.action.u.ftm.action_code == WLAN_PUB_ACTION_FTM_REQUEST || - mgmt->u.action.u.ftm.action_code == WLAN_PUB_ACTION_FTM_RESPONSE) + if (mgmt->u.action.action_code == WLAN_PUB_ACTION_FTM_REQUEST || + mgmt->u.action.action_code == WLAN_PUB_ACTION_FTM_RESPONSE) return false; return true; @@ -2451,7 +2432,7 @@ static inline bool _ieee80211_is_robust_mgmt_frame(struct ieee80211_hdr *hdr) */ static inline bool ieee80211_is_robust_mgmt_frame(struct sk_buff *skb) { - if (skb->len < IEEE80211_MIN_ACTION_SIZE) + if (skb->len < IEEE80211_MIN_ACTION_SIZE(category)) return false; return _ieee80211_is_robust_mgmt_frame((void *)skb->data); } @@ -2467,7 +2448,7 @@ static inline bool ieee80211_is_public_action(struct ieee80211_hdr *hdr, { struct ieee80211_mgmt *mgmt = (void *)hdr; - if (len < IEEE80211_MIN_ACTION_SIZE) + if (len < IEEE80211_MIN_ACTION_SIZE(category)) return false; if (!ieee80211_is_action(hdr->frame_control)) return false; @@ -2485,13 +2466,14 @@ static inline bool ieee80211_is_public_action(struct ieee80211_hdr *hdr, static inline bool ieee80211_is_protected_dual_of_public_action(struct sk_buff *skb) { + struct ieee80211_mgmt *mgmt = (void *)skb->data; u8 action; if (!ieee80211_is_public_action((void *)skb->data, skb->len) || - skb->len < IEEE80211_MIN_ACTION_SIZE + 1) + skb->len < IEEE80211_MIN_ACTION_SIZE(action_code)) return false; - action = *(u8 *)(skb->data + IEEE80211_MIN_ACTION_SIZE); + action = mgmt->u.action.action_code; return action != WLAN_PUB_ACTION_20_40_BSS_COEX && action != WLAN_PUB_ACTION_DSE_REG_LOC_ANN && @@ -2530,7 +2512,7 @@ static inline bool _ieee80211_is_group_privacy_action(struct ieee80211_hdr *hdr) */ static inline bool ieee80211_is_group_privacy_action(struct sk_buff *skb) { - if (skb->len < IEEE80211_MIN_ACTION_SIZE) + if (skb->len < IEEE80211_MIN_ACTION_SIZE(category)) return false; return _ieee80211_is_group_privacy_action((void *)skb->data); } @@ -2626,8 +2608,7 @@ static inline bool ieee80211_action_contains_tpc(struct sk_buff *skb) if (!ieee80211_is_action(mgmt->frame_control)) return false; - if (skb->len < IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.tpc_report)) + if (skb->len < IEEE80211_MIN_ACTION_SIZE(tpc_report)) return false; /* @@ -2646,12 +2627,11 @@ static inline bool ieee80211_action_contains_tpc(struct sk_buff *skb) return false; /* both spectrum mgmt and link measurement have same action code */ - if (mgmt->u.action.u.tpc_report.action_code != - WLAN_ACTION_SPCT_TPC_RPRT) + if (mgmt->u.action.action_code != WLAN_ACTION_SPCT_TPC_RPRT) return false; - if (mgmt->u.action.u.tpc_report.tpc_elem_id != WLAN_EID_TPC_REPORT || - mgmt->u.action.u.tpc_report.tpc_elem_length != + if (mgmt->u.action.tpc_report.tpc_elem_id != WLAN_EID_TPC_REPORT || + mgmt->u.action.tpc_report.tpc_elem_length != sizeof(struct ieee80211_tpc_report_ie)) return false; @@ -2667,16 +2647,15 @@ static inline bool ieee80211_is_timing_measurement(struct sk_buff *skb) { struct ieee80211_mgmt *mgmt = (void *)skb->data; - if (skb->len < IEEE80211_MIN_ACTION_SIZE) + if (skb->len < IEEE80211_MIN_ACTION_SIZE(wnm_timing_msr)) return false; if (!ieee80211_is_action(mgmt->frame_control)) return false; if (mgmt->u.action.category == WLAN_CATEGORY_WNM_UNPROTECTED && - mgmt->u.action.u.wnm_timing_msr.action_code == - WLAN_UNPROTECTED_WNM_ACTION_TIMING_MEASUREMENT_RESPONSE && - skb->len >= offsetofend(typeof(*mgmt), u.action.u.wnm_timing_msr)) + mgmt->u.action.action_code == + WLAN_UNPROTECTED_WNM_ACTION_TIMING_MEASUREMENT_RESPONSE) return true; return false; @@ -2691,15 +2670,13 @@ static inline bool ieee80211_is_ftm(struct sk_buff *skb) { struct ieee80211_mgmt *mgmt = (void *)skb->data; + if (skb->len < IEEE80211_MIN_ACTION_SIZE(ftm)) + return false; + if (!ieee80211_is_public_action((void *)mgmt, skb->len)) return false; - if (mgmt->u.action.u.ftm.action_code == - WLAN_PUB_ACTION_FTM_RESPONSE && - skb->len >= offsetofend(typeof(*mgmt), u.action.u.ftm)) - return true; - - return false; + return mgmt->u.action.action_code == WLAN_PUB_ACTION_FTM_RESPONSE; } struct element { diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index f301a8622bee..0a2be8cb600f 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu * Copyright 2007-2010, Intel Corporation * Copyright(c) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2025 Intel Corporation + * Copyright (C) 2018-2026 Intel Corporation */ /** @@ -251,19 +251,20 @@ static void ieee80211_send_addba_resp(struct sta_info *sta, u8 *da, u16 tid, skb_reserve(skb, local->hw.extra_tx_headroom); mgmt = ieee80211_mgmt_ba(skb, da, sdata); - skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_resp)); + skb_put(skb, 2 + sizeof(mgmt->u.action.addba_resp)); mgmt->u.action.category = WLAN_CATEGORY_BACK; - mgmt->u.action.u.addba_resp.action_code = WLAN_ACTION_ADDBA_RESP; - mgmt->u.action.u.addba_resp.dialog_token = dialog_token; + mgmt->u.action.action_code = WLAN_ACTION_ADDBA_RESP; + + mgmt->u.action.addba_resp.dialog_token = dialog_token; capab = u16_encode_bits(amsdu, IEEE80211_ADDBA_PARAM_AMSDU_MASK); capab |= u16_encode_bits(policy, IEEE80211_ADDBA_PARAM_POLICY_MASK); capab |= u16_encode_bits(tid, IEEE80211_ADDBA_PARAM_TID_MASK); capab |= u16_encode_bits(buf_size, IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK); - mgmt->u.action.u.addba_resp.capab = cpu_to_le16(capab); - mgmt->u.action.u.addba_resp.timeout = cpu_to_le16(timeout); - mgmt->u.action.u.addba_resp.status = cpu_to_le16(status); + mgmt->u.action.addba_resp.capab = cpu_to_le16(capab); + mgmt->u.action.addba_resp.timeout = cpu_to_le16(timeout); + mgmt->u.action.addba_resp.status = cpu_to_le16(status); if (sta->sta.valid_links || sta->sta.deflink.he_cap.has_he) ieee80211_add_addbaext(skb, req_addba_ext_data, buf_size); @@ -477,22 +478,22 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, u8 dialog_token, addba_ext_data; /* extract session parameters from addba request frame */ - dialog_token = mgmt->u.action.u.addba_req.dialog_token; - timeout = le16_to_cpu(mgmt->u.action.u.addba_req.timeout); + dialog_token = mgmt->u.action.addba_req.dialog_token; + timeout = le16_to_cpu(mgmt->u.action.addba_req.timeout); start_seq_num = - le16_to_cpu(mgmt->u.action.u.addba_req.start_seq_num) >> 4; + le16_to_cpu(mgmt->u.action.addba_req.start_seq_num) >> 4; - capab = le16_to_cpu(mgmt->u.action.u.addba_req.capab); + capab = le16_to_cpu(mgmt->u.action.addba_req.capab); ba_policy = (capab & IEEE80211_ADDBA_PARAM_POLICY_MASK) >> 1; tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6; addba_ext_data = ieee80211_retrieve_addba_ext_data(sta, - mgmt->u.action.u.addba_req.variable, + mgmt->u.action.addba_req.variable, len - offsetof(typeof(*mgmt), - u.action.u.addba_req.variable), + u.action.addba_req.variable), &buf_size); __ieee80211_start_rx_ba_session(sta, dialog_token, timeout, diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 93b47a7ba9c4..d5a62b8d5a80 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu * Copyright 2007-2010, Intel Corporation * Copyright(c) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018 - 2024 Intel Corporation + * Copyright (C) 2018-2026 Intel Corporation */ #include @@ -68,7 +68,7 @@ static void ieee80211_send_addba_request(struct sta_info *sta, u16 tid, struct ieee80211_mgmt *mgmt; u16 capab; - skb = dev_alloc_skb(sizeof(*mgmt) + + skb = dev_alloc_skb(IEEE80211_MIN_ACTION_SIZE(addba_req) + 2 + sizeof(struct ieee80211_addba_ext_ie) + local->hw.extra_tx_headroom); if (!skb) @@ -77,21 +77,21 @@ static void ieee80211_send_addba_request(struct sta_info *sta, u16 tid, skb_reserve(skb, local->hw.extra_tx_headroom); mgmt = ieee80211_mgmt_ba(skb, sta->sta.addr, sdata); - skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_req)); + skb_put(skb, 2 + sizeof(mgmt->u.action.addba_req)); mgmt->u.action.category = WLAN_CATEGORY_BACK; - mgmt->u.action.u.addba_req.action_code = WLAN_ACTION_ADDBA_REQ; + mgmt->u.action.action_code = WLAN_ACTION_ADDBA_REQ; - mgmt->u.action.u.addba_req.dialog_token = dialog_token; + mgmt->u.action.addba_req.dialog_token = dialog_token; capab = IEEE80211_ADDBA_PARAM_AMSDU_MASK; capab |= IEEE80211_ADDBA_PARAM_POLICY_MASK; capab |= u16_encode_bits(tid, IEEE80211_ADDBA_PARAM_TID_MASK); capab |= u16_encode_bits(agg_size, IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK); - mgmt->u.action.u.addba_req.capab = cpu_to_le16(capab); + mgmt->u.action.addba_req.capab = cpu_to_le16(capab); - mgmt->u.action.u.addba_req.timeout = cpu_to_le16(timeout); - mgmt->u.action.u.addba_req.start_seq_num = + mgmt->u.action.addba_req.timeout = cpu_to_le16(timeout); + mgmt->u.action.addba_req.start_seq_num = cpu_to_le16(start_seq_num << 4); if (sta->sta.deflink.he_cap.has_he) @@ -978,15 +978,15 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, lockdep_assert_wiphy(sta->local->hw.wiphy); - capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab); + capab = le16_to_cpu(mgmt->u.action.addba_resp.capab); amsdu = capab & IEEE80211_ADDBA_PARAM_AMSDU_MASK; tid = u16_get_bits(capab, IEEE80211_ADDBA_PARAM_TID_MASK); buf_size = u16_get_bits(capab, IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK); ieee80211_retrieve_addba_ext_data(sta, - mgmt->u.action.u.addba_resp.variable, + mgmt->u.action.addba_resp.variable, len - offsetof(typeof(*mgmt), - u.action.u.addba_resp.variable), + u.action.addba_resp.variable), &buf_size); buf_size = min(buf_size, local->hw.max_tx_aggregation_subframes); @@ -999,7 +999,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, if (!tid_tx) return; - if (mgmt->u.action.u.addba_resp.dialog_token != tid_tx->dialog_token) { + if (mgmt->u.action.addba_resp.dialog_token != tid_tx->dialog_token) { ht_dbg(sta->sdata, "wrong addBA response token, %pM tid %d\n", sta->sta.addr, tid); return; @@ -1029,7 +1029,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, * is set to 0, the Buffer Size subfield is set to a value * of at least 1. */ - if (le16_to_cpu(mgmt->u.action.u.addba_resp.status) + if (le16_to_cpu(mgmt->u.action.addba_resp.status) == WLAN_STATUS_SUCCESS && buf_size) { if (test_and_set_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) { @@ -1046,7 +1046,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, sta->ampdu_mlme.addba_req_num[tid] = 0; tid_tx->timeout = - le16_to_cpu(mgmt->u.action.u.addba_resp.timeout); + le16_to_cpu(mgmt->u.action.addba_resp.timeout); if (tid_tx->timeout) { mod_timer(&tid_tx->session_timer, diff --git a/net/mac80211/eht.c b/net/mac80211/eht.c index 078e1e23d8d1..768bfc4e737d 100644 --- a/net/mac80211/eht.c +++ b/net/mac80211/eht.c @@ -108,7 +108,7 @@ static void ieee80211_send_eml_op_mode_notif(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *req, int opt_len) { - int len = offsetofend(struct ieee80211_mgmt, u.action.u.eml_omn); + int len = IEEE80211_MIN_ACTION_SIZE(eml_omn); struct ieee80211_local *local = sdata->local; struct ieee80211_mgmt *mgmt; struct sk_buff *skb; @@ -127,16 +127,15 @@ ieee80211_send_eml_op_mode_notif(struct ieee80211_sub_if_data *sdata, memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN); mgmt->u.action.category = WLAN_CATEGORY_PROTECTED_EHT; - mgmt->u.action.u.eml_omn.action_code = - WLAN_PROTECTED_EHT_ACTION_EML_OP_MODE_NOTIF; - mgmt->u.action.u.eml_omn.dialog_token = - req->u.action.u.eml_omn.dialog_token; - mgmt->u.action.u.eml_omn.control = req->u.action.u.eml_omn.control & + mgmt->u.action.action_code = WLAN_PROTECTED_EHT_ACTION_EML_OP_MODE_NOTIF; + mgmt->u.action.eml_omn.dialog_token = + req->u.action.eml_omn.dialog_token; + mgmt->u.action.eml_omn.control = req->u.action.eml_omn.control & ~(IEEE80211_EML_CTRL_EMLSR_PARAM_UPDATE | IEEE80211_EML_CTRL_INDEV_COEX_ACT); /* Copy optional fields from the received notification frame */ - memcpy(mgmt->u.action.u.eml_omn.variable, - req->u.action.u.eml_omn.variable, opt_len); + memcpy(mgmt->u.action.eml_omn.variable, + req->u.action.eml_omn.variable, opt_len); ieee80211_tx_skb(sdata, skb); } @@ -144,14 +143,14 @@ ieee80211_send_eml_op_mode_notif(struct ieee80211_sub_if_data *sdata, void ieee80211_rx_eml_op_mode_notif(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { - int len = offsetofend(struct ieee80211_mgmt, u.action.u.eml_omn); + int len = IEEE80211_MIN_ACTION_SIZE(eml_omn); enum nl80211_iftype type = ieee80211_vif_type_p2p(&sdata->vif); struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); const struct wiphy_iftype_ext_capab *ift_ext_capa; struct ieee80211_mgmt *mgmt = (void *)skb->data; struct ieee80211_local *local = sdata->local; - u8 control = mgmt->u.action.u.eml_omn.control; - u8 *ptr = mgmt->u.action.u.eml_omn.variable; + u8 control = mgmt->u.action.eml_omn.control; + u8 *ptr = mgmt->u.action.eml_omn.variable; struct ieee80211_eml_params eml_params = { .link_id = status->link_id, .control = control, diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 1c82a28b03de..9e2469a8ce64 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu * Copyright 2007-2010, Intel Corporation * Copyright 2017 Intel Deutschland GmbH - * Copyright(c) 2020-2025 Intel Corporation + * Copyright(c) 2020-2026 Intel Corporation */ #include @@ -462,22 +462,23 @@ void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt; u16 params; - skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom); + skb = dev_alloc_skb(IEEE80211_MIN_ACTION_SIZE(delba) + + local->hw.extra_tx_headroom); if (!skb) return; skb_reserve(skb, local->hw.extra_tx_headroom); mgmt = ieee80211_mgmt_ba(skb, da, sdata); - skb_put(skb, 1 + sizeof(mgmt->u.action.u.delba)); + skb_put(skb, 2 + sizeof(mgmt->u.action.delba)); mgmt->u.action.category = WLAN_CATEGORY_BACK; - mgmt->u.action.u.delba.action_code = WLAN_ACTION_DELBA; + mgmt->u.action.action_code = WLAN_ACTION_DELBA; params = (u16)(initiator << 11); /* bit 11 initiator */ params |= (u16)(tid << 12); /* bit 15:12 TID number */ - mgmt->u.action.u.delba.params = cpu_to_le16(params); - mgmt->u.action.u.delba.reason_code = cpu_to_le16(reason_code); + mgmt->u.action.delba.params = cpu_to_le16(params); + mgmt->u.action.delba.reason_code = cpu_to_le16(reason_code); ieee80211_tx_skb(sdata, skb); } @@ -489,14 +490,14 @@ void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, u16 tid, params; u16 initiator; - params = le16_to_cpu(mgmt->u.action.u.delba.params); + params = le16_to_cpu(mgmt->u.action.delba.params); tid = (params & IEEE80211_DELBA_PARAM_TID_MASK) >> 12; initiator = (params & IEEE80211_DELBA_PARAM_INITIATOR_MASK) >> 11; ht_dbg_ratelimited(sdata, "delba from %pM (%s) tid %d reason code %d\n", mgmt->sa, initiator ? "initiator" : "recipient", tid, - le16_to_cpu(mgmt->u.action.u.delba.reason_code)); + le16_to_cpu(mgmt->u.action.delba.reason_code)); if (initiator == WLAN_BACK_INITIATOR) __ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_INITIATOR, 0, @@ -530,20 +531,20 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, struct ieee80211_tx_info *info; u8 status_link_id = link_id < 0 ? 0 : link_id; - /* 27 = header + category + action + smps mode */ - skb = dev_alloc_skb(27 + local->hw.extra_tx_headroom); + skb = dev_alloc_skb(IEEE80211_MIN_ACTION_SIZE(ht_smps) + + local->hw.extra_tx_headroom); if (!skb) return -ENOMEM; skb_reserve(skb, local->hw.extra_tx_headroom); - action_frame = skb_put(skb, 27); + action_frame = skb_put_zero(skb, IEEE80211_MIN_ACTION_SIZE(ht_smps)); memcpy(action_frame->da, da, ETH_ALEN); memcpy(action_frame->sa, sdata->dev->dev_addr, ETH_ALEN); memcpy(action_frame->bssid, bssid, ETH_ALEN); action_frame->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); action_frame->u.action.category = WLAN_CATEGORY_HT; - action_frame->u.action.u.ht_smps.action = WLAN_HT_ACTION_SMPS; + action_frame->u.action.action_code = WLAN_HT_ACTION_SMPS; switch (smps) { case IEEE80211_SMPS_AUTOMATIC: case IEEE80211_SMPS_NUM_MODES: @@ -551,15 +552,15 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, smps = IEEE80211_SMPS_OFF; fallthrough; case IEEE80211_SMPS_OFF: - action_frame->u.action.u.ht_smps.smps_control = + action_frame->u.action.ht_smps.smps_control = WLAN_HT_SMPS_CONTROL_DISABLED; break; case IEEE80211_SMPS_STATIC: - action_frame->u.action.u.ht_smps.smps_control = + action_frame->u.action.ht_smps.smps_control = WLAN_HT_SMPS_CONTROL_STATIC; break; case IEEE80211_SMPS_DYNAMIC: - action_frame->u.action.u.ht_smps.smps_control = + action_frame->u.action.ht_smps.smps_control = WLAN_HT_SMPS_CONTROL_DYNAMIC; break; } diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 168f84a1353b..0298272c37ec 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -9,7 +9,7 @@ * Copyright 2009, Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright(c) 2016 Intel Deutschland GmbH - * Copyright(c) 2018-2025 Intel Corporation + * Copyright(c) 2018-2026 Intel Corporation */ #include @@ -888,19 +888,11 @@ ieee80211_rx_mgmt_spectrum_mgmt(struct ieee80211_sub_if_data *sdata, struct ieee80211_rx_status *rx_status, struct ieee802_11_elems *elems) { - int required_len; - - if (len < IEEE80211_MIN_ACTION_SIZE + 1) + if (len < IEEE80211_MIN_ACTION_SIZE(chan_switch)) return; /* CSA is the only action we handle for now */ - if (mgmt->u.action.u.measurement.action_code != - WLAN_ACTION_SPCT_CHL_SWITCH) - return; - - required_len = IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.chan_switch); - if (len < required_len) + if (mgmt->u.action.action_code != WLAN_ACTION_SPCT_CHL_SWITCH) return; if (!sdata->vif.bss_conf.csa_active) @@ -1613,12 +1605,12 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, case WLAN_CATEGORY_SPECTRUM_MGMT: ies_len = skb->len - offsetof(struct ieee80211_mgmt, - u.action.u.chan_switch.variable); + u.action.chan_switch.variable); if (ies_len < 0) break; - elems = ieee802_11_parse_elems(mgmt->u.action.u.chan_switch.variable, + elems = ieee802_11_parse_elems(mgmt->u.action.chan_switch.variable, ies_len, IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 676b2a43c9f2..2e391cec73a0 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1579,7 +1579,7 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local, sta = sta_info_get_bss(sdata, mgmt->sa); if (sta) { - switch (mgmt->u.action.u.addba_req.action_code) { + switch (mgmt->u.action.action_code) { case WLAN_ACTION_ADDBA_REQ: ieee80211_process_addba_request(local, sta, mgmt, len); @@ -1599,9 +1599,9 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local, } } else if (ieee80211_is_action(mgmt->frame_control) && mgmt->u.action.category == WLAN_CATEGORY_HT) { - switch (mgmt->u.action.u.ht_smps.action) { + switch (mgmt->u.action.action_code) { case WLAN_HT_ACTION_NOTIFY_CHANWIDTH: { - u8 chanwidth = mgmt->u.action.u.ht_notify_cw.chanwidth; + u8 chanwidth = mgmt->u.action.ht_notify_cw.chanwidth; struct ieee80211_rx_status *status; struct link_sta_info *link_sta; struct sta_info *sta; @@ -1628,7 +1628,7 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local, } } else if (ieee80211_is_action(mgmt->frame_control) && mgmt->u.action.category == WLAN_CATEGORY_VHT) { - switch (mgmt->u.action.u.vht_group_notif.action_code) { + switch (mgmt->u.action.action_code) { case WLAN_VHT_ACTION_OPMODE_NOTIF: { struct ieee80211_rx_status *status; enum nl80211_band band; @@ -1637,7 +1637,7 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local, status = IEEE80211_SKB_RXCB(skb); band = status->band; - opmode = mgmt->u.action.u.vht_opmode_notif.operating_mode; + opmode = mgmt->u.action.vht_opmode_notif.operating_mode; sta = sta_info_get_bss(sdata, mgmt->sa); @@ -1658,7 +1658,7 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local, } } else if (ieee80211_is_action(mgmt->frame_control) && mgmt->u.action.category == WLAN_CATEGORY_S1G) { - switch (mgmt->u.action.u.s1g.action_code) { + switch (mgmt->u.action.action_code) { case WLAN_S1G_TWT_TEARDOWN: case WLAN_S1G_TWT_SETUP: ieee80211_s1g_rx_twt_action(sdata, skb); @@ -1669,7 +1669,7 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local, } else if (ieee80211_is_action(mgmt->frame_control) && mgmt->u.action.category == WLAN_CATEGORY_PROTECTED_EHT) { if (sdata->vif.type == NL80211_IFTYPE_AP) { - switch (mgmt->u.action.u.eml_omn.action_code) { + switch (mgmt->u.action.action_code) { case WLAN_PROTECTED_EHT_ACTION_EML_OP_MODE_NOTIF: ieee80211_rx_eml_op_mode_notif(sdata, skb); break; @@ -1677,7 +1677,7 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local, break; } } else if (sdata->vif.type == NL80211_IFTYPE_STATION) { - switch (mgmt->u.action.u.ttlm_req.action_code) { + switch (mgmt->u.action.action_code) { case WLAN_PROTECTED_EHT_ACTION_TTLM_REQ: ieee80211_process_neg_ttlm_req(sdata, mgmt, skb->len); @@ -1765,7 +1765,7 @@ static void ieee80211_iface_process_status(struct ieee80211_sub_if_data *sdata, if (ieee80211_is_action(mgmt->frame_control) && mgmt->u.action.category == WLAN_CATEGORY_S1G) { - switch (mgmt->u.action.u.s1g.action_code) { + switch (mgmt->u.action.action_code) { case WLAN_S1G_TWT_TEARDOWN: case WLAN_S1G_TWT_SETUP: ieee80211_s1g_status_twt_action(sdata, skb); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 28624e57aa49..6696c611dfa4 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008, 2009 open80211s Ltd. - * Copyright (C) 2018 - 2025 Intel Corporation + * Copyright (C) 2018-2026 Intel Corporation * Authors: Luis Carlos Cobo * Javier Cardona */ @@ -19,8 +19,7 @@ static struct kmem_cache *rm_cache; bool mesh_action_is_path_sel(struct ieee80211_mgmt *mgmt) { - return (mgmt->u.action.u.mesh_action.action_code == - WLAN_MESH_ACTION_HWMP_PATH_SELECTION); + return mgmt->u.action.action_code == WLAN_MESH_ACTION_HWMP_PATH_SELECTION; } void ieee80211s_init(void) @@ -1618,13 +1617,12 @@ static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata, size_t baselen; u8 *pos; - if (mgmt->u.action.u.measurement.action_code != - WLAN_ACTION_SPCT_CHL_SWITCH) + if (mgmt->u.action.action_code != WLAN_ACTION_SPCT_CHL_SWITCH) return; - pos = mgmt->u.action.u.chan_switch.variable; + pos = mgmt->u.action.chan_switch.variable; baselen = offsetof(struct ieee80211_mgmt, - u.action.u.chan_switch.variable); + u.action.chan_switch.variable); elems = ieee802_11_parse_elems(pos, len - baselen, IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION, @@ -1670,7 +1668,7 @@ static void ieee80211_mesh_rx_mgmt_action(struct ieee80211_sub_if_data *sdata, { switch (mgmt->u.action.category) { case WLAN_CATEGORY_SELF_PROTECTED: - switch (mgmt->u.action.u.self_prot.action_code) { + switch (mgmt->u.action.action_code) { case WLAN_SP_MESH_PEERING_OPEN: case WLAN_SP_MESH_PEERING_CLOSE: case WLAN_SP_MESH_PEERING_CONFIRM: diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 98d5aaa36d00..9d89ebcce1c1 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008, 2009 open80211s Ltd. - * Copyright (C) 2019, 2021-2023, 2025 Intel Corporation + * Copyright (C) 2019, 2021-2023, 2025-2026 Intel Corporation * Author: Luis Carlos Cobo */ @@ -105,12 +105,11 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, u32 lifetime, u32 metric, u32 preq_id, struct ieee80211_sub_if_data *sdata) { + int hdr_len = IEEE80211_MIN_ACTION_SIZE(mesh_action); struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; u8 *pos, ie_len; - int hdr_len = offsetofend(struct ieee80211_mgmt, - u.action.u.mesh_action); skb = dev_alloc_skb(local->tx_headroom + hdr_len + @@ -127,8 +126,7 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, /* BSSID == SA */ memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN); mgmt->u.action.category = WLAN_CATEGORY_MESH_ACTION; - mgmt->u.action.u.mesh_action.action_code = - WLAN_MESH_ACTION_HWMP_PATH_SELECTION; + mgmt->u.action.action_code = WLAN_MESH_ACTION_HWMP_PATH_SELECTION; switch (action) { case MPATH_PREQ: @@ -237,13 +235,12 @@ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata, u8 ttl, const u8 *target, u32 target_sn, u16 target_rcode, const u8 *ra) { + int hdr_len = IEEE80211_MIN_ACTION_SIZE(mesh_action); struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct ieee80211_mgmt *mgmt; u8 *pos, ie_len; - int hdr_len = offsetofend(struct ieee80211_mgmt, - u.action.u.mesh_action); if (time_before(jiffies, ifmsh->next_perr)) return -EAGAIN; @@ -265,8 +262,7 @@ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata, /* BSSID == SA */ memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN); mgmt->u.action.category = WLAN_CATEGORY_MESH_ACTION; - mgmt->u.action.u.mesh_action.action_code = - WLAN_MESH_ACTION_HWMP_PATH_SELECTION; + mgmt->u.action.action_code = WLAN_MESH_ACTION_HWMP_PATH_SELECTION; ie_len = 15; pos = skb_put(skb, 2 + ie_len); *pos++ = WLAN_EID_PERR; @@ -938,7 +934,7 @@ void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata, struct sta_info *sta; /* need action_code */ - if (len < IEEE80211_MIN_ACTION_SIZE + 1) + if (len < IEEE80211_MIN_ACTION_SIZE(mesh_action)) return; rcu_read_lock(); @@ -949,8 +945,8 @@ void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata, } rcu_read_unlock(); - baselen = (u8 *) mgmt->u.action.u.mesh_action.variable - (u8 *) mgmt; - elems = ieee802_11_parse_elems(mgmt->u.action.u.mesh_action.variable, + baselen = mgmt->u.action.mesh_action.variable - (u8 *)mgmt; + elems = ieee802_11_parse_elems(mgmt->u.action.mesh_action.variable, len - baselen, IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION, diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 04c931cd2063..7d823a55636f 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008, 2009 open80211s Ltd. - * Copyright (C) 2019, 2021-2025 Intel Corporation + * Copyright (C) 2019, 2021-2026 Intel Corporation * Author: Luis Carlos Cobo */ #include @@ -13,7 +13,7 @@ #include "rate.h" #include "mesh.h" -#define PLINK_CNF_AID(mgmt) ((mgmt)->u.action.u.self_prot.variable + 2) +#define PLINK_CNF_AID(mgmt) ((mgmt)->u.action.self_prot.variable + 2) #define PLINK_GET_LLID(p) (p + 2) #define PLINK_GET_PLID(p) (p + 4) @@ -215,6 +215,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, enum ieee80211_self_protected_actioncode action, u8 *da, u16 llid, u16 plid, u16 reason) { + int hdr_len = IEEE80211_MIN_ACTION_SIZE(self_prot); struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_tx_info *info; @@ -223,7 +224,6 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, u16 peering_proto = 0; u8 *pos, ie_len = 4; u8 ie_len_he_cap, ie_len_eht_cap; - int hdr_len = offsetofend(struct ieee80211_mgmt, u.action.u.self_prot); int err = -ENOMEM; ie_len_he_cap = ieee80211_ie_len_he_cap(sdata); @@ -260,7 +260,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN); mgmt->u.action.category = WLAN_CATEGORY_SELF_PROTECTED; - mgmt->u.action.u.self_prot.action_code = action; + mgmt->u.action.action_code = action; if (action != WLAN_SP_MESH_PEERING_CLOSE) { struct ieee80211_supported_band *sband; @@ -1141,7 +1141,7 @@ mesh_process_plink_frame(struct ieee80211_sub_if_data *sdata, return; } - ftype = mgmt->u.action.u.self_prot.action_code; + ftype = mgmt->u.action.action_code; if ((ftype == WLAN_SP_MESH_PEERING_OPEN && ie_len != 4) || (ftype == WLAN_SP_MESH_PEERING_CONFIRM && ie_len != 6) || (ftype == WLAN_SP_MESH_PEERING_CLOSE && ie_len != 6 @@ -1224,8 +1224,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, size_t baselen; u8 *baseaddr; - /* need action_code, aux */ - if (len < IEEE80211_MIN_ACTION_SIZE + 3) + /* need aux */ + if (len < IEEE80211_MIN_ACTION_SIZE(self_prot) + 1) return; if (sdata->u.mesh.user_mpm) @@ -1238,10 +1238,9 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, return; } - baseaddr = mgmt->u.action.u.self_prot.variable; - baselen = (u8 *) mgmt->u.action.u.self_prot.variable - (u8 *) mgmt; - if (mgmt->u.action.u.self_prot.action_code == - WLAN_SP_MESH_PEERING_CONFIRM) { + baseaddr = mgmt->u.action.self_prot.variable; + baselen = mgmt->u.action.self_prot.variable - (u8 *)mgmt; + if (mgmt->u.action.action_code == WLAN_SP_MESH_PEERING_CONFIRM) { baseaddr += 4; baselen += 4; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 170330d924a3..da79df92994d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -7957,7 +7957,7 @@ ieee80211_send_neg_ttlm_req(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_mgmt *mgmt; struct sk_buff *skb; - int hdr_len = offsetofend(struct ieee80211_mgmt, u.action.u.ttlm_req); + int hdr_len = IEEE80211_MIN_ACTION_SIZE(ttlm_req); int ttlm_max_len = 2 + 1 + sizeof(struct ieee80211_ttlm_elem) + 1 + 2 * 2 * IEEE80211_TTLM_NUM_TIDS; @@ -7974,9 +7974,8 @@ ieee80211_send_neg_ttlm_req(struct ieee80211_sub_if_data *sdata, memcpy(mgmt->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN); mgmt->u.action.category = WLAN_CATEGORY_PROTECTED_EHT; - mgmt->u.action.u.ttlm_req.action_code = - WLAN_PROTECTED_EHT_ACTION_TTLM_REQ; - mgmt->u.action.u.ttlm_req.dialog_token = dialog_token; + mgmt->u.action.action_code = WLAN_PROTECTED_EHT_ACTION_TTLM_REQ; + mgmt->u.action.ttlm_req.dialog_token = dialog_token; ieee80211_neg_ttlm_add_suggested_map(skb, neg_ttlm); ieee80211_tx_skb(sdata, skb); } @@ -8026,7 +8025,7 @@ ieee80211_send_neg_ttlm_res(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_mgmt *mgmt; struct sk_buff *skb; - int hdr_len = offsetofend(struct ieee80211_mgmt, u.action.u.ttlm_res); + int hdr_len = IEEE80211_MIN_ACTION_SIZE(ttlm_res); int ttlm_max_len = 2 + 1 + sizeof(struct ieee80211_ttlm_elem) + 1 + 2 * 2 * IEEE80211_TTLM_NUM_TIDS; u16 status_code; @@ -8044,9 +8043,8 @@ ieee80211_send_neg_ttlm_res(struct ieee80211_sub_if_data *sdata, memcpy(mgmt->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN); mgmt->u.action.category = WLAN_CATEGORY_PROTECTED_EHT; - mgmt->u.action.u.ttlm_res.action_code = - WLAN_PROTECTED_EHT_ACTION_TTLM_RES; - mgmt->u.action.u.ttlm_res.dialog_token = dialog_token; + mgmt->u.action.action_code = WLAN_PROTECTED_EHT_ACTION_TTLM_RES; + mgmt->u.action.ttlm_res.dialog_token = dialog_token; switch (ttlm_res) { default: WARN_ON(1); @@ -8063,7 +8061,7 @@ ieee80211_send_neg_ttlm_res(struct ieee80211_sub_if_data *sdata, break; } - mgmt->u.action.u.ttlm_res.status_code = cpu_to_le16(status_code); + mgmt->u.action.ttlm_res.status_code = cpu_to_le16(status_code); ieee80211_tx_skb(sdata, skb); } @@ -8163,10 +8161,9 @@ void ieee80211_process_neg_ttlm_req(struct ieee80211_sub_if_data *sdata, if (!ieee80211_vif_is_mld(&sdata->vif)) return; - dialog_token = mgmt->u.action.u.ttlm_req.dialog_token; - ies_len = len - offsetof(struct ieee80211_mgmt, - u.action.u.ttlm_req.variable); - elems = ieee802_11_parse_elems(mgmt->u.action.u.ttlm_req.variable, + dialog_token = mgmt->u.action.ttlm_req.dialog_token; + ies_len = len - IEEE80211_MIN_ACTION_SIZE(ttlm_req); + elems = ieee802_11_parse_elems(mgmt->u.action.ttlm_req.variable, ies_len, IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION, @@ -8217,8 +8214,7 @@ void ieee80211_process_neg_ttlm_res(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len) { if (!ieee80211_vif_is_mld(&sdata->vif) || - mgmt->u.action.u.ttlm_req.dialog_token != - sdata->u.mgd.dialog_token_alloc) + mgmt->u.action.ttlm_res.dialog_token != sdata->u.mgd.dialog_token_alloc) return; wiphy_delayed_work_cancel(sdata->local->hw.wiphy, @@ -8232,7 +8228,7 @@ void ieee80211_process_neg_ttlm_res(struct ieee80211_sub_if_data *sdata, * This can be better implemented in the future, to handle request * rejections. */ - if (le16_to_cpu(mgmt->u.action.u.ttlm_res.status_code) != WLAN_STATUS_SUCCESS) + if (le16_to_cpu(mgmt->u.action.ttlm_res.status_code) != WLAN_STATUS_SUCCESS) __ieee80211_disconnect(sdata); } @@ -8265,12 +8261,11 @@ static void ieee80211_teardown_ttlm_work(struct wiphy *wiphy, void ieee80211_send_teardown_neg_ttlm(struct ieee80211_vif *vif) { + int frame_len = IEEE80211_MIN_ACTION_SIZE(ttlm_tear_down); struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_local *local = sdata->local; struct ieee80211_mgmt *mgmt; struct sk_buff *skb; - int frame_len = offsetofend(struct ieee80211_mgmt, - u.action.u.ttlm_tear_down); struct ieee80211_tx_info *info; skb = dev_alloc_skb(local->hw.extra_tx_headroom + frame_len); @@ -8286,8 +8281,7 @@ void ieee80211_send_teardown_neg_ttlm(struct ieee80211_vif *vif) memcpy(mgmt->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN); mgmt->u.action.category = WLAN_CATEGORY_PROTECTED_EHT; - mgmt->u.action.u.ttlm_tear_down.action_code = - WLAN_PROTECTED_EHT_ACTION_TTLM_TEARDOWN; + mgmt->u.action.action_code = WLAN_PROTECTED_EHT_ACTION_TTLM_TEARDOWN; info = IEEE80211_SKB_CB(skb); info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; @@ -8370,13 +8364,13 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, case WLAN_CATEGORY_SPECTRUM_MGMT: ies_len = skb->len - offsetof(struct ieee80211_mgmt, - u.action.u.chan_switch.variable); + u.action.chan_switch.variable); if (ies_len < 0) break; /* CSA IE cannot be overridden, no need for BSSID */ - elems = ieee802_11_parse_elems(mgmt->u.action.u.chan_switch.variable, + elems = ieee802_11_parse_elems(mgmt->u.action.chan_switch.variable, ies_len, IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION, @@ -8398,7 +8392,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, case WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION: ies_len = skb->len - offsetof(struct ieee80211_mgmt, - u.action.u.ext_chan_switch.variable); + u.action.ext_chan_switch.variable); if (ies_len < 0) break; @@ -8407,7 +8401,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, * extended CSA IE can't be overridden, no need for * BSSID */ - elems = ieee802_11_parse_elems(mgmt->u.action.u.ext_chan_switch.variable, + elems = ieee802_11_parse_elems(mgmt->u.action.ext_chan_switch.variable, ies_len, IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION, @@ -8424,7 +8418,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, /* for the handling code pretend it was an IE */ elems->ext_chansw_ie = - &mgmt->u.action.u.ext_chan_switch.data; + &mgmt->u.action.ext_chan_switch.data; ieee80211_sta_process_chanswitch(link, rx_status->mactime, @@ -10426,25 +10420,25 @@ void ieee80211_process_ml_reconf_resp(struct ieee80211_sub_if_data *sdata, u8 *pos; if (!ieee80211_vif_is_mld(&sdata->vif) || - len < offsetofend(typeof(*mgmt), u.action.u.ml_reconf_resp) || - mgmt->u.action.u.ml_reconf_resp.dialog_token != - sdata->u.mgd.reconf.dialog_token || + len < IEEE80211_MIN_ACTION_SIZE(ml_reconf_resp) || + mgmt->u.action.ml_reconf_resp.dialog_token != + sdata->u.mgd.reconf.dialog_token || !sta_changed_links) return; - pos = mgmt->u.action.u.ml_reconf_resp.variable; - len -= offsetofend(typeof(*mgmt), u.action.u.ml_reconf_resp); + pos = mgmt->u.action.ml_reconf_resp.variable; + len -= offsetofend(typeof(*mgmt), u.action.ml_reconf_resp); /* each status duple is 3 octets */ - if (len < mgmt->u.action.u.ml_reconf_resp.count * 3) { + if (len < mgmt->u.action.ml_reconf_resp.count * 3) { sdata_info(sdata, "mlo: reconf: unexpected len=%zu, count=%u\n", - len, mgmt->u.action.u.ml_reconf_resp.count); + len, mgmt->u.action.ml_reconf_resp.count); goto disconnect; } link_mask = sta_changed_links; - for (i = 0; i < mgmt->u.action.u.ml_reconf_resp.count; i++) { + for (i = 0; i < mgmt->u.action.ml_reconf_resp.count; i++) { u16 status = get_unaligned_le16(pos + 1); link_id = *pos; @@ -10729,8 +10723,7 @@ ieee80211_build_ml_reconf_req(struct ieee80211_sub_if_data *sdata, return NULL; skb_reserve(skb, local->hw.extra_tx_headroom); - mgmt = skb_put_zero(skb, offsetofend(struct ieee80211_mgmt, - u.action.u.ml_reconf_req)); + mgmt = skb_put_zero(skb, IEEE80211_MIN_ACTION_SIZE(ml_reconf_req)); /* Add the MAC header */ mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | @@ -10741,12 +10734,11 @@ ieee80211_build_ml_reconf_req(struct ieee80211_sub_if_data *sdata, /* Add the action frame fixed fields */ mgmt->u.action.category = WLAN_CATEGORY_PROTECTED_EHT; - mgmt->u.action.u.ml_reconf_req.action_code = - WLAN_PROTECTED_EHT_ACTION_LINK_RECONFIG_REQ; + mgmt->u.action.action_code = WLAN_PROTECTED_EHT_ACTION_LINK_RECONFIG_REQ; /* allocate a dialog token and store it */ sdata->u.mgd.reconf.dialog_token = ++sdata->u.mgd.dialog_token_alloc; - mgmt->u.action.u.ml_reconf_req.dialog_token = + mgmt->u.action.ml_reconf_req.dialog_token = sdata->u.mgd.reconf.dialog_token; /* Add the ML reconfiguration element and the common information */ @@ -11116,11 +11108,10 @@ static bool ieee80211_mgd_epcs_supp(struct ieee80211_sub_if_data *sdata) int ieee80211_mgd_set_epcs(struct ieee80211_sub_if_data *sdata, bool enable) { + int frame_len = IEEE80211_MIN_ACTION_SIZE(epcs) + (enable ? 1 : 0); struct ieee80211_local *local = sdata->local; struct ieee80211_mgmt *mgmt; struct sk_buff *skb; - int frame_len = offsetofend(struct ieee80211_mgmt, - u.action.u.epcs) + (enable ? 1 : 0); if (!ieee80211_mgd_epcs_supp(sdata)) return -EINVAL; @@ -11149,15 +11140,15 @@ int ieee80211_mgd_set_epcs(struct ieee80211_sub_if_data *sdata, bool enable) mgmt->u.action.category = WLAN_CATEGORY_PROTECTED_EHT; if (enable) { - u8 *pos = mgmt->u.action.u.epcs.variable; + u8 *pos = mgmt->u.action.epcs.variable; - mgmt->u.action.u.epcs.action_code = + mgmt->u.action.action_code = WLAN_PROTECTED_EHT_ACTION_EPCS_ENABLE_REQ; *pos = ++sdata->u.mgd.dialog_token_alloc; sdata->u.mgd.epcs.dialog_token = *pos; } else { - mgmt->u.action.u.epcs.action_code = + mgmt->u.action.action_code = WLAN_PROTECTED_EHT_ACTION_EPCS_ENABLE_TEARDOWN; ieee80211_epcs_teardown(sdata); @@ -11246,7 +11237,7 @@ void ieee80211_process_epcs_ena_resp(struct ieee80211_sub_if_data *sdata, return; /* Handle dialog token and status code */ - pos = mgmt->u.action.u.epcs.variable; + pos = mgmt->u.action.epcs.variable; dialog_token = *pos; status_code = get_unaligned_le16(pos + 1); @@ -11268,8 +11259,7 @@ void ieee80211_process_epcs_ena_resp(struct ieee80211_sub_if_data *sdata, return; pos += IEEE80211_EPCS_ENA_RESP_BODY_LEN; - ies_len = len - offsetof(struct ieee80211_mgmt, - u.action.u.epcs.variable) - + ies_len = len - IEEE80211_MIN_ACTION_SIZE(epcs) - IEEE80211_EPCS_ENA_RESP_BODY_LEN; elems = ieee802_11_parse_elems(pos, ies_len, diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 6c4b549444c6..3bd046bebf9e 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -274,7 +274,7 @@ static void ieee80211_handle_mu_mimo_mon(struct ieee80211_sub_if_data *sdata, if (!sdata) return; - BUILD_BUG_ON(sizeof(action) != IEEE80211_MIN_ACTION_SIZE + 1); + BUILD_BUG_ON(sizeof(action) != IEEE80211_MIN_ACTION_SIZE(action_code)); if (skb->len < rtap_space + sizeof(action) + VHT_MUMIMO_GROUPS_DATA_LEN) @@ -1162,7 +1162,7 @@ static ieee80211_rx_result ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx) u8 category; /* make sure category field is present */ - if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE) + if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE(category)) return RX_DROP_U_RUNT_ACTION; mgmt = (struct ieee80211_mgmt *)hdr; @@ -3422,7 +3422,7 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata, return; } - if (len < 24 + 1 + sizeof(resp->u.action.u.sa_query)) { + if (len < IEEE80211_MIN_ACTION_SIZE(sa_query)) { /* Too short SA Query request frame */ return; } @@ -3432,17 +3432,16 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata, return; skb_reserve(skb, local->hw.extra_tx_headroom); - resp = skb_put_zero(skb, 24); + resp = skb_put_zero(skb, IEEE80211_MIN_ACTION_SIZE(sa_query)); memcpy(resp->da, sdata->vif.cfg.ap_addr, ETH_ALEN); memcpy(resp->sa, sdata->vif.addr, ETH_ALEN); memcpy(resp->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN); resp->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); - skb_put(skb, 1 + sizeof(resp->u.action.u.sa_query)); resp->u.action.category = WLAN_CATEGORY_SA_QUERY; - resp->u.action.u.sa_query.action = WLAN_ACTION_SA_QUERY_RESPONSE; - memcpy(resp->u.action.u.sa_query.trans_id, - mgmt->u.action.u.sa_query.trans_id, + resp->u.action.action_code = WLAN_ACTION_SA_QUERY_RESPONSE; + memcpy(resp->u.action.sa_query.trans_id, + mgmt->u.action.sa_query.trans_id, WLAN_SA_QUERY_TR_ID_LEN); ieee80211_tx_skb(sdata, skb); @@ -3516,7 +3515,7 @@ ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx) /* drop too small action frames */ if (ieee80211_is_action(mgmt->frame_control) && - rx->skb->len < IEEE80211_MIN_ACTION_SIZE) + rx->skb->len < IEEE80211_MIN_ACTION_SIZE(category)) return RX_DROP_U_RUNT_ACTION; /* Drop non-broadcast Beacon frames */ @@ -3565,29 +3564,28 @@ ieee80211_process_rx_twt_action(struct ieee80211_rx_data *rx) if (!rx->sta) return false; - switch (mgmt->u.action.u.s1g.action_code) { + switch (mgmt->u.action.action_code) { case WLAN_S1G_TWT_SETUP: { struct ieee80211_twt_setup *twt; - if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE + - 1 + /* action code */ + if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE(action_code) + sizeof(struct ieee80211_twt_setup) + 2 /* TWT req_type agrt */) break; - twt = (void *)mgmt->u.action.u.s1g.variable; + twt = (void *)mgmt->u.action.s1g.variable; if (twt->element_id != WLAN_EID_S1G_TWT) break; - if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE + - 4 + /* action code + token + tlv */ + if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE(action_code) + + 3 + /* token + tlv */ twt->length) break; return true; /* queue the frame */ } case WLAN_S1G_TWT_TEARDOWN: - if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE + 2) + if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE(action_code) + 1) break; return true; /* queue the frame */ @@ -3632,10 +3630,10 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) break; /* verify action & smps_control/chanwidth are present */ - if (len < IEEE80211_MIN_ACTION_SIZE + 2) + if (len < IEEE80211_MIN_ACTION_SIZE(ht_smps)) goto invalid; - switch (mgmt->u.action.u.ht_smps.action) { + switch (mgmt->u.action.action_code) { case WLAN_HT_ACTION_SMPS: { struct ieee80211_supported_band *sband; enum ieee80211_smps_mode smps_mode; @@ -3646,7 +3644,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) goto handled; /* convert to HT capability */ - switch (mgmt->u.action.u.ht_smps.smps_control) { + switch (mgmt->u.action.ht_smps.smps_control) { case WLAN_HT_SMPS_CONTROL_DISABLED: smps_mode = IEEE80211_SMPS_OFF; break; @@ -3679,7 +3677,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) goto handled; } case WLAN_HT_ACTION_NOTIFY_CHANWIDTH: { - u8 chanwidth = mgmt->u.action.u.ht_notify_cw.chanwidth; + u8 chanwidth = mgmt->u.action.ht_notify_cw.chanwidth; if (chanwidth != IEEE80211_HT_CHANWIDTH_20MHZ && chanwidth != IEEE80211_HT_CHANWIDTH_ANY) @@ -3699,7 +3697,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) break; case WLAN_CATEGORY_PUBLIC: case WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION: - if (len < IEEE80211_MIN_ACTION_SIZE + 1) + if (len < IEEE80211_MIN_ACTION_SIZE(action_code)) goto invalid; if (sdata->vif.type != NL80211_IFTYPE_STATION) break; @@ -3707,11 +3705,10 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) break; if (!ether_addr_equal(mgmt->bssid, sdata->deflink.u.mgd.bssid)) break; - if (mgmt->u.action.u.ext_chan_switch.action_code != + if (mgmt->u.action.action_code != WLAN_PUB_ACTION_EXT_CHANSW_ANN) break; - if (len < offsetof(struct ieee80211_mgmt, - u.action.u.ext_chan_switch.variable)) + if (len < IEEE80211_MIN_ACTION_SIZE(ext_chan_switch)) goto invalid; goto queue; case WLAN_CATEGORY_VHT: @@ -3723,18 +3720,18 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) break; /* verify action code is present */ - if (len < IEEE80211_MIN_ACTION_SIZE + 1) + if (len < IEEE80211_MIN_ACTION_SIZE(action_code)) goto invalid; - switch (mgmt->u.action.u.vht_opmode_notif.action_code) { + switch (mgmt->u.action.action_code) { case WLAN_VHT_ACTION_OPMODE_NOTIF: { /* verify opmode is present */ - if (len < IEEE80211_MIN_ACTION_SIZE + 2) + if (len < IEEE80211_MIN_ACTION_SIZE(vht_opmode_notif)) goto invalid; goto queue; } case WLAN_VHT_ACTION_GROUPID_MGMT: { - if (len < IEEE80211_MIN_ACTION_SIZE + 25) + if (len < IEEE80211_MIN_ACTION_SIZE(vht_group_notif)) goto invalid; goto queue; } @@ -3751,23 +3748,20 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) break; /* verify action_code is present */ - if (len < IEEE80211_MIN_ACTION_SIZE + 1) + if (len < IEEE80211_MIN_ACTION_SIZE(action_code)) break; - switch (mgmt->u.action.u.addba_req.action_code) { + switch (mgmt->u.action.action_code) { case WLAN_ACTION_ADDBA_REQ: - if (len < (IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.addba_req))) + if (len < IEEE80211_MIN_ACTION_SIZE(addba_req)) goto invalid; break; case WLAN_ACTION_ADDBA_RESP: - if (len < (IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.addba_resp))) + if (len < IEEE80211_MIN_ACTION_SIZE(addba_resp)) goto invalid; break; case WLAN_ACTION_DELBA: - if (len < (IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.delba))) + if (len < IEEE80211_MIN_ACTION_SIZE(delba)) goto invalid; break; default: @@ -3777,16 +3771,15 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) goto queue; case WLAN_CATEGORY_SPECTRUM_MGMT: /* verify action_code is present */ - if (len < IEEE80211_MIN_ACTION_SIZE + 1) + if (len < IEEE80211_MIN_ACTION_SIZE(action_code)) break; - switch (mgmt->u.action.u.measurement.action_code) { + switch (mgmt->u.action.action_code) { case WLAN_ACTION_SPCT_MSR_REQ: if (status->band != NL80211_BAND_5GHZ) break; - if (len < (IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.measurement))) + if (len < IEEE80211_MIN_ACTION_SIZE(measurement)) break; if (sdata->vif.type != NL80211_IFTYPE_STATION) @@ -3796,8 +3789,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) goto handled; case WLAN_ACTION_SPCT_CHL_SWITCH: { u8 *bssid; - if (len < (IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.chan_switch))) + if (len < IEEE80211_MIN_ACTION_SIZE(chan_switch)) break; if (sdata->vif.type != NL80211_IFTYPE_STATION && @@ -3822,11 +3814,10 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) } break; case WLAN_CATEGORY_SELF_PROTECTED: - if (len < (IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.self_prot.action_code))) + if (len < IEEE80211_MIN_ACTION_SIZE(self_prot)) break; - switch (mgmt->u.action.u.self_prot.action_code) { + switch (mgmt->u.action.action_code) { case WLAN_SP_MESH_PEERING_OPEN: case WLAN_SP_MESH_PEERING_CLOSE: case WLAN_SP_MESH_PEERING_CONFIRM: @@ -3844,8 +3835,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) } break; case WLAN_CATEGORY_MESH_ACTION: - if (len < (IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.mesh_action.action_code))) + if (len < IEEE80211_MIN_ACTION_SIZE(action_code)) break; if (!ieee80211_vif_is_mesh(&sdata->vif)) @@ -3855,11 +3845,10 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) break; goto queue; case WLAN_CATEGORY_S1G: - if (len < offsetofend(typeof(*mgmt), - u.action.u.s1g.action_code)) + if (len < IEEE80211_MIN_ACTION_SIZE(action_code)) break; - switch (mgmt->u.action.u.s1g.action_code) { + switch (mgmt->u.action.action_code) { case WLAN_S1G_TWT_SETUP: case WLAN_S1G_TWT_TEARDOWN: if (ieee80211_process_rx_twt_action(rx)) @@ -3870,33 +3859,29 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) } break; case WLAN_CATEGORY_PROTECTED_EHT: - if (len < offsetofend(typeof(*mgmt), - u.action.u.ttlm_req.action_code)) + if (len < IEEE80211_MIN_ACTION_SIZE(action_code)) break; - switch (mgmt->u.action.u.ttlm_req.action_code) { + switch (mgmt->u.action.action_code) { case WLAN_PROTECTED_EHT_ACTION_TTLM_REQ: if (sdata->vif.type != NL80211_IFTYPE_STATION) break; - if (len < offsetofend(typeof(*mgmt), - u.action.u.ttlm_req)) + if (len < IEEE80211_MIN_ACTION_SIZE(ttlm_req)) goto invalid; goto queue; case WLAN_PROTECTED_EHT_ACTION_TTLM_RES: if (sdata->vif.type != NL80211_IFTYPE_STATION) break; - if (len < offsetofend(typeof(*mgmt), - u.action.u.ttlm_res)) + if (len < IEEE80211_MIN_ACTION_SIZE(ttlm_res)) goto invalid; goto queue; case WLAN_PROTECTED_EHT_ACTION_TTLM_TEARDOWN: if (sdata->vif.type != NL80211_IFTYPE_STATION) break; - if (len < offsetofend(typeof(*mgmt), - u.action.u.ttlm_tear_down)) + if (len < IEEE80211_MIN_ACTION_SIZE(ttlm_tear_down)) goto invalid; goto queue; case WLAN_PROTECTED_EHT_ACTION_LINK_RECONFIG_RESP: @@ -3906,34 +3891,29 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) /* The reconfiguration response action frame must * least one 'Status Duple' entry (3 octets) */ - if (len < - offsetofend(typeof(*mgmt), - u.action.u.ml_reconf_resp) + 3) + if (len < IEEE80211_MIN_ACTION_SIZE(ml_reconf_resp) + 3) goto invalid; goto queue; case WLAN_PROTECTED_EHT_ACTION_EPCS_ENABLE_RESP: if (sdata->vif.type != NL80211_IFTYPE_STATION) break; - if (len < offsetofend(typeof(*mgmt), - u.action.u.epcs) + - IEEE80211_EPCS_ENA_RESP_BODY_LEN) + if (len < IEEE80211_MIN_ACTION_SIZE(epcs) + + IEEE80211_EPCS_ENA_RESP_BODY_LEN) goto invalid; goto queue; case WLAN_PROTECTED_EHT_ACTION_EPCS_ENABLE_TEARDOWN: if (sdata->vif.type != NL80211_IFTYPE_STATION) break; - if (len < offsetofend(typeof(*mgmt), - u.action.u.epcs)) + if (len < IEEE80211_MIN_ACTION_SIZE(epcs)) goto invalid; goto queue; case WLAN_PROTECTED_EHT_ACTION_EML_OP_MODE_NOTIF: if (sdata->vif.type != NL80211_IFTYPE_AP) break; - if (len < offsetofend(typeof(*mgmt), - u.action.u.eml_omn)) + if (len < IEEE80211_MIN_ACTION_SIZE(eml_omn)) goto invalid; goto queue; default: @@ -4015,11 +3995,10 @@ ieee80211_rx_h_action_post_userspace(struct ieee80211_rx_data *rx) switch (mgmt->u.action.category) { case WLAN_CATEGORY_SA_QUERY: - if (len < (IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.sa_query))) + if (len < IEEE80211_MIN_ACTION_SIZE(sa_query)) break; - switch (mgmt->u.action.u.sa_query.action) { + switch (mgmt->u.action.action_code) { case WLAN_ACTION_SA_QUERY_REQUEST: if (sdata->vif.type != NL80211_IFTYPE_STATION) break; diff --git a/net/mac80211/s1g.c b/net/mac80211/s1g.c index 1f68df6e8067..297abaa6fecf 100644 --- a/net/mac80211/s1g.c +++ b/net/mac80211/s1g.c @@ -2,7 +2,7 @@ /* * S1G handling * Copyright(c) 2020 Adapt-IP - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023, 2026 Intel Corporation */ #include #include @@ -27,14 +27,14 @@ bool ieee80211_s1g_is_twt_setup(struct sk_buff *skb) if (likely(mgmt->u.action.category != WLAN_CATEGORY_S1G)) return false; - return mgmt->u.action.u.s1g.action_code == WLAN_S1G_TWT_SETUP; + return mgmt->u.action.action_code == WLAN_S1G_TWT_SETUP; } static void ieee80211_s1g_send_twt_setup(struct ieee80211_sub_if_data *sdata, const u8 *da, const u8 *bssid, struct ieee80211_twt_setup *twt) { - int len = IEEE80211_MIN_ACTION_SIZE + 4 + twt->length; + int len = IEEE80211_MIN_ACTION_SIZE(s1g) + 3 + twt->length; struct ieee80211_local *local = sdata->local; struct ieee80211_mgmt *mgmt; struct sk_buff *skb; @@ -52,8 +52,8 @@ ieee80211_s1g_send_twt_setup(struct ieee80211_sub_if_data *sdata, const u8 *da, memcpy(mgmt->bssid, bssid, ETH_ALEN); mgmt->u.action.category = WLAN_CATEGORY_S1G; - mgmt->u.action.u.s1g.action_code = WLAN_S1G_TWT_SETUP; - memcpy(mgmt->u.action.u.s1g.variable, twt, 3 + twt->length); + mgmt->u.action.action_code = WLAN_S1G_TWT_SETUP; + memcpy(mgmt->u.action.s1g.variable, twt, 3 + twt->length); IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT | IEEE80211_TX_INTFL_MLME_CONN_TX | @@ -71,12 +71,12 @@ ieee80211_s1g_send_twt_teardown(struct ieee80211_sub_if_data *sdata, u8 *id; skb = dev_alloc_skb(local->hw.extra_tx_headroom + - IEEE80211_MIN_ACTION_SIZE + 2); + IEEE80211_MIN_ACTION_SIZE(s1g) + 1); if (!skb) return; skb_reserve(skb, local->hw.extra_tx_headroom); - mgmt = skb_put_zero(skb, IEEE80211_MIN_ACTION_SIZE + 2); + mgmt = skb_put_zero(skb, IEEE80211_MIN_ACTION_SIZE(s1g) + 1); mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); memcpy(mgmt->da, da, ETH_ALEN); @@ -84,8 +84,8 @@ ieee80211_s1g_send_twt_teardown(struct ieee80211_sub_if_data *sdata, memcpy(mgmt->bssid, bssid, ETH_ALEN); mgmt->u.action.category = WLAN_CATEGORY_S1G; - mgmt->u.action.u.s1g.action_code = WLAN_S1G_TWT_TEARDOWN; - id = (u8 *)mgmt->u.action.u.s1g.variable; + mgmt->u.action.action_code = WLAN_S1G_TWT_TEARDOWN; + id = (u8 *)mgmt->u.action.s1g.variable; *id = flowid; IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT | @@ -98,7 +98,7 @@ ieee80211_s1g_rx_twt_setup(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct sk_buff *skb) { struct ieee80211_mgmt *mgmt = (void *)skb->data; - struct ieee80211_twt_setup *twt = (void *)mgmt->u.action.u.s1g.variable; + struct ieee80211_twt_setup *twt = (void *)mgmt->u.action.s1g.variable; struct ieee80211_twt_params *twt_agrt = (void *)twt->params; twt_agrt->req_type &= cpu_to_le16(~IEEE80211_TWT_REQTYPE_REQUEST); @@ -128,7 +128,7 @@ ieee80211_s1g_rx_twt_teardown(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)skb->data; drv_twt_teardown_request(sdata->local, sdata, &sta->sta, - mgmt->u.action.u.s1g.variable[0]); + mgmt->u.action.s1g.variable[0]); } static void @@ -136,7 +136,7 @@ ieee80211_s1g_tx_twt_setup_fail(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct sk_buff *skb) { struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)skb->data; - struct ieee80211_twt_setup *twt = (void *)mgmt->u.action.u.s1g.variable; + struct ieee80211_twt_setup *twt = (void *)mgmt->u.action.s1g.variable; struct ieee80211_twt_params *twt_agrt = (void *)twt->params; u8 flowid = le16_get_bits(twt_agrt->req_type, IEEE80211_TWT_REQTYPE_FLOWID); @@ -160,7 +160,7 @@ void ieee80211_s1g_rx_twt_action(struct ieee80211_sub_if_data *sdata, if (!sta) return; - switch (mgmt->u.action.u.s1g.action_code) { + switch (mgmt->u.action.action_code) { case WLAN_S1G_TWT_SETUP: ieee80211_s1g_rx_twt_setup(sdata, sta, skb); break; @@ -185,7 +185,7 @@ void ieee80211_s1g_status_twt_action(struct ieee80211_sub_if_data *sdata, if (!sta) return; - switch (mgmt->u.action.u.s1g.action_code) { + switch (mgmt->u.action.action_code) { case WLAN_S1G_TWT_SETUP: /* process failed twt setup frames */ ieee80211_s1g_tx_twt_setup_fail(sdata, sta, skb); diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index 7422888d3640..e2eaf8d8d7ff 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu * Copyright 2007-2008, Intel Corporation * Copyright 2008, Johannes Berg - * Copyright (C) 2018, 2020, 2022-2024 Intel Corporation + * Copyright (C) 2018, 2020, 2022-2024, 2026 Intel Corporation */ #include @@ -409,35 +409,30 @@ static void ieee80211_send_refuse_measurement_request(struct ieee80211_sub_if_da struct sk_buff *skb; struct ieee80211_mgmt *msr_report; - skb = dev_alloc_skb(sizeof(*msr_report) + local->hw.extra_tx_headroom + - sizeof(struct ieee80211_msrment_ie)); + skb = dev_alloc_skb(IEEE80211_MIN_ACTION_SIZE(measurement) + + local->hw.extra_tx_headroom); if (!skb) return; skb_reserve(skb, local->hw.extra_tx_headroom); - msr_report = skb_put_zero(skb, 24); + msr_report = skb_put_zero(skb, IEEE80211_MIN_ACTION_SIZE(measurement)); memcpy(msr_report->da, da, ETH_ALEN); memcpy(msr_report->sa, sdata->vif.addr, ETH_ALEN); memcpy(msr_report->bssid, bssid, ETH_ALEN); msr_report->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); - skb_put(skb, 1 + sizeof(msr_report->u.action.u.measurement)); msr_report->u.action.category = WLAN_CATEGORY_SPECTRUM_MGMT; - msr_report->u.action.u.measurement.action_code = - WLAN_ACTION_SPCT_MSR_RPRT; - msr_report->u.action.u.measurement.dialog_token = dialog_token; + msr_report->u.action.action_code = WLAN_ACTION_SPCT_MSR_RPRT; - msr_report->u.action.u.measurement.element_id = WLAN_EID_MEASURE_REPORT; - msr_report->u.action.u.measurement.length = + msr_report->u.action.measurement.dialog_token = dialog_token; + msr_report->u.action.measurement.element_id = WLAN_EID_MEASURE_REPORT; + msr_report->u.action.measurement.length = sizeof(struct ieee80211_msrment_ie); - - memset(&msr_report->u.action.u.measurement.msr_elem, 0, - sizeof(struct ieee80211_msrment_ie)); - msr_report->u.action.u.measurement.msr_elem.token = request_ie->token; - msr_report->u.action.u.measurement.msr_elem.mode |= + msr_report->u.action.measurement.msr_elem.token = request_ie->token; + msr_report->u.action.measurement.msr_elem.mode |= IEEE80211_SPCT_MSR_RPRT_MODE_REFUSED; - msr_report->u.action.u.measurement.msr_elem.type = request_ie->type; + msr_report->u.action.measurement.msr_elem.type = request_ie->type; ieee80211_tx_skb(sdata, skb); } @@ -454,7 +449,7 @@ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, * TODO: Answer basic measurement as unmeasured */ ieee80211_send_refuse_measurement_request(sdata, - &mgmt->u.action.u.measurement.msr_elem, + &mgmt->u.action.measurement.msr_elem, mgmt->sa, mgmt->bssid, - mgmt->u.action.u.measurement.dialog_token); + mgmt->u.action.measurement.dialog_token); } diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index dbbfe2d6842f..1f30a4eda374 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -6,7 +6,7 @@ * Copyright 2014, Intel Corporation * Copyright 2014 Intel Mobile Communications GmbH * Copyright 2015 - 2016 Intel Deutschland GmbH - * Copyright (C) 2019, 2021-2025 Intel Corporation + * Copyright (C) 2019, 2021-2026 Intel Corporation */ #include @@ -879,28 +879,23 @@ ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_mgmt *mgmt; - mgmt = skb_put_zero(skb, 24); + if (action_code != WLAN_PUB_ACTION_TDLS_DISCOVER_RES) + return -EINVAL; + + mgmt = skb_put_zero(skb, IEEE80211_MIN_ACTION_SIZE(tdls_discover_resp)); memcpy(mgmt->da, peer, ETH_ALEN); memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); memcpy(mgmt->bssid, link->u.mgd.bssid, ETH_ALEN); mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); - switch (action_code) { - case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: - skb_put(skb, 1 + sizeof(mgmt->u.action.u.tdls_discover_resp)); - mgmt->u.action.category = WLAN_CATEGORY_PUBLIC; - mgmt->u.action.u.tdls_discover_resp.action_code = - WLAN_PUB_ACTION_TDLS_DISCOVER_RES; - mgmt->u.action.u.tdls_discover_resp.dialog_token = - dialog_token; - mgmt->u.action.u.tdls_discover_resp.capability = - cpu_to_le16(ieee80211_get_tdls_sta_capab(link, - status_code)); - break; - default: - return -EINVAL; - } + mgmt->u.action.category = WLAN_CATEGORY_PUBLIC; + mgmt->u.action.action_code = WLAN_PUB_ACTION_TDLS_DISCOVER_RES; + + mgmt->u.action.tdls_discover_resp.dialog_token = dialog_token; + mgmt->u.action.tdls_discover_resp.capability = + cpu_to_le16(ieee80211_get_tdls_sta_capab(link, + status_code)); return 0; } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index b2e6c8b98381..55054de62508 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -3766,12 +3766,11 @@ again: int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata, struct cfg80211_csa_settings *csa_settings) { + int hdr_len = IEEE80211_MIN_ACTION_SIZE(chan_switch); struct sk_buff *skb; struct ieee80211_mgmt *mgmt; struct ieee80211_local *local = sdata->local; int freq; - int hdr_len = offsetofend(struct ieee80211_mgmt, - u.action.u.chan_switch); u8 *pos; if (sdata->vif.type != NL80211_IFTYPE_ADHOC && @@ -3800,7 +3799,7 @@ int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata, memcpy(mgmt->bssid, ifibss->bssid, ETH_ALEN); } mgmt->u.action.category = WLAN_CATEGORY_SPECTRUM_MGMT; - mgmt->u.action.u.chan_switch.action_code = WLAN_ACTION_SPCT_CHL_SWITCH; + mgmt->u.action.action_code = WLAN_ACTION_SPCT_CHL_SWITCH; pos = skb_put(skb, 5); *pos++ = WLAN_EID_CHANNEL_SWITCH; /* EID */ *pos++ = 3; /* IE length */ diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index b099d79e8fbb..80120f9f17b6 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -4,7 +4,7 @@ * * Portions of this file * Copyright(c) 2015 - 2016 Intel Deutschland GmbH - * Copyright (C) 2018 - 2024 Intel Corporation + * Copyright (C) 2018-2026 Intel Corporation */ #include @@ -723,17 +723,17 @@ void ieee80211_process_mu_groups(struct ieee80211_sub_if_data *sdata, if (!link_conf->mu_mimo_owner) return; - if (!memcmp(mgmt->u.action.u.vht_group_notif.position, + if (!memcmp(mgmt->u.action.vht_group_notif.position, link_conf->mu_group.position, WLAN_USER_POSITION_LEN) && - !memcmp(mgmt->u.action.u.vht_group_notif.membership, + !memcmp(mgmt->u.action.vht_group_notif.membership, link_conf->mu_group.membership, WLAN_MEMBERSHIP_LEN)) return; memcpy(link_conf->mu_group.membership, - mgmt->u.action.u.vht_group_notif.membership, + mgmt->u.action.vht_group_notif.membership, WLAN_MEMBERSHIP_LEN); memcpy(link_conf->mu_group.position, - mgmt->u.action.u.vht_group_notif.position, + mgmt->u.action.vht_group_notif.position, WLAN_USER_POSITION_LEN); ieee80211_link_info_change_notify(sdata, link, From 2f211be112e6453b3b3a1b752fd6f446e5297704 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 3 Mar 2026 15:06:14 +0100 Subject: [PATCH 0325/1561] wifi: mac80211: remove stale TODO item We've addressed multi-link operation, nothing appears to still need to get moved around. Link: https://patch.msgid.link/20260303150614.5f4b91c2490e.Ic49b55bd211129e05b1d035ad468d033cf24c0e9@changeid Signed-off-by: Johannes Berg --- net/mac80211/sta_info.h | 1 - 1 file changed, 1 deletion(-) diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 2875ef7d7946..f1b1bbf2a2d4 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -510,7 +510,6 @@ struct ieee80211_fragment_cache { * during finalize * @debugfs_dir: debug filesystem directory dentry * @pub: public (driver visible) link STA data - * TODO Move other link params from sta_info as required for MLD operation */ struct link_sta_info { u8 addr[ETH_ALEN]; From 9f39e2cc2b01225c1af7f18ff112047c13240d06 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 3 Mar 2026 15:09:21 +0100 Subject: [PATCH 0326/1561] wifi: mac80211: remove AID bit stripping for print Since the top bits have already been masked off according to whether or not it's S1G, there's no need to mask again for printing. Remove the superfluous code. Link: https://patch.msgid.link/20260303150921.d04ad4dfdc48.I78da2953982e85aab386867dc9db83471bf35475@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index da79df92994d..5ecd3d1b172d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -6721,7 +6721,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, sdata_info(sdata, "RX %sssocResp from %pM (capab=0x%x status=%d aid=%d)\n", reassoc ? "Rea" : "A", assoc_data->ap_addr, - capab_info, status_code, (u16)(aid & ~(BIT(15) | BIT(14)))); + capab_info, status_code, aid); ifmgd->broken_ap = false; From a140826caa2c14aa5a9a6990e514c5edbdb7eafd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 3 Mar 2026 15:16:15 +0100 Subject: [PATCH 0327/1561] wifi: nl80211: fix UHR capability validation The ieee80211_uhr_capa_size_ok() function returns a boolean, but we need an error code here. Fix that. Fixes: 072e6f7f416f ("wifi: cfg80211: add initial UHR support") Cc: # no drivers with UHR yet Link: https://patch.msgid.link/20260303151614.e87ea9995be5.Ie164040a51855a3e548f05f0d0291d7d7993c7ee@changeid Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 2225f5d0b124..699687a0caa9 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -339,7 +339,9 @@ static int validate_uhr_capa(const struct nlattr *attr, const u8 *data = nla_data(attr); unsigned int len = nla_len(attr); - return ieee80211_uhr_capa_size_ok(data, len, false); + if (!ieee80211_uhr_capa_size_ok(data, len, false)) + return -EINVAL; + return 0; } /* policy for the attributes */ From 9aa84d5c6c99480c523aeb7a6ce93b6635f3e290 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 4 Mar 2026 14:41:48 +0100 Subject: [PATCH 0328/1561] wifi: ieee80211: fix UHR operation DBE vs. P-EDCA order Draft P802.11bn_D1.3 switched the order here to align with the order of the fields. Adjust the code accordingly. Link: https://patch.msgid.link/20260304144148.ce45942294e1.I22ab3f16e6376a19c3953cf81dd67105ea8e529d@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211-uhr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/ieee80211-uhr.h b/include/linux/ieee80211-uhr.h index 9729d23e4766..d199f3ebdba0 100644 --- a/include/linux/ieee80211-uhr.h +++ b/include/linux/ieee80211-uhr.h @@ -12,8 +12,8 @@ #define IEEE80211_UHR_OPER_PARAMS_DPS_ENA 0x0001 #define IEEE80211_UHR_OPER_PARAMS_NPCA_ENA 0x0002 -#define IEEE80211_UHR_OPER_PARAMS_DBE_ENA 0x0004 -#define IEEE80211_UHR_OPER_PARAMS_PEDCA_ENA 0x0008 +#define IEEE80211_UHR_OPER_PARAMS_PEDCA_ENA 0x0004 +#define IEEE80211_UHR_OPER_PARAMS_DBE_ENA 0x0008 struct ieee80211_uhr_operation { __le16 params; From 98acd4c1d9f7dc9c426e840c16e81b57315ff84b Mon Sep 17 00:00:00 2001 From: Ria Thomas Date: Thu, 5 Mar 2026 14:43:04 +0530 Subject: [PATCH 0329/1561] wifi: mac80211: add support for NDP ADDBA/DELBA for S1G S1G defines use of NDP Block Ack (BA) for aggregation, requiring negotiation of NDP ADDBA/DELBA action frames. If the S1G recipient supports HT-immediate block ack, the sender must send an NDP ADDBA Request indicating it expects only NDP BlockAck frames for the agreement. Introduce support for NDP ADDBA and DELBA exchange in mac80211. The implementation negotiates the BA mechanism during setup based on station capabilities and driver support (IEEE80211_HW_SUPPORTS_NDP_BLOCKACK). If negotiation fails due to mismatched expectations, a rejection with status code WLAN_STATUS_REJECTED_NDP_BLOCK_ACK_SUGGESTED is returned as per IEEE 802.11-2024. Trace sample: IEEE 802.11 Wireless Management Fixed parameters Category code: Block Ack (3) Action code: NDP ADDBA Request (0x80) Dialog token: 0x01 Block Ack Parameters: 0x1003, A-MSDUs, Block Ack Policy .... .... .... ...1 = A-MSDUs: Permitted in QoS Data MPDUs .... .... .... ..1. = Block Ack Policy: Immediate Block Ack .... .... ..00 00.. = Traffic Identifier: 0x0 0001 0000 00.. .... = Number of Buffers (1 Buffer = 2304 Bytes): 64 Block Ack Timeout: 0x0000 Block Ack Starting Sequence Control (SSC): 0x0010 .... .... .... 0000 = Fragment: 0 0000 0000 0001 .... = Starting Sequence Number: 1 IEEE 802.11 Wireless Management Fixed parameters Category code: Block Ack (3) Action code: NDP ADDBA Response (0x81) Dialog token: 0x02 Status code: BlockAck negotiation refused because, due to buffer constraints and other unspecified reasons, the recipient prefers to generate only NDP BlockAck frames (0x006d) Block Ack Parameters: 0x1002, Block Ack Policy .... .... .... ...0 = A-MSDUs: Not Permitted .... .... .... ..1. = Block Ack Policy: Immediate Block Ack .... .... ..00 00.. = Traffic Identifier: 0x0 0001 0000 00.. .... = Number of Buffers (1 Buffer = 2304 Bytes): 64 Block Ack Timeout: 0x0000 Signed-off-by: Ria Thomas Link: https://patch.msgid.link/20260305091304.310990-1-ria.thomas@morsemicro.com Signed-off-by: Johannes Berg --- include/linux/ieee80211-ht.h | 3 +++ include/linux/ieee80211.h | 2 ++ include/net/mac80211.h | 4 ++++ net/mac80211/agg-rx.c | 24 +++++++++++++++++++++--- net/mac80211/agg-tx.c | 13 +++++++++---- net/mac80211/debugfs.c | 1 + net/mac80211/ht.c | 8 +++++--- net/mac80211/ieee80211_i.h | 6 +++++- net/mac80211/iface.c | 3 +++ net/mac80211/rx.c | 11 +++++++++-- net/mac80211/s1g.c | 8 ++++++++ net/mac80211/sta_info.h | 2 ++ 12 files changed, 72 insertions(+), 13 deletions(-) diff --git a/include/linux/ieee80211-ht.h b/include/linux/ieee80211-ht.h index 21bbf470540f..7612b72f9c7c 100644 --- a/include/linux/ieee80211-ht.h +++ b/include/linux/ieee80211-ht.h @@ -281,6 +281,9 @@ enum ieee80211_back_actioncode { WLAN_ACTION_ADDBA_REQ = 0, WLAN_ACTION_ADDBA_RESP = 1, WLAN_ACTION_DELBA = 2, + WLAN_ACTION_NDP_ADDBA_REQ = 128, + WLAN_ACTION_NDP_ADDBA_RESP = 129, + WLAN_ACTION_NDP_DELBA = 130, }; /* BACK (block-ack) parties */ diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index aea360e90cb1..52db36120314 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1482,6 +1482,8 @@ enum ieee80211_statuscode { WLAN_STATUS_REJECT_DSE_BAND = 96, WLAN_STATUS_DENIED_WITH_SUGGESTED_BAND_AND_CHANNEL = 99, WLAN_STATUS_DENIED_DUE_TO_SPECTRUM_MANAGEMENT = 103, + /* 802.11ah */ + WLAN_STATUS_REJECTED_NDP_BLOCK_ACK_SUGGESTED = 109, /* 802.11ai */ WLAN_STATUS_FILS_AUTHENTICATION_FAILURE = 112, WLAN_STATUS_UNKNOWN_AUTHENTICATION_SERVER = 113, diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 9f8251fb9832..9cc482191ab9 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2913,6 +2913,9 @@ struct ieee80211_txq { * HW flag so drivers can opt in according to their own control, e.g. in * testing. * + * @IEEE80211_HW_SUPPORTS_NDP_BLOCKACK: HW can transmit/receive S1G NDP + * BlockAck frames. + * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -2973,6 +2976,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_DISALLOW_PUNCTURING, IEEE80211_HW_HANDLES_QUIET_CSA, IEEE80211_HW_STRICT, + IEEE80211_HW_SUPPORTS_NDP_BLOCKACK, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 0a2be8cb600f..0140ac826b23 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -94,7 +94,8 @@ void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, /* check if this is a self generated aggregation halt */ if (initiator == WLAN_BACK_RECIPIENT && tx) ieee80211_send_delba(sta->sdata, sta->sta.addr, - tid, WLAN_BACK_RECIPIENT, reason); + tid, WLAN_BACK_RECIPIENT, reason, + ieee80211_s1g_use_ndp_ba(sta->sdata, sta)); /* * return here in case tid_rx is not assigned - which will happen if @@ -240,6 +241,7 @@ static void ieee80211_send_addba_resp(struct sta_info *sta, u8 *da, u16 tid, struct sk_buff *skb; struct ieee80211_mgmt *mgmt; bool amsdu = ieee80211_hw_check(&local->hw, SUPPORTS_AMSDU_IN_AMPDU); + bool use_ndp = ieee80211_s1g_use_ndp_ba(sdata, sta); u16 capab; skb = dev_alloc_skb(sizeof(*mgmt) + @@ -253,7 +255,8 @@ static void ieee80211_send_addba_resp(struct sta_info *sta, u8 *da, u16 tid, skb_put(skb, 2 + sizeof(mgmt->u.action.addba_resp)); mgmt->u.action.category = WLAN_CATEGORY_BACK; - mgmt->u.action.action_code = WLAN_ACTION_ADDBA_RESP; + mgmt->u.action.action_code = use_ndp ? + WLAN_ACTION_NDP_ADDBA_RESP : WLAN_ACTION_ADDBA_RESP; mgmt->u.action.addba_resp.dialog_token = dialog_token; @@ -276,6 +279,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, u8 dialog_token, u16 timeout, u16 start_seq_num, u16 ba_policy, u16 tid, u16 buf_size, bool tx, bool auto_seq, + bool req_ndp, const u8 addba_ext_data) { struct ieee80211_local *local = sta->sdata->local; @@ -301,6 +305,18 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, goto end; } + if (tx && ieee80211_s1g_use_ndp_ba(sta->sdata, sta) && !req_ndp) { + /* + * According to IEEE 802.11-2024: Inform S1G originator + * ADDBA rejected as NDP BlockAck is preferred + */ + status = WLAN_STATUS_REJECTED_NDP_BLOCK_ACK_SUGGESTED; + ht_dbg(sta->sdata, + "Rejecting AddBA Req from %pM tid %u - require NDP BlockAck\n", + sta->sta.addr, tid); + goto end; + } + if (!sta->sta.valid_links && !sta->sta.deflink.ht_cap.ht_supported && !sta->sta.deflink.he_cap.has_he && @@ -474,6 +490,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, struct ieee80211_mgmt *mgmt, size_t len) { + bool req_ndp = mgmt->u.action.action_code == WLAN_ACTION_NDP_ADDBA_REQ; u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num; u8 dialog_token, addba_ext_data; @@ -498,7 +515,8 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, __ieee80211_start_rx_ba_session(sta, dialog_token, timeout, start_seq_num, ba_policy, tid, - buf_size, true, false, addba_ext_data); + buf_size, true, false, + req_ndp, addba_ext_data); } void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif, diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index d5a62b8d5a80..01d927b88264 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -60,7 +60,7 @@ static void ieee80211_send_addba_request(struct sta_info *sta, u16 tid, u8 dialog_token, u16 start_seq_num, - u16 agg_size, u16 timeout) + u16 agg_size, u16 timeout, bool ndp) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; @@ -80,7 +80,8 @@ static void ieee80211_send_addba_request(struct sta_info *sta, u16 tid, skb_put(skb, 2 + sizeof(mgmt->u.action.addba_req)); mgmt->u.action.category = WLAN_CATEGORY_BACK; - mgmt->u.action.action_code = WLAN_ACTION_ADDBA_REQ; + mgmt->u.action.action_code = ndp ? + WLAN_ACTION_NDP_ADDBA_REQ : WLAN_ACTION_ADDBA_REQ; mgmt->u.action.addba_req.dialog_token = dialog_token; capab = IEEE80211_ADDBA_PARAM_AMSDU_MASK; @@ -484,7 +485,8 @@ static void ieee80211_send_addba_with_timeout(struct sta_info *sta, /* send AddBA request */ ieee80211_send_addba_request(sta, tid, tid_tx->dialog_token, - tid_tx->ssn, buf_size, tid_tx->timeout); + tid_tx->ssn, buf_size, tid_tx->timeout, + tid_tx->ndp); WARN_ON(test_and_set_bit(HT_AGG_STATE_SENT_ADDBA, &tid_tx->state)); } @@ -521,6 +523,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) */ synchronize_net(); + tid_tx->ndp = ieee80211_s1g_use_ndp_ba(sdata, sta); params.ssn = sta->tid_seq[tid] >> 4; ret = drv_ampdu_action(local, sdata, ¶ms); tid_tx->ssn = params.ssn; @@ -940,7 +943,9 @@ void ieee80211_stop_tx_ba_cb(struct sta_info *sta, int tid, if (send_delba) ieee80211_send_delba(sdata, sta->sta.addr, tid, - WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE); + WLAN_BACK_INITIATOR, + WLAN_REASON_QSTA_NOT_USE, + tid_tx->ndp); } void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index d02f07368c51..e8d0a8b71d59 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -490,6 +490,7 @@ static const char *hw_flag_names[] = { FLAG(DISALLOW_PUNCTURING), FLAG(HANDLES_QUIET_CSA), FLAG(STRICT), + FLAG(SUPPORTS_NDP_BLOCKACK), #undef FLAG }; diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 9e2469a8ce64..33f1e1b235e9 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -379,7 +379,7 @@ void ieee80211_ba_session_work(struct wiphy *wiphy, struct wiphy_work *work) sta->ampdu_mlme.tid_rx_manage_offl)) __ieee80211_start_rx_ba_session(sta, 0, 0, 0, 1, tid, IEEE80211_MAX_AMPDU_BUF_HT, - false, true, 0); + false, true, false, 0); if (test_and_clear_bit(tid + IEEE80211_NUM_TIDS, sta->ampdu_mlme.tid_rx_manage_offl)) @@ -455,7 +455,8 @@ void ieee80211_ba_session_work(struct wiphy *wiphy, struct wiphy_work *work) void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, const u8 *da, u16 tid, - u16 initiator, u16 reason_code) + u16 initiator, u16 reason_code, + bool use_ndp) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb; @@ -473,7 +474,8 @@ void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, skb_put(skb, 2 + sizeof(mgmt->u.action.delba)); mgmt->u.action.category = WLAN_CATEGORY_BACK; - mgmt->u.action.action_code = WLAN_ACTION_DELBA; + mgmt->u.action.action_code = use_ndp ? + WLAN_ACTION_NDP_DELBA : WLAN_ACTION_DELBA; params = (u16)(initiator << 11); /* bit 11 initiator */ params |= (u16)(tid << 12); /* bit 15:12 TID number */ diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index a4babf7624e5..d71e0c6d2165 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2190,7 +2190,8 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, struct link_sta_info *link_sta); void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, const u8 *da, u16 tid, - u16 initiator, u16 reason_code); + u16 initiator, u16 reason_code, + bool use_ndp); int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode smps, const u8 *da, const u8 *bssid, int link_id); @@ -2206,6 +2207,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, u8 dialog_token, u16 timeout, u16 start_seq_num, u16 ba_policy, u16 tid, u16 buf_size, bool tx, bool auto_seq, + bool req_ndp, const u8 addba_ext_data); void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, enum ieee80211_agg_stop_reason reason); @@ -2331,6 +2333,8 @@ void ieee80211_s1g_status_twt_action(struct ieee80211_sub_if_data *sdata, void ieee80211_s1g_cap_to_sta_s1g_cap(struct ieee80211_sub_if_data *sdata, const struct ieee80211_s1g_cap *s1g_cap_ie, struct link_sta_info *link_sta); +bool ieee80211_s1g_use_ndp_ba(const struct ieee80211_sub_if_data *sdata, + const struct sta_info *sta); /* Spectrum management */ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 2e391cec73a0..40ce0bb72726 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1581,14 +1581,17 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local, if (sta) { switch (mgmt->u.action.action_code) { case WLAN_ACTION_ADDBA_REQ: + case WLAN_ACTION_NDP_ADDBA_REQ: ieee80211_process_addba_request(local, sta, mgmt, len); break; case WLAN_ACTION_ADDBA_RESP: + case WLAN_ACTION_NDP_ADDBA_RESP: ieee80211_process_addba_resp(local, sta, mgmt, len); break; case WLAN_ACTION_DELBA: + case WLAN_ACTION_NDP_DELBA: ieee80211_process_delba(sdata, sta, mgmt, len); break; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 3bd046bebf9e..19c33f7a8193 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1475,7 +1475,9 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, !test_and_set_bit(tid, rx->sta->ampdu_mlme.unexpected_agg)) ieee80211_send_delba(rx->sdata, rx->sta->sta.addr, tid, WLAN_BACK_RECIPIENT, - WLAN_REASON_QSTA_REQUIRE_SETUP); + WLAN_REASON_QSTA_REQUIRE_SETUP, + ieee80211_s1g_use_ndp_ba(rx->sdata, + rx->sta)); goto dont_reorder; } @@ -3372,7 +3374,9 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) !test_and_set_bit(tid, rx->sta->ampdu_mlme.unexpected_agg)) ieee80211_send_delba(rx->sdata, rx->sta->sta.addr, tid, WLAN_BACK_RECIPIENT, - WLAN_REASON_QSTA_REQUIRE_SETUP); + WLAN_REASON_QSTA_REQUIRE_SETUP, + ieee80211_s1g_use_ndp_ba(rx->sdata, + rx->sta)); tid_agg_rx = rcu_dereference(rx->sta->ampdu_mlme.tid_rx[tid]); if (!tid_agg_rx) @@ -3753,14 +3757,17 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) switch (mgmt->u.action.action_code) { case WLAN_ACTION_ADDBA_REQ: + case WLAN_ACTION_NDP_ADDBA_REQ: if (len < IEEE80211_MIN_ACTION_SIZE(addba_req)) goto invalid; break; case WLAN_ACTION_ADDBA_RESP: + case WLAN_ACTION_NDP_ADDBA_RESP: if (len < IEEE80211_MIN_ACTION_SIZE(addba_resp)) goto invalid; break; case WLAN_ACTION_DELBA: + case WLAN_ACTION_NDP_DELBA: if (len < IEEE80211_MIN_ACTION_SIZE(delba)) goto invalid; break; diff --git a/net/mac80211/s1g.c b/net/mac80211/s1g.c index 297abaa6fecf..5af4a0c6c642 100644 --- a/net/mac80211/s1g.c +++ b/net/mac80211/s1g.c @@ -220,3 +220,11 @@ void ieee80211_s1g_cap_to_sta_s1g_cap(struct ieee80211_sub_if_data *sdata, ieee80211_sta_recalc_aggregates(&link_sta->sta->sta); } + +bool ieee80211_s1g_use_ndp_ba(const struct ieee80211_sub_if_data *sdata, + const struct sta_info *sta) +{ + return sdata->vif.cfg.s1g && + ieee80211_hw_check(&sdata->local->hw, SUPPORTS_NDP_BLOCKACK) && + (sta && sta->sta.deflink.s1g_cap.s1g); +} diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index f1b1bbf2a2d4..58ccbea7f6f6 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -171,6 +171,7 @@ struct sta_info; * @bar_pending: BAR needs to be re-sent * @amsdu: support A-MSDU within A-MDPU * @ssn: starting sequence number of the session + * @ndp: this session is using NDP Block ACKs * * This structure's lifetime is managed by RCU, assignments to * the array holding it must hold the aggregation mutex. @@ -199,6 +200,7 @@ struct tid_ampdu_tx { u16 failed_bar_ssn; bool bar_pending; bool amsdu; + bool ndp; u8 tid; }; From 35de87bf598ca48d5cd5cc7f328ce00df2b5fb59 Mon Sep 17 00:00:00 2001 From: Tim Bird Date: Thu, 5 Mar 2026 15:04:21 -0700 Subject: [PATCH 0330/1561] wifi: Add SPDX ids to some files in the wireless subsystem Add SPDX-License-Identifier lines to some files where they are missing in the net/wireless directory. Remove licensing text from individual files headers (where present) to canonicalize the library references. Use (mostly) either GPL-2.0 or ISC, depending on the license wording (or not) in the files. radiotap.c does not mention which BSD variant it intends for the license. My selection of 'OR BSD-2-Clause' for radiotap.c was based on research into this code's history and its licensing elsewhere. In OpenBSD, radiotap code (which likely either derived from this code, or this code was derived from) has the BSD-2-Clause license. Very similar code in the radiotap library user space tool, by the same authors Andy Green and Johannes Berg, has an ISC license. Also the ISC license is used by Johannes for other contributions in the Linux wireless system. Since the radiotap.c license text here mentions BSD, but not a specific version, I chose the closest BSD variant to ISC, which is BSD-2-Clause. Signed-off-by: Tim Bird Link: https://patch.msgid.link/20260305220422.24161-1-tim.bird@sony.com [modify subject since it's not all radiotap] Signed-off-by: Johannes Berg --- net/wireless/of.c | 13 +------------ net/wireless/radiotap.c | 10 +--------- net/wireless/reg.c | 13 +------------ net/wireless/reg.h | 13 +------------ net/wireless/trace.c | 1 + net/wireless/wext-core.c | 3 +-- net/wireless/wext-priv.c | 3 +-- net/wireless/wext-proc.c | 3 +-- 8 files changed, 8 insertions(+), 51 deletions(-) diff --git a/net/wireless/of.c b/net/wireless/of.c index 60a864465331..99acbea3beee 100644 --- a/net/wireless/of.c +++ b/net/wireless/of.c @@ -1,17 +1,6 @@ +// SPDX-License-Identifier: ISC /* * Copyright (C) 2017 Rafał Miłecki - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c index c85eaa583a46..df29048a0449 100644 --- a/net/wireless/radiotap.c +++ b/net/wireless/radiotap.c @@ -1,17 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause /* * Radiotap parser * * Copyright 2007 Andy Green * Copyright 2009 Johannes Berg - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Alternatively, this software may be distributed under the terms of BSD - * license. - * - * See COPYING for more details. */ #include diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 1c5c38d18feb..20bba7e491c5 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: ISC /* * Copyright 2002-2005, Instant802 Networks, Inc. * Copyright 2005-2006, Devicescape Software, Inc. @@ -6,18 +7,6 @@ * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2017 Intel Deutschland GmbH * Copyright (C) 2018 - 2026 Intel Corporation - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ diff --git a/net/wireless/reg.h b/net/wireless/reg.h index e1b211c4f75c..fc31c5f9a61a 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: ISC */ #ifndef __NET_WIRELESS_REG_H #define __NET_WIRELESS_REG_H @@ -6,18 +7,6 @@ /* * Copyright 2008-2011 Luis R. Rodriguez * Copyright (C) 2019, 2023 Intel Corporation - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ enum ieee80211_regd_source { diff --git a/net/wireless/trace.c b/net/wireless/trace.c index 95f997fad755..7cb93acf1a8f 100644 --- a/net/wireless/trace.c +++ b/net/wireless/trace.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include #ifndef __CHECKER__ diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index 7b8e94214b07..c19dece2bc6e 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * This file implement the Wireless Extensions core API. * @@ -5,8 +6,6 @@ * Copyright (c) 1997-2007 Jean Tourrilhes, All Rights Reserved. * Copyright 2009 Johannes Berg * Copyright (C) 2024 Intel Corporation - * - * (As all part of the Linux kernel, this file is GPL) */ #include #include diff --git a/net/wireless/wext-priv.c b/net/wireless/wext-priv.c index 37d1147019c2..ce9022843dfd 100644 --- a/net/wireless/wext-priv.c +++ b/net/wireless/wext-priv.c @@ -1,11 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 /* * This file implement the Wireless Extensions priv API. * * Authors : Jean Tourrilhes - HPL - * Copyright (c) 1997-2007 Jean Tourrilhes, All Rights Reserved. * Copyright 2009 Johannes Berg - * - * (As all part of the Linux kernel, this file is GPL) */ #include #include diff --git a/net/wireless/wext-proc.c b/net/wireless/wext-proc.c index cadcf8613af2..be6b2b695bf9 100644 --- a/net/wireless/wext-proc.c +++ b/net/wireless/wext-proc.c @@ -1,10 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * This file implement the Wireless Extensions proc API. * * Authors : Jean Tourrilhes - HPL - * Copyright (c) 1997-2007 Jean Tourrilhes, All Rights Reserved. - * - * (As all part of the Linux kernel, this file is GPL) */ /* From 4e0417a00971cd621e568396c18109f96be2a01d Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Fri, 6 Mar 2026 11:39:27 +0530 Subject: [PATCH 0331/1561] wifi: mac80211_hwsim: add incumbent signal interference detection support Add a debugfs 'simulate_incumbent_signal_interference' with custom file_operations and a .write that accepts " ". The handler selects the 6 GHz chanctx whose primary 20 MHz center matches and reports the event via cfg80211_incumbent_signal_notify(). The bitmap marks affected 20 MHz segments within the current chandef (lowest bit = lowest segment) Signed-off-by: Aditya Kumar Singh Signed-off-by: Amith A Link: https://patch.msgid.link/20260306060927.504567-2-amith.a@oss.qualcomm.com Signed-off-by: Johannes Berg --- drivers/net/wireless/virtual/mac80211_hwsim.c | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/drivers/net/wireless/virtual/mac80211_hwsim.c b/drivers/net/wireless/virtual/mac80211_hwsim.c index 32897d88bda3..82adcc848189 100644 --- a/drivers/net/wireless/virtual/mac80211_hwsim.c +++ b/drivers/net/wireless/virtual/mac80211_hwsim.c @@ -36,6 +36,8 @@ #include #include #include +#include +#include #include "mac80211_hwsim.h" #define WARN_QUEUE 100 @@ -1203,6 +1205,65 @@ static const struct file_operations hwsim_background_cac_ops = { .llseek = default_llseek, }; +struct hwsim_chanctx_iter_arg { + struct ieee80211_chanctx_conf *conf; + u32 freq_mhz; +}; + +static void hwsim_6ghz_chanctx_iter(struct ieee80211_hw *hw, + struct ieee80211_chanctx_conf *conf, + void *data) +{ + struct hwsim_chanctx_iter_arg *arg = data; + + if (conf->def.chan && + conf->def.chan->band == NL80211_BAND_6GHZ && + conf->def.chan->center_freq == arg->freq_mhz) + arg->conf = conf; +} + +static ssize_t hwsim_simulate_incumbent_signal_write(struct file *file, + const char __user *ubuf, + size_t len, loff_t *ppos) +{ + struct mac80211_hwsim_data *data = file->private_data; + struct hwsim_chanctx_iter_arg arg = {}; + u32 bitmap; + char buf[64]; + + if (!len || len > sizeof(buf) - 1) + return -EINVAL; + + if (copy_from_user(buf, ubuf, len)) + return -EFAULT; + buf[len] = '\0'; + + if (sscanf(buf, "%u %i", &arg.freq_mhz, &bitmap) != 2) + return -EINVAL; + + if (!arg.freq_mhz) + return -EINVAL; + + ieee80211_iter_chan_contexts_atomic(data->hw, + hwsim_6ghz_chanctx_iter, + &arg); + + if (!arg.conf) + return -EINVAL; + + cfg80211_incumbent_signal_notify(data->hw->wiphy, + &arg.conf->def, + bitmap, + GFP_KERNEL); + + return len; +} + +static const struct file_operations hwsim_simulate_incumbent_signal_fops = { + .open = simple_open, + .write = hwsim_simulate_incumbent_signal_write, +}; + static int hwsim_fops_group_read(void *dat, u64 *val) { struct mac80211_hwsim_data *data = dat; @@ -5950,6 +6011,9 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, debugfs_create_file("dfs_background_cac", 0200, data->debugfs, data, &hwsim_background_cac_ops); + debugfs_create_file("simulate_incumbent_signal_interference", 0200, + data->debugfs, + data, &hwsim_simulate_incumbent_signal_fops); if (param->pmsr_capa) { data->pmsr_capa = *param->pmsr_capa; From c882b7a603eff6d4a368678dd53beb7413a8414a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 6 Mar 2026 09:51:32 +0100 Subject: [PATCH 0332/1561] wifi: at76c50x: drop redundant device reference Driver core holds a reference to the USB interface and its parent USB device while the interface is bound to a driver and there is no need to take additional references unless the structures are needed after disconnect. Drop the redundant device reference to reduce cargo culting, make it easier to spot drivers where an extra reference is needed, and reduce the risk of memory leaks when drivers fail to release it. Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260306085144.12064-7-johan@kernel.org Signed-off-by: Johannes Berg --- drivers/net/wireless/atmel/at76c50x-usb.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/atmel/at76c50x-usb.c b/drivers/net/wireless/atmel/at76c50x-usb.c index 6445332801a4..44b04ea3cc8b 100644 --- a/drivers/net/wireless/atmel/at76c50x-usb.c +++ b/drivers/net/wireless/atmel/at76c50x-usb.c @@ -2440,13 +2440,11 @@ static int at76_probe(struct usb_interface *interface, struct mib_fw_version *fwv; int board_type = (int)id->driver_info; - udev = usb_get_dev(interface_to_usbdev(interface)); + udev = interface_to_usbdev(interface); fwv = kmalloc_obj(*fwv); - if (!fwv) { - ret = -ENOMEM; - goto exit; - } + if (!fwv) + return -ENOMEM; /* Load firmware into kernel memory */ fwe = at76_load_firmware(udev, board_type); @@ -2534,8 +2532,7 @@ static int at76_probe(struct usb_interface *interface, exit: kfree(fwv); - if (ret < 0) - usb_put_dev(udev); + return ret; } @@ -2552,7 +2549,6 @@ static void at76_disconnect(struct usb_interface *interface) wiphy_info(priv->hw->wiphy, "disconnecting\n"); at76_delete_device(priv); - usb_put_dev(interface_to_usbdev(interface)); dev_info(&interface->dev, "disconnected\n"); } From 75e375816392e8b84feafc97e2c72513f8bd11d4 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 6 Mar 2026 09:51:33 +0100 Subject: [PATCH 0333/1561] wifi: libertas: drop redundant device reference Driver core holds a reference to the USB interface and its parent USB device while the interface is bound to a driver and there is no need to take additional references unless the structures are needed after disconnect. Drop the redundant device reference to reduce cargo culting, make it easier to spot drivers where an extra reference is needed, and reduce the risk of memory leaks when drivers fail to release it. Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260306085144.12064-8-johan@kernel.org Signed-off-by: Johannes Berg --- drivers/net/wireless/marvell/libertas/if_usb.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/wireless/marvell/libertas/if_usb.c b/drivers/net/wireless/marvell/libertas/if_usb.c index 8a6bf1365cfa..05fcf9cc28fa 100644 --- a/drivers/net/wireless/marvell/libertas/if_usb.c +++ b/drivers/net/wireless/marvell/libertas/if_usb.c @@ -276,7 +276,6 @@ static int if_usb_probe(struct usb_interface *intf, cardp->boot2_version = udev->descriptor.bcdDevice; - usb_get_dev(udev); usb_set_intfdata(intf, cardp); r = lbs_get_firmware_async(priv, &udev->dev, cardp->model, @@ -287,7 +286,6 @@ static int if_usb_probe(struct usb_interface *intf, return 0; err_get_fw: - usb_put_dev(udev); lbs_remove_card(priv); err_add_card: if_usb_reset_device(cardp); @@ -321,7 +319,6 @@ static void if_usb_disconnect(struct usb_interface *intf) kfree(cardp); usb_set_intfdata(intf, NULL); - usb_put_dev(interface_to_usbdev(intf)); } /** From b1af0de313bde1539c2501ef6849791010950387 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 6 Mar 2026 09:51:34 +0100 Subject: [PATCH 0334/1561] wifi: libertas_tf: drop redundant device reference Driver core holds a reference to the USB interface and its parent USB device while the interface is bound to a driver and there is no need to take additional references unless the structures are needed after disconnect. Drop the redundant device reference to reduce cargo culting, make it easier to spot drivers where an extra reference is needed, and reduce the risk of memory leaks when drivers fail to release it. Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260306085144.12064-9-johan@kernel.org Signed-off-by: Johannes Berg --- drivers/net/wireless/marvell/libertas_tf/if_usb.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/wireless/marvell/libertas_tf/if_usb.c b/drivers/net/wireless/marvell/libertas_tf/if_usb.c index f49151c18b79..07b38f2b8f58 100644 --- a/drivers/net/wireless/marvell/libertas_tf/if_usb.c +++ b/drivers/net/wireless/marvell/libertas_tf/if_usb.c @@ -223,7 +223,6 @@ static int if_usb_probe(struct usb_interface *intf, if (!priv) goto dealloc; - usb_get_dev(udev); usb_set_intfdata(intf, cardp); return 0; @@ -258,7 +257,6 @@ static void if_usb_disconnect(struct usb_interface *intf) kfree(cardp); usb_set_intfdata(intf, NULL); - usb_put_dev(interface_to_usbdev(intf)); lbtf_deb_leave(LBTF_DEB_MAIN); } From 6f29eda7b77a9d44382bbeb99fe8a4c93b11aabb Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 6 Mar 2026 09:51:41 +0100 Subject: [PATCH 0335/1561] wifi: rt2x00: drop redundant device reference Driver core holds a reference to the USB interface and its parent USB device while the interface is bound to a driver and there is no need to take additional references unless the structures are needed after disconnect. Drop the redundant device reference to reduce cargo culting, make it easier to spot drivers where an extra reference is needed, and reduce the risk of memory leaks when drivers fail to release it. Acked-by: Stanislaw Gruszka Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260306085144.12064-16-johan@kernel.org Signed-off-by: Johannes Berg --- drivers/net/wireless/ralink/rt2x00/rt2x00usb.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c b/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c index 54599cad78f9..83d00b6baf64 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c @@ -802,14 +802,12 @@ int rt2x00usb_probe(struct usb_interface *usb_intf, struct rt2x00_dev *rt2x00dev; int retval; - usb_dev = usb_get_dev(usb_dev); usb_reset_device(usb_dev); hw = ieee80211_alloc_hw(sizeof(struct rt2x00_dev), ops->hw); if (!hw) { rt2x00_probe_err("Failed to allocate hardware\n"); - retval = -ENOMEM; - goto exit_put_device; + return -ENOMEM; } usb_set_intfdata(usb_intf, hw); @@ -851,10 +849,6 @@ exit_free_reg: exit_free_device: ieee80211_free_hw(hw); - -exit_put_device: - usb_put_dev(usb_dev); - usb_set_intfdata(usb_intf, NULL); return retval; @@ -873,11 +867,7 @@ void rt2x00usb_disconnect(struct usb_interface *usb_intf) rt2x00usb_free_reg(rt2x00dev); ieee80211_free_hw(hw); - /* - * Free the USB device data. - */ usb_set_intfdata(usb_intf, NULL); - usb_put_dev(interface_to_usbdev(usb_intf)); } EXPORT_SYMBOL_GPL(rt2x00usb_disconnect); From 97492c019da4b62df83255e968b23b81c0315530 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 6 Mar 2026 09:51:35 +0100 Subject: [PATCH 0336/1561] wifi: mwifiex: drop redundant device reference Driver core holds a reference to the USB interface and its parent USB device while the interface is bound to a driver and there is no need to take additional references unless the structures are needed after disconnect. Drop the redundant device reference to reduce cargo culting, make it easier to spot drivers where an extra reference is needed, and reduce the risk of memory leaks when drivers fail to release it. Signed-off-by: Johan Hovold Acked-by: Francesco Dolcini Link: https://patch.msgid.link/20260306085144.12064-10-johan@kernel.org Signed-off-by: Johannes Berg --- drivers/net/wireless/marvell/mwifiex/usb.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/usb.c b/drivers/net/wireless/marvell/mwifiex/usb.c index 947ecb0a7b40..f4b94a1054f6 100644 --- a/drivers/net/wireless/marvell/mwifiex/usb.c +++ b/drivers/net/wireless/marvell/mwifiex/usb.c @@ -520,8 +520,6 @@ static int mwifiex_usb_probe(struct usb_interface *intf, return ret; } - usb_get_dev(udev); - return 0; } @@ -666,8 +664,6 @@ static void mwifiex_usb_disconnect(struct usb_interface *intf) mwifiex_dbg(adapter, FATAL, "%s: removing card\n", __func__); mwifiex_remove_card(adapter); - - usb_put_dev(interface_to_usbdev(intf)); } static void mwifiex_usb_coredump(struct device *dev) From 1635ecc61a24597f893d057d004051a535c1c643 Mon Sep 17 00:00:00 2001 From: Sarika Sharma Date: Thu, 26 Feb 2026 10:49:47 +0530 Subject: [PATCH 0337/1561] wifi: ath12k: account TX stats only when ACK/BA status is present The fields tx_retry_failed, tx_retry_count, and tx_duration are currently updated outside the HTT_PPDU_STATS_TAG_USR_COMPLTN_ACK_BA_STATUS flag check. In certain scenarios, firmware delivers multiple PPDU statistics for the same PPDU, first without BA/ACK information, and later with BA/ACK status once it becomes available. As the same PPDU is processed again, these counters are updated a second time, resulting in duplicate TX statistics. To address this, move the accounting of tx_retry_failed and tx_retry_count under the ACK/BA status flag check, and similarly gate tx_duration on the same path. This ensures that each PPDU contributes to these counters exactly once, avoids double counting, and provides consistent reporting in userspace tools such as station dump. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.6-01243-QCAHKSWPL_SILICONZ-1 Fixes: a0b963e1da5b ("wifi: ath12k: fetch tx_retry and tx_failed from htt_ppdu_stats_user_cmpltn_common_tlv") Signed-off-by: Sarika Sharma Reviewed-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20260226051947.1379716-1-sarika.sharma@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/dp_htt.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/dp_htt.c b/drivers/net/wireless/ath/ath12k/dp_htt.c index e71bb71a6020..9c19d9707abf 100644 --- a/drivers/net/wireless/ath/ath12k/dp_htt.c +++ b/drivers/net/wireless/ath/ath12k/dp_htt.c @@ -205,16 +205,9 @@ ath12k_update_per_peer_tx_stats(struct ath12k_pdev_dp *dp_pdev, if (!(usr_stats->tlv_flags & BIT(HTT_PPDU_STATS_TAG_USR_RATE))) return; - if (usr_stats->tlv_flags & BIT(HTT_PPDU_STATS_TAG_USR_COMPLTN_COMMON)) { + if (usr_stats->tlv_flags & BIT(HTT_PPDU_STATS_TAG_USR_COMPLTN_COMMON)) is_ampdu = HTT_USR_CMPLTN_IS_AMPDU(usr_stats->cmpltn_cmn.flags); - tx_retry_failed = - __le16_to_cpu(usr_stats->cmpltn_cmn.mpdu_tried) - - __le16_to_cpu(usr_stats->cmpltn_cmn.mpdu_success); - tx_retry_count = - HTT_USR_CMPLTN_LONG_RETRY(usr_stats->cmpltn_cmn.flags) + - HTT_USR_CMPLTN_SHORT_RETRY(usr_stats->cmpltn_cmn.flags); - } if (usr_stats->tlv_flags & BIT(HTT_PPDU_STATS_TAG_USR_COMPLTN_ACK_BA_STATUS)) { @@ -223,10 +216,19 @@ ath12k_update_per_peer_tx_stats(struct ath12k_pdev_dp *dp_pdev, HTT_PPDU_STATS_ACK_BA_INFO_NUM_MSDU_M); tid = le32_get_bits(usr_stats->ack_ba.info, HTT_PPDU_STATS_ACK_BA_INFO_TID_NUM); - } - if (common->fes_duration_us) - tx_duration = le32_to_cpu(common->fes_duration_us); + if (usr_stats->tlv_flags & BIT(HTT_PPDU_STATS_TAG_USR_COMPLTN_COMMON)) { + tx_retry_failed = + __le16_to_cpu(usr_stats->cmpltn_cmn.mpdu_tried) - + __le16_to_cpu(usr_stats->cmpltn_cmn.mpdu_success); + tx_retry_count = + HTT_USR_CMPLTN_LONG_RETRY(usr_stats->cmpltn_cmn.flags) + + HTT_USR_CMPLTN_SHORT_RETRY(usr_stats->cmpltn_cmn.flags); + } + + if (common->fes_duration_us) + tx_duration = le32_to_cpu(common->fes_duration_us); + } user_rate = &usr_stats->rate; flags = HTT_USR_RATE_PREAMBLE(user_rate->rate_flags); From aecb569d7fb689e3e5b0005ca7bd0a2ef28915e8 Mon Sep 17 00:00:00 2001 From: Manish Dharanenthiran Date: Thu, 26 Feb 2026 09:49:11 +0530 Subject: [PATCH 0338/1561] wifi: ath12k: Fix the assignment of logical link index Per-link logical index is assigned from the global counter, ahsta->num_peer. This logical index is sent to firmware during peer association. If there is a failure in creating a link station, ath12k_mac_free_unassign_link_sta() clears the link, but does not decrement the logical link index. This will result in a higher logical link index for the next link station created. Also, if there is a leak in logical link index as we assign the incremented num_peer, then the index can exceed the maximum valid value of 15. As an example, let's say we have a 2 GHz + 5 GHz + 6 GHz MLO setup. So the logical link indices that they have are 0, 1 and 2, respectively. If the 5 GHz link is removed, logical link index 1 becomes available, and num_peer is not reduced to 2 and still remains at 3. If a new 5 GHz link is added later, it gets the index 3, instead of reusing link index 1. Also, num_peer is increased to 4, though only 3 links are present. To resolve these, create a bitmap, free_logical_link_idx, that tracks the available logical link indices. When a link station is created, select the first free logical index and when a link station is removed, mark its logical link index as available by setting the bit. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.6-01181-QCAHKSWPL_SILICONZ-1 Signed-off-by: Manish Dharanenthiran Signed-off-by: Roopni Devanathan Reviewed-by: Rameshkumar Sundaram Reviewed-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20260226041911.2434999-1-roopni.devanathan@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/core.h | 2 +- drivers/net/wireless/ath/ath12k/mac.c | 16 ++++++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index 760c76d6f0f4..59c193b24764 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -523,7 +523,7 @@ struct ath12k_sta { u16 links_map; u8 assoc_link_id; u16 ml_peer_id; - u8 num_peer; + u16 free_logical_link_idx_map; enum ieee80211_sta_state state; }; diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 05268d425d6a..41fa85f89f0e 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -6786,6 +6786,8 @@ static void ath12k_mac_free_unassign_link_sta(struct ath12k_hw *ah, return; ahsta->links_map &= ~BIT(link_id); + ahsta->free_logical_link_idx_map |= BIT(arsta->link_idx); + rcu_assign_pointer(ahsta->link[link_id], NULL); synchronize_rcu(); @@ -7104,6 +7106,7 @@ static int ath12k_mac_assign_link_sta(struct ath12k_hw *ah, struct ieee80211_sta *sta = ath12k_ahsta_to_sta(ahsta); struct ieee80211_link_sta *link_sta; struct ath12k_link_vif *arvif; + int link_idx; lockdep_assert_wiphy(ah->hw->wiphy); @@ -7122,8 +7125,16 @@ static int ath12k_mac_assign_link_sta(struct ath12k_hw *ah, ether_addr_copy(arsta->addr, link_sta->addr); - /* logical index of the link sta in order of creation */ - arsta->link_idx = ahsta->num_peer++; + if (!ahsta->free_logical_link_idx_map) + return -ENOSPC; + + /* + * Allocate a logical link index by selecting the first available bit + * from the free logical index map + */ + link_idx = __ffs(ahsta->free_logical_link_idx_map); + ahsta->free_logical_link_idx_map &= ~BIT(link_idx); + arsta->link_idx = link_idx; arsta->link_id = link_id; ahsta->links_map |= BIT(arsta->link_id); @@ -7632,6 +7643,7 @@ int ath12k_mac_op_sta_state(struct ieee80211_hw *hw, if (old_state == IEEE80211_STA_NOTEXIST && new_state == IEEE80211_STA_NONE) { memset(ahsta, 0, sizeof(*ahsta)); + ahsta->free_logical_link_idx_map = U16_MAX; arsta = &ahsta->deflink; From 616217a989e09c55398db8555e5ef0c64504cb66 Mon Sep 17 00:00:00 2001 From: P Praneesh Date: Mon, 9 Feb 2026 11:19:24 +0530 Subject: [PATCH 0339/1561] wifi: ath12k: Fix legacy rate mapping for monitor mode capture The current implementation incorrectly reports legacy CCK and OFDM rates in monitor mode radiotap headers. The rate field displays wrong values, for example showing 11 Mbps when the actual rate is 1 Mbps. This occurs because the HAL layer uses a unified enum for both CCK and OFDM rates without distinguishing between long/short preamble variants and proper rate mapping to hardware rate indices. The root cause is threefold: 1. The hal_rx_legacy_rate enum conflates CCK and OFDM rates into a single enumeration, making it impossible to differentiate between 802.11b CCK rates (with long/short preamble variants) and 802.11a/g OFDM rates. 2. The L-SIG-B parsing function maps hardware rate values to the wrong enum values. For CCK rates, it incorrectly combines long and short preamble cases (e.g., cases 2 and 5 both map to 2 Mbps), losing preamble information critical for proper rate identification. 3. The mac layer's rate-to-index conversion function does not properly handle the precedence between long preamble, short preamble, and OFDM rates when matching hardware rate values. Split the hal_rx_legacy_rate enum into two separate enumerations: hal_rx_legacy_rate for CCK rates with explicit long preamble (LP) and short preamble (SP) variants, and hal_rx_legacy_rates_ofdm for OFDM rates. This separation allows proper identification of rate types and preamble modes. Introduce a new mapping ath12k_wifi7_hal_mon_map_legacy_rate_to_hw_rate() that converts HAL CCK rate enums to hardware rate indices defined in ath12k_hw_rate_cck. This ensures the rate field in ppdu_info contains the correct hardware rate index that matches the mac layer's expectations. Update the L-SIG-B parsing to map each hardware rate value (1-7) to its corresponding CCK rate enum with proper preamble designation: - Cases 1-4: Long preamble (1, 2, 5.5, 11 Mbps) - Cases 5-7: Short preamble (2, 5.5, 11 Mbps) Update the L-SIG-A parsing to use the new OFDM-specific enum values, maintaining the existing rate mapping for 802.11a/g OFDM rates. Refactor the mac layer's ath12k_mac_hw_rate_to_idx() function to implement proper matching precedence: 1. First match OFDM rates using the IEEE80211_RATE_MANDATORY_A flag 2. Then match CCK short preamble rates 3. Finally match CCK long preamble rates as fallback Add helper macros ATH12K_MAC_RATE_A_M and ATH12K_MAC_RATE_B to improve readability of the rate table initialization and ensure the mandatory flag is set for OFDM rates. This fix ensures monitor mode captures display accurate rate information in the radiotap header, correctly distinguishing between 1 Mbps and 11 Mbps, and properly identifying preamble types for CCK rates. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.6-01181-QCAHKSWPL_SILICONZ-1 Fixes: d889913205cf ("wifi: ath12k: driver for Qualcomm Wi-Fi 7 devices") Signed-off-by: P Praneesh Signed-off-by: Thiraviyam Mariyappan Reviewed-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20260209054924.2713072-1-thiraviyam.mariyappan@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/hal.h | 31 +++++--- drivers/net/wireless/ath/ath12k/mac.c | 51 +++++++------ .../net/wireless/ath/ath12k/wifi7/dp_mon.c | 76 +++++++++++++++---- 3 files changed, 108 insertions(+), 50 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/hal.h b/drivers/net/wireless/ath/ath12k/hal.h index 43e3880f8257..bf4f7dbae866 100644 --- a/drivers/net/wireless/ath/ath12k/hal.h +++ b/drivers/net/wireless/ath/ath12k/hal.h @@ -268,21 +268,28 @@ enum hal_rx_reception_type { }; enum hal_rx_legacy_rate { - HAL_RX_LEGACY_RATE_1_MBPS, - HAL_RX_LEGACY_RATE_2_MBPS, - HAL_RX_LEGACY_RATE_5_5_MBPS, - HAL_RX_LEGACY_RATE_6_MBPS, - HAL_RX_LEGACY_RATE_9_MBPS, - HAL_RX_LEGACY_RATE_11_MBPS, - HAL_RX_LEGACY_RATE_12_MBPS, - HAL_RX_LEGACY_RATE_18_MBPS, - HAL_RX_LEGACY_RATE_24_MBPS, - HAL_RX_LEGACY_RATE_36_MBPS, - HAL_RX_LEGACY_RATE_48_MBPS, - HAL_RX_LEGACY_RATE_54_MBPS, + HAL_RX_LEGACY_RATE_LP_1_MBPS, + HAL_RX_LEGACY_RATE_LP_2_MBPS, + HAL_RX_LEGACY_RATE_LP_5_5_MBPS, + HAL_RX_LEGACY_RATE_LP_11_MBPS, + HAL_RX_LEGACY_RATE_SP_2_MBPS, + HAL_RX_LEGACY_RATE_SP_5_5_MBPS, + HAL_RX_LEGACY_RATE_SP_11_MBPS, HAL_RX_LEGACY_RATE_INVALID, }; +enum hal_rx_legacy_rates_ofdm { + HAL_RX_LEGACY_RATE_OFDM_48_MBPS, + HAL_RX_LEGACY_RATE_OFDM_24_MBPS, + HAL_RX_LEGACY_RATE_OFDM_12_MBPS, + HAL_RX_LEGACY_RATE_OFDM_6_MBPS, + HAL_RX_LEGACY_RATE_OFDM_54_MBPS, + HAL_RX_LEGACY_RATE_OFDM_36_MBPS, + HAL_RX_LEGACY_RATE_OFDM_18_MBPS, + HAL_RX_LEGACY_RATE_OFDM_9_MBPS, + HAL_RX_LEGACY_RATE_OFDM_INVALID, +}; + enum hal_ring_type { HAL_REO_DST, HAL_REO_EXCEPTION, diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 41fa85f89f0e..1eaefdc87111 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -164,30 +164,31 @@ static const struct ieee80211_channel ath12k_6ghz_channels[] = { CHAN6G(233, 7115, 0), }; +#define ATH12K_MAC_RATE_A_M(bps, code) \ + { .bitrate = (bps), .hw_value = (code),\ + .flags = IEEE80211_RATE_MANDATORY_A } + +#define ATH12K_MAC_RATE_B(bps, code, code_short) \ + { .bitrate = (bps), .hw_value = (code), .hw_value_short = (code_short),\ + .flags = IEEE80211_RATE_SHORT_PREAMBLE } + static struct ieee80211_rate ath12k_legacy_rates[] = { { .bitrate = 10, .hw_value = ATH12K_HW_RATE_CCK_LP_1M }, - { .bitrate = 20, - .hw_value = ATH12K_HW_RATE_CCK_LP_2M, - .hw_value_short = ATH12K_HW_RATE_CCK_SP_2M, - .flags = IEEE80211_RATE_SHORT_PREAMBLE }, - { .bitrate = 55, - .hw_value = ATH12K_HW_RATE_CCK_LP_5_5M, - .hw_value_short = ATH12K_HW_RATE_CCK_SP_5_5M, - .flags = IEEE80211_RATE_SHORT_PREAMBLE }, - { .bitrate = 110, - .hw_value = ATH12K_HW_RATE_CCK_LP_11M, - .hw_value_short = ATH12K_HW_RATE_CCK_SP_11M, - .flags = IEEE80211_RATE_SHORT_PREAMBLE }, - - { .bitrate = 60, .hw_value = ATH12K_HW_RATE_OFDM_6M }, - { .bitrate = 90, .hw_value = ATH12K_HW_RATE_OFDM_9M }, - { .bitrate = 120, .hw_value = ATH12K_HW_RATE_OFDM_12M }, - { .bitrate = 180, .hw_value = ATH12K_HW_RATE_OFDM_18M }, - { .bitrate = 240, .hw_value = ATH12K_HW_RATE_OFDM_24M }, - { .bitrate = 360, .hw_value = ATH12K_HW_RATE_OFDM_36M }, - { .bitrate = 480, .hw_value = ATH12K_HW_RATE_OFDM_48M }, - { .bitrate = 540, .hw_value = ATH12K_HW_RATE_OFDM_54M }, + ATH12K_MAC_RATE_B(20, ATH12K_HW_RATE_CCK_LP_2M, + ATH12K_HW_RATE_CCK_SP_2M), + ATH12K_MAC_RATE_B(55, ATH12K_HW_RATE_CCK_LP_5_5M, + ATH12K_HW_RATE_CCK_SP_5_5M), + ATH12K_MAC_RATE_B(110, ATH12K_HW_RATE_CCK_LP_11M, + ATH12K_HW_RATE_CCK_SP_11M), + ATH12K_MAC_RATE_A_M(60, ATH12K_HW_RATE_OFDM_6M), + ATH12K_MAC_RATE_A_M(90, ATH12K_HW_RATE_OFDM_9M), + ATH12K_MAC_RATE_A_M(120, ATH12K_HW_RATE_OFDM_12M), + ATH12K_MAC_RATE_A_M(180, ATH12K_HW_RATE_OFDM_18M), + ATH12K_MAC_RATE_A_M(240, ATH12K_HW_RATE_OFDM_24M), + ATH12K_MAC_RATE_A_M(360, ATH12K_HW_RATE_OFDM_36M), + ATH12K_MAC_RATE_A_M(480, ATH12K_HW_RATE_OFDM_48M), + ATH12K_MAC_RATE_A_M(540, ATH12K_HW_RATE_OFDM_54M), }; static const int @@ -732,11 +733,17 @@ u8 ath12k_mac_hw_rate_to_idx(const struct ieee80211_supported_band *sband, if (ath12k_mac_bitrate_is_cck(rate->bitrate) != cck) continue; - if (rate->hw_value == hw_rate) + /* To handle 802.11a PPDU type */ + if ((!cck) && (rate->hw_value == hw_rate) && + (rate->flags & IEEE80211_RATE_MANDATORY_A)) return i; + /* To handle 802.11b short PPDU type */ else if (rate->flags & IEEE80211_RATE_SHORT_PREAMBLE && rate->hw_value_short == hw_rate) return i; + /* To handle 802.11b long PPDU type */ + else if (rate->hw_value == hw_rate) + return i; } return 0; diff --git a/drivers/net/wireless/ath/ath12k/wifi7/dp_mon.c b/drivers/net/wireless/ath/ath12k/wifi7/dp_mon.c index c9cea597a92e..77f5d23be78d 100644 --- a/drivers/net/wireless/ath/ath12k/wifi7/dp_mon.c +++ b/drivers/net/wireless/ath/ath12k/wifi7/dp_mon.c @@ -405,6 +405,42 @@ ath12k_wifi7_dp_mon_hal_rx_parse_user_info(const struct hal_receive_user_info *r } } +static __always_inline u8 +ath12k_wifi7_hal_mon_map_legacy_rate_to_hw_rate(u8 rate) +{ + u8 ath12k_rate; + + /* Map hal_rx_legacy_rate to ath12k_hw_rate_cck */ + switch (rate) { + case HAL_RX_LEGACY_RATE_LP_1_MBPS: + ath12k_rate = ATH12K_HW_RATE_CCK_LP_1M; + break; + case HAL_RX_LEGACY_RATE_LP_2_MBPS: + ath12k_rate = ATH12K_HW_RATE_CCK_LP_2M; + break; + case HAL_RX_LEGACY_RATE_LP_5_5_MBPS: + ath12k_rate = ATH12K_HW_RATE_CCK_LP_5_5M; + break; + case HAL_RX_LEGACY_RATE_LP_11_MBPS: + ath12k_rate = ATH12K_HW_RATE_CCK_LP_11M; + break; + case HAL_RX_LEGACY_RATE_SP_2_MBPS: + ath12k_rate = ATH12K_HW_RATE_CCK_SP_2M; + break; + case HAL_RX_LEGACY_RATE_SP_5_5_MBPS: + ath12k_rate = ATH12K_HW_RATE_CCK_SP_5_5M; + break; + case HAL_RX_LEGACY_RATE_SP_11_MBPS: + ath12k_rate = ATH12K_HW_RATE_CCK_SP_11M; + break; + default: + ath12k_rate = rate; + break; + } + + return ath12k_rate; +} + static void ath12k_wifi7_dp_mon_parse_l_sig_b(const struct hal_rx_lsig_b_info *lsigb, struct hal_rx_mon_ppdu_info *ppdu_info) @@ -415,25 +451,32 @@ ath12k_wifi7_dp_mon_parse_l_sig_b(const struct hal_rx_lsig_b_info *lsigb, rate = u32_get_bits(info0, HAL_RX_LSIG_B_INFO_INFO0_RATE); switch (rate) { case 1: - rate = HAL_RX_LEGACY_RATE_1_MBPS; + rate = HAL_RX_LEGACY_RATE_LP_1_MBPS; break; case 2: - case 5: - rate = HAL_RX_LEGACY_RATE_2_MBPS; + rate = HAL_RX_LEGACY_RATE_LP_2_MBPS; break; case 3: - case 6: - rate = HAL_RX_LEGACY_RATE_5_5_MBPS; + rate = HAL_RX_LEGACY_RATE_LP_5_5_MBPS; break; case 4: + rate = HAL_RX_LEGACY_RATE_LP_11_MBPS; + break; + case 5: + rate = HAL_RX_LEGACY_RATE_SP_2_MBPS; + break; + case 6: + rate = HAL_RX_LEGACY_RATE_SP_5_5_MBPS; + break; case 7: - rate = HAL_RX_LEGACY_RATE_11_MBPS; + rate = HAL_RX_LEGACY_RATE_SP_11_MBPS; break; default: rate = HAL_RX_LEGACY_RATE_INVALID; + break; } - ppdu_info->rate = rate; + ppdu_info->rate = ath12k_wifi7_hal_mon_map_legacy_rate_to_hw_rate(rate); ppdu_info->cck_flag = 1; } @@ -447,31 +490,32 @@ ath12k_wifi7_dp_mon_parse_l_sig_a(const struct hal_rx_lsig_a_info *lsiga, rate = u32_get_bits(info0, HAL_RX_LSIG_A_INFO_INFO0_RATE); switch (rate) { case 8: - rate = HAL_RX_LEGACY_RATE_48_MBPS; + rate = HAL_RX_LEGACY_RATE_OFDM_48_MBPS; break; case 9: - rate = HAL_RX_LEGACY_RATE_24_MBPS; + rate = HAL_RX_LEGACY_RATE_OFDM_24_MBPS; break; case 10: - rate = HAL_RX_LEGACY_RATE_12_MBPS; + rate = HAL_RX_LEGACY_RATE_OFDM_12_MBPS; break; case 11: - rate = HAL_RX_LEGACY_RATE_6_MBPS; + rate = HAL_RX_LEGACY_RATE_OFDM_6_MBPS; break; case 12: - rate = HAL_RX_LEGACY_RATE_54_MBPS; + rate = HAL_RX_LEGACY_RATE_OFDM_54_MBPS; break; case 13: - rate = HAL_RX_LEGACY_RATE_36_MBPS; + rate = HAL_RX_LEGACY_RATE_OFDM_36_MBPS; break; case 14: - rate = HAL_RX_LEGACY_RATE_18_MBPS; + rate = HAL_RX_LEGACY_RATE_OFDM_18_MBPS; break; case 15: - rate = HAL_RX_LEGACY_RATE_9_MBPS; + rate = HAL_RX_LEGACY_RATE_OFDM_9_MBPS; break; default: - rate = HAL_RX_LEGACY_RATE_INVALID; + rate = HAL_RX_LEGACY_RATE_OFDM_INVALID; + break; } ppdu_info->rate = rate; From 8e0ab5b9adb7fec3149441621df1cf15325b7215 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 24 Feb 2026 13:46:17 +0900 Subject: [PATCH 0340/1561] wifi: ath6kl: wmi: Avoid -Wflex-array-member-not-at-end warning -Wflex-array-member-not-at-end was introduced in GCC-14, and we are getting ready to enable it, globally. Remove unused structures bss_bias_info and bss_bias, and member bss in struct roam_ctrl_cmd. After these changes, the size of struct roam_ctrl_cmd, along with its member's offsets remain the same, hence the memory layout doesn't change: Before changes: struct roam_ctrl_cmd { union { u8 bssid[6]; /* 0 6 */ u8 roam_mode; /* 0 1 */ struct bss_bias_info bss; /* 0 1 */ struct low_rssi_scan_params params; /* 0 8 */ } info; /* 0 8 */ u8 roam_ctrl; /* 8 1 */ /* size: 9, cachelines: 1, members: 2 */ /* last cacheline: 9 bytes */ } __attribute__((__packed__)); After changes: struct roam_ctrl_cmd { union { u8 bssid[6]; /* 0 6 */ u8 roam_mode; /* 0 1 */ struct low_rssi_scan_params params; /* 0 8 */ } info; /* 0 8 */ u8 roam_ctrl; /* 8 1 */ /* size: 9, cachelines: 1, members: 2 */ /* last cacheline: 9 bytes */ } __attribute__((__packed__)); With these changes fix the following warning: drivers/net/wireless/ath/ath6kl/wmi.h:1658:20: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] Signed-off-by: Gustavo A. R. Silva Link: https://patch.msgid.link/aZ0tGZnmtGckKJzY@kspp Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath6kl/wmi.h | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/net/wireless/ath/ath6kl/wmi.h b/drivers/net/wireless/ath/ath6kl/wmi.h index 3080d82e25cc..8fbece3fdad9 100644 --- a/drivers/net/wireless/ath/ath6kl/wmi.h +++ b/drivers/net/wireless/ath/ath6kl/wmi.h @@ -1630,16 +1630,6 @@ enum wmi_roam_mode { WMI_LOCK_BSS_MODE = 3, /* Lock to the current BSS */ }; -struct bss_bias { - u8 bssid[ETH_ALEN]; - s8 bias; -} __packed; - -struct bss_bias_info { - u8 num_bss; - struct bss_bias bss_bias[]; -} __packed; - struct low_rssi_scan_params { __le16 lrssi_scan_period; a_sle16 lrssi_scan_threshold; @@ -1652,7 +1642,6 @@ struct roam_ctrl_cmd { union { u8 bssid[ETH_ALEN]; /* WMI_FORCE_ROAM */ u8 roam_mode; /* WMI_SET_ROAM_MODE */ - struct bss_bias_info bss; /* WMI_SET_HOST_BIAS */ struct low_rssi_scan_params params; /* WMI_SET_LRSSI_SCAN_PARAMS */ } __packed info; From 01748996f649d3de1c6ce08c829662d56bfd281b Mon Sep 17 00:00:00 2001 From: David Wei Date: Thu, 5 Mar 2026 10:18:00 -0800 Subject: [PATCH 0341/1561] selftests/net: Add bpf skb forwarding program Add nk_forward.bpf.c, a BPF program that forwards skbs matching some IPv6 prefix received on eth0 ifindex to a specified netkit ifindex. This will be needed by netkit container tests. Signed-off-by: David Wei Signed-off-by: Daniel Borkmann Acked-by: Stanislav Fomichev Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20260305181803.2912736-2-dw@davidwei.uk Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/hw/nk_forward.bpf.c | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 tools/testing/selftests/drivers/net/hw/nk_forward.bpf.c diff --git a/tools/testing/selftests/drivers/net/hw/nk_forward.bpf.c b/tools/testing/selftests/drivers/net/hw/nk_forward.bpf.c new file mode 100644 index 000000000000..86ebfc1445b6 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/nk_forward.bpf.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include + +#define TC_ACT_OK 0 +#define ETH_P_IPV6 0x86DD + +#define ctx_ptr(field) ((void *)(long)(field)) + +#define v6_p64_equal(a, b) (a.s6_addr32[0] == b.s6_addr32[0] && \ + a.s6_addr32[1] == b.s6_addr32[1]) + +volatile __u32 netkit_ifindex; +volatile __u8 ipv6_prefix[16]; + +SEC("tc/ingress") +int tc_redirect_peer(struct __sk_buff *skb) +{ + void *data_end = ctx_ptr(skb->data_end); + void *data = ctx_ptr(skb->data); + struct in6_addr *peer_addr; + struct ipv6hdr *ip6h; + struct ethhdr *eth; + + peer_addr = (struct in6_addr *)ipv6_prefix; + + if (skb->protocol != bpf_htons(ETH_P_IPV6)) + return TC_ACT_OK; + + eth = data; + if ((void *)(eth + 1) > data_end) + return TC_ACT_OK; + + ip6h = data + sizeof(struct ethhdr); + if ((void *)(ip6h + 1) > data_end) + return TC_ACT_OK; + + if (!v6_p64_equal(ip6h->daddr, (*peer_addr))) + return TC_ACT_OK; + + return bpf_redirect_peer(netkit_ifindex, 0); +} + +char __license[] SEC("license") = "GPL"; From 68ab8ba927758d77439785661f1f8b2a47318c5a Mon Sep 17 00:00:00 2001 From: David Wei Date: Thu, 5 Mar 2026 10:18:01 -0800 Subject: [PATCH 0342/1561] selftests/net: Export Netlink class via lib.py Making rtnl newlink calls requires constants defined in Netlink class in pyynl. Export it. Signed-off-by: David Wei Link: https://patch.msgid.link/20260305181803.2912736-3-dw@davidwei.uk Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/lib/py/__init__.py | 4 ++-- tools/testing/selftests/net/lib/py/__init__.py | 4 ++-- tools/testing/selftests/net/lib/py/ynl.py | 10 ++++++++-- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py index 5872d114f142..e5ef5e949c12 100644 --- a/tools/testing/selftests/drivers/net/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py @@ -19,7 +19,7 @@ try: # Import one by one to avoid pylint false positives from net.lib.py import NetNS, NetNSEnter, NetdevSimDev from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \ - NlError, RtnlFamily, DevlinkFamily, PSPFamily + NlError, RtnlFamily, DevlinkFamily, PSPFamily, Netlink from net.lib.py import CmdExitFailure from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \ fd_read_timeout, ip, rand_port, rand_ports, wait_port_listen, wait_file @@ -31,7 +31,7 @@ try: __all__ = ["NetNS", "NetNSEnter", "NetdevSimDev", "EthtoolFamily", "NetdevFamily", "NetshaperFamily", - "NlError", "RtnlFamily", "DevlinkFamily", "PSPFamily", + "NlError", "RtnlFamily", "DevlinkFamily", "PSPFamily", "Netlink", "CmdExitFailure", "bkg", "cmd", "bpftool", "bpftrace", "defer", "ethtool", "fd_read_timeout", "ip", "rand_port", "rand_ports", diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py index a584e7f806a4..54e84282781c 100644 --- a/tools/testing/selftests/net/lib/py/__init__.py +++ b/tools/testing/selftests/net/lib/py/__init__.py @@ -16,7 +16,7 @@ from .utils import CmdExitFailure, fd_read_timeout, cmd, bkg, defer, \ bpftool, ip, ethtool, bpftrace, rand_port, rand_ports, wait_port_listen, \ wait_file, tool from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily, RtnlAddrFamily -from .ynl import NetshaperFamily, DevlinkFamily, PSPFamily +from .ynl import NetshaperFamily, DevlinkFamily, PSPFamily, Netlink __all__ = ["KSRC", "KsftFailEx", "KsftSkipEx", "KsftXfailEx", "ksft_pr", "ksft_eq", @@ -31,4 +31,4 @@ __all__ = ["KSRC", "NetdevSim", "NetdevSimDev", "NetshaperFamily", "DevlinkFamily", "PSPFamily", "NlError", "YnlFamily", "EthtoolFamily", "NetdevFamily", "RtnlFamily", - "RtnlAddrFamily"] + "RtnlAddrFamily", "Netlink"] diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py index 32c223e93b2c..b42986bf39e3 100644 --- a/tools/testing/selftests/net/lib/py/ynl.py +++ b/tools/testing/selftests/net/lib/py/ynl.py @@ -13,20 +13,26 @@ try: SPEC_PATH = KSFT_DIR / "net/lib/specs" sys.path.append(tools_full_path.as_posix()) - from net.lib.ynl.pyynl.lib import YnlFamily, NlError + from net.lib.ynl.pyynl.lib import YnlFamily, NlError, Netlink else: # Running in tree tools_full_path = KSRC / "tools" SPEC_PATH = KSRC / "Documentation/netlink/specs" sys.path.append(tools_full_path.as_posix()) - from net.ynl.pyynl.lib import YnlFamily, NlError + from net.ynl.pyynl.lib import YnlFamily, NlError, Netlink except ModuleNotFoundError as e: ksft_pr("Failed importing `ynl` library from kernel sources") ksft_pr(str(e)) ktap_result(True, comment="SKIP") sys.exit(4) +__all__ = [ + "NlError", "Netlink", "YnlFamily", "SPEC_PATH", + "EthtoolFamily", "RtnlFamily", "RtnlAddrFamily", + "NetdevFamily", "NetshaperFamily", "DevlinkFamily", "PSPFamily", +] + # # Wrapper classes, loading the right specs # Set schema='' to avoid jsonschema validation, it's slow From 3f74d5bb807ee500cb68f167df7dc4e4252a42ec Mon Sep 17 00:00:00 2001 From: David Wei Date: Thu, 5 Mar 2026 10:18:02 -0800 Subject: [PATCH 0343/1561] selftests/net: Add env for container based tests Add an env NetDrvContEnv for container based selftests. This automates the setup of a netns, netkit pair with one inside the netns, and a BPF program that forwards skbs from the NETIF host inside the container. Currently only netkit is used, but other virtual netdevs e.g. veth can be used too. Expect netkit container datapath selftests to have a publicly routable IP prefix to assign to netkit in a container, such that packets will land on eth0. The BPF skb forward program will then forward such packets from the host netns to the container netns. Signed-off-by: David Wei Signed-off-by: Daniel Borkmann Link: https://patch.msgid.link/20260305181803.2912736-4-dw@davidwei.uk Signed-off-by: Jakub Kicinski --- .../testing/selftests/drivers/net/README.rst | 38 ++++ .../drivers/net/hw/lib/py/__init__.py | 7 +- .../selftests/drivers/net/lib/py/__init__.py | 7 +- .../selftests/drivers/net/lib/py/env.py | 207 ++++++++++++++++++ 4 files changed, 253 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/drivers/net/README.rst b/tools/testing/selftests/drivers/net/README.rst index dc70a69ee885..c94992acf10b 100644 --- a/tools/testing/selftests/drivers/net/README.rst +++ b/tools/testing/selftests/drivers/net/README.rst @@ -66,6 +66,44 @@ LOCAL_V4, LOCAL_V6, REMOTE_V4, REMOTE_V6 Local and remote endpoint IP addresses. +LOCAL_PREFIX_V6 +~~~~~~~~~~~~~~~ + +Local IP prefix/subnet which can be used to allocate extra IP addresses (for +network name spaces behind macvlan, veth, netkit devices). DUT must be +reachable using these addresses from the endpoint. + +LOCAL_PREFIX_V6 must NOT match LOCAL_V6. + +Example: + NETIF = "eth0" + LOCAL_V6 = "2001:db8:1::1" + REMOTE_V6 = "2001:db8:1::2" + LOCAL_PREFIX_V6 = "2001:db8:2::0/64" + + +-----------------------------+ +------------------------------+ + dst | INIT NS | | TEST NS | + 2001: | +---------------+ | | | + db8:2::2| | NETIF | | bpf | | + +---|>| 2001:db8:1::1 | |redirect| +-------------------------+ | + | | | |-----------|--------|>| Netkit | | + | | +---------------+ | _peer | | nk_guest | | + | | +-------------+ Netkit pair | | | fe80::2/64 | | + | | | Netkit |.............|........|>| 2001:db8:2::2/64 | | + | | | nk_host | | | +-------------------------+ | + | | | fe80::1/64 | | | | + | | +-------------+ | | route: | + | | | | default | + | | route: | | via fe80::1 dev nk_guest | + | | 2001:db8:2::2/128 | +------------------------------+ + | | via fe80::2 dev nk_host | + | +-----------------------------+ + | + | +---------------+ + | | REMOTE | + +---| 2001:db8:1::2 | + +---------------+ + REMOTE_TYPE ~~~~~~~~~~~ diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py index 1971577d47e9..b8d9ae282390 100644 --- a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py @@ -3,6 +3,7 @@ """ Driver test environment (hardware-only tests). NetDrvEnv and NetDrvEpEnv are the main environment classes. +NetDrvContEnv extends NetDrvEpEnv with netkit container support. Former is for local host only tests, latter creates / connects to a remote endpoint. See NIPA wiki for more information about running and writing driver tests. @@ -30,7 +31,7 @@ try: from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \ ksft_ne, ksft_not_in, ksft_raises, ksft_true, ksft_gt, ksft_not_none from drivers.net.lib.py import GenerateTraffic, Remote, Iperf3Runner - from drivers.net.lib.py import NetDrvEnv, NetDrvEpEnv + from drivers.net.lib.py import NetDrvEnv, NetDrvEpEnv, NetDrvContEnv __all__ = ["NetNS", "NetNSEnter", "NetdevSimDev", "EthtoolFamily", "NetdevFamily", "NetshaperFamily", @@ -45,8 +46,8 @@ try: "ksft_eq", "ksft_ge", "ksft_in", "ksft_is", "ksft_lt", "ksft_ne", "ksft_not_in", "ksft_raises", "ksft_true", "ksft_gt", "ksft_not_none", "ksft_not_none", - "NetDrvEnv", "NetDrvEpEnv", "GenerateTraffic", "Remote", - "Iperf3Runner"] + "NetDrvEnv", "NetDrvEpEnv", "NetDrvContEnv", "GenerateTraffic", + "Remote", "Iperf3Runner"] except ModuleNotFoundError as e: print("Failed importing `net` library from kernel sources") print(str(e)) diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py index e5ef5e949c12..374d4f08dd05 100644 --- a/tools/testing/selftests/drivers/net/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py @@ -3,6 +3,7 @@ """ Driver test environment. NetDrvEnv and NetDrvEpEnv are the main environment classes. +NetDrvContEnv extends NetDrvEpEnv with netkit container support. Former is for local host only tests, latter creates / connects to a remote endpoint. See NIPA wiki for more information about running and writing driver tests. @@ -43,12 +44,12 @@ try: "ksft_ne", "ksft_not_in", "ksft_raises", "ksft_true", "ksft_gt", "ksft_not_none", "ksft_not_none"] - from .env import NetDrvEnv, NetDrvEpEnv + from .env import NetDrvEnv, NetDrvEpEnv, NetDrvContEnv from .load import GenerateTraffic, Iperf3Runner from .remote import Remote - __all__ += ["NetDrvEnv", "NetDrvEpEnv", "GenerateTraffic", "Remote", - "Iperf3Runner"] + __all__ += ["NetDrvEnv", "NetDrvEpEnv", "NetDrvContEnv", "GenerateTraffic", + "Remote", "Iperf3Runner"] except ModuleNotFoundError as e: print("Failed importing `net` library from kernel sources") print(str(e)) diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py index 41cc248ac848..ccff345fe1c1 100644 --- a/tools/testing/selftests/drivers/net/lib/py/env.py +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -1,13 +1,16 @@ # SPDX-License-Identifier: GPL-2.0 +import ipaddress import os import time +import json from pathlib import Path from lib.py import KsftSkipEx, KsftXfailEx from lib.py import ksft_setup, wait_file from lib.py import cmd, ethtool, ip, CmdExitFailure from lib.py import NetNS, NetdevSimDev from .remote import Remote +from . import bpftool, RtnlFamily, Netlink class NetDrvEnvBase: @@ -289,3 +292,207 @@ class NetDrvEpEnv(NetDrvEnvBase): data.get('stats-block-usecs', 0) / 1000 / 1000 time.sleep(self._stats_settle_time) + + +class NetDrvContEnv(NetDrvEpEnv): + """ + Class for an environment with a netkit pair setup for forwarding traffic + between the physical interface and a network namespace. + NETIF = "eth0" + LOCAL_V6 = "2001:db8:1::1" + REMOTE_V6 = "2001:db8:1::2" + LOCAL_PREFIX_V6 = "2001:db8:2::0/64" + + +-----------------------------+ +------------------------------+ + dst | INIT NS | | TEST NS | + 2001: | +---------------+ | | | + db8:2::2| | NETIF | | bpf | | + +---|>| 2001:db8:1::1 | |redirect| +-------------------------+ | + | | | |-----------|--------|>| Netkit | | + | | +---------------+ | _peer | | nk_guest | | + | | +-------------+ Netkit pair | | | fe80::2/64 | | + | | | Netkit |.............|........|>| 2001:db8:2::2/64 | | + | | | nk_host | | | +-------------------------+ | + | | | fe80::1/64 | | | | + | | +-------------+ | | route: | + | | | | default | + | | route: | | via fe80::1 dev nk_guest | + | | 2001:db8:2::2/128 | +------------------------------+ + | | via fe80::2 dev nk_host | + | +-----------------------------+ + | + | +---------------+ + | | REMOTE | + +---| 2001:db8:1::2 | + +---------------+ + """ + + def __init__(self, src_path, rxqueues=1, **kwargs): + self.netns = None + self._nk_host_ifname = None + self._nk_guest_ifname = None + self._tc_clsact_added = False + self._tc_attached = False + self._bpf_prog_pref = None + self._bpf_prog_id = None + self._init_ns_attached = False + self._old_fwd = None + self._old_accept_ra = None + + super().__init__(src_path, **kwargs) + + self.require_ipver("6") + local_prefix = self.env.get("LOCAL_PREFIX_V6") + if not local_prefix: + raise KsftSkipEx("LOCAL_PREFIX_V6 required") + + net = ipaddress.IPv6Network(local_prefix, strict=False) + self.ipv6_prefix = str(net.network_address) + self.nk_host_ipv6 = f"{self.ipv6_prefix}2:1" + self.nk_guest_ipv6 = f"{self.ipv6_prefix}2:2" + + local_v6 = ipaddress.IPv6Address(self.addr_v["6"]) + if local_v6 in net: + raise KsftSkipEx("LOCAL_V6 must not fall within LOCAL_PREFIX_V6") + + rtnl = RtnlFamily() + rtnl.newlink( + { + "linkinfo": { + "kind": "netkit", + "data": { + "mode": "l2", + "policy": "forward", + "peer-policy": "forward", + }, + }, + "num-rx-queues": rxqueues, + }, + flags=[Netlink.NLM_F_CREATE, Netlink.NLM_F_EXCL], + ) + + all_links = ip("-d link show", json=True) + netkit_links = [link for link in all_links + if link.get('linkinfo', {}).get('info_kind') == 'netkit' + and 'UP' not in link.get('flags', [])] + + if len(netkit_links) != 2: + raise KsftSkipEx("Failed to create netkit pair") + + netkit_links.sort(key=lambda x: x['ifindex']) + self._nk_host_ifname = netkit_links[1]['ifname'] + self._nk_guest_ifname = netkit_links[0]['ifname'] + self.nk_host_ifindex = netkit_links[1]['ifindex'] + self.nk_guest_ifindex = netkit_links[0]['ifindex'] + + self._setup_ns() + self._attach_bpf() + + def __del__(self): + if self._tc_attached: + cmd(f"tc filter del dev {self.ifname} ingress pref {self._bpf_prog_pref}") + self._tc_attached = False + + if self._tc_clsact_added: + cmd(f"tc qdisc del dev {self.ifname} clsact") + self._tc_clsact_added = False + + if self._nk_host_ifname: + cmd(f"ip link del dev {self._nk_host_ifname}") + self._nk_host_ifname = None + self._nk_guest_ifname = None + + if self._init_ns_attached: + cmd("ip netns del init", fail=False) + self._init_ns_attached = False + + if self.netns: + del self.netns + self.netns = None + + if self._old_fwd is not None: + with open("/proc/sys/net/ipv6/conf/all/forwarding", "w", + encoding="utf-8") as f: + f.write(self._old_fwd) + self._old_fwd = None + if self._old_accept_ra is not None: + with open("/proc/sys/net/ipv6/conf/all/accept_ra", "w", + encoding="utf-8") as f: + f.write(self._old_accept_ra) + self._old_accept_ra = None + + super().__del__() + + def _setup_ns(self): + fwd_path = "/proc/sys/net/ipv6/conf/all/forwarding" + ra_path = "/proc/sys/net/ipv6/conf/all/accept_ra" + with open(fwd_path, encoding="utf-8") as f: + self._old_fwd = f.read().strip() + with open(ra_path, encoding="utf-8") as f: + self._old_accept_ra = f.read().strip() + with open(fwd_path, "w", encoding="utf-8") as f: + f.write("1") + with open(ra_path, "w", encoding="utf-8") as f: + f.write("2") + + self.netns = NetNS() + cmd("ip netns attach init 1") + self._init_ns_attached = True + ip("netns set init 0", ns=self.netns) + ip(f"link set dev {self._nk_guest_ifname} netns {self.netns.name}") + ip(f"link set dev {self._nk_host_ifname} up") + ip(f"-6 addr add fe80::1/64 dev {self._nk_host_ifname} nodad") + ip(f"-6 route add {self.nk_guest_ipv6}/128 via fe80::2 dev {self._nk_host_ifname}") + + ip("link set lo up", ns=self.netns) + ip(f"link set dev {self._nk_guest_ifname} up", ns=self.netns) + ip(f"-6 addr add fe80::2/64 dev {self._nk_guest_ifname}", ns=self.netns) + ip(f"-6 addr add {self.nk_guest_ipv6}/64 dev {self._nk_guest_ifname} nodad", ns=self.netns) + ip(f"-6 route add default via fe80::1 dev {self._nk_guest_ifname}", ns=self.netns) + + def _tc_ensure_clsact(self): + qdisc = json.loads(cmd(f"tc -j qdisc show dev {self.ifname}").stdout) + for q in qdisc: + if q['kind'] == 'clsact': + return + cmd(f"tc qdisc add dev {self.ifname} clsact") + self._tc_clsact_added = True + + def _get_bpf_prog_ids(self): + filters = json.loads(cmd(f"tc -j filter show dev {self.ifname} ingress").stdout) + for bpf in filters: + if 'options' not in bpf: + continue + if bpf['options']['bpf_name'].startswith('nk_forward.bpf'): + return (bpf['pref'], bpf['options']['prog']['id']) + raise Exception("Failed to get BPF prog ID") + + def _attach_bpf(self): + bpf_obj = self.test_dir / "nk_forward.bpf.o" + if not bpf_obj.exists(): + raise KsftSkipEx("BPF prog not found") + + self._tc_ensure_clsact() + cmd(f"tc filter add dev {self.ifname} ingress bpf obj {bpf_obj}" + " sec tc/ingress direct-action") + self._tc_attached = True + + (self._bpf_prog_pref, self._bpf_prog_id) = self._get_bpf_prog_ids() + prog_info = bpftool(f"prog show id {self._bpf_prog_id}", json=True) + map_ids = prog_info.get("map_ids", []) + + bss_map_id = None + for map_id in map_ids: + map_info = bpftool(f"map show id {map_id}", json=True) + if map_info.get("name").endswith("bss"): + bss_map_id = map_id + + if bss_map_id is None: + raise Exception("Failed to find .bss map") + + ipv6_addr = ipaddress.IPv6Address(self.ipv6_prefix) + ipv6_bytes = ipv6_addr.packed + ifindex_bytes = self.nk_host_ifindex.to_bytes(4, byteorder='little') + value = ipv6_bytes + ifindex_bytes + value_hex = ' '.join(f'{b:02x}' for b in value) + bpftool(f"map update id {bss_map_id} key hex 00 00 00 00 value hex {value_hex}") From 37d24994a7a0ad777c80e2b90c3a4a528753d70d Mon Sep 17 00:00:00 2001 From: David Wei Date: Thu, 5 Mar 2026 10:18:03 -0800 Subject: [PATCH 0344/1561] selftests/net: Add netkit container ping test Add a basic ping test using NetDrvContEnv that sets up a netkit pair, with one end in a netns. Use LOCAL_PREFIX_V6 and nk_forward BPF program to ping from a remote host to the netkit in netns. Signed-off-by: David Wei Signed-off-by: Daniel Borkmann Link: https://patch.msgid.link/20260305181803.2912736-5-dw@davidwei.uk Signed-off-by: Jakub Kicinski --- .../testing/selftests/drivers/net/hw/Makefile | 1 + tools/testing/selftests/drivers/net/hw/config | 3 ++ .../selftests/drivers/net/hw/nk_netns.py | 29 +++++++++++++++++++ 3 files changed, 33 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/hw/nk_netns.py diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile index a64140333a46..91df028abfc0 100644 --- a/tools/testing/selftests/drivers/net/hw/Makefile +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -32,6 +32,7 @@ TEST_PROGS = \ irq.py \ loopback.sh \ nic_timestamp.py \ + nk_netns.py \ pp_alloc_fail.py \ rss_api.py \ rss_ctx.py \ diff --git a/tools/testing/selftests/drivers/net/hw/config b/tools/testing/selftests/drivers/net/hw/config index 2307aa001be1..235c0a1cd21e 100644 --- a/tools/testing/selftests/drivers/net/hw/config +++ b/tools/testing/selftests/drivers/net/hw/config @@ -1,3 +1,4 @@ +CONFIG_BPF_SYSCALL=y CONFIG_FAIL_FUNCTION=y CONFIG_FAULT_INJECTION=y CONFIG_FAULT_INJECTION_DEBUG_FS=y @@ -5,7 +6,9 @@ CONFIG_FUNCTION_ERROR_INJECTION=y CONFIG_IO_URING=y CONFIG_IPV6=y CONFIG_IPV6_GRE=y +CONFIG_NET_CLS_BPF=y CONFIG_NET_IPGRE=y CONFIG_NET_IPGRE_DEMUX=y +CONFIG_NETKIT=y CONFIG_UDMABUF=y CONFIG_VXLAN=y diff --git a/tools/testing/selftests/drivers/net/hw/nk_netns.py b/tools/testing/selftests/drivers/net/hw/nk_netns.py new file mode 100755 index 000000000000..8b7ab75aa27f --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/nk_netns.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Test exercising NetDrvContEnv() itself, a NetDrvContEnv() selftest. +""" + +from lib.py import ksft_run, ksft_exit +from lib.py import NetDrvContEnv +from lib.py import cmd + + +def test_ping(cfg) -> None: + """ Run ping between the container and the remote system. """ + cfg.require_ipver("6") + + cmd(f"ping -c 1 -W5 {cfg.nk_guest_ipv6}", host=cfg.remote) + cmd(f"ping -c 1 -W5 {cfg.remote_addr_v['6']}", ns=cfg.netns) + + +def main() -> None: + """ Ksft boiler plate main """ + with NetDrvContEnv(__file__) as cfg: + ksft_run([test_ping], args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() From 8e235bc43326f47adfd77d926c7d6ad39077569a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 4 Mar 2026 07:16:46 -0800 Subject: [PATCH 0345/1561] docs: netdev: refine netdevsim testing guidance The library to create tests for both NIC HW and netdevsim has existed for almost a year. netdevsim-only tests we get increasingly feel like a waste, we should try to write tests that work both on netdevsim and real HW. Refine the guidance accordingly. Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260304151647.2770466-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/process/maintainer-netdev.rst | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/Documentation/process/maintainer-netdev.rst b/Documentation/process/maintainer-netdev.rst index 6bce4507d5d3..3aa13bc2405d 100644 --- a/Documentation/process/maintainer-netdev.rst +++ b/Documentation/process/maintainer-netdev.rst @@ -479,8 +479,14 @@ netdevsim ``netdevsim`` is a test driver which can be used to exercise driver configuration APIs without requiring capable hardware. -Mock-ups and tests based on ``netdevsim`` are strongly encouraged when -adding new APIs, but ``netdevsim`` in itself is **not** considered +Mock-ups and tests based on ``netdevsim`` are encouraged when +adding new APIs with complex logic in the stack. The tests should +be written so that they can run both against ``netdevsim`` and a real +device (see ``tools/testing/selftests/drivers/net/README.rst``). +``netdevsim``-only tests should focus on testing corner cases +and failure paths in the core which are hard to exercise with a real driver. + +``netdevsim`` in itself is **not** considered a use case/user. You must also implement the new APIs in a real driver. We give no guarantees that ``netdevsim`` won't change in the future From 1a9940317c1b46b7641af6b4b14c9d2509ac88b2 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 5 Mar 2026 18:42:04 +0100 Subject: [PATCH 0346/1561] Revert "net: phy: improve mdiobus_stats_acct" This reverts commit 1afccc5a201ec7c9023370958bae1312369b64da. As reported by Marek the change causes a warning on non-PREEMPT_RT 32 bit systems. Reported-by: Marek Szyprowski Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/c3a1aba9-3fae-4c4b-bcb1-fb620fb7a309@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/mdio_bus.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index b32a369cd25f..48c0447e6a8f 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -358,17 +358,22 @@ EXPORT_SYMBOL(of_mdio_find_bus); static void mdiobus_stats_acct(struct mdio_bus_stats *stats, bool op, int ret) { + preempt_disable(); u64_stats_update_begin(&stats->syncp); u64_stats_inc(&stats->transfers); - if (ret < 0) + if (ret < 0) { u64_stats_inc(&stats->errors); - else if (op) + goto out; + } + + if (op) u64_stats_inc(&stats->reads); else u64_stats_inc(&stats->writes); - +out: u64_stats_update_end(&stats->syncp); + preempt_enable(); } /** From 507ccb668f2db1377defdc54068eab3398bd5974 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Thu, 5 Mar 2026 10:04:45 +0200 Subject: [PATCH 0347/1561] selftests: drv-net: iou-zcrx: wait for memory cleanup of probe run The large chunks test does a probe run of iou-zcrx before it runs the actual test. After the probe run finishes, the context will still exist until the deferred io_uring teardown. When running iou-zcrx the second time, io_uring_register_ifq() can return -EEXIST due to the existence of the old context. The fix is simple: wait for the context teardown using the new mp_clear_wait() utility before running the second instance of iou-zcrx. Signed-off-by: Dragos Tatulea Link: https://patch.msgid.link/20260305080446.897628-2-dtatulea@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/iou-zcrx.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py index 66dd496ec5cf..e81724cb5542 100755 --- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py +++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py @@ -151,6 +151,7 @@ def test_zcrx_large_chunks(cfg) -> None: if probe.ret == SKIP_CODE: raise KsftSkipEx(probe.stdout.strip()) + mp_clear_wait(cfg) with bkg(rx_cmd, exit_wait=True): wait_port_listen(cfg.port, proto="tcp") cmd(tx_cmd, host=cfg.remote) From 4c7e0e081889be876e2d9ee23332c75b6207d5eb Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 5 Mar 2026 10:42:26 +0000 Subject: [PATCH 0348/1561] net: stmmac: mdio: convert MDC clock divisor selection to tables Convert the MDC clock divisor selection to tabular format. Note that there is a change for 300MHz, but this is not a problem, as the MDC clock remains within the useable ranges, which are: STMMAC_CSR_500_800M /324 1.54 - 2.47MHz STMMAC_CSR_300_500M /204 1.47 - 2.45MHz STMMAC_CSR_250_300M /124 2.02 - 2.42MHz STMMAC_CSR_150_250M /102 1.47 - 2.45MHz STMMAC_CSR_100_150M /62 1.61 - 2.42MHz STMMAC_CSR_60_100M /42 1.43 - 2.38MHz STMMAC_CSR_35_60M /26 1.35 - 2.31MHz STMMAC_CSR_20_35M /16 1.25 - 2.19MHz Thus, with the change of divisor for exactly 300MHz, MDC temporarily changes from 2.42MHz to 1.47MHz for the sake of consistency. The databook does not specify whether the frequency limits for the CSR divider are inclusive or exclusive. Signed-off-by: Russell King (Oracle) Tested-by: Maxime Chevallier Link: https://patch.msgid.link/E1vy69y-0000000Btwd-3oq7@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_mdio.c | 98 ++++++++++++------- 1 file changed, 62 insertions(+), 36 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index 485a0d790baa..c4123d2260bd 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -473,6 +473,52 @@ void stmmac_pcs_clean(struct net_device *ndev) priv->hw->xpcs = NULL; } +struct stmmac_clk_rate { + unsigned long rate; + u8 cr; +}; + +/* The standard clk_csr_i to GMII_Address CR field mapping. The rate provided + * in this table is the exclusive maximum frequency for the divisor. The + * comments for each entry give the divisor and the resulting range of MDC + * clock frequencies. + */ +static const struct stmmac_clk_rate stmmac_std_csr_to_mdc[] = { + { CSR_F_35M, STMMAC_CSR_20_35M }, + { CSR_F_60M, STMMAC_CSR_35_60M }, + { CSR_F_100M, STMMAC_CSR_60_100M }, + { CSR_F_150M, STMMAC_CSR_100_150M }, + { CSR_F_250M, STMMAC_CSR_150_250M }, + { CSR_F_300M, STMMAC_CSR_250_300M }, + { CSR_F_500M, STMMAC_CSR_300_500M }, + { CSR_F_800M, STMMAC_CSR_500_800M }, + { }, +}; + +/* The sun8i clk_csr_i to GMII_Address CR field mapping uses rate as the + * exclusive minimum frequency for the divisor. Note that the last entry + * is valid and also acts as the sentinel. + */ +static const struct stmmac_clk_rate stmmac_sun8i_csr_to_mdc[] = { + { 160000000, 3 }, + { 80000000, 2 }, + { 40000000, 1 }, + { 0, 0 }, +}; + +/* The xgmac clk_csr_i to GMII_Address CR field mapping similarly uses rate + * as the exclusive minimum frequency for the divisor, and again the last + * entry is valid and also the sentinel. + */ +static const struct stmmac_clk_rate stmmac_xgmac_csr_to_mdc[] = { + { 400000000, 5 }, + { 350000000, 4 }, + { 300000000, 3 }, + { 250000000, 2 }, + { 150000000, 1 }, + { 0, 0 }, +}; + /** * stmmac_clk_csr_set - dynamically set the MDC clock * @priv: driver private structure @@ -490,6 +536,7 @@ static u32 stmmac_clk_csr_set(struct stmmac_priv *priv) { unsigned long clk_rate; u32 value = ~0; + int i; clk_rate = clk_get_rate(priv->plat->stmmac_clk); @@ -500,47 +547,26 @@ static u32 stmmac_clk_csr_set(struct stmmac_priv *priv) * the frequency of clk_csr_i. So we do not change the default * divider. */ - if (clk_rate < CSR_F_35M) - value = STMMAC_CSR_20_35M; - else if (clk_rate < CSR_F_60M) - value = STMMAC_CSR_35_60M; - else if (clk_rate < CSR_F_100M) - value = STMMAC_CSR_60_100M; - else if (clk_rate < CSR_F_150M) - value = STMMAC_CSR_100_150M; - else if (clk_rate < CSR_F_250M) - value = STMMAC_CSR_150_250M; - else if (clk_rate <= CSR_F_300M) - value = STMMAC_CSR_250_300M; - else if (clk_rate < CSR_F_500M) - value = STMMAC_CSR_300_500M; - else if (clk_rate < CSR_F_800M) - value = STMMAC_CSR_500_800M; + for (i = 0; stmmac_std_csr_to_mdc[i].rate; i++) + if (clk_rate < stmmac_std_csr_to_mdc[i].rate) { + value = stmmac_std_csr_to_mdc[i].cr; + break; + } if (priv->plat->flags & STMMAC_FLAG_HAS_SUN8I) { - if (clk_rate > 160000000) - value = 0x03; - else if (clk_rate > 80000000) - value = 0x02; - else if (clk_rate > 40000000) - value = 0x01; - else - value = 0; + /* Note the different test - this is intentional. */ + for (i = 0; stmmac_sun8i_csr_to_mdc[i].rate; i++) + if (clk_rate > stmmac_sun8i_csr_to_mdc[i].rate) + break; + value = stmmac_sun8i_csr_to_mdc[i].cr; } if (priv->plat->core_type == DWMAC_CORE_XGMAC) { - if (clk_rate > 400000000) - value = 0x5; - else if (clk_rate > 350000000) - value = 0x4; - else if (clk_rate > 300000000) - value = 0x3; - else if (clk_rate > 250000000) - value = 0x2; - else if (clk_rate > 150000000) - value = 0x1; - else - value = 0x0; + /* Note the different test - this is intentional. */ + for (i = 0; stmmac_xgmac_csr_to_mdc[i].rate; i++) + if (clk_rate > stmmac_xgmac_csr_to_mdc[i].rate) + break; + value = stmmac_xgmac_csr_to_mdc[i].cr; } return value; From b6687ef976036d3a3e672384f47de5742b2574c7 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 5 Mar 2026 10:42:32 +0000 Subject: [PATCH 0349/1561] net: stmmac: mdio: use same test for MDC clock divisor lookups Use the same frequency test for all clk_csr value lookups (clock rate > table rate). This has the side effect that the standard rate table results in the divider being used for the maximum frequency for the divider rather than the next higher divider. This still allows MDC to meet the IEE 802.3 specification, but at a rate closer to 2.5MHz for these frequencies. Signed-off-by: Russell King (Oracle) Tested-by: Maxime Chevallier Link: https://patch.msgid.link/E1vy6A4-0000000Btwj-0ATB@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/common.h | 1 + .../net/ethernet/stmicro/stmmac/stmmac_mdio.c | 27 +++++++++---------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 42a48f655849..e4ce1167ebab 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -257,6 +257,7 @@ struct stmmac_safety_stats { (sizeof(struct stmmac_safety_stats) / sizeof(unsigned long)) /* CSR Frequency Access Defines*/ +#define CSR_F_20M 20000000 #define CSR_F_35M 35000000 #define CSR_F_60M 60000000 #define CSR_F_100M 100000000 diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index c4123d2260bd..6292911fb54b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -484,15 +484,16 @@ struct stmmac_clk_rate { * clock frequencies. */ static const struct stmmac_clk_rate stmmac_std_csr_to_mdc[] = { - { CSR_F_35M, STMMAC_CSR_20_35M }, - { CSR_F_60M, STMMAC_CSR_35_60M }, - { CSR_F_100M, STMMAC_CSR_60_100M }, - { CSR_F_150M, STMMAC_CSR_100_150M }, - { CSR_F_250M, STMMAC_CSR_150_250M }, - { CSR_F_300M, STMMAC_CSR_250_300M }, - { CSR_F_500M, STMMAC_CSR_300_500M }, - { CSR_F_800M, STMMAC_CSR_500_800M }, - { }, + { CSR_F_800M, ~0 }, + { CSR_F_500M, STMMAC_CSR_500_800M }, + { CSR_F_300M, STMMAC_CSR_300_500M }, + { CSR_F_250M, STMMAC_CSR_250_300M }, + { CSR_F_150M, STMMAC_CSR_150_250M }, + { CSR_F_100M, STMMAC_CSR_100_150M }, + { CSR_F_60M, STMMAC_CSR_60_100M }, + { CSR_F_35M, STMMAC_CSR_35_60M }, + { CSR_F_20M, STMMAC_CSR_20_35M }, + { 0, ~0 }, }; /* The sun8i clk_csr_i to GMII_Address CR field mapping uses rate as the @@ -548,13 +549,12 @@ static u32 stmmac_clk_csr_set(struct stmmac_priv *priv) * divider. */ for (i = 0; stmmac_std_csr_to_mdc[i].rate; i++) - if (clk_rate < stmmac_std_csr_to_mdc[i].rate) { - value = stmmac_std_csr_to_mdc[i].cr; + if (clk_rate > stmmac_std_csr_to_mdc[i].rate) break; - } + if (stmmac_std_csr_to_mdc[i].cr != (u8)~0) + value = stmmac_std_csr_to_mdc[i].cr; if (priv->plat->flags & STMMAC_FLAG_HAS_SUN8I) { - /* Note the different test - this is intentional. */ for (i = 0; stmmac_sun8i_csr_to_mdc[i].rate; i++) if (clk_rate > stmmac_sun8i_csr_to_mdc[i].rate) break; @@ -562,7 +562,6 @@ static u32 stmmac_clk_csr_set(struct stmmac_priv *priv) } if (priv->plat->core_type == DWMAC_CORE_XGMAC) { - /* Note the different test - this is intentional. */ for (i = 0; stmmac_xgmac_csr_to_mdc[i].rate; i++) if (clk_rate > stmmac_xgmac_csr_to_mdc[i].rate) break; From 506f78f43c58094009b2d62ed0f8070bb4635954 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 5 Mar 2026 10:42:37 +0000 Subject: [PATCH 0350/1561] net: stmmac: mdio: simplify MDC clock divisor lookup As each lookup now iterates over each table in the same way, simplfy the code to select the table, and then walk that table. Signed-off-by: Russell King (Oracle) Tested-by: Maxime Chevallier Link: https://patch.msgid.link/E1vy6A9-0000000Btwp-0lxY@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_mdio.c | 29 +++++++------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index 6292911fb54b..c9f0b8b601d2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -535,6 +535,7 @@ static const struct stmmac_clk_rate stmmac_xgmac_csr_to_mdc[] = { */ static u32 stmmac_clk_csr_set(struct stmmac_priv *priv) { + const struct stmmac_clk_rate *rates; unsigned long clk_rate; u32 value = ~0; int i; @@ -548,25 +549,17 @@ static u32 stmmac_clk_csr_set(struct stmmac_priv *priv) * the frequency of clk_csr_i. So we do not change the default * divider. */ - for (i = 0; stmmac_std_csr_to_mdc[i].rate; i++) - if (clk_rate > stmmac_std_csr_to_mdc[i].rate) + rates = stmmac_std_csr_to_mdc; + if (priv->plat->flags & STMMAC_FLAG_HAS_SUN8I) + rates = stmmac_sun8i_csr_to_mdc; + if (priv->plat->core_type == DWMAC_CORE_XGMAC) + rates = stmmac_xgmac_csr_to_mdc; + + for (i = 0; rates[i].rate; i++) + if (clk_rate > rates[i].rate) break; - if (stmmac_std_csr_to_mdc[i].cr != (u8)~0) - value = stmmac_std_csr_to_mdc[i].cr; - - if (priv->plat->flags & STMMAC_FLAG_HAS_SUN8I) { - for (i = 0; stmmac_sun8i_csr_to_mdc[i].rate; i++) - if (clk_rate > stmmac_sun8i_csr_to_mdc[i].rate) - break; - value = stmmac_sun8i_csr_to_mdc[i].cr; - } - - if (priv->plat->core_type == DWMAC_CORE_XGMAC) { - for (i = 0; stmmac_xgmac_csr_to_mdc[i].rate; i++) - if (clk_rate > stmmac_xgmac_csr_to_mdc[i].rate) - break; - value = stmmac_xgmac_csr_to_mdc[i].cr; - } + if (rates[i].cr != (u8)~0) + value = rates[i].cr; return value; } From 58bd0039002ba58019138608de4f2a3041ae0026 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 5 Mar 2026 10:42:42 +0000 Subject: [PATCH 0351/1561] net: stmmac: mdio: convert field prep to use field_prep() Convert the MDIO field preparation to use field_prep(), which removes the need to store separate mask and shifts. Also convert the clk_csr value using __ffs() to do the shift as we need to detect overflows for this. Signed-off-by: Russell King (Oracle) Reviewed-by: Maxime Chevallier Tested-by: Maxime Chevallier Link: https://patch.msgid.link/E1vy6AE-0000000Btwv-1LM4@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/common.h | 3 --- drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c | 3 --- drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 3 --- drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c | 3 --- drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c | 3 --- drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 3 --- drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c | 6 ------ drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 6 +++--- 8 files changed, 3 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index e4ce1167ebab..978f90065681 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -608,11 +608,8 @@ struct mac_link { struct mii_regs { unsigned int addr; /* MII Address */ unsigned int data; /* MII Data */ - unsigned int addr_shift; /* MII address shift */ - unsigned int reg_shift; /* MII reg shift */ unsigned int addr_mask; /* MII address mask */ unsigned int reg_mask; /* MII reg mask */ - unsigned int clk_csr_shift; unsigned int clk_csr_mask; }; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c index 9c51c96223ad..6dfccc969d0e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -367,11 +367,8 @@ static int loongson_dwmac_setup(void *apriv, struct mac_device_info *mac) mac->link.speed_mask = GMAC_CONTROL_PS | GMAC_CONTROL_FES; mac->mii.addr = GMAC_MII_ADDR; mac->mii.data = GMAC_MII_DATA; - mac->mii.addr_shift = 11; mac->mii.addr_mask = 0x0000F800; - mac->mii.reg_shift = 6; mac->mii.reg_mask = 0x000007C0; - mac->mii.clk_csr_shift = 2; mac->mii.clk_csr_mask = GENMASK(5, 2); return 0; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index c01b86fd64da..6b18072112db 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -1063,11 +1063,8 @@ static int sun8i_dwmac_setup(void *ppriv, struct mac_device_info *mac) mac->link.duplex = EMAC_DUPLEX_FULL; mac->mii.addr = EMAC_MDIO_CMD; mac->mii.data = EMAC_MDIO_DATA; - mac->mii.reg_shift = 4; mac->mii.reg_mask = GENMASK(8, 4); - mac->mii.addr_shift = 12; mac->mii.addr_mask = GENMASK(16, 12); - mac->mii.clk_csr_shift = 20; mac->mii.clk_csr_mask = GENMASK(22, 20); mac->unicast_filter_entries = 8; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index af566636fad9..10acca806f4f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -496,11 +496,8 @@ int dwmac1000_setup(struct stmmac_priv *priv) mac->link.speed_mask = GMAC_CONTROL_PS | GMAC_CONTROL_FES; mac->mii.addr = GMAC_MII_ADDR; mac->mii.data = GMAC_MII_DATA; - mac->mii.addr_shift = 11; mac->mii.addr_mask = 0x0000F800; - mac->mii.reg_shift = 6; mac->mii.reg_mask = 0x000007C0; - mac->mii.clk_csr_shift = 2; mac->mii.clk_csr_mask = GENMASK(5, 2); return 0; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c index db4fbe64a38a..911753b9889e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c @@ -184,11 +184,8 @@ int dwmac100_setup(struct stmmac_priv *priv) mac->link.speed_mask = MAC_CONTROL_PS; mac->mii.addr = MAC_MII_ADDR; mac->mii.data = MAC_MII_DATA; - mac->mii.addr_shift = 11; mac->mii.addr_mask = 0x0000F800; - mac->mii.reg_shift = 6; mac->mii.reg_mask = 0x000007C0; - mac->mii.clk_csr_shift = 2; mac->mii.clk_csr_mask = GENMASK(5, 2); return 0; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 623868afe93d..0e87590c806b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -1030,11 +1030,8 @@ int dwmac4_setup(struct stmmac_priv *priv) mac->link.speed_mask = GMAC_CONFIG_FES | GMAC_CONFIG_PS; mac->mii.addr = GMAC_MDIO_ADDR; mac->mii.data = GMAC_MDIO_DATA; - mac->mii.addr_shift = 21; mac->mii.addr_mask = GENMASK(25, 21); - mac->mii.reg_shift = 16; mac->mii.reg_mask = GENMASK(20, 16); - mac->mii.clk_csr_shift = 8; mac->mii.clk_csr_mask = GENMASK(11, 8); mac->num_vlan = stmmac_get_num_vlan(priv->ioaddr); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index 49893b9fb88c..ba98e22f1c12 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -1551,11 +1551,8 @@ int dwxgmac2_setup(struct stmmac_priv *priv) mac->mii.addr = XGMAC_MDIO_ADDR; mac->mii.data = XGMAC_MDIO_DATA; - mac->mii.addr_shift = 16; mac->mii.addr_mask = GENMASK(20, 16); - mac->mii.reg_shift = 0; mac->mii.reg_mask = GENMASK(15, 0); - mac->mii.clk_csr_shift = 19; mac->mii.clk_csr_mask = GENMASK(21, 19); mac->num_vlan = stmmac_get_num_vlan(priv->ioaddr); @@ -1594,11 +1591,8 @@ int dwxlgmac2_setup(struct stmmac_priv *priv) mac->mii.addr = XGMAC_MDIO_ADDR; mac->mii.data = XGMAC_MDIO_DATA; - mac->mii.addr_shift = 16; mac->mii.addr_mask = GENMASK(20, 16); - mac->mii.reg_shift = 0; mac->mii.reg_mask = GENMASK(15, 0); - mac->mii.clk_csr_shift = 19; mac->mii.clk_csr_mask = GENMASK(21, 19); return 0; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index c9f0b8b601d2..afe98ff5bdcb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -234,8 +234,8 @@ static u32 stmmac_mdio_format_addr(struct stmmac_priv *priv, { const struct mii_regs *mii_regs = &priv->hw->mii; - return ((pa << mii_regs->addr_shift) & mii_regs->addr_mask) | - ((gr << mii_regs->reg_shift) & mii_regs->reg_mask) | + return field_prep(mii_regs->addr_mask, pa) | + field_prep(mii_regs->reg_mask, gr) | priv->gmii_address_bus_config | MII_ADDR_GBUSY; } @@ -577,7 +577,7 @@ static void stmmac_mdio_bus_config(struct stmmac_priv *priv) else value = stmmac_clk_csr_set(priv); - value <<= priv->hw->mii.clk_csr_shift; + value <<= __ffs(priv->hw->mii.clk_csr_mask); if (value & ~priv->hw->mii.clk_csr_mask) dev_warn(priv->device, From df388b4d3913fe2bb1356355ffadb126f70f82d1 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 5 Mar 2026 10:42:47 +0000 Subject: [PATCH 0352/1561] net: stmmac: use u32 for MDIO register field masks MDIO registers are 32-bit, so use u32 to describe the masks for these registers. Convert the GENMASK() initialisers to GENMASK_U32() for type compatibility. Signed-off-by: Russell King (Oracle) Tested-by: Maxime Chevallier Link: https://patch.msgid.link/E1vy6AJ-0000000Btx1-1teC@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/common.h | 6 +++--- drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c | 2 +- drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 6 +++--- drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c | 2 +- drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c | 2 +- drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 6 +++--- drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c | 12 ++++++------ 7 files changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 978f90065681..46454e2886ce 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -608,9 +608,9 @@ struct mac_link { struct mii_regs { unsigned int addr; /* MII Address */ unsigned int data; /* MII Data */ - unsigned int addr_mask; /* MII address mask */ - unsigned int reg_mask; /* MII reg mask */ - unsigned int clk_csr_mask; + u32 addr_mask; /* MII address mask */ + u32 reg_mask; /* MII reg mask */ + u32 clk_csr_mask; }; struct mac_device_info { diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c index 6dfccc969d0e..a6f8e78856aa 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -369,7 +369,7 @@ static int loongson_dwmac_setup(void *apriv, struct mac_device_info *mac) mac->mii.data = GMAC_MII_DATA; mac->mii.addr_mask = 0x0000F800; mac->mii.reg_mask = 0x000007C0; - mac->mii.clk_csr_mask = GENMASK(5, 2); + mac->mii.clk_csr_mask = GENMASK_U32(5, 2); return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 6b18072112db..3ce03b059277 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -1063,9 +1063,9 @@ static int sun8i_dwmac_setup(void *ppriv, struct mac_device_info *mac) mac->link.duplex = EMAC_DUPLEX_FULL; mac->mii.addr = EMAC_MDIO_CMD; mac->mii.data = EMAC_MDIO_DATA; - mac->mii.reg_mask = GENMASK(8, 4); - mac->mii.addr_mask = GENMASK(16, 12); - mac->mii.clk_csr_mask = GENMASK(22, 20); + mac->mii.reg_mask = GENMASK_U32(8, 4); + mac->mii.addr_mask = GENMASK_U32(16, 12); + mac->mii.clk_csr_mask = GENMASK_U32(22, 20); mac->unicast_filter_entries = 8; /* Synopsys Id is not available */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index 10acca806f4f..4ea55be04c5c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -498,7 +498,7 @@ int dwmac1000_setup(struct stmmac_priv *priv) mac->mii.data = GMAC_MII_DATA; mac->mii.addr_mask = 0x0000F800; mac->mii.reg_mask = 0x000007C0; - mac->mii.clk_csr_mask = GENMASK(5, 2); + mac->mii.clk_csr_mask = GENMASK_U32(5, 2); return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c index 911753b9889e..7c91ea2775d4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c @@ -186,7 +186,7 @@ int dwmac100_setup(struct stmmac_priv *priv) mac->mii.data = MAC_MII_DATA; mac->mii.addr_mask = 0x0000F800; mac->mii.reg_mask = 0x000007C0; - mac->mii.clk_csr_mask = GENMASK(5, 2); + mac->mii.clk_csr_mask = GENMASK_U32(5, 2); return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 0e87590c806b..602771e19d0f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -1030,9 +1030,9 @@ int dwmac4_setup(struct stmmac_priv *priv) mac->link.speed_mask = GMAC_CONFIG_FES | GMAC_CONFIG_PS; mac->mii.addr = GMAC_MDIO_ADDR; mac->mii.data = GMAC_MDIO_DATA; - mac->mii.addr_mask = GENMASK(25, 21); - mac->mii.reg_mask = GENMASK(20, 16); - mac->mii.clk_csr_mask = GENMASK(11, 8); + mac->mii.addr_mask = GENMASK_U32(25, 21); + mac->mii.reg_mask = GENMASK_U32(20, 16); + mac->mii.clk_csr_mask = GENMASK_U32(11, 8); mac->num_vlan = stmmac_get_num_vlan(priv->ioaddr); return 0; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index ba98e22f1c12..915e7c2ab11f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -1551,9 +1551,9 @@ int dwxgmac2_setup(struct stmmac_priv *priv) mac->mii.addr = XGMAC_MDIO_ADDR; mac->mii.data = XGMAC_MDIO_DATA; - mac->mii.addr_mask = GENMASK(20, 16); - mac->mii.reg_mask = GENMASK(15, 0); - mac->mii.clk_csr_mask = GENMASK(21, 19); + mac->mii.addr_mask = GENMASK_U32(20, 16); + mac->mii.reg_mask = GENMASK_U32(15, 0); + mac->mii.clk_csr_mask = GENMASK_U32(21, 19); mac->num_vlan = stmmac_get_num_vlan(priv->ioaddr); return 0; @@ -1591,9 +1591,9 @@ int dwxlgmac2_setup(struct stmmac_priv *priv) mac->mii.addr = XGMAC_MDIO_ADDR; mac->mii.data = XGMAC_MDIO_DATA; - mac->mii.addr_mask = GENMASK(20, 16); - mac->mii.reg_mask = GENMASK(15, 0); - mac->mii.clk_csr_mask = GENMASK(21, 19); + mac->mii.addr_mask = GENMASK_U32(20, 16); + mac->mii.reg_mask = GENMASK_U32(15, 0); + mac->mii.clk_csr_mask = GENMASK_U32(21, 19); return 0; } From a64d927aecf17827a02976fef8ba7b7b04a4b26f Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 5 Mar 2026 10:42:52 +0000 Subject: [PATCH 0353/1561] net: stmmac: use GENMASK_U32() for mdio bitfields Rather than using hex numbers, use GENMASK() for mdio bitfields. Signed-off-by: Russell King (Oracle) Reviewed-by: Maxime Chevallier Tested-by: Maxime Chevallier Link: https://patch.msgid.link/E1vy6AO-0000000Btx7-2NDV@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c | 4 ++-- drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c | 4 ++-- drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c index a6f8e78856aa..b913fe5af488 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -367,8 +367,8 @@ static int loongson_dwmac_setup(void *apriv, struct mac_device_info *mac) mac->link.speed_mask = GMAC_CONTROL_PS | GMAC_CONTROL_FES; mac->mii.addr = GMAC_MII_ADDR; mac->mii.data = GMAC_MII_DATA; - mac->mii.addr_mask = 0x0000F800; - mac->mii.reg_mask = 0x000007C0; + mac->mii.addr_mask = GENMASK_U32(15, 11); + mac->mii.reg_mask = GENMASK_U32(10, 6); mac->mii.clk_csr_mask = GENMASK_U32(5, 2); return 0; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index 4ea55be04c5c..c7cb30672604 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -496,8 +496,8 @@ int dwmac1000_setup(struct stmmac_priv *priv) mac->link.speed_mask = GMAC_CONTROL_PS | GMAC_CONTROL_FES; mac->mii.addr = GMAC_MII_ADDR; mac->mii.data = GMAC_MII_DATA; - mac->mii.addr_mask = 0x0000F800; - mac->mii.reg_mask = 0x000007C0; + mac->mii.addr_mask = GENMASK_U32(15, 11); + mac->mii.reg_mask = GENMASK_U32(10, 6); mac->mii.clk_csr_mask = GENMASK_U32(5, 2); return 0; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c index 7c91ea2775d4..6b5cf3a0866a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c @@ -184,8 +184,8 @@ int dwmac100_setup(struct stmmac_priv *priv) mac->link.speed_mask = MAC_CONTROL_PS; mac->mii.addr = MAC_MII_ADDR; mac->mii.data = MAC_MII_DATA; - mac->mii.addr_mask = 0x0000F800; - mac->mii.reg_mask = 0x000007C0; + mac->mii.addr_mask = GENMASK_U32(15, 11); + mac->mii.reg_mask = GENMASK_U32(10, 6); mac->mii.clk_csr_mask = GENMASK_U32(5, 2); return 0; From 3cd963fa915c494a6d0da0287bd10cb6f2204f9e Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 5 Mar 2026 10:42:57 +0000 Subject: [PATCH 0354/1561] net: stmmac: mdio_bus_data->default_an_inband is boolean default_an_inband is declared as an unsigned int, but is set to true/ false and is assigned to phylink_config's member of the same name which is a bool. Declare this also as a bool for consistency. Signed-off-by: Russell King (Oracle) Reviewed-by: Maxime Chevallier Tested-by: Maxime Chevallier Link: https://patch.msgid.link/E1vy6AT-0000000BtxD-2qm7@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/stmmac.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 2fc169c7117e..678d03d6d3bd 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -86,10 +86,10 @@ struct stmmac_priv; struct stmmac_mdio_bus_data { unsigned int phy_mask; unsigned int pcs_mask; - unsigned int default_an_inband; int *irqs; int probed_phy_irq; bool needs_reset; + bool default_an_inband; }; struct stmmac_dma_cfg { From e4fd855c52ec5af34d920206190be29919fadca3 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 5 Mar 2026 10:43:02 +0000 Subject: [PATCH 0355/1561] net: stmmac: make pcs_mask and phy_mask u32 The PCS and PHY masks are passed to the mdio bus layer as phy_mask to prevent bus addresses between 0 and 31 inclusive being scanned, and this is declared as u32. Also declare these as u32 in stmmac for type consistency. Since this is a u32, use BIT_U32() rather than BIT() to generate values for these fields. Signed-off-by: Russell King (Oracle) Reviewed-by: Maxime Chevallier Tested-by: Maxime Chevallier Link: https://patch.msgid.link/E1vy6AY-0000000BtxJ-3smT@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 2 +- drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c | 2 +- include/linux/stmmac.h | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index ece2a0c38562..fc13bfb47783 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -699,7 +699,7 @@ static int intel_mgbe_common_data(struct pci_dev *pdev, /* Intel mgbe SGMII interface uses pcs-xcps */ if (plat->phy_interface == PHY_INTERFACE_MODE_SGMII || plat->phy_interface == PHY_INTERFACE_MODE_1000BASEX) { - plat->mdio_bus_data->pcs_mask = BIT(INTEL_MGBE_XPCS_ADDR); + plat->mdio_bus_data->pcs_mask = BIT_U32(INTEL_MGBE_XPCS_ADDR); plat->mdio_bus_data->default_an_inband = true; plat->select_pcs = intel_mgbe_select_pcs; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c index b913fe5af488..ada6c6ef1f5c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -168,7 +168,7 @@ static int loongson_gnet_data(struct pci_dev *pdev, loongson_default_data(pdev, plat); plat->phy_interface = PHY_INTERFACE_MODE_GMII; - plat->mdio_bus_data->phy_mask = ~(u32)BIT(2); + plat->mdio_bus_data->phy_mask = ~BIT_U32(2); plat->fix_mac_speed = loongson_gnet_fix_speed; return 0; diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 678d03d6d3bd..965ada809fdf 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -84,8 +84,8 @@ struct stmmac_priv; /* Platfrom data for platform device structure's platform_data field */ struct stmmac_mdio_bus_data { - unsigned int phy_mask; - unsigned int pcs_mask; + u32 phy_mask; + u32 pcs_mask; int *irqs; int probed_phy_irq; bool needs_reset; From c698f5cc940de5871ea3c65c94f5fd7fbc6844e3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 Mar 2026 11:48:29 +0000 Subject: [PATCH 0356/1561] inet_diag: report delayed ack timer information inet_sk_diag_fill() populates r->idiag_timer with the following precedence order: 1 - Retransmit timer. 4 - Probe0 timer. 2 - Keepalive timer. This patch adds a new value, last in the list, if other timers are not active. 5 - Delayed ACK timer. A corresponding iproute2 patch will follow to replace "unknown" with "delack": ESTAB 10 0 [2002:a05:6830:1f86::]:12875 [2002:a05:6830:1f85::]:50438 timer:(unknown,003ms,0) ino:152178 sk:3004 cgroup:unreachable:189 <-> skmem:(r1344,rb12780520,t0,tb262144,f2752,w0,o250,bl0,d0) ts usec_ts ... Also add the following enum in uapi/linux/inet_diag.h as suggested by David Ahern. enum { IDIAG_TIMER_OFF, IDIAG_TIMER_ON, IDIAG_TIMER_KEEPALIVE, IDIAG_TIMER_TIMEWAIT, IDIAG_TIMER_PROBE0, IDIAG_TIMER_DELACK, }; Neal Cardwell suggested to test for ICSK_ACK_TIMER: inet_csk_clear_xmit_timer() does not call sk_stop_timer() because INET_CSK_CLEAR_TIMERS is unset. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Neal Cardwell Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260305114829.2163276-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/inet_diag.h | 9 +++++++++ net/ipv4/inet_diag.c | 13 +++++++++---- net/ipv4/tcp_diag.c | 4 ++-- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index 86bb2e8b17c9..21f0d735fbae 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -129,6 +129,15 @@ struct inet_diag_msg { __u32 idiag_inode; }; +enum { + IDIAG_TIMER_OFF, + IDIAG_TIMER_ON, + IDIAG_TIMER_KEEPALIVE, + IDIAG_TIMER_TIMEWAIT, + IDIAG_TIMER_PROBE0, + IDIAG_TIMER_DELACK, +}; + /* Extensions */ enum { diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 9d215485b5c7..34b77aa87d0a 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -241,7 +241,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, inet_diag_msg_common_fill(r, sk); r->idiag_state = sk->sk_state; - r->idiag_timer = 0; + r->idiag_timer = IDIAG_TIMER_OFF; r->idiag_retrans = 0; r->idiag_expires = 0; @@ -284,20 +284,25 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, if (icsk_pending == ICSK_TIME_RETRANS || icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk_pending == ICSK_TIME_LOSS_PROBE) { - r->idiag_timer = 1; + r->idiag_timer = IDIAG_TIMER_ON; r->idiag_retrans = READ_ONCE(icsk->icsk_retransmits); r->idiag_expires = jiffies_delta_to_msecs(tcp_timeout_expires(sk) - jiffies); } else if (icsk_pending == ICSK_TIME_PROBE0) { - r->idiag_timer = 4; + r->idiag_timer = IDIAG_TIMER_PROBE0; r->idiag_retrans = READ_ONCE(icsk->icsk_probes_out); r->idiag_expires = jiffies_delta_to_msecs(tcp_timeout_expires(sk) - jiffies); } else if (timer_pending(&icsk->icsk_keepalive_timer)) { - r->idiag_timer = 2; + r->idiag_timer = IDIAG_TIMER_KEEPALIVE; r->idiag_retrans = READ_ONCE(icsk->icsk_probes_out); r->idiag_expires = jiffies_delta_to_msecs(icsk->icsk_keepalive_timer.expires - jiffies); + } else if ((READ_ONCE(icsk->icsk_ack.pending) & ICSK_ACK_TIMER) && + timer_pending(&icsk->icsk_delack_timer)) { + r->idiag_timer = IDIAG_TIMER_DELACK; + r->idiag_expires = + jiffies_delta_to_msecs(icsk_delack_timeout(icsk) - jiffies); } if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) { diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 7935702e394b..ba1fdbe9807f 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -212,7 +212,7 @@ static int tcp_twsk_diag_fill(struct sock *sk, r->idiag_retrans = 0; r->idiag_state = READ_ONCE(tw->tw_substate); - r->idiag_timer = 3; + r->idiag_timer = IDIAG_TIMER_TIMEWAIT; tmo = tw->tw_timer.expires - jiffies; r->idiag_expires = jiffies_delta_to_msecs(tmo); r->idiag_rqueue = 0; @@ -247,7 +247,7 @@ static int tcp_req_diag_fill(struct sock *sk, struct sk_buff *skb, r = nlmsg_data(nlh); inet_diag_msg_common_fill(r, sk); r->idiag_state = TCP_SYN_RECV; - r->idiag_timer = 1; + r->idiag_timer = IDIAG_TIMER_ON; r->idiag_retrans = READ_ONCE(reqsk->num_retrans); BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) != From 260d27b3aec9f30d68f9f3cacc674655897eb745 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 4 Mar 2026 21:17:28 +0100 Subject: [PATCH 0357/1561] net: phy: remove phy_attach 378e6523ebb1 ("net: bcmgenet: remove unused platform code") removed the last user of phy_attach(). So remove this function. Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/8812176a-e319-4e9f-815d-99ea339df8b2@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy_device.c | 38 ------------------------------------ include/linux/phy.h | 2 -- 2 files changed, 40 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 3bd415710bf3..d1cbcfc3d2a6 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1895,44 +1895,6 @@ error_put_device: } EXPORT_SYMBOL(phy_attach_direct); -/** - * phy_attach - attach a network device to a particular PHY device - * @dev: network device to attach - * @bus_id: Bus ID of PHY device to attach - * @interface: PHY device's interface - * - * Description: Same as phy_attach_direct() except that a PHY bus_id - * string is passed instead of a pointer to a struct phy_device. - */ -struct phy_device *phy_attach(struct net_device *dev, const char *bus_id, - phy_interface_t interface) -{ - struct phy_device *phydev; - struct device *d; - int rc; - - if (!dev) - return ERR_PTR(-EINVAL); - - /* Search the list of PHY devices on the mdio bus for the - * PHY with the requested name - */ - d = bus_find_device_by_name(&mdio_bus_type, NULL, bus_id); - if (!d) { - pr_err("PHY %s not found\n", bus_id); - return ERR_PTR(-ENODEV); - } - phydev = to_phy_device(d); - - rc = phy_attach_direct(dev, phydev, phydev->dev_flags, interface); - put_device(d); - if (rc) - return ERR_PTR(rc); - - return phydev; -} -EXPORT_SYMBOL(phy_attach); - /** * phy_detach - detach a PHY device from its network device * @phydev: target phy_device struct diff --git a/include/linux/phy.h b/include/linux/phy.h index 6f9979a26892..e9b0d7427b0e 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -2152,8 +2152,6 @@ int phy_suspend(struct phy_device *phydev); int phy_resume(struct phy_device *phydev); int __phy_resume(struct phy_device *phydev); int phy_loopback(struct phy_device *phydev, bool enable, int speed); -struct phy_device *phy_attach(struct net_device *dev, const char *bus_id, - phy_interface_t interface); struct phy_device *phy_find_next(struct mii_bus *bus, struct phy_device *pos); int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, u32 flags, phy_interface_t interface); From ad4c9559603e1897e3a978c966b04424be68f4dc Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Wed, 4 Mar 2026 14:42:52 +0800 Subject: [PATCH 0358/1561] net: annotate data races around sk->sk_prot inet_sendmsg() and inet_recvmsg() access sk->sk_prot without lock_sock() or any other synchronization. sock_replace_proto() (used by sockmap), TLS and MPTCP can change sk->sk_prot under us, so these functions need READ_ONCE() to avoid load tearing. Signed-off-by: Jiayuan Chen Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260304064253.16955-1-jiayuan.chen@linux.dev Signed-off-by: Jakub Kicinski --- net/ipv4/af_inet.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index babcd75a08e2..e95ffa070568 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -852,11 +852,13 @@ EXPORT_SYMBOL_GPL(inet_send_prepare); int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; + const struct proto *prot; if (unlikely(inet_send_prepare(sk))) return -EAGAIN; - return INDIRECT_CALL_2(sk->sk_prot->sendmsg, tcp_sendmsg, udp_sendmsg, + prot = READ_ONCE(sk->sk_prot); + return INDIRECT_CALL_2(prot->sendmsg, tcp_sendmsg, udp_sendmsg, sk, msg, size); } EXPORT_SYMBOL(inet_sendmsg); @@ -882,11 +884,13 @@ int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; + const struct proto *prot; if (likely(!(flags & MSG_ERRQUEUE))) sock_rps_record_flow(sk); - return INDIRECT_CALL_2(sk->sk_prot->recvmsg, tcp_recvmsg, udp_recvmsg, + prot = READ_ONCE(sk->sk_prot); + return INDIRECT_CALL_2(prot->recvmsg, tcp_recvmsg, udp_recvmsg, sk, msg, size, flags); } EXPORT_SYMBOL(inet_recvmsg); From 21c0dc7cdd0723e34103d960a22f4130303eec21 Mon Sep 17 00:00:00 2001 From: Aleksei Oladko Date: Thu, 5 Mar 2026 21:10:00 +0000 Subject: [PATCH 0359/1561] selftests: net: forwarding: fix IPv6 address leak in cleanup Several forwarding tests (e.g., gre_multipath.sh) initialize both IPv4 and IPv6 addresses using simple_if_init, but only clean up IPv4 in simple_if_fini. This leaves stale IPv6 addresses on the interfaces, which causes subsequent tests to fail when they encounter unexpected address configuration. The issue can be reproduced by running tests in sequence: # run_kselftest.sh -t net/forwarding:ipip_hier_gre.sh # run_kselftest.sh -t net/forwarding:min_max_mtu.sh TAP version 13 1..1 # timeout set to 0 # selftests: net/forwarding: min_max_mtu.sh # TEST: ping [ OK ] # TEST: ping6 [ OK ] # TEST: Test maximum MTU configuration [ OK ] # TEST: Test traffic, packet size is maximum MTU [FAIL] # Ping6, packet size: 65487 succeeded, but should have failed # TEST: Test minimum MTU configuration [ OK ] # TEST: Test traffic, packet size is minimum MTU [ OK ] not ok 1 selftests: net/forwarding: min_max_mtu.sh # exit=1 Fix this by removing the unused IPv6 argument from simple_if_init in tests that don't use IPv6 (gre_multipath.sh, ipip_lib.sh), and by adding the missing IPv6 argument to simple_if_fini in tests that use IPv6 (gre_multipath_nh.sh, gre_multipath_nh_res.sh). Signed-off-by: Aleksei Oladko Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20260305211000.515301-1-aleksey.oladko@virtuozzo.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/gre_multipath.sh | 2 +- tools/testing/selftests/net/forwarding/gre_multipath_nh.sh | 2 +- tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh | 2 +- tools/testing/selftests/net/forwarding/ipip_lib.sh | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/gre_multipath.sh b/tools/testing/selftests/net/forwarding/gre_multipath.sh index 57531c1d884d..ce4ae74843d9 100755 --- a/tools/testing/selftests/net/forwarding/gre_multipath.sh +++ b/tools/testing/selftests/net/forwarding/gre_multipath.sh @@ -65,7 +65,7 @@ source lib.sh h1_create() { - simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64 + simple_if_init $h1 192.0.2.1/28 ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2 } diff --git a/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh b/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh index 7d5b2b9cc133..c667b81da37f 100755 --- a/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh +++ b/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh @@ -80,7 +80,7 @@ h1_destroy() { ip route del vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2 ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2 - simple_if_fini $h1 192.0.2.1/28 + simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64 } sw1_create() diff --git a/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh b/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh index 370f9925302d..d04bad58a96a 100755 --- a/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh +++ b/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh @@ -80,7 +80,7 @@ h1_destroy() { ip route del vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2 ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2 - simple_if_fini $h1 192.0.2.1/28 + simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64 } sw1_create() diff --git a/tools/testing/selftests/net/forwarding/ipip_lib.sh b/tools/testing/selftests/net/forwarding/ipip_lib.sh index 01e62c4ac94d..b255646b737a 100644 --- a/tools/testing/selftests/net/forwarding/ipip_lib.sh +++ b/tools/testing/selftests/net/forwarding/ipip_lib.sh @@ -144,7 +144,7 @@ h1_create() { - simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64 + simple_if_init $h1 192.0.2.1/28 ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2 } From e3f8800aa24369d0766b1005b2b2834b6e262083 Mon Sep 17 00:00:00 2001 From: Bui Quang Minh Date: Wed, 4 Mar 2026 22:43:17 +0700 Subject: [PATCH 0360/1561] virtio-net: xsk: Support wakeup on RX side When XDP_USE_NEED_WAKEUP is used and the fill ring is empty so no buffer is allocated on RX side, allow RX NAPI to be descheduled. This avoids wasting CPU cycles on polling. Users will be notified and they need to make a wakeup call after refilling the ring. Reviewed-by: Jason Xing Signed-off-by: Bui Quang Minh Link: https://patch.msgid.link/20260304154317.7506-1-minhquangbui99@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 72d6a9c6a5a2..811b90da15a9 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1454,8 +1454,19 @@ static int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct receive_queue xsk_buffs = rq->xsk_buffs; num = xsk_buff_alloc_batch(pool, xsk_buffs, rq->vq->num_free); - if (!num) + if (!num) { + if (xsk_uses_need_wakeup(pool)) { + xsk_set_rx_need_wakeup(pool); + /* Return 0 instead of -ENOMEM so that NAPI is + * descheduled. + */ + return 0; + } + return -ENOMEM; + } else { + xsk_clear_rx_need_wakeup(pool); + } len = xsk_pool_get_rx_frame_size(pool) + vi->hdr_len; @@ -1588,20 +1599,19 @@ static bool virtnet_xsk_xmit(struct send_queue *sq, struct xsk_buff_pool *pool, return sent; } -static void xsk_wakeup(struct send_queue *sq) +static void xsk_wakeup(struct napi_struct *napi, struct virtqueue *vq) { - if (napi_if_scheduled_mark_missed(&sq->napi)) + if (napi_if_scheduled_mark_missed(napi)) return; local_bh_disable(); - virtqueue_napi_schedule(&sq->napi, sq->vq); + virtqueue_napi_schedule(napi, vq); local_bh_enable(); } static int virtnet_xsk_wakeup(struct net_device *dev, u32 qid, u32 flag) { struct virtnet_info *vi = netdev_priv(dev); - struct send_queue *sq; if (!netif_running(dev)) return -ENETDOWN; @@ -1609,9 +1619,18 @@ static int virtnet_xsk_wakeup(struct net_device *dev, u32 qid, u32 flag) if (qid >= vi->curr_queue_pairs) return -EINVAL; - sq = &vi->sq[qid]; + if (flag & XDP_WAKEUP_TX) { + struct send_queue *sq = &vi->sq[qid]; + + xsk_wakeup(&sq->napi, sq->vq); + } + + if (flag & XDP_WAKEUP_RX) { + struct receive_queue *rq = &vi->rq[qid]; + + xsk_wakeup(&rq->napi, rq->vq); + } - xsk_wakeup(sq); return 0; } @@ -1623,7 +1642,7 @@ static void virtnet_xsk_completed(struct send_queue *sq, int num) * wakeup the tx napi to consume the xsk tx queue, because the tx * interrupt may not be triggered. */ - xsk_wakeup(sq); + xsk_wakeup(&sq->napi, sq->vq); } static int __virtnet_xdp_xmit_one(struct virtnet_info *vi, @@ -2816,7 +2835,9 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi, } /* - * Returns false if we couldn't fill entirely (OOM). + * Returns false if we couldn't fill entirely (OOM) and need to retry. + * In XSK mode, it's when the receive buffer is not allocated and + * xsk_use_need_wakeup is not set. * * Normally run in the receive path, but can also be run from ndo_open * before we're receiving packets, or from refill_work which is From 6c7f710325228a54a364ae433acd8779c2fc2bcf Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 5 Mar 2026 12:10:18 +0100 Subject: [PATCH 0361/1561] nfc: pn533: drop redundant device reference Driver core holds a reference to the USB interface and its parent USB device while the interface is bound to a driver and there is no need to take additional references unless the structures are needed after disconnect. Drop the redundant device reference to reduce cargo culting, make it easier to spot drivers where an extra reference is needed, and reduce the risk of memory leaks when drivers fail to release it. Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260305111019.18030-2-johan@kernel.org Signed-off-by: Jakub Kicinski --- drivers/nfc/pn533/usb.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/nfc/pn533/usb.c b/drivers/nfc/pn533/usb.c index 0f12f86ebb02..0ff2f0d7caf4 100644 --- a/drivers/nfc/pn533/usb.c +++ b/drivers/nfc/pn533/usb.c @@ -500,7 +500,7 @@ static int pn533_usb_probe(struct usb_interface *interface, if (!in_buf) return -ENOMEM; - phy->udev = usb_get_dev(interface_to_usbdev(interface)); + phy->udev = interface_to_usbdev(interface); phy->interface = interface; iface_desc = interface->cur_altsetting; @@ -600,7 +600,6 @@ error: usb_free_urb(phy->in_urb); usb_free_urb(phy->out_urb); usb_free_urb(phy->ack_urb); - usb_put_dev(phy->udev); kfree(in_buf); kfree(phy->ack_buffer); @@ -628,7 +627,6 @@ static void pn533_usb_disconnect(struct usb_interface *interface) usb_free_urb(phy->out_urb); usb_free_urb(phy->ack_urb); kfree(phy->ack_buffer); - usb_put_dev(phy->udev); nfc_info(&interface->dev, "NXP PN533 NFC device disconnected\n"); } From 1a2d4bfa04919c2e1676b756ccc21dd8e22d3838 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 5 Mar 2026 12:10:19 +0100 Subject: [PATCH 0362/1561] nfc: port100: drop redundant device reference Driver core holds a reference to the USB interface and its parent USB device while the interface is bound to a driver and there is no need to take additional references unless the structures are needed after disconnect. Drop the redundant device reference to reduce cargo culting, make it easier to spot drivers where an extra reference is needed, and reduce the risk of memory leaks when drivers fail to release it. Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260305111019.18030-3-johan@kernel.org Signed-off-by: Jakub Kicinski --- drivers/nfc/port100.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/nfc/port100.c b/drivers/nfc/port100.c index a922569d7b48..09dcc6f4c4f3 100644 --- a/drivers/nfc/port100.c +++ b/drivers/nfc/port100.c @@ -1504,7 +1504,7 @@ static int port100_probe(struct usb_interface *interface, return -ENOMEM; mutex_init(&dev->out_urb_lock); - dev->udev = usb_get_dev(interface_to_usbdev(interface)); + dev->udev = interface_to_usbdev(interface); dev->interface = interface; usb_set_intfdata(interface, dev); @@ -1616,7 +1616,6 @@ error: usb_free_urb(dev->in_urb); usb_kill_urb(dev->out_urb); usb_free_urb(dev->out_urb); - usb_put_dev(dev->udev); return rc; } @@ -1636,7 +1635,6 @@ static void port100_disconnect(struct usb_interface *interface) usb_free_urb(dev->in_urb); usb_free_urb(dev->out_urb); - usb_put_dev(dev->udev); kfree(dev->cmd); From 70eba59f92076d84264762d63d30532685943017 Mon Sep 17 00:00:00 2001 From: Vivian Wang Date: Thu, 5 Mar 2026 15:00:29 +0800 Subject: [PATCH 0363/1561] net: spacemit: Remove unused buff_addr fields These were never used. Just remove them. No functional change intended. Signed-off-by: Vivian Wang Link: https://patch.msgid.link/20260305-k1-ethernet-cleanup-buff_addr-v1-1-e978ef119231@iscas.ac.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/spacemit/k1_emac.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/spacemit/k1_emac.c b/drivers/net/ethernet/spacemit/k1_emac.c index 338a2637b1da..af58eae6fcd8 100644 --- a/drivers/net/ethernet/spacemit/k1_emac.c +++ b/drivers/net/ethernet/spacemit/k1_emac.c @@ -57,7 +57,6 @@ struct desc_buf { u64 dma_addr; - void *buff_addr; u16 dma_len; u8 map_as_page; }; @@ -70,7 +69,6 @@ struct emac_tx_desc_buffer { struct emac_rx_desc_buffer { struct sk_buff *skb; u64 dma_addr; - void *buff_addr; u16 dma_len; u8 map_as_page; }; @@ -340,7 +338,6 @@ static void emac_free_tx_buf(struct emac_priv *priv, int i) buf->dma_addr = 0; buf->map_as_page = false; - buf->buff_addr = NULL; } if (tx_buf->skb) { From ee970634c7773f248a0a27d8a645500371ae5249 Mon Sep 17 00:00:00 2001 From: Koichiro Den Date: Fri, 6 Mar 2026 00:56:36 +0900 Subject: [PATCH 0364/1561] net: ntb_netdev: Introduce per-queue context Prepare ntb_netdev for multi-queue operation by moving queue-pair state out of struct ntb_netdev. Introduce struct ntb_netdev_queue to carry the ntb_transport_qp pointer, the per-QP TX timer and queue id. Pass this object as the callback context and convert the RX/TX handlers and link event path accordingly. The probe path allocates a fixed upper bound for netdev queues while instantiating only a single ntb_transport queue pair, preserving the previous behavior. Also store client_dev for future queue pair creation/removal via the ntb_transport API. Signed-off-by: Koichiro Den Link: https://patch.msgid.link/20260305155639.1885517-2-den@valinux.co.jp Signed-off-by: Jakub Kicinski --- drivers/net/ntb_netdev.c | 279 ++++++++++++++++++++++++++------------- 1 file changed, 189 insertions(+), 90 deletions(-) diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c index fbeae05817e9..4b65e938d549 100644 --- a/drivers/net/ntb_netdev.c +++ b/drivers/net/ntb_netdev.c @@ -53,6 +53,7 @@ #include #include #include +#include #define NTB_NETDEV_VER "0.7" @@ -70,11 +71,24 @@ static unsigned int tx_start = 10; /* Number of descriptors still available before stop upper layer tx */ static unsigned int tx_stop = 5; -struct ntb_netdev { - struct pci_dev *pdev; - struct net_device *ndev; +#define NTB_NETDEV_MAX_QUEUES 64 +#define NTB_NETDEV_DEFAULT_QUEUES 1 + +struct ntb_netdev; + +struct ntb_netdev_queue { + struct ntb_netdev *ntdev; struct ntb_transport_qp *qp; struct timer_list tx_timer; + u16 qid; +}; + +struct ntb_netdev { + struct pci_dev *pdev; + struct device *client_dev; + struct net_device *ndev; + unsigned int num_queues; + struct ntb_netdev_queue *queues; }; #define NTB_TX_TIMEOUT_MS 1000 @@ -82,14 +96,17 @@ struct ntb_netdev { static void ntb_netdev_event_handler(void *data, int link_is_up) { - struct net_device *ndev = data; - struct ntb_netdev *dev = netdev_priv(ndev); + struct ntb_netdev_queue *q = data; + struct ntb_netdev *dev = q->ntdev; + struct net_device *ndev; - netdev_dbg(ndev, "Event %x, Link %x\n", link_is_up, - ntb_transport_link_query(dev->qp)); + ndev = dev->ndev; + + netdev_dbg(ndev, "Event %x, Link %x, qp %u\n", link_is_up, + ntb_transport_link_query(q->qp), q->qid); if (link_is_up) { - if (ntb_transport_link_query(dev->qp)) + if (ntb_transport_link_query(q->qp)) netif_carrier_on(ndev); } else { netif_carrier_off(ndev); @@ -99,10 +116,13 @@ static void ntb_netdev_event_handler(void *data, int link_is_up) static void ntb_netdev_rx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data, int len) { - struct net_device *ndev = qp_data; + struct ntb_netdev_queue *q = qp_data; + struct ntb_netdev *dev = q->ntdev; + struct net_device *ndev; struct sk_buff *skb; int rc; + ndev = dev->ndev; skb = data; if (!skb) return; @@ -118,6 +138,7 @@ static void ntb_netdev_rx_handler(struct ntb_transport_qp *qp, void *qp_data, skb_put(skb, len); skb->protocol = eth_type_trans(skb, ndev); skb->ip_summed = CHECKSUM_NONE; + skb_record_rx_queue(skb, q->qid); if (netif_rx(skb) == NET_RX_DROP) { ndev->stats.rx_errors++; @@ -144,42 +165,43 @@ enqueue_again: } static int __ntb_netdev_maybe_stop_tx(struct net_device *netdev, - struct ntb_transport_qp *qp, int size) + struct ntb_netdev_queue *q, int size) { - struct ntb_netdev *dev = netdev_priv(netdev); + netif_stop_subqueue(netdev, q->qid); - netif_stop_queue(netdev); /* Make sure to see the latest value of ntb_transport_tx_free_entry() * since the queue was last started. */ smp_mb(); - if (likely(ntb_transport_tx_free_entry(qp) < size)) { - mod_timer(&dev->tx_timer, jiffies + usecs_to_jiffies(tx_time)); + if (likely(ntb_transport_tx_free_entry(q->qp) < size)) { + mod_timer(&q->tx_timer, jiffies + usecs_to_jiffies(tx_time)); return -EBUSY; } - netif_start_queue(netdev); + netif_start_subqueue(netdev, q->qid); return 0; } static int ntb_netdev_maybe_stop_tx(struct net_device *ndev, - struct ntb_transport_qp *qp, int size) + struct ntb_netdev_queue *q, int size) { - if (netif_queue_stopped(ndev) || - (ntb_transport_tx_free_entry(qp) >= size)) + if (__netif_subqueue_stopped(ndev, q->qid) || + (ntb_transport_tx_free_entry(q->qp) >= size)) return 0; - return __ntb_netdev_maybe_stop_tx(ndev, qp, size); + return __ntb_netdev_maybe_stop_tx(ndev, q, size); } static void ntb_netdev_tx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data, int len) { - struct net_device *ndev = qp_data; + struct ntb_netdev_queue *q = qp_data; + struct ntb_netdev *dev = q->ntdev; + struct net_device *ndev; struct sk_buff *skb; - struct ntb_netdev *dev = netdev_priv(ndev); + ndev = dev->ndev; skb = data; if (!skb || !ndev) return; @@ -194,13 +216,13 @@ static void ntb_netdev_tx_handler(struct ntb_transport_qp *qp, void *qp_data, dev_kfree_skb_any(skb); - if (ntb_transport_tx_free_entry(dev->qp) >= tx_start) { + if (ntb_transport_tx_free_entry(qp) >= tx_start) { /* Make sure anybody stopping the queue after this sees the new * value of ntb_transport_tx_free_entry() */ smp_mb(); - if (netif_queue_stopped(ndev)) - netif_wake_queue(ndev); + if (__netif_subqueue_stopped(ndev, q->qid)) + netif_wake_subqueue(ndev, q->qid); } } @@ -208,16 +230,20 @@ static netdev_tx_t ntb_netdev_start_xmit(struct sk_buff *skb, struct net_device *ndev) { struct ntb_netdev *dev = netdev_priv(ndev); + u16 qid = skb_get_queue_mapping(skb); + struct ntb_netdev_queue *q; int rc; - ntb_netdev_maybe_stop_tx(ndev, dev->qp, tx_stop); + q = &dev->queues[qid]; - rc = ntb_transport_tx_enqueue(dev->qp, skb, skb->data, skb->len); + ntb_netdev_maybe_stop_tx(ndev, q, tx_stop); + + rc = ntb_transport_tx_enqueue(q->qp, skb, skb->data, skb->len); if (rc) goto err; /* check for next submit */ - ntb_netdev_maybe_stop_tx(ndev, dev->qp, tx_stop); + ntb_netdev_maybe_stop_tx(ndev, q, tx_stop); return NETDEV_TX_OK; @@ -229,80 +255,104 @@ err: static void ntb_netdev_tx_timer(struct timer_list *t) { - struct ntb_netdev *dev = timer_container_of(dev, t, tx_timer); - struct net_device *ndev = dev->ndev; + struct ntb_netdev_queue *q = timer_container_of(q, t, tx_timer); + struct ntb_netdev *dev = q->ntdev; + struct net_device *ndev; - if (ntb_transport_tx_free_entry(dev->qp) < tx_stop) { - mod_timer(&dev->tx_timer, jiffies + usecs_to_jiffies(tx_time)); + ndev = dev->ndev; + + if (ntb_transport_tx_free_entry(q->qp) < tx_stop) { + mod_timer(&q->tx_timer, jiffies + usecs_to_jiffies(tx_time)); } else { /* Make sure anybody stopping the queue after this sees the new * value of ntb_transport_tx_free_entry() */ smp_mb(); - if (netif_queue_stopped(ndev)) - netif_wake_queue(ndev); + if (__netif_subqueue_stopped(ndev, q->qid)) + netif_wake_subqueue(ndev, q->qid); } } static int ntb_netdev_open(struct net_device *ndev) { struct ntb_netdev *dev = netdev_priv(ndev); + struct ntb_netdev_queue *queue; struct sk_buff *skb; - int rc, i, len; + int rc = 0, i, len; + unsigned int q; - /* Add some empty rx bufs */ - for (i = 0; i < NTB_RXQ_SIZE; i++) { - skb = netdev_alloc_skb(ndev, ndev->mtu + ETH_HLEN); - if (!skb) { - rc = -ENOMEM; - goto err; + /* Add some empty rx bufs for each queue */ + for (q = 0; q < dev->num_queues; q++) { + queue = &dev->queues[q]; + + for (i = 0; i < NTB_RXQ_SIZE; i++) { + skb = netdev_alloc_skb(ndev, ndev->mtu + ETH_HLEN); + if (!skb) { + rc = -ENOMEM; + goto err; + } + + rc = ntb_transport_rx_enqueue(queue->qp, skb, skb->data, + ndev->mtu + ETH_HLEN); + if (rc) { + dev_kfree_skb(skb); + goto err; + } } - rc = ntb_transport_rx_enqueue(dev->qp, skb, skb->data, - ndev->mtu + ETH_HLEN); - if (rc) { - dev_kfree_skb(skb); - goto err; - } + timer_setup(&queue->tx_timer, ntb_netdev_tx_timer, 0); } - timer_setup(&dev->tx_timer, ntb_netdev_tx_timer, 0); - netif_carrier_off(ndev); - ntb_transport_link_up(dev->qp); + + for (q = 0; q < dev->num_queues; q++) + ntb_transport_link_up(dev->queues[q].qp); + netif_start_queue(ndev); return 0; err: - while ((skb = ntb_transport_rx_remove(dev->qp, &len))) - dev_kfree_skb(skb); + for (q = 0; q < dev->num_queues; q++) { + queue = &dev->queues[q]; + + while ((skb = ntb_transport_rx_remove(queue->qp, &len))) + dev_kfree_skb(skb); + } return rc; } static int ntb_netdev_close(struct net_device *ndev) { struct ntb_netdev *dev = netdev_priv(ndev); + struct ntb_netdev_queue *queue; struct sk_buff *skb; + unsigned int q; int len; - ntb_transport_link_down(dev->qp); - while ((skb = ntb_transport_rx_remove(dev->qp, &len))) - dev_kfree_skb(skb); + for (q = 0; q < dev->num_queues; q++) { + queue = &dev->queues[q]; - timer_delete_sync(&dev->tx_timer); + ntb_transport_link_down(queue->qp); + while ((skb = ntb_transport_rx_remove(queue->qp, &len))) + dev_kfree_skb(skb); + + timer_delete_sync(&queue->tx_timer); + } return 0; } static int ntb_netdev_change_mtu(struct net_device *ndev, int new_mtu) { struct ntb_netdev *dev = netdev_priv(ndev); + struct ntb_netdev_queue *queue; struct sk_buff *skb; - int len, rc; + unsigned int q, i; + int len, rc = 0; - if (new_mtu > ntb_transport_max_size(dev->qp) - ETH_HLEN) + if (new_mtu > ntb_transport_max_size(dev->queues[0].qp) - ETH_HLEN) return -EINVAL; if (!netif_running(ndev)) { @@ -311,41 +361,54 @@ static int ntb_netdev_change_mtu(struct net_device *ndev, int new_mtu) } /* Bring down the link and dispose of posted rx entries */ - ntb_transport_link_down(dev->qp); + for (q = 0; q < dev->num_queues; q++) + ntb_transport_link_down(dev->queues[q].qp); if (ndev->mtu < new_mtu) { - int i; + for (q = 0; q < dev->num_queues; q++) { + queue = &dev->queues[q]; - for (i = 0; (skb = ntb_transport_rx_remove(dev->qp, &len)); i++) - dev_kfree_skb(skb); - - for (; i; i--) { - skb = netdev_alloc_skb(ndev, new_mtu + ETH_HLEN); - if (!skb) { - rc = -ENOMEM; - goto err; - } - - rc = ntb_transport_rx_enqueue(dev->qp, skb, skb->data, - new_mtu + ETH_HLEN); - if (rc) { + for (i = 0; + (skb = ntb_transport_rx_remove(queue->qp, &len)); + i++) dev_kfree_skb(skb); - goto err; + + for (; i; i--) { + skb = netdev_alloc_skb(ndev, + new_mtu + ETH_HLEN); + if (!skb) { + rc = -ENOMEM; + goto err; + } + + rc = ntb_transport_rx_enqueue(queue->qp, skb, + skb->data, + new_mtu + + ETH_HLEN); + if (rc) { + dev_kfree_skb(skb); + goto err; + } } } } WRITE_ONCE(ndev->mtu, new_mtu); - ntb_transport_link_up(dev->qp); + for (q = 0; q < dev->num_queues; q++) + ntb_transport_link_up(dev->queues[q].qp); return 0; err: - ntb_transport_link_down(dev->qp); + for (q = 0; q < dev->num_queues; q++) { + struct ntb_netdev_queue *queue = &dev->queues[q]; - while ((skb = ntb_transport_rx_remove(dev->qp, &len))) - dev_kfree_skb(skb); + ntb_transport_link_down(queue->qp); + + while ((skb = ntb_transport_rx_remove(queue->qp, &len))) + dev_kfree_skb(skb); + } netdev_err(ndev, "Error changing MTU, device inoperable\n"); return rc; @@ -404,6 +467,7 @@ static int ntb_netdev_probe(struct device *client_dev) struct net_device *ndev; struct pci_dev *pdev; struct ntb_netdev *dev; + unsigned int q; int rc; ntb = dev_ntb(client_dev->parent); @@ -411,7 +475,7 @@ static int ntb_netdev_probe(struct device *client_dev) if (!pdev) return -ENODEV; - ndev = alloc_etherdev(sizeof(*dev)); + ndev = alloc_etherdev_mq(sizeof(*dev), NTB_NETDEV_MAX_QUEUES); if (!ndev) return -ENOMEM; @@ -420,6 +484,16 @@ static int ntb_netdev_probe(struct device *client_dev) dev = netdev_priv(ndev); dev->ndev = ndev; dev->pdev = pdev; + dev->client_dev = client_dev; + dev->num_queues = 0; + + dev->queues = kzalloc_objs(*dev->queues, NTB_NETDEV_MAX_QUEUES, + GFP_KERNEL); + if (!dev->queues) { + rc = -ENOMEM; + goto err_free_netdev; + } + ndev->features = NETIF_F_HIGHDMA; ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE; @@ -436,26 +510,47 @@ static int ntb_netdev_probe(struct device *client_dev) ndev->min_mtu = 0; ndev->max_mtu = ETH_MAX_MTU; - dev->qp = ntb_transport_create_queue(ndev, client_dev, - &ntb_netdev_handlers); - if (!dev->qp) { - rc = -EIO; - goto err; + for (q = 0; q < NTB_NETDEV_DEFAULT_QUEUES; q++) { + struct ntb_netdev_queue *queue = &dev->queues[q]; + + queue->ntdev = dev; + queue->qid = q; + queue->qp = ntb_transport_create_queue(queue, client_dev, + &ntb_netdev_handlers); + if (!queue->qp) + break; + + dev->num_queues++; } - ndev->mtu = ntb_transport_max_size(dev->qp) - ETH_HLEN; + if (!dev->num_queues) { + rc = -EIO; + goto err_free_queues; + } + + rc = netif_set_real_num_queues(ndev, dev->num_queues, dev->num_queues); + if (rc) + goto err_free_qps; + + ndev->mtu = ntb_transport_max_size(dev->queues[0].qp) - ETH_HLEN; rc = register_netdev(ndev); if (rc) - goto err1; + goto err_free_qps; dev_set_drvdata(client_dev, ndev); - dev_info(&pdev->dev, "%s created\n", ndev->name); + dev_info(&pdev->dev, "%s created with %u queue pairs\n", + ndev->name, dev->num_queues); return 0; -err1: - ntb_transport_free_queue(dev->qp); -err: +err_free_qps: + for (q = 0; q < dev->num_queues; q++) + ntb_transport_free_queue(dev->queues[q].qp); + +err_free_queues: + kfree(dev->queues); + +err_free_netdev: free_netdev(ndev); return rc; } @@ -464,9 +559,13 @@ static void ntb_netdev_remove(struct device *client_dev) { struct net_device *ndev = dev_get_drvdata(client_dev); struct ntb_netdev *dev = netdev_priv(ndev); + unsigned int q; unregister_netdev(ndev); - ntb_transport_free_queue(dev->qp); + for (q = 0; q < dev->num_queues; q++) + ntb_transport_free_queue(dev->queues[q].qp); + + kfree(dev->queues); free_netdev(ndev); } From 304132b7a5e6de84737ed4735c124d3d515f8c7a Mon Sep 17 00:00:00 2001 From: Koichiro Den Date: Fri, 6 Mar 2026 00:56:37 +0900 Subject: [PATCH 0365/1561] net: ntb_netdev: Gate subqueue stop/wake by transport link When ntb_netdev is extended to multiple ntb_transport queue pairs, the netdev carrier can be up as long as at least one QP link is up. In that setup, a given QP may be link-down while the carrier remains on. Make the link event handler start/stop the corresponding netdev TX subqueue and drive carrier state based on whether any QP link is up. Also guard subqueue wake/start points in the TX completion and timer paths so a subqueue is not restarted while its QP link is down. Stop all queues in ndo_open() and let the link event handler wake each subqueue once ntb_transport link negotiation succeeds. Signed-off-by: Koichiro Den Link: https://patch.msgid.link/20260305155639.1885517-3-den@valinux.co.jp Signed-off-by: Jakub Kicinski --- drivers/net/ntb_netdev.c | 42 ++++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c index 4b65e938d549..e4c1422d1d7a 100644 --- a/drivers/net/ntb_netdev.c +++ b/drivers/net/ntb_netdev.c @@ -99,18 +99,32 @@ static void ntb_netdev_event_handler(void *data, int link_is_up) struct ntb_netdev_queue *q = data; struct ntb_netdev *dev = q->ntdev; struct net_device *ndev; + bool any_up = false; + unsigned int i; ndev = dev->ndev; netdev_dbg(ndev, "Event %x, Link %x, qp %u\n", link_is_up, ntb_transport_link_query(q->qp), q->qid); - if (link_is_up) { - if (ntb_transport_link_query(q->qp)) - netif_carrier_on(ndev); - } else { - netif_carrier_off(ndev); + if (netif_running(ndev)) { + if (link_is_up) + netif_wake_subqueue(ndev, q->qid); + else + netif_stop_subqueue(ndev, q->qid); } + + for (i = 0; i < dev->num_queues; i++) { + if (ntb_transport_link_query(dev->queues[i].qp)) { + any_up = true; + break; + } + } + + if (any_up) + netif_carrier_on(ndev); + else + netif_carrier_off(ndev); } static void ntb_netdev_rx_handler(struct ntb_transport_qp *qp, void *qp_data, @@ -179,7 +193,10 @@ static int __ntb_netdev_maybe_stop_tx(struct net_device *netdev, return -EBUSY; } - netif_start_subqueue(netdev, q->qid); + /* The subqueue must be kept stopped if the link is down */ + if (ntb_transport_link_query(q->qp)) + netif_start_subqueue(netdev, q->qid); + return 0; } @@ -221,7 +238,8 @@ static void ntb_netdev_tx_handler(struct ntb_transport_qp *qp, void *qp_data, * value of ntb_transport_tx_free_entry() */ smp_mb(); - if (__netif_subqueue_stopped(ndev, q->qid)) + if (__netif_subqueue_stopped(ndev, q->qid) && + ntb_transport_link_query(q->qp)) netif_wake_subqueue(ndev, q->qid); } } @@ -268,7 +286,10 @@ static void ntb_netdev_tx_timer(struct timer_list *t) * value of ntb_transport_tx_free_entry() */ smp_mb(); - if (__netif_subqueue_stopped(ndev, q->qid)) + + /* The subqueue must be kept stopped if the link is down */ + if (__netif_subqueue_stopped(ndev, q->qid) && + ntb_transport_link_query(q->qp)) netif_wake_subqueue(ndev, q->qid); } } @@ -304,12 +325,11 @@ static int ntb_netdev_open(struct net_device *ndev) } netif_carrier_off(ndev); + netif_tx_stop_all_queues(ndev); for (q = 0; q < dev->num_queues; q++) ntb_transport_link_up(dev->queues[q].qp); - netif_start_queue(ndev); - return 0; err: @@ -330,6 +350,8 @@ static int ntb_netdev_close(struct net_device *ndev) unsigned int q; int len; + netif_tx_stop_all_queues(ndev); + netif_carrier_off(ndev); for (q = 0; q < dev->num_queues; q++) { queue = &dev->queues[q]; From b83bf617dc849f11e7b2f907540017b5aa42d167 Mon Sep 17 00:00:00 2001 From: Koichiro Den Date: Fri, 6 Mar 2026 00:56:38 +0900 Subject: [PATCH 0366/1561] net: ntb_netdev: Factor out multi-queue helpers Implementing .set_channels will otherwise duplicate the same multi-queue operations at multiple call sites. Factor out the following helpers: - ntb_netdev_update_carrier(): carrier is switched on when at least one QP link is up - ntb_netdev_queue_rx_drain(): drain and free all queued RX packets for one QP - ntb_netdev_queue_rx_fill(): prefill RX ring for one QP No functional change. Signed-off-by: Koichiro Den Link: https://patch.msgid.link/20260305155639.1885517-4-den@valinux.co.jp Signed-off-by: Jakub Kicinski --- drivers/net/ntb_netdev.c | 101 +++++++++++++++++++++++---------------- 1 file changed, 61 insertions(+), 40 deletions(-) diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c index e4c1422d1d7a..ac39652b0488 100644 --- a/drivers/net/ntb_netdev.c +++ b/drivers/net/ntb_netdev.c @@ -94,13 +94,63 @@ struct ntb_netdev { #define NTB_TX_TIMEOUT_MS 1000 #define NTB_RXQ_SIZE 100 +static void ntb_netdev_update_carrier(struct ntb_netdev *dev) +{ + struct net_device *ndev; + bool any_up = false; + unsigned int i; + + ndev = dev->ndev; + + for (i = 0; i < dev->num_queues; i++) { + if (ntb_transport_link_query(dev->queues[i].qp)) { + any_up = true; + break; + } + } + + if (any_up) + netif_carrier_on(ndev); + else + netif_carrier_off(ndev); +} + +static void ntb_netdev_queue_rx_drain(struct ntb_netdev_queue *queue) +{ + struct sk_buff *skb; + int len; + + while ((skb = ntb_transport_rx_remove(queue->qp, &len))) + dev_kfree_skb(skb); +} + +static int ntb_netdev_queue_rx_fill(struct net_device *ndev, + struct ntb_netdev_queue *queue) +{ + struct sk_buff *skb; + int rc, i; + + for (i = 0; i < NTB_RXQ_SIZE; i++) { + skb = netdev_alloc_skb(ndev, ndev->mtu + ETH_HLEN); + if (!skb) + return -ENOMEM; + + rc = ntb_transport_rx_enqueue(queue->qp, skb, skb->data, + ndev->mtu + ETH_HLEN); + if (rc) { + dev_kfree_skb(skb); + return rc; + } + } + + return 0; +} + static void ntb_netdev_event_handler(void *data, int link_is_up) { struct ntb_netdev_queue *q = data; struct ntb_netdev *dev = q->ntdev; struct net_device *ndev; - bool any_up = false; - unsigned int i; ndev = dev->ndev; @@ -114,17 +164,7 @@ static void ntb_netdev_event_handler(void *data, int link_is_up) netif_stop_subqueue(ndev, q->qid); } - for (i = 0; i < dev->num_queues; i++) { - if (ntb_transport_link_query(dev->queues[i].qp)) { - any_up = true; - break; - } - } - - if (any_up) - netif_carrier_on(ndev); - else - netif_carrier_off(ndev); + ntb_netdev_update_carrier(dev); } static void ntb_netdev_rx_handler(struct ntb_transport_qp *qp, void *qp_data, @@ -298,28 +338,16 @@ static int ntb_netdev_open(struct net_device *ndev) { struct ntb_netdev *dev = netdev_priv(ndev); struct ntb_netdev_queue *queue; - struct sk_buff *skb; - int rc = 0, i, len; unsigned int q; + int rc = 0; /* Add some empty rx bufs for each queue */ for (q = 0; q < dev->num_queues; q++) { queue = &dev->queues[q]; - for (i = 0; i < NTB_RXQ_SIZE; i++) { - skb = netdev_alloc_skb(ndev, ndev->mtu + ETH_HLEN); - if (!skb) { - rc = -ENOMEM; - goto err; - } - - rc = ntb_transport_rx_enqueue(queue->qp, skb, skb->data, - ndev->mtu + ETH_HLEN); - if (rc) { - dev_kfree_skb(skb); - goto err; - } - } + rc = ntb_netdev_queue_rx_fill(ndev, queue); + if (rc) + goto err; timer_setup(&queue->tx_timer, ntb_netdev_tx_timer, 0); } @@ -335,9 +363,7 @@ static int ntb_netdev_open(struct net_device *ndev) err: for (q = 0; q < dev->num_queues; q++) { queue = &dev->queues[q]; - - while ((skb = ntb_transport_rx_remove(queue->qp, &len))) - dev_kfree_skb(skb); + ntb_netdev_queue_rx_drain(queue); } return rc; } @@ -346,9 +372,7 @@ static int ntb_netdev_close(struct net_device *ndev) { struct ntb_netdev *dev = netdev_priv(ndev); struct ntb_netdev_queue *queue; - struct sk_buff *skb; unsigned int q; - int len; netif_tx_stop_all_queues(ndev); netif_carrier_off(ndev); @@ -357,12 +381,10 @@ static int ntb_netdev_close(struct net_device *ndev) queue = &dev->queues[q]; ntb_transport_link_down(queue->qp); - - while ((skb = ntb_transport_rx_remove(queue->qp, &len))) - dev_kfree_skb(skb); - + ntb_netdev_queue_rx_drain(queue); timer_delete_sync(&queue->tx_timer); } + return 0; } @@ -428,8 +450,7 @@ err: ntb_transport_link_down(queue->qp); - while ((skb = ntb_transport_rx_remove(queue->qp, &len))) - dev_kfree_skb(skb); + ntb_netdev_queue_rx_drain(queue); } netdev_err(ndev, "Error changing MTU, device inoperable\n"); From 24d9e73c7e000af7c1f1a1f669bdaa9bd7e88ea5 Mon Sep 17 00:00:00 2001 From: Koichiro Den Date: Fri, 6 Mar 2026 00:56:39 +0900 Subject: [PATCH 0367/1561] net: ntb_netdev: Support ethtool channels for multi-queue Support dynamic queue pair addition/removal via ethtool channels. Use the combined channel count to control the number of netdev TX/RX queues, each corresponding to a ntb_transport queue pair. When the number of queues is reduced, tear down and free the removed ntb_transport queue pairs (not just deactivate them) so other ntb_transport clients can reuse the freed resources. When the number of queues is increased, create additional queue pairs up to NTB_NETDEV_MAX_QUEUES (=64). The effective limit is determined by the underlying ntb_transport implementation and NTB hardware resources (the number of MWs), so set_channels may return -ENOSPC if no more QPs can be allocated. Keep the default at one queue pair to preserve the previous behavior. Signed-off-by: Koichiro Den Link: https://patch.msgid.link/20260305155639.1885517-5-den@valinux.co.jp Signed-off-by: Jakub Kicinski --- drivers/net/ntb_netdev.c | 157 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 151 insertions(+), 6 deletions(-) diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c index ac39652b0488..6397d898c991 100644 --- a/drivers/net/ntb_netdev.c +++ b/drivers/net/ntb_netdev.c @@ -284,6 +284,12 @@ static void ntb_netdev_tx_handler(struct ntb_transport_qp *qp, void *qp_data, } } +static const struct ntb_queue_handlers ntb_netdev_handlers = { + .tx_handler = ntb_netdev_tx_handler, + .rx_handler = ntb_netdev_rx_handler, + .event_handler = ntb_netdev_event_handler, +}; + static netdev_tx_t ntb_netdev_start_xmit(struct sk_buff *skb, struct net_device *ndev) { @@ -492,16 +498,155 @@ static int ntb_get_link_ksettings(struct net_device *dev, return 0; } +static void ntb_get_channels(struct net_device *ndev, + struct ethtool_channels *channels) +{ + struct ntb_netdev *dev = netdev_priv(ndev); + + channels->combined_count = dev->num_queues; + channels->max_combined = ndev->num_tx_queues; +} + +static int ntb_inc_channels(struct net_device *ndev, + unsigned int old, unsigned int new) +{ + struct ntb_netdev *dev = netdev_priv(ndev); + bool running = netif_running(ndev); + struct ntb_netdev_queue *queue; + unsigned int q, created; + int rc; + + created = old; + for (q = old; q < new; q++) { + queue = &dev->queues[q]; + + queue->ntdev = dev; + queue->qid = q; + queue->qp = ntb_transport_create_queue(queue, dev->client_dev, + &ntb_netdev_handlers); + if (!queue->qp) { + rc = -ENOSPC; + goto err_new; + } + created++; + + if (!running) + continue; + + timer_setup(&queue->tx_timer, ntb_netdev_tx_timer, 0); + + rc = ntb_netdev_queue_rx_fill(ndev, queue); + if (rc) + goto err_new; + + /* + * Carrier may already be on due to other QPs. Keep the new + * subqueue stopped until we get a Link Up event for this QP. + */ + netif_stop_subqueue(ndev, q); + } + + rc = netif_set_real_num_queues(ndev, new, new); + if (rc) + goto err_new; + + dev->num_queues = new; + + if (running) + for (q = old; q < new; q++) + ntb_transport_link_up(dev->queues[q].qp); + + return 0; + +err_new: + if (running) { + unsigned int rollback = created; + + while (rollback-- > old) { + queue = &dev->queues[rollback]; + ntb_transport_link_down(queue->qp); + ntb_netdev_queue_rx_drain(queue); + timer_delete_sync(&queue->tx_timer); + } + } + while (created-- > old) { + queue = &dev->queues[created]; + ntb_transport_free_queue(queue->qp); + queue->qp = NULL; + } + return rc; +} + +static int ntb_dec_channels(struct net_device *ndev, + unsigned int old, unsigned int new) +{ + struct ntb_netdev *dev = netdev_priv(ndev); + bool running = netif_running(ndev); + struct ntb_netdev_queue *queue; + unsigned int q; + int rc; + + if (running) + for (q = new; q < old; q++) + netif_stop_subqueue(ndev, q); + + rc = netif_set_real_num_queues(ndev, new, new); + if (rc) + goto err; + + /* Publish new queue count before invalidating QP pointers */ + dev->num_queues = new; + + for (q = new; q < old; q++) { + queue = &dev->queues[q]; + + if (running) { + ntb_transport_link_down(queue->qp); + ntb_netdev_queue_rx_drain(queue); + timer_delete_sync(&queue->tx_timer); + } + + ntb_transport_free_queue(queue->qp); + queue->qp = NULL; + } + + /* + * It might be the case that the removed queues are the only queues that + * were up, so see if the global carrier needs to change. + */ + ntb_netdev_update_carrier(dev); + return 0; + +err: + if (running) { + for (q = new; q < old; q++) + netif_wake_subqueue(ndev, q); + } + return rc; +} + +static int ntb_set_channels(struct net_device *ndev, + struct ethtool_channels *channels) +{ + struct ntb_netdev *dev = netdev_priv(ndev); + unsigned int new = channels->combined_count; + unsigned int old = dev->num_queues; + + if (new == old) + return 0; + + if (new < old) + return ntb_dec_channels(ndev, old, new); + else + return ntb_inc_channels(ndev, old, new); +} + static const struct ethtool_ops ntb_ethtool_ops = { .get_drvinfo = ntb_get_drvinfo, .get_link = ethtool_op_get_link, .get_link_ksettings = ntb_get_link_ksettings, -}; - -static const struct ntb_queue_handlers ntb_netdev_handlers = { - .tx_handler = ntb_netdev_tx_handler, - .rx_handler = ntb_netdev_rx_handler, - .event_handler = ntb_netdev_event_handler, + .get_channels = ntb_get_channels, + .set_channels = ntb_set_channels, }; static int ntb_netdev_probe(struct device *client_dev) From f4ac0cc88e9949d41bbdd101caa3117afe983ec9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 5 Mar 2026 11:50:06 +0100 Subject: [PATCH 0368/1561] net: usb: lan78xx: drop redundant device reference Driver core holds a reference to the USB interface and its parent USB device while the interface is bound to a driver and there is no need to take additional references unless the structures are needed after disconnect. Drop the redundant device reference to reduce cargo culting, make it easier to spot drivers where an extra reference is needed, and reduce the risk of memory leaks when drivers fail to release it. Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260305105006.16415-1-johan@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/usb/lan78xx.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index a0021df12fde..2b6cb9266945 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -4530,7 +4530,6 @@ static void intr_complete(struct urb *urb) static void lan78xx_disconnect(struct usb_interface *intf) { struct lan78xx_net *dev; - struct usb_device *udev; struct net_device *net; dev = usb_get_intfdata(intf); @@ -4538,7 +4537,6 @@ static void lan78xx_disconnect(struct usb_interface *intf) if (!dev) return; - udev = interface_to_usbdev(intf); net = dev->net; rtnl_lock(); @@ -4567,7 +4565,6 @@ static void lan78xx_disconnect(struct usb_interface *intf) usb_free_urb(dev->urb_intr); free_netdev(net); - usb_put_dev(udev); } static void lan78xx_tx_timeout(struct net_device *net, unsigned int txqueue) @@ -4629,13 +4626,11 @@ static int lan78xx_probe(struct usb_interface *intf, u8 *buf = NULL; udev = interface_to_usbdev(intf); - udev = usb_get_dev(udev); netdev = alloc_etherdev(sizeof(struct lan78xx_net)); if (!netdev) { dev_err(&intf->dev, "Error: OOM\n"); - ret = -ENOMEM; - goto out1; + return -ENOMEM; } SET_NETDEV_DEV(netdev, &intf->dev); @@ -4784,8 +4779,6 @@ out3: lan78xx_free_tx_resources(dev); out2: free_netdev(netdev); -out1: - usb_put_dev(udev); return ret; } From 0bcac7b11262557c990da1ac564d45777eb6b005 Mon Sep 17 00:00:00 2001 From: Aleksei Oladko Date: Fri, 6 Mar 2026 00:01:23 +0000 Subject: [PATCH 0369/1561] selftests: net: make ovs-dpctl.py fail when pyroute2 is unsupported The pmtu.sh kselftest configures OVS using ovs-dpctl.py and falls back to ovs-vsctl only when ovs-dpctl.py fails. However, ovs-dpctl.py exits with a success status when the installed pyroute2 package version is lower than 0.6, even though the OVS datapath is not configured. As a result, pmtu.sh assumes that the setup was successful and continues running the test, which later fails due to the missing OVS configuration. Fix the exit code handling in ovs-dpctl.py so that pmtu.sh can detect that the setup did not complete successfully and fall back to ovs-vsctl. Signed-off-by: Aleksei Oladko Reviewed-by: Aaron Conole Link: https://patch.msgid.link/20260306000127.519064-3-aleksey.oladko@virtuozzo.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/openvswitch/ovs-dpctl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py index b521e0dea506..848f61fdcee0 100644 --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py @@ -2583,7 +2583,7 @@ def main(argv): prverscheck = pyroute2.__version__.split(".") if int(prverscheck[0]) == 0 and int(prverscheck[1]) < 6: print("Need to upgrade the python pyroute2 package to >= 0.6.") - sys.exit(0) + sys.exit(1) parser = argparse.ArgumentParser() parser.add_argument( From e0aa0c61758f517ded2befa084ebcad93809b421 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 6 Mar 2026 19:36:21 -0800 Subject: [PATCH 0370/1561] tools: ynl: move samples to tests The "samples" were always poor man's tests (used to manually confirm that C YNL works). Move all C sample programs from tools/net/ynl/samples/ to tools/net/ynl/tests/, "merge" the Makefiles. The subsequent changes will convert each sample into a proper KTAP selftests. Since these are now tests rather than samples - default to enabling asan. After all we're testing user space code here. Sort the gitignore while at it, the page-pool entry was a leftover so delete it. Reviewed-by: Donald Hunter Tested-by: Donald Hunter Link: https://patch.msgid.link/20260307033630.1396085-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/Makefile | 4 +- tools/net/ynl/samples/Makefile | 36 -------------- tools/net/ynl/{samples => tests}/.gitignore | 3 +- tools/net/ynl/tests/Makefile | 47 +++++++++++++++++-- tools/net/ynl/{samples => tests}/devlink.c | 0 tools/net/ynl/{samples => tests}/ethtool.c | 0 tools/net/ynl/{samples => tests}/netdev.c | 0 tools/net/ynl/{samples => tests}/ovs.c | 0 tools/net/ynl/{samples => tests}/rt-addr.c | 0 tools/net/ynl/{samples => tests}/rt-link.c | 0 tools/net/ynl/{samples => tests}/rt-route.c | 0 .../ynl/{samples => tests}/tc-filter-add.c | 0 tools/net/ynl/{samples => tests}/tc.c | 0 13 files changed, 45 insertions(+), 45 deletions(-) delete mode 100644 tools/net/ynl/samples/Makefile rename tools/net/ynl/{samples => tests}/.gitignore (87%) rename tools/net/ynl/{samples => tests}/devlink.c (100%) rename tools/net/ynl/{samples => tests}/ethtool.c (100%) rename tools/net/ynl/{samples => tests}/netdev.c (100%) rename tools/net/ynl/{samples => tests}/ovs.c (100%) rename tools/net/ynl/{samples => tests}/rt-addr.c (100%) rename tools/net/ynl/{samples => tests}/rt-link.c (100%) rename tools/net/ynl/{samples => tests}/rt-route.c (100%) rename tools/net/ynl/{samples => tests}/tc-filter-add.c (100%) rename tools/net/ynl/{samples => tests}/tc.c (100%) diff --git a/tools/net/ynl/Makefile b/tools/net/ynl/Makefile index 9b692f368be7..d514a48dae27 100644 --- a/tools/net/ynl/Makefile +++ b/tools/net/ynl/Makefile @@ -14,12 +14,12 @@ includedir ?= $(prefix)/include SPECDIR=../../../Documentation/netlink/specs -SUBDIRS = lib generated samples ynltool tests +SUBDIRS = lib generated ynltool tests all: $(SUBDIRS) libynl.a +tests: | lib generated libynl.a ynltool: | lib generated libynl.a -samples: | lib generated libynl.a: | lib generated @echo -e "\tAR $@" @ar rcs $@ lib/ynl.o generated/*-user.o diff --git a/tools/net/ynl/samples/Makefile b/tools/net/ynl/samples/Makefile deleted file mode 100644 index d76cbd41cbb1..000000000000 --- a/tools/net/ynl/samples/Makefile +++ /dev/null @@ -1,36 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 - -include ../Makefile.deps - -CC=gcc -CFLAGS += -std=gnu11 -O2 -W -Wall -Wextra -Wno-unused-parameter -Wshadow \ - -I../lib/ -I../generated/ -idirafter $(UAPI_PATH) -ifeq ("$(DEBUG)","1") - CFLAGS += -g -fsanitize=address -fsanitize=leak -static-libasan -endif - -LDLIBS=../lib/ynl.a ../generated/protos.a - -SRCS=$(wildcard *.c) -BINS=$(patsubst %.c,%,${SRCS}) - -include $(wildcard *.d) - -all: $(BINS) - -CFLAGS_page-pool=$(CFLAGS_netdev) -CFLAGS_tc-filter-add:=$(CFLAGS_tc) - -$(BINS): ../lib/ynl.a ../generated/protos.a $(SRCS) - @echo -e '\tCC sample $@' - @$(COMPILE.c) $(CFLAGS_$@) $@.c -o $@.o - @$(LINK.c) $@.o -o $@ $(LDLIBS) - -clean: - rm -f *.o *.d *~ - -distclean: clean - rm -f $(BINS) - -.PHONY: all clean distclean -.DEFAULT_GOAL=all diff --git a/tools/net/ynl/samples/.gitignore b/tools/net/ynl/tests/.gitignore similarity index 87% rename from tools/net/ynl/samples/.gitignore rename to tools/net/ynl/tests/.gitignore index 05087ee323ba..045385df42a4 100644 --- a/tools/net/ynl/samples/.gitignore +++ b/tools/net/ynl/tests/.gitignore @@ -1,8 +1,7 @@ -ethtool devlink +ethtool netdev ovs -page-pool rt-addr rt-link rt-route diff --git a/tools/net/ynl/tests/Makefile b/tools/net/ynl/tests/Makefile index eb166c9550db..5fa36c877235 100644 --- a/tools/net/ynl/tests/Makefile +++ b/tools/net/ynl/tests/Makefile @@ -1,21 +1,51 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for YNL tests +include ../Makefile.deps + +CC=gcc +CFLAGS += -std=gnu11 -O2 -W -Wall -Wextra -Wno-unused-parameter -Wshadow \ + -I../lib/ -I../generated/ -I../../../testing/selftests/ \ + -idirafter $(UAPI_PATH) +ifneq ("$(NDEBUG)","1") + CFLAGS += -g -fsanitize=address -fsanitize=leak -static-libasan +endif + +LDLIBS=../lib/ynl.a ../generated/protos.a + TEST_PROGS := \ test_ynl_cli.sh \ test_ynl_ethtool.sh \ # end of TEST_PROGS +SRCS=$(wildcard *.c) +BINS=$(patsubst %.c,%,${SRCS}) + +CFLAGS_tc-filter-add:=$(CFLAGS_tc) + +include $(wildcard *.d) + INSTALL_PATH ?= $(DESTDIR)/usr/share/kselftest -all: $(TEST_PROGS) +all: $(BINS) $(TEST_PROGS) + +../lib/ynl.a: + @$(MAKE) -C ../lib + + ../generated/protos.a: + @$(MAKE) -C ../generated + +$(BINS): ../lib/ynl.a ../generated/protos.a + @echo -e '\tCC test $@' + @$(COMPILE.c) $(CFLAGS_$@) $@.c -o $@.o + @$(LINK.c) $@.o -o $@ $(LDLIBS) run_tests: @for test in $(TEST_PROGS); do \ ./$$test; \ done -install: $(TEST_PROGS) +install: $(TEST_PROGS) $(BINS) @mkdir -p $(INSTALL_PATH)/ynl @cp ../../../testing/selftests/kselftest/ktap_helpers.sh $(INSTALL_PATH)/ @for test in $(TEST_PROGS); do \ @@ -26,11 +56,18 @@ install: $(TEST_PROGS) $$test > $(INSTALL_PATH)/ynl/$$name; \ chmod +x $(INSTALL_PATH)/ynl/$$name; \ done + @for bin in $(BINS); do \ + cp $$bin $(INSTALL_PATH)/ynl/$$bin; \ + done @for test in $(TEST_PROGS); do \ echo "ynl:$$test"; \ done > $(INSTALL_PATH)/kselftest-list.txt -clean distclean: - @# Nothing to clean +clean: + rm -f *.o *.d *~ -.PHONY: all install clean run_tests +distclean: clean + rm -f $(BINS) + +.PHONY: all install clean distclean run_tests +.DEFAULT_GOAL=all diff --git a/tools/net/ynl/samples/devlink.c b/tools/net/ynl/tests/devlink.c similarity index 100% rename from tools/net/ynl/samples/devlink.c rename to tools/net/ynl/tests/devlink.c diff --git a/tools/net/ynl/samples/ethtool.c b/tools/net/ynl/tests/ethtool.c similarity index 100% rename from tools/net/ynl/samples/ethtool.c rename to tools/net/ynl/tests/ethtool.c diff --git a/tools/net/ynl/samples/netdev.c b/tools/net/ynl/tests/netdev.c similarity index 100% rename from tools/net/ynl/samples/netdev.c rename to tools/net/ynl/tests/netdev.c diff --git a/tools/net/ynl/samples/ovs.c b/tools/net/ynl/tests/ovs.c similarity index 100% rename from tools/net/ynl/samples/ovs.c rename to tools/net/ynl/tests/ovs.c diff --git a/tools/net/ynl/samples/rt-addr.c b/tools/net/ynl/tests/rt-addr.c similarity index 100% rename from tools/net/ynl/samples/rt-addr.c rename to tools/net/ynl/tests/rt-addr.c diff --git a/tools/net/ynl/samples/rt-link.c b/tools/net/ynl/tests/rt-link.c similarity index 100% rename from tools/net/ynl/samples/rt-link.c rename to tools/net/ynl/tests/rt-link.c diff --git a/tools/net/ynl/samples/rt-route.c b/tools/net/ynl/tests/rt-route.c similarity index 100% rename from tools/net/ynl/samples/rt-route.c rename to tools/net/ynl/tests/rt-route.c diff --git a/tools/net/ynl/samples/tc-filter-add.c b/tools/net/ynl/tests/tc-filter-add.c similarity index 100% rename from tools/net/ynl/samples/tc-filter-add.c rename to tools/net/ynl/tests/tc-filter-add.c diff --git a/tools/net/ynl/samples/tc.c b/tools/net/ynl/tests/tc.c similarity index 100% rename from tools/net/ynl/samples/tc.c rename to tools/net/ynl/tests/tc.c From 285804d63f35b333b216f90255104264c590514b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 6 Mar 2026 19:36:22 -0800 Subject: [PATCH 0371/1561] tools: ynl: convert netdev sample to selftest Convert netdev.c to produce KTAP output with 3 tests: - dev_dump: dump all netdev devices, skip if empty - dev_get: query first device from dump by ifindex - ntf_check: subscribe to "mgmt", create a veth via rt-link, verify netdev notification is received, then delete the veth Remove stdin/scanf-based UI. Add rt-link dependency for the veth notification test. TAP version 13 1..3 # Starting 3 tests from 1 test cases. # RUN netdev.dump ... # lo[1] xdp-features (0): xdp-rx-metadata-features (0): xsk-fea... # sit0[2] xdp-features (0): xdp-rx-metadata-features (0): xsk-fea... # OK netdev.dump ok 1 netdev.dump # RUN netdev.get ... # lo[1] xdp-features (0): xdp-rx-metadata-features (0): xsk-fea... # OK netdev.get ok 2 netdev.get # RUN netdev.ntf_check ... # veth0[7] xdp-features (0): xdp-rx-metadata-features (7): timesta... # OK netdev.ntf_check ok 3 netdev.ntf_check # PASSED: 3 / 3 tests passed. # Totals: pass:3 fail:0 xfail:0 xpass:0 skip:0 error:0 Reviewed-by: Donald Hunter Tested-by: Donald Hunter Link: https://patch.msgid.link/20260307033630.1396085-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/tests/Makefile | 29 +++-- tools/net/ynl/tests/netdev.c | 229 +++++++++++++++++++++++++---------- 2 files changed, 187 insertions(+), 71 deletions(-) diff --git a/tools/net/ynl/tests/Makefile b/tools/net/ynl/tests/Makefile index 5fa36c877235..1d77d3662f46 100644 --- a/tools/net/ynl/tests/Makefile +++ b/tools/net/ynl/tests/Makefile @@ -18,16 +18,29 @@ TEST_PROGS := \ test_ynl_ethtool.sh \ # end of TEST_PROGS -SRCS=$(wildcard *.c) -BINS=$(patsubst %.c,%,${SRCS}) +TEST_GEN_PROGS := \ + netdev \ +# end of TEST_GEN_PROGS +BINS := \ + devlink \ + ethtool \ + ovs \ + rt-addr \ + rt-link \ + rt-route \ + tc \ + tc-filter-add \ +# end of BINS + +CFLAGS_netdev:=$(CFLAGS_netdev) $(CFLAGS_rt-link) CFLAGS_tc-filter-add:=$(CFLAGS_tc) include $(wildcard *.d) INSTALL_PATH ?= $(DESTDIR)/usr/share/kselftest -all: $(BINS) $(TEST_PROGS) +all: $(TEST_GEN_PROGS) $(BINS) ../lib/ynl.a: @$(MAKE) -C ../lib @@ -35,7 +48,7 @@ all: $(BINS) $(TEST_PROGS) ../generated/protos.a: @$(MAKE) -C ../generated -$(BINS): ../lib/ynl.a ../generated/protos.a +$(TEST_GEN_PROGS) $(BINS): %: %.c ../lib/ynl.a ../generated/protos.a @echo -e '\tCC test $@' @$(COMPILE.c) $(CFLAGS_$@) $@.c -o $@.o @$(LINK.c) $@.o -o $@ $(LDLIBS) @@ -45,7 +58,7 @@ run_tests: ./$$test; \ done -install: $(TEST_PROGS) $(BINS) +install: $(TEST_GEN_PROGS) $(BINS) @mkdir -p $(INSTALL_PATH)/ynl @cp ../../../testing/selftests/kselftest/ktap_helpers.sh $(INSTALL_PATH)/ @for test in $(TEST_PROGS); do \ @@ -56,10 +69,10 @@ install: $(TEST_PROGS) $(BINS) $$test > $(INSTALL_PATH)/ynl/$$name; \ chmod +x $(INSTALL_PATH)/ynl/$$name; \ done - @for bin in $(BINS); do \ + @for bin in $(TEST_GEN_PROGS) $(BINS); do \ cp $$bin $(INSTALL_PATH)/ynl/$$bin; \ done - @for test in $(TEST_PROGS); do \ + @for test in $(TEST_PROGS) $(TEST_GEN_PROGS); do \ echo "ynl:$$test"; \ done > $(INSTALL_PATH)/kselftest-list.txt @@ -67,7 +80,7 @@ clean: rm -f *.o *.d *~ distclean: clean - rm -f $(BINS) + rm -f $(TEST_GEN_PROGS) $(BINS) .PHONY: all install clean distclean run_tests .DEFAULT_GOAL=all diff --git a/tools/net/ynl/tests/netdev.c b/tools/net/ynl/tests/netdev.c index 22609d44c89a..f849e3d7f4b3 100644 --- a/tools/net/ynl/tests/netdev.c +++ b/tools/net/ynl/tests/netdev.c @@ -6,29 +6,29 @@ #include +#include + #include "netdev-user.h" +#include "rt-link-user.h" -/* netdev genetlink family code sample - * This sample shows off basics of the netdev family but also notification - * handling, hence the somewhat odd UI. We subscribe to notifications first - * then wait for ifc selection, so the socket may already accumulate - * notifications as we wait. This allows us to test that YNL can handle - * requests and notifications getting interleaved. - */ - -static void netdev_print_device(struct netdev_dev_get_rsp *d, unsigned int op) +static void netdev_print_device(struct __test_metadata *_metadata, + struct netdev_dev_get_rsp *d, unsigned int op) { char ifname[IF_NAMESIZE]; const char *name; + EXPECT_TRUE((bool)d->_present.ifindex); if (!d->_present.ifindex) return; name = if_indextoname(d->ifindex, ifname); + EXPECT_TRUE((bool)name); if (name) - printf("%8s", name); - printf("[%d]\t", d->ifindex); + ksft_print_msg("%8s[%d]\t", name, d->ifindex); + else + ksft_print_msg("[%d]\t", d->ifindex); + EXPECT_TRUE((bool)d->_present.xdp_features); if (!d->_present.xdp_features) return; @@ -38,10 +38,12 @@ static void netdev_print_device(struct netdev_dev_get_rsp *d, unsigned int op) printf(" %s", netdev_xdp_act_str(1 << i)); } - printf(" xdp-rx-metadata-features (%llx):", d->xdp_rx_metadata_features); + printf(" xdp-rx-metadata-features (%llx):", + d->xdp_rx_metadata_features); for (int i = 0; d->xdp_rx_metadata_features >= 1U << i; i++) { if (d->xdp_rx_metadata_features & (1U << i)) - printf(" %s", netdev_xdp_rx_metadata_str(1 << i)); + printf(" %s", + netdev_xdp_rx_metadata_str(1 << i)); } printf(" xsk-features (%llx):", d->xsk_features); @@ -58,71 +60,172 @@ static void netdev_print_device(struct netdev_dev_get_rsp *d, unsigned int op) printf("\n"); } -int main(int argc, char **argv) +static int veth_create(struct ynl_sock *ys_link) +{ + struct rt_link_getlink_ntf *ntf_gl; + struct rt_link_newlink_req *req; + struct ynl_ntf_base_type *ntf; + int ret; + + req = rt_link_newlink_req_alloc(); + if (!req) + return -1; + + rt_link_newlink_req_set_nlflags(req, NLM_F_CREATE | NLM_F_ECHO); + rt_link_newlink_req_set_linkinfo_kind(req, "veth"); + + ret = rt_link_newlink(ys_link, req); + rt_link_newlink_req_free(req); + if (ret) + return -1; + + if (!ynl_has_ntf(ys_link)) + return 0; + + ntf = ynl_ntf_dequeue(ys_link); + if (!ntf || ntf->cmd != RTM_NEWLINK) { + ynl_ntf_free(ntf); + return 0; + } + ntf_gl = (void *)ntf; + ret = ntf_gl->obj._hdr.ifi_index; + ynl_ntf_free(ntf); + + return ret; +} + +static void veth_delete(struct __test_metadata *_metadata, + struct ynl_sock *ys_link, int ifindex) +{ + struct rt_link_dellink_req *req; + + req = rt_link_dellink_req_alloc(); + ASSERT_NE(NULL, req); + + req->_hdr.ifi_index = ifindex; + EXPECT_EQ(0, rt_link_dellink(ys_link, req)); + rt_link_dellink_req_free(req); +} + +FIXTURE(netdev) +{ + struct ynl_sock *ys; + struct ynl_sock *ys_link; +}; + +FIXTURE_SETUP(netdev) +{ + struct ynl_error yerr; + + self->ys = ynl_sock_create(&ynl_netdev_family, &yerr); + ASSERT_NE(NULL, self->ys) { + TH_LOG("Failed to create YNL netdev socket: %s", yerr.msg); + } +} + +FIXTURE_TEARDOWN(netdev) +{ + if (self->ys_link) + ynl_sock_destroy(self->ys_link); + ynl_sock_destroy(self->ys); +} + +TEST_F(netdev, dump) { struct netdev_dev_get_list *devs; - struct ynl_ntf_base_type *ntf; - struct ynl_error yerr; - struct ynl_sock *ys; + + devs = netdev_dev_get_dump(self->ys); + ASSERT_NE(NULL, devs) { + TH_LOG("dump failed: %s", self->ys->err.msg); + } + + if (ynl_dump_empty(devs)) { + netdev_dev_get_list_free(devs); + SKIP(return, "no entries in dump"); + } + + ynl_dump_foreach(devs, d) + netdev_print_device(_metadata, d, 0); + + netdev_dev_get_list_free(devs); +} + +TEST_F(netdev, get) +{ + struct netdev_dev_get_list *devs; + struct netdev_dev_get_req *req; + struct netdev_dev_get_rsp *dev; int ifindex = 0; - if (argc > 1) - ifindex = strtol(argv[1], NULL, 0); - - ys = ynl_sock_create(&ynl_netdev_family, &yerr); - if (!ys) { - fprintf(stderr, "YNL: %s\n", yerr.msg); - return 1; + devs = netdev_dev_get_dump(self->ys); + ASSERT_NE(NULL, devs) { + TH_LOG("dump failed: %s", self->ys->err.msg); } - if (ynl_subscribe(ys, "mgmt")) - goto err_close; + ynl_dump_foreach(devs, d) { + if (d->_present.ifindex) { + ifindex = d->ifindex; + break; + } + } + netdev_dev_get_list_free(devs); - printf("Select ifc ($ifindex; or 0 = dump; or -2 ntf check): "); - if (scanf("%d", &ifindex) != 1) { - fprintf(stderr, "Error: unable to parse input\n"); - goto err_destroy; + if (!ifindex) + SKIP(return, "no device to query"); + + req = netdev_dev_get_req_alloc(); + ASSERT_NE(NULL, req); + netdev_dev_get_req_set_ifindex(req, ifindex); + + dev = netdev_dev_get(self->ys, req); + netdev_dev_get_req_free(req); + ASSERT_NE(NULL, dev) { + TH_LOG("dev_get failed: %s", self->ys->err.msg); } - if (ifindex > 0) { - struct netdev_dev_get_req *req; - struct netdev_dev_get_rsp *d; + netdev_print_device(_metadata, dev, 0); + netdev_dev_get_rsp_free(dev); +} - req = netdev_dev_get_req_alloc(); - netdev_dev_get_req_set_ifindex(req, ifindex); +TEST_F(netdev, ntf_check) +{ + struct ynl_ntf_base_type *ntf; + int veth_ifindex; + bool received; + int ret; - d = netdev_dev_get(ys, req); - netdev_dev_get_req_free(req); - if (!d) - goto err_close; - - netdev_print_device(d, 0); - netdev_dev_get_rsp_free(d); - } else if (!ifindex) { - devs = netdev_dev_get_dump(ys); - if (!devs) - goto err_close; - - if (ynl_dump_empty(devs)) - fprintf(stderr, "Error: no devices reported\n"); - ynl_dump_foreach(devs, d) - netdev_print_device(d, 0); - netdev_dev_get_list_free(devs); - } else if (ifindex == -2) { - ynl_ntf_check(ys); + ret = ynl_subscribe(self->ys, "mgmt"); + ASSERT_EQ(0, ret) { + TH_LOG("subscribe failed: %s", self->ys->err.msg); } - while ((ntf = ynl_ntf_dequeue(ys))) { - netdev_print_device((struct netdev_dev_get_rsp *)&ntf->data, + + self->ys_link = ynl_sock_create(&ynl_rt_link_family, NULL); + ASSERT_NE(NULL, self->ys_link) + TH_LOG("failed to create rt-link socket"); + + veth_ifindex = veth_create(self->ys_link); + ASSERT_GT(veth_ifindex, 0) + TH_LOG("failed to create veth"); + + ynl_ntf_check(self->ys); + + ntf = ynl_ntf_dequeue(self->ys); + received = ntf; + if (ntf) { + netdev_print_device(_metadata, + (struct netdev_dev_get_rsp *)&ntf->data, ntf->cmd); ynl_ntf_free(ntf); } - ynl_sock_destroy(ys); - return 0; + /* Drain any remaining notifications */ + while ((ntf = ynl_ntf_dequeue(self->ys))) + ynl_ntf_free(ntf); -err_close: - fprintf(stderr, "YNL: %s\n", ys->err.msg); -err_destroy: - ynl_sock_destroy(ys); - return 2; + veth_delete(_metadata, self->ys_link, veth_ifindex); + + ASSERT_TRUE(received) + TH_LOG("no notification received"); } + +TEST_HARNESS_MAIN From 7e3effbc76278bfe3c452074c301233c8e69101f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 6 Mar 2026 19:36:23 -0800 Subject: [PATCH 0372/1561] tools: ynl: convert ovs sample to selftest Convert ovs.c to produce KTAP output with kselftest_harness. The single "crud" test creates a new OVS datapath, fetches it back by name, then dumps all datapaths verifying the new one appears. IIRC I added this test because ovs is a genetlink family but has a family-specific fixed header. TAP version 13 1..1 # Starting 1 tests from 1 test cases. # RUN ovs.crud ... # get: # ynl-test(3): pid:0 cache:256 # dump: # ynl-test(3): pid:0 cache:256 # OK ovs.crud ok 1 ovs.crud # PASSED: 1 / 1 tests passed. # Totals: pass:1 fail:0 xfail:0 xpass:0 skip:0 error:0 Reviewed-by: Donald Hunter Tested-by: Donald Hunter Link: https://patch.msgid.link/20260307033630.1396085-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/tests/Makefile | 3 +- tools/net/ynl/tests/config | 1 + tools/net/ynl/tests/ovs.c | 128 ++++++++++++++++++++++++----------- 3 files changed, 91 insertions(+), 41 deletions(-) diff --git a/tools/net/ynl/tests/Makefile b/tools/net/ynl/tests/Makefile index 1d77d3662f46..df9d37c8b2a4 100644 --- a/tools/net/ynl/tests/Makefile +++ b/tools/net/ynl/tests/Makefile @@ -20,12 +20,12 @@ TEST_PROGS := \ TEST_GEN_PROGS := \ netdev \ + ovs \ # end of TEST_GEN_PROGS BINS := \ devlink \ ethtool \ - ovs \ rt-addr \ rt-link \ rt-route \ @@ -34,6 +34,7 @@ BINS := \ # end of BINS CFLAGS_netdev:=$(CFLAGS_netdev) $(CFLAGS_rt-link) +CFLAGS_ovs:=$(CFLAGS_ovs_datapath) CFLAGS_tc-filter-add:=$(CFLAGS_tc) include $(wildcard *.d) diff --git a/tools/net/ynl/tests/config b/tools/net/ynl/tests/config index 339f1309c03f..357b34611da4 100644 --- a/tools/net/ynl/tests/config +++ b/tools/net/ynl/tests/config @@ -3,4 +3,5 @@ CONFIG_INET_DIAG=y CONFIG_IPV6=y CONFIG_NET_NS=y CONFIG_NETDEVSIM=m +CONFIG_OPENVSWITCH=m CONFIG_VETH=m diff --git a/tools/net/ynl/tests/ovs.c b/tools/net/ynl/tests/ovs.c index 3e975c003d77..d49f5a8e647e 100644 --- a/tools/net/ynl/tests/ovs.c +++ b/tools/net/ynl/tests/ovs.c @@ -4,57 +4,105 @@ #include +#include + #include "ovs_datapath-user.h" -int main(int argc, char **argv) +static void ovs_print_datapath(struct __test_metadata *_metadata, + struct ovs_datapath_get_rsp *dp) +{ + EXPECT_TRUE((bool)dp->_len.name); + if (!dp->_len.name) + return; + + EXPECT_TRUE((bool)dp->_hdr.dp_ifindex); + ksft_print_msg("%s(%d): pid:%u cache:%u\n", + dp->name, dp->_hdr.dp_ifindex, + dp->upcall_pid, dp->masks_cache_size); +} + +FIXTURE(ovs) { struct ynl_sock *ys; + char *dp_name; +}; + +FIXTURE_SETUP(ovs) +{ + self->ys = ynl_sock_create(&ynl_ovs_datapath_family, NULL); + ASSERT_NE(NULL, self->ys) + TH_LOG("failed to create OVS datapath socket"); +} + +FIXTURE_TEARDOWN(ovs) +{ + if (self->dp_name) { + struct ovs_datapath_del_req *req; + + req = ovs_datapath_del_req_alloc(); + if (req) { + ovs_datapath_del_req_set_name(req, self->dp_name); + ovs_datapath_del(self->ys, req); + ovs_datapath_del_req_free(req); + } + } + ynl_sock_destroy(self->ys); +} + +TEST_F(ovs, crud) +{ + struct ovs_datapath_get_req_dump *dreq; + struct ovs_datapath_new_req *new_req; + struct ovs_datapath_get_list *dps; + struct ovs_datapath_get_rsp *dp; + struct ovs_datapath_get_req *req; + bool found = false; int err; - ys = ynl_sock_create(&ynl_ovs_datapath_family, NULL); - if (!ys) - return 1; + new_req = ovs_datapath_new_req_alloc(); + ASSERT_NE(NULL, new_req); + ovs_datapath_new_req_set_upcall_pid(new_req, 1); + ovs_datapath_new_req_set_name(new_req, "ynl-test"); - if (argc > 1) { - struct ovs_datapath_new_req *req; + err = ovs_datapath_new(self->ys, new_req); + ovs_datapath_new_req_free(new_req); + ASSERT_EQ(0, err) { + TH_LOG("new failed: %s", self->ys->err.msg); + } + self->dp_name = "ynl-test"; - req = ovs_datapath_new_req_alloc(); - if (!req) - goto err_close; + ksft_print_msg("get:\n"); + req = ovs_datapath_get_req_alloc(); + ASSERT_NE(NULL, req); + ovs_datapath_get_req_set_name(req, "ynl-test"); - ovs_datapath_new_req_set_upcall_pid(req, 1); - ovs_datapath_new_req_set_name(req, argv[1]); - - err = ovs_datapath_new(ys, req); - ovs_datapath_new_req_free(req); - if (err) - goto err_close; - } else { - struct ovs_datapath_get_req_dump *req; - struct ovs_datapath_get_list *dps; - - printf("Dump:\n"); - req = ovs_datapath_get_req_dump_alloc(); - - dps = ovs_datapath_get_dump(ys, req); - ovs_datapath_get_req_dump_free(req); - if (!dps) - goto err_close; - - ynl_dump_foreach(dps, dp) { - printf(" %s(%d): pid:%u cache:%u\n", - dp->name, dp->_hdr.dp_ifindex, - dp->upcall_pid, dp->masks_cache_size); - } - ovs_datapath_get_list_free(dps); + dp = ovs_datapath_get(self->ys, req); + ovs_datapath_get_req_free(req); + ASSERT_NE(NULL, dp) { + TH_LOG("get failed: %s", self->ys->err.msg); } - ynl_sock_destroy(ys); + ovs_print_datapath(_metadata, dp); + EXPECT_STREQ("ynl-test", dp->name); + ovs_datapath_get_rsp_free(dp); - return 0; + ksft_print_msg("dump:\n"); + dreq = ovs_datapath_get_req_dump_alloc(); + ASSERT_NE(NULL, dreq); -err_close: - fprintf(stderr, "YNL (%d): %s\n", ys->err.code, ys->err.msg); - ynl_sock_destroy(ys); - return 2; + dps = ovs_datapath_get_dump(self->ys, dreq); + ovs_datapath_get_req_dump_free(dreq); + ASSERT_NE(NULL, dps) { + TH_LOG("dump failed: %s", self->ys->err.msg); + } + + ynl_dump_foreach(dps, d) { + ovs_print_datapath(_metadata, d); + if (d->name && !strcmp(d->name, "ynl-test")) + found = true; + } + ovs_datapath_get_list_free(dps); + EXPECT_TRUE(found); } + +TEST_HARNESS_MAIN From 5c3206786c2d2c42540ad075d73fc895a9eb7f39 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 6 Mar 2026 19:36:24 -0800 Subject: [PATCH 0373/1561] tools: ynl: convert rt-link sample to selftest Convert rt-link.c to use kselftest_harness.h with FIXTURE/TEST_F. Move rt-link from BINS to TEST_GEN_PROGS. Output: TAP version 13 1..3 # Starting 3 tests from 1 test cases. # RUN rt_link.dump ... # 1: lo: mtu 65536 # 2: sit0: mtu 1480 kind sit # OK rt_link.dump ok 1 rt_link.dump # RUN rt_link.netkit ... # 4: nk1: mtu 1500 kind netkit primary 1 policy blackhole # OK rt_link.netkit ok 2 rt_link.netkit # RUN rt_link.netkit_err_msg ... # OK rt_link.netkit_err_msg ok 3 rt_link.netkit_err_msg # PASSED: 3 / 3 tests passed. # Totals: pass:3 fail:0 xfail:0 xpass:0 skip:0 error:0 Reviewed-by: Donald Hunter Tested-by: Donald Hunter Link: https://patch.msgid.link/20260307033630.1396085-5-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/tests/Makefile | 2 +- tools/net/ynl/tests/config | 1 + tools/net/ynl/tests/rt-link.c | 196 +++++++++++++++++++--------------- 3 files changed, 111 insertions(+), 88 deletions(-) diff --git a/tools/net/ynl/tests/Makefile b/tools/net/ynl/tests/Makefile index df9d37c8b2a4..08d1146d91ce 100644 --- a/tools/net/ynl/tests/Makefile +++ b/tools/net/ynl/tests/Makefile @@ -21,13 +21,13 @@ TEST_PROGS := \ TEST_GEN_PROGS := \ netdev \ ovs \ + rt-link \ # end of TEST_GEN_PROGS BINS := \ devlink \ ethtool \ rt-addr \ - rt-link \ rt-route \ tc \ tc-filter-add \ diff --git a/tools/net/ynl/tests/config b/tools/net/ynl/tests/config index 357b34611da4..b4c58d86a6c2 100644 --- a/tools/net/ynl/tests/config +++ b/tools/net/ynl/tests/config @@ -3,5 +3,6 @@ CONFIG_INET_DIAG=y CONFIG_IPV6=y CONFIG_NET_NS=y CONFIG_NETDEVSIM=m +CONFIG_NETKIT=y CONFIG_OPENVSWITCH=m CONFIG_VETH=m diff --git a/tools/net/ynl/tests/rt-link.c b/tools/net/ynl/tests/rt-link.c index acdd4b4a0f74..ef619ce6143f 100644 --- a/tools/net/ynl/tests/rt-link.c +++ b/tools/net/ynl/tests/rt-link.c @@ -7,16 +7,21 @@ #include #include +#include + #include "rt-link-user.h" -static void rt_link_print(struct rt_link_getlink_rsp *r) +static void rt_link_print(struct __test_metadata *_metadata, + struct rt_link_getlink_rsp *r) { unsigned int i; - printf("%3d: ", r->_hdr.ifi_index); + EXPECT_TRUE((bool)r->_hdr.ifi_index); + ksft_print_msg("%3d: ", r->_hdr.ifi_index); + EXPECT_TRUE((bool)r->_len.ifname); if (r->_len.ifname) - printf("%16s: ", r->ifname); + printf("%6s: ", r->ifname); if (r->_present.mtu) printf("mtu %5d ", r->mtu); @@ -50,7 +55,7 @@ static void rt_link_print(struct rt_link_getlink_rsp *r) printf("\n"); } -static int rt_link_create_netkit(struct ynl_sock *ys) +static int netkit_create(struct ynl_sock *ys) { struct rt_link_getlink_ntf *ntf_gl; struct rt_link_newlink_req *req; @@ -58,49 +63,24 @@ static int rt_link_create_netkit(struct ynl_sock *ys) int ret; req = rt_link_newlink_req_alloc(); - if (!req) { - fprintf(stderr, "Can't alloc req\n"); + if (!req) return -1; - } - /* rtnetlink doesn't provide info about the created object. - * It expects us to set the ECHO flag and the dig the info out - * of the notifications... - */ rt_link_newlink_req_set_nlflags(req, NLM_F_CREATE | NLM_F_ECHO); - rt_link_newlink_req_set_linkinfo_kind(req, "netkit"); - - /* Test error messages */ - rt_link_newlink_req_set_linkinfo_data_netkit_policy(req, 10); - ret = rt_link_newlink(ys, req); - if (ret) { - printf("Testing error message for policy being bad:\n\t%s\n", ys->err.msg); - } else { - fprintf(stderr, "Warning: unexpected success creating netkit with bad attrs\n"); - goto created; - } - rt_link_newlink_req_set_linkinfo_data_netkit_policy(req, NETKIT_DROP); ret = rt_link_newlink(ys, req); -created: rt_link_newlink_req_free(req); - if (ret) { - fprintf(stderr, "YNL: %s\n", ys->err.msg); + if (ret) return -1; - } - if (!ynl_has_ntf(ys)) { - fprintf(stderr, - "Warning: interface created but received no notification, won't delete the interface\n"); + if (!ynl_has_ntf(ys)) return 0; - } ntf = ynl_ntf_dequeue(ys); - if (ntf->cmd != RTM_NEWLINK) { - fprintf(stderr, - "Warning: unexpected notification type, won't delete the interface\n"); + if (!ntf || ntf->cmd != RTM_NEWLINK) { + ynl_ntf_free(ntf); return 0; } ntf_gl = (void *)ntf; @@ -110,75 +90,117 @@ created: return ret; } -static void rt_link_del(struct ynl_sock *ys, int ifindex) +static void netkit_delete(struct __test_metadata *_metadata, + struct ynl_sock *ys, int ifindex) { struct rt_link_dellink_req *req; req = rt_link_dellink_req_alloc(); - if (!req) { - fprintf(stderr, "Can't alloc req\n"); - return; - } + ASSERT_NE(NULL, req); req->_hdr.ifi_index = ifindex; - if (rt_link_dellink(ys, req)) - fprintf(stderr, "YNL: %s\n", ys->err.msg); - else - fprintf(stderr, - "Trying to delete a Netkit interface (ifindex %d)\n", - ifindex); - + EXPECT_EQ(0, rt_link_dellink(ys, req)); rt_link_dellink_req_free(req); } -int main(int argc, char **argv) +FIXTURE(rt_link) +{ + struct ynl_sock *ys; +}; + +FIXTURE_SETUP(rt_link) +{ + struct ynl_error yerr; + + self->ys = ynl_sock_create(&ynl_rt_link_family, &yerr); + ASSERT_NE(NULL, self->ys) { + TH_LOG("failed to create rt-link socket: %s", yerr.msg); + } +} + +FIXTURE_TEARDOWN(rt_link) +{ + ynl_sock_destroy(self->ys); +} + +TEST_F(rt_link, dump) { struct rt_link_getlink_req_dump *req; struct rt_link_getlink_list *rsp; - struct ynl_error yerr; - struct ynl_sock *ys; - int created = 0; - - ys = ynl_sock_create(&ynl_rt_link_family, &yerr); - if (!ys) { - fprintf(stderr, "YNL: %s\n", yerr.msg); - return 1; - } - - if (argc > 1) { - fprintf(stderr, "Trying to create a Netkit interface\n"); - created = rt_link_create_netkit(ys); - if (created < 0) - goto err_destroy; - } req = rt_link_getlink_req_dump_alloc(); - if (!req) - goto err_del_ifc; - - rsp = rt_link_getlink_dump(ys, req); + ASSERT_NE(NULL, req); + rsp = rt_link_getlink_dump(self->ys, req); rt_link_getlink_req_dump_free(req); - if (!rsp) - goto err_close; + ASSERT_NE(NULL, rsp) { + TH_LOG("dump failed: %s", self->ys->err.msg); + } + ASSERT_FALSE(ynl_dump_empty(rsp)); - if (ynl_dump_empty(rsp)) - fprintf(stderr, "Error: no links reported\n"); ynl_dump_foreach(rsp, link) - rt_link_print(link); + rt_link_print(_metadata, link); + rt_link_getlink_list_free(rsp); - - if (created) - rt_link_del(ys, created); - - ynl_sock_destroy(ys); - return 0; - -err_close: - fprintf(stderr, "YNL: %s\n", ys->err.msg); -err_del_ifc: - if (created) - rt_link_del(ys, created); -err_destroy: - ynl_sock_destroy(ys); - return 2; } + +TEST_F(rt_link, netkit) +{ + struct rt_link_getlink_req_dump *dreq; + struct rt_link_getlink_list *rsp; + bool found = false; + int netkit_ifindex; + + /* Create netkit with valid policy */ + netkit_ifindex = netkit_create(self->ys); + ASSERT_GT(netkit_ifindex, 0) + TH_LOG("failed to create netkit: %s", self->ys->err.msg); + + /* Verify it appears in a dump */ + dreq = rt_link_getlink_req_dump_alloc(); + ASSERT_NE(NULL, dreq); + rsp = rt_link_getlink_dump(self->ys, dreq); + rt_link_getlink_req_dump_free(dreq); + ASSERT_NE(NULL, rsp) { + TH_LOG("dump failed: %s", self->ys->err.msg); + } + + ynl_dump_foreach(rsp, link) { + if (link->_hdr.ifi_index == netkit_ifindex) { + rt_link_print(_metadata, link); + found = true; + } + } + rt_link_getlink_list_free(rsp); + EXPECT_TRUE(found); + + netkit_delete(_metadata, self->ys, netkit_ifindex); +} + +TEST_F(rt_link, netkit_err_msg) +{ + struct rt_link_newlink_req *req; + int ret; + + /* Test creating netkit with bad policy - should fail */ + req = rt_link_newlink_req_alloc(); + ASSERT_NE(NULL, req); + rt_link_newlink_req_set_nlflags(req, NLM_F_CREATE); + rt_link_newlink_req_set_linkinfo_kind(req, "netkit"); + rt_link_newlink_req_set_linkinfo_data_netkit_policy(req, 10); + + ret = rt_link_newlink(self->ys, req); + rt_link_newlink_req_free(req); + EXPECT_NE(0, ret) { + TH_LOG("creating netkit with bad policy should fail"); + } + + /* Expect: + * Kernel error: 'Provided default xmit policy not supported' (bad attribute: .linkinfo.data(netkit).policy) + */ + EXPECT_NE(NULL, strstr(self->ys->err.msg, "bad attribute: .linkinfo.data(netkit).policy")) { + TH_LOG("expected extack msg not found: %s", + self->ys->err.msg); + } +} + +TEST_HARNESS_MAIN From 6cf8fb4722c35270df8e087c0c589da0577377a6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 6 Mar 2026 19:36:25 -0800 Subject: [PATCH 0374/1561] tools: ynl: convert tc and tc-filter-add samples to selftest Convert tc.c and tc-filter-add.c to produce KTAP output with kselftest_harness. Merge the two tests together. They both test TC one is testing qdisc and the other classifiers but they can easily live in a single selftest. Make the test spawn a new netns, and run the operations on lo to avoid onerous setup and cleanup. TAP version 13 1..2 # Starting 2 tests from 1 test cases. # RUN tc.qdisc ... # lo: fq_codel limit: 10240p target: 5ms new_flow_cnt: 0 # OK tc.qdisc ok 1 tc.qdisc # RUN tc.flower ... # flower pref 1 proto: 0x8100 # flower: # vlan_id: 100 # vlan_prio: 5 # num_of_vlans: 3 # action order: 1 vlan push id 200 protocol 0x8100 priority 0 # action order: 2 vlan push id 300 protocol 0x8100 priority 0 # OK tc.flower ok 2 tc.flower # PASSED: 2 / 2 tests passed. # Totals: pass:2 fail:0 xfail:0 xpass:0 skip:0 error:0 Reviewed-by: Donald Hunter Tested-by: Donald Hunter Link: https://patch.msgid.link/20260307033630.1396085-6-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/tests/Makefile | 4 +- tools/net/ynl/tests/config | 6 + tools/net/ynl/tests/tc-filter-add.c | 335 ----------------------- tools/net/ynl/tests/tc.c | 405 +++++++++++++++++++++++++--- 4 files changed, 374 insertions(+), 376 deletions(-) delete mode 100644 tools/net/ynl/tests/tc-filter-add.c diff --git a/tools/net/ynl/tests/Makefile b/tools/net/ynl/tests/Makefile index 08d1146d91ce..524092a8de7e 100644 --- a/tools/net/ynl/tests/Makefile +++ b/tools/net/ynl/tests/Makefile @@ -22,6 +22,7 @@ TEST_GEN_PROGS := \ netdev \ ovs \ rt-link \ + tc \ # end of TEST_GEN_PROGS BINS := \ @@ -29,13 +30,10 @@ BINS := \ ethtool \ rt-addr \ rt-route \ - tc \ - tc-filter-add \ # end of BINS CFLAGS_netdev:=$(CFLAGS_netdev) $(CFLAGS_rt-link) CFLAGS_ovs:=$(CFLAGS_ovs_datapath) -CFLAGS_tc-filter-add:=$(CFLAGS_tc) include $(wildcard *.d) diff --git a/tools/net/ynl/tests/config b/tools/net/ynl/tests/config index b4c58d86a6c2..75c0fe72391f 100644 --- a/tools/net/ynl/tests/config +++ b/tools/net/ynl/tests/config @@ -1,7 +1,13 @@ CONFIG_DUMMY=m CONFIG_INET_DIAG=y CONFIG_IPV6=y +CONFIG_NET_ACT_VLAN=m +CONFIG_NET_CLS_ACT=y +CONFIG_NET_CLS_FLOWER=m +CONFIG_NET_SCH_FQ_CODEL=m +CONFIG_NET_SCH_INGRESS=m CONFIG_NET_NS=y +CONFIG_NET_SCHED=y CONFIG_NETDEVSIM=m CONFIG_NETKIT=y CONFIG_OPENVSWITCH=m diff --git a/tools/net/ynl/tests/tc-filter-add.c b/tools/net/ynl/tests/tc-filter-add.c deleted file mode 100644 index 97871e9e9edc..000000000000 --- a/tools/net/ynl/tests/tc-filter-add.c +++ /dev/null @@ -1,335 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "tc-user.h" - -#define TC_HANDLE (0xFFFF << 16) - -const char *vlan_act_name(struct tc_vlan *p) -{ - switch (p->v_action) { - case TCA_VLAN_ACT_POP: - return "pop"; - case TCA_VLAN_ACT_PUSH: - return "push"; - case TCA_VLAN_ACT_MODIFY: - return "modify"; - default: - break; - } - - return "not supported"; -} - -const char *gact_act_name(struct tc_gact *p) -{ - switch (p->action) { - case TC_ACT_SHOT: - return "drop"; - case TC_ACT_OK: - return "ok"; - case TC_ACT_PIPE: - return "pipe"; - default: - break; - } - - return "not supported"; -} - -static void print_vlan(struct tc_act_vlan_attrs *vlan) -{ - printf("%s ", vlan_act_name(vlan->parms)); - if (vlan->_present.push_vlan_id) - printf("id %u ", vlan->push_vlan_id); - if (vlan->_present.push_vlan_protocol) - printf("protocol %#x ", ntohs(vlan->push_vlan_protocol)); - if (vlan->_present.push_vlan_priority) - printf("priority %u ", vlan->push_vlan_priority); -} - -static void print_gact(struct tc_act_gact_attrs *gact) -{ - struct tc_gact *p = gact->parms; - - printf("%s ", gact_act_name(p)); -} - -static void flower_print(struct tc_flower_attrs *flower, const char *kind) -{ - struct tc_act_attrs *a; - unsigned int i; - - printf("%s:\n", kind); - - if (flower->_present.key_vlan_id) - printf(" vlan_id: %u\n", flower->key_vlan_id); - if (flower->_present.key_vlan_prio) - printf(" vlan_prio: %u\n", flower->key_vlan_prio); - if (flower->_present.key_num_of_vlans) - printf(" num_of_vlans: %u\n", flower->key_num_of_vlans); - - for (i = 0; i < flower->_count.act; i++) { - a = &flower->act[i]; - printf("action order: %i %s ", i + 1, a->kind); - if (a->options._present.vlan) - print_vlan(&a->options.vlan); - else if (a->options._present.gact) - print_gact(&a->options.gact); - printf("\n"); - } - printf("\n"); -} - -static void tc_filter_print(struct tc_gettfilter_rsp *f) -{ - struct tc_options_msg *opt = &f->options; - - if (opt->_present.flower) - flower_print(&opt->flower, f->kind); - else if (f->_len.kind) - printf("%s pref %u proto: %#x\n", f->kind, - (f->_hdr.tcm_info >> 16), - ntohs(TC_H_MIN(f->_hdr.tcm_info))); -} - -static int tc_filter_add(struct ynl_sock *ys, int ifi) -{ - struct tc_newtfilter_req *req; - struct tc_act_attrs *acts; - struct tc_vlan p = { - .action = TC_ACT_PIPE, - .v_action = TCA_VLAN_ACT_PUSH - }; - __u16 flags = NLM_F_REQUEST | NLM_F_EXCL | NLM_F_CREATE; - int ret; - - req = tc_newtfilter_req_alloc(); - if (!req) { - fprintf(stderr, "tc_newtfilter_req_alloc failed\n"); - return -1; - } - memset(req, 0, sizeof(*req)); - - acts = tc_act_attrs_alloc(3); - if (!acts) { - fprintf(stderr, "tc_act_attrs_alloc\n"); - tc_newtfilter_req_free(req); - return -1; - } - memset(acts, 0, sizeof(*acts) * 3); - - req->_hdr.tcm_ifindex = ifi; - req->_hdr.tcm_parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS); - req->_hdr.tcm_info = TC_H_MAKE(1 << 16, htons(ETH_P_8021Q)); - req->chain = 0; - - tc_newtfilter_req_set_nlflags(req, flags); - tc_newtfilter_req_set_kind(req, "flower"); - tc_newtfilter_req_set_options_flower_key_vlan_id(req, 100); - tc_newtfilter_req_set_options_flower_key_vlan_prio(req, 5); - tc_newtfilter_req_set_options_flower_key_num_of_vlans(req, 3); - - __tc_newtfilter_req_set_options_flower_act(req, acts, 3); - - /* Skip action at index 0 because in TC, the action array - * index starts at 1, with each index defining the action's - * order. In contrast, in YNL indexed arrays start at index 0. - */ - tc_act_attrs_set_kind(&acts[1], "vlan"); - tc_act_attrs_set_options_vlan_parms(&acts[1], &p, sizeof(p)); - tc_act_attrs_set_options_vlan_push_vlan_id(&acts[1], 200); - tc_act_attrs_set_kind(&acts[2], "vlan"); - tc_act_attrs_set_options_vlan_parms(&acts[2], &p, sizeof(p)); - tc_act_attrs_set_options_vlan_push_vlan_id(&acts[2], 300); - - tc_newtfilter_req_set_options_flower_flags(req, 0); - tc_newtfilter_req_set_options_flower_key_eth_type(req, htons(0x8100)); - - ret = tc_newtfilter(ys, req); - if (ret) - fprintf(stderr, "tc_newtfilter: %s\n", ys->err.msg); - - tc_newtfilter_req_free(req); - - return ret; -} - -static int tc_filter_show(struct ynl_sock *ys, int ifi) -{ - struct tc_gettfilter_req_dump *req; - struct tc_gettfilter_list *rsp; - - req = tc_gettfilter_req_dump_alloc(); - if (!req) { - fprintf(stderr, "tc_gettfilter_req_dump_alloc failed\n"); - return -1; - } - memset(req, 0, sizeof(*req)); - - req->_hdr.tcm_ifindex = ifi; - req->_hdr.tcm_parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS); - req->_present.chain = 1; - req->chain = 0; - - rsp = tc_gettfilter_dump(ys, req); - tc_gettfilter_req_dump_free(req); - if (!rsp) { - fprintf(stderr, "YNL: %s\n", ys->err.msg); - return -1; - } - - if (ynl_dump_empty(rsp)) - fprintf(stderr, "Error: no filters reported\n"); - else - ynl_dump_foreach(rsp, flt) tc_filter_print(flt); - - tc_gettfilter_list_free(rsp); - - return 0; -} - -static int tc_filter_del(struct ynl_sock *ys, int ifi) -{ - struct tc_deltfilter_req *req; - __u16 flags = NLM_F_REQUEST; - int ret; - - req = tc_deltfilter_req_alloc(); - if (!req) { - fprintf(stderr, "tc_deltfilter_req_alloc failed\n"); - return -1; - } - memset(req, 0, sizeof(*req)); - - req->_hdr.tcm_ifindex = ifi; - req->_hdr.tcm_parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS); - req->_hdr.tcm_info = TC_H_MAKE(1 << 16, htons(ETH_P_8021Q)); - tc_deltfilter_req_set_nlflags(req, flags); - - ret = tc_deltfilter(ys, req); - if (ret) - fprintf(stderr, "tc_deltfilter failed: %s\n", ys->err.msg); - - tc_deltfilter_req_free(req); - - return ret; -} - -static int tc_clsact_add(struct ynl_sock *ys, int ifi) -{ - struct tc_newqdisc_req *req; - __u16 flags = NLM_F_REQUEST | NLM_F_EXCL | NLM_F_CREATE; - int ret; - - req = tc_newqdisc_req_alloc(); - if (!req) { - fprintf(stderr, "tc_newqdisc_req_alloc failed\n"); - return -1; - } - memset(req, 0, sizeof(*req)); - - req->_hdr.tcm_ifindex = ifi; - req->_hdr.tcm_parent = TC_H_CLSACT; - req->_hdr.tcm_handle = TC_HANDLE; - tc_newqdisc_req_set_nlflags(req, flags); - tc_newqdisc_req_set_kind(req, "clsact"); - - ret = tc_newqdisc(ys, req); - if (ret) - fprintf(stderr, "tc_newqdisc failed: %s\n", ys->err.msg); - - tc_newqdisc_req_free(req); - - return ret; -} - -static int tc_clsact_del(struct ynl_sock *ys, int ifi) -{ - struct tc_delqdisc_req *req; - __u16 flags = NLM_F_REQUEST; - int ret; - - req = tc_delqdisc_req_alloc(); - if (!req) { - fprintf(stderr, "tc_delqdisc_req_alloc failed\n"); - return -1; - } - memset(req, 0, sizeof(*req)); - - req->_hdr.tcm_ifindex = ifi; - req->_hdr.tcm_parent = TC_H_CLSACT; - req->_hdr.tcm_handle = TC_HANDLE; - tc_delqdisc_req_set_nlflags(req, flags); - - ret = tc_delqdisc(ys, req); - if (ret) - fprintf(stderr, "tc_delqdisc failed: %s\n", ys->err.msg); - - tc_delqdisc_req_free(req); - - return ret; -} - -static int tc_filter_config(struct ynl_sock *ys, int ifi) -{ - int ret = 0; - - if (tc_filter_add(ys, ifi)) - return -1; - - ret = tc_filter_show(ys, ifi); - - if (tc_filter_del(ys, ifi)) - return -1; - - return ret; -} - -int main(int argc, char **argv) -{ - struct ynl_error yerr; - struct ynl_sock *ys; - int ifi, ret = 0; - - if (argc < 2) { - fprintf(stderr, "Usage: %s \n", argv[0]); - return 1; - } - ifi = if_nametoindex(argv[1]); - if (!ifi) { - perror("if_nametoindex"); - return 1; - } - - ys = ynl_sock_create(&ynl_tc_family, &yerr); - if (!ys) { - fprintf(stderr, "YNL: %s\n", yerr.msg); - return 1; - } - - if (tc_clsact_add(ys, ifi)) { - ret = 2; - goto err_destroy; - } - - if (tc_filter_config(ys, ifi)) - ret = 3; - - if (tc_clsact_del(ys, ifi)) - ret = 4; - -err_destroy: - ynl_sock_destroy(ys); - return ret; -} diff --git a/tools/net/ynl/tests/tc.c b/tools/net/ynl/tests/tc.c index 0bfff0fdd792..6ff13876578d 100644 --- a/tools/net/ynl/tests/tc.c +++ b/tools/net/ynl/tests/tc.c @@ -1,21 +1,34 @@ // SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include #include #include +#include +#include +#include +#include +#include +#include +#include #include -#include +#include #include "tc-user.h" -static void tc_qdisc_print(struct tc_getqdisc_rsp *q) +#define TC_HANDLE (0xFFFF << 16) + +static bool tc_qdisc_print(struct __test_metadata *_metadata, + struct tc_getqdisc_rsp *q) { + bool was_fq_codel = false; char ifname[IF_NAMESIZE]; const char *name; name = if_indextoname(q->_hdr.tcm_ifindex, ifname); - if (name) - printf("%16s: ", name); + EXPECT_TRUE((bool)name); + ksft_print_msg("%16s: ", name ?: "no-name"); if (q->_len.kind) { printf("%s ", q->kind); @@ -27,6 +40,11 @@ static void tc_qdisc_print(struct tc_getqdisc_rsp *q) fq_codel = &q->options.fq_codel; stats = q->stats2.app.fq_codel; + EXPECT_EQ(true, + fq_codel->_present.limit && + fq_codel->_present.target && + q->stats2.app._len.fq_codel); + if (fq_codel->_present.limit) printf("limit: %dp ", fq_codel->limit); if (fq_codel->_present.target) @@ -35,46 +53,357 @@ static void tc_qdisc_print(struct tc_getqdisc_rsp *q) if (q->stats2.app._len.fq_codel) printf("new_flow_cnt: %d ", stats->qdisc_stats.new_flow_count); + was_fq_codel = true; } } - printf("\n"); + + return was_fq_codel; } -int main(int argc, char **argv) +static const char *vlan_act_name(struct tc_vlan *p) { - struct tc_getqdisc_req_dump *req; - struct tc_getqdisc_list *rsp; - struct ynl_error yerr; - struct ynl_sock *ys; - - ys = ynl_sock_create(&ynl_tc_family, &yerr); - if (!ys) { - fprintf(stderr, "YNL: %s\n", yerr.msg); - return 1; + switch (p->v_action) { + case TCA_VLAN_ACT_POP: + return "pop"; + case TCA_VLAN_ACT_PUSH: + return "push"; + case TCA_VLAN_ACT_MODIFY: + return "modify"; + default: + break; } - req = tc_getqdisc_req_dump_alloc(); - if (!req) - goto err_destroy; - - rsp = tc_getqdisc_dump(ys, req); - tc_getqdisc_req_dump_free(req); - if (!rsp) - goto err_close; - - if (ynl_dump_empty(rsp)) - fprintf(stderr, "Error: no addresses reported\n"); - ynl_dump_foreach(rsp, qdisc) - tc_qdisc_print(qdisc); - tc_getqdisc_list_free(rsp); - - ynl_sock_destroy(ys); - return 0; - -err_close: - fprintf(stderr, "YNL: %s\n", ys->err.msg); -err_destroy: - ynl_sock_destroy(ys); - return 2; + return "not supported"; } + +static const char *gact_act_name(struct tc_gact *p) +{ + switch (p->action) { + case TC_ACT_SHOT: + return "drop"; + case TC_ACT_OK: + return "ok"; + case TC_ACT_PIPE: + return "pipe"; + default: + break; + } + + return "not supported"; +} + +static void print_vlan(struct tc_act_vlan_attrs *vlan) +{ + printf("%s ", vlan_act_name(vlan->parms)); + if (vlan->_present.push_vlan_id) + printf("id %u ", vlan->push_vlan_id); + if (vlan->_present.push_vlan_protocol) + printf("protocol %#x ", ntohs(vlan->push_vlan_protocol)); + if (vlan->_present.push_vlan_priority) + printf("priority %u ", vlan->push_vlan_priority); +} + +static void print_gact(struct tc_act_gact_attrs *gact) +{ + struct tc_gact *p = gact->parms; + + printf("%s ", gact_act_name(p)); +} + +static void flower_print(struct tc_flower_attrs *flower, const char *kind) +{ + struct tc_act_attrs *a; + unsigned int i; + + ksft_print_msg("%s:\n", kind); + + if (flower->_present.key_vlan_id) + ksft_print_msg(" vlan_id: %u\n", flower->key_vlan_id); + if (flower->_present.key_vlan_prio) + ksft_print_msg(" vlan_prio: %u\n", flower->key_vlan_prio); + if (flower->_present.key_num_of_vlans) + ksft_print_msg(" num_of_vlans: %u\n", + flower->key_num_of_vlans); + + for (i = 0; i < flower->_count.act; i++) { + a = &flower->act[i]; + ksft_print_msg("action order: %i %s ", i + 1, a->kind); + if (a->options._present.vlan) + print_vlan(&a->options.vlan); + else if (a->options._present.gact) + print_gact(&a->options.gact); + printf("\n"); + } +} + +static void tc_filter_print(struct __test_metadata *_metadata, + struct tc_gettfilter_rsp *f) +{ + struct tc_options_msg *opt = &f->options; + + if (opt->_present.flower) { + EXPECT_TRUE((bool)f->_len.kind); + flower_print(&opt->flower, f->kind); + } else if (f->_len.kind) { + ksft_print_msg("%s pref %u proto: %#x\n", f->kind, + (f->_hdr.tcm_info >> 16), + ntohs(TC_H_MIN(f->_hdr.tcm_info))); + } +} + +static int tc_clsact_add(struct ynl_sock *ys, int ifi) +{ + struct tc_newqdisc_req *req; + int ret; + + req = tc_newqdisc_req_alloc(); + if (!req) + return -1; + memset(req, 0, sizeof(*req)); + + req->_hdr.tcm_ifindex = ifi; + req->_hdr.tcm_parent = TC_H_CLSACT; + req->_hdr.tcm_handle = TC_HANDLE; + tc_newqdisc_req_set_nlflags(req, + NLM_F_REQUEST | NLM_F_EXCL | NLM_F_CREATE); + tc_newqdisc_req_set_kind(req, "clsact"); + + ret = tc_newqdisc(ys, req); + tc_newqdisc_req_free(req); + + return ret; +} + +static int tc_clsact_del(struct ynl_sock *ys, int ifi) +{ + struct tc_delqdisc_req *req; + int ret; + + req = tc_delqdisc_req_alloc(); + if (!req) + return -1; + memset(req, 0, sizeof(*req)); + + req->_hdr.tcm_ifindex = ifi; + req->_hdr.tcm_parent = TC_H_CLSACT; + req->_hdr.tcm_handle = TC_HANDLE; + tc_delqdisc_req_set_nlflags(req, NLM_F_REQUEST); + + ret = tc_delqdisc(ys, req); + tc_delqdisc_req_free(req); + + return ret; +} + +static int tc_filter_add(struct ynl_sock *ys, int ifi) +{ + struct tc_newtfilter_req *req; + struct tc_act_attrs *acts; + struct tc_vlan p = { + .action = TC_ACT_PIPE, + .v_action = TCA_VLAN_ACT_PUSH + }; + int ret; + + req = tc_newtfilter_req_alloc(); + if (!req) + return -1; + memset(req, 0, sizeof(*req)); + + acts = tc_act_attrs_alloc(3); + if (!acts) { + tc_newtfilter_req_free(req); + return -1; + } + memset(acts, 0, sizeof(*acts) * 3); + + req->_hdr.tcm_ifindex = ifi; + req->_hdr.tcm_parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS); + req->_hdr.tcm_info = TC_H_MAKE(1 << 16, htons(ETH_P_8021Q)); + req->chain = 0; + + tc_newtfilter_req_set_nlflags(req, NLM_F_REQUEST | NLM_F_EXCL | NLM_F_CREATE); + tc_newtfilter_req_set_kind(req, "flower"); + tc_newtfilter_req_set_options_flower_key_vlan_id(req, 100); + tc_newtfilter_req_set_options_flower_key_vlan_prio(req, 5); + tc_newtfilter_req_set_options_flower_key_num_of_vlans(req, 3); + + __tc_newtfilter_req_set_options_flower_act(req, acts, 3); + + /* Skip action at index 0 because in TC, the action array + * index starts at 1, with each index defining the action's + * order. In contrast, in YNL indexed arrays start at index 0. + */ + tc_act_attrs_set_kind(&acts[1], "vlan"); + tc_act_attrs_set_options_vlan_parms(&acts[1], &p, sizeof(p)); + tc_act_attrs_set_options_vlan_push_vlan_id(&acts[1], 200); + tc_act_attrs_set_kind(&acts[2], "vlan"); + tc_act_attrs_set_options_vlan_parms(&acts[2], &p, sizeof(p)); + tc_act_attrs_set_options_vlan_push_vlan_id(&acts[2], 300); + + tc_newtfilter_req_set_options_flower_flags(req, 0); + tc_newtfilter_req_set_options_flower_key_eth_type(req, htons(0x8100)); + + ret = tc_newtfilter(ys, req); + tc_newtfilter_req_free(req); + + return ret; +} + +static int tc_filter_del(struct ynl_sock *ys, int ifi) +{ + struct tc_deltfilter_req *req; + int ret; + + req = tc_deltfilter_req_alloc(); + if (!req) + return -1; + memset(req, 0, sizeof(*req)); + + req->_hdr.tcm_ifindex = ifi; + req->_hdr.tcm_parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS); + req->_hdr.tcm_info = TC_H_MAKE(1 << 16, htons(ETH_P_8021Q)); + tc_deltfilter_req_set_nlflags(req, NLM_F_REQUEST); + + ret = tc_deltfilter(ys, req); + tc_deltfilter_req_free(req); + + return ret; +} + +FIXTURE(tc) +{ + struct ynl_sock *ys; + int ifindex; +}; + +FIXTURE_SETUP(tc) +{ + struct ynl_error yerr; + int ret; + + ret = unshare(CLONE_NEWNET); + ASSERT_EQ(0, ret); + + self->ifindex = 1; /* loopback */ + + self->ys = ynl_sock_create(&ynl_tc_family, &yerr); + ASSERT_NE(NULL, self->ys) { + TH_LOG("failed to create tc socket: %s", yerr.msg); + } +} + +FIXTURE_TEARDOWN(tc) +{ + ynl_sock_destroy(self->ys); +} + +TEST_F(tc, qdisc) +{ + struct tc_getqdisc_req_dump *dreq; + struct tc_newqdisc_req *add_req; + struct tc_delqdisc_req *del_req; + struct tc_getqdisc_list *rsp; + bool found = false; + int ret; + + add_req = tc_newqdisc_req_alloc(); + ASSERT_NE(NULL, add_req); + memset(add_req, 0, sizeof(*add_req)); + + add_req->_hdr.tcm_ifindex = self->ifindex; + add_req->_hdr.tcm_parent = TC_H_ROOT; + tc_newqdisc_req_set_nlflags(add_req, + NLM_F_REQUEST | NLM_F_CREATE); + tc_newqdisc_req_set_kind(add_req, "fq_codel"); + + ret = tc_newqdisc(self->ys, add_req); + tc_newqdisc_req_free(add_req); + ASSERT_EQ(0, ret) { + TH_LOG("qdisc add failed: %s", self->ys->err.msg); + } + + dreq = tc_getqdisc_req_dump_alloc(); + ASSERT_NE(NULL, dreq); + rsp = tc_getqdisc_dump(self->ys, dreq); + tc_getqdisc_req_dump_free(dreq); + ASSERT_NE(NULL, rsp) { + TH_LOG("dump failed: %s", self->ys->err.msg); + } + ASSERT_FALSE(ynl_dump_empty(rsp)); + + ynl_dump_foreach(rsp, qdisc) { + found |= tc_qdisc_print(_metadata, qdisc); + } + tc_getqdisc_list_free(rsp); + EXPECT_TRUE(found); + + del_req = tc_delqdisc_req_alloc(); + ASSERT_NE(NULL, del_req); + memset(del_req, 0, sizeof(*del_req)); + + del_req->_hdr.tcm_ifindex = self->ifindex; + del_req->_hdr.tcm_parent = TC_H_ROOT; + tc_delqdisc_req_set_nlflags(del_req, NLM_F_REQUEST); + + ret = tc_delqdisc(self->ys, del_req); + tc_delqdisc_req_free(del_req); + EXPECT_EQ(0, ret) { + TH_LOG("qdisc del failed: %s", self->ys->err.msg); + } +} + +TEST_F(tc, flower) +{ + struct tc_gettfilter_req_dump *dreq; + struct tc_gettfilter_list *rsp; + bool found = false; + int ret; + + ret = tc_clsact_add(self->ys, self->ifindex); + if (ret) + SKIP(return, "clsact not supported: %s", self->ys->err.msg); + + ret = tc_filter_add(self->ys, self->ifindex); + ASSERT_EQ(0, ret) { + TH_LOG("filter add failed: %s", self->ys->err.msg); + } + + dreq = tc_gettfilter_req_dump_alloc(); + ASSERT_NE(NULL, dreq); + memset(dreq, 0, sizeof(*dreq)); + dreq->_hdr.tcm_ifindex = self->ifindex; + dreq->_hdr.tcm_parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS); + dreq->_present.chain = 1; + dreq->chain = 0; + + rsp = tc_gettfilter_dump(self->ys, dreq); + tc_gettfilter_req_dump_free(dreq); + ASSERT_NE(NULL, rsp) { + TH_LOG("filter dump failed: %s", self->ys->err.msg); + } + + ynl_dump_foreach(rsp, flt) { + tc_filter_print(_metadata, flt); + if (flt->options._present.flower) { + EXPECT_EQ(100, flt->options.flower.key_vlan_id); + EXPECT_EQ(5, flt->options.flower.key_vlan_prio); + found = true; + } + } + tc_gettfilter_list_free(rsp); + EXPECT_TRUE(found); + + ret = tc_filter_del(self->ys, self->ifindex); + EXPECT_EQ(0, ret) { + TH_LOG("filter del failed: %s", self->ys->err.msg); + } + + ret = tc_clsact_del(self->ys, self->ifindex); + EXPECT_EQ(0, ret) { + TH_LOG("clsact del failed: %s", self->ys->err.msg); + } +} + +TEST_HARNESS_MAIN From 7a95e52562936e54c77f3fb2177016d68f1f203f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 6 Mar 2026 19:36:26 -0800 Subject: [PATCH 0375/1561] tools: ynl: add netdevsim wrapper library for YNL tests Some tests need netdevsim setup which is painful to do from C. Add ynl_nsim_lib.sh, a shared library providing nsim_setup and nsim_cleanup functions for tests that need a netdevsim device. Reviewed-by: Donald Hunter Tested-by: Donald Hunter Link: https://patch.msgid.link/20260307033630.1396085-7-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/tests/Makefile | 5 +++++ tools/net/ynl/tests/ynl_nsim_lib.sh | 35 +++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) create mode 100644 tools/net/ynl/tests/ynl_nsim_lib.sh diff --git a/tools/net/ynl/tests/Makefile b/tools/net/ynl/tests/Makefile index 524092a8de7e..a329de031add 100644 --- a/tools/net/ynl/tests/Makefile +++ b/tools/net/ynl/tests/Makefile @@ -32,6 +32,8 @@ BINS := \ rt-route \ # end of BINS +TEST_FILES := ynl_nsim_lib.sh + CFLAGS_netdev:=$(CFLAGS_netdev) $(CFLAGS_rt-link) CFLAGS_ovs:=$(CFLAGS_ovs_datapath) @@ -68,6 +70,9 @@ install: $(TEST_GEN_PROGS) $(BINS) $$test > $(INSTALL_PATH)/ynl/$$name; \ chmod +x $(INSTALL_PATH)/ynl/$$name; \ done + @for file in $(TEST_FILES); do \ + cp $$file $(INSTALL_PATH)/ynl/$$file; \ + done @for bin in $(TEST_GEN_PROGS) $(BINS); do \ cp $$bin $(INSTALL_PATH)/ynl/$$bin; \ done diff --git a/tools/net/ynl/tests/ynl_nsim_lib.sh b/tools/net/ynl/tests/ynl_nsim_lib.sh new file mode 100644 index 000000000000..98cdce44a69c --- /dev/null +++ b/tools/net/ynl/tests/ynl_nsim_lib.sh @@ -0,0 +1,35 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Shared netdevsim setup/cleanup for YNL C test wrappers + +NSIM_ID="1337" +NSIM_DEV="" +KSFT_SKIP=4 + +nsim_cleanup() { + echo "$NSIM_ID" > /sys/bus/netdevsim/del_device 2>/dev/null || true +} + +nsim_setup() { + modprobe netdevsim 2>/dev/null + if ! [ -f /sys/bus/netdevsim/new_device ]; then + echo "netdevsim module not available, skipping" >&2 + exit "$KSFT_SKIP" + fi + + trap nsim_cleanup EXIT + + echo "$NSIM_ID 1" > /sys/bus/netdevsim/new_device + udevadm settle + + NSIM_DEV=$(ls /sys/bus/netdevsim/devices/netdevsim${NSIM_ID}/net 2>/dev/null | head -1) + if [ -z "$NSIM_DEV" ]; then + echo "failed to find netdevsim device" >&2 + exit 1 + fi + + ip link set dev "$NSIM_DEV" name nsim0 + ip link set dev nsim0 up + ip addr add 192.168.1.1/24 dev nsim0 + ip addr add 2001:db8::1/64 dev nsim0 nodad +} From db20b374e7f74d2336c8d9f8a6e4806985328980 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 6 Mar 2026 19:36:27 -0800 Subject: [PATCH 0376/1561] tools: ynl: convert devlink sample to selftest Convert devlink.c to use kselftest_harness.h with FIXTURE/TEST_F. Move devlink from BINS to TEST_GEN_FILES in the Makefile since it's invoked via the devlink.sh wrapper which sets up netdevsim. Output: TAP version 13 1..2 # Starting 2 tests from 1 test cases. # RUN devlink.dump ... # netdevsim/netdevsim1337 # OK devlink.dump ok 1 devlink.dump # RUN devlink.info ... # netdevsim/netdevsim1337: # driver: netdevsim # running fw: # fw.mgmt: 10.20.30 # OK devlink.info ok 2 devlink.info # PASSED: 2 / 2 tests passed. # Totals: pass:2 fail:0 xfail:0 xpass:0 skip:0 error:0 Reviewed-by: Donald Hunter Tested-by: Donald Hunter Link: https://patch.msgid.link/20260307033630.1396085-8-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/tests/Makefile | 16 ++++-- tools/net/ynl/tests/devlink.c | 100 +++++++++++++++++++++++---------- tools/net/ynl/tests/devlink.sh | 5 ++ 3 files changed, 85 insertions(+), 36 deletions(-) create mode 100755 tools/net/ynl/tests/devlink.sh diff --git a/tools/net/ynl/tests/Makefile b/tools/net/ynl/tests/Makefile index a329de031add..14d399a70f10 100644 --- a/tools/net/ynl/tests/Makefile +++ b/tools/net/ynl/tests/Makefile @@ -14,6 +14,7 @@ endif LDLIBS=../lib/ynl.a ../generated/protos.a TEST_PROGS := \ + devlink.sh \ test_ynl_cli.sh \ test_ynl_ethtool.sh \ # end of TEST_PROGS @@ -25,8 +26,11 @@ TEST_GEN_PROGS := \ tc \ # end of TEST_GEN_PROGS -BINS := \ +TEST_GEN_FILES := \ devlink \ +# end of TEST_GEN_FILES + +BINS := \ ethtool \ rt-addr \ rt-route \ @@ -41,7 +45,7 @@ include $(wildcard *.d) INSTALL_PATH ?= $(DESTDIR)/usr/share/kselftest -all: $(TEST_GEN_PROGS) $(BINS) +all: $(TEST_GEN_PROGS) $(TEST_GEN_FILES) $(BINS) ../lib/ynl.a: @$(MAKE) -C ../lib @@ -49,7 +53,7 @@ all: $(TEST_GEN_PROGS) $(BINS) ../generated/protos.a: @$(MAKE) -C ../generated -$(TEST_GEN_PROGS) $(BINS): %: %.c ../lib/ynl.a ../generated/protos.a +$(TEST_GEN_PROGS) $(TEST_GEN_FILES) $(BINS): %: %.c ../lib/ynl.a ../generated/protos.a @echo -e '\tCC test $@' @$(COMPILE.c) $(CFLAGS_$@) $@.c -o $@.o @$(LINK.c) $@.o -o $@ $(LDLIBS) @@ -59,7 +63,7 @@ run_tests: ./$$test; \ done -install: $(TEST_GEN_PROGS) $(BINS) +install: $(TEST_GEN_PROGS) $(TEST_GEN_FILES) $(BINS) @mkdir -p $(INSTALL_PATH)/ynl @cp ../../../testing/selftests/kselftest/ktap_helpers.sh $(INSTALL_PATH)/ @for test in $(TEST_PROGS); do \ @@ -73,7 +77,7 @@ install: $(TEST_GEN_PROGS) $(BINS) @for file in $(TEST_FILES); do \ cp $$file $(INSTALL_PATH)/ynl/$$file; \ done - @for bin in $(TEST_GEN_PROGS) $(BINS); do \ + @for bin in $(TEST_GEN_PROGS) $(TEST_GEN_FILES) $(BINS); do \ cp $$bin $(INSTALL_PATH)/ynl/$$bin; \ done @for test in $(TEST_PROGS) $(TEST_GEN_PROGS); do \ @@ -84,7 +88,7 @@ clean: rm -f *.o *.d *~ distclean: clean - rm -f $(TEST_GEN_PROGS) $(BINS) + rm -f $(TEST_GEN_PROGS) $(TEST_GEN_FILES) $(BINS) .PHONY: all install clean distclean run_tests .DEFAULT_GOAL=all diff --git a/tools/net/ynl/tests/devlink.c b/tools/net/ynl/tests/devlink.c index ac9dfb01f280..2e668bb15af1 100644 --- a/tools/net/ynl/tests/devlink.c +++ b/tools/net/ynl/tests/devlink.c @@ -4,58 +4,98 @@ #include +#include + #include "devlink-user.h" -int main(int argc, char **argv) +FIXTURE(devlink) +{ + struct ynl_sock *ys; +}; + +FIXTURE_SETUP(devlink) +{ + self->ys = ynl_sock_create(&ynl_devlink_family, NULL); + ASSERT_NE(NULL, self->ys) + TH_LOG("failed to create devlink socket"); +} + +FIXTURE_TEARDOWN(devlink) +{ + ynl_sock_destroy(self->ys); +} + +TEST_F(devlink, dump) { struct devlink_get_list *devs; - struct ynl_sock *ys; - ys = ynl_sock_create(&ynl_devlink_family, NULL); - if (!ys) - return 1; + devs = devlink_get_dump(self->ys); + ASSERT_NE(NULL, devs) { + TH_LOG("dump failed: %s", self->ys->err.msg); + } - devs = devlink_get_dump(ys); - if (!devs) - goto err_close; + if (ynl_dump_empty(devs)) { + devlink_get_list_free(devs); + SKIP(return, "no entries in dump"); + } + + ynl_dump_foreach(devs, d) { + EXPECT_TRUE((bool)d->_len.bus_name); + EXPECT_TRUE((bool)d->_len.dev_name); + ksft_print_msg("%s/%s\n", d->bus_name, d->dev_name); + } + + devlink_get_list_free(devs); +} + +TEST_F(devlink, info) +{ + struct devlink_get_list *devs; + + devs = devlink_get_dump(self->ys); + ASSERT_NE(NULL, devs) { + TH_LOG("dump failed: %s", self->ys->err.msg); + } + + if (ynl_dump_empty(devs)) { + devlink_get_list_free(devs); + SKIP(return, "no devices to query"); + } ynl_dump_foreach(devs, d) { struct devlink_info_get_req *info_req; struct devlink_info_get_rsp *info_rsp; - unsigned i; + unsigned int i; - printf("%s/%s:\n", d->bus_name, d->dev_name); + EXPECT_TRUE((bool)d->_len.bus_name); + EXPECT_TRUE((bool)d->_len.dev_name); + ksft_print_msg("%s/%s:\n", d->bus_name, d->dev_name); info_req = devlink_info_get_req_alloc(); + ASSERT_NE(NULL, info_req); devlink_info_get_req_set_bus_name(info_req, d->bus_name); devlink_info_get_req_set_dev_name(info_req, d->dev_name); - info_rsp = devlink_info_get(ys, info_req); + info_rsp = devlink_info_get(self->ys, info_req); devlink_info_get_req_free(info_req); - if (!info_rsp) - goto err_free_devs; + ASSERT_NE(NULL, info_rsp) { + devlink_get_list_free(devs); + TH_LOG("info_get failed: %s", self->ys->err.msg); + } + EXPECT_TRUE((bool)info_rsp->_len.info_driver_name); if (info_rsp->_len.info_driver_name) - printf(" driver: %s\n", info_rsp->info_driver_name); + ksft_print_msg(" driver: %s\n", + info_rsp->info_driver_name); if (info_rsp->_count.info_version_running) - printf(" running fw:\n"); + ksft_print_msg(" running fw:\n"); for (i = 0; i < info_rsp->_count.info_version_running; i++) - printf(" %s: %s\n", - info_rsp->info_version_running[i].info_version_name, - info_rsp->info_version_running[i].info_version_value); - printf(" ...\n"); + ksft_print_msg(" %s: %s\n", + info_rsp->info_version_running[i].info_version_name, + info_rsp->info_version_running[i].info_version_value); devlink_info_get_rsp_free(info_rsp); } devlink_get_list_free(devs); - - ynl_sock_destroy(ys); - - return 0; - -err_free_devs: - devlink_get_list_free(devs); -err_close: - fprintf(stderr, "YNL: %s\n", ys->err.msg); - ynl_sock_destroy(ys); - return 2; } + +TEST_HARNESS_MAIN diff --git a/tools/net/ynl/tests/devlink.sh b/tools/net/ynl/tests/devlink.sh new file mode 100755 index 000000000000..a684c749aa5e --- /dev/null +++ b/tools/net/ynl/tests/devlink.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +source "$(dirname "$(realpath "$0")")/ynl_nsim_lib.sh" +nsim_setup +"$(dirname "$(realpath "$0")")/devlink" From 1419fbf5a8179f6d5cc1e2487c01d51c97bcade0 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 6 Mar 2026 19:36:28 -0800 Subject: [PATCH 0377/1561] tools: ynl: convert ethtool sample to selftest Convert ethtool.c to use kselftest_harness.h with FIXTURE/TEST_F. Move ethtool from BINS to TEST_GEN_FILES and add ethtool.sh wrapper which sets up a netdevsim device before running the test binary. Output: TAP version 13 1..2 # Starting 2 tests from 1 test cases. # RUN ethtool.channels ... # nsim0: combined 1 # OK ethtool.channels ok 1 ethtool.channels # RUN ethtool.rings ... # nsim0: rx 512 tx 512 # OK ethtool.rings ok 2 ethtool.rings # PASSED: 2 / 2 tests passed. # Totals: pass:2 fail:0 xfail:0 xpass:0 skip:0 error:0 Reviewed-by: Donald Hunter Tested-by: Donald Hunter Link: https://patch.msgid.link/20260307033630.1396085-9-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/tests/Makefile | 3 +- tools/net/ynl/tests/ethtool.c | 83 ++++++++++++++++++++++------------ tools/net/ynl/tests/ethtool.sh | 5 ++ 3 files changed, 62 insertions(+), 29 deletions(-) create mode 100755 tools/net/ynl/tests/ethtool.sh diff --git a/tools/net/ynl/tests/Makefile b/tools/net/ynl/tests/Makefile index 14d399a70f10..c380e9f331a3 100644 --- a/tools/net/ynl/tests/Makefile +++ b/tools/net/ynl/tests/Makefile @@ -15,6 +15,7 @@ LDLIBS=../lib/ynl.a ../generated/protos.a TEST_PROGS := \ devlink.sh \ + ethtool.sh \ test_ynl_cli.sh \ test_ynl_ethtool.sh \ # end of TEST_PROGS @@ -28,10 +29,10 @@ TEST_GEN_PROGS := \ TEST_GEN_FILES := \ devlink \ + ethtool \ # end of TEST_GEN_FILES BINS := \ - ethtool \ rt-addr \ rt-route \ # end of BINS diff --git a/tools/net/ynl/tests/ethtool.c b/tools/net/ynl/tests/ethtool.c index a7ebbd1b98db..926a75d23c9b 100644 --- a/tools/net/ynl/tests/ethtool.c +++ b/tools/net/ynl/tests/ethtool.c @@ -6,28 +6,49 @@ #include +#include + #include "ethtool-user.h" -int main(int argc, char **argv) +FIXTURE(ethtool) +{ + struct ynl_sock *ys; +}; + +FIXTURE_SETUP(ethtool) +{ + self->ys = ynl_sock_create(&ynl_ethtool_family, NULL); + ASSERT_NE(NULL, self->ys) + TH_LOG("failed to create ethtool socket"); +} + +FIXTURE_TEARDOWN(ethtool) +{ + ynl_sock_destroy(self->ys); +} + +TEST_F(ethtool, channels) { struct ethtool_channels_get_req_dump creq = {}; - struct ethtool_rings_get_req_dump rreq = {}; struct ethtool_channels_get_list *channels; - struct ethtool_rings_get_list *rings; - struct ynl_sock *ys; - ys = ynl_sock_create(&ynl_ethtool_family, NULL); - if (!ys) - return 1; + creq._present.header = 1; /* ethtool needs an empty nest */ + channels = ethtool_channels_get_dump(self->ys, &creq); + ASSERT_NE(NULL, channels) { + TH_LOG("channels dump failed: %s", self->ys->err.msg); + } - creq._present.header = 1; /* ethtool needs an empty nest, sigh */ - channels = ethtool_channels_get_dump(ys, &creq); - if (!channels) - goto err_close; + if (ynl_dump_empty(channels)) { + ethtool_channels_get_list_free(channels); + SKIP(return, "no entries in channels dump"); + } - printf("Channels:\n"); ynl_dump_foreach(channels, dev) { - printf(" %8s: ", dev->header.dev_name); + EXPECT_TRUE((bool)dev->header._len.dev_name); + ksft_print_msg("%8s: ", dev->header.dev_name); + EXPECT_TRUE(dev->_present.rx_count || + dev->_present.tx_count || + dev->_present.combined_count); if (dev->_present.rx_count) printf("rx %d ", dev->rx_count); if (dev->_present.tx_count) @@ -37,15 +58,28 @@ int main(int argc, char **argv) printf("\n"); } ethtool_channels_get_list_free(channels); +} - rreq._present.header = 1; /* ethtool needs an empty nest.. */ - rings = ethtool_rings_get_dump(ys, &rreq); - if (!rings) - goto err_close; +TEST_F(ethtool, rings) +{ + struct ethtool_rings_get_req_dump rreq = {}; + struct ethtool_rings_get_list *rings; + + rreq._present.header = 1; /* ethtool needs an empty nest */ + rings = ethtool_rings_get_dump(self->ys, &rreq); + ASSERT_NE(NULL, rings) { + TH_LOG("rings dump failed: %s", self->ys->err.msg); + } + + if (ynl_dump_empty(rings)) { + ethtool_rings_get_list_free(rings); + SKIP(return, "no entries in rings dump"); + } - printf("Rings:\n"); ynl_dump_foreach(rings, dev) { - printf(" %8s: ", dev->header.dev_name); + EXPECT_TRUE((bool)dev->header._len.dev_name); + ksft_print_msg("%8s: ", dev->header.dev_name); + EXPECT_TRUE(dev->_present.rx || dev->_present.tx); if (dev->_present.rx) printf("rx %d ", dev->rx); if (dev->_present.tx) @@ -53,13 +87,6 @@ int main(int argc, char **argv) printf("\n"); } ethtool_rings_get_list_free(rings); - - ynl_sock_destroy(ys); - - return 0; - -err_close: - fprintf(stderr, "YNL (%d): %s\n", ys->err.code, ys->err.msg); - ynl_sock_destroy(ys); - return 2; } + +TEST_HARNESS_MAIN diff --git a/tools/net/ynl/tests/ethtool.sh b/tools/net/ynl/tests/ethtool.sh new file mode 100755 index 000000000000..0859ddd697e8 --- /dev/null +++ b/tools/net/ynl/tests/ethtool.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +source "$(dirname "$(realpath "$0")")/ynl_nsim_lib.sh" +nsim_setup +"$(dirname "$(realpath "$0")")/ethtool" From e7a39b8f5fcf076b88925d0e24bdba69d988cf9e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 6 Mar 2026 19:36:29 -0800 Subject: [PATCH 0378/1561] tools: ynl: convert rt-addr sample to selftest Convert rt-addr.c to use kselftest_harness.h with FIXTURE/TEST_F. Validate that the addresses configured by the wrapper (192.168.1.1 and 2001:db8::1) appear in the dump. Output: TAP version 13 1..1 # Starting 1 tests from 1 test cases. # RUN rt_addr.dump ... # lo: 127.0.0.1 # nsim0: 192.168.1.1 # lo: ::1 # nsim0: 2001:db8::1 # nsim0: fe80::7c66:c9ff:fe5f:bf01 # OK rt_addr.dump ok 1 rt_addr.dump # PASSED: 1 / 1 tests passed. # Totals: pass:1 fail:0 xfail:0 xpass:0 skip:0 error:0 Reviewed-by: Donald Hunter Tested-by: Donald Hunter Link: https://patch.msgid.link/20260307033630.1396085-10-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/tests/Makefile | 3 +- tools/net/ynl/tests/rt-addr.c | 85 +++++++++++++++++++++++----------- tools/net/ynl/tests/rt-addr.sh | 5 ++ 3 files changed, 65 insertions(+), 28 deletions(-) create mode 100755 tools/net/ynl/tests/rt-addr.sh diff --git a/tools/net/ynl/tests/Makefile b/tools/net/ynl/tests/Makefile index c380e9f331a3..52b54ea6c90f 100644 --- a/tools/net/ynl/tests/Makefile +++ b/tools/net/ynl/tests/Makefile @@ -16,6 +16,7 @@ LDLIBS=../lib/ynl.a ../generated/protos.a TEST_PROGS := \ devlink.sh \ ethtool.sh \ + rt-addr.sh \ test_ynl_cli.sh \ test_ynl_ethtool.sh \ # end of TEST_PROGS @@ -30,10 +31,10 @@ TEST_GEN_PROGS := \ TEST_GEN_FILES := \ devlink \ ethtool \ + rt-addr \ # end of TEST_GEN_FILES BINS := \ - rt-addr \ rt-route \ # end of BINS diff --git a/tools/net/ynl/tests/rt-addr.c b/tools/net/ynl/tests/rt-addr.c index 2edde5c36b18..f6c3715b2f20 100644 --- a/tools/net/ynl/tests/rt-addr.c +++ b/tools/net/ynl/tests/rt-addr.c @@ -7,9 +7,12 @@ #include #include +#include + #include "rt-addr-user.h" -static void rt_addr_print(struct rt_addr_getaddr_rsp *a) +static void rt_addr_print(struct __test_metadata *_metadata, + struct rt_addr_getaddr_rsp *a) { char ifname[IF_NAMESIZE]; char addr_str[64]; @@ -17,9 +20,11 @@ static void rt_addr_print(struct rt_addr_getaddr_rsp *a) const char *name; name = if_indextoname(a->_hdr.ifa_index, ifname); + EXPECT_NE(NULL, name); if (name) - printf("%16s: ", name); + ksft_print_msg("%16s: ", name); + EXPECT_TRUE(a->_len.address == 4 || a->_len.address == 16); switch (a->_len.address) { case 4: addr = inet_ntop(AF_INET, a->address, @@ -41,40 +46,66 @@ static void rt_addr_print(struct rt_addr_getaddr_rsp *a) printf("\n"); } -int main(int argc, char **argv) +FIXTURE(rt_addr) +{ + struct ynl_sock *ys; +}; + +FIXTURE_SETUP(rt_addr) +{ + struct ynl_error yerr; + + self->ys = ynl_sock_create(&ynl_rt_addr_family, &yerr); + ASSERT_NE(NULL, self->ys) + TH_LOG("failed to create rt-addr socket: %s", yerr.msg); +} + +FIXTURE_TEARDOWN(rt_addr) +{ + ynl_sock_destroy(self->ys); +} + +TEST_F(rt_addr, dump) { struct rt_addr_getaddr_list *rsp; struct rt_addr_getaddr_req *req; - struct ynl_error yerr; - struct ynl_sock *ys; + struct in6_addr v6_expected; + struct in_addr v4_expected; + bool found_v4 = false; + bool found_v6 = false; - ys = ynl_sock_create(&ynl_rt_addr_family, &yerr); - if (!ys) { - fprintf(stderr, "YNL: %s\n", yerr.msg); - return 1; - } + /* The bash wrapper for this test adds these addresses on nsim0, + * make sure we can find them in the dump. + */ + inet_pton(AF_INET, "192.168.1.1", &v4_expected); + inet_pton(AF_INET6, "2001:db8::1", &v6_expected); req = rt_addr_getaddr_req_alloc(); - if (!req) - goto err_destroy; + ASSERT_NE(NULL, req); - rsp = rt_addr_getaddr_dump(ys, req); + rsp = rt_addr_getaddr_dump(self->ys, req); rt_addr_getaddr_req_free(req); - if (!rsp) - goto err_close; + ASSERT_NE(NULL, rsp) { + TH_LOG("dump failed: %s", self->ys->err.msg); + } - if (ynl_dump_empty(rsp)) - fprintf(stderr, "Error: no addresses reported\n"); - ynl_dump_foreach(rsp, addr) - rt_addr_print(addr); + ASSERT_FALSE(ynl_dump_empty(rsp)) { + rt_addr_getaddr_list_free(rsp); + TH_LOG("no addresses reported"); + } + + ynl_dump_foreach(rsp, addr) { + rt_addr_print(_metadata, addr); + + found_v4 |= addr->_len.address == 4 && + !memcmp(addr->address, &v4_expected, 4); + found_v6 |= addr->_len.address == 16 && + !memcmp(addr->address, &v6_expected, 16); + } rt_addr_getaddr_list_free(rsp); - ynl_sock_destroy(ys); - return 0; - -err_close: - fprintf(stderr, "YNL: %s\n", ys->err.msg); -err_destroy: - ynl_sock_destroy(ys); - return 2; + EXPECT_TRUE(found_v4); + EXPECT_TRUE(found_v6); } + +TEST_HARNESS_MAIN diff --git a/tools/net/ynl/tests/rt-addr.sh b/tools/net/ynl/tests/rt-addr.sh new file mode 100755 index 000000000000..87661236d126 --- /dev/null +++ b/tools/net/ynl/tests/rt-addr.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +source "$(dirname "$(realpath "$0")")/ynl_nsim_lib.sh" +nsim_setup +"$(dirname "$(realpath "$0")")/rt-addr" From aa234faa5a4d0f213697c4b9012c75b80b9d2c20 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 6 Mar 2026 19:36:30 -0800 Subject: [PATCH 0379/1561] tools: ynl: convert rt-route sample to selftest Convert rt-route.c to use kselftest_harness.h with FIXTURE/TEST_F. This is the last test to convert so clean up the Makefile. Validate that the connected routes for 192.168.1.0/24 and 2001:db8::/64 appear in the dump. Output: TAP version 13 1..1 # Starting 1 tests from 1 test cases. # RUN rt_route.dump ... # oif: nsim0 dst: 192.168.1.0/24 # oif: lo dst: ::1/128 # oif: nsim0 dst: 2001:db8::1/128 # oif: nsim0 dst: 2001:db8::/64 # oif: nsim0 dst: fe80::/64 # oif: nsim0 dst: ff00::/8 # OK rt_route.dump ok 1 rt_route.dump # PASSED: 1 / 1 tests passed. # Totals: pass:1 fail:0 xfail:0 xpass:0 skip:0 error:0 Reviewed-by: Donald Hunter Tested-by: Donald Hunter Link: https://patch.msgid.link/20260307033630.1396085-11-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/tests/Makefile | 16 +++--- tools/net/ynl/tests/rt-route.c | 87 +++++++++++++++++++++++---------- tools/net/ynl/tests/rt-route.sh | 5 ++ 3 files changed, 72 insertions(+), 36 deletions(-) create mode 100755 tools/net/ynl/tests/rt-route.sh diff --git a/tools/net/ynl/tests/Makefile b/tools/net/ynl/tests/Makefile index 52b54ea6c90f..2a02958c7039 100644 --- a/tools/net/ynl/tests/Makefile +++ b/tools/net/ynl/tests/Makefile @@ -17,6 +17,7 @@ TEST_PROGS := \ devlink.sh \ ethtool.sh \ rt-addr.sh \ + rt-route.sh \ test_ynl_cli.sh \ test_ynl_ethtool.sh \ # end of TEST_PROGS @@ -32,11 +33,8 @@ TEST_GEN_FILES := \ devlink \ ethtool \ rt-addr \ -# end of TEST_GEN_FILES - -BINS := \ rt-route \ -# end of BINS +# end of TEST_GEN_FILES TEST_FILES := ynl_nsim_lib.sh @@ -47,7 +45,7 @@ include $(wildcard *.d) INSTALL_PATH ?= $(DESTDIR)/usr/share/kselftest -all: $(TEST_GEN_PROGS) $(TEST_GEN_FILES) $(BINS) +all: $(TEST_GEN_PROGS) $(TEST_GEN_FILES) ../lib/ynl.a: @$(MAKE) -C ../lib @@ -55,7 +53,7 @@ all: $(TEST_GEN_PROGS) $(TEST_GEN_FILES) $(BINS) ../generated/protos.a: @$(MAKE) -C ../generated -$(TEST_GEN_PROGS) $(TEST_GEN_FILES) $(BINS): %: %.c ../lib/ynl.a ../generated/protos.a +$(TEST_GEN_PROGS) $(TEST_GEN_FILES): %: %.c ../lib/ynl.a ../generated/protos.a @echo -e '\tCC test $@' @$(COMPILE.c) $(CFLAGS_$@) $@.c -o $@.o @$(LINK.c) $@.o -o $@ $(LDLIBS) @@ -65,7 +63,7 @@ run_tests: ./$$test; \ done -install: $(TEST_GEN_PROGS) $(TEST_GEN_FILES) $(BINS) +install: $(TEST_GEN_PROGS) $(TEST_GEN_FILES) @mkdir -p $(INSTALL_PATH)/ynl @cp ../../../testing/selftests/kselftest/ktap_helpers.sh $(INSTALL_PATH)/ @for test in $(TEST_PROGS); do \ @@ -79,7 +77,7 @@ install: $(TEST_GEN_PROGS) $(TEST_GEN_FILES) $(BINS) @for file in $(TEST_FILES); do \ cp $$file $(INSTALL_PATH)/ynl/$$file; \ done - @for bin in $(TEST_GEN_PROGS) $(TEST_GEN_FILES) $(BINS); do \ + @for bin in $(TEST_GEN_PROGS) $(TEST_GEN_FILES); do \ cp $$bin $(INSTALL_PATH)/ynl/$$bin; \ done @for test in $(TEST_PROGS) $(TEST_GEN_PROGS); do \ @@ -90,7 +88,7 @@ clean: rm -f *.o *.d *~ distclean: clean - rm -f $(TEST_GEN_PROGS) $(TEST_GEN_FILES) $(BINS) + rm -f $(TEST_GEN_PROGS) $(TEST_GEN_FILES) .PHONY: all install clean distclean run_tests .DEFAULT_GOAL=all diff --git a/tools/net/ynl/tests/rt-route.c b/tools/net/ynl/tests/rt-route.c index 7427104a96df..c9fd2bc48144 100644 --- a/tools/net/ynl/tests/rt-route.c +++ b/tools/net/ynl/tests/rt-route.c @@ -7,9 +7,12 @@ #include #include +#include + #include "rt-route-user.h" -static void rt_route_print(struct rt_route_getroute_rsp *r) +static void rt_route_print(struct __test_metadata *_metadata, + struct rt_route_getroute_rsp *r) { char ifname[IF_NAMESIZE]; char route_str[64]; @@ -22,8 +25,9 @@ static void rt_route_print(struct rt_route_getroute_rsp *r) if (r->_present.oif) { name = if_indextoname(r->oif, ifname); + EXPECT_NE(NULL, name); if (name) - printf("oif: %-16s ", name); + ksft_print_msg("oif: %-16s ", name); } if (r->_len.dst) { @@ -41,40 +45,69 @@ static void rt_route_print(struct rt_route_getroute_rsp *r) printf("\n"); } -int main(int argc, char **argv) +FIXTURE(rt_route) +{ + struct ynl_sock *ys; +}; + +FIXTURE_SETUP(rt_route) +{ + struct ynl_error yerr; + + self->ys = ynl_sock_create(&ynl_rt_route_family, &yerr); + ASSERT_NE(NULL, self->ys) + TH_LOG("failed to create rt-route socket: %s", yerr.msg); +} + +FIXTURE_TEARDOWN(rt_route) +{ + ynl_sock_destroy(self->ys); +} + +TEST_F(rt_route, dump) { struct rt_route_getroute_req_dump *req; struct rt_route_getroute_list *rsp; - struct ynl_error yerr; - struct ynl_sock *ys; + struct in6_addr v6_expected; + struct in_addr v4_expected; + bool found_v4 = false; + bool found_v6 = false; - ys = ynl_sock_create(&ynl_rt_route_family, &yerr); - if (!ys) { - fprintf(stderr, "YNL: %s\n", yerr.msg); - return 1; - } + /* The bash wrapper configures 192.168.1.1/24 and 2001:db8::1/64, + * make sure we can find the connected routes in the dump. + */ + inet_pton(AF_INET, "192.168.1.0", &v4_expected); + inet_pton(AF_INET6, "2001:db8::", &v6_expected); req = rt_route_getroute_req_dump_alloc(); - if (!req) - goto err_destroy; + ASSERT_NE(NULL, req); - rsp = rt_route_getroute_dump(ys, req); + rsp = rt_route_getroute_dump(self->ys, req); rt_route_getroute_req_dump_free(req); - if (!rsp) - goto err_close; + ASSERT_NE(NULL, rsp) { + TH_LOG("dump failed: %s", self->ys->err.msg); + } - if (ynl_dump_empty(rsp)) - fprintf(stderr, "Error: no routeesses reported\n"); - ynl_dump_foreach(rsp, route) - rt_route_print(route); + ASSERT_FALSE(ynl_dump_empty(rsp)) { + rt_route_getroute_list_free(rsp); + TH_LOG("no routes reported"); + } + + ynl_dump_foreach(rsp, route) { + rt_route_print(_metadata, route); + + if (route->_hdr.rtm_table == RT_TABLE_LOCAL) + continue; + + if (route->_len.dst == 4 && route->_hdr.rtm_dst_len == 24) + found_v4 |= !memcmp(route->dst, &v4_expected, 4); + if (route->_len.dst == 16 && route->_hdr.rtm_dst_len == 64) + found_v6 |= !memcmp(route->dst, &v6_expected, 16); + } rt_route_getroute_list_free(rsp); - ynl_sock_destroy(ys); - return 0; - -err_close: - fprintf(stderr, "YNL: %s\n", ys->err.msg); -err_destroy: - ynl_sock_destroy(ys); - return 2; + EXPECT_TRUE(found_v4); + EXPECT_TRUE(found_v6); } + +TEST_HARNESS_MAIN diff --git a/tools/net/ynl/tests/rt-route.sh b/tools/net/ynl/tests/rt-route.sh new file mode 100755 index 000000000000..020338f0a238 --- /dev/null +++ b/tools/net/ynl/tests/rt-route.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +source "$(dirname "$(realpath "$0")")/ynl_nsim_lib.sh" +nsim_setup +"$(dirname "$(realpath "$0")")/rt-route" From 2ed4b46b4fc77749cb0f8dd31a01441b82c8dbaa Mon Sep 17 00:00:00 2001 From: Tim Bird Date: Wed, 4 Mar 2026 17:47:22 -0700 Subject: [PATCH 0380/1561] net: Add SPDX ids to some source files Add SPDX-License-Identifier lines to several source files under the network sub-directory. Work on files in the core, dns_resolver, ipv4, ipv6 and netfilter sub-dirs. Remove boilerplate and license reference text to avoid ambiguity. Rusty Russell has expressed that his contributions were intended to be GPL-2.0-or-later. Signed-off-by: Tim Bird Link: https://patch.msgid.link/20260305004724.87469-1-tim.bird@sony.com Signed-off-by: Jakub Kicinski --- net/core/fib_notifier.c | 1 + net/core/sock_diag.c | 3 +-- net/dns_resolver/dns_key.c | 14 +------------- net/dns_resolver/dns_query.c | 14 +------------- net/dns_resolver/internal.h | 14 +------------- net/ipv4/inetpeer.c | 3 +-- net/ipv4/ipmr_base.c | 1 + net/ipv4/netfilter.c | 3 ++- net/ipv4/tcp_bbr.c | 1 + net/ipv4/tcp_dctcp.h | 1 + net/ipv4/tcp_plb.c | 1 + net/ipv6/fib6_notifier.c | 1 + net/ipv6/ila/ila_common.c | 1 + net/ipv6/netfilter.c | 3 ++- net/netfilter/core.c | 3 +-- net/netfilter/nf_conntrack_netlink.c | 4 +--- net/netfilter/nf_flow_table_offload.c | 1 + net/netfilter/nf_queue.c | 1 + net/netfilter/nfnetlink.c | 4 +--- net/netfilter/nft_chain_filter.c | 1 + net/netfilter/xt_connbytes.c | 3 ++- net/netfilter/xt_connlimit.c | 3 ++- net/netfilter/xt_time.c | 3 +-- 23 files changed, 27 insertions(+), 57 deletions(-) diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c index 5cdca49b1d7c..6bb2cc7e88ca 100644 --- a/net/core/fib_notifier.c +++ b/net/core/fib_notifier.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include #include #include diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index c83335c62360..f67accd60675 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -1,5 +1,4 @@ -/* License: GPL */ - +// SPDX-License-Identifier: GPL-2.0 #include #include #include diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index 891287a86979..c3c8c3240ef9 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: LGPL-2.1-or-later /* Key type used to cache DNS lookups made by the kernel * * See Documentation/networking/dns_resolver.rst @@ -7,19 +8,6 @@ * Steve French (sfrench@us.ibm.com) * Wang Lei (wang840925@gmail.com) * David Howells (dhowells@redhat.com) - * - * This library is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; either version 2.1 of the License, or - * (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this library; if not, see . */ #include #include diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c index 53da62984447..e1c09d7b8200 100644 --- a/net/dns_resolver/dns_query.c +++ b/net/dns_resolver/dns_query.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: LGPL-2.1-or-later /* Upcall routine, designed to work as a key type and working through * /sbin/request-key to contact userspace when handling DNS queries. * @@ -20,19 +21,6 @@ * For example to use this module to query AFSDB RR: * * create dns_resolver afsdb:* * /sbin/dns.afsdb %k - * - * This library is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; either version 2.1 of the License, or - * (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this library; if not, see . */ #include diff --git a/net/dns_resolver/internal.h b/net/dns_resolver/internal.h index 0c570d40e4d6..d0d8edcea092 100644 --- a/net/dns_resolver/internal.h +++ b/net/dns_resolver/internal.h @@ -1,21 +1,9 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ /* * Copyright (c) 2010 Wang Lei * Author(s): Wang Lei (wang840925@gmail.com). All Rights Reserved. * * Internal DNS Rsolver stuff - * - * This library is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; either version 2.1 of the License, or - * (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this library; if not, see . */ #include diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 7b1e0a2d6906..9fa396d5f09f 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -1,8 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 /* * INETPEER - A storage for permanent information about peers * - * This source is covered by the GNU GPL, the same as all kernel sources. - * * Authors: Andrey V. Savochkin */ diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c index b0fd9ffa01a2..fd27f2ca6978 100644 --- a/net/ipv4/ipmr_base.c +++ b/net/ipv4/ipmr_base.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Linux multicast routing support * Common logic shared by IPv4 [ipmr] and IPv6 [ip6mr] implementation */ diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index ce310eb779e0..ce9e1bfa4259 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -1,7 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * IPv4 specific functions of netfilter core * - * Rusty Russell (C) 2000 -- This code is GPL. + * Rusty Russell (C) 2000 * Patrick McHardy (C) 2006-2012 */ #include diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 760941e55153..05d52372ca8f 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Bottleneck Bandwidth and RTT (BBR) congestion control * * BBR congestion control computes the sending rate based on the delivery diff --git a/net/ipv4/tcp_dctcp.h b/net/ipv4/tcp_dctcp.h index 4b0259111d81..f13f8d770576 100644 --- a/net/ipv4/tcp_dctcp.h +++ b/net/ipv4/tcp_dctcp.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _TCP_DCTCP_H #define _TCP_DCTCP_H diff --git a/net/ipv4/tcp_plb.c b/net/ipv4/tcp_plb.c index 4bcf7eff95e3..68ccdb9a5412 100644 --- a/net/ipv4/tcp_plb.c +++ b/net/ipv4/tcp_plb.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Protective Load Balancing (PLB) * * PLB was designed to reduce link load imbalance across datacenter diff --git a/net/ipv6/fib6_notifier.c b/net/ipv6/fib6_notifier.c index 949b72610df7..64cd4ed8864c 100644 --- a/net/ipv6/fib6_notifier.c +++ b/net/ipv6/fib6_notifier.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include #include #include diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c index b8d43ed4689d..e71571455c8a 100644 --- a/net/ipv6/ila/ila_common.c +++ b/net/ipv6/ila/ila_common.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include #include #include diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 46540a5a4331..c3dc90dfab80 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -1,7 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * IPv6 specific functions of netfilter core * - * Rusty Russell (C) 2000 -- This code is GPL. + * Rusty Russell (C) 2000 * Patrick McHardy (C) 2006-2012 */ #include diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 11a702065bab..d5df44ea9e7b 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -1,10 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* netfilter.c: look after the filters for various protocols. * Heavily influenced by the old firewall.c by David Bonn and Alan Cox. * * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any * way. - * - * This code is GPL. */ #include #include diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index c9d725fc2d71..9b295867a105 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Connection tracking via netlink socket. Allows for user space * protocol helpers and general trouble making from userspace. * @@ -10,9 +11,6 @@ * generally made possible by Network Robots, Inc. (www.networkrobots.com) * * Further development of this code funded by Astaro AG (http://www.astaro.com) - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. */ #include diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index 9b677e116487..b2e4fb6fa011 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include #include #include diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 7f12e56e6e52..a6c81c04b3a5 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Rusty Russell (C)2000 -- This code is GPL. * Patrick McHardy (c) 2006-2012 diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index e62a0dea24ea..47f3ed441f64 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Netfilter messages via netlink socket. Allows for user space * protocol helpers and general trouble making from userspace. * @@ -9,9 +10,6 @@ * generally made possible by Network Robots, Inc. (www.networkrobots.com) * * Further development of this code funded by Astaro AG (http://www.astaro.com) - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. */ #include diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c index b16185e9a6dd..14b14c46c314 100644 --- a/net/netfilter/nft_chain_filter.c +++ b/net/netfilter/nft_chain_filter.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include #include #include diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index 2aabdcea8707..1c6ffc7f1622 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -1,5 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* Kernel module to match connection tracking byte counter. - * GPL (C) 2002 Martin Devera (devik@cdi.cz). + * (C) 2002 Martin Devera (devik@cdi.cz). */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 848287ab79cf..42df9e175aff 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * netfilter module to limit the number of parallel tcp * connections per IP address. @@ -9,7 +10,7 @@ * based on ... * * Kernel module to match connection tracking information. - * GPL (C) 1999 Rusty Russell (rusty@rustcorp.com.au). + * (C) 1999 Rusty Russell (rusty@rustcorp.com.au). */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index 00319d2a54da..9068cf88ec30 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * xt_time * Copyright © CC Computer Consultants GmbH, 2007 @@ -6,8 +7,6 @@ * This is a module which is used for time matching * It is using some modified code from dietlibc (localtime() function) * that you can find at https://www.fefe.de/dietlibc/ - * This file is distributed under the terms of the GNU General Public - * License (GPL). Copies of the GPL can be obtained from gnu.org/gpl. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt From 8ca8eb05767395cd2fd0db12e052ca0bc5d1597c Mon Sep 17 00:00:00 2001 From: Keita Morisaki Date: Sat, 7 Mar 2026 14:16:19 +0900 Subject: [PATCH 0381/1561] tcp: remove unused hash_size from struct tcp_out_options hash_size is declared but never read. The MD5 path always uses a fixed size of 16, and the TCP-AO path uses tcp_ao_maclen(). This closes a 7-byte hole and reduces the struct size from 96 to 88 bytes. Suggested-by: Jakub Sitnicki Signed-off-by: Keita Morisaki Reviewed-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260307051619.51685-1-kmta1236@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_output.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f0ebcc7e2871..4377b3673da9 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -441,7 +441,6 @@ struct tcp_out_options { u8 ws; /* window scale, 0 to disable */ u8 num_accecn_fields:7, /* number of AccECN fields needed */ use_synack_ecn_bytes:1; /* Use synack_ecn_bytes or not */ - u8 hash_size; /* bytes in hash_location */ __u8 *hash_location; /* temporary pointer, overloaded */ __u32 tsval, tsecr; /* need to include OPTION_TS */ struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */ From 50636e5ff8861c35ebb190521ba540074ab583ad Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 6 Mar 2026 13:11:30 +0000 Subject: [PATCH 0382/1561] tcp: move tcp_v4_early_demux() to net/ipv4/ip_input.c tcp_v4_early_demux() has a single caller : ip_rcv_finish_core(). Move it to net/ipv4/ip_input.c and mark it static, for possible compiler/linker optimizations. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260306131130.654991-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 1 - net/ipv4/ip_input.c | 39 +++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp_ipv4.c | 38 -------------------------------------- 3 files changed, 39 insertions(+), 39 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index a64641423806..f07aef7faa65 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -363,7 +363,6 @@ int tcp_v4_err(struct sk_buff *skb, u32); void tcp_shutdown(struct sock *sk, int how); -int tcp_v4_early_demux(struct sk_buff *skb); int tcp_v4_rcv(struct sk_buff *skb); void tcp_remove_empty_skb(struct sock *sk); diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 19d3141dad1f..9860178752b8 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -319,6 +319,45 @@ static bool ip_can_use_hint(const struct sk_buff *skb, const struct iphdr *iph, ip_hdr(hint)->tos == iph->tos; } +static int tcp_v4_early_demux(struct sk_buff *skb) +{ + struct net *net = dev_net_rcu(skb->dev); + const struct iphdr *iph; + const struct tcphdr *th; + struct sock *sk; + + if (skb->pkt_type != PACKET_HOST) + return 0; + + if (!pskb_may_pull(skb, skb_transport_offset(skb) + + sizeof(struct tcphdr))) + return 0; + + iph = ip_hdr(skb); + th = tcp_hdr(skb); + + if (th->doff < sizeof(struct tcphdr) / 4) + return 0; + + sk = __inet_lookup_established(net, iph->saddr, th->source, + iph->daddr, ntohs(th->dest), + skb->skb_iif, inet_sdif(skb)); + if (sk) { + skb->sk = sk; + skb->destructor = sock_edemux; + if (sk_fullsock(sk)) { + struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst); + + if (dst) + dst = dst_check(dst, 0); + if (dst && + sk->sk_rx_dst_ifindex == skb->skb_iif) + skb_dst_set_noref(skb, dst); + } + } + return 0; +} + static int ip_rcv_finish_core(struct net *net, struct sk_buff *skb, struct net_device *dev, const struct sk_buff *hint) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 190e8a238876..f27995a64561 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1914,44 +1914,6 @@ err_discard: } EXPORT_SYMBOL(tcp_v4_do_rcv); -int tcp_v4_early_demux(struct sk_buff *skb) -{ - struct net *net = dev_net_rcu(skb->dev); - const struct iphdr *iph; - const struct tcphdr *th; - struct sock *sk; - - if (skb->pkt_type != PACKET_HOST) - return 0; - - if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr))) - return 0; - - iph = ip_hdr(skb); - th = tcp_hdr(skb); - - if (th->doff < sizeof(struct tcphdr) / 4) - return 0; - - sk = __inet_lookup_established(net, iph->saddr, th->source, - iph->daddr, ntohs(th->dest), - skb->skb_iif, inet_sdif(skb)); - if (sk) { - skb->sk = sk; - skb->destructor = sock_edemux; - if (sk_fullsock(sk)) { - struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst); - - if (dst) - dst = dst_check(dst, 0); - if (dst && - sk->sk_rx_dst_ifindex == skb->skb_iif) - skb_dst_set_noref(skb, dst); - } - } - return 0; -} - bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb, enum skb_drop_reason *reason) { From 6927430735802bd8bc5dbe352edbd94a04459567 Mon Sep 17 00:00:00 2001 From: Rosen Penev Date: Thu, 5 Mar 2026 18:54:49 -0800 Subject: [PATCH 0383/1561] net: rocker: kzalloc + kcalloc to kzalloc_flex Combining the allocations simplifies things, especially the free path. Remove ofdpa_group_tbl_entry_free as a result. kfree is shorter. Add __counted_by for extra runtime analysis. Signed-off-by: Rosen Penev Acked-by: Gustavo A. R. Silva Reviewed-by: Jiri Pirko Link: https://patch.msgid.link/20260306025449.12333-1-rosenp@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/rocker/rocker_ofdpa.c | 31 +++++----------------- 1 file changed, 7 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c index 50ea5f9ef63a..66a8ae67c3ea 100644 --- a/drivers/net/ethernet/rocker/rocker_ofdpa.c +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -104,7 +104,6 @@ struct ofdpa_group_tbl_entry { u32 cmd; u32 group_id; /* key */ u16 group_count; - u32 *group_ids; union { struct { u8 pop_vlan; @@ -123,6 +122,8 @@ struct ofdpa_group_tbl_entry { u32 group_id; } l3_unicast; }; + + u32 group_ids[] __counted_by(group_count); }; struct ofdpa_fdb_tbl_entry { @@ -1059,19 +1060,6 @@ ofdpa_group_tbl_find(const struct ofdpa *ofdpa, return NULL; } -static void ofdpa_group_tbl_entry_free(struct ofdpa_group_tbl_entry *entry) -{ - switch (ROCKER_GROUP_TYPE_GET(entry->group_id)) { - case ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD: - case ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST: - kfree(entry->group_ids); - break; - default: - break; - } - kfree(entry); -} - static int ofdpa_group_tbl_add(struct ofdpa_port *ofdpa_port, int flags, struct ofdpa_group_tbl_entry *match) { @@ -1085,7 +1073,7 @@ static int ofdpa_group_tbl_add(struct ofdpa_port *ofdpa_port, int flags, if (found) { hash_del(&found->entry); - ofdpa_group_tbl_entry_free(found); + kfree(found); found = match; found->cmd = ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD; } else { @@ -1122,14 +1110,14 @@ static int ofdpa_group_tbl_del(struct ofdpa_port *ofdpa_port, int flags, spin_unlock_irqrestore(&ofdpa->group_tbl_lock, lock_flags); - ofdpa_group_tbl_entry_free(match); + kfree(match); if (found) { err = rocker_cmd_exec(ofdpa_port->rocker_port, ofdpa_flags_nowait(flags), ofdpa_cmd_group_tbl_del, found, NULL, NULL); - ofdpa_group_tbl_entry_free(found); + kfree(found); } return err; @@ -1166,18 +1154,13 @@ static int ofdpa_group_l2_fan_out(struct ofdpa_port *ofdpa_port, { struct ofdpa_group_tbl_entry *entry; - entry = kzalloc_obj(*entry); + entry = kzalloc_flex(*entry, group_ids, group_count); if (!entry) return -ENOMEM; - entry->group_id = group_id; entry->group_count = group_count; + entry->group_id = group_id; - entry->group_ids = kcalloc(group_count, sizeof(u32), GFP_KERNEL); - if (!entry->group_ids) { - kfree(entry); - return -ENOMEM; - } memcpy(entry->group_ids, group_ids, group_count * sizeof(u32)); return ofdpa_group_tbl_do(ofdpa_port, flags, entry); From 82f36517a13e6339ddd9737d8a310949e057e596 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 6 Mar 2026 15:43:22 +0000 Subject: [PATCH 0384/1561] tcp: avoid dst->ops->check() call in tcp_v{4,6}_do_rcv() If incoming skb dst matches the socket cached one, there is no need to call again dst->ops->check(). Network layer already validated the skb dst for us, usually from tcp_v{4,6}_early_demux(). Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260306154322.1086539-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f27995a64561..2ea8253b737a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1855,7 +1855,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) sock_rps_save_rxhash(sk, skb); sk_mark_napi_id(sk, skb); - if (dst) { + if (dst && unlikely(dst != skb_dst(skb))) { if (sk->sk_rx_dst_ifindex != skb->skb_iif || !INDIRECT_CALL_1(dst->ops->check, ipv4_dst_check, dst, 0)) { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 164dceb842af..074b83c9a551 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1596,7 +1596,7 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) sock_rps_save_rxhash(sk, skb); sk_mark_napi_id(sk, skb); - if (dst) { + if (dst && unlikely(dst != skb_dst(skb))) { if (sk->sk_rx_dst_ifindex != skb->skb_iif || INDIRECT_CALL_1(dst->ops->check, ip6_dst_check, dst, sk->sk_rx_dst_cookie) == NULL) { From 47e8dbb6e763e5ccfed2ab4aa55cbb163382aec1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 7 Mar 2026 16:34:30 +0000 Subject: [PATCH 0385/1561] net/sched: do not reset queues in graft operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Following typical script is extremely disruptive, because each graft operation calls dev_deactivate() which resets all the queues of the device. QPARAM="limit 100000 flow_limit 1000 buckets 4096" TXQS=64 for ETH in eth1 do tc qd del dev $ETH root 2>/dev/null tc qd add dev $ETH root handle 1: mq for i in `seq 1 $TXQS` do slot=$( printf %x $(( i )) ) tc qd add dev $ETH parent 1:$slot fq $QPARAM done done One can add "ip link set dev $ETH down/up" to reduce the disruption time: QPARAM="limit 100000 flow_limit 1000 buckets 4096" TXQS=64 for ETH in eth1 do ip link set dev $ETH down tc qd del dev $ETH root 2>/dev/null tc qd add dev $ETH root handle 1: mq for i in `seq 1 $TXQS` do slot=$( printf %x $(( i )) ) tc qd add dev $ETH parent 1:$slot fq $QPARAM done ip link set dev $ETH up done Or we can add a @reset_needed flag to dev_deactivate() and dev_deactivate_many(). This flag is set to true at device dismantle or linkwatch_do_dev(), and to false for graft operations. In the future, we might only stop one queue instead of the whole device, ie call dev_deactivate_queue() instead of dev_deactivate(). I think the problem (quadratic behavior) was added in commit 2fb541c862c9 ("net: sch_generic: aviod concurrent reset and enqueue op for lockless qdisc") but this does not look serious enough to deserve risky backports. Signed-off-by: Eric Dumazet Cc: Yunsheng Lin Reviewed-by: Jamal Hadi Salim Reviewed-by: Toke Høiland-Jørgensen Reviewed-by: Victor Nogueira Link: https://patch.msgid.link/20260307163430.470644-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/sch_generic.h | 4 ++-- net/core/dev.c | 2 +- net/core/link_watch.c | 2 +- net/sched/sch_api.c | 2 +- net/sched/sch_generic.c | 20 ++++++++++++-------- net/sched/sch_htb.c | 4 ++-- net/sched/sch_mq.c | 2 +- net/sched/sch_mqprio.c | 2 +- net/sched/sch_taprio.c | 2 +- 9 files changed, 22 insertions(+), 18 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c355300893a3..16beba40914e 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -710,8 +710,8 @@ void dev_qdisc_change_real_num_tx(struct net_device *dev, void dev_init_scheduler(struct net_device *dev); void dev_shutdown(struct net_device *dev); void dev_activate(struct net_device *dev); -void dev_deactivate(struct net_device *dev); -void dev_deactivate_many(struct list_head *head); +void dev_deactivate(struct net_device *dev, bool reset_needed); +void dev_deactivate_many(struct list_head *head, bool reset_needed); struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, struct Qdisc *qdisc); void qdisc_reset(struct Qdisc *qdisc); diff --git a/net/core/dev.c b/net/core/dev.c index 203dc36aaed5..6fc9350f0be8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1756,7 +1756,7 @@ static void __dev_close_many(struct list_head *head) smp_mb__after_atomic(); /* Commit netif_running(). */ } - dev_deactivate_many(head); + dev_deactivate_many(head, true); list_for_each_entry(dev, head, close_list) { const struct net_device_ops *ops = dev->netdev_ops; diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 25c455c10a01..ff2c1d4538ef 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -181,7 +181,7 @@ static void linkwatch_do_dev(struct net_device *dev) if (netif_carrier_ok(dev)) dev_activate(dev); else - dev_deactivate(dev); + dev_deactivate(dev, true); netif_state_change(dev); } diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index cc43e3f7574f..c0bab092ea80 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1120,7 +1120,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, } if (dev->flags & IFF_UP) - dev_deactivate(dev); + dev_deactivate(dev, false); qdisc_offload_graft_root(dev, new, old, extack); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 556e0d800316..d4fe907c4ad5 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1370,11 +1370,12 @@ static bool some_qdisc_is_busy(struct net_device *dev) /** * dev_deactivate_many - deactivate transmissions on several devices * @head: list of devices to deactivate + * @reset_needed: qdisc should be reset if true. * * This function returns only when all outstanding transmissions * have completed, unless all devices are in dismantle phase. */ -void dev_deactivate_many(struct list_head *head) +void dev_deactivate_many(struct list_head *head, bool reset_needed) { bool sync_needed = false; struct net_device *dev; @@ -1393,11 +1394,14 @@ void dev_deactivate_many(struct list_head *head) if (sync_needed) synchronize_net(); - list_for_each_entry(dev, head, close_list) { - netdev_for_each_tx_queue(dev, dev_reset_queue, NULL); + if (reset_needed) { + list_for_each_entry(dev, head, close_list) { + netdev_for_each_tx_queue(dev, dev_reset_queue, NULL); - if (dev_ingress_queue(dev)) - dev_reset_queue(dev, dev_ingress_queue(dev), NULL); + if (dev_ingress_queue(dev)) + dev_reset_queue(dev, dev_ingress_queue(dev), + NULL); + } } /* Wait for outstanding qdisc_run calls. */ @@ -1412,12 +1416,12 @@ void dev_deactivate_many(struct list_head *head) } } -void dev_deactivate(struct net_device *dev) +void dev_deactivate(struct net_device *dev, bool reset_needed) { LIST_HEAD(single); list_add(&dev->close_list, &single); - dev_deactivate_many(&single); + dev_deactivate_many(&single, reset_needed); list_del(&single); } EXPORT_SYMBOL(dev_deactivate); @@ -1473,7 +1477,7 @@ int dev_qdisc_change_tx_queue_len(struct net_device *dev) int ret = 0; if (up) - dev_deactivate(dev); + dev_deactivate(dev, false); for (i = 0; i < dev->num_tx_queues; i++) { ret = qdisc_change_tx_queue_len(dev, &dev->_tx[i]); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index cf6cd4ccfa20..eb12381795ce 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1387,7 +1387,7 @@ htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q) struct Qdisc *old_q; if (dev->flags & IFF_UP) - dev_deactivate(dev); + dev_deactivate(dev, false); old_q = dev_graft_qdisc(dev_queue, new_q); if (new_q) new_q->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; @@ -1421,7 +1421,7 @@ static void htb_offload_move_qdisc(struct Qdisc *sch, struct htb_class *cl_old, struct Qdisc *qdisc; if (dev->flags & IFF_UP) - dev_deactivate(dev); + dev_deactivate(dev, false); qdisc = dev_graft_qdisc(queue_old, NULL); WARN_ON(qdisc != cl_old->leaf.q); } diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index 0ed199fa18f0..a0133a7b9d3b 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -201,7 +201,7 @@ static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, struct net_device *dev = qdisc_dev(sch); if (dev->flags & IFF_UP) - dev_deactivate(dev); + dev_deactivate(dev, false); *old = dev_graft_qdisc(dev_queue, new); if (new) diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index b83276409416..002add5ce9e0 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -469,7 +469,7 @@ static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, return -EINVAL; if (dev->flags & IFF_UP) - dev_deactivate(dev); + dev_deactivate(dev, false); *old = dev_graft_qdisc(dev_queue, new); diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index f721c03514f6..8e3752811950 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -2184,7 +2184,7 @@ static int taprio_graft(struct Qdisc *sch, unsigned long cl, return -EINVAL; if (dev->flags & IFF_UP) - dev_deactivate(dev); + dev_deactivate(dev, false); /* In offload mode, the child Qdisc is directly attached to the netdev * TX queue, and thus, we need to keep its refcount elevated in order From a23c657e332f2feb5eb9c4a3e8371386aa7392a6 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 6 Mar 2026 15:44:39 +0100 Subject: [PATCH 0386/1561] net: ethernet: ti: am65-cpsw: Use also port number to identify timestamps The driver uses packet-type (RX/TX) PTP-message type and PTP-sequence number to identify a matching timestamp packet for a skb. If the same PTP packet arrives on both ports (as in a PRP environment) then it is not obvious which event belongs to which skb. The event contains also the port number on which it was received. Instead of masking it out, use it for matching. Tested-by: Chintan Vankar Reviewed-by: Martin Kaistra Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20260306144439.cVwaaopR@linutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 4 ++-- drivers/net/ethernet/ti/am65-cpts.c | 23 +++++++++-------------- drivers/net/ethernet/ti/am65-cpts.h | 8 ++++++-- 3 files changed, 17 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 967918050433..a38bf7f4f434 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -1352,7 +1352,7 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow, am65_cpsw_nuss_set_offload_fwd_mark(skb, ndev_priv->offload_fwd_mark); skb_put(skb, pkt_len); if (port->rx_ts_enabled) - am65_cpts_rx_timestamp(common->cpts, skb); + am65_cpts_rx_timestamp(common->cpts, port_id, skb); skb_mark_for_recycle(skb); skb->protocol = eth_type_trans(skb, ndev); am65_cpsw_nuss_rx_csum(skb, csum_info); @@ -1607,7 +1607,7 @@ static netdev_tx_t am65_cpsw_nuss_ndo_slave_xmit(struct sk_buff *skb, /* SKB TX timestamp */ if (port->tx_ts_enabled) - am65_cpts_prep_tx_timestamp(common->cpts, skb); + am65_cpts_prep_tx_timestamp(common->cpts, port->port_id, skb); q_idx = skb_get_queue_mapping(skb); dev_dbg(dev, "%s skb_queue:%d\n", __func__, q_idx); diff --git a/drivers/net/ethernet/ti/am65-cpts.c b/drivers/net/ethernet/ti/am65-cpts.c index 8ffbfaa3ab18..efbf2ef976a3 100644 --- a/drivers/net/ethernet/ti/am65-cpts.c +++ b/drivers/net/ethernet/ti/am65-cpts.c @@ -796,11 +796,7 @@ static bool am65_cpts_match_tx_ts(struct am65_cpts *cpts, bool found = false; u32 mtype_seqid; - mtype_seqid = event->event1 & - (AM65_CPTS_EVENT_1_MESSAGE_TYPE_MASK | - AM65_CPTS_EVENT_1_EVENT_TYPE_MASK | - AM65_CPTS_EVENT_1_SEQUENCE_ID_MASK); - + mtype_seqid = event->event1; __skb_queue_head_init(&txq_list); spin_lock_irqsave(&cpts->txq.lock, flags); @@ -922,7 +918,6 @@ static u64 am65_cpts_find_rx_ts(struct am65_cpts *cpts, u32 skb_mtype_seqid) struct list_head *this, *next; struct am65_cpts_event *event; unsigned long flags; - u32 mtype_seqid; u64 ns = 0; spin_lock_irqsave(&cpts->lock, flags); @@ -934,12 +929,7 @@ static u64 am65_cpts_find_rx_ts(struct am65_cpts *cpts, u32 skb_mtype_seqid) continue; } - mtype_seqid = event->event1 & - (AM65_CPTS_EVENT_1_MESSAGE_TYPE_MASK | - AM65_CPTS_EVENT_1_SEQUENCE_ID_MASK | - AM65_CPTS_EVENT_1_EVENT_TYPE_MASK); - - if (mtype_seqid == skb_mtype_seqid) { + if (event->event1 == skb_mtype_seqid) { ns = event->timestamp; list_move(&event->list, &cpts->pool); break; @@ -950,7 +940,8 @@ static u64 am65_cpts_find_rx_ts(struct am65_cpts *cpts, u32 skb_mtype_seqid) return ns; } -void am65_cpts_rx_timestamp(struct am65_cpts *cpts, struct sk_buff *skb) +void am65_cpts_rx_timestamp(struct am65_cpts *cpts, unsigned int port_id, + struct sk_buff *skb) { struct am65_cpts_skb_cb_data *skb_cb = (struct am65_cpts_skb_cb_data *)skb->cb; struct skb_shared_hwtstamps *ssh; @@ -966,6 +957,7 @@ void am65_cpts_rx_timestamp(struct am65_cpts *cpts, struct sk_buff *skb) if (!ret) return; /* if not PTP class packet */ + skb_cb->skb_mtype_seqid |= port_id << AM65_CPTS_EVENT_1_PORT_NUMBER_SHIFT; skb_cb->skb_mtype_seqid |= (AM65_CPTS_EV_RX << AM65_CPTS_EVENT_1_EVENT_TYPE_SHIFT); dev_dbg(cpts->dev, "%s mtype seqid %08x\n", __func__, skb_cb->skb_mtype_seqid); @@ -1009,13 +1001,15 @@ EXPORT_SYMBOL_GPL(am65_cpts_tx_timestamp); /** * am65_cpts_prep_tx_timestamp - check and prepare tx packet for timestamping * @cpts: cpts handle + * @port_id: The port on which the skb will be sent * @skb: packet * * This functions should be called from .xmit(). * It checks if packet can be timestamped, fills internal cpts data * in skb-cb and marks packet as SKBTX_IN_PROGRESS. */ -void am65_cpts_prep_tx_timestamp(struct am65_cpts *cpts, struct sk_buff *skb) +void am65_cpts_prep_tx_timestamp(struct am65_cpts *cpts, unsigned int port_id, + struct sk_buff *skb) { struct am65_cpts_skb_cb_data *skb_cb = (void *)skb->cb; int ret; @@ -1026,6 +1020,7 @@ void am65_cpts_prep_tx_timestamp(struct am65_cpts *cpts, struct sk_buff *skb) ret = am65_skb_get_mtype_seqid(skb, &skb_cb->skb_mtype_seqid); if (!ret) return; + skb_cb->skb_mtype_seqid |= port_id << AM65_CPTS_EVENT_1_PORT_NUMBER_SHIFT; skb_cb->skb_mtype_seqid |= (AM65_CPTS_EV_TX << AM65_CPTS_EVENT_1_EVENT_TYPE_SHIFT); diff --git a/drivers/net/ethernet/ti/am65-cpts.h b/drivers/net/ethernet/ti/am65-cpts.h index 6099d772799d..5fa77b0fc837 100644 --- a/drivers/net/ethernet/ti/am65-cpts.h +++ b/drivers/net/ethernet/ti/am65-cpts.h @@ -22,9 +22,11 @@ void am65_cpts_release(struct am65_cpts *cpts); struct am65_cpts *am65_cpts_create(struct device *dev, void __iomem *regs, struct device_node *node); int am65_cpts_phc_index(struct am65_cpts *cpts); -void am65_cpts_rx_timestamp(struct am65_cpts *cpts, struct sk_buff *skb); +void am65_cpts_rx_timestamp(struct am65_cpts *cpts, unsigned int port_id, + struct sk_buff *skb); void am65_cpts_tx_timestamp(struct am65_cpts *cpts, struct sk_buff *skb); -void am65_cpts_prep_tx_timestamp(struct am65_cpts *cpts, struct sk_buff *skb); +void am65_cpts_prep_tx_timestamp(struct am65_cpts *cpts, unsigned int port_id, + struct sk_buff *skb); u64 am65_cpts_ns_gettime(struct am65_cpts *cpts); int am65_cpts_estf_enable(struct am65_cpts *cpts, int idx, struct am65_cpts_estf_cfg *cfg); @@ -49,6 +51,7 @@ static inline int am65_cpts_phc_index(struct am65_cpts *cpts) } static inline void am65_cpts_rx_timestamp(struct am65_cpts *cpts, + unsigned int port_id, struct sk_buff *skb) { } @@ -59,6 +62,7 @@ static inline void am65_cpts_tx_timestamp(struct am65_cpts *cpts, } static inline void am65_cpts_prep_tx_timestamp(struct am65_cpts *cpts, + unsigned int port_id, struct sk_buff *skb) { } From 58e4d35ae7b9325ab622bbcc34312b17af425c8f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 6 Mar 2026 13:31:54 +0000 Subject: [PATCH 0387/1561] net/sched: use rtnl_kfree_skbs() in pfifo_fast_reset() rtnl_kfree_skbs() reduces RTNL and qdisc spinlock hold time. skbs are freed later after RTNL has been released. Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260306133154.678730-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sched/sch_generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index d4fe907c4ad5..69d5ac4f17d1 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -850,7 +850,7 @@ static void pfifo_fast_reset(struct Qdisc *qdisc) continue; while ((skb = __skb_array_consume(q)) != NULL) - kfree_skb(skb); + rtnl_kfree_skbs(skb, skb); } if (qdisc_is_percpu_stats(qdisc)) { From abb0eb0b033a0a8980eb9215e02626e4801ead3f Mon Sep 17 00:00:00 2001 From: Qingfang Deng Date: Fri, 6 Mar 2026 17:36:49 +0800 Subject: [PATCH 0388/1561] ppp: simplify input error handling Currently, ppp_input_error() indicates an error by allocating a 0-length skb and calling ppp_do_recv(). It takes an error code argument, which is stored in skb->cb, but not used by ppp_receive_frame(). Simplify the error handling by removing the unused parameter and the unnecessary skb allocation. Instead, call ppp_receive_error() directly from ppp_input_error() under the recv lock, and the length check in ppp_receive_frame() can be removed. Signed-off-by: Qingfang Deng Signed-off-by: Jakub Kicinski --- drivers/net/ppp/ppp_async.c | 2 +- drivers/net/ppp/ppp_generic.c | 31 ++++++++++--------------------- drivers/net/ppp/ppp_synctty.c | 2 +- include/linux/ppp_channel.h | 2 +- net/atm/pppoatm.c | 2 +- 5 files changed, 14 insertions(+), 25 deletions(-) diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c index b4cf2d09c6bd..93a7b0f6c4e7 100644 --- a/drivers/net/ppp/ppp_async.c +++ b/drivers/net/ppp/ppp_async.c @@ -491,7 +491,7 @@ static void ppp_async_process(struct tasklet_struct *t) /* process received packets */ while ((skb = skb_dequeue(&ap->rqueue)) != NULL) { if (skb->cb[0]) - ppp_input_error(&ap->chan, 0); + ppp_input_error(&ap->chan); ppp_input(&ap->chan, skb); } diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 2081da6c2144..6344c5eb0f98 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -2383,12 +2383,10 @@ done: rcu_read_unlock_bh(); } -/* Put a 0-length skb in the receive queue as an error indication */ void -ppp_input_error(struct ppp_channel *chan, int code) +ppp_input_error(struct ppp_channel *chan) { struct channel *pch = chan->ppp; - struct sk_buff *skb; struct ppp *ppp; if (!pch) @@ -2397,12 +2395,9 @@ ppp_input_error(struct ppp_channel *chan, int code) rcu_read_lock_bh(); ppp = rcu_dereference_bh(pch->ppp); if (ppp) { - skb = alloc_skb(0, GFP_ATOMIC); - if (skb) { - skb->len = 0; /* probably unnecessary */ - skb->cb[0] = code; - ppp_do_recv(ppp, skb, pch); - } + ppp_recv_lock(ppp); + ppp_receive_error(ppp); + ppp_recv_unlock(ppp); } rcu_read_unlock_bh(); } @@ -2414,20 +2409,14 @@ ppp_input_error(struct ppp_channel *chan, int code) static void ppp_receive_frame(struct ppp *ppp, struct sk_buff *skb, struct channel *pch) { - /* note: a 0-length skb is used as an error indication */ - if (skb->len > 0) { - skb_checksum_complete_unset(skb); + skb_checksum_complete_unset(skb); #ifdef CONFIG_PPP_MULTILINK - /* XXX do channel-level decompression here */ - if (PPP_PROTO(skb) == PPP_MP) - ppp_receive_mp_frame(ppp, skb, pch); - else + /* XXX do channel-level decompression here */ + if (PPP_PROTO(skb) == PPP_MP) + ppp_receive_mp_frame(ppp, skb, pch); + else #endif /* CONFIG_PPP_MULTILINK */ - ppp_receive_nonmp_frame(ppp, skb); - } else { - kfree_skb(skb); - ppp_receive_error(ppp); - } + ppp_receive_nonmp_frame(ppp, skb); } static void diff --git a/drivers/net/ppp/ppp_synctty.c b/drivers/net/ppp/ppp_synctty.c index c2063961f395..b7f243b416f8 100644 --- a/drivers/net/ppp/ppp_synctty.c +++ b/drivers/net/ppp/ppp_synctty.c @@ -483,7 +483,7 @@ static void ppp_sync_process(struct tasklet_struct *t) while ((skb = skb_dequeue(&ap->rqueue)) != NULL) { if (skb->len == 0) { /* zero length buffers indicate error */ - ppp_input_error(&ap->chan, 0); + ppp_input_error(&ap->chan); kfree_skb(skb); } else diff --git a/include/linux/ppp_channel.h b/include/linux/ppp_channel.h index f73fbea0dbc2..ca8ad03eeef0 100644 --- a/include/linux/ppp_channel.h +++ b/include/linux/ppp_channel.h @@ -55,7 +55,7 @@ extern void ppp_input(struct ppp_channel *, struct sk_buff *); /* Called by the channel when an input error occurs, indicating that we may have missed a packet. */ -extern void ppp_input_error(struct ppp_channel *, int code); +extern void ppp_input_error(struct ppp_channel *); /* Attach a channel to a given PPP unit in specified net. */ extern int ppp_register_net_channel(struct net *, struct ppp_channel *); diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c index 2574aae3e066..e3c422dc533a 100644 --- a/net/atm/pppoatm.c +++ b/net/atm/pppoatm.c @@ -228,7 +228,7 @@ static void pppoatm_push(struct atm_vcc *atmvcc, struct sk_buff *skb) error: kfree_skb(skb); - ppp_input_error(&pvcc->chan, 0); + ppp_input_error(&pvcc->chan); } static int pppoatm_may_send(struct pppoatm_vcc *pvcc, int size) From dc9c9193c7c19a0163552da3a73a1706b7faea12 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Fri, 6 Mar 2026 10:08:19 -0800 Subject: [PATCH 0389/1561] selftests: fib_tests: fix link-local retrieval in fib6_nexthop() fib6_nexthop() retrieves the link-local address for two interfaces used in the test. However, both lldummy and llv1 are obtained from dummy0. llv1 is expected to be retrieved from veth1, which is the interface used later in the test. The subsequent check and error message also expect the address to be retrieved from veth1. Fix this by retrieving llv1 from veth1. Signed-off-by: Alok Tiwari Link: https://patch.msgid.link/20260306180830.2329477-1-alok.a.tiwari@oracle.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fib_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index c5694cc4ddd2..439461080c60 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -545,7 +545,7 @@ fib4_nexthop() fib6_nexthop() { local lldummy=$(get_linklocal dummy0) - local llv1=$(get_linklocal dummy0) + local llv1=$(get_linklocal veth1) if [ -z "$lldummy" ]; then echo "Failed to get linklocal address for dummy0" From 014c607f86abc903d7bf46e13373d89392e371fe Mon Sep 17 00:00:00 2001 From: Ankit Garg Date: Fri, 6 Mar 2026 22:48:16 +0000 Subject: [PATCH 0390/1561] gve: add support for UDP GSO for DQO format Enable support for UDP GSO when using DQO format. Advertise the feature flag during device initialization and enable offload by default. Signed-off-by: Ankit Garg Reviewed-by: Willem de Bruijn Signed-off-by: Harshitha Ramamurthy Link: https://patch.msgid.link/20260306224816.3391551-1-hramamurthy@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_adminq.c | 8 +++-- drivers/net/ethernet/google/gve/gve_tx_dqo.c | 33 ++++++++++++++------ 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c index 129f3e11a442..08587bf40ed4 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.c +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -1117,10 +1117,12 @@ int gve_adminq_describe_device(struct gve_priv *priv) gve_set_default_rss_sizes(priv); - /* DQO supports HW-GRO. */ + /* DQO supports HW-GRO and UDP_GSO */ if (gve_is_dqo(priv)) { - priv->dev->hw_features |= NETIF_F_GRO_HW; - priv->dev->features |= NETIF_F_GRO_HW; + u64 additional_features = NETIF_F_GRO_HW | NETIF_F_GSO_UDP_L4; + + priv->dev->hw_features |= additional_features; + priv->dev->features |= additional_features; } priv->max_registered_pages = diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c index f33035018b89..80ab0a449ff5 100644 --- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c @@ -570,9 +570,11 @@ static void gve_tx_fill_pkt_desc_dqo(struct gve_tx_ring *tx, u32 *desc_idx, */ static int gve_prep_tso(struct sk_buff *skb) { + struct skb_shared_info *shinfo = skb_shinfo(skb); + u32 paylen, l4_start; struct tcphdr *tcp; + struct udphdr *udp; int header_len; - u32 paylen; int err; /* Note: HW requires MSS (gso_size) to be <= 9728 and the total length @@ -583,21 +585,34 @@ static int gve_prep_tso(struct sk_buff *skb) * - Kernel will not produce a TSO larger than 64k */ - if (unlikely(skb_shinfo(skb)->gso_size < GVE_TX_MIN_TSO_MSS_DQO)) + if (unlikely(shinfo->gso_size < GVE_TX_MIN_TSO_MSS_DQO)) return -1; - if (!(skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) - return -EINVAL; - /* Needed because we will modify header. */ err = skb_cow_head(skb, 0); if (err < 0) return err; - tcp = tcp_hdr(skb); - paylen = skb->len - skb_transport_offset(skb); - csum_replace_by_diff(&tcp->check, (__force __wsum)htonl(paylen)); - header_len = skb_tcp_all_headers(skb); + l4_start = skb_transport_offset(skb); + paylen = skb->len - l4_start; + + switch (shinfo->gso_type) { + case SKB_GSO_TCPV4: + case SKB_GSO_TCPV6: + tcp = tcp_hdr(skb); + csum_replace_by_diff(&tcp->check, + (__force __wsum)htonl(paylen)); + header_len = skb_tcp_all_headers(skb); + break; + case SKB_GSO_UDP_L4: + udp = udp_hdr(skb); + csum_replace_by_diff(&udp->check, + (__force __wsum)htonl(paylen)); + header_len = sizeof(struct udphdr) + l4_start; + break; + default: + return -EINVAL; + } if (unlikely(header_len > GVE_TX_MAX_HDR_SIZE_DQO)) return -EINVAL; From bf3471e6e6c02137dc0d26caa783ac1849f9aab8 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 6 Mar 2026 09:07:27 +0100 Subject: [PATCH 0391/1561] net: airoha: Make flow control source port mapping dependent on nbq parameter Flow control source port mapping for USB serdes needs to be configured according to the GDM port nbq parameter. This is a preliminary patch since nbq parameter is specific for the given port serdes and needs to be read from the DTS (in the current codebase is assigned statically). Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260306-airoha-fix-loopback-for-usb-serdes-v2-1-319de9c96826@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 10 ++++++---- drivers/net/ethernet/airoha/airoha_regs.h | 5 +---- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 09acbe669536..7b47d9e066ce 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -1729,10 +1729,12 @@ static int airhoha_set_gdm2_loopback(struct airoha_gdm_port *port) SP_CPORT_MASK(val), __field_prep(SP_CPORT_MASK(val), FE_PSE_PORT_CDM2)); - if (port->id != AIROHA_GDM3_IDX && airoha_is_7581(eth)) - airoha_fe_rmw(eth, REG_SRC_PORT_FC_MAP6, - FC_ID_OF_SRC_PORT24_MASK, - FIELD_PREP(FC_ID_OF_SRC_PORT24_MASK, 2)); + if (port->id == AIROHA_GDM4_IDX && airoha_is_7581(eth)) { + u32 mask = FC_ID_OF_SRC_PORT_MASK(nbq); + + airoha_fe_rmw(eth, REG_SRC_PORT_FC_MAP6, mask, + __field_prep(mask, AIROHA_GDM2_IDX)); + } return 0; } diff --git a/drivers/net/ethernet/airoha/airoha_regs.h b/drivers/net/ethernet/airoha/airoha_regs.h index ed4e3407f4a0..29878b954c77 100644 --- a/drivers/net/ethernet/airoha/airoha_regs.h +++ b/drivers/net/ethernet/airoha/airoha_regs.h @@ -376,10 +376,7 @@ #define SP_CPORT_MASK(_n) GENMASK(3 + ((_n) << 2), ((_n) << 2)) #define REG_SRC_PORT_FC_MAP6 0x2298 -#define FC_ID_OF_SRC_PORT27_MASK GENMASK(28, 24) -#define FC_ID_OF_SRC_PORT26_MASK GENMASK(20, 16) -#define FC_ID_OF_SRC_PORT25_MASK GENMASK(12, 8) -#define FC_ID_OF_SRC_PORT24_MASK GENMASK(4, 0) +#define FC_ID_OF_SRC_PORT_MASK(_n) GENMASK(4 + ((_n) << 3), ((_n) << 3)) #define REG_CDM5_RX_OQ1_DROP_CNT 0x29d4 From e8eb33d650cd5e60b008f9d958262e489de6e7a9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 7 Mar 2026 09:22:14 +0000 Subject: [PATCH 0392/1561] tcp: move sysctl_tcp_shrink_window to netns_ipv4_read_txrx group Commit 18fd64d25422 ("netns-ipv4: reorganize netns_ipv4 fast path variables") missed that __tcp_select_window() is reading net->ipv4.sysctl_tcp_shrink_window. Move this field to netns_ipv4_read_txrx group, as __tcp_select_window() is used both in tx and rx paths. Saves a potential cache line miss. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260307092214.2433548-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/netns/ipv4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 4c249aeaf7f1..38624beff9b3 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -74,6 +74,7 @@ struct netns_ipv4 { /* TXRX readonly hotpath cache lines */ __cacheline_group_begin(netns_ipv4_read_txrx); + u8 sysctl_tcp_shrink_window; __cacheline_group_end(netns_ipv4_read_txrx); /* RX readonly hotpath cache line */ @@ -122,7 +123,6 @@ struct netns_ipv4 { #endif bool fib_has_custom_local_routes; bool fib_offload_disabled; - u8 sysctl_tcp_shrink_window; #ifdef CONFIG_IP_ROUTE_CLASSID atomic_t fib_num_tclassid_users; #endif From f2db7b80b03f268ff65fe825a7c761a8f551aa48 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 7 Mar 2026 13:36:01 +0000 Subject: [PATCH 0393/1561] net/sched: refine indirect call mitigation in tc_wrapper.h Some modern cpus disable X86_FEATURE_RETPOLINE feature, even if a direct call can still be beneficial. Even when IBRS is present, an indirect call is more expensive than a direct one: Direct Calls: Compilers can perform powerful optimizations like inlining, where the function body is directly inserted at the call site, eliminating call overhead entirely. Indirect Calls: Inlining is much harder, if not impossible, because the compiler doesn't know the target function at compile time. Techniques like Indirect Call Promotion can help by using profile-guided optimization to turn frequently taken indirect calls into conditional direct calls, but they still add complexity and potential overhead compared to a truly direct call. In this patch, I split tc_skip_wrapper in two different static keys, one for tc_act() (tc_skip_wrapper_act) and one for tc_classify() (tc_skip_wrapper_cls). Then I enable the tc_skip_wrapper_cls only if the count of builtin classifiers is above one. I enable tc_skip_wrapper_act only it the count of builtin actions is above one. In our production kernels, we only have CONFIG_NET_CLS_BPF=y and CONFIG_NET_ACT_BPF=y. Other are modules or are not compiled. Tested on AMD Turin cpus, cls_bpf_classify() cost went from 1% down to 0.18 %, and FDO will be able to inline it in tcf_classify() for further gains. Signed-off-by: Eric Dumazet Acked-by: Jamal Hadi Salim Reviewed-by: Pedro Tammela Reviewed-by: Victor Nogueira Link: https://patch.msgid.link/20260307133601.3863071-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/tc_wrapper.h | 47 +++++++++++++++++++++++++++++++++++----- net/sched/sch_api.c | 3 ++- 2 files changed, 44 insertions(+), 6 deletions(-) diff --git a/include/net/tc_wrapper.h b/include/net/tc_wrapper.h index ffe58a02537c..4ebb053bb0dd 100644 --- a/include/net/tc_wrapper.h +++ b/include/net/tc_wrapper.h @@ -12,7 +12,8 @@ #define TC_INDIRECT_SCOPE -extern struct static_key_false tc_skip_wrapper; +extern struct static_key_false tc_skip_wrapper_act; +extern struct static_key_false tc_skip_wrapper_cls; /* TC Actions */ #ifdef CONFIG_NET_CLS_ACT @@ -46,7 +47,7 @@ TC_INDIRECT_ACTION_DECLARE(tunnel_key_act); static inline int tc_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { - if (static_branch_likely(&tc_skip_wrapper)) + if (static_branch_likely(&tc_skip_wrapper_act)) goto skip; #if IS_BUILTIN(CONFIG_NET_ACT_GACT) @@ -153,7 +154,7 @@ TC_INDIRECT_FILTER_DECLARE(u32_classify); static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { - if (static_branch_likely(&tc_skip_wrapper)) + if (static_branch_likely(&tc_skip_wrapper_cls)) goto skip; #if IS_BUILTIN(CONFIG_NET_CLS_BPF) @@ -202,8 +203,44 @@ skip: static inline void tc_wrapper_init(void) { #ifdef CONFIG_X86 - if (!cpu_feature_enabled(X86_FEATURE_RETPOLINE)) - static_branch_enable(&tc_skip_wrapper); + int cnt_cls = IS_BUILTIN(CONFIG_NET_CLS_BPF) + + IS_BUILTIN(CONFIG_NET_CLS_U32) + + IS_BUILTIN(CONFIG_NET_CLS_FLOWER) + + IS_BUILTIN(CONFIG_NET_CLS_FW) + + IS_BUILTIN(CONFIG_NET_CLS_MATCHALL) + + IS_BUILTIN(CONFIG_NET_CLS_BASIC) + + IS_BUILTIN(CONFIG_NET_CLS_CGROUP) + + IS_BUILTIN(CONFIG_NET_CLS_FLOW) + + IS_BUILTIN(CONFIG_NET_CLS_ROUTE4); + + int cnt_act = IS_BUILTIN(CONFIG_NET_ACT_GACT) + + IS_BUILTIN(CONFIG_NET_ACT_MIRRED) + + IS_BUILTIN(CONFIG_NET_ACT_PEDIT) + + IS_BUILTIN(CONFIG_NET_ACT_SKBEDIT) + + IS_BUILTIN(CONFIG_NET_ACT_SKBMOD) + + IS_BUILTIN(CONFIG_NET_ACT_POLICE) + + IS_BUILTIN(CONFIG_NET_ACT_BPF) + + IS_BUILTIN(CONFIG_NET_ACT_CONNMARK) + + IS_BUILTIN(CONFIG_NET_ACT_CSUM) + + IS_BUILTIN(CONFIG_NET_ACT_CT) + + IS_BUILTIN(CONFIG_NET_ACT_CTINFO) + + IS_BUILTIN(CONFIG_NET_ACT_GATE) + + IS_BUILTIN(CONFIG_NET_ACT_MPLS) + + IS_BUILTIN(CONFIG_NET_ACT_NAT) + + IS_BUILTIN(CONFIG_NET_ACT_TUNNEL_KEY) + + IS_BUILTIN(CONFIG_NET_ACT_VLAN) + + IS_BUILTIN(CONFIG_NET_ACT_IFE) + + IS_BUILTIN(CONFIG_NET_ACT_SIMP) + + IS_BUILTIN(CONFIG_NET_ACT_SAMPLE); + + if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) + return; + + if (cnt_cls > 1) + static_branch_enable(&tc_skip_wrapper_cls); + + if (cnt_act > 1) + static_branch_enable(&tc_skip_wrapper_act); #endif } diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index c0bab092ea80..ed869a5ffc73 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -2479,7 +2479,8 @@ static struct pernet_operations psched_net_ops = { }; #if IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) -DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper); +DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper_act); +DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper_cls); #endif static const struct rtnl_msg_handler psched_rtnl_msg_handlers[] __initconst = { From 4b78c9cbd8f1fbb9517aee48b372646f4cf05442 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 8 Mar 2026 12:23:02 +0000 Subject: [PATCH 0394/1561] tcp: move tp->chrono_type next tp->chrono_stat[] chrono_type is currently in tcp_sock_read_txrx group, which is supposed to hold read-mostly fields. But chrono_type is mostly written in tx path, it should be moved to tcp_sock_write_tx group, close to other chrono fields (chrono_stat[], chrono_start). Note this adds holes, but data locality is far more important. Use a full u8 for the time being, compiler can generate more efficient code. Signed-off-by: Eric Dumazet Reviewed-by: Neal Cardwell Link: https://patch.msgid.link/20260308122302.2895067-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/tcp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index f72eef31fa23..c44cf9ae8d16 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -228,8 +228,7 @@ struct tcp_sock { u32 sacked_out; /* SACK'd packets */ u16 tcp_header_len; /* Bytes of tcp header to send */ u8 scaling_ratio; /* see tcp_win_from_space() */ - u8 chrono_type : 2, /* current chronograph type */ - repair : 1, + u8 repair : 1, tcp_usec_ts : 1, /* TSval values in usec */ is_sack_reneg:1, /* in recovery from loss with SACK reneg? */ is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */ @@ -264,6 +263,7 @@ struct tcp_sock { * total number of data bytes sent. */ u32 snd_sml; /* Last byte of the most recently transmitted small packet */ + u8 chrono_type; /* current chronograph type */ u32 chrono_start; /* Start time in jiffies of a TCP chrono */ u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */ u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ From d6d4ff335db2d9242937ca474d292010acd35c38 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 8 Mar 2026 12:35:49 +0000 Subject: [PATCH 0395/1561] tcp: inline tcp_chrono_start() tcp_chrono_start() is small enough, and used in TCP sendmsg() fast path (from tcp_skb_entail()). Note clang is already inlining it from functions in tcp_output.c. Inlining it improves performance and reduces bloat : $ scripts/bloat-o-meter -t vmlinux.old vmlinux.new add/remove: 0/2 grow/shrink: 1/0 up/down: 1/-84 (-83) Function old new delta tcp_skb_entail 280 281 +1 __pfx_tcp_chrono_start 16 - -16 tcp_chrono_start 68 - -68 Total: Before=25192434, After=25192351, chg -0.00% Note that tcp_chrono_stop() is too big. Signed-off-by: Eric Dumazet Reviewed-by: Neal Cardwell Link: https://patch.msgid.link/20260308123549.2924460-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 25 ++++++++++++++++++++++++- net/ipv4/tcp_output.c | 24 ------------------------ 2 files changed, 24 insertions(+), 25 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index f07aef7faa65..9f0aee9e5d76 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2159,7 +2159,30 @@ enum tcp_chrono { __TCP_CHRONO_MAX, }; -void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type); +static inline void tcp_chrono_set(struct tcp_sock *tp, const enum tcp_chrono new) +{ + const u32 now = tcp_jiffies32; + enum tcp_chrono old = tp->chrono_type; + + if (old > TCP_CHRONO_UNSPEC) + tp->chrono_stat[old - 1] += now - tp->chrono_start; + tp->chrono_start = now; + tp->chrono_type = new; +} + +static inline void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type) +{ + struct tcp_sock *tp = tcp_sk(sk); + + /* If there are multiple conditions worthy of tracking in a + * chronograph then the highest priority enum takes precedence + * over the other conditions. So that if something "more interesting" + * starts happening, stop the previous chrono and start a new one. + */ + if (type > tp->chrono_type) + tcp_chrono_set(tp, type); +} + void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type); /* This helper is needed, because skb->tcp_tsorted_anchor uses diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 4377b3673da9..a53802f28dd1 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2903,30 +2903,6 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, return false; } -static void tcp_chrono_set(struct tcp_sock *tp, const enum tcp_chrono new) -{ - const u32 now = tcp_jiffies32; - enum tcp_chrono old = tp->chrono_type; - - if (old > TCP_CHRONO_UNSPEC) - tp->chrono_stat[old - 1] += now - tp->chrono_start; - tp->chrono_start = now; - tp->chrono_type = new; -} - -void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type) -{ - struct tcp_sock *tp = tcp_sk(sk); - - /* If there are multiple conditions worthy of tracking in a - * chronograph then the highest priority enum takes precedence - * over the other conditions. So that if something "more interesting" - * starts happening, stop the previous chrono and start a new one. - */ - if (type > tp->chrono_type) - tcp_chrono_set(tp, type); -} - void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type) { struct tcp_sock *tp = tcp_sk(sk); From 56acc7f51974c824142dd0a529b1ce05ec5fa75a Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Sat, 7 Mar 2026 19:07:24 -0300 Subject: [PATCH 0396/1561] selftests/tc-testing: Adapt test's output to HFSC's iproute2 printing changes To make the printing of HFSC's defcls consistent with HTB's, iproute2 is now printing defcls prepended with "0x". This commit adapts test a4c3 to this change. Acked-by: Jamal Hadi Salim Signed-off-by: Victor Nogueira Link: https://patch.msgid.link/20260307220724.2501212-1-victor@mojatatu.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 6a39640aa2a8..ec0f85af6379 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -570,7 +570,7 @@ "cmdUnderTest": "$TC class change dev $DUMMY parent 3:0 classid 3:1 hfsc sc m1 5Mbit d 10ms m2 10Mbit", "expExitCode": "0", "verifyCmd": "$TC -s qdisc show dev $DUMMY", - "matchPattern": "qdisc hfsc 3:.*parent 1:2.*default 1", + "matchPattern": "qdisc hfsc 3:.*parent 1:2.*default 0x1", "matchCount": "1", "teardown": [ "$TC qdisc del dev $DUMMY handle 1:0 root", From c127d4087930cc4706b99c9431e35ba00776a634 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sat, 7 Mar 2026 10:55:08 +0000 Subject: [PATCH 0397/1561] net: stmmac: remove stmmac_dwmac4_get_mac_addr() stmmac_dwmac4_get_mac_addr() is identical to stmmac_get_mac_addr(). Remove stmmac_dwmac4_get_mac_addr() to avoid this code duplication. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vypJM-0000000CSiJ-48yO@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac4_core.c | 4 ++-- .../net/ethernet/stmicro/stmmac/dwmac4_lib.c | 18 ------------------ 2 files changed, 2 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 602771e19d0f..e6bcb77b22a2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -373,8 +373,8 @@ static void dwmac4_get_umac_addr(struct mac_device_info *hw, { void __iomem *ioaddr = hw->pcsr; - stmmac_dwmac4_get_mac_addr(ioaddr, addr, GMAC_ADDR_HIGH(reg_n), - GMAC_ADDR_LOW(reg_n)); + stmmac_get_mac_addr(ioaddr, addr, GMAC_ADDR_HIGH(reg_n), + GMAC_ADDR_LOW(reg_n)); } static int dwmac4_set_lpi_mode(struct mac_device_info *hw, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c index 8c87a20880c4..a0249715fafa 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c @@ -226,21 +226,3 @@ void stmmac_dwmac4_set_mac(void __iomem *ioaddr, bool enable) if (value != old_val) writel(value, ioaddr + GMAC_CONFIG); } - -void stmmac_dwmac4_get_mac_addr(void __iomem *ioaddr, unsigned char *addr, - unsigned int high, unsigned int low) -{ - unsigned int hi_addr, lo_addr; - - /* Read the MAC address from the hardware */ - hi_addr = readl(ioaddr + high); - lo_addr = readl(ioaddr + low); - - /* Extract the MAC address from the high and low words */ - addr[0] = lo_addr & 0xff; - addr[1] = (lo_addr >> 8) & 0xff; - addr[2] = (lo_addr >> 16) & 0xff; - addr[3] = (lo_addr >> 24) & 0xff; - addr[4] = hi_addr & 0xff; - addr[5] = (hi_addr >> 8) & 0xff; -} From b560d4434f98b5a8f7d04e5ec1a515a21aa8d914 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sat, 7 Mar 2026 10:53:15 +0000 Subject: [PATCH 0398/1561] net: stmmac: ptp: rearrange n_ext_ts initialisation Use local variables for n_ext_ts rather than referencing the DMA capability several times. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vypHX-0000000CSbc-123K@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index 98da499ba3b1..654d04f8c373 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -335,6 +335,7 @@ const struct ptp_clock_info dwmac1000_ptp_clock_ops = { void stmmac_ptp_register(struct stmmac_priv *priv) { unsigned int pps_out_num = priv->dma_cap.pps_out_num; + unsigned int n_ext_ts; int i; if (pps_out_num > STMMAC_PPS_MAX) { @@ -358,8 +359,9 @@ void stmmac_ptp_register(struct stmmac_priv *priv) if (pps_out_num) priv->ptp_clock_ops.n_per_out = pps_out_num; - if (priv->dma_cap.aux_snapshot_n) - priv->ptp_clock_ops.n_ext_ts = priv->dma_cap.aux_snapshot_n; + n_ext_ts = priv->dma_cap.aux_snapshot_n; + if (n_ext_ts) + priv->ptp_clock_ops.n_ext_ts = n_ext_ts; if (priv->plat->ptp_max_adj) priv->ptp_clock_ops.max_adj = priv->plat->ptp_max_adj; From 687e7863f027426175791f8a23a59b7c4c816fe9 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sat, 7 Mar 2026 10:53:20 +0000 Subject: [PATCH 0399/1561] net: stmmac: ptp: remove redundant priv->pps[].available priv->pps[].available is set in stmmac_ptp_register() for all PPS outputs reported by hardware up to STMMAC_PPS_MAX. Since we now set priv->ptp_clock_ops.n_per_out to the number of PPS outputs that both the hardware and driver can support to prevent array overflow in stmmac_enable(), this makes priv->pps[].available redundant. Remove this struct member. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vypHc-0000000CSbl-1X6v@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac5.c | 2 -- drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c | 2 -- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 1 - drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c | 4 ---- 4 files changed, 9 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c index 1c431b918719..f1bb981cab7c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c @@ -525,8 +525,6 @@ int dwmac5_flex_pps_config(void __iomem *ioaddr, int index, u32 val = readl(ioaddr + MAC_PPS_CONTROL); u64 period; - if (!cfg->available) - return -EINVAL; if (tnsec & TRGTBUSY0) return -EBUSY; if (!sub_second_inc || !systime_flags) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index 915e7c2ab11f..efa76b147f9e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -1162,8 +1162,6 @@ static int dwxgmac2_flex_pps_config(void __iomem *ioaddr, int index, u32 val = readl(ioaddr + XGMAC_PPS_CONTROL); u64 period; - if (!cfg->available) - return -EINVAL; if (tnsec & XGMAC_TRGTBUSY0) return -EBUSY; if (!sub_second_inc || !systime_flags) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 1fe96cd24b4f..335e60439b42 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -181,7 +181,6 @@ struct stmmac_tc_entry { #define STMMAC_PPS_MAX 4 struct stmmac_pps_cfg { - bool available; struct timespec64 start; struct timespec64 period; }; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index 654d04f8c373..960249960004 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -336,7 +336,6 @@ void stmmac_ptp_register(struct stmmac_priv *priv) { unsigned int pps_out_num = priv->dma_cap.pps_out_num; unsigned int n_ext_ts; - int i; if (pps_out_num > STMMAC_PPS_MAX) { dev_warn(priv->device, @@ -345,9 +344,6 @@ void stmmac_ptp_register(struct stmmac_priv *priv) pps_out_num = STMMAC_PPS_MAX; } - for (i = 0; i < pps_out_num; i++) - priv->pps[i].available = true; - /* Calculate the clock domain crossing (CDC) error if necessary */ priv->plat->cdc_error_adj = 0; if (priv->plat->core_type == DWMAC_CORE_GMAC4) From 46097d011f77f5758fb47b7059b4f1f2e7403940 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 6 Mar 2026 16:09:47 +0100 Subject: [PATCH 0400/1561] net: airoha: Move GDM forward port configuration in ndo_open/ndo_stop callbacks This change allows to set GDM forward port configuration to FE_PSE_PORT_DROP stopping the network device. Hw design requires to stop packet forwarding putting the interface down. Moreover, PPE firmware requires to use PPE1 for GDM3 or GDM4. Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260306-airoha-gdm-forward-ndo-open-stop-v1-1-7b7a20dd9ef0@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/airoha/airoha_eth.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 7b47d9e066ce..3e9ec5c178f8 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -1611,6 +1611,7 @@ static int airoha_dev_open(struct net_device *dev) int err, len = ETH_HLEN + dev->mtu + ETH_FCS_LEN; struct airoha_gdm_port *port = netdev_priv(dev); struct airoha_qdma *qdma = port->qdma; + u32 pse_port = FE_PSE_PORT_PPE1; netif_tx_start_all_queues(dev); err = airoha_set_vip_for_gdm_port(port, true); @@ -1634,6 +1635,14 @@ static int airoha_dev_open(struct net_device *dev) GLOBAL_CFG_RX_DMA_EN_MASK); atomic_inc(&qdma->users); + if (port->id == AIROHA_GDM2_IDX && + airoha_ppe_is_enabled(qdma->eth, 1)) { + /* For PPE2 always use secondary cpu port. */ + pse_port = FE_PSE_PORT_PPE2; + } + airoha_set_gdm_port_fwd_cfg(qdma->eth, REG_GDM_FWD_CFG(port->id), + pse_port); + return 0; } @@ -1651,6 +1660,9 @@ static int airoha_dev_stop(struct net_device *dev) for (i = 0; i < ARRAY_SIZE(qdma->q_tx); i++) netdev_tx_reset_subqueue(dev, i); + airoha_set_gdm_port_fwd_cfg(qdma->eth, REG_GDM_FWD_CFG(port->id), + FE_PSE_PORT_DROP); + if (atomic_dec_and_test(&qdma->users)) { airoha_qdma_clear(qdma, REG_QDMA_GLOBAL_CFG, GLOBAL_CFG_TX_DMA_EN_MASK | @@ -1744,7 +1756,7 @@ static int airoha_dev_init(struct net_device *dev) struct airoha_gdm_port *port = netdev_priv(dev); struct airoha_qdma *qdma = port->qdma; struct airoha_eth *eth = qdma->eth; - u32 pse_port, fe_cpu_port; + u32 fe_cpu_port; u8 ppe_id; airoha_set_macaddr(port, dev->dev_addr); @@ -1765,7 +1777,7 @@ static int airoha_dev_init(struct net_device *dev) if (airoha_ppe_is_enabled(eth, 1)) { /* For PPE2 always use secondary cpu port. */ fe_cpu_port = FE_PSE_PORT_CDM2; - pse_port = FE_PSE_PORT_PPE2; + ppe_id = 1; break; } fallthrough; @@ -1774,13 +1786,11 @@ static int airoha_dev_init(struct net_device *dev) /* For PPE1 select cpu port according to the running QDMA. */ fe_cpu_port = qdma_id ? FE_PSE_PORT_CDM2 : FE_PSE_PORT_CDM1; - pse_port = FE_PSE_PORT_PPE1; + ppe_id = 0; break; } } - airoha_set_gdm_port_fwd_cfg(eth, REG_GDM_FWD_CFG(port->id), pse_port); - ppe_id = pse_port == FE_PSE_PORT_PPE2 ? 1 : 0; airoha_fe_rmw(eth, REG_PPE_DFT_CPORT0(ppe_id), DFT_CPORT_MASK(port->id), __field_prep(DFT_CPORT_MASK(port->id), fe_cpu_port)); From 89fe91c65992a37863241e35aec151210efc53ce Mon Sep 17 00:00:00 2001 From: Erni Sri Satya Vennela Date: Fri, 6 Mar 2026 13:12:06 -0800 Subject: [PATCH 0401/1561] net: mana: hardening: Validate doorbell ID from GDMA_REGISTER_DEVICE response As a part of MANA hardening for CVM, add validation for the doorbell ID (db_id) received from hardware in the GDMA_REGISTER_DEVICE response to prevent out-of-bounds memory access when calculating the doorbell page address. In mana_gd_ring_doorbell(), the doorbell page address is calculated as: addr = db_page_base + db_page_size * db_index = (bar0_va + db_page_off) + db_page_size * db_index A hardware could return values that cause this address to fall outside the BAR0 MMIO region. In Confidential VM environments, hardware responses cannot be fully trusted. Add the following validations: - Store the BAR0 size (bar0_size) in gdma_context during probe. - Validate the doorbell page offset (db_page_off) read from device registers does not exceed bar0_size during initialization, converting mana_gd_init_registers() to return an error code. - Validate db_id from GDMA_REGISTER_DEVICE response against the maximum number of doorbell pages that fit within BAR0. Signed-off-by: Erni Sri Satya Vennela Link: https://patch.msgid.link/20260306211212.543376-1-ernis@linux.microsoft.com Signed-off-by: Paolo Abeni --- .../net/ethernet/microsoft/mana/gdma_main.c | 60 ++++++++++++++----- include/net/mana/gdma.h | 4 +- 2 files changed, 49 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index aef8612b73cb..ef0dbfaac8f4 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -39,49 +39,66 @@ static u64 mana_gd_r64(struct gdma_context *g, u64 offset) return readq(g->bar0_va + offset); } -static void mana_gd_init_pf_regs(struct pci_dev *pdev) +static int mana_gd_init_pf_regs(struct pci_dev *pdev) { struct gdma_context *gc = pci_get_drvdata(pdev); void __iomem *sriov_base_va; u64 sriov_base_off; gc->db_page_size = mana_gd_r32(gc, GDMA_PF_REG_DB_PAGE_SIZE) & 0xFFFF; - gc->db_page_base = gc->bar0_va + - mana_gd_r64(gc, GDMA_PF_REG_DB_PAGE_OFF); + gc->db_page_off = mana_gd_r64(gc, GDMA_PF_REG_DB_PAGE_OFF); - gc->phys_db_page_base = gc->bar0_pa + - mana_gd_r64(gc, GDMA_PF_REG_DB_PAGE_OFF); + /* Validate doorbell offset is within BAR0 */ + if (gc->db_page_off >= gc->bar0_size) { + dev_err(gc->dev, + "Doorbell offset 0x%llx exceeds BAR0 size 0x%llx\n", + gc->db_page_off, (u64)gc->bar0_size); + return -EPROTO; + } + + gc->db_page_base = gc->bar0_va + gc->db_page_off; + gc->phys_db_page_base = gc->bar0_pa + gc->db_page_off; sriov_base_off = mana_gd_r64(gc, GDMA_SRIOV_REG_CFG_BASE_OFF); sriov_base_va = gc->bar0_va + sriov_base_off; gc->shm_base = sriov_base_va + mana_gd_r64(gc, sriov_base_off + GDMA_PF_REG_SHM_OFF); + + return 0; } -static void mana_gd_init_vf_regs(struct pci_dev *pdev) +static int mana_gd_init_vf_regs(struct pci_dev *pdev) { struct gdma_context *gc = pci_get_drvdata(pdev); gc->db_page_size = mana_gd_r32(gc, GDMA_REG_DB_PAGE_SIZE) & 0xFFFF; + gc->db_page_off = mana_gd_r64(gc, GDMA_REG_DB_PAGE_OFFSET); - gc->db_page_base = gc->bar0_va + - mana_gd_r64(gc, GDMA_REG_DB_PAGE_OFFSET); + /* Validate doorbell offset is within BAR0 */ + if (gc->db_page_off >= gc->bar0_size) { + dev_err(gc->dev, + "Doorbell offset 0x%llx exceeds BAR0 size 0x%llx\n", + gc->db_page_off, (u64)gc->bar0_size); + return -EPROTO; + } - gc->phys_db_page_base = gc->bar0_pa + - mana_gd_r64(gc, GDMA_REG_DB_PAGE_OFFSET); + gc->db_page_base = gc->bar0_va + gc->db_page_off; + gc->phys_db_page_base = gc->bar0_pa + gc->db_page_off; gc->shm_base = gc->bar0_va + mana_gd_r64(gc, GDMA_REG_SHM_OFFSET); + + return 0; } -static void mana_gd_init_registers(struct pci_dev *pdev) +static int mana_gd_init_registers(struct pci_dev *pdev) { struct gdma_context *gc = pci_get_drvdata(pdev); if (gc->is_pf) - mana_gd_init_pf_regs(pdev); + return mana_gd_init_pf_regs(pdev); else - mana_gd_init_vf_regs(pdev); + return mana_gd_init_vf_regs(pdev); } /* Suppress logging when we set timeout to zero */ @@ -1256,6 +1273,17 @@ int mana_gd_register_device(struct gdma_dev *gd) return err ? err : -EPROTO; } + /* Validate that doorbell page for db_id is within the BAR0 region. + * In mana_gd_ring_doorbell(), the address is calculated as: + * addr = db_page_base + db_page_size * db_id + * = (bar0_va + db_page_off) + (db_page_size * db_id) + * So we need: db_page_off + db_page_size * (db_id + 1) <= bar0_size + */ + if (gc->db_page_off + gc->db_page_size * ((u64)resp.db_id + 1) > gc->bar0_size) { + dev_err(gc->dev, "Doorbell ID %u out of range\n", resp.db_id); + return -EPROTO; + } + gd->pdid = resp.pdid; gd->gpa_mkey = resp.gpa_mkey; gd->doorbell = resp.db_id; @@ -1890,7 +1918,10 @@ static int mana_gd_setup(struct pci_dev *pdev) struct gdma_context *gc = pci_get_drvdata(pdev); int err; - mana_gd_init_registers(pdev); + err = mana_gd_init_registers(pdev); + if (err) + return err; + mana_smc_init(&gc->shm_channel, gc->dev, gc->shm_base); gc->service_wq = alloc_ordered_workqueue("gdma_service_wq", 0); @@ -1996,6 +2027,7 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent) mutex_init(&gc->eq_test_event_mutex); pci_set_drvdata(pdev, gc); gc->bar0_pa = pci_resource_start(pdev, 0); + gc->bar0_size = pci_resource_len(pdev, 0); bar0_va = pci_iomap(pdev, bar, 0); if (!bar0_va) diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index ec17004b10c0..7fe3a1b61b2d 100644 --- a/include/net/mana/gdma.h +++ b/include/net/mana/gdma.h @@ -421,10 +421,12 @@ struct gdma_context { phys_addr_t bar0_pa; void __iomem *bar0_va; + resource_size_t bar0_size; void __iomem *shm_base; void __iomem *db_page_base; phys_addr_t phys_db_page_base; - u32 db_page_size; + u64 db_page_off; + u64 db_page_size; int numa_node; /* Shared memory chanenl (used to bootstrap HWC) */ From 3fdd33697c2be9184668c89ba4f24a5ecbc8ec51 Mon Sep 17 00:00:00 2001 From: "Mike Marciniszyn (Meta)" Date: Sat, 7 Mar 2026 05:58:43 -0500 Subject: [PATCH 0402/1561] net: export netif_open for self_test usage dev_open() already is exported, but drivers which use the netdev instance lock need to use netif_open() instead. netif_close() is also already exported [1] so this completes the pairing. This export is required for the following fbnic self tests to avoid calling ndo_stop() and ndo_open() in favor of the more appropriate netif_open() and netif_close() that notifies any listeners that the interface went down to test and is now coming back up. Link: https://patch.msgid.link/20250309215851.2003708-1-sdf@fomichev.me [1] Signed-off-by: Mike Marciniszyn (Meta) Link: https://patch.msgid.link/20260307105847.1438-2-mike.marciniszyn@gmail.com Signed-off-by: Paolo Abeni --- net/core/dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/dev.c b/net/core/dev.c index 6fc9350f0be8..f48dc299e4b2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1731,6 +1731,7 @@ int netif_open(struct net_device *dev, struct netlink_ext_ack *extack) return ret; } +EXPORT_SYMBOL(netif_open); static void __dev_close_many(struct list_head *head) { From b43498b7e9be0ef76440980642b2bf20dc213e19 Mon Sep 17 00:00:00 2001 From: "Mike Marciniszyn (Meta)" Date: Sat, 7 Mar 2026 05:58:44 -0500 Subject: [PATCH 0403/1561] eth fbnic: Add register self test The register test will be used to verify hardware is behaving as expected. The test itself will have us writing to registers that should have no side effects due to us resetting after the test has been completed. While the test is being run the interface should be offline. This patch counts on the first patch of this series to export netif_open() and also ensures that the half close calls netif_close() to avoid deadlock. Signed-off-by: Mike Marciniszyn (Meta) Link: https://patch.msgid.link/20260307105847.1438-3-mike.marciniszyn@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/meta/fbnic/fbnic_csr.c | 128 ++++++++++++++++++ drivers/net/ethernet/meta/fbnic/fbnic_csr.h | 19 +++ .../net/ethernet/meta/fbnic/fbnic_ethtool.c | 50 +++++++ 3 files changed, 197 insertions(+) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_csr.c b/drivers/net/ethernet/meta/fbnic/fbnic_csr.c index d9c0dc1c2af9..dc62d623e37c 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_csr.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_csr.c @@ -147,3 +147,131 @@ int fbnic_csr_regs_len(struct fbnic_dev *fbd) return len; } + +/* CSR register test data + * + * The register test will be used to verify hardware is behaving as expected. + * + * The test itself will have us writing to registers that should have no + * side effects due to us resetting after the test has been completed. + * While the test is being run the interface should be offline. + */ +struct fbnic_csr_reg_test_data { + int reg; + u16 reg_offset; + u8 array_len; + u32 read; + u32 write; +}; + +#define FBNIC_QUEUE_REG_TEST(_name, _read, _write) { \ + .reg = FBNIC_QUEUE(0) + FBNIC_QUEUE_##_name, \ + .reg_offset = FBNIC_QUEUE_STRIDE, \ + .array_len = 64, \ + .read = _read, \ + .write = _write \ +} + +static const struct fbnic_csr_reg_test_data pattern_test[] = { + FBNIC_QUEUE_REG_TEST(TWQ0_CTL, FBNIC_QUEUE_TWQ_CTL_RESET, + FBNIC_QUEUE_TWQ_CTL_RESET), + FBNIC_QUEUE_REG_TEST(TWQ0_PTRS, 0, ~0), + FBNIC_QUEUE_REG_TEST(TWQ0_SIZE, FBNIC_QUEUE_TWQ_SIZE_MASK, ~0), + FBNIC_QUEUE_REG_TEST(TWQ0_BAL, FBNIC_QUEUE_BAL_MASK, ~0), + FBNIC_QUEUE_REG_TEST(TWQ0_BAH, ~0, ~0), + FBNIC_QUEUE_REG_TEST(TWQ1_CTL, FBNIC_QUEUE_TWQ_CTL_RESET, + FBNIC_QUEUE_TWQ_CTL_RESET), + FBNIC_QUEUE_REG_TEST(TWQ1_PTRS, 0, ~0), + FBNIC_QUEUE_REG_TEST(TWQ1_SIZE, FBNIC_QUEUE_TWQ_SIZE_MASK, ~0), + FBNIC_QUEUE_REG_TEST(TWQ1_BAL, FBNIC_QUEUE_BAL_MASK, ~0), + FBNIC_QUEUE_REG_TEST(TWQ1_BAH, ~0, ~0), + FBNIC_QUEUE_REG_TEST(TCQ_CTL, FBNIC_QUEUE_TCQ_CTL_RESET, + FBNIC_QUEUE_TCQ_CTL_RESET), + FBNIC_QUEUE_REG_TEST(TCQ_PTRS, 0, ~0), + FBNIC_QUEUE_REG_TEST(TCQ_SIZE, FBNIC_QUEUE_TCQ_SIZE_MASK, ~0), + FBNIC_QUEUE_REG_TEST(TCQ_BAL, FBNIC_QUEUE_BAL_MASK, ~0), + FBNIC_QUEUE_REG_TEST(TCQ_BAH, ~0, ~0), + FBNIC_QUEUE_REG_TEST(RCQ_CTL, FBNIC_QUEUE_RCQ_CTL_RESET, + FBNIC_QUEUE_RCQ_CTL_RESET), + FBNIC_QUEUE_REG_TEST(RCQ_PTRS, 0, ~0), + FBNIC_QUEUE_REG_TEST(RCQ_SIZE, FBNIC_QUEUE_RCQ_SIZE_MASK, ~0), + FBNIC_QUEUE_REG_TEST(RCQ_BAL, FBNIC_QUEUE_BAL_MASK, ~0), + FBNIC_QUEUE_REG_TEST(RCQ_BAH, ~0, ~0), + FBNIC_QUEUE_REG_TEST(BDQ_CTL, FBNIC_QUEUE_BDQ_CTL_RESET, + FBNIC_QUEUE_BDQ_CTL_RESET), + FBNIC_QUEUE_REG_TEST(BDQ_HPQ_PTRS, 0, ~0), + FBNIC_QUEUE_REG_TEST(BDQ_HPQ_SIZE, FBNIC_QUEUE_BDQ_SIZE_MASK, ~0), + FBNIC_QUEUE_REG_TEST(BDQ_HPQ_BAL, FBNIC_QUEUE_BAL_MASK, ~0), + FBNIC_QUEUE_REG_TEST(BDQ_HPQ_BAH, ~0, ~0), + FBNIC_QUEUE_REG_TEST(BDQ_PPQ_PTRS, 0, ~0), + FBNIC_QUEUE_REG_TEST(BDQ_PPQ_SIZE, FBNIC_QUEUE_BDQ_SIZE_MASK, ~0), + FBNIC_QUEUE_REG_TEST(BDQ_PPQ_BAL, FBNIC_QUEUE_BAL_MASK, ~0), + FBNIC_QUEUE_REG_TEST(BDQ_PPQ_BAH, ~0, ~0), +}; + +static enum fbnic_reg_self_test_codes +fbnic_csr_reg_pattern_test(struct fbnic_dev *fbd, int index, + const struct fbnic_csr_reg_test_data *test_data) +{ + static const u32 pattern[] = { ~0, 0x5A5A5A5A, 0xA5A5A5A5, 0}; + enum fbnic_reg_self_test_codes reg; + int i; + + reg = test_data->reg + test_data->reg_offset * index; + for (i = 0; i < ARRAY_SIZE(pattern); i++) { + u32 val = pattern[i] & test_data->write; + u32 result; + + wr32(fbd, reg, val); + result = rd32(fbd, reg); + val &= test_data->read; + + if (result == val) + continue; + + dev_err(fbd->dev, + "%s: reg 0x%06X failed, expected 0x%08X received 0x%08X\n", + __func__, reg, val, result); + + /* Note that FBNIC_INTR_STATUS(0) could be tested and fail + * and the result would not be reported since the register + * offset is 0. However as that register isn't included in + * the register test that isn't an issue. + */ + return reg; + } + + return FBNIC_REG_TEST_SUCCESS; +} + +/** + * fbnic_csr_regs_test() - Verify behavior of NIC registers + * @fbd: device to test + * + * This function is meant to test the bit values of various registers in + * the NIC device. Specifically this test will verify which bits are + * writable and which ones are not. It will write varying patterns of bits + * to the registers testing for sticky bits, or bits that are writable but + * should not be. + * + * Return: FBNIC_REG_TEST_SUCCESS on success, register number on failure + **/ +enum fbnic_reg_self_test_codes fbnic_csr_regs_test(struct fbnic_dev *fbd) +{ + const struct fbnic_csr_reg_test_data *test_data; + + for (test_data = pattern_test; + test_data < pattern_test + ARRAY_SIZE(pattern_test); test_data++) { + u32 i; + + for (i = 0; i < test_data->array_len; i++) { + enum fbnic_reg_self_test_codes reg = + fbnic_csr_reg_pattern_test(fbd, i, test_data); + + if (reg) + return reg; + } + } + + return FBNIC_REG_TEST_SUCCESS; +} diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h index 72eb22a52572..43de522af172 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h @@ -6,6 +6,8 @@ #include +struct fbnic_dev; + #define CSR_BIT(nr) (1u << (nr)) #define CSR_GENMASK(h, l) GENMASK(h, l) @@ -1221,4 +1223,21 @@ enum{ FBNIC_CSR_VERSION_V1_0_ASIC = 1, }; +/** + * enum fbnic_reg_self_test_codes - return codes from self test routines + * + * This is the code that is returned from the register self test + * routines. + * + * The test either returns success or the register number + * that failed during the test. + * + * @FBNIC_REG_TEST_SUCCESS: no errors + */ +enum fbnic_reg_self_test_codes { + FBNIC_REG_TEST_SUCCESS = 0, +}; + +enum fbnic_reg_self_test_codes fbnic_csr_regs_test(struct fbnic_dev *fbd); + #endif /* _FBNIC_CSR_H_ */ diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c index 70c995b8d1bd..cad963a3d825 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c @@ -126,6 +126,16 @@ static const struct fbnic_stat fbnic_gstrings_xdp_stats[] = { #define FBNIC_STATS_LEN \ (FBNIC_HW_STATS_LEN + FBNIC_XDP_STATS_LEN * FBNIC_MAX_XDPQS) +enum fbnic_self_test_results { + TEST_REG = 0, +}; + +static const char fbnic_gstrings_self_test[][ETH_GSTRING_LEN] = { + [TEST_REG] = "Register test (offline)", +}; + +#define FBNIC_TEST_LEN ARRAY_SIZE(fbnic_gstrings_self_test) + static void fbnic_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) { @@ -475,6 +485,10 @@ static void fbnic_get_strings(struct net_device *dev, u32 sset, u8 *data) for (i = 0; i < FBNIC_MAX_XDPQS; i++) fbnic_get_xdp_queue_strings(&data, i); break; + case ETH_SS_TEST: + memcpy(data, fbnic_gstrings_self_test, + sizeof(fbnic_gstrings_self_test)); + break; } } @@ -566,6 +580,8 @@ static int fbnic_get_sset_count(struct net_device *dev, int sset) switch (sset) { case ETH_SS_STATS: return FBNIC_STATS_LEN; + case ETH_SS_TEST: + return FBNIC_TEST_LEN; default: return -EOPNOTSUPP; } @@ -1478,6 +1494,39 @@ fbnic_remove_rxfh_context(struct net_device *netdev, return 0; } +static int fbnic_ethtool_regs_test(struct net_device *netdev, u64 *data) +{ + struct fbnic_net *fbn = netdev_priv(netdev); + struct fbnic_dev *fbd = fbn->fbd; + + *data = fbnic_csr_regs_test(fbd); + + return !!*data; +} + +static void fbnic_self_test(struct net_device *netdev, + struct ethtool_test *eth_test, u64 *data) +{ + bool if_running = netif_running(netdev); + + if (!(eth_test->flags & ETH_TEST_FL_OFFLINE)) { + data[TEST_REG] = 0; + return; + } + + if (if_running) + netif_close(netdev); + + if (fbnic_ethtool_regs_test(netdev, &data[TEST_REG])) + eth_test->flags |= ETH_TEST_FL_FAILED; + + if (if_running && netif_open(netdev, NULL)) { + netdev_err(netdev, + "Failed to re-initialize hardware following test\n"); + eth_test->flags |= ETH_TEST_FL_FAILED; + } +} + static void fbnic_get_channels(struct net_device *netdev, struct ethtool_channels *ch) { @@ -1940,6 +1989,7 @@ static const struct ethtool_ops fbnic_ethtool_ops = { .get_pause_stats = fbnic_get_pause_stats, .get_pauseparam = fbnic_phylink_get_pauseparam, .set_pauseparam = fbnic_phylink_set_pauseparam, + .self_test = fbnic_self_test, .get_strings = fbnic_get_strings, .get_ethtool_stats = fbnic_get_ethtool_stats, .get_sset_count = fbnic_get_sset_count, From 99fc8d3d00c94e32c1a8b60783a0d24421053446 Mon Sep 17 00:00:00 2001 From: "Mike Marciniszyn (Meta)" Date: Sat, 7 Mar 2026 05:58:45 -0500 Subject: [PATCH 0404/1561] eth fbnic: Add msix self test This function is meant to test the global interrupt registers and the PCIe IP MSI-X functionality. It essentially goes through and tests various combinations of the set, clear, and mask bits in order to verify the behavior is as we expect it to be from the driver. Signed-off-by: Mike Marciniszyn (Meta) Link: https://patch.msgid.link/20260307105847.1438-4-mike.marciniszyn@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/meta/fbnic/fbnic.h | 32 ++++ .../net/ethernet/meta/fbnic/fbnic_ethtool.c | 28 ++++ drivers/net/ethernet/meta/fbnic/fbnic_irq.c | 154 ++++++++++++++++++ 3 files changed, 214 insertions(+) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic.h b/drivers/net/ethernet/meta/fbnic/fbnic.h index a760a27b1516..f7df5302e91a 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic.h @@ -197,6 +197,38 @@ void fbnic_synchronize_irq(struct fbnic_dev *fbd, int nr); int fbnic_request_irq(struct fbnic_dev *dev, int nr, irq_handler_t handler, unsigned long flags, const char *name, void *data); void fbnic_free_irq(struct fbnic_dev *dev, int nr, void *data); + +/** + * enum fbnic_msix_self_test_codes - return codes from self test routines + * + * These are the codes returned from the self test routines and + * stored in the test result array indexed by the specific + * test name. + * + * @FBNIC_TEST_MSIX_SUCCESS: no errors + * @FBNIC_TEST_MSIX_NOMEM: allocation failure + * @FBNIC_TEST_MSIX_IRQ_REQ_FAIL: IRQ request failure + * @FBNIC_TEST_MSIX_MASK: masking failed to prevent IRQ + * @FBNIC_TEST_MSIX_UNMASK: unmasking failure w/ sw status set + * @FBNIC_TEST_MSIX_IRQ_CLEAR: interrupt when clearing mask + * @FBNIC_TEST_MSIX_NO_INTERRUPT: no interrupt when not masked + * @FBNIC_TEST_MSIX_NO_CLEAR_OR_MASK: status not cleared, or mask not set + * @FBNIC_TEST_MSIX_BITS_SET_AFTER_TEST: Bits are set after test + */ +enum fbnic_msix_self_test_codes { + FBNIC_TEST_MSIX_SUCCESS = 0, + FBNIC_TEST_MSIX_NOMEM = 5, + FBNIC_TEST_MSIX_IRQ_REQ_FAIL = 10, + FBNIC_TEST_MSIX_MASK = 20, + FBNIC_TEST_MSIX_UNMASK = 30, + FBNIC_TEST_MSIX_IRQ_CLEAR = 40, + FBNIC_TEST_MSIX_NO_INTERRUPT = 50, + FBNIC_TEST_MSIX_NO_CLEAR_OR_MASK = 60, + FBNIC_TEST_MSIX_BITS_SET_AFTER_TEST = 70, +}; + +enum fbnic_msix_self_test_codes fbnic_msix_test(struct fbnic_dev *fbd); + void fbnic_free_irqs(struct fbnic_dev *fbd); int fbnic_alloc_irqs(struct fbnic_dev *fbd); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c index cad963a3d825..b5012601b102 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c @@ -128,10 +128,12 @@ static const struct fbnic_stat fbnic_gstrings_xdp_stats[] = { enum fbnic_self_test_results { TEST_REG = 0, + TEST_MSIX, }; static const char fbnic_gstrings_self_test[][ETH_GSTRING_LEN] = { [TEST_REG] = "Register test (offline)", + [TEST_MSIX] = "MSI-X Interrupt test (offline)", }; #define FBNIC_TEST_LEN ARRAY_SIZE(fbnic_gstrings_self_test) @@ -1504,6 +1506,28 @@ static int fbnic_ethtool_regs_test(struct net_device *netdev, u64 *data) return !!*data; } +/** + * fbnic_ethtool_msix_test - Verify behavior of NIC interrupts + * @netdev: netdev device to test + * @data: Pointer to results storage + * + * This function is meant to test the global interrupt registers and the + * PCIe IP MSI-X functionality. It essentially goes through and tests + * test various combinations of the set, clear, and mask bits in order to + * verify the behavior is as we expect it to be from the driver. + * + * Return: non-zero on failure. + **/ +static int fbnic_ethtool_msix_test(struct net_device *netdev, u64 *data) +{ + struct fbnic_net *fbn = netdev_priv(netdev); + struct fbnic_dev *fbd = fbn->fbd; + + *data = fbnic_msix_test(fbd); + + return !!*data; +} + static void fbnic_self_test(struct net_device *netdev, struct ethtool_test *eth_test, u64 *data) { @@ -1511,6 +1535,7 @@ static void fbnic_self_test(struct net_device *netdev, if (!(eth_test->flags & ETH_TEST_FL_OFFLINE)) { data[TEST_REG] = 0; + data[TEST_MSIX] = 0; return; } @@ -1520,6 +1545,9 @@ static void fbnic_self_test(struct net_device *netdev, if (fbnic_ethtool_regs_test(netdev, &data[TEST_REG])) eth_test->flags |= ETH_TEST_FL_FAILED; + if (fbnic_ethtool_msix_test(netdev, &data[TEST_MSIX])) + eth_test->flags |= ETH_TEST_FL_FAILED; + if (if_running && netif_open(netdev, NULL)) { netdev_err(netdev, "Failed to re-initialize hardware following test\n"); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_irq.c b/drivers/net/ethernet/meta/fbnic/fbnic_irq.c index 1e6a8fd6f702..5e383d40abc7 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_irq.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_irq.c @@ -240,6 +240,160 @@ void fbnic_free_irq(struct fbnic_dev *fbd, int nr, void *data) free_irq(irq, data); } +struct fbnic_msix_test_data { + struct fbnic_dev *fbd; + unsigned long test_msix_status[BITS_TO_LONGS(FBNIC_MAX_MSIX_VECS)]; + int irq_vector[FBNIC_MAX_MSIX_VECS]; +}; + +static irqreturn_t fbnic_irq_test(int irq, void *data) +{ + struct fbnic_msix_test_data *test_data = data; + struct fbnic_dev *fbd = test_data->fbd; + int i; + + for (i = fbd->num_irqs; i--;) { + if (test_data->irq_vector[i] == irq) { + set_bit(i, test_data->test_msix_status); + break; + } + } + + return IRQ_HANDLED; +} + +/** + * fbnic_msix_test - Verify behavior of NIC interrupts + * @fbd: device to test + * + * This function is meant to test the global interrupt registers and the + * PCIe IP MSI-X functionality. It essentially goes through and tests + * various combinations of the set, clear, and mask bits in order to + * verify the behavior is as we expect it to be from the driver. + * + * Return: See enum fbnic_msix_self_test_codes + **/ +enum fbnic_msix_self_test_codes fbnic_msix_test(struct fbnic_dev *fbd) +{ + enum fbnic_msix_self_test_codes result = FBNIC_TEST_MSIX_SUCCESS; + struct pci_dev *pdev = to_pci_dev(fbd->dev); + struct fbnic_msix_test_data *test_data; + u32 mask = 0; + int i; + + /* Allocate bitmap and IRQ vector table */ + test_data = kzalloc_obj(*test_data, GFP_KERNEL); + + /* memory allocation failure */ + if (!test_data) + return FBNIC_TEST_MSIX_NOMEM; + + /* Initialize test data */ + test_data->fbd = fbd; + + for (i = FBNIC_NON_NAPI_VECTORS; i < fbd->num_irqs; i++) { + /* Add IRQ to vector table so it can be found */ + test_data->irq_vector[i] = pci_irq_vector(pdev, i); + + /* Enable the interrupt */ + if (!fbnic_request_irq(fbd, i, fbnic_irq_test, 0, + fbd->netdev->name, test_data)) + continue; + + while (i-- > FBNIC_NON_NAPI_VECTORS) + fbnic_free_irq(fbd, i, test_data); + kfree(test_data); + + /* IRQ request failure */ + return FBNIC_TEST_MSIX_IRQ_REQ_FAIL; + } + + /* Test each bit individually */ + for (i = FBNIC_NON_NAPI_VECTORS; i < fbd->num_irqs; i++) { + mask = 1U << (i % 32); + + /* Start with mask set and interrupt cleared */ + fbnic_wr32(fbd, FBNIC_INTR_MASK_SET(i / 32), mask); + fbnic_wrfl(fbd); + fbnic_wr32(fbd, FBNIC_INTR_CLEAR(i / 32), mask); + fbnic_wrfl(fbd); + + /* masking failure to prevent interrupt */ + result = FBNIC_TEST_MSIX_MASK; + + fbnic_wr32(fbd, FBNIC_INTR_SET(i / 32), mask); + fbnic_wrfl(fbd); + usleep_range(10000, 11000); + + if (test_bit(i, test_data->test_msix_status)) + break; + + /* unmasking failure w/ sw status set */ + result = FBNIC_TEST_MSIX_UNMASK; + + fbnic_wr32(fbd, FBNIC_INTR_MASK_CLEAR(i / 32), mask); + fbnic_wrfl(fbd); + usleep_range(10000, 11000); + + if (!test_bit(i, test_data->test_msix_status)) + break; + + /* interrupt when clearing mask */ + result = FBNIC_TEST_MSIX_IRQ_CLEAR; + + clear_bit(i, test_data->test_msix_status); + fbnic_wr32(fbd, FBNIC_INTR_MASK_CLEAR(i / 32), mask); + fbnic_wrfl(fbd); + usleep_range(10000, 11000); + + if (test_bit(i, test_data->test_msix_status)) + break; + + /* interrupt not triggering when not masked */ + result = FBNIC_TEST_MSIX_NO_INTERRUPT; + + fbnic_wr32(fbd, FBNIC_INTR_SET(i / 32), mask); + fbnic_wrfl(fbd); + usleep_range(10000, 11000); + + if (!test_bit(i, test_data->test_msix_status)) + break; + + /* status not cleared, or mask not set */ + result = FBNIC_TEST_MSIX_NO_CLEAR_OR_MASK; + if (mask & fbnic_rd32(fbd, FBNIC_INTR_STATUS(i / 32))) + break; + if (!(mask & fbnic_rd32(fbd, FBNIC_INTR_MASK(i / 32)))) + break; + + /* Result = 0 - Success */ + result = FBNIC_TEST_MSIX_SUCCESS; + + clear_bit(i, test_data->test_msix_status); + } + + if (i < fbd->num_irqs) { + fbnic_wr32(fbd, FBNIC_INTR_MASK_SET(i / 32), mask); + fbnic_wrfl(fbd); + fbnic_wr32(fbd, FBNIC_INTR_CLEAR(i / 32), mask); + fbnic_wrfl(fbd); + clear_bit(i, test_data->test_msix_status); + } + + for (i = FBNIC_NON_NAPI_VECTORS; i < fbd->num_irqs; i++) { + /* Test for bits set after testing */ + if (test_bit(i, test_data->test_msix_status)) + result = FBNIC_TEST_MSIX_BITS_SET_AFTER_TEST; + + /* Free IRQ */ + fbnic_free_irq(fbd, i, test_data); + } + + kfree(test_data); + + return result; +} + void fbnic_napi_name_irqs(struct fbnic_dev *fbd) { unsigned int i; From d522b1b004800728de5466451e4d7032a4f53de6 Mon Sep 17 00:00:00 2001 From: "Mike Marciniszyn (Meta)" Date: Sat, 7 Mar 2026 05:58:46 -0500 Subject: [PATCH 0405/1561] eth fbnic: TLV support for use by MBX self test The TLV (Type-Value-Length) self uses a known set of data to create a TLV message. These routines support the MBX self test by creating the test messages and parsing the response message coming back from the firmware. Signed-off-by: Mike Marciniszyn (Meta) Link: https://patch.msgid.link/20260307105847.1438-5-mike.marciniszyn@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/meta/fbnic/fbnic_tlv.c | 276 ++++++++++++++++++++ drivers/net/ethernet/meta/fbnic/fbnic_tlv.h | 27 ++ 2 files changed, 303 insertions(+) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_tlv.c b/drivers/net/ethernet/meta/fbnic/fbnic_tlv.c index 517ed8b2f1cb..c55d4f76a5fc 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_tlv.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_tlv.c @@ -551,6 +551,172 @@ int fbnic_tlv_parser_error(void *opaque, struct fbnic_tlv_msg **results) return -EBADMSG; } +#define FBNIC_TLV_TEST_STRING_LEN 32 + +struct fbnic_tlv_test { + u64 test_u64; + s64 test_s64; + u32 test_u32; + s32 test_s32; + u16 test_u16; + s16 test_s16; + u8 test_mac[ETH_ALEN]; + u8 test_mac_array[4][ETH_ALEN]; + u8 test_true; + u8 test_false; + char test_string[FBNIC_TLV_TEST_STRING_LEN]; +}; + +static struct fbnic_tlv_test test_struct; + +const struct fbnic_tlv_index fbnic_tlv_test_index[] = { + FBNIC_TLV_ATTR_U64(FBNIC_TLV_TEST_MSG_U64), + FBNIC_TLV_ATTR_S64(FBNIC_TLV_TEST_MSG_S64), + FBNIC_TLV_ATTR_U32(FBNIC_TLV_TEST_MSG_U32), + FBNIC_TLV_ATTR_S32(FBNIC_TLV_TEST_MSG_S32), + FBNIC_TLV_ATTR_U32(FBNIC_TLV_TEST_MSG_U16), + FBNIC_TLV_ATTR_S32(FBNIC_TLV_TEST_MSG_S16), + FBNIC_TLV_ATTR_MAC_ADDR(FBNIC_TLV_TEST_MSG_MAC_ADDR), + FBNIC_TLV_ATTR_FLAG(FBNIC_TLV_TEST_MSG_FLAG_TRUE), + FBNIC_TLV_ATTR_FLAG(FBNIC_TLV_TEST_MSG_FLAG_FALSE), + FBNIC_TLV_ATTR_STRING(FBNIC_TLV_TEST_MSG_STRING, + FBNIC_TLV_TEST_STRING_LEN), + FBNIC_TLV_ATTR_ARRAY(FBNIC_TLV_TEST_MSG_ARRAY), + FBNIC_TLV_ATTR_NESTED(FBNIC_TLV_TEST_MSG_NESTED), + FBNIC_TLV_ATTR_LAST +}; + +static void fbnic_tlv_test_struct_init(void) +{ + int i = FBNIC_TLV_TEST_STRING_LEN - 1; + + /* Populate the struct with random data */ + get_random_once(&test_struct, + offsetof(struct fbnic_tlv_test, test_string) + i); + + /* Force true/false to their expected values */ + test_struct.test_false = false; + test_struct.test_true = true; + + /* Convert test_string to a true ASCII string */ + test_struct.test_string[i] = '\0'; + while (i--) { + /* Force characters into displayable range */ + if (test_struct.test_string[i] < 64 || + test_struct.test_string[i] >= 96) { + test_struct.test_string[i] %= 32; + test_struct.test_string[i] += 64; + } + } +} + +static int fbnic_tlv_test_attr_data(struct fbnic_tlv_msg *msg) +{ + struct fbnic_tlv_msg *array; + int err, i; + + err = fbnic_tlv_attr_put_int(msg, FBNIC_TLV_TEST_MSG_U64, + test_struct.test_u64); + if (err) + return err; + + err = fbnic_tlv_attr_put_int(msg, FBNIC_TLV_TEST_MSG_S64, + test_struct.test_s64); + if (err) + return err; + + err = fbnic_tlv_attr_put_int(msg, FBNIC_TLV_TEST_MSG_U32, + test_struct.test_u32); + if (err) + return err; + + err = fbnic_tlv_attr_put_int(msg, FBNIC_TLV_TEST_MSG_S32, + test_struct.test_s32); + if (err) + return err; + + err = fbnic_tlv_attr_put_int(msg, FBNIC_TLV_TEST_MSG_U16, + test_struct.test_u16); + if (err) + return err; + + err = fbnic_tlv_attr_put_int(msg, FBNIC_TLV_TEST_MSG_S16, + test_struct.test_s16); + if (err) + return err; + + err = fbnic_tlv_attr_put_value(msg, FBNIC_TLV_TEST_MSG_MAC_ADDR, + test_struct.test_mac, ETH_ALEN); + if (err) + return err; + + /* Start MAC address array */ + array = fbnic_tlv_attr_nest_start(msg, FBNIC_TLV_TEST_MSG_ARRAY); + if (!array) + return -ENOSPC; + + for (i = 0; i < 4; i++) { + err = fbnic_tlv_attr_put_value(array, + FBNIC_TLV_TEST_MSG_MAC_ADDR, + test_struct.test_mac_array[i], + ETH_ALEN); + if (err) + return err; + } + + /* Close array */ + fbnic_tlv_attr_nest_stop(msg); + + err = fbnic_tlv_attr_put_flag(msg, FBNIC_TLV_TEST_MSG_FLAG_TRUE); + if (err) + return err; + + return fbnic_tlv_attr_put_string(msg, FBNIC_TLV_TEST_MSG_STRING, + test_struct.test_string); +} + +/** + * fbnic_tlv_test_create - Allocate a test message and fill it w/ data + * @fbd: FBNIC device structure + * + * Return: NULL on failure to allocate or pointer to new TLV test message. + **/ +struct fbnic_tlv_msg *fbnic_tlv_test_create(struct fbnic_dev *fbd) +{ + struct fbnic_tlv_msg *msg, *nest; + int err; + + msg = fbnic_tlv_msg_alloc(FBNIC_TLV_MSG_ID_TEST); + if (!msg) + return NULL; + + /* Randomize struct data */ + fbnic_tlv_test_struct_init(); + + /* Add first level of data to message */ + err = fbnic_tlv_test_attr_data(msg); + if (err) + goto free_message; + + /* Start second level nested */ + nest = fbnic_tlv_attr_nest_start(msg, FBNIC_TLV_TEST_MSG_NESTED); + if (!nest) + goto free_message; + + /* Add nested data */ + err = fbnic_tlv_test_attr_data(nest); + if (err) + goto free_message; + + /* Close nest and report full message */ + fbnic_tlv_attr_nest_stop(msg); + + return msg; +free_message: + free_page((unsigned long)msg); + return NULL; +} + void fbnic_tlv_attr_addr_copy(u8 *dest, struct fbnic_tlv_msg *src) { u8 *mac_addr; @@ -558,3 +724,113 @@ void fbnic_tlv_attr_addr_copy(u8 *dest, struct fbnic_tlv_msg *src) mac_addr = fbnic_tlv_attr_get_value_ptr(src); memcpy(dest, mac_addr, ETH_ALEN); } + +/** + * fbnic_tlv_parser_test_attr - Function loading test attributes into structure + * @str: Test structure to load + * @results: Pointer to results array + * + * Copies attributes into structure. Any attribute that doesn't exist in the + * results array is not populated. + **/ +static void fbnic_tlv_parser_test_attr(struct fbnic_tlv_test *str, + struct fbnic_tlv_msg **results) +{ + struct fbnic_tlv_msg *array_results[4]; + struct fbnic_tlv_msg *attr; + char *string = NULL; + int i, err; + + str->test_u64 = fta_get_uint(results, FBNIC_TLV_TEST_MSG_U64); + str->test_u32 = fta_get_uint(results, FBNIC_TLV_TEST_MSG_U32); + str->test_u16 = fta_get_uint(results, FBNIC_TLV_TEST_MSG_U16); + + str->test_s64 = fta_get_sint(results, FBNIC_TLV_TEST_MSG_S64); + str->test_s32 = fta_get_sint(results, FBNIC_TLV_TEST_MSG_S32); + str->test_s16 = fta_get_sint(results, FBNIC_TLV_TEST_MSG_S16); + + attr = results[FBNIC_TLV_TEST_MSG_MAC_ADDR]; + if (attr) + fbnic_tlv_attr_addr_copy(str->test_mac, attr); + + attr = results[FBNIC_TLV_TEST_MSG_ARRAY]; + if (attr) { + int len = le16_to_cpu(attr->hdr.len) / sizeof(u32) - 1; + + err = fbnic_tlv_attr_parse_array(&attr[1], len, + array_results, + fbnic_tlv_test_index, + FBNIC_TLV_TEST_MSG_MAC_ADDR, + 4); + if (!err) { + for (i = 0; i < 4 && array_results[i]; i++) + fbnic_tlv_attr_addr_copy(str->test_mac_array[i], + array_results[i]); + } + } + + str->test_true = !!results[FBNIC_TLV_TEST_MSG_FLAG_TRUE]; + str->test_false = !!results[FBNIC_TLV_TEST_MSG_FLAG_FALSE]; + + attr = results[FBNIC_TLV_TEST_MSG_STRING]; + if (attr) { + string = fbnic_tlv_attr_get_value_ptr(attr); + strscpy(str->test_string, string, FBNIC_TLV_TEST_STRING_LEN); + } +} + +static void fbnic_tlv_test_dump(struct fbnic_tlv_test *value, char *prefix) +{ + print_hex_dump(KERN_INFO, prefix, DUMP_PREFIX_OFFSET, 16, 1, + value, sizeof(*value), true); +} + +/** + * fbnic_tlv_parser_test - Function for parsing and testing test message + * @opaque: Unused value + * @results: Results of parser output + * + * Return: negative value on error, or 0 on success. + * + * Parses attributes to structures and compares the structure to the + * expected test value that should have been used to populate the message. + * + * Used to verify message generation and parser are working correctly. + **/ +int fbnic_tlv_parser_test(void *opaque, struct fbnic_tlv_msg **results) +{ + struct fbnic_tlv_msg *nest_results[FBNIC_TLV_RESULTS_MAX] = { 0 }; + struct fbnic_tlv_test result_struct; + struct fbnic_tlv_msg *attr; + int err; + + memset(&result_struct, 0, sizeof(result_struct)); + fbnic_tlv_parser_test_attr(&result_struct, results); + + if (memcmp(&test_struct, &result_struct, sizeof(test_struct))) { + fbnic_tlv_test_dump(&result_struct, "fbnic: found - "); + fbnic_tlv_test_dump(&test_struct, "fbnic: expected - "); + return -EINVAL; + } + + attr = results[FBNIC_TLV_TEST_MSG_NESTED]; + if (!attr) + return -EINVAL; + + err = fbnic_tlv_attr_parse(&attr[1], + le16_to_cpu(attr->hdr.len) / sizeof(u32) - 1, + nest_results, fbnic_tlv_test_index); + if (err) + return err; + + memset(&result_struct, 0, sizeof(result_struct)); + fbnic_tlv_parser_test_attr(&result_struct, nest_results); + + if (memcmp(&test_struct, &result_struct, sizeof(test_struct))) { + fbnic_tlv_test_dump(&result_struct, "fbnic: found - "); + fbnic_tlv_test_dump(&test_struct, "fbnic: expected - "); + return -EINVAL; + } + + return 0; +} diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_tlv.h b/drivers/net/ethernet/meta/fbnic/fbnic_tlv.h index 3508b46ebdd0..9c4e4759394a 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_tlv.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_tlv.h @@ -9,6 +9,8 @@ #include #include +struct fbnic_dev; + #define FBNIC_TLV_MSG_ALIGN(len) ALIGN(len, sizeof(u32)) #define FBNIC_TLV_MSG_SIZE(len) \ (FBNIC_TLV_MSG_ALIGN(len) / sizeof(u32)) @@ -153,6 +155,31 @@ int fbnic_tlv_parser_error(void *opaque, struct fbnic_tlv_msg **results); #define fta_get_str(_results, _id, _dst, _dstsize) \ fbnic_tlv_attr_get_string(_results[_id], _dst, _dstsize) +#define FBNIC_TLV_MSG_ID_TEST 0 + +enum fbnic_tlv_test_attr_id { + FBNIC_TLV_TEST_MSG_U64, + FBNIC_TLV_TEST_MSG_S64, + FBNIC_TLV_TEST_MSG_U32, + FBNIC_TLV_TEST_MSG_S32, + FBNIC_TLV_TEST_MSG_U16, + FBNIC_TLV_TEST_MSG_S16, + FBNIC_TLV_TEST_MSG_MAC_ADDR, + FBNIC_TLV_TEST_MSG_FLAG_TRUE, + FBNIC_TLV_TEST_MSG_FLAG_FALSE, + FBNIC_TLV_TEST_MSG_STRING, + FBNIC_TLV_TEST_MSG_NESTED, + FBNIC_TLV_TEST_MSG_ARRAY, + FBNIC_TLV_TEST_MSG_MAX +}; + +extern const struct fbnic_tlv_index fbnic_tlv_test_index[]; +struct fbnic_tlv_msg *fbnic_tlv_test_create(struct fbnic_dev *fbd); +int fbnic_tlv_parser_test(void *opaque, struct fbnic_tlv_msg **results); + +#define FBNIC_TLV_MSG_TEST \ + FBNIC_TLV_PARSER(TEST, fbnic_tlv_test_index, \ + fbnic_tlv_parser_test) #define FBNIC_TLV_MSG_ERROR \ FBNIC_TLV_PARSER(UNKNOWN, NULL, fbnic_tlv_parser_error) #endif /* _FBNIC_TLV_H_ */ From 8e5218199da48913960a0ce7e22193020dc23891 Mon Sep 17 00:00:00 2001 From: "Mike Marciniszyn (Meta)" Date: Sat, 7 Mar 2026 05:58:47 -0500 Subject: [PATCH 0406/1561] eth fbnic: Add mailbox self test The mailbox self test ensures the interface to and from the firmware is healthy by sending a test message and fielding the response from the firmware. This patch uses the new completion API [1][2] that allocates a completion structure, binds the completion to the TEST message, and uses a new FW parsing routine that wraps the completion processing around the TLV parser. Link: https://patch.msgid.link/20250516164804.741348-1-lee@trager.us [1] Link: https://patch.msgid.link/20260115003353.4150771-6-mohsin.bashr@gmail.com [2] Signed-off-by: Mike Marciniszyn (Meta) Link: https://patch.msgid.link/20260307105847.1438-6-mike.marciniszyn@gmail.com Signed-off-by: Paolo Abeni --- .../net/ethernet/meta/fbnic/fbnic_ethtool.c | 15 +++ drivers/net/ethernet/meta/fbnic/fbnic_fw.c | 100 ++++++++++++++++++ drivers/net/ethernet/meta/fbnic/fbnic_fw.h | 27 +++++ 3 files changed, 142 insertions(+) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c index b5012601b102..f14de2366854 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c @@ -129,11 +129,13 @@ static const struct fbnic_stat fbnic_gstrings_xdp_stats[] = { enum fbnic_self_test_results { TEST_REG = 0, TEST_MSIX, + TEST_MBX, }; static const char fbnic_gstrings_self_test[][ETH_GSTRING_LEN] = { [TEST_REG] = "Register test (offline)", [TEST_MSIX] = "MSI-X Interrupt test (offline)", + [TEST_MBX] = "FW mailbox test (on/offline)", }; #define FBNIC_TEST_LEN ARRAY_SIZE(fbnic_gstrings_self_test) @@ -1528,11 +1530,24 @@ static int fbnic_ethtool_msix_test(struct net_device *netdev, u64 *data) return !!*data; } +static int fbnic_ethtool_mbx_self_test(struct net_device *netdev, u64 *data) +{ + struct fbnic_net *fbn = netdev_priv(netdev); + struct fbnic_dev *fbd = fbn->fbd; + + *data = fbnic_fw_mbx_self_test(fbd); + + return !!*data; +} + static void fbnic_self_test(struct net_device *netdev, struct ethtool_test *eth_test, u64 *data) { bool if_running = netif_running(netdev); + if (fbnic_ethtool_mbx_self_test(netdev, &data[TEST_MBX])) + eth_test->flags |= ETH_TEST_FL_FAILED; + if (!(eth_test->flags & ETH_TEST_FL_OFFLINE)) { data[TEST_REG] = 0; data[TEST_MSIX] = 0; diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c index 1f0b6350bef4..c2bad51bdde6 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c @@ -378,6 +378,37 @@ fbnic_fw_get_cmpl_by_type(struct fbnic_dev *fbd, u32 msg_type) return cmpl_data; } +/** + * fbnic_fw_xmit_test_msg - Create and transmit a test message to FW mailbox + * @fbd: FBNIC device structure + * @cmpl: fw completion struct + * + * Return: zero on success, negative value on failure + * + * Generates a single page mailbox test message and places it in the Tx + * mailbox queue. Expectation is that the FW will validate that the nested + * value matches the external values, and then will echo them back to us. + * + * Also sets a completion slot for use in the completion wait calls when + * the cmpl arg is non-NULL. + */ +int fbnic_fw_xmit_test_msg(struct fbnic_dev *fbd, + struct fbnic_fw_completion *cmpl) +{ + struct fbnic_tlv_msg *test_msg; + int err; + + test_msg = fbnic_tlv_test_create(fbd); + if (!test_msg) + return -ENOMEM; + + err = fbnic_mbx_map_req_w_cmpl(fbd, test_msg, cmpl); + if (err) + free_page((unsigned long)test_msg); + + return err; +} + /** * fbnic_fw_xmit_simple_msg - Transmit a simple single TLV message w/o data * @fbd: FBNIC device structure @@ -1556,7 +1587,29 @@ free_message: return err; } +static int +fbnic_fw_parser_test(void *opaque, struct fbnic_tlv_msg **results) +{ + struct fbnic_fw_completion *cmpl; + struct fbnic_dev *fbd = opaque; + int err; + + /* find cmpl */ + cmpl = fbnic_fw_get_cmpl_by_type(fbd, FBNIC_TLV_MSG_ID_TEST); + if (!cmpl) + return -ENOSPC; + + err = fbnic_tlv_parser_test(opaque, results); + + cmpl->result = err; + complete(&cmpl->done); + fbnic_fw_put_cmpl(cmpl); + + return err; +} + static const struct fbnic_tlv_parser fbnic_fw_tlv_parser[] = { + FBNIC_TLV_PARSER(TEST, fbnic_tlv_test_index, fbnic_fw_parser_test), FBNIC_TLV_PARSER(FW_CAP_RESP, fbnic_fw_cap_resp_index, fbnic_fw_parse_cap_resp), FBNIC_TLV_PARSER(OWNERSHIP_RESP, fbnic_ownership_resp_index, @@ -1787,6 +1840,53 @@ void fbnic_mbx_flush_tx(struct fbnic_dev *fbd) } while (time_is_after_jiffies(timeout)); } +/** + * fbnic_fw_mbx_self_test() - verify firmware interface + * @fbd: device to test + * + * This function tests the interfaces to/from the firmware. + * + * Return: See enum fbnic_fw_self_test_codes + **/ +enum fbnic_fw_self_test_codes fbnic_fw_mbx_self_test(struct fbnic_dev *fbd) +{ + enum fbnic_fw_self_test_codes err; + struct fbnic_fw_completion *cmpl; + + /* Skip test if FW interface is not present */ + if (!fbnic_fw_present(fbd)) + return FBNIC_TEST_FW_NO_FIRMWARE; + + cmpl = fbnic_fw_alloc_cmpl(FBNIC_TLV_MSG_ID_TEST); + if (!cmpl) + return FBNIC_TEST_FW_NO_CMPL; + + /* Load a test message onto the FW mailbox interface + * and arm the completion. + */ + err = fbnic_fw_xmit_test_msg(fbd, cmpl); + if (err) { + err = FBNIC_TEST_FW_NO_XMIT; + goto exit_free; + } + + /* Verify we received a message back */ + if (!fbnic_mbx_wait_for_cmpl(cmpl)) { + err = FBNIC_TEST_FW_NO_MSG; + goto exit_cleanup; + } + + /* Verify there were no parsing errors */ + if (cmpl->result) + err = FBNIC_TEST_FW_PARSE; +exit_cleanup: + fbnic_mbx_clear_cmpl(fbd, cmpl); +exit_free: + fbnic_fw_put_cmpl(cmpl); + + return err; +} + int fbnic_fw_xmit_rpc_macda_sync(struct fbnic_dev *fbd) { struct fbnic_tlv_msg *mac_array; diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.h b/drivers/net/ethernet/meta/fbnic/fbnic_fw.h index 8f7218900562..d84723e4cfa3 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.h @@ -104,6 +104,33 @@ void fbnic_mbx_clear_cmpl(struct fbnic_dev *fbd, void fbnic_mbx_poll(struct fbnic_dev *fbd); int fbnic_mbx_poll_tx_ready(struct fbnic_dev *fbd); void fbnic_mbx_flush_tx(struct fbnic_dev *fbd); + +/** + * enum fbnic_fw_self_test_codes - return codes from self test routines + * + * These are the codes returned from the self test routines and + * stored in the test result array indexed by the specific + * test name. + * + * @FBNIC_TEST_FW_SUCCESS: test success + * @FBNIC_TEST_FW_NO_FIRMWARE: FW interface not present + * @FBNIC_TEST_FW_NO_CMPL: No completion available + * @FBNIC_TEST_FW_NO_XMIT: Could not xmit message + * @FBNIC_TEST_FW_NO_MSG: no message returned + * @FBNIC_TEST_FW_PARSE: returned message had parsing error + */ +enum fbnic_fw_self_test_codes { + FBNIC_TEST_FW_SUCCESS = 0, + FBNIC_TEST_FW_NO_FIRMWARE = 10, + FBNIC_TEST_FW_NO_CMPL = 20, + FBNIC_TEST_FW_NO_XMIT = 30, + FBNIC_TEST_FW_NO_MSG = 40, + FBNIC_TEST_FW_PARSE = 50, +}; + +enum fbnic_fw_self_test_codes fbnic_fw_mbx_self_test(struct fbnic_dev *fbd); +int fbnic_fw_xmit_test_msg(struct fbnic_dev *fbd, + struct fbnic_fw_completion *c); int fbnic_fw_xmit_ownership_msg(struct fbnic_dev *fbd, bool take_ownership); int fbnic_fw_init_heartbeat(struct fbnic_dev *fbd, bool poll); void fbnic_fw_check_heartbeat(struct fbnic_dev *fbd); From 86581adf05f526f53b90ebcbbc2fd4d9f9fd4c96 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 6 Mar 2026 09:51:27 +0100 Subject: [PATCH 0407/1561] wifi: ath6kl: drop redundant device reference Driver core holds a reference to the USB interface and its parent USB device while the interface is bound to a driver and there is no need to take additional references unless the structures are needed after disconnect. Drop the redundant device reference to reduce cargo culting, make it easier to spot drivers where an extra reference is needed, and reduce the risk of memory leaks when drivers fail to release it. Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260306085144.12064-2-johan@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath6kl/usb.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/ath/ath6kl/usb.c b/drivers/net/wireless/ath/ath6kl/usb.c index 852e77e41bde..814faf96f1ff 100644 --- a/drivers/net/wireless/ath/ath6kl/usb.c +++ b/drivers/net/wireless/ath/ath6kl/usb.c @@ -1124,8 +1124,6 @@ static int ath6kl_usb_probe(struct usb_interface *interface, int vendor_id, product_id; int ret = 0; - usb_get_dev(dev); - vendor_id = le16_to_cpu(dev->descriptor.idVendor); product_id = le16_to_cpu(dev->descriptor.idProduct); @@ -1143,11 +1141,8 @@ static int ath6kl_usb_probe(struct usb_interface *interface, ath6kl_dbg(ATH6KL_DBG_USB, "USB 1.1 Host\n"); ar_usb = ath6kl_usb_create(interface); - - if (ar_usb == NULL) { - ret = -ENOMEM; - goto err_usb_put; - } + if (ar_usb == NULL) + return -ENOMEM; ar = ath6kl_core_create(&ar_usb->udev->dev); if (ar == NULL) { @@ -1176,15 +1171,12 @@ err_core_free: ath6kl_core_destroy(ar); err_usb_destroy: ath6kl_usb_destroy(ar_usb); -err_usb_put: - usb_put_dev(dev); return ret; } static void ath6kl_usb_remove(struct usb_interface *interface) { - usb_put_dev(interface_to_usbdev(interface)); ath6kl_usb_device_detached(interface); } From 0bc013d68a5d1943728d110d759c6587c2b81913 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 6 Mar 2026 09:51:28 +0100 Subject: [PATCH 0408/1561] wifi: ath6kl: rename disconnect callback Rename the disconnect callback so that it reflects the callback name for consistency with the rest of the kernel (e.g. makes it easier to grep for). Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260306085144.12064-3-johan@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath6kl/usb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath6kl/usb.c b/drivers/net/wireless/ath/ath6kl/usb.c index 814faf96f1ff..79c18f5ee02b 100644 --- a/drivers/net/wireless/ath/ath6kl/usb.c +++ b/drivers/net/wireless/ath/ath6kl/usb.c @@ -1175,7 +1175,7 @@ err_usb_destroy: return ret; } -static void ath6kl_usb_remove(struct usb_interface *interface) +static void ath6kl_usb_disconnect(struct usb_interface *interface) { ath6kl_usb_device_detached(interface); } @@ -1227,7 +1227,7 @@ static struct usb_driver ath6kl_usb_driver = { .probe = ath6kl_usb_probe, .suspend = ath6kl_usb_pm_suspend, .resume = ath6kl_usb_pm_resume, - .disconnect = ath6kl_usb_remove, + .disconnect = ath6kl_usb_disconnect, .id_table = ath6kl_usb_ids, .supports_autosuspend = true, .disable_hub_initiated_lpm = 1, From 2ddbec82e1650d57ea0f63d284b5da01d2f21293 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 6 Mar 2026 09:51:29 +0100 Subject: [PATCH 0409/1561] wifi: ath9k: drop redundant device reference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Driver core holds a reference to the USB interface and its parent USB device while the interface is bound to a driver and there is no need to take additional references unless the structures are needed after disconnect. Drop the redundant device reference to reduce cargo culting, make it easier to spot drivers where an extra reference is needed, and reduce the risk of memory leaks when drivers fail to release it. Signed-off-by: Johan Hovold Acked-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20260306085144.12064-4-johan@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath9k/hif_usb.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/hif_usb.c b/drivers/net/wireless/ath/ath9k/hif_usb.c index 8533b88974b2..821909b81ea9 100644 --- a/drivers/net/wireless/ath/ath9k/hif_usb.c +++ b/drivers/net/wireless/ath/ath9k/hif_usb.c @@ -1382,8 +1382,6 @@ static int ath9k_hif_usb_probe(struct usb_interface *interface, goto err_alloc; } - usb_get_dev(udev); - hif_dev->udev = udev; hif_dev->interface = interface; hif_dev->usb_device_id = id; @@ -1403,7 +1401,6 @@ static int ath9k_hif_usb_probe(struct usb_interface *interface, err_fw_req: usb_set_intfdata(interface, NULL); kfree(hif_dev); - usb_put_dev(udev); err_alloc: return ret; } @@ -1451,7 +1448,6 @@ static void ath9k_hif_usb_disconnect(struct usb_interface *interface) kfree(hif_dev); dev_info(&udev->dev, "ath9k_htc: USB layer deinitialized\n"); - usb_put_dev(udev); } #ifdef CONFIG_PM From c880c0794076f04b0058dd5cbc1f94c33d7bff44 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 6 Mar 2026 09:51:30 +0100 Subject: [PATCH 0410/1561] wifi: ath10k: drop redundant device reference Driver core holds a reference to the USB interface and its parent USB device while the interface is bound to a driver and there is no need to take additional references unless the structures are needed after disconnect. Drop the redundant device reference to reduce cargo culting, make it easier to spot drivers where an extra reference is needed, and reduce the risk of memory leaks when drivers fail to release it. Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260306085144.12064-5-johan@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath10k/usb.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/usb.c b/drivers/net/wireless/ath/ath10k/usb.c index 6661fff326e0..ad1cf0681b19 100644 --- a/drivers/net/wireless/ath/ath10k/usb.c +++ b/drivers/net/wireless/ath/ath10k/usb.c @@ -1016,7 +1016,6 @@ static int ath10k_usb_probe(struct usb_interface *interface, netif_napi_add(ar->napi_dev, &ar->napi, ath10k_usb_napi_poll); - usb_get_dev(dev); vendor_id = le16_to_cpu(dev->descriptor.idVendor); product_id = le16_to_cpu(dev->descriptor.idProduct); @@ -1055,8 +1054,6 @@ err_usb_destroy: err: ath10k_core_destroy(ar); - usb_put_dev(dev); - return ret; } @@ -1071,7 +1068,6 @@ static void ath10k_usb_remove(struct usb_interface *interface) ath10k_core_unregister(ar_usb->ar); netif_napi_del(&ar_usb->ar->napi); ath10k_usb_destroy(ar_usb->ar); - usb_put_dev(interface_to_usbdev(interface)); ath10k_core_destroy(ar_usb->ar); } From fcc3555fce3c35333891e904c3592375d5e63cf4 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 6 Mar 2026 09:51:31 +0100 Subject: [PATCH 0411/1561] wifi: ath10k: rename disconnect callback Rename the disconnect callback so that it reflects the callback name for consistency with the rest of the kernel (e.g. makes it easier to grep for). Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260306085144.12064-6-johan@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath10k/usb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/usb.c b/drivers/net/wireless/ath/ath10k/usb.c index ad1cf0681b19..987d57a01ddf 100644 --- a/drivers/net/wireless/ath/ath10k/usb.c +++ b/drivers/net/wireless/ath/ath10k/usb.c @@ -1057,7 +1057,7 @@ err: return ret; } -static void ath10k_usb_remove(struct usb_interface *interface) +static void ath10k_usb_disconnect(struct usb_interface *interface) { struct ath10k_usb *ar_usb; @@ -1113,7 +1113,7 @@ static struct usb_driver ath10k_usb_driver = { .probe = ath10k_usb_probe, .suspend = ath10k_usb_pm_suspend, .resume = ath10k_usb_pm_resume, - .disconnect = ath10k_usb_remove, + .disconnect = ath10k_usb_disconnect, .id_table = ath10k_usb_ids, .supports_autosuspend = true, .disable_hub_initiated_lpm = 1, From 27401c9b143278eb9fa7d46f97ab063d65e5afd5 Mon Sep 17 00:00:00 2001 From: Aaradhana Sahu Date: Fri, 6 Mar 2026 08:52:52 +0530 Subject: [PATCH 0412/1561] wifi: ath12k: Use .mbn firmware for AHB devices Currently ath12k AHB devices request firmware in .mdt/.bxx split format. AHB firmware is transitioning from the split format to a single .mbn file. Update ath12k to request q6_fw.mbn and iu_fw.mbn instead of q6_fw.mdt iu_fw.mdt respectively. Note: There is no impact to current devices since ath12k AHB support is not yet complete and no AHB firmware files are currently present in linux-firmware. Tested-on: IPQ5332 hw1.0 AHB WLAN.WBE.1.7-00587-QCAHKSWPL_SILICONZ-1 Signed-off-by: Aaradhana Sahu Link: https://patch.msgid.link/20260306032252.2237722-1-aaradhana.sahu@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/ahb.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/ahb.h b/drivers/net/wireless/ath/ath12k/ahb.h index 8a040d03d27a..be9e31b3682d 100644 --- a/drivers/net/wireless/ath/ath12k/ahb.h +++ b/drivers/net/wireless/ath/ath12k/ahb.h @@ -21,8 +21,8 @@ #define ATH12K_ROOTPD_READY_TIMEOUT (5 * HZ) #define ATH12K_RPROC_AFTER_POWERUP QCOM_SSR_AFTER_POWERUP #define ATH12K_AHB_FW_PREFIX "q6_fw" -#define ATH12K_AHB_FW_SUFFIX ".mdt" -#define ATH12K_AHB_FW2 "iu_fw.mdt" +#define ATH12K_AHB_FW_SUFFIX ".mbn" +#define ATH12K_AHB_FW2 "iu_fw.mbn" #define ATH12K_AHB_UPD_SWID 0x12 #define ATH12K_USERPD_SPAWN_TIMEOUT (5 * HZ) #define ATH12K_USERPD_READY_TIMEOUT (10 * HZ) From 4a51ac9056c17e8216b245cd16152eefbd21abfb Mon Sep 17 00:00:00 2001 From: Kyoji Ogasawara Date: Mon, 9 Mar 2026 21:45:39 +0900 Subject: [PATCH 0413/1561] net/smc: fix indentation in smcr_buf_type section smcr_buf_type section used inconsistent indentation compared with the rest of this document. Signed-off-by: Kyoji Ogasawara Reviewed-by: D. Wythe Link: https://patch.msgid.link/20260309124541.22723-2-sawara04.o@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/networking/smc-sysctl.rst | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Documentation/networking/smc-sysctl.rst b/Documentation/networking/smc-sysctl.rst index 904a910f198e..17b8314c0e5e 100644 --- a/Documentation/networking/smc-sysctl.rst +++ b/Documentation/networking/smc-sysctl.rst @@ -23,17 +23,17 @@ autocorking_size - INTEGER Default: 64K smcr_buf_type - INTEGER - Controls which type of sndbufs and RMBs to use in later newly created - SMC-R link group. Only for SMC-R. + Controls which type of sndbufs and RMBs to use in later newly created + SMC-R link group. Only for SMC-R. - Default: 0 (physically contiguous sndbufs and RMBs) + Default: 0 (physically contiguous sndbufs and RMBs) - Possible values: + Possible values: - - 0 - Use physically contiguous buffers - - 1 - Use virtually contiguous buffers - - 2 - Mixed use of the two types. Try physically contiguous buffers first. - If not available, use virtually contiguous buffers then. + - 0 - Use physically contiguous buffers + - 1 - Use virtually contiguous buffers + - 2 - Mixed use of the two types. Try physically contiguous buffers first. + If not available, use virtually contiguous buffers then. smcr_testlink_time - INTEGER How frequently SMC-R link sends out TEST_LINK LLC messages to confirm From aa5ec9d03b9c4459e528ecd75d84f6ef98fb2f5a Mon Sep 17 00:00:00 2001 From: Kyoji Ogasawara Date: Mon, 9 Mar 2026 21:45:40 +0900 Subject: [PATCH 0414/1561] net/smc: Add documentation for limit_smc_hs and hs_ctrl Document missing SMC sysctl parameters limit_smc_hs and hs_ctrl Signed-off-by: Kyoji Ogasawara Reviewed-by: D. Wythe Link: https://patch.msgid.link/20260309124541.22723-3-sawara04.o@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/networking/smc-sysctl.rst | 27 +++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/Documentation/networking/smc-sysctl.rst b/Documentation/networking/smc-sysctl.rst index 17b8314c0e5e..a8b4f357174e 100644 --- a/Documentation/networking/smc-sysctl.rst +++ b/Documentation/networking/smc-sysctl.rst @@ -111,3 +111,30 @@ smcr_max_recv_wr - INTEGER like before having this control. Default: 48 + +limit_smc_hs - INTEGER + Whether to limit SMC handshake for newly created sockets. + + When enabled, SMC listen path applies handshake limitation based on + handshake worker congestion and queued SMC handshake load. + + Possible values: + + - 0 - Disable handshake limitation + - 1 - Enable handshake limitation + + Default: 0 (disable) + +hs_ctrl - STRING + Select the SMC handshake control profile by name. + + This string refers to the name of a user-implemented + BPF struct_ops instance of type smc_hs_ctrl. + + The selected profile controls whether SMC options are advertised + during TCP SYN/SYN-ACK handshake. + + Only available when CONFIG_SMC_HS_CTRL_BPF is enabled. + Write an empty string to clear the current profile. + + Default: empty string From b8a0e5eb6a126a641ab8768e825756a66848ca29 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 7 Mar 2026 09:59:16 -0800 Subject: [PATCH 0415/1561] tools: ynl: cli: order set->list conversion in JSON output NIPA tries to make sure that HW tests don't modify system state. It dumps some well known configs before and after the test and compares the outputs. Make sure that YNL json output is stable. Converting sets to lists with a naive list(o) results in a random order. Link: https://patch.msgid.link/20260307175916.1652518-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/pyynl/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/net/ynl/pyynl/cli.py b/tools/net/ynl/pyynl/cli.py index 94a5ba348b69..b452d4fb9434 100755 --- a/tools/net/ynl/pyynl/cli.py +++ b/tools/net/ynl/pyynl/cli.py @@ -78,7 +78,7 @@ class YnlEncoder(json.JSONEncoder): if isinstance(o, bytes): return bytes.hex(o) if isinstance(o, set): - return list(o) + return sorted(o) return json.JSONEncoder.default(self, o) From 5a0c5702bd0016ce761dfa2ce84c009a3a32d863 Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Sat, 7 Mar 2026 22:58:33 -0700 Subject: [PATCH 0416/1561] selftests: rds: Fix pylint warnings Tidy up all exiting pylint errors in test.py. No functional changes are introduced in this patch Signed-off-by: Allison Henderson Link: https://patch.msgid.link/20260308055835.1338257-2-achender@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/rds/test.py | 84 +++++++++++++------------ 1 file changed, 45 insertions(+), 39 deletions(-) diff --git a/tools/testing/selftests/net/rds/test.py b/tools/testing/selftests/net/rds/test.py index 4a7178d11193..8256afe6ad6f 100755 --- a/tools/testing/selftests/net/rds/test.py +++ b/tools/testing/selftests/net/rds/test.py @@ -11,7 +11,6 @@ import signal import socket import subprocess import sys -import atexit from pwd import getpwuid from os import stat @@ -23,45 +22,54 @@ from lib.py.utils import ip libc = ctypes.cdll.LoadLibrary('libc.so.6') setns = libc.setns -net0 = 'net0' -net1 = 'net1' +NET0 = 'net0' +NET1 = 'net1' -veth0 = 'veth0' -veth1 = 'veth1' +VETH0 = 'veth0' +VETH1 = 'veth1' # Helper function for creating a socket inside a network namespace. # We need this because otherwise RDS will detect that the two TCP # sockets are on the same interface and use the loop transport instead # of the TCP transport. -def netns_socket(netns, *args): +def netns_socket(netns, *sock_args): + """ + Creates sockets inside of network namespace + + :param netns: the name of the network namespace + :param sock_args: socket family and type + """ u0, u1 = socket.socketpair(socket.AF_UNIX, socket.SOCK_SEQPACKET) child = os.fork() if child == 0: # change network namespace - with open(f'/var/run/netns/{netns}') as f: + with open(f'/var/run/netns/{netns}', encoding='utf-8') as f: try: - ret = setns(f.fileno(), 0) + setns(f.fileno(), 0) except IOError as e: print(e.errno) print(e) # create socket in target namespace - s = socket.socket(*args) + sock = socket.socket(*sock_args) # send resulting socket to parent - socket.send_fds(u0, [], [s.fileno()]) + socket.send_fds(u0, [], [sock.fileno()]) sys.exit(0) # receive socket from child - _, s, _, _ = socket.recv_fds(u1, 0, 1) + _, fds, _, _ = socket.recv_fds(u1, 0, 1) os.waitpid(child, 0) u0.close() u1.close() - return socket.fromfd(s[0], *args) + return socket.fromfd(fds[0], *sock_args) -def signal_handler(sig, frame): +def signal_handler(_sig, _frame): + """ + Test timed out signal handler + """ print('Test timed out') sys.exit(1) @@ -81,13 +89,13 @@ parser.add_argument('-u', '--duplicate', help="Simulate tcp packet duplication", type=int, default=0) args = parser.parse_args() logdir=args.logdir -packet_loss=str(args.loss)+'%' -packet_corruption=str(args.corruption)+'%' -packet_duplicate=str(args.duplicate)+'%' +PACKET_LOSS=str(args.loss)+'%' +PACKET_CORRUPTION=str(args.corruption)+'%' +PACKET_DUPLICATE=str(args.duplicate)+'%' -ip(f"netns add {net0}") -ip(f"netns add {net1}") -ip(f"link add type veth") +ip(f"netns add {NET0}") +ip(f"netns add {NET1}") +ip("link add type veth") addrs = [ # we technically don't need different port numbers, but this will @@ -99,25 +107,25 @@ addrs = [ # move interfaces to separate namespaces so they can no longer be # bound directly; this prevents rds from switching over from the tcp # transport to the loop transport. -ip(f"link set {veth0} netns {net0} up") -ip(f"link set {veth1} netns {net1} up") +ip(f"link set {VETH0} netns {NET0} up") +ip(f"link set {VETH1} netns {NET1} up") # add addresses -ip(f"-n {net0} addr add {addrs[0][0]}/32 dev {veth0}") -ip(f"-n {net1} addr add {addrs[1][0]}/32 dev {veth1}") +ip(f"-n {NET0} addr add {addrs[0][0]}/32 dev {VETH0}") +ip(f"-n {NET1} addr add {addrs[1][0]}/32 dev {VETH1}") # add routes -ip(f"-n {net0} route add {addrs[1][0]}/32 dev {veth0}") -ip(f"-n {net1} route add {addrs[0][0]}/32 dev {veth1}") +ip(f"-n {NET0} route add {addrs[1][0]}/32 dev {VETH0}") +ip(f"-n {NET1} route add {addrs[0][0]}/32 dev {VETH1}") # sanity check that our two interfaces/addresses are correctly set up # and communicating by doing a single ping -ip(f"netns exec {net0} ping -c 1 {addrs[1][0]}") +ip(f"netns exec {NET0} ping -c 1 {addrs[1][0]}") # Start a packet capture on each network -for net in [net0, net1]: +for net in [NET0, NET1]: tcpdump_pid = os.fork() if tcpdump_pid == 0: pcap = logdir+'/'+net+'.pcap' @@ -127,10 +135,10 @@ for net in [net0, net1]: sys.exit(0) # simulate packet loss, duplication and corruption -for net, iface in [(net0, veth0), (net1, veth1)]: +for net, iface in [(NET0, VETH0), (NET1, VETH1)]: ip(f"netns exec {net} /usr/sbin/tc qdisc add dev {iface} root netem \ - corrupt {packet_corruption} loss {packet_loss} duplicate \ - {packet_duplicate}") + corrupt {PACKET_CORRUPTION} loss {PACKET_LOSS} duplicate \ + {PACKET_DUPLICATE}") # add a timeout if args.timeout > 0: @@ -138,8 +146,8 @@ if args.timeout > 0: signal.signal(signal.SIGALRM, signal_handler) sockets = [ - netns_socket(net0, socket.AF_RDS, socket.SOCK_SEQPACKET), - netns_socket(net1, socket.AF_RDS, socket.SOCK_SEQPACKET), + netns_socket(NET0, socket.AF_RDS, socket.SOCK_SEQPACKET), + netns_socket(NET1, socket.AF_RDS, socket.SOCK_SEQPACKET), ] for s, addr in zip(sockets, addrs): @@ -150,9 +158,7 @@ fileno_to_socket = { s.fileno(): s for s in sockets } -addr_to_socket = { - addr: s for addr, s in zip(addrs, sockets) -} +addr_to_socket = dict(zip(addrs, sockets)) socket_to_addr = { s: addr for addr, s in zip(addrs, sockets) @@ -166,14 +172,14 @@ ep = select.epoll() for s in sockets: ep.register(s, select.EPOLLRDNORM) -n = 50000 +NUM_PACKETS = 50000 nr_send = 0 nr_recv = 0 -while nr_send < n: +while nr_send < NUM_PACKETS: # Send as much as we can without blocking print("sending...", nr_send, nr_recv) - while nr_send < n: + while nr_send < NUM_PACKETS: send_data = hashlib.sha256( f'packet {nr_send}'.encode('utf-8')).hexdigest().encode('utf-8') @@ -212,7 +218,7 @@ while nr_send < n: break # exercise net/rds/tcp.c:rds_tcp_sysctl_reset() - for net in [net0, net1]: + for net in [NET0, NET1]: ip(f"netns exec {net} /usr/sbin/sysctl net.rds.tcp.rds_tcp_rcvbuf=10000") ip(f"netns exec {net} /usr/sbin/sysctl net.rds.tcp.rds_tcp_sndbuf=10000") From b873b4e16042fac1244499dab5a72373d25ce051 Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Sat, 7 Mar 2026 22:58:34 -0700 Subject: [PATCH 0417/1561] selftests: rds: Add ksft timeout rds/run.sh sets a timer of 400s when calling test.py. However when tests are run through ksft, a default 45s timer is applied. Fix this by adding a ksft timeout in tools/testing/selftests/net/rds/settings Signed-off-by: Allison Henderson Link: https://patch.msgid.link/20260308055835.1338257-3-achender@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/rds/Makefile | 1 + tools/testing/selftests/net/rds/run.sh | 7 +++++-- tools/testing/selftests/net/rds/settings | 1 + 3 files changed, 7 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/net/rds/settings diff --git a/tools/testing/selftests/net/rds/Makefile b/tools/testing/selftests/net/rds/Makefile index 762845cc973c..fe363be8e358 100644 --- a/tools/testing/selftests/net/rds/Makefile +++ b/tools/testing/selftests/net/rds/Makefile @@ -7,6 +7,7 @@ TEST_PROGS := run.sh TEST_FILES := \ include.sh \ + settings \ test.py \ # end of TEST_FILES diff --git a/tools/testing/selftests/net/rds/run.sh b/tools/testing/selftests/net/rds/run.sh index 8aee244f582a..897d17d1b8db 100755 --- a/tools/testing/selftests/net/rds/run.sh +++ b/tools/testing/selftests/net/rds/run.sh @@ -19,6 +19,9 @@ if test -f "$build_include"; then build_dir="$mk_build_dir" fi +# Source settings for timeout value (also used by ksft runner) +source "$current_dir"/settings + # This test requires kernel source and the *.gcda data therein # Locate the top level of the kernel source, and the net/rds # subfolder with the appropriate *.gcno object files @@ -194,8 +197,8 @@ set +e echo running RDS tests... echo Traces will be logged to "$TRACE_FILE" rm -f "$TRACE_FILE" -strace -T -tt -o "$TRACE_FILE" python3 "$(dirname "$0")/test.py" --timeout 400 -d "$LOG_DIR" \ - -l "$PLOSS" -c "$PCORRUPT" -u "$PDUP" +strace -T -tt -o "$TRACE_FILE" python3 "$(dirname "$0")/test.py" \ + --timeout "$timeout" -d "$LOG_DIR" -l "$PLOSS" -c "$PCORRUPT" -u "$PDUP" test_rc=$? dmesg > "${LOG_DIR}/dmesg.out" diff --git a/tools/testing/selftests/net/rds/settings b/tools/testing/selftests/net/rds/settings new file mode 100644 index 000000000000..d2009a64589c --- /dev/null +++ b/tools/testing/selftests/net/rds/settings @@ -0,0 +1 @@ +timeout=400 From 87fdf57ded3d3e83b90cffb160fce8e2a914142a Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Sat, 7 Mar 2026 22:58:35 -0700 Subject: [PATCH 0418/1561] selftests: rds: Fix tcpdump segfault in rds selftests net/rds/test.py sees a segfault in tcpdump when executed through the ksft runner. [ 21.903713] tcpdump[1469]: segfault at 0 ip 000072100e99126d sp 00007ffccf740fd0 error 4 [ 21.903721] in libc.so.6[16a26d,7798b149a000+188000] [ 21.905074] in libc.so.6[16a26d,72100e84f000+188000] likely on CPU 5 (core 5, socket 0) [ 21.905084] Code: 00 0f 85 a0 00 00 00 48 83 c4 38 89 d8 5b 41 5c 41 5d 41 5e 41 5f 5d c3 0f 1f 44 00 00 48 8b 05 91 8b 09 00 8b 4d ac 64 89 08 <41> 0f b6 07 83 e8 2b a8 fd 0f 84 54 ff ff ff 49 8b 36 4c 89 ff e8 [ 21.906760] likely on CPU 9 (core 9, socket 0) [ 21.913469] Code: 00 0f 85 a0 00 00 00 48 83 c4 38 89 d8 5b 41 5c 41 5d 41 5e 41 5f 5d c3 0f 1f 44 00 00 48 8b 05 91 8b 09 00 8b 4d ac 64 89 08 <41> 0f b6 07 83 e8 2b a8 fd 0f 84 54 ff ff ff 49 8b 36 4c 89 ff e8 The os.fork() call creates extra complexity because it forks the entire process including the python interpreter. ip() then calls cmd() which creates a subprocess.Popen. We can avoid the extra layering by simply calling subprocess.Popen directly. Track the process handles directly and terminate them at cleanup rather than relying on killall. Further tcpdump's -Z flag attempts to change savefile ownership, which is not supported by the 9p protocol. Fix this by writing pcap captures to "/tmp" during the test and move them to the log directory after tcpdump exits. Signed-off-by: Allison Henderson Link: https://patch.msgid.link/20260308055835.1338257-4-achender@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/rds/test.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/net/rds/test.py b/tools/testing/selftests/net/rds/test.py index 8256afe6ad6f..93e23e8b256c 100755 --- a/tools/testing/selftests/net/rds/test.py +++ b/tools/testing/selftests/net/rds/test.py @@ -11,8 +11,8 @@ import signal import socket import subprocess import sys -from pwd import getpwuid -from os import stat +import tempfile +import shutil # Allow utils module to be imported from different directory this_dir = os.path.dirname(os.path.realpath(__file__)) @@ -125,14 +125,14 @@ ip(f"-n {NET1} route add {addrs[0][0]}/32 dev {VETH1}") ip(f"netns exec {NET0} ping -c 1 {addrs[1][0]}") # Start a packet capture on each network +tcpdump_procs = [] for net in [NET0, NET1]: - tcpdump_pid = os.fork() - if tcpdump_pid == 0: - pcap = logdir+'/'+net+'.pcap' - subprocess.check_call(['touch', pcap]) - user = getpwuid(stat(pcap).st_uid).pw_name - ip(f"netns exec {net} /usr/sbin/tcpdump -Z {user} -i any -w {pcap}") - sys.exit(0) + pcap = logdir+'/'+net+'.pcap' + fd, pcap_tmp = tempfile.mkstemp(suffix=".pcap", prefix=f"{net}-", dir="/tmp") + p = subprocess.Popen( + ['ip', 'netns', 'exec', net, + '/usr/sbin/tcpdump', '-i', 'any', '-w', pcap_tmp]) + tcpdump_procs.append((p, pcap_tmp, pcap, fd)) # simulate packet loss, duplication and corruption for net, iface in [(NET0, VETH0), (NET1, VETH1)]: @@ -248,7 +248,11 @@ for s in sockets: print(f"getsockopt(): {nr_success}/{nr_error}") print("Stopping network packet captures") -subprocess.check_call(['killall', '-q', 'tcpdump']) +for p, pcap_tmp, pcap, fd in tcpdump_procs: + p.terminate() + p.wait() + os.close(fd) + shutil.move(pcap_tmp, pcap) # We're done sending and receiving stuff, now let's check if what # we received is what we sent. From 7da62262ec96a4b345d207b6bcd2ddf5231b7f7d Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Mon, 9 Mar 2026 03:39:45 +0100 Subject: [PATCH 0419/1561] inet: add ip_local_port_step_width sysctl to improve port usage distribution With the current port selection algorithm, ports after a reserved port range or long time used port are used more often than others [1]. This causes an uneven port usage distribution. This combines with cloud environments blocking connections between the application server and the database server if there was a previous connection with the same source port, leading to connectivity problems between applications on cloud environments. The real issue here is that these firewalls cannot cope with standards-compliant port reuse. This is a workaround for such situations and an improvement on the distribution of ports selected. The proposed solution is to implement a variant of RFC 6056 Algorithm 5. The step size is selected randomly on every connect() call ensuring it is a coprime with respect to the size of the range of ports we want to scan. This way, we can ensure that all ports within the range are scanned before returning an error. To enable this algorithm, the user must configure the new sysctl option "net.ipv4.ip_local_port_step_width". In addition, on graphs generated we can observe that the distribution of source ports is more even with the proposed approach. [2] [1] https://0xffsoftware.com/port_graph_current_alg.html [2] https://0xffsoftware.com/port_graph_random_step_alg.html Reviewed-by: Eric Dumazet Signed-off-by: Fernando Fernandez Mancera Link: https://patch.msgid.link/20260309023946.5473-2-fmancera@suse.de Signed-off-by: Jakub Kicinski --- Documentation/networking/ip-sysctl.rst | 16 +++++++++++ .../net_cachelines/netns_ipv4_sysctl.rst | 1 + include/net/netns/ipv4.h | 1 + net/ipv4/inet_hashtables.c | 28 +++++++++++++++++-- net/ipv4/sysctl_net_ipv4.c | 7 +++++ 5 files changed, 50 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 265158534cda..2e3a746fcc6d 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1630,6 +1630,22 @@ ip_local_reserved_ports - list of comma separated ranges Default: Empty +ip_local_port_step_width - INTEGER + Defines the numerical maximum increment between successive port + allocations within the ephemeral port range when an unavailable port is + reached. This can be used to mitigate accumulated nodes in port + distribution when reserved ports have been configured. Please note that + port collisions may be more frequent in a system with a very high load. + + It is recommended to set this value strictly larger than the largest + contiguous block of ports configure in ip_local_reserved_ports. For + large reserved port ranges, setting this to 3x or 4x the size of the + largest block is advised. Using a value equal or greater than the local + port range size completely solves the uneven port distribution problem, + but it can degrade performance under port exhaustion situations. + + Default: 0 (disabled) + ip_unprivileged_port_start - INTEGER This is a per-namespace sysctl. It defines the first unprivileged port in the network namespace. Privileged ports diff --git a/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst b/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst index beaf1880a19b..cf284263e69b 100644 --- a/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst +++ b/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst @@ -52,6 +52,7 @@ u8 sysctl_ip_fwd_update_priority u8 sysctl_ip_nonlocal_bind u8 sysctl_ip_autobind_reuse u8 sysctl_ip_dynaddr +u32 sysctl_ip_local_port_step_width u8 sysctl_ip_early_demux read_mostly ip(6)_rcv_finish_core u8 sysctl_raw_l3mdev_accept u8 sysctl_tcp_early_demux read_mostly ip(6)_rcv_finish_core diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 38624beff9b3..80ccd4dda8e0 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -166,6 +166,7 @@ struct netns_ipv4 { u8 sysctl_ip_autobind_reuse; /* Shall we try to damage output packets if routing dev changes? */ u8 sysctl_ip_dynaddr; + u32 sysctl_ip_local_port_step_width; #ifdef CONFIG_NET_L3_MASTER_DEV u8 sysctl_raw_l3mdev_accept; #endif diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index ac7b67c603b5..13310c72b0bf 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -1057,12 +1058,12 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, struct net *net = sock_net(sk); struct inet_bind2_bucket *tb2; struct inet_bind_bucket *tb; + int step, scan_step, l3mdev; + u32 index, max_rand_step; bool tb_created = false; u32 remaining, offset; int ret, i, low, high; bool local_ports; - int step, l3mdev; - u32 index; if (port) { local_bh_disable(); @@ -1076,6 +1077,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, local_ports = inet_sk_get_local_port_range(sk, &low, &high); step = local_ports ? 1 : 2; + scan_step = step; + max_rand_step = READ_ONCE(net->ipv4.sysctl_ip_local_port_step_width); high++; /* [32768, 60999] -> [32768, 61000[ */ remaining = high - low; @@ -1094,9 +1097,28 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, */ if (!local_ports) offset &= ~1U; + + if (max_rand_step && remaining > 1) { + u32 range = remaining / step; + u32 upper_bound; + + upper_bound = min(range, max_rand_step); + scan_step = get_random_u32_inclusive(1, upper_bound); + while (gcd(scan_step, range) != 1) { + scan_step++; + /* if both scan_step and range are even gcd won't be 1 */ + if (!(scan_step & 1) && !(range & 1)) + scan_step++; + if (unlikely(scan_step > upper_bound)) { + scan_step = 1; + break; + } + } + scan_step *= step; + } other_parity_scan: port = low + offset; - for (i = 0; i < remaining; i += step, port += step) { + for (i = 0; i < remaining; i += step, port += scan_step) { if (unlikely(port >= high)) port -= remaining; if (inet_is_local_reserved_port(net, port)) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 5654cc9c8a0b..d8bdb1bdbff1 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -823,6 +823,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = ipv4_local_port_range, }, + { + .procname = "ip_local_port_step_width", + .maxlen = sizeof(u32), + .data = &init_net.ipv4.sysctl_ip_local_port_step_width, + .mode = 0644, + .proc_handler = proc_douintvec, + }, { .procname = "ip_local_reserved_ports", .data = &init_net.ipv4.sysctl_local_reserved_ports, From 690043b95c1804cccce8ae6a6677a6b5de33ca77 Mon Sep 17 00:00:00 2001 From: Dimitri Daskalakis Date: Mon, 9 Mar 2026 13:42:15 -0700 Subject: [PATCH 0420/1561] selftests: drv-net: rss: Add retries to test_rss_key_indir to reduce flakes The test generates 16 flows, and verifies that traffic is distributed across two queues via the NICs RSS indirection table. The likelihood of the flows skewing to a single queue is high, so we retry sending traffic up to 3 times. Alternatively, we could increase the number of generated flows. But debug kernels may struggle to ramp this many flows. During manual testing, the test passed for 10,000 consecutive runs. Signed-off-by: Dimitri Daskalakis Reviewed-by: Pavan Chebbi Link: https://patch.msgid.link/20260309204215.2110486-1-dimitri.daskalakis1@gmail.com Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/hw/rss_ctx.py | 29 +++++++++++++++---- 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py index d7cb30306368..51f4e7bc3e5d 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -166,9 +166,17 @@ def test_rss_key_indir(cfg): ksft_eq(1, max(data['rss-indirection-table'])) # Check we only get traffic on the first 2 queues - cnts = _get_rx_cnts(cfg) - GenerateTraffic(cfg).wait_pkts_and_stop(20000) - cnts = _get_rx_cnts(cfg, prev=cnts) + + # Retry a few times in case the flows skew to a single queue. + attempts = 3 + for attempt in range(attempts): + cnts = _get_rx_cnts(cfg) + GenerateTraffic(cfg).wait_pkts_and_stop(20000) + cnts = _get_rx_cnts(cfg, prev=cnts) + if cnts[0] >= 5000 and cnts[1] >= 5000: + break + ksft_pr(f"Skewed queue distribution, attempt {attempt + 1}/{attempts}: " + str(cnts)) + # 2 queues, 20k packets, must be at least 5k per queue ksft_ge(cnts[0], 5000, "traffic on main context (1/2): " + str(cnts)) ksft_ge(cnts[1], 5000, "traffic on main context (2/2): " + str(cnts)) @@ -178,9 +186,18 @@ def test_rss_key_indir(cfg): # Restore, and check traffic gets spread again reset_indir.exec() - cnts = _get_rx_cnts(cfg) - GenerateTraffic(cfg).wait_pkts_and_stop(20000) - cnts = _get_rx_cnts(cfg, prev=cnts) + for attempt in range(attempts): + cnts = _get_rx_cnts(cfg) + GenerateTraffic(cfg).wait_pkts_and_stop(20000) + cnts = _get_rx_cnts(cfg, prev=cnts) + if qcnt > 4: + if sum(cnts[:2]) < sum(cnts[2:]): + break + else: + if cnts[2] >= 3500: + break + ksft_pr(f"Skewed queue distribution, attempt {attempt + 1}/{attempts}: " + str(cnts)) + if qcnt > 4: # First two queues get less traffic than all the rest ksft_lt(sum(cnts[:2]), sum(cnts[2:]), From 73a864352570fd30d942652f05bfe9340d7a2055 Mon Sep 17 00:00:00 2001 From: Rosen Penev Date: Fri, 6 Mar 2026 19:17:09 -0800 Subject: [PATCH 0421/1561] net: mvneta: support EPROBE_DEFER when reading MAC address If nvmem loads after the ethernet driver, mac address assignments will not take effect. of_get_ethdev_address returns EPROBE_DEFER in such a case so we need to handle that to avoid eth_hw_addr_random. Add extra goto section to just free stats as they are allocated right above. Signed-off-by: Rosen Penev Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260307031709.640141-1-rosenp@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvneta.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 9ba4aef7080c..0c061fb0ed07 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -5620,6 +5620,8 @@ static int mvneta_probe(struct platform_device *pdev) } err = of_get_ethdev_address(dn, dev); + if (err == -EPROBE_DEFER) + goto err_free_stats; if (!err) { mac_from = "device tree"; } else { @@ -5755,6 +5757,7 @@ err_netdev: 1 << pp->id); mvneta_bm_put(pp->bm_priv); } +err_free_stats: free_percpu(pp->stats); err_free_ports: free_percpu(pp->ports); From 7b1309c33927d126d1cc47ddaf33bf2ae86f000c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 9 Mar 2026 17:53:33 -0700 Subject: [PATCH 0422/1561] tools: ynl: handle pad type during decode Apparently Python code only handled the 'pad' type in structs until now. Add it to attr decoding. nlctrl policy dumps need it. Link: https://patch.msgid.link/20260310005337.3594225-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/pyynl/lib/ynl.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/net/ynl/pyynl/lib/ynl.py b/tools/net/ynl/pyynl/lib/ynl.py index 9774005e7ad1..8302c19ab55c 100644 --- a/tools/net/ynl/pyynl/lib/ynl.py +++ b/tools/net/ynl/pyynl/lib/ynl.py @@ -814,7 +814,9 @@ class YnlFamily(SpecFamily): continue try: - if attr_spec["type"] == 'nest': + if attr_spec["type"] == 'pad': + continue + elif attr_spec["type"] == 'nest': subdict = self._decode(NlAttrs(attr.raw), attr_spec['nested-attributes'], search_attrs) From c26fda6212b88af1e667474728dd241ebf6ba1d1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 9 Mar 2026 17:53:34 -0700 Subject: [PATCH 0423/1561] tools: ynl: move policy decoding out of NlMsg We'll soon need to decode policies from dump so move _decode_policy() out of class NlMsg. Link: https://patch.msgid.link/20260310005337.3594225-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/pyynl/lib/ynl.py | 53 ++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/tools/net/ynl/pyynl/lib/ynl.py b/tools/net/ynl/pyynl/lib/ynl.py index 8302c19ab55c..17482c17a976 100644 --- a/tools/net/ynl/pyynl/lib/ynl.py +++ b/tools/net/ynl/pyynl/lib/ynl.py @@ -247,7 +247,7 @@ class NlMsg: elif extack.type == Netlink.NLMSGERR_ATTR_OFFS: self.extack['bad-attr-offs'] = extack.as_scalar('u32') elif extack.type == Netlink.NLMSGERR_ATTR_POLICY: - self.extack['policy'] = self._decode_policy(extack.raw) + self.extack['policy'] = _genl_decode_policy(extack.raw) else: if 'unknown' not in self.extack: self.extack['unknown'] = [] @@ -256,30 +256,6 @@ class NlMsg: if attr_space: self.annotate_extack(attr_space) - def _decode_policy(self, raw): - policy = {} - for attr in NlAttrs(raw): - if attr.type == Netlink.NL_POLICY_TYPE_ATTR_TYPE: - type_ = attr.as_scalar('u32') - policy['type'] = Netlink.AttrType(type_).name - elif attr.type == Netlink.NL_POLICY_TYPE_ATTR_MIN_VALUE_S: - policy['min-value'] = attr.as_scalar('s64') - elif attr.type == Netlink.NL_POLICY_TYPE_ATTR_MAX_VALUE_S: - policy['max-value'] = attr.as_scalar('s64') - elif attr.type == Netlink.NL_POLICY_TYPE_ATTR_MIN_VALUE_U: - policy['min-value'] = attr.as_scalar('u64') - elif attr.type == Netlink.NL_POLICY_TYPE_ATTR_MAX_VALUE_U: - policy['max-value'] = attr.as_scalar('u64') - elif attr.type == Netlink.NL_POLICY_TYPE_ATTR_MIN_LENGTH: - policy['min-length'] = attr.as_scalar('u32') - elif attr.type == Netlink.NL_POLICY_TYPE_ATTR_MAX_LENGTH: - policy['max-length'] = attr.as_scalar('u32') - elif attr.type == Netlink.NL_POLICY_TYPE_ATTR_BITFIELD32_MASK: - policy['bitfield32-mask'] = attr.as_scalar('u32') - elif attr.type == Netlink.NL_POLICY_TYPE_ATTR_MASK: - policy['mask'] = attr.as_scalar('u64') - return policy - def annotate_extack(self, attr_space): """ Make extack more human friendly with attribute information """ @@ -333,6 +309,33 @@ def _genl_msg_finalize(msg): return struct.pack("I", len(msg) + 4) + msg +def _genl_decode_policy(raw): + policy = {} + for attr in NlAttrs(raw): + if attr.type == Netlink.NL_POLICY_TYPE_ATTR_TYPE: + type_ = attr.as_scalar('u32') + policy['type'] = Netlink.AttrType(type_).name + elif attr.type == Netlink.NL_POLICY_TYPE_ATTR_MIN_VALUE_S: + policy['min-value'] = attr.as_scalar('s64') + elif attr.type == Netlink.NL_POLICY_TYPE_ATTR_MAX_VALUE_S: + policy['max-value'] = attr.as_scalar('s64') + elif attr.type == Netlink.NL_POLICY_TYPE_ATTR_MIN_VALUE_U: + policy['min-value'] = attr.as_scalar('u64') + elif attr.type == Netlink.NL_POLICY_TYPE_ATTR_MAX_VALUE_U: + policy['max-value'] = attr.as_scalar('u64') + elif attr.type == Netlink.NL_POLICY_TYPE_ATTR_MIN_LENGTH: + policy['min-length'] = attr.as_scalar('u32') + elif attr.type == Netlink.NL_POLICY_TYPE_ATTR_MAX_LENGTH: + policy['max-length'] = attr.as_scalar('u32') + elif attr.type == Netlink.NL_POLICY_TYPE_ATTR_POLICY_IDX: + policy['policy-idx'] = attr.as_scalar('u32') + elif attr.type == Netlink.NL_POLICY_TYPE_ATTR_BITFIELD32_MASK: + policy['bitfield32-mask'] = attr.as_scalar('u32') + elif attr.type == Netlink.NL_POLICY_TYPE_ATTR_MASK: + policy['mask'] = attr.as_scalar('u64') + return policy + + # pylint: disable=too-many-nested-blocks def _genl_load_families(): genl_family_name_to_id = {} From 8bbcfce5db97abc6ca2066b540e88702f461128b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 9 Mar 2026 17:53:35 -0700 Subject: [PATCH 0424/1561] tools: ynl: add short doc to class YnlFamily The class is quite long. It's getting hard to find the user-facing methods. Add a short doc at the class level explaining the main API. Link: https://patch.msgid.link/20260310005337.3594225-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/pyynl/lib/ynl.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tools/net/ynl/pyynl/lib/ynl.py b/tools/net/ynl/pyynl/lib/ynl.py index 17482c17a976..ec4d95f583fe 100644 --- a/tools/net/ynl/pyynl/lib/ynl.py +++ b/tools/net/ynl/pyynl/lib/ynl.py @@ -491,6 +491,32 @@ class SpaceAttrs: class YnlFamily(SpecFamily): + """ + YNL family -- a Netlink interface built from a YAML spec. + + Primary use of the class is to execute Netlink commands: + + ynl.(attrs, ...) + + By default this will execute the as "do", pass dump=True + to perform a dump operation. + + ynl. is a shorthand / convenience wrapper for the following + methods which take the op_name as a string: + + ynl.do(op_name, attrs, flags=None) -- execute a do operation + ynl.dump(op_name, attrs) -- execute a dump operation + ynl.do_multi(ops) -- batch multiple do operations + + The flags argument in ynl.do() allows passing in extra NLM_F_* flags + which may be necessary for old families. + + Notification API: + + ynl.ntf_subscribe(mcast_name) -- join a multicast group + ynl.check_ntf() -- drain pending notifications + ynl.poll_ntf(duration=None) -- yield notifications + """ def __init__(self, def_path, schema=None, process_unknown=False, recv_size=0): super().__init__(def_path, schema) From 77a6401a8722be20ea8db98ac900c93ccc7068ff Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 9 Mar 2026 17:53:36 -0700 Subject: [PATCH 0425/1561] tools: ynl: add Python API for easier access to policies The format of Netlink policy dump is a bit curious with messages in the same dump carrying both attrs and mapping info. Plus each message carries a single piece of the puzzle the caller must then reassemble. I need to do this reassembly for a test, but I think it's generally useful. So let's add proper support to YnlFamily to return more user-friendly representation. See the various docs in the patch for more details. Link: https://patch.msgid.link/20260310005337.3594225-5-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/pyynl/lib/__init__.py | 5 +- tools/net/ynl/pyynl/lib/ynl.py | 134 ++++++++++++++++++++++ tools/testing/selftests/net/lib/py/ynl.py | 6 +- 3 files changed, 140 insertions(+), 5 deletions(-) diff --git a/tools/net/ynl/pyynl/lib/__init__.py b/tools/net/ynl/pyynl/lib/__init__.py index 33a96155fb3b..be741985ae4e 100644 --- a/tools/net/ynl/pyynl/lib/__init__.py +++ b/tools/net/ynl/pyynl/lib/__init__.py @@ -5,11 +5,12 @@ from .nlspec import SpecAttr, SpecAttrSet, SpecEnumEntry, SpecEnumSet, \ SpecFamily, SpecOperation, SpecSubMessage, SpecSubMessageFormat, \ SpecException -from .ynl import YnlFamily, Netlink, NlError, YnlException +from .ynl import YnlFamily, Netlink, NlError, NlPolicy, YnlException from .doc_generator import YnlDocGenerator __all__ = ["SpecAttr", "SpecAttrSet", "SpecEnumEntry", "SpecEnumSet", "SpecFamily", "SpecOperation", "SpecSubMessage", "SpecSubMessageFormat", "SpecException", - "YnlFamily", "Netlink", "NlError", "YnlDocGenerator", "YnlException"] + "YnlFamily", "Netlink", "NlError", "NlPolicy", "YnlException", + "YnlDocGenerator"] diff --git a/tools/net/ynl/pyynl/lib/ynl.py b/tools/net/ynl/pyynl/lib/ynl.py index ec4d95f583fe..0eedeee465d8 100644 --- a/tools/net/ynl/pyynl/lib/ynl.py +++ b/tools/net/ynl/pyynl/lib/ynl.py @@ -77,15 +77,22 @@ class Netlink: # nlctrl CTRL_CMD_GETFAMILY = 3 + CTRL_CMD_GETPOLICY = 10 CTRL_ATTR_FAMILY_ID = 1 CTRL_ATTR_FAMILY_NAME = 2 CTRL_ATTR_MAXATTR = 5 CTRL_ATTR_MCAST_GROUPS = 7 + CTRL_ATTR_POLICY = 8 + CTRL_ATTR_OP_POLICY = 9 + CTRL_ATTR_OP = 10 CTRL_ATTR_MCAST_GRP_NAME = 1 CTRL_ATTR_MCAST_GRP_ID = 2 + CTRL_ATTR_POLICY_DO = 1 + CTRL_ATTR_POLICY_DUMP = 2 + # Extack types NLMSGERR_ATTR_MSG = 1 NLMSGERR_ATTR_OFFS = 2 @@ -136,6 +143,34 @@ class ConfigError(Exception): pass +# pylint: disable=too-few-public-methods +class NlPolicy: + """Kernel policy for one mode (do or dump) of one operation. + + Returned by YnlFamily.get_policy(). Contains a dict of attributes + the kernel accepts, with their validation constraints. + + Attributes: + attrs: dict mapping attribute names to policy dicts, e.g. + page-pool-stats-get do policy:: + + { + 'info': {'type': 'nested', 'policy': { + 'id': {'type': 'uint', 'min-value': 1, + 'max-value': 4294967295}, + 'ifindex': {'type': 'u32', 'min-value': 1, + 'max-value': 2147483647}, + }}, + } + + Each policy dict always contains 'type' (e.g. u32, string, + nested). Optional keys: min-value, max-value, min-length, + max-length, mask, policy. + """ + def __init__(self, attrs): + self.attrs = attrs + + class NlAttr: ScalarFormat = namedtuple('ScalarFormat', ['native', 'big', 'little']) type_formats = { @@ -384,6 +419,52 @@ def _genl_load_families(): genl_family_name_to_id[fam['name']] = fam +# pylint: disable=too-many-nested-blocks +def _genl_policy_dump(family_id, op): + op_policy = {} + policy_table = {} + + with socket.socket(socket.AF_NETLINK, socket.SOCK_RAW, Netlink.NETLINK_GENERIC) as sock: + sock.setsockopt(Netlink.SOL_NETLINK, Netlink.NETLINK_CAP_ACK, 1) + + msg = _genl_msg(Netlink.GENL_ID_CTRL, + Netlink.NLM_F_REQUEST | Netlink.NLM_F_ACK | Netlink.NLM_F_DUMP, + Netlink.CTRL_CMD_GETPOLICY, 1) + msg += struct.pack('HHHxx', 6, Netlink.CTRL_ATTR_FAMILY_ID, family_id) + msg += struct.pack('HHI', 8, Netlink.CTRL_ATTR_OP, op) + msg = _genl_msg_finalize(msg) + + sock.send(msg, 0) + + while True: + reply = sock.recv(128 * 1024) + nms = NlMsgs(reply) + for nl_msg in nms: + if nl_msg.error: + raise YnlException(f"Netlink error: {nl_msg.error}") + if nl_msg.done: + return op_policy, policy_table + + gm = GenlMsg(nl_msg) + for attr in NlAttrs(gm.raw): + if attr.type == Netlink.CTRL_ATTR_OP_POLICY: + for op_attr in NlAttrs(attr.raw): + for method_attr in NlAttrs(op_attr.raw): + if method_attr.type == Netlink.CTRL_ATTR_POLICY_DO: + op_policy['do'] = method_attr.as_scalar('u32') + elif method_attr.type == Netlink.CTRL_ATTR_POLICY_DUMP: + op_policy['dump'] = method_attr.as_scalar('u32') + elif attr.type == Netlink.CTRL_ATTR_POLICY: + for pidx_attr in NlAttrs(attr.raw): + policy_idx = pidx_attr.type + for aid_attr in NlAttrs(pidx_attr.raw): + attr_id = aid_attr.type + decoded = _genl_decode_policy(aid_attr.raw) + if policy_idx not in policy_table: + policy_table[policy_idx] = {} + policy_table[policy_idx][attr_id] = decoded + + class GenlMsg: def __init__(self, nl_msg): self.nl = nl_msg @@ -516,6 +597,11 @@ class YnlFamily(SpecFamily): ynl.ntf_subscribe(mcast_name) -- join a multicast group ynl.check_ntf() -- drain pending notifications ynl.poll_ntf(duration=None) -- yield notifications + + Policy introspection allows querying validation criteria from the running + kernel. Allows checking whether kernel supports a given attribute or value. + + ynl.get_policy(op_name, mode) -- query kernel policy for an op """ def __init__(self, def_path, schema=None, process_unknown=False, recv_size=0): @@ -1221,3 +1307,51 @@ class YnlFamily(SpecFamily): def do_multi(self, ops): return self._ops(ops) + + def _resolve_policy(self, policy_idx, policy_table, attr_set): + attrs = {} + if policy_idx not in policy_table: + return attrs + for attr_id, decoded in policy_table[policy_idx].items(): + if attr_set and attr_id in attr_set.attrs_by_val: + spec = attr_set.attrs_by_val[attr_id] + name = spec['name'] + else: + spec = None + name = f'attr-{attr_id}' + if 'policy-idx' in decoded: + sub_set = None + if spec and 'nested-attributes' in spec.yaml: + sub_set = self.attr_sets[spec.yaml['nested-attributes']] + nested = self._resolve_policy(decoded['policy-idx'], + policy_table, sub_set) + del decoded['policy-idx'] + decoded['policy'] = nested + attrs[name] = decoded + return attrs + + def get_policy(self, op_name, mode): + """Query running kernel for the Netlink policy of an operation. + + Allows checking whether kernel supports a given attribute or value. + This method consults the running kernel, not the YAML spec. + + Args: + op_name: operation name as it appears in the YAML spec + mode: 'do' or 'dump' + + Returns: + NlPolicy with an attrs dict mapping attribute names to + their policy properties (type, min/max, nested, etc.), + or None if the operation has no policy for the given mode. + Empty policy usually implies that the operation rejects + all attributes. + """ + op = self.ops[op_name] + op_policy, policy_table = _genl_policy_dump(self.nlproto.family_id, + op.req_value) + if mode not in op_policy: + return None + policy_idx = op_policy[mode] + attrs = self._resolve_policy(policy_idx, policy_table, op.attr_set) + return NlPolicy(attrs) diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py index b42986bf39e3..e8aa97936f6c 100644 --- a/tools/testing/selftests/net/lib/py/ynl.py +++ b/tools/testing/selftests/net/lib/py/ynl.py @@ -13,14 +13,14 @@ try: SPEC_PATH = KSFT_DIR / "net/lib/specs" sys.path.append(tools_full_path.as_posix()) - from net.lib.ynl.pyynl.lib import YnlFamily, NlError, Netlink + from net.lib.ynl.pyynl.lib import YnlFamily, NlError, NlPolicy, Netlink else: # Running in tree tools_full_path = KSRC / "tools" SPEC_PATH = KSRC / "Documentation/netlink/specs" sys.path.append(tools_full_path.as_posix()) - from net.ynl.pyynl.lib import YnlFamily, NlError, Netlink + from net.ynl.pyynl.lib import YnlFamily, NlError, NlPolicy, Netlink except ModuleNotFoundError as e: ksft_pr("Failed importing `ynl` library from kernel sources") ksft_pr(str(e)) @@ -28,7 +28,7 @@ except ModuleNotFoundError as e: sys.exit(4) __all__ = [ - "NlError", "Netlink", "YnlFamily", "SPEC_PATH", + "NlError", "NlPolicy", "Netlink", "YnlFamily", "SPEC_PATH", "EthtoolFamily", "RtnlFamily", "RtnlAddrFamily", "NetdevFamily", "NetshaperFamily", "DevlinkFamily", "PSPFamily", ] From d6df5e9b2a565be08330e46a8a615aac9ed8711b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 9 Mar 2026 17:53:37 -0700 Subject: [PATCH 0426/1561] tools: ynl: cli: add --policy support Add --policy flag which can be combined with --do or --dump to query the kernel's netlink policy for an operation instead of executing it. Examples: $ ynl --family netdev --do dev-get --policy {'ifindex': {'max-value': 4294967295, 'min-value': 1, 'type': 'u32'}} $ ynl --family ethtool --do channels-get --policy --output-json {"header": {"type": "nested", "policy": {"dev-index": ...}}} $ ynl --family netdev --dump dev-get --policy {} Link: https://patch.msgid.link/20260310005337.3594225-6-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/pyynl/cli.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/net/ynl/pyynl/cli.py b/tools/net/ynl/pyynl/cli.py index b452d4fb9434..fc9e84754e4b 100755 --- a/tools/net/ynl/pyynl/cli.py +++ b/tools/net/ynl/pyynl/cli.py @@ -256,6 +256,8 @@ def main(): schema_group.add_argument('--no-schema', action='store_true') dbg_group = parser.add_argument_group('Debug options') + io_group.add_argument('--policy', action='store_true', + help='Query kernel policy for the operation instead of executing it') dbg_group.add_argument('--dbg-small-recv', default=0, const=4000, action='store', nargs='?', type=int, metavar='INT', help="Length of buffers used for recv()") @@ -308,6 +310,16 @@ def main(): if args.dbg_small_recv: ynl.set_recv_dbg(True) + if args.policy: + if args.do: + pol = ynl.get_policy(args.do, 'do') + output(pol.attrs if pol else None) + args.do = None + if args.dump: + pol = ynl.get_policy(args.dump, 'dump') + output(pol.attrs if pol else None) + args.dump = None + if args.ntf: ynl.ntf_subscribe(args.ntf) From 1f9cab56e79eb88acec4d350c192d327244887c3 Mon Sep 17 00:00:00 2001 From: Eric Joyner Date: Fri, 6 Mar 2026 13:56:34 -0800 Subject: [PATCH 0427/1561] ionic: Report additional media types from firmware The device firmware supports reporting more media types than what was there in the past, so map these new media types to existing ethtool bits, which appears to be what other drivers do for media types that match speeds but not physical spec. And while here, make a very small cleanup in ionic_get_link_ksettings() to remove some unnecessary code duplication. Reviewed-by: Brett Creeley Signed-off-by: Eric Joyner Link: https://patch.msgid.link/20260306215634.64550-1-eric.joyner@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/pensando/ionic/ionic_ethtool.c | 9 ++++++--- drivers/net/ethernet/pensando/ionic/ionic_if.h | 6 ++++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c index 1514c1019f28..78a802eb159f 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c @@ -188,10 +188,9 @@ static int ionic_get_link_ksettings(struct net_device *netdev, case IONIC_XCVR_PID_QSFP_100G_CWDM4: case IONIC_XCVR_PID_QSFP_100G_PSM4: case IONIC_XCVR_PID_QSFP_100G_LR4: - ethtool_link_ksettings_add_link_mode(ks, supported, - 100000baseLR4_ER4_Full); - break; case IONIC_XCVR_PID_QSFP_100G_ER4: + case IONIC_XCVR_PID_QSFP_100G_FR4: + case IONIC_XCVR_PID_QSFP_100G_DR4: ethtool_link_ksettings_add_link_mode(ks, supported, 100000baseLR4_ER4_Full); break; @@ -212,6 +211,7 @@ static int ionic_get_link_ksettings(struct net_device *netdev, break; case IONIC_XCVR_PID_QSFP_200G_AOC: case IONIC_XCVR_PID_QSFP_200G_SR4: + case IONIC_XCVR_PID_QSFP_200G_AEC: ethtool_link_ksettings_add_link_mode(ks, supported, 200000baseSR4_Full); break; @@ -232,6 +232,9 @@ static int ionic_get_link_ksettings(struct net_device *netdev, 400000baseDR4_Full); break; case IONIC_XCVR_PID_QSFP_400G_SR4: + case IONIC_XCVR_PID_QSFP_400G_AOC: + case IONIC_XCVR_PID_QSFP_400G_AEC: + case IONIC_XCVR_PID_QSFP_400G_LPO: ethtool_link_ksettings_add_link_mode(ks, supported, 400000baseSR4_Full); break; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h index 47559c909c8b..23d6e2b4791e 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_if.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h @@ -1341,6 +1341,12 @@ enum ionic_xcvr_pid { IONIC_XCVR_PID_QSFP_400G_DR4 = 80, IONIC_XCVR_PID_QSFP_400G_SR4 = 81, IONIC_XCVR_PID_QSFP_400G_VR4 = 82, + IONIC_XCVR_PID_QSFP_400G_AOC = 83, + IONIC_XCVR_PID_QSFP_400G_AEC = 84, + IONIC_XCVR_PID_QSFP_200G_AEC = 85, + IONIC_XCVR_PID_QSFP_400G_LPO = 86, + IONIC_XCVR_PID_QSFP_100G_FR4 = 87, + IONIC_XCVR_PID_QSFP_100G_DR4 = 88, }; /** From 9278b888920ee8f3cea06622f04da681536b6601 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Sat, 7 Mar 2026 10:55:32 +0100 Subject: [PATCH 0428/1561] net: ethernet: ravb: Disable interrupts when closing device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Disable E-MAC interrupts when closing the device. Signed-off-by: Yoshihiro Shimoda [Niklas: Rebase from BSP and reword commit message] Signed-off-by: Niklas Söderlund Link: https://patch.msgid.link/20260307095532.2118495-1-niklas.soderlund+renesas@ragnatech.se Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/ravb_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 84b657fc2e15..2c725824b348 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -2367,6 +2367,7 @@ static int ravb_close(struct net_device *ndev) ravb_write(ndev, 0, RIC0); ravb_write(ndev, 0, RIC2); ravb_write(ndev, 0, TIC); + ravb_write(ndev, 0, ECSIPR); /* PHY disconnect */ if (ndev->phydev) { From 34bd3c6b0bd383a76d987c8c45c4f309b681b255 Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Mon, 9 Mar 2026 20:39:16 +0800 Subject: [PATCH 0429/1561] net: sched: cls_u32: Avoid memcpy() false-positive warning in u32_init_knode() Syzbot reported a warning in u32_init_knode() [1]. Similar to commit 7cba18332e36 ("net: sched: cls_u32: Avoid memcpy() false-positive warning") which addressed the same issue in u32_change(), use unsafe_memcpy() in u32_init_knode() to work around the compiler's inability to see into composite flexible array structs. This silences the false-positive reported by syzbot: memcpy: detected field-spanning write (size 32) of single field "&new->sel" at net/sched/cls_u32.c:855 (size 16) Since the memory is correctly allocated with kzalloc_flex() using s->nkeys, this is purely a false positive and does not need a Fixes tag. [1] https://syzkaller.appspot.com/bug?extid=d5ace703ed883df56e42 Reported-by: syzbot+d5ace703ed883df56e42@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/69a811b9.a70a0220.b118c.0019.GAE@google.com/T/ Reviewed-by: Simon Horman Acked-by: Gustavo A. R. Silva Signed-off-by: Jiayuan Chen Link: https://patch.msgid.link/20260309123917.402183-1-jiayuan.chen@linux.dev Signed-off-by: Jakub Kicinski --- net/sched/cls_u32.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 9241c025aa74..8f30cc82181d 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -852,7 +852,10 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp, /* Similarly success statistics must be moved as pointers */ new->pcpu_success = n->pcpu_success; #endif - memcpy(&new->sel, s, struct_size(s, keys, s->nkeys)); + unsafe_memcpy(&new->sel, s, struct_size(s, keys, s->nkeys), + /* A composite flex-array structure destination, + * which was correctly sized with kzalloc_flex(), + * above. */); if (tcf_exts_init(&new->exts, net, TCA_U32_ACT, TCA_U32_POLICE)) { kfree(new); From fe81629217e09ed8772e63a4c9cb0d864d849174 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Sun, 8 Mar 2026 14:58:50 +0530 Subject: [PATCH 0430/1561] amd-xgbe: Simplify powerdown/powerup paths The caller parameter in xgbe_powerdown() and xgbe_powerup() was intended to differentiate between driver and ioctl contexts, but the only remaining usage is from the driver suspend/resume path. Simplify this by: - Removing the unused XGMAC_DRIVER_CONTEXT and XGMAC_IOCTL_CONTEXT macros - Dropping the now-unused caller parameter - Reordering operations in xgbe_powerdown() to disable NAPI before stopping TX/RX, matching the order used in xgbe_stop() This makes the powerdown/powerup paths easier to follow and keeps the ordering consistent with the rest of the driver. Signed-off-by: Raju Rangoju Link: https://patch.msgid.link/20260308092851.1510214-2-Raju.Rangoju@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 47 ++++++++----------- drivers/net/ethernet/amd/xgbe/xgbe-pci.c | 16 +++---- drivers/net/ethernet/amd/xgbe/xgbe-platform.c | 4 +- drivers/net/ethernet/amd/xgbe/xgbe.h | 8 +--- 4 files changed, 32 insertions(+), 43 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 8b79d88480db..d57daf6306e1 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1116,69 +1116,62 @@ static int xgbe_phy_reset(struct xgbe_prv_data *pdata) return pdata->phy_if.phy_reset(pdata); } -int xgbe_powerdown(struct net_device *netdev, unsigned int caller) +int xgbe_powerdown(struct net_device *netdev) { struct xgbe_prv_data *pdata = netdev_priv(netdev); struct xgbe_hw_if *hw_if = &pdata->hw_if; - DBGPR("-->xgbe_powerdown\n"); - - if (!netif_running(netdev) || - (caller == XGMAC_IOCTL_CONTEXT && pdata->power_down)) { - netdev_alert(netdev, "Device is already powered down\n"); - DBGPR("<--xgbe_powerdown\n"); + if (!netif_running(netdev)) { + netdev_dbg(netdev, "Device is not running, skipping powerdown\n"); return -EINVAL; } - if (caller == XGMAC_DRIVER_CONTEXT) - netif_device_detach(netdev); + if (pdata->power_down) { + netdev_dbg(netdev, "Device is already powered down\n"); + return -EINVAL; + } + netif_device_detach(netdev); netif_tx_stop_all_queues(netdev); xgbe_stop_timers(pdata); flush_workqueue(pdata->dev_workqueue); + xgbe_napi_disable(pdata, 0); + hw_if->powerdown_tx(pdata); hw_if->powerdown_rx(pdata); - xgbe_napi_disable(pdata, 0); - pdata->power_down = 1; - DBGPR("<--xgbe_powerdown\n"); - return 0; } -int xgbe_powerup(struct net_device *netdev, unsigned int caller) +int xgbe_powerup(struct net_device *netdev) { struct xgbe_prv_data *pdata = netdev_priv(netdev); struct xgbe_hw_if *hw_if = &pdata->hw_if; - DBGPR("-->xgbe_powerup\n"); - - if (!netif_running(netdev) || - (caller == XGMAC_IOCTL_CONTEXT && !pdata->power_down)) { - netdev_alert(netdev, "Device is already powered up\n"); - DBGPR("<--xgbe_powerup\n"); + if (!netif_running(netdev)) { + netdev_dbg(netdev, "Device is not running, skipping powerup\n"); return -EINVAL; } - pdata->power_down = 0; - - xgbe_napi_enable(pdata, 0); + if (!pdata->power_down) { + netdev_dbg(netdev, "Device is already powered up\n"); + return -EINVAL; + } hw_if->powerup_tx(pdata); hw_if->powerup_rx(pdata); - if (caller == XGMAC_DRIVER_CONTEXT) - netif_device_attach(netdev); + xgbe_napi_enable(pdata, 0); netif_tx_start_all_queues(netdev); - xgbe_start_timers(pdata); + netif_device_attach(netdev); - DBGPR("<--xgbe_powerup\n"); + pdata->power_down = 0; return 0; } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c index f54a5040a493..9e09b269cb1d 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c @@ -360,14 +360,14 @@ static void xgbe_pci_remove(struct pci_dev *pdev) xgbe_free_pdata(pdata); } -static int __maybe_unused xgbe_pci_suspend(struct device *dev) +static int xgbe_pci_suspend(struct device *dev) { struct xgbe_prv_data *pdata = dev_get_drvdata(dev); struct net_device *netdev = pdata->netdev; int ret = 0; if (netif_running(netdev)) - ret = xgbe_powerdown(netdev, XGMAC_DRIVER_CONTEXT); + ret = xgbe_powerdown(netdev); pdata->lpm_ctrl = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1); pdata->lpm_ctrl |= MDIO_CTRL1_LPOWER; @@ -376,7 +376,7 @@ static int __maybe_unused xgbe_pci_suspend(struct device *dev) return ret; } -static int __maybe_unused xgbe_pci_resume(struct device *dev) +static int xgbe_pci_resume(struct device *dev) { struct xgbe_prv_data *pdata = dev_get_drvdata(dev); struct net_device *netdev = pdata->netdev; @@ -388,7 +388,7 @@ static int __maybe_unused xgbe_pci_resume(struct device *dev) XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl); if (netif_running(netdev)) { - ret = xgbe_powerup(netdev, XGMAC_DRIVER_CONTEXT); + ret = xgbe_powerup(netdev); /* Schedule a restart in case the link or phy state changed * while we were powered down. @@ -461,16 +461,16 @@ static const struct pci_device_id xgbe_pci_table[] = { }; MODULE_DEVICE_TABLE(pci, xgbe_pci_table); -static SIMPLE_DEV_PM_OPS(xgbe_pci_pm_ops, xgbe_pci_suspend, xgbe_pci_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(xgbe_pci_pm_ops, + xgbe_pci_suspend, + xgbe_pci_resume); static struct pci_driver xgbe_driver = { .name = XGBE_DRV_NAME, .id_table = xgbe_pci_table, .probe = xgbe_pci_probe, .remove = xgbe_pci_remove, - .driver = { - .pm = &xgbe_pci_pm_ops, - } + .driver.pm = pm_sleep_ptr(&xgbe_pci_pm_ops), }; int xgbe_pci_init(void) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c index 47d53e59ccf6..98b03a3f3a95 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c @@ -384,7 +384,7 @@ static int xgbe_platform_suspend(struct device *dev) DBGPR("-->xgbe_suspend\n"); if (netif_running(netdev)) - ret = xgbe_powerdown(netdev, XGMAC_DRIVER_CONTEXT); + ret = xgbe_powerdown(netdev); pdata->lpm_ctrl = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1); pdata->lpm_ctrl |= MDIO_CTRL1_LPOWER; @@ -407,7 +407,7 @@ static int xgbe_platform_resume(struct device *dev) XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl); if (netif_running(netdev)) { - ret = xgbe_powerup(netdev, XGMAC_DRIVER_CONTEXT); + ret = xgbe_powerup(netdev); /* Schedule a restart in case the link or phy state changed * while we were powered down. diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index 4333d269ee84..3b9345506c2b 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -146,10 +146,6 @@ #define XGBE_MAX_PPS_OUT 4 #define XGBE_MAX_AUX_SNAP 4 -/* Driver PMT macros */ -#define XGMAC_DRIVER_CONTEXT 1 -#define XGMAC_IOCTL_CONTEXT 2 - #define XGMAC_FIFO_MIN_ALLOC 2048 #define XGMAC_FIFO_UNIT 256 #define XGMAC_FIFO_ALIGN(_x) \ @@ -1309,8 +1305,8 @@ void xgbe_dump_rx_desc(struct xgbe_prv_data *, struct xgbe_ring *, unsigned int); void xgbe_print_pkt(struct net_device *, struct sk_buff *, bool); void xgbe_get_all_hw_features(struct xgbe_prv_data *); -int xgbe_powerup(struct net_device *, unsigned int); -int xgbe_powerdown(struct net_device *, unsigned int); +int xgbe_powerup(struct net_device *netdev); +int xgbe_powerdown(struct net_device *netdev); void xgbe_init_rx_coalesce(struct xgbe_prv_data *); void xgbe_init_tx_coalesce(struct xgbe_prv_data *); void xgbe_restart_dev(struct xgbe_prv_data *pdata); From 7644e76956baa9a6bc3d208dfd92928f9ecd6a93 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Sun, 8 Mar 2026 14:58:51 +0530 Subject: [PATCH 0431/1561] amd-xgbe: add PCI power management for S0i3 support The current suspend/resume implementation does not correctly handle PCI device power state transitions, which prevents AMD platforms from reaching the deepest suspend state (S0i3) when the amd-xgbe driver is enabled. In particular, the amd_pmc driver reports: "Last suspend didn't reach deepest state" when this device is present. Implement proper PCI power management operations following the standard PCI PM model so that the device can be cleanly powered down and resumed. Suspend path: - Power down the network interface - Put the PHY into low-power mode - Disable bus mastering to prevent DMA activity - Save PCI configuration space - Disable the PCI device - Disable wake from D3 (S0i3 does not require Wake-on-LAN) - Set the device to D3hot Resume path: - Restore the PCI power state to D0 - Restore PCI configuration space - Enable the PCI device - Re-enable bus mastering - Re-enable device interrupts - Clear the PHY low-power mode - Power up the network interface This allows systems using amd-xgbe to reach the deepest suspend state when entering modern standby (S0i3). Signed-off-by: Raju Rangoju Link: https://patch.msgid.link/20260308092851.1510214-3-Raju.Rangoju@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/xgbe/xgbe-pci.c | 67 ++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c index 9e09b269cb1d..dbdd791380a4 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c @@ -360,19 +360,67 @@ static void xgbe_pci_remove(struct pci_dev *pdev) xgbe_free_pdata(pdata); } +static void xgbe_pci_synchronize_irqs(struct xgbe_prv_data *pdata) +{ + unsigned int i; + + /* Synchronize main device interrupt */ + synchronize_irq(pdata->dev_irq); + + /* Synchronize ECC interrupt if separate from main device interrupt */ + if (pdata->vdata->ecc_support && pdata->dev_irq != pdata->ecc_irq) + synchronize_irq(pdata->ecc_irq); + + /* Synchronize I2C interrupt if separate from main device interrupt */ + if (pdata->vdata->i2c_support && pdata->dev_irq != pdata->i2c_irq) + synchronize_irq(pdata->i2c_irq); + + /* Synchronize AN interrupt if separate from main device interrupt */ + if (pdata->dev_irq != pdata->an_irq) + synchronize_irq(pdata->an_irq); + + /* Synchronize per-channel DMA interrupts */ + if (pdata->per_channel_irq) { + for (i = 0; i < pdata->channel_count; i++) + synchronize_irq(pdata->channel[i]->dma_irq); + } +} + static int xgbe_pci_suspend(struct device *dev) { struct xgbe_prv_data *pdata = dev_get_drvdata(dev); struct net_device *netdev = pdata->netdev; + struct pci_dev *pdev = to_pci_dev(dev); int ret = 0; if (netif_running(netdev)) ret = xgbe_powerdown(netdev); + /* Disable all device interrupts to prevent spurious wakeups */ + XP_IOWRITE(pdata, XP_INT_EN, 0x0); + + /* Ensure no IRQ handlers are still executing before powering down. + * This prevents race conditions where an IRQ handler could access + * invalid register state after the device is disabled. + */ + xgbe_pci_synchronize_irqs(pdata); + + /* Set PHY to low-power mode */ pdata->lpm_ctrl = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1); pdata->lpm_ctrl |= MDIO_CTRL1_LPOWER; XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl); + /* Disable bus mastering to prevent DMA activity */ + pci_clear_master(pdev); + + /* Save PCI configuration state and disable device */ + pci_save_state(pdev); + pci_disable_device(pdev); + + /* Disable wake from D3 - required for S0i3 deep sleep */ + pci_wake_from_d3(pdev, false); + pci_set_power_state(pdev, PCI_D3hot); + return ret; } @@ -380,10 +428,29 @@ static int xgbe_pci_resume(struct device *dev) { struct xgbe_prv_data *pdata = dev_get_drvdata(dev); struct net_device *netdev = pdata->netdev; + struct pci_dev *pdev = to_pci_dev(dev); int ret = 0; + /* Restore PCI power state */ + pci_set_power_state(pdev, PCI_D0); + + /* Restore PCI configuration state */ + pci_restore_state(pdev); + + /* Enable PCI device */ + ret = pci_enable_device(pdev); + if (ret) { + dev_err(dev, "pci_enable_device failed: %d\n", ret); + return ret; + } + + /* Re-enable bus mastering */ + pci_set_master(pdev); + + /* Re-enable all device interrupts */ XP_IOWRITE(pdata, XP_INT_EN, 0x1fffff); + /* Clear PHY low-power mode */ pdata->lpm_ctrl &= ~MDIO_CTRL1_LPOWER; XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl); From 1a6ca6497a40f5c7795575a35f0da7d013b79bdd Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 9 Mar 2026 09:26:41 +0100 Subject: [PATCH 0432/1561] net: mdio: mvusb: drop redundant device reference Driver core holds a reference to the USB interface and its parent USB device while the interface is bound to a driver and there is no need to take additional references unless the structures are needed after disconnect. Drop the redundant device reference to reduce cargo culting, make it easier to spot drivers where an extra reference is needed, and reduce the risk of memory leaks when drivers fail to release it. Signed-off-by: Johan Hovold Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260309082641.15574-1-johan@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/mdio/mdio-mvusb.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/net/mdio/mdio-mvusb.c b/drivers/net/mdio/mdio-mvusb.c index 554837c21e73..86786ecaa4b1 100644 --- a/drivers/net/mdio/mdio-mvusb.c +++ b/drivers/net/mdio/mdio-mvusb.c @@ -67,7 +67,6 @@ static int mvusb_mdio_probe(struct usb_interface *interface, struct device *dev = &interface->dev; struct mvusb_mdio *mvusb; struct mii_bus *mdio; - int ret; mdio = devm_mdiobus_alloc_size(dev, sizeof(*mvusb)); if (!mdio) @@ -75,7 +74,7 @@ static int mvusb_mdio_probe(struct usb_interface *interface, mvusb = mdio->priv; mvusb->mdio = mdio; - mvusb->udev = usb_get_dev(interface_to_usbdev(interface)); + mvusb->udev = interface_to_usbdev(interface); /* Reversed from USB PCAPs, no idea what these mean. */ mvusb->buf[MVUSB_CMD_PREAMBLE0] = cpu_to_le16(0xe800); @@ -88,25 +87,16 @@ static int mvusb_mdio_probe(struct usb_interface *interface, mdio->write = mvusb_mdio_write; usb_set_intfdata(interface, mvusb); - ret = of_mdiobus_register(mdio, dev->of_node); - if (ret) - goto put_dev; - return 0; - -put_dev: - usb_put_dev(mvusb->udev); - return ret; + return of_mdiobus_register(mdio, dev->of_node); } static void mvusb_mdio_disconnect(struct usb_interface *interface) { struct mvusb_mdio *mvusb = usb_get_intfdata(interface); - struct usb_device *udev = mvusb->udev; mdiobus_unregister(mvusb->mdio); usb_set_intfdata(interface, NULL); - usb_put_dev(udev); } static struct usb_driver mvusb_mdio_driver = { From 7a6387dec8cee5a237dc5092269e97028f5a983b Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 9 Mar 2026 09:39:18 +0000 Subject: [PATCH 0433/1561] net: stmmac: provide plat_dat->dma_cfg in stmmac_plat_dat_alloc() plat_dat->dma_cfg is unconditionally required for the operation of the driver, so it would make sense to allocate it along with the plat_dat. On Arm64, sizeof(*plat_dat) has recently shrunk from 880 to 816 bytes and sizeof(*plat_dat->dma_cfg) has shrunk from 32 to 20 bytes. Given that dma_cfg is required, and it is now less than a cache line, It doesn't make sense to allocate this separateny, so place it at the end of struct plat_stmmacenet_data, and set plat_dat->dma_cfg to point at that to avoid mass changes. Signed-off-by: Russell King (Oracle) Reviewed-by: Mohd Ayaan Anwar Tested-by: Mohd Ayaan Anwar Link: https://patch.msgid.link/E1vzX54-0000000CVrw-2jfu@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 5 ----- drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c | 4 ---- drivers/net/ethernet/stmicro/stmmac/dwmac-motorcomm.c | 4 ---- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 ++ drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c | 5 ----- drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 8 +------- include/linux/stmmac.h | 1 + 7 files changed, 4 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index fc13bfb47783..0b32560cd059 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -1251,11 +1251,6 @@ static int intel_eth_pci_probe(struct pci_dev *pdev, if (!plat->mdio_bus_data) return -ENOMEM; - plat->dma_cfg = devm_kzalloc(&pdev->dev, sizeof(*plat->dma_cfg), - GFP_KERNEL); - if (!plat->dma_cfg) - return -ENOMEM; - plat->safety_feat_cfg = devm_kzalloc(&pdev->dev, sizeof(*plat->safety_feat_cfg), GFP_KERNEL); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c index ada6c6ef1f5c..51b1562f84d1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -513,10 +513,6 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id if (!plat->mdio_bus_data) return -ENOMEM; - plat->dma_cfg = devm_kzalloc(&pdev->dev, sizeof(*plat->dma_cfg), GFP_KERNEL); - if (!plat->dma_cfg) - return -ENOMEM; - ld = devm_kzalloc(&pdev->dev, sizeof(*ld), GFP_KERNEL); if (!ld) return -ENOMEM; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-motorcomm.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-motorcomm.c index 8b45b9cf7202..d245546b90db 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-motorcomm.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-motorcomm.c @@ -218,10 +218,6 @@ motorcomm_default_plat_data(struct pci_dev *pdev) if (!plat->mdio_bus_data) return NULL; - plat->dma_cfg = devm_kzalloc(dev, sizeof(*plat->dma_cfg), GFP_KERNEL); - if (!plat->dma_cfg) - return NULL; - plat->axi = devm_kzalloc(dev, sizeof(*plat->axi), GFP_KERNEL); if (!plat->axi) return NULL; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index f0160ff54a59..87f43811faa0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -7730,6 +7730,8 @@ struct plat_stmmacenet_data *stmmac_plat_dat_alloc(struct device *dev) if (!plat_dat) return NULL; + plat_dat->dma_cfg = &plat_dat->__dma_cfg; + /* Set the defaults: * - phy autodetection * - determine GMII_Address CR field from CSR clock diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c index 270ad066ced3..836fed7d60ab 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c @@ -134,11 +134,6 @@ static int stmmac_pci_probe(struct pci_dev *pdev, if (!plat->mdio_bus_data) return -ENOMEM; - plat->dma_cfg = devm_kzalloc(&pdev->dev, sizeof(*plat->dma_cfg), - GFP_KERNEL); - if (!plat->dma_cfg) - return -ENOMEM; - plat->safety_feat_cfg = devm_kzalloc(&pdev->dev, sizeof(*plat->safety_feat_cfg), GFP_KERNEL); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index c34998486293..1aed48fe0db6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -548,13 +548,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) &plat->multicast_filter_bins); } - dma_cfg = devm_kzalloc(&pdev->dev, sizeof(*dma_cfg), - GFP_KERNEL); - if (!dma_cfg) { - ret = ERR_PTR(-ENOMEM); - goto error_put_mdio; - } - plat->dma_cfg = dma_cfg; + dma_cfg = plat->dma_cfg; of_property_read_u32(np, "snps,pbl", &dma_cfg->pbl); if (!dma_cfg->pbl) diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 965ada809fdf..919196713c05 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -306,5 +306,6 @@ struct plat_stmmacenet_data { int msi_tx_base_vec; const struct dwmac4_addrs *dwmac4_addrs; unsigned int flags; + struct stmmac_dma_cfg __dma_cfg; }; #endif From c3d08424e025aaac8fb54134f76e611ef919cd08 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 9 Mar 2026 09:39:23 +0000 Subject: [PATCH 0434/1561] net: stmmac: convert plat_stmmacenet_data booleans to type bool Convert members of struct plat_stmmacenet_data that are booleans to type 'bool' and ensure their initialisers are true/false. Move the has_xxx for the GMAC cores together, and move the COE members to the end of the list of bool to avoid unused holes in the struct. Signed-off-by: Russell King (Oracle) Reviewed-by: Mohd Ayaan Anwar Tested-by: Mohd Ayaan Anwar Link: https://patch.msgid.link/E1vzX59-0000000CVs2-3MHc@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../stmicro/stmmac/dwmac-dwc-qos-eth.c | 2 +- .../net/ethernet/stmicro/stmmac/dwmac-intel.c | 2 +- .../ethernet/stmicro/stmmac/dwmac-loongson.c | 2 +- .../ethernet/stmicro/stmmac/dwmac-mediatek.c | 2 +- .../stmicro/stmmac/dwmac-qcom-ethqos.c | 2 +- .../net/ethernet/stmicro/stmmac/dwmac-s32.c | 2 +- .../ethernet/stmicro/stmmac/dwmac-socfpga.c | 2 +- .../net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 2 +- .../net/ethernet/stmicro/stmmac/dwmac-sunxi.c | 2 +- .../net/ethernet/stmicro/stmmac/dwmac-tegra.c | 2 +- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- .../net/ethernet/stmicro/stmmac/stmmac_pci.c | 6 +++--- .../ethernet/stmicro/stmmac/stmmac_platform.c | 20 +++++++++---------- include/linux/stmmac.h | 14 ++++++------- 14 files changed, 31 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c index 0495437d3a6e..b0c5d1ecabce 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c @@ -88,7 +88,7 @@ static int dwc_eth_dwmac_config_dt(struct platform_device *pdev, plat_dat->core_type = DWMAC_CORE_GMAC4; plat_dat->dma_cfg->aal = 1; plat_dat->flags |= STMMAC_FLAG_TSO_EN; - plat_dat->pmt = 1; + plat_dat->pmt = true; return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index 0b32560cd059..421c6c81ca5e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -566,7 +566,7 @@ static void common_default_data(struct plat_stmmacenet_data *plat) /* clk_csr_i = 20-35MHz & MDC = clk_csr_i/16 */ plat->clk_csr = STMMAC_CSR_20_35M; plat->core_type = DWMAC_CORE_GMAC; - plat->force_sf_dma_mode = 1; + plat->force_sf_dma_mode = true; plat->mdio_bus_data->needs_reset = true; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c index 51b1562f84d1..eb14c197d6ae 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -94,7 +94,7 @@ static void loongson_default_data(struct pci_dev *pdev, /* clk_csr_i = 100-150MHz & MDC = clk_csr_i/62 */ plat->clk_csr = STMMAC_CSR_100_150M; plat->core_type = DWMAC_CORE_GMAC; - plat->force_sf_dma_mode = 1; + plat->force_sf_dma_mode = true; /* Increase the default value for multicast hash bins */ plat->multicast_filter_bins = 256; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c index 1f2d7d19ca56..a139db6a8cbb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c @@ -564,7 +564,7 @@ static int mediatek_dwmac_common_data(struct platform_device *pdev, plat->flags &= ~STMMAC_FLAG_USE_PHY_WOL; else plat->flags |= STMMAC_FLAG_USE_PHY_WOL; - plat->riwt_off = 1; + plat->riwt_off = true; plat->maxmtu = ETH_DATA_LEN; plat->host_dma_width = priv_plat->variant->dma_bit_mask; plat->bsp_priv = priv_plat; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index cb1c074c2053..388e9fdeb86c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -817,7 +817,7 @@ static int qcom_ethqos_probe(struct platform_device *pdev) plat_dat->core_type = DWMAC_CORE_GMAC4; if (ethqos->has_emac_ge_3) plat_dat->dwmac4_addrs = &data->dwmac4_addrs; - plat_dat->pmt = 1; + plat_dat->pmt = true; if (of_property_read_bool(np, "snps,tso")) plat_dat->flags |= STMMAC_FLAG_TSO_EN; if (of_device_is_compatible(np, "qcom,qcs404-ethqos")) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c index af594a096676..48fceadc55b1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c @@ -163,7 +163,7 @@ static int s32_dwmac_probe(struct platform_device *pdev) /* S32CC core feature set */ plat->core_type = DWMAC_CORE_GMAC4; - plat->pmt = 1; + plat->pmt = true; plat->flags |= STMMAC_FLAG_SPH_DISABLE; plat->rx_fifo_size = 20480; plat->tx_fifo_size = 20480; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index c6b99814d391..5f89fd968ae9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -565,7 +565,7 @@ static void socfpga_gen5_setup_plat_dat(struct socfpga_dwmac *dwmac) plat_dat->core_type = DWMAC_CORE_GMAC; /* Rx watchdog timer in dwmac is buggy in this hw */ - plat_dat->riwt_off = 1; + plat_dat->riwt_off = true; } static void socfpga_agilex5_setup_plat_dat(struct socfpga_dwmac *dwmac) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 3ce03b059277..6dbe5d5a3224 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -1179,7 +1179,7 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) * hardware features were copied from Allwinner drivers. */ plat_dat->rx_coe = STMMAC_RX_COE_TYPE2; - plat_dat->tx_coe = 1; + plat_dat->tx_coe = true; plat_dat->flags |= STMMAC_FLAG_HAS_SUN8I; plat_dat->bsp_priv = gmac; plat_dat->init = sun8i_dwmac_init; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c index 52593ba3a3a3..74bd996d93c9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c @@ -135,7 +135,7 @@ static int sun7i_gmac_probe(struct platform_device *pdev) /* platform data specifying hardware features and callbacks. * hardware features were copied from Allwinner drivers. */ - plat_dat->tx_coe = 1; + plat_dat->tx_coe = true; plat_dat->core_type = DWMAC_CORE_GMAC; plat_dat->bsp_priv = gmac; plat_dat->init = sun7i_gmac_init; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c index d765acbe3754..b4b39e6a169e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c @@ -310,7 +310,7 @@ static int tegra_mgbe_probe(struct platform_device *pdev) plat->core_type = DWMAC_CORE_XGMAC; plat->flags |= STMMAC_FLAG_TSO_EN; - plat->pmt = 1; + plat->pmt = true; plat->bsp_priv = mgbe; if (!plat->mdio_node) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 87f43811faa0..939431255fa5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -7401,7 +7401,7 @@ static int stmmac_hw_init(struct stmmac_priv *priv) /* TXCOE doesn't work in thresh DMA mode */ if (priv->plat->force_thresh_dma_mode) - priv->plat->tx_coe = 0; + priv->plat->tx_coe = false; else priv->plat->tx_coe = priv->dma_cap.tx_coe; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c index 836fed7d60ab..d584fd2daa6f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c @@ -25,7 +25,7 @@ static void common_default_data(struct plat_stmmacenet_data *plat) /* clk_csr_i = 20-35MHz & MDC = clk_csr_i/16 */ plat->clk_csr = STMMAC_CSR_20_35M; plat->core_type = DWMAC_CORE_GMAC; - plat->force_sf_dma_mode = 1; + plat->force_sf_dma_mode = true; plat->mdio_bus_data->needs_reset = true; } @@ -58,9 +58,9 @@ static int snps_gmac5_default_data(struct pci_dev *pdev, plat->clk_csr = STMMAC_CSR_250_300M; plat->core_type = DWMAC_CORE_GMAC4; - plat->force_sf_dma_mode = 1; + plat->force_sf_dma_mode = true; plat->flags |= STMMAC_FLAG_TSO_EN; - plat->pmt = 1; + plat->pmt = true; /* Set default number of RX and TX queues to use */ plat->tx_queues_to_use = 4; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 1aed48fe0db6..0d3bad0f8915 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -514,34 +514,34 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) plat->multicast_filter_bins = dwmac1000_validate_mcast_bins( &pdev->dev, plat->multicast_filter_bins); plat->core_type = DWMAC_CORE_GMAC; - plat->pmt = 1; + plat->pmt = true; } if (of_device_is_compatible(np, "snps,dwmac-3.40a")) { plat->core_type = DWMAC_CORE_GMAC; - plat->enh_desc = 1; - plat->tx_coe = 1; - plat->bugged_jumbo = 1; - plat->pmt = 1; + plat->enh_desc = true; + plat->tx_coe = true; + plat->bugged_jumbo = true; + plat->pmt = true; } if (of_device_compatible_match(np, stmmac_gmac4_compats)) { plat->core_type = DWMAC_CORE_GMAC4; - plat->pmt = 1; + plat->pmt = true; if (of_property_read_bool(np, "snps,tso")) plat->flags |= STMMAC_FLAG_TSO_EN; } if (of_device_is_compatible(np, "snps,dwmac-3.610") || of_device_is_compatible(np, "snps,dwmac-3.710")) { - plat->enh_desc = 1; - plat->bugged_jumbo = 1; - plat->force_sf_dma_mode = 1; + plat->enh_desc = true; + plat->bugged_jumbo = true; + plat->force_sf_dma_mode = true; } if (of_device_is_compatible(np, "snps,dwxgmac")) { plat->core_type = DWMAC_CORE_XGMAC; - plat->pmt = 1; + plat->pmt = true; if (of_property_read_bool(np, "snps,tso")) plat->flags |= STMMAC_FLAG_TSO_EN; of_property_read_u32(np, "snps,multicast-filter-bins", diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 919196713c05..9420da96a4ff 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -229,14 +229,14 @@ struct plat_stmmacenet_data { struct stmmac_dma_cfg *dma_cfg; struct stmmac_safety_feature_cfg *safety_feat_cfg; int clk_csr; - int enh_desc; - int tx_coe; + bool enh_desc; + bool tx_coe; + bool bugged_jumbo; + bool pmt; + bool force_sf_dma_mode; + bool force_thresh_dma_mode; + bool riwt_off; int rx_coe; - int bugged_jumbo; - int pmt; - int force_sf_dma_mode; - int force_thresh_dma_mode; - int riwt_off; int max_speed; int maxmtu; int multicast_filter_bins; From 3357642e65e9454c3da64b62c0ed987ee4010008 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 9 Mar 2026 09:39:28 +0000 Subject: [PATCH 0435/1561] net: stmmac: reorder structs to reduce memory consumption Reorder some of the stmmac structures to allow them to pack better, thereby using less memory. On aarch64, sizeof(struct stmmac_priv) was 880, and with this change becomes 816, saving 64 bytes, which is an 8% saving. Signed-off-by: Russell King (Oracle) Tested-by: Mohd Ayaan Anwar Link: https://patch.msgid.link/E1vzX5E-0000000CVs8-40w4@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/stmmac.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 9420da96a4ff..411cdd3ea034 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -108,37 +108,37 @@ struct stmmac_dma_cfg { #define AXI_BLEN 7 struct stmmac_axi { - bool axi_lpi_en; - bool axi_xit_frm; u32 axi_wr_osr_lmt; u32 axi_rd_osr_lmt; - bool axi_kbbe; u32 axi_blen_regval; + bool axi_lpi_en; + bool axi_xit_frm; + bool axi_kbbe; bool axi_fb; bool axi_mb; bool axi_rb; }; struct stmmac_rxq_cfg { - u8 mode_to_use; u32 chan; + u32 prio; + u8 mode_to_use; u8 pkt_route; bool use_prio; - u32 prio; }; struct stmmac_txq_cfg { u32 weight; - bool coe_unsupported; - u8 mode_to_use; /* Credit Base Shaper parameters */ u32 send_slope; u32 idle_slope; u32 high_credit; u32 low_credit; - bool use_prio; u32 prio; int tbs_en; + bool use_prio; + bool coe_unsupported; + u8 mode_to_use; }; struct stmmac_safety_feature_cfg { From 94808793fed71ee47741df0923d353024b6904ff Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 9 Mar 2026 09:39:34 +0000 Subject: [PATCH 0436/1561] net: stmmac: use u8 for ?x_queues_to_use and number_?x_queues The maximum number of queues is a compile time constant of only eight. This makes using a 32-bit quantity wastefulf. Instead, use u8 for these and their associated variables. When reading the DT properties, saturdate at U8_MAX. Provided the core provides DMA capabilities to describe the number of queues, this will be capped by stmmac_hw_init() with a warning. Signed-off-by: Russell King (Oracle) Tested-by: Mohd Ayaan Anwar Link: https://patch.msgid.link/E1vzX5K-0000000CVsE-0J0Y@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/common.h | 4 +- .../net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 2 +- .../ethernet/stmicro/stmmac/dwmac1000_core.c | 2 +- .../ethernet/stmicro/stmmac/dwmac100_core.c | 2 +- .../net/ethernet/stmicro/stmmac/dwmac4_core.c | 4 +- .../ethernet/stmicro/stmmac/dwxgmac2_core.c | 4 +- drivers/net/ethernet/stmicro/stmmac/hwif.h | 2 +- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 2 +- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 223 +++++++++--------- .../ethernet/stmicro/stmmac/stmmac_platform.c | 15 +- include/linux/stmmac.h | 4 +- 11 files changed, 136 insertions(+), 128 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 46454e2886ce..f1628de8ed18 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -446,8 +446,8 @@ struct dma_features { unsigned int number_rx_channel; unsigned int number_tx_channel; /* TX and RX number of queues */ - unsigned int number_rx_queues; - unsigned int number_tx_queues; + u8 number_rx_queues; + u8 number_tx_queues; /* PPS output */ unsigned int pps_out_num; /* Number of Traffic Classes */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 6dbe5d5a3224..48c52eb96233 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -718,7 +718,7 @@ static void sun8i_dwmac_set_filter(struct mac_device_info *hw, static void sun8i_dwmac_flow_ctrl(struct mac_device_info *hw, unsigned int duplex, unsigned int fc, - unsigned int pause_time, u32 tx_cnt) + unsigned int pause_time, u8 tx_cnt) { void __iomem *ioaddr = hw->pcsr; u32 v; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index c7cb30672604..01f8353eb6ef 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -222,7 +222,7 @@ static void dwmac1000_set_filter(struct mac_device_info *hw, static void dwmac1000_flow_ctrl(struct mac_device_info *hw, unsigned int duplex, unsigned int fc, unsigned int pause_time, - u32 tx_cnt) + u8 tx_cnt) { void __iomem *ioaddr = hw->pcsr; /* Set flow such that DZPQ in Mac Register 6 is 0, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c index 6b5cf3a0866a..94d24d355d95 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c @@ -126,7 +126,7 @@ static void dwmac100_set_filter(struct mac_device_info *hw, static void dwmac100_flow_ctrl(struct mac_device_info *hw, unsigned int duplex, unsigned int fc, unsigned int pause_time, - u32 tx_cnt) + u8 tx_cnt) { void __iomem *ioaddr = hw->pcsr; unsigned int flow = MAC_FLOW_CTRL_ENABLE; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index e6bcb77b22a2..4c6fed3ecbcf 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -547,11 +547,11 @@ static void dwmac4_set_filter(struct mac_device_info *hw, static void dwmac4_flow_ctrl(struct mac_device_info *hw, unsigned int duplex, unsigned int fc, unsigned int pause_time, - u32 tx_cnt) + u8 tx_cnt) { void __iomem *ioaddr = hw->pcsr; unsigned int flow = 0; - u32 queue = 0; + u8 queue; pr_debug("GMAC Flow-Control:\n"); if (fc & FLOW_RX) { diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index efa76b147f9e..f02b434bbd50 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -355,10 +355,10 @@ static int dwxgmac2_host_mtl_irq_status(struct stmmac_priv *priv, static void dwxgmac2_flow_ctrl(struct mac_device_info *hw, unsigned int duplex, unsigned int fc, unsigned int pause_time, - u32 tx_cnt) + u8 tx_cnt) { void __iomem *ioaddr = hw->pcsr; - u32 i; + u8 i; if (fc & FLOW_RX) writel(XGMAC_RFE, ioaddr + XGMAC_RX_FLOW_CTRL); diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index 374f326efa01..010b4d32484a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -352,7 +352,7 @@ struct stmmac_ops { void (*set_filter)(struct mac_device_info *hw, struct net_device *dev); /* Flow control setting */ void (*flow_ctrl)(struct mac_device_info *hw, unsigned int duplex, - unsigned int fc, unsigned int pause_time, u32 tx_cnt); + unsigned int fc, unsigned int pause_time, u8 tx_cnt); /* Set power management mode (e.g. magic frame) */ void (*pmt)(struct mac_device_info *hw, unsigned long mode); /* Set/Get Unicast MAC addresses */ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 335e60439b42..bba9bb9c95bf 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -407,7 +407,7 @@ void stmmac_dvr_remove(struct device *dev); int stmmac_dvr_probe(struct device *device, struct plat_stmmacenet_data *plat_dat, struct stmmac_resources *res); -int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt); +int stmmac_reinit_queues(struct net_device *dev, u8 rx_cnt, u8 tx_cnt); int stmmac_reinit_ringparam(struct net_device *dev, u32 rx_size, u32 tx_size); int stmmac_set_clk_tx_rate(void *bsp_priv, struct clk *clk_tx_i, phy_interface_t interface, int speed); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 939431255fa5..11150bddd872 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -264,10 +264,10 @@ static void stmmac_verify_args(void) static void __stmmac_disable_all_queues(struct stmmac_priv *priv) { - u32 rx_queues_cnt = priv->plat->rx_queues_to_use; - u32 tx_queues_cnt = priv->plat->tx_queues_to_use; - u32 maxq = max(rx_queues_cnt, tx_queues_cnt); - u32 queue; + u8 rx_queues_cnt = priv->plat->rx_queues_to_use; + u8 tx_queues_cnt = priv->plat->tx_queues_to_use; + u8 maxq = max(rx_queues_cnt, tx_queues_cnt); + u8 queue; for (queue = 0; queue < maxq; queue++) { struct stmmac_channel *ch = &priv->channel[queue]; @@ -291,9 +291,9 @@ static void __stmmac_disable_all_queues(struct stmmac_priv *priv) */ static void stmmac_disable_all_queues(struct stmmac_priv *priv) { - u32 rx_queues_cnt = priv->plat->rx_queues_to_use; + u8 rx_queues_cnt = priv->plat->rx_queues_to_use; struct stmmac_rx_queue *rx_q; - u32 queue; + u8 queue; /* synchronize_rcu() needed for pending XDP buffers to drain */ for (queue = 0; queue < rx_queues_cnt; queue++) { @@ -313,10 +313,10 @@ static void stmmac_disable_all_queues(struct stmmac_priv *priv) */ static void stmmac_enable_all_queues(struct stmmac_priv *priv) { - u32 rx_queues_cnt = priv->plat->rx_queues_to_use; - u32 tx_queues_cnt = priv->plat->tx_queues_to_use; - u32 maxq = max(rx_queues_cnt, tx_queues_cnt); - u32 queue; + u8 rx_queues_cnt = priv->plat->rx_queues_to_use; + u8 tx_queues_cnt = priv->plat->tx_queues_to_use; + u8 maxq = max(rx_queues_cnt, tx_queues_cnt); + u8 queue; for (queue = 0; queue < maxq; queue++) { struct stmmac_channel *ch = &priv->channel[queue]; @@ -377,8 +377,8 @@ static inline u32 stmmac_rx_dirty(struct stmmac_priv *priv, u32 queue) static bool stmmac_eee_tx_busy(struct stmmac_priv *priv) { - u32 tx_cnt = priv->plat->tx_queues_to_use; - u32 queue; + u8 tx_cnt = priv->plat->tx_queues_to_use; + u8 queue; /* check if all TX queues have the work finished */ for (queue = 0; queue < tx_cnt; queue++) { @@ -909,7 +909,7 @@ static int stmmac_legacy_serdes_power_up(struct stmmac_priv *priv) static void stmmac_mac_flow_ctrl(struct stmmac_priv *priv, u32 duplex, unsigned int flow_ctrl) { - u32 tx_cnt = priv->plat->tx_queues_to_use; + u8 tx_cnt = priv->plat->tx_queues_to_use; stmmac_flow_ctrl(priv, priv->hw, duplex, flow_ctrl, priv->pause_time, tx_cnt); @@ -1410,10 +1410,10 @@ static int stmmac_phylink_setup(struct stmmac_priv *priv) static void stmmac_display_rx_rings(struct stmmac_priv *priv, struct stmmac_dma_conf *dma_conf) { - u32 rx_cnt = priv->plat->rx_queues_to_use; + u8 rx_cnt = priv->plat->rx_queues_to_use; unsigned int desc_size; void *head_rx; - u32 queue; + u8 queue; /* Display RX rings */ for (queue = 0; queue < rx_cnt; queue++) { @@ -1438,10 +1438,10 @@ static void stmmac_display_rx_rings(struct stmmac_priv *priv, static void stmmac_display_tx_rings(struct stmmac_priv *priv, struct stmmac_dma_conf *dma_conf) { - u32 tx_cnt = priv->plat->tx_queues_to_use; + u8 tx_cnt = priv->plat->tx_queues_to_use; unsigned int desc_size; void *head_tx; - u32 queue; + u8 queue; /* Display TX rings */ for (queue = 0; queue < tx_cnt; queue++) { @@ -1571,9 +1571,9 @@ static void stmmac_clear_tx_descriptors(struct stmmac_priv *priv, static void stmmac_clear_descriptors(struct stmmac_priv *priv, struct stmmac_dma_conf *dma_conf) { - u32 rx_queue_cnt = priv->plat->rx_queues_to_use; - u32 tx_queue_cnt = priv->plat->tx_queues_to_use; - u32 queue; + u8 rx_queue_cnt = priv->plat->rx_queues_to_use; + u8 tx_queue_cnt = priv->plat->tx_queues_to_use; + u8 queue; /* Clear the RX descriptors */ for (queue = 0; queue < rx_queue_cnt; queue++) @@ -1891,7 +1891,7 @@ static int init_dma_rx_desc_rings(struct net_device *dev, gfp_t flags) { struct stmmac_priv *priv = netdev_priv(dev); - u32 rx_count = priv->plat->rx_queues_to_use; + u8 rx_count = priv->plat->rx_queues_to_use; int queue; int ret; @@ -1985,8 +1985,8 @@ static int init_dma_tx_desc_rings(struct net_device *dev, struct stmmac_dma_conf *dma_conf) { struct stmmac_priv *priv = netdev_priv(dev); - u32 tx_queue_cnt; - u32 queue; + u8 tx_queue_cnt; + u8 queue; tx_queue_cnt = priv->plat->tx_queues_to_use; @@ -2057,8 +2057,8 @@ static void dma_free_tx_skbufs(struct stmmac_priv *priv, */ static void stmmac_free_tx_skbufs(struct stmmac_priv *priv) { - u32 tx_queue_cnt = priv->plat->tx_queues_to_use; - u32 queue; + u8 tx_queue_cnt = priv->plat->tx_queues_to_use; + u8 queue; for (queue = 0; queue < tx_queue_cnt; queue++) dma_free_tx_skbufs(priv, &priv->dma_conf, queue); @@ -2106,8 +2106,8 @@ static void __free_dma_rx_desc_resources(struct stmmac_priv *priv, static void free_dma_rx_desc_resources(struct stmmac_priv *priv, struct stmmac_dma_conf *dma_conf) { - u32 rx_count = priv->plat->rx_queues_to_use; - u32 queue; + u8 rx_count = priv->plat->rx_queues_to_use; + u8 queue; /* Free RX queue resources */ for (queue = 0; queue < rx_count; queue++) @@ -2153,8 +2153,8 @@ static void __free_dma_tx_desc_resources(struct stmmac_priv *priv, static void free_dma_tx_desc_resources(struct stmmac_priv *priv, struct stmmac_dma_conf *dma_conf) { - u32 tx_count = priv->plat->tx_queues_to_use; - u32 queue; + u8 tx_count = priv->plat->tx_queues_to_use; + u8 queue; /* Free TX queue resources */ for (queue = 0; queue < tx_count; queue++) @@ -2255,8 +2255,8 @@ static int __alloc_dma_rx_desc_resources(struct stmmac_priv *priv, static int alloc_dma_rx_desc_resources(struct stmmac_priv *priv, struct stmmac_dma_conf *dma_conf) { - u32 rx_count = priv->plat->rx_queues_to_use; - u32 queue; + u8 rx_count = priv->plat->rx_queues_to_use; + u8 queue; int ret; /* RX queues buffers and DMA */ @@ -2331,8 +2331,8 @@ static int __alloc_dma_tx_desc_resources(struct stmmac_priv *priv, static int alloc_dma_tx_desc_resources(struct stmmac_priv *priv, struct stmmac_dma_conf *dma_conf) { - u32 tx_count = priv->plat->tx_queues_to_use; - u32 queue; + u8 tx_count = priv->plat->tx_queues_to_use; + u8 queue; int ret; /* TX queues buffers and DMA */ @@ -2396,8 +2396,8 @@ static void free_dma_desc_resources(struct stmmac_priv *priv, */ static void stmmac_mac_enable_rx_queues(struct stmmac_priv *priv) { - u32 rx_queues_count = priv->plat->rx_queues_to_use; - int queue; + u8 rx_queues_count = priv->plat->rx_queues_to_use; + u8 queue; u8 mode; for (queue = 0; queue < rx_queues_count; queue++) { @@ -2460,10 +2460,10 @@ static void stmmac_stop_tx_dma(struct stmmac_priv *priv, u32 chan) static void stmmac_enable_all_dma_irq(struct stmmac_priv *priv) { - u32 rx_channels_count = priv->plat->rx_queues_to_use; - u32 tx_channels_count = priv->plat->tx_queues_to_use; - u32 dma_csr_ch = max(rx_channels_count, tx_channels_count); - u32 chan; + u8 rx_channels_count = priv->plat->rx_queues_to_use; + u8 tx_channels_count = priv->plat->tx_queues_to_use; + u8 dma_csr_ch = max(rx_channels_count, tx_channels_count); + u8 chan; for (chan = 0; chan < dma_csr_ch; chan++) { struct stmmac_channel *ch = &priv->channel[chan]; @@ -2483,9 +2483,9 @@ static void stmmac_enable_all_dma_irq(struct stmmac_priv *priv) */ static void stmmac_start_all_dma(struct stmmac_priv *priv) { - u32 rx_channels_count = priv->plat->rx_queues_to_use; - u32 tx_channels_count = priv->plat->tx_queues_to_use; - u32 chan = 0; + u8 rx_channels_count = priv->plat->rx_queues_to_use; + u8 tx_channels_count = priv->plat->tx_queues_to_use; + u8 chan; for (chan = 0; chan < rx_channels_count; chan++) stmmac_start_rx_dma(priv, chan); @@ -2502,9 +2502,9 @@ static void stmmac_start_all_dma(struct stmmac_priv *priv) */ static void stmmac_stop_all_dma(struct stmmac_priv *priv) { - u32 rx_channels_count = priv->plat->rx_queues_to_use; - u32 tx_channels_count = priv->plat->tx_queues_to_use; - u32 chan = 0; + u8 rx_channels_count = priv->plat->rx_queues_to_use; + u8 tx_channels_count = priv->plat->tx_queues_to_use; + u8 chan; for (chan = 0; chan < rx_channels_count; chan++) stmmac_stop_rx_dma(priv, chan); @@ -2521,14 +2521,14 @@ static void stmmac_stop_all_dma(struct stmmac_priv *priv) */ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) { - u32 rx_channels_count = priv->plat->rx_queues_to_use; - u32 tx_channels_count = priv->plat->tx_queues_to_use; + u8 rx_channels_count = priv->plat->rx_queues_to_use; + u8 tx_channels_count = priv->plat->tx_queues_to_use; int rxfifosz = priv->plat->rx_fifo_size; int txfifosz = priv->plat->tx_fifo_size; u32 txmode = 0; u32 rxmode = 0; - u32 chan = 0; u8 qmode = 0; + u8 chan; if (rxfifosz == 0) rxfifosz = priv->dma_cap.rx_fifo_size; @@ -3012,8 +3012,8 @@ static void stmmac_set_dma_operation_mode(struct stmmac_priv *priv, u32 txmode, { u8 rxqmode = priv->plat->rx_queues_cfg[chan].mode_to_use; u8 txqmode = priv->plat->tx_queues_cfg[chan].mode_to_use; - u32 rx_channels_count = priv->plat->rx_queues_to_use; - u32 tx_channels_count = priv->plat->tx_queues_to_use; + u8 rx_channels_count = priv->plat->rx_queues_to_use; + u8 tx_channels_count = priv->plat->tx_queues_to_use; int rxfifosz = priv->plat->rx_fifo_size; int txfifosz = priv->plat->tx_fifo_size; @@ -3088,12 +3088,12 @@ static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan, u32 dir) */ static void stmmac_dma_interrupt(struct stmmac_priv *priv) { - u32 tx_channel_count = priv->plat->tx_queues_to_use; - u32 rx_channel_count = priv->plat->rx_queues_to_use; - u32 channels_to_check = tx_channel_count > rx_channel_count ? - tx_channel_count : rx_channel_count; - u32 chan; + u8 tx_channel_count = priv->plat->tx_queues_to_use; + u8 rx_channel_count = priv->plat->rx_queues_to_use; + u8 channels_to_check = tx_channel_count > rx_channel_count ? + tx_channel_count : rx_channel_count; int status[MAX_T(u32, MTL_MAX_TX_QUEUES, MTL_MAX_RX_QUEUES)]; + u8 chan; /* Make sure we never check beyond our status buffer. */ if (WARN_ON_ONCE(channels_to_check > ARRAY_SIZE(status))) @@ -3237,13 +3237,13 @@ static int stmmac_prereset_configure(struct stmmac_priv *priv) */ static int stmmac_init_dma_engine(struct stmmac_priv *priv) { - u32 rx_channels_count = priv->plat->rx_queues_to_use; - u32 tx_channels_count = priv->plat->tx_queues_to_use; - u32 dma_csr_ch = max(rx_channels_count, tx_channels_count); + u8 rx_channels_count = priv->plat->rx_queues_to_use; + u8 tx_channels_count = priv->plat->tx_queues_to_use; + u8 dma_csr_ch = max(rx_channels_count, tx_channels_count); struct stmmac_rx_queue *rx_q; struct stmmac_tx_queue *tx_q; - u32 chan = 0; int ret = 0; + u8 chan; ret = stmmac_prereset_configure(priv); if (ret) @@ -3359,9 +3359,9 @@ static enum hrtimer_restart stmmac_tx_timer(struct hrtimer *t) */ static void stmmac_init_coalesce(struct stmmac_priv *priv) { - u32 tx_channel_count = priv->plat->tx_queues_to_use; - u32 rx_channel_count = priv->plat->rx_queues_to_use; - u32 chan; + u8 tx_channel_count = priv->plat->tx_queues_to_use; + u8 rx_channel_count = priv->plat->rx_queues_to_use; + u8 chan; for (chan = 0; chan < tx_channel_count; chan++) { struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[chan]; @@ -3378,9 +3378,9 @@ static void stmmac_init_coalesce(struct stmmac_priv *priv) static void stmmac_set_rings_length(struct stmmac_priv *priv) { - u32 rx_channels_count = priv->plat->rx_queues_to_use; - u32 tx_channels_count = priv->plat->tx_queues_to_use; - u32 chan; + u8 rx_channels_count = priv->plat->rx_queues_to_use; + u8 tx_channels_count = priv->plat->tx_queues_to_use; + u8 chan; /* set TX ring length */ for (chan = 0; chan < tx_channels_count; chan++) @@ -3400,9 +3400,9 @@ static void stmmac_set_rings_length(struct stmmac_priv *priv) */ static void stmmac_set_tx_queue_weight(struct stmmac_priv *priv) { - u32 tx_queues_count = priv->plat->tx_queues_to_use; + u8 tx_queues_count = priv->plat->tx_queues_to_use; u32 weight; - u32 queue; + u8 queue; for (queue = 0; queue < tx_queues_count; queue++) { weight = priv->plat->tx_queues_cfg[queue].weight; @@ -3417,9 +3417,9 @@ static void stmmac_set_tx_queue_weight(struct stmmac_priv *priv) */ static void stmmac_configure_cbs(struct stmmac_priv *priv) { - u32 tx_queues_count = priv->plat->tx_queues_to_use; + u8 tx_queues_count = priv->plat->tx_queues_to_use; u32 mode_to_use; - u32 queue; + u8 queue; /* queue 0 is reserved for legacy traffic */ for (queue = 1; queue < tx_queues_count; queue++) { @@ -3443,8 +3443,8 @@ static void stmmac_configure_cbs(struct stmmac_priv *priv) */ static void stmmac_rx_queue_dma_chan_map(struct stmmac_priv *priv) { - u32 rx_queues_count = priv->plat->rx_queues_to_use; - u32 queue; + u8 rx_queues_count = priv->plat->rx_queues_to_use; + u8 queue; u32 chan; for (queue = 0; queue < rx_queues_count; queue++) { @@ -3460,8 +3460,8 @@ static void stmmac_rx_queue_dma_chan_map(struct stmmac_priv *priv) */ static void stmmac_mac_config_rx_queues_prio(struct stmmac_priv *priv) { - u32 rx_queues_count = priv->plat->rx_queues_to_use; - u32 queue; + u8 rx_queues_count = priv->plat->rx_queues_to_use; + u8 queue; u32 prio; for (queue = 0; queue < rx_queues_count; queue++) { @@ -3480,8 +3480,8 @@ static void stmmac_mac_config_rx_queues_prio(struct stmmac_priv *priv) */ static void stmmac_mac_config_tx_queues_prio(struct stmmac_priv *priv) { - u32 tx_queues_count = priv->plat->tx_queues_to_use; - u32 queue; + u8 tx_queues_count = priv->plat->tx_queues_to_use; + u8 queue; u32 prio; for (queue = 0; queue < tx_queues_count; queue++) { @@ -3500,9 +3500,9 @@ static void stmmac_mac_config_tx_queues_prio(struct stmmac_priv *priv) */ static void stmmac_mac_config_rx_queues_routing(struct stmmac_priv *priv) { - u32 rx_queues_count = priv->plat->rx_queues_to_use; - u32 queue; + u8 rx_queues_count = priv->plat->rx_queues_to_use; u8 packet; + u8 queue; for (queue = 0; queue < rx_queues_count; queue++) { /* no specific packet type routing specified for the queue */ @@ -3537,8 +3537,8 @@ static void stmmac_mac_config_rss(struct stmmac_priv *priv) */ static void stmmac_mtl_configuration(struct stmmac_priv *priv) { - u32 rx_queues_count = priv->plat->rx_queues_to_use; - u32 tx_queues_count = priv->plat->tx_queues_to_use; + u8 rx_queues_count = priv->plat->rx_queues_to_use; + u8 tx_queues_count = priv->plat->tx_queues_to_use; if (tx_queues_count > 1) stmmac_set_tx_queue_weight(priv); @@ -3606,10 +3606,10 @@ static void stmmac_safety_feat_configuration(struct stmmac_priv *priv) static int stmmac_hw_setup(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); - u32 rx_cnt = priv->plat->rx_queues_to_use; - u32 tx_cnt = priv->plat->tx_queues_to_use; + u8 rx_cnt = priv->plat->rx_queues_to_use; + u8 tx_cnt = priv->plat->tx_queues_to_use; bool sph_en; - u32 chan; + u8 chan; int ret; /* Make sure RX clock is enabled */ @@ -4001,7 +4001,8 @@ static struct stmmac_dma_conf * stmmac_setup_dma_desc(struct stmmac_priv *priv, unsigned int mtu) { struct stmmac_dma_conf *dma_conf; - int chan, bfsize, ret; + int bfsize, ret; + u8 chan; dma_conf = kzalloc_obj(*dma_conf); if (!dma_conf) { @@ -4076,7 +4077,7 @@ static int __stmmac_open(struct net_device *dev, struct stmmac_dma_conf *dma_conf) { struct stmmac_priv *priv = netdev_priv(dev); - u32 chan; + u8 chan; int ret; for (int i = 0; i < MTL_MAX_TX_QUEUES; i++) @@ -4175,7 +4176,7 @@ err_dma_resources: static void __stmmac_release(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); - u32 chan; + u8 chan; /* Stop and disconnect the PHY */ phylink_stop(priv->phylink); @@ -6123,7 +6124,7 @@ static int stmmac_set_features(struct net_device *netdev, if (priv->sph_capable) { bool sph_en = (priv->hw->rx_csum > 0) && priv->sph_active; - u32 chan; + u8 chan; for (chan = 0; chan < priv->plat->rx_queues_to_use; chan++) stmmac_enable_sph(priv, priv->ioaddr, sph_en, chan); @@ -6143,11 +6144,11 @@ static int stmmac_set_features(struct net_device *netdev, static void stmmac_common_interrupt(struct stmmac_priv *priv) { - u32 rx_cnt = priv->plat->rx_queues_to_use; - u32 tx_cnt = priv->plat->tx_queues_to_use; - u32 queues_count; - u32 queue; + u8 rx_cnt = priv->plat->rx_queues_to_use; + u8 tx_cnt = priv->plat->tx_queues_to_use; + u8 queues_count; bool xmac; + u8 queue; xmac = dwmac_is_xmac(priv->plat->core_type); queues_count = (rx_cnt > tx_cnt) ? rx_cnt : tx_cnt; @@ -6445,9 +6446,9 @@ static int stmmac_rings_status_show(struct seq_file *seq, void *v) { struct net_device *dev = seq->private; struct stmmac_priv *priv = netdev_priv(dev); - u32 rx_count = priv->plat->rx_queues_to_use; - u32 tx_count = priv->plat->tx_queues_to_use; - u32 queue; + u8 rx_count = priv->plat->rx_queues_to_use; + u8 tx_count = priv->plat->tx_queues_to_use; + u8 queue; if ((dev->flags & IFF_UP) == 0) return 0; @@ -6572,9 +6573,9 @@ static int stmmac_dma_cap_show(struct seq_file *seq, void *v) priv->dma_cap.number_rx_channel); seq_printf(seq, "\tNumber of Additional TX channel: %d\n", priv->dma_cap.number_tx_channel); - seq_printf(seq, "\tNumber of Additional RX queues: %d\n", + seq_printf(seq, "\tNumber of Additional RX queues: %u\n", priv->dma_cap.number_rx_queues); - seq_printf(seq, "\tNumber of Additional TX queues: %d\n", + seq_printf(seq, "\tNumber of Additional TX queues: %u\n", priv->dma_cap.number_tx_queues); seq_printf(seq, "\tEnhanced descriptors: %s\n", (priv->dma_cap.enh_desc) ? "Y" : "N"); @@ -7043,7 +7044,7 @@ void stmmac_enable_tx_queue(struct stmmac_priv *priv, u32 queue) void stmmac_xdp_release(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); - u32 chan; + u8 chan; /* Ensure tx function is not running */ netif_tx_disable(dev); @@ -7076,14 +7077,14 @@ void stmmac_xdp_release(struct net_device *dev) int stmmac_xdp_open(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); - u32 rx_cnt = priv->plat->rx_queues_to_use; - u32 tx_cnt = priv->plat->tx_queues_to_use; - u32 dma_csr_ch = max(rx_cnt, tx_cnt); + u8 rx_cnt = priv->plat->rx_queues_to_use; + u8 tx_cnt = priv->plat->tx_queues_to_use; + u8 dma_csr_ch = max(rx_cnt, tx_cnt); struct stmmac_rx_queue *rx_q; struct stmmac_tx_queue *tx_q; u32 buf_size; bool sph_en; - u32 chan; + u8 chan; int ret; ret = alloc_dma_desc_resources(priv, &priv->dma_conf); @@ -7219,10 +7220,10 @@ int stmmac_xsk_wakeup(struct net_device *dev, u32 queue, u32 flags) static void stmmac_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct stmmac_priv *priv = netdev_priv(dev); - u32 tx_cnt = priv->plat->tx_queues_to_use; - u32 rx_cnt = priv->plat->rx_queues_to_use; + u8 tx_cnt = priv->plat->tx_queues_to_use; + u8 rx_cnt = priv->plat->rx_queues_to_use; unsigned int start; - int q; + u8 q; for (q = 0; q < tx_cnt; q++) { struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[q]; @@ -7511,7 +7512,7 @@ static int stmmac_hw_init(struct stmmac_priv *priv) static void stmmac_napi_add(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); - u32 queue, maxq; + u8 queue, maxq; maxq = max(priv->plat->rx_queues_to_use, priv->plat->tx_queues_to_use); @@ -7540,7 +7541,7 @@ static void stmmac_napi_add(struct net_device *dev) static void stmmac_napi_del(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); - u32 queue, maxq; + u8 queue, maxq; maxq = max(priv->plat->rx_queues_to_use, priv->plat->tx_queues_to_use); @@ -7558,7 +7559,7 @@ static void stmmac_napi_del(struct net_device *dev) } } -int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt) +int stmmac_reinit_queues(struct net_device *dev, u8 rx_cnt, u8 tx_cnt) { struct stmmac_priv *priv = netdev_priv(dev); int ret = 0, i; @@ -7763,8 +7764,8 @@ static int __stmmac_dvr_probe(struct device *device, { struct net_device *ndev = NULL; struct stmmac_priv *priv; - u32 rxq; int i, ret = 0; + u8 rxq; if (!plat_dat->dma_cfg || !plat_dat->dma_cfg->pbl) { dev_err(device, "invalid DMA configuration\n"); @@ -8147,7 +8148,7 @@ int stmmac_suspend(struct device *dev) { struct net_device *ndev = dev_get_drvdata(dev); struct stmmac_priv *priv = netdev_priv(ndev); - u32 chan; + u8 chan; if (!ndev || !netif_running(ndev)) goto suspend_bsp; @@ -8222,9 +8223,9 @@ static void stmmac_reset_tx_queue(struct stmmac_priv *priv, u32 queue) */ static void stmmac_reset_queues_param(struct stmmac_priv *priv) { - u32 rx_cnt = priv->plat->rx_queues_to_use; - u32 tx_cnt = priv->plat->tx_queues_to_use; - u32 queue; + u8 rx_cnt = priv->plat->rx_queues_to_use; + u8 tx_cnt = priv->plat->tx_queues_to_use; + u8 queue; for (queue = 0; queue < rx_cnt; queue++) stmmac_reset_rx_queue(priv, queue); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 0d3bad0f8915..3b514a702612 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -138,6 +138,7 @@ static int stmmac_mtl_setup(struct platform_device *pdev, struct device_node *tx_node; u8 queue = 0; int ret = 0; + u32 value; /* First Queue must always be in DCB mode. As MTL_QUEUE_DCB = 1 we need * to always set this, otherwise Queue will be classified as AVB @@ -157,8 +158,11 @@ static int stmmac_mtl_setup(struct platform_device *pdev, } /* Processing RX queues common config */ - of_property_read_u32(rx_node, "snps,rx-queues-to-use", - &plat->rx_queues_to_use); + if (!of_property_read_u32(rx_node, "snps,rx-queues-to-use", &value)) { + if (value > U8_MAX) + value = U8_MAX; + plat->rx_queues_to_use = value; + } if (of_property_read_bool(rx_node, "snps,rx-sched-sp")) plat->rx_sched_algorithm = MTL_RX_ALGORITHM_SP; @@ -208,8 +212,11 @@ static int stmmac_mtl_setup(struct platform_device *pdev, } /* Processing TX queues common config */ - of_property_read_u32(tx_node, "snps,tx-queues-to-use", - &plat->tx_queues_to_use); + if (!of_property_read_u32(tx_node, "snps,tx-queues-to-use", &value)) { + if (value > U8_MAX) + value = U8_MAX; + plat->tx_queues_to_use = value; + } if (of_property_read_bool(tx_node, "snps,tx-sched-wrr")) plat->tx_sched_algorithm = MTL_TX_ALGORITHM_WRR; diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 411cdd3ea034..03fd85060a73 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -244,8 +244,8 @@ struct plat_stmmacenet_data { int tx_fifo_size; int rx_fifo_size; u32 host_dma_width; - u32 rx_queues_to_use; - u32 tx_queues_to_use; + u8 rx_queues_to_use; + u8 tx_queues_to_use; u8 rx_sched_algorithm; u8 tx_sched_algorithm; struct stmmac_rxq_cfg rx_queues_cfg[MTL_MAX_RX_QUEUES]; From 758ed85aadd0668c66cb359c63f384992b10938c Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 9 Mar 2026 09:39:39 +0000 Subject: [PATCH 0437/1561] net: stmmac: use u8 for host_dma_width and similar struct members We aren't going to see >= 256-bit address busses soon, so reduce host_dma_width and associated other struct members that initialise this from u32 to u8. Signed-off-by: Russell King (Oracle) Acked-by: Mohd Ayaan Anwar # qcom-ethqos Tested-by: Mohd Ayaan Anwar Link: https://patch.msgid.link/E1vzX5P-0000000CVsK-0iwX@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c | 2 +- drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c | 6 +++--- drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 2 +- include/linux/stmmac.h | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c index 9f5a15b81f8a..9d1bd72ffb73 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c @@ -42,8 +42,8 @@ struct imx_priv_data; struct imx_dwmac_ops { - u32 addr_width; u32 flags; + u8 addr_width; bool mac_rgmii_txclk_auto_adj; int (*fix_soc_reset)(struct stmmac_priv *priv); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c index a139db6a8cbb..30ae0dba7fff 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c @@ -93,9 +93,9 @@ struct mediatek_dwmac_variant { const char * const *clk_list; int num_clks; - u32 dma_bit_mask; u32 rx_delay_max; u32 tx_delay_max; + u8 dma_bit_mask; }; /* list of clocks required for mac */ @@ -268,9 +268,9 @@ static const struct mediatek_dwmac_variant mt2712_gmac_variant = { .dwmac_set_delay = mt2712_set_delay, .clk_list = mt2712_dwmac_clk_l, .num_clks = ARRAY_SIZE(mt2712_dwmac_clk_l), - .dma_bit_mask = 33, .rx_delay_max = 17600, .tx_delay_max = 17600, + .dma_bit_mask = 33, }; static int mt8195_set_interface(struct mediatek_dwmac_plat_data *plat, @@ -418,9 +418,9 @@ static const struct mediatek_dwmac_variant mt8195_gmac_variant = { .dwmac_set_delay = mt8195_set_delay, .clk_list = mt8195_dwmac_clk_l, .num_clks = ARRAY_SIZE(mt8195_dwmac_clk_l), - .dma_bit_mask = 35, .rx_delay_max = 9280, .tx_delay_max = 9280, + .dma_bit_mask = 35, }; static int mediatek_dwmac_config_dt(struct mediatek_dwmac_plat_data *plat) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 388e9fdeb86c..3ccf20fdf52a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -91,8 +91,8 @@ struct ethqos_emac_driver_data { unsigned int num_rgmii_por; bool rgmii_config_loopback_en; bool has_emac_ge_3; + u8 dma_addr_width; const char *link_clk_name; - u32 dma_addr_width; struct dwmac4_addrs dwmac4_addrs; bool needs_sgmii_loopback; }; diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 03fd85060a73..11886189bf51 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -243,7 +243,7 @@ struct plat_stmmacenet_data { int unicast_filter_entries; int tx_fifo_size; int rx_fifo_size; - u32 host_dma_width; + u8 host_dma_width; u8 rx_queues_to_use; u8 tx_queues_to_use; u8 rx_sched_algorithm; From 9fe167ab790b10c9eb9ef82f46a03c83f9953b61 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 9 Mar 2026 09:39:44 +0000 Subject: [PATCH 0438/1561] net: stmmac: add documentation for stmmac_dma_cfg members Add documentation of each of the struct stmmac_dma_cfg members. dche remains undocumented as I don't have documentation that covers this. Signed-off-by: Russell King (Oracle) Tested-by: Mohd Ayaan Anwar Link: https://patch.msgid.link/E1vzX5U-0000000CVsQ-162V@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/stmmac.h | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 11886189bf51..1af3a5e197c9 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -93,16 +93,37 @@ struct stmmac_mdio_bus_data { }; struct stmmac_dma_cfg { + /* pbl: programmable burst limit + * txpbl: transmit programmable burst limit + * rxpbl: receive programmable burst limit + * If txpbl or rxpbl are zero, the value of pbl will be substituted. + * Range 0 - 63. + */ int pbl; int txpbl; int rxpbl; + /* pblx8: multiplies pbl, txpbl, rxpbl by a factor of 8 for dwmac >= + * 3.50a, or a factor of 4 for previous versions. + */ bool pblx8; + /* fixed_burst: + * when set, AXI bursts defined by axi_blen_regval are permitted. + * AHB uses SINGLE, INCR4, INCR8 or INCR16 during burst transfers. + * when clear, AXI and AHB use SINGLE or INCR bursts. + */ bool fixed_burst; + /* mixed_burst: + * when set and fixed_burst is clear, AHB uses INCR for bursts > 16 + * and SINGLE or INCRx for bursts <= 16. + */ bool mixed_burst; + /* aal: address aligned bursts for AHB and AXI master interface */ bool aal; - bool eame; - bool multi_msi_en; bool dche; + bool eame; + /* multi_msi_en: stmmac core internal */ + bool multi_msi_en; + /* atds: stmmac core internal */ bool atds; }; From 315bab9411f3bd3465a47a64a3e44323bfab60be Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 9 Mar 2026 09:39:49 +0000 Subject: [PATCH 0439/1561] net: stmmac: add documentation for clocks Add documentation covering stmmac_clk, pclk, clk_ptp_ref and clk_tx_i in the hope that this will help understand what each of these clocks are for. There is confusion around stmmac_clk and pclk which can't be easily resolved today as the Imagination Technologies Pistachio board that pclk was introduced for has no public documentation and is likely now obsolete. So the origins of pclk are lost to the winds of time. Signed-off-by: Russell King (Oracle) Tested-by: Mohd Ayaan Anwar Link: https://patch.msgid.link/E1vzX5Z-0000000CVsb-1XTm@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/stmmac.h | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 1af3a5e197c9..937985276e6b 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -300,10 +300,41 @@ struct plat_stmmacenet_data { struct phylink_pcs *(*select_pcs)(struct stmmac_priv *priv, phy_interface_t interface); void *bsp_priv; + + /* stmmac clocks: + * stmmac_clk: CSR clock (which can be hclk_i, clk_csr_i, aclk_i, + * or clk_app_i depending on GMAC configuration). This clock + * generates the MDC clock. + * + * pclk: introduced for Imagination Technologies Pistachio board - + * see 5f9755d26fbf ("stmmac: Add an optional register interface + * clock"). This is probably used for cases where separate clocks + * are provided for the host interface and register interface. In + * this case, as the MDC clock is derived from stmmac_clk, pclk + * can only really be the "application clock" for the "host + * interface" and not the "register interface" aka CSR clock as + * it is never used when determining the divider for the MDC + * clock. + * + * clk_ptp_ref: optional PTP reference clock (clk_ptp_ref_i). When + * present, this clock increments the timestamp value. Otherwise, + * the rate of stmmac_clk will be used. + * + * clk_tx_i: MAC transmit clock, which will be 2.5MHz for 10M, + * 25MHz for 100M, or 125MHz for 1G irrespective of the interface + * mode. For the DWMAC PHY interface modes: + * + * GMII/MII PHY's transmit clock for 10M (2.5MHz) or 100M (25MHz), + * or 125MHz local clock for 1G mode + * RMII 50MHz RMII clock divided by 2 or 20. + * RGMII 125MHz local clock divided by 1, 5, or 50. + * SGMII 125MHz SerDes clock divided by 1, 5, or 50. + * TBI/RTBI 125MHz SerDes clock + */ struct clk *stmmac_clk; struct clk *pclk; struct clk *clk_ptp_ref; - struct clk *clk_tx_i; /* clk_tx_i to MAC core */ + struct clk *clk_tx_i; unsigned long clk_ptp_rate; unsigned long clk_ref_rate; struct clk_bulk_data *clks; From 34c0378b156f02f5fd8149f24a7aa75b255e8cda Mon Sep 17 00:00:00 2001 From: Soichiro Ueda Date: Tue, 10 Mar 2026 16:28:31 +0900 Subject: [PATCH 0440/1561] selftests: af_unix: validate SO_PEEK_OFF advancement and reset Extend the so_peek_off selftest to ensure the socket peek offset is handled correctly after both MSG_PEEK and actual data consumption. Verify that the peek offset advances by the same amount as the number of bytes read when performing a read with MSG_PEEK. After exercising SO_PEEK_OFF via MSG_PEEK, drain the receive queue with a non-peek recv() and verify that it can receive all the content in the buffer and SO_PEEK_OFF returns back to 0. The verification after actual data consumption was suggested by Miao Wang when the original so_peek_off selftest was introduced. Link: https://lore.kernel.org/all/7B657CC7-B5CA-46D2-8A4B-8AB5FB83C6DA@gmail.com/ Suggested-by: Miao Wang Reviewed-by: Willem de Bruijn Reviewed-by: Kuniyuki Iwashima Signed-off-by: Soichiro Ueda Link: https://patch.msgid.link/20260310072832.127848-1-the.latticeheart@gmail.com Signed-off-by: Jakub Kicinski --- .../selftests/net/af_unix/so_peek_off.c | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/tools/testing/selftests/net/af_unix/so_peek_off.c b/tools/testing/selftests/net/af_unix/so_peek_off.c index 86e7b0fb522d..f6466a717f49 100644 --- a/tools/testing/selftests/net/af_unix/so_peek_off.c +++ b/tools/testing/selftests/net/af_unix/so_peek_off.c @@ -76,6 +76,19 @@ FIXTURE_TEARDOWN(so_peek_off) ASSERT_STREQ(str, buf); \ } while (0) +#define peekoffeq(fd, expected) \ + do { \ + socklen_t optlen = sizeof(int); \ + int off = -1; \ + int ret; \ + \ + ret = getsockopt(fd, SOL_SOCKET, SO_PEEK_OFF, \ + &off, &optlen); \ + ASSERT_EQ(0, ret); \ + ASSERT_EQ((socklen_t)sizeof(off), optlen); \ + ASSERT_EQ(expected, off); \ + } while (0) + #define async \ for (pid_t pid = (pid = fork(), \ pid < 0 ? \ @@ -91,7 +104,12 @@ TEST_F(so_peek_off, single_chunk) sendeq(self->fd[0], "aaaabbbb", 0); recveq(self->fd[1], "aaaa", 4, MSG_PEEK); + peekoffeq(self->fd[1], 4); recveq(self->fd[1], "bbbb", 100, MSG_PEEK); + peekoffeq(self->fd[1], 8); + + recveq(self->fd[1], "aaaabbbb", 8, 0); + peekoffeq(self->fd[1], 0); } TEST_F(so_peek_off, two_chunks) @@ -100,7 +118,13 @@ TEST_F(so_peek_off, two_chunks) sendeq(self->fd[0], "bbbb", 0); recveq(self->fd[1], "aaaa", 4, MSG_PEEK); + peekoffeq(self->fd[1], 4); recveq(self->fd[1], "bbbb", 100, MSG_PEEK); + peekoffeq(self->fd[1], 8); + + recveq(self->fd[1], "aaaa", 4, 0); + recveq(self->fd[1], "bbbb", 4, 0); + peekoffeq(self->fd[1], 0); } TEST_F(so_peek_off, two_chunks_blocking) @@ -111,6 +135,7 @@ TEST_F(so_peek_off, two_chunks_blocking) } recveq(self->fd[1], "aaaa", 4, MSG_PEEK); + peekoffeq(self->fd[1], 4); async { usleep(1000); @@ -119,24 +144,38 @@ TEST_F(so_peek_off, two_chunks_blocking) /* goto again; -> goto redo; in unix_stream_read_generic(). */ recveq(self->fd[1], "bbbb", 100, MSG_PEEK); + peekoffeq(self->fd[1], 8); + + recveq(self->fd[1], "aaaa", 4, 0); + recveq(self->fd[1], "bbbb", 4, 0); + peekoffeq(self->fd[1], 0); } TEST_F(so_peek_off, two_chunks_overlap) { sendeq(self->fd[0], "aaaa", 0); recveq(self->fd[1], "aa", 2, MSG_PEEK); + peekoffeq(self->fd[1], 2); sendeq(self->fd[0], "bbbb", 0); if (variant->type == SOCK_STREAM) { /* SOCK_STREAM tries to fill the buffer. */ recveq(self->fd[1], "aabb", 4, MSG_PEEK); + peekoffeq(self->fd[1], 6); recveq(self->fd[1], "bb", 100, MSG_PEEK); + peekoffeq(self->fd[1], 8); } else { /* SOCK_DGRAM and SOCK_SEQPACKET returns at the skb boundary. */ recveq(self->fd[1], "aa", 100, MSG_PEEK); + peekoffeq(self->fd[1], 4); recveq(self->fd[1], "bbbb", 100, MSG_PEEK); + peekoffeq(self->fd[1], 8); } + + recveq(self->fd[1], "aaaa", 4, 0); + recveq(self->fd[1], "bbbb", 4, 0); + peekoffeq(self->fd[1], 0); } TEST_F(so_peek_off, two_chunks_overlap_blocking) @@ -147,6 +186,7 @@ TEST_F(so_peek_off, two_chunks_overlap_blocking) } recveq(self->fd[1], "aa", 2, MSG_PEEK); + peekoffeq(self->fd[1], 2); async { usleep(1000); @@ -155,8 +195,14 @@ TEST_F(so_peek_off, two_chunks_overlap_blocking) /* Even SOCK_STREAM does not wait if at least one byte is read. */ recveq(self->fd[1], "aa", 100, MSG_PEEK); + peekoffeq(self->fd[1], 4); recveq(self->fd[1], "bbbb", 100, MSG_PEEK); + peekoffeq(self->fd[1], 8); + + recveq(self->fd[1], "aaaa", 4, 0); + recveq(self->fd[1], "bbbb", 4, 0); + peekoffeq(self->fd[1], 0); } TEST_HARNESS_MAIN From 410593fec752f15c97062d2ded5e3a9dd654dcb2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 10 Mar 2026 07:38:55 +0000 Subject: [PATCH 0441/1561] tcp: add sysctl_tcp_shrink_window to netns_ipv4_sysctl.rst Add missing entry for sysctl_tcp_shrink_window. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260310073855.564927-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst b/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst index cf284263e69b..6dbd97d435e9 100644 --- a/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst +++ b/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst @@ -105,6 +105,7 @@ u8 sysctl_tcp_nometrics_save u8 sysctl_tcp_no_ssthresh_metrics_save TCP_LAST_ACK/tcp_(update/init)_metrics u8 sysctl_tcp_moderate_rcvbuf read_mostly tcp_rcvbuf_grow() u32 sysctl_tcp_rcvbuf_low_rtt read_mostly tcp_rcvbuf_grow() +u8 sysctl_tcp_shrink_window read_mostly read_mostly __tcp_select_window() u8 sysctl_tcp_tso_win_divisor read_mostly tcp_tso_should_defer(tcp_write_xmit) u8 sysctl_tcp_workaround_signed_windows tcp_select_window int sysctl_tcp_limit_output_bytes read_mostly tcp_small_queue_check(tcp_write_xmit) From 7e27d6202e90bc1c2ff16cd3118cb5456214ee42 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Tue, 10 Mar 2026 18:10:32 +0000 Subject: [PATCH 0442/1561] selftests: net: local_termination: test link-local protocols Add tests to local_termination.sh to verify that link-local frames arrive. On some switches the DSA driver uses bridges to connect the user ports to their CPU ports. More "intelligent" switches typically don't forward link-local frames, but may trap them to an internal microcontroller. The driver may have to change trapping rules, so link-local frames end up on the DSA CPU ports instead of being silently dropped or trapped to the internal microcontroller of the switch. Add two tests which help to validate this has been done correctly: - Link-local STP BPDU should arrive at the Linux netdev when the bridge has STP disabled (BR_NO_STP), in which case the bridge forwards them rather than consuming them in the control plane - Link-local LLDP should arrive at standalone ports (and the test should be skipped on bridged ports similar to how it is done for the IEEE1588v2/PTP tests) Signed-off-by: Daniel Golle Reviewed-by: Vladimir Oltean Tested-by: Vladimir Oltean Link: https://patch.msgid.link/1a67081b2ede1e6d2d32f7dd54ae9688f3566152.1773166131.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- .../net/forwarding/local_termination.sh | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh index 1f2bf6e81847..9bc9d25e7136 100755 --- a/tools/testing/selftests/net/forwarding/local_termination.sh +++ b/tools/testing/selftests/net/forwarding/local_termination.sh @@ -100,6 +100,15 @@ PTP_1588_IPV6_PDELAY_REQ=" \ 00 00 3e 37 63 ff fe cf 17 0e 00 01 00 01 05 7f \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00" +LINK_LOCAL_STP_BPDU=" \ +01:80:c2:00:00:00 00:00:de:ad:be:ef 00 26 42 42 03 \ +00 00 00 00 00 80 00 aa bb cc dd ee ff 00 00 00 00 \ +80 00 aa bb cc dd ee ff 80 01 00 00 14 00 02 00 \ +0f 00" +LINK_LOCAL_LLDP=" \ +01:80:c2:00:00:0e 00:00:de:ad:be:ef 88:cc 02 07 04 \ +00 11 22 33 44 55 04 05 05 65 74 68 30 06 02 00 \ +78 00 00" # Disable promisc to ensure we don't receive unknown MAC DA packets export TCPDUMP_EXTRA_FLAGS="-pl" @@ -213,7 +222,15 @@ run_test() mc_route_destroy $rcv_if_name mc_route_destroy $send_if_name + ip maddress add 01:80:c2:00:00:00 dev $rcv_if_name + send_raw $send_if_name "$LINK_LOCAL_STP_BPDU" + ip maddress del 01:80:c2:00:00:00 dev $rcv_if_name + if [ $skip_ptp = false ]; then + ip maddress add 01:80:c2:00:00:0e dev $rcv_if_name + send_raw $send_if_name "$LINK_LOCAL_LLDP" + ip maddress del 01:80:c2:00:00:0e dev $rcv_if_name + ip maddress add 01:1b:19:00:00:00 dev $rcv_if_name send_raw $send_if_name "$PTP_1588_L2_SYNC" send_raw $send_if_name "$PTP_1588_L2_FOLLOW_UP" @@ -304,7 +321,15 @@ run_test() "$smac > $UNKNOWN_MACV6_MC_ADDR3, ethertype IPv6 (0x86dd)" \ true "$test_name" + check_rcv $rcv_if_name "Link-local STP BPDU" \ + "> 01:80:c2:00:00:00" \ + true "$test_name" + if [ $skip_ptp = false ]; then + check_rcv $rcv_if_name "Link-local LLDP" \ + "> 01:80:c2:00:00:0e" \ + true "$test_name" + check_rcv $rcv_if_name "1588v2 over L2 transport, Sync" \ "ethertype PTP (0x88f7).* PTPv2.* msg type *: sync msg" \ true "$test_name" From f97977944d1587fc01736da8b138d7bb9c526d02 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Tue, 10 Mar 2026 18:12:07 +0800 Subject: [PATCH 0443/1561] net: macb: Clean up the .clk_init setting in the macb_config instances All instances of macb_config currently have .clk_init set, but most of them use macb_clk_init(). In fact, there is no need to duplicate this across all macb_config instances. Introduce a new macb_clk_init() function that executes the specific .clk_init if it is set; otherwise, it runs the default clock initialization function. Signed-off-by: Kevin Hao Link: https://patch.msgid.link/20260310-macb-cleanup-v1-1-928c1a91a7dc@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 41 +++++++++++------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 3dcae4d5f74c..82d3f6017287 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -4548,9 +4548,9 @@ static void macb_clks_disable(struct clk *pclk, struct clk *hclk, struct clk *tx clk_bulk_disable_unprepare(ARRAY_SIZE(clks), clks); } -static int macb_clk_init(struct platform_device *pdev, struct clk **pclk, - struct clk **hclk, struct clk **tx_clk, - struct clk **rx_clk, struct clk **tsu_clk) +static int macb_clk_init_dflt(struct platform_device *pdev, struct clk **pclk, + struct clk **hclk, struct clk **tx_clk, + struct clk **rx_clk, struct clk **tsu_clk) { struct macb_platform_data *pdata; int err; @@ -4633,6 +4633,19 @@ err_disable_pclk: return err; } +static int macb_clk_init(struct platform_device *pdev, struct clk **pclk, + struct clk **hclk, struct clk **tx_clk, + struct clk **rx_clk, struct clk **tsu_clk, + const struct macb_config *config) +{ + if (config->clk_init) + return config->clk_init(pdev, pclk, hclk, tx_clk, rx_clk, + tsu_clk); + else + return macb_clk_init_dflt(pdev, pclk, hclk, tx_clk, rx_clk, + tsu_clk); +} + static int macb_init(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); @@ -5241,7 +5254,7 @@ static int fu540_c000_clk_init(struct platform_device *pdev, struct clk **pclk, struct clk_init_data init; int err = 0; - err = macb_clk_init(pdev, pclk, hclk, tx_clk, rx_clk, tsu_clk); + err = macb_clk_init_dflt(pdev, pclk, hclk, tx_clk, rx_clk, tsu_clk); if (err) return err; @@ -5389,7 +5402,6 @@ static const struct macb_config fu540_c000_config = { static const struct macb_config at91sam9260_config = { .caps = MACB_CAPS_USRIO_HAS_CLKEN | MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII, - .clk_init = macb_clk_init, .init = macb_init, .usrio = &macb_default_usrio, }; @@ -5397,7 +5409,6 @@ static const struct macb_config at91sam9260_config = { static const struct macb_config sama5d3macb_config = { .caps = MACB_CAPS_SG_DISABLED | MACB_CAPS_USRIO_HAS_CLKEN | MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII, - .clk_init = macb_clk_init, .init = macb_init, .usrio = &macb_default_usrio, }; @@ -5405,7 +5416,6 @@ static const struct macb_config sama5d3macb_config = { static const struct macb_config pc302gem_config = { .caps = MACB_CAPS_SG_DISABLED | MACB_CAPS_GIGABIT_MODE_AVAILABLE, .dma_burst_length = 16, - .clk_init = macb_clk_init, .init = macb_init, .usrio = &macb_default_usrio, }; @@ -5413,7 +5423,6 @@ static const struct macb_config pc302gem_config = { static const struct macb_config sama5d2_config = { .caps = MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII | MACB_CAPS_JUMBO, .dma_burst_length = 16, - .clk_init = macb_clk_init, .init = macb_init, .jumbo_max_len = 10240, .usrio = &macb_default_usrio, @@ -5422,7 +5431,6 @@ static const struct macb_config sama5d2_config = { static const struct macb_config sama5d29_config = { .caps = MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII | MACB_CAPS_GEM_HAS_PTP, .dma_burst_length = 16, - .clk_init = macb_clk_init, .init = macb_init, .usrio = &macb_default_usrio, }; @@ -5431,7 +5439,6 @@ static const struct macb_config sama5d3_config = { .caps = MACB_CAPS_SG_DISABLED | MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII | MACB_CAPS_JUMBO, .dma_burst_length = 16, - .clk_init = macb_clk_init, .init = macb_init, .jumbo_max_len = 10240, .usrio = &macb_default_usrio, @@ -5440,7 +5447,6 @@ static const struct macb_config sama5d3_config = { static const struct macb_config sama5d4_config = { .caps = MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII, .dma_burst_length = 4, - .clk_init = macb_clk_init, .init = macb_init, .usrio = &macb_default_usrio, }; @@ -5454,7 +5460,6 @@ static const struct macb_config emac_config = { static const struct macb_config np4_config = { .caps = MACB_CAPS_USRIO_DISABLED, - .clk_init = macb_clk_init, .init = macb_init, .usrio = &macb_default_usrio, }; @@ -5464,7 +5469,6 @@ static const struct macb_config zynqmp_config = { MACB_CAPS_JUMBO | MACB_CAPS_GEM_HAS_PTP | MACB_CAPS_BD_RD_PREFETCH, .dma_burst_length = 16, - .clk_init = macb_clk_init, .init = init_reset_optional, .jumbo_max_len = 10240, .usrio = &macb_default_usrio, @@ -5474,7 +5478,6 @@ static const struct macb_config zynq_config = { .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_NO_GIGABIT_HALF | MACB_CAPS_NEEDS_RSTONUBR, .dma_burst_length = 16, - .clk_init = macb_clk_init, .init = macb_init, .usrio = &macb_default_usrio, }; @@ -5484,7 +5487,6 @@ static const struct macb_config mpfs_config = { MACB_CAPS_JUMBO | MACB_CAPS_GEM_HAS_PTP, .dma_burst_length = 16, - .clk_init = macb_clk_init, .init = init_reset_optional, .usrio = &macb_default_usrio, .max_tx_length = 4040, /* Cadence Erratum 1686 */ @@ -5496,7 +5498,6 @@ static const struct macb_config sama7g5_gem_config = { MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII | MACB_CAPS_MIIONRGMII | MACB_CAPS_GEM_HAS_PTP, .dma_burst_length = 16, - .clk_init = macb_clk_init, .init = macb_init, .usrio = &sama7g5_usrio, }; @@ -5506,7 +5507,6 @@ static const struct macb_config sama7g5_emac_config = { MACB_CAPS_USRIO_HAS_CLKEN | MACB_CAPS_MIIONRGMII | MACB_CAPS_GEM_HAS_PTP, .dma_burst_length = 16, - .clk_init = macb_clk_init, .init = macb_init, .usrio = &sama7g5_usrio, }; @@ -5517,7 +5517,6 @@ static const struct macb_config versal_config = { MACB_CAPS_NEED_TSUCLK | MACB_CAPS_QUEUE_DISABLE | MACB_CAPS_QBV, .dma_burst_length = 16, - .clk_init = macb_clk_init, .init = init_reset_optional, .jumbo_max_len = 10240, .usrio = &macb_default_usrio, @@ -5528,7 +5527,6 @@ static const struct macb_config eyeq5_config = { MACB_CAPS_GEM_HAS_PTP | MACB_CAPS_QUEUE_DISABLE | MACB_CAPS_NO_LSO | MACB_CAPS_EEE, .dma_burst_length = 16, - .clk_init = macb_clk_init, .init = eyeq5_init, .jumbo_max_len = 10240, .usrio = &macb_default_usrio, @@ -5540,7 +5538,6 @@ static const struct macb_config raspberrypi_rp1_config = { MACB_CAPS_GEM_HAS_PTP | MACB_CAPS_EEE, .dma_burst_length = 16, - .clk_init = macb_clk_init, .init = macb_init, .usrio = &macb_default_usrio, .jumbo_max_len = 10240, @@ -5581,7 +5578,6 @@ static const struct macb_config default_gem_config = { MACB_CAPS_JUMBO | MACB_CAPS_GEM_HAS_PTP, .dma_burst_length = 16, - .clk_init = macb_clk_init, .init = macb_init, .usrio = &macb_default_usrio, .jumbo_max_len = 10240, @@ -5611,7 +5607,8 @@ static int macb_probe(struct platform_device *pdev) if (!macb_config) macb_config = &default_gem_config; - err = macb_config->clk_init(pdev, &pclk, &hclk, &tx_clk, &rx_clk, &tsu_clk); + err = macb_clk_init(pdev, &pclk, &hclk, &tx_clk, &rx_clk, &tsu_clk, + macb_config); if (err) return err; From 9179711ee2f70f3ba1d56d5c2e9fce04fb754198 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Tue, 10 Mar 2026 18:12:08 +0800 Subject: [PATCH 0444/1561] net: macb: Clean up the .init settings in macb_config instances All instances of macb_config currently have the .init field set, but most of them use macb_init(). In fact, there is no need to duplicate this across all macb_config instances. Introduce a new macb_init() function that executes the specific .init if it is set; otherwise, it runs a default initialization function. Signed-off-by: Kevin Hao Link: https://patch.msgid.link/20260310-macb-cleanup-v1-2-928c1a91a7dc@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 32 +++++++++++------------- 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 82d3f6017287..27ec1f16c4f1 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -4646,7 +4646,7 @@ static int macb_clk_init(struct platform_device *pdev, struct clk **pclk, tsu_clk); } -static int macb_init(struct platform_device *pdev) +static int macb_init_dflt(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); unsigned int hw_q, q; @@ -4801,6 +4801,15 @@ static int macb_init(struct platform_device *pdev) return 0; } +static int macb_init(struct platform_device *pdev, + const struct macb_config *config) +{ + if (config->init) + return config->init(pdev); + else + return macb_init_dflt(pdev); +} + static const struct macb_usrio_config macb_default_usrio = { .mii = MACB_BIT(MII), .rmii = MACB_BIT(RMII), @@ -5301,7 +5310,7 @@ static int fu540_c000_init(struct platform_device *pdev) if (IS_ERR(mgmt->reg)) return PTR_ERR(mgmt->reg); - return macb_init(pdev); + return macb_init_dflt(pdev); } static int init_reset_optional(struct platform_device *pdev) @@ -5351,7 +5360,7 @@ static int init_reset_optional(struct platform_device *pdev) return dev_err_probe(&pdev->dev, ret, "failed to reset controller"); } - ret = macb_init(pdev); + ret = macb_init_dflt(pdev); err_out_phy_exit: if (ret) @@ -5376,7 +5385,7 @@ static int eyeq5_init(struct platform_device *pdev) if (ret) return dev_err_probe(dev, ret, "failed to init PHY\n"); - ret = macb_init(pdev); + ret = macb_init_dflt(pdev); if (ret) phy_exit(bp->phy); return ret; @@ -5402,28 +5411,24 @@ static const struct macb_config fu540_c000_config = { static const struct macb_config at91sam9260_config = { .caps = MACB_CAPS_USRIO_HAS_CLKEN | MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII, - .init = macb_init, .usrio = &macb_default_usrio, }; static const struct macb_config sama5d3macb_config = { .caps = MACB_CAPS_SG_DISABLED | MACB_CAPS_USRIO_HAS_CLKEN | MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII, - .init = macb_init, .usrio = &macb_default_usrio, }; static const struct macb_config pc302gem_config = { .caps = MACB_CAPS_SG_DISABLED | MACB_CAPS_GIGABIT_MODE_AVAILABLE, .dma_burst_length = 16, - .init = macb_init, .usrio = &macb_default_usrio, }; static const struct macb_config sama5d2_config = { .caps = MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII | MACB_CAPS_JUMBO, .dma_burst_length = 16, - .init = macb_init, .jumbo_max_len = 10240, .usrio = &macb_default_usrio, }; @@ -5431,7 +5436,6 @@ static const struct macb_config sama5d2_config = { static const struct macb_config sama5d29_config = { .caps = MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII | MACB_CAPS_GEM_HAS_PTP, .dma_burst_length = 16, - .init = macb_init, .usrio = &macb_default_usrio, }; @@ -5439,7 +5443,6 @@ static const struct macb_config sama5d3_config = { .caps = MACB_CAPS_SG_DISABLED | MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII | MACB_CAPS_JUMBO, .dma_burst_length = 16, - .init = macb_init, .jumbo_max_len = 10240, .usrio = &macb_default_usrio, }; @@ -5447,7 +5450,6 @@ static const struct macb_config sama5d3_config = { static const struct macb_config sama5d4_config = { .caps = MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII, .dma_burst_length = 4, - .init = macb_init, .usrio = &macb_default_usrio, }; @@ -5460,7 +5462,6 @@ static const struct macb_config emac_config = { static const struct macb_config np4_config = { .caps = MACB_CAPS_USRIO_DISABLED, - .init = macb_init, .usrio = &macb_default_usrio, }; @@ -5478,7 +5479,6 @@ static const struct macb_config zynq_config = { .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_NO_GIGABIT_HALF | MACB_CAPS_NEEDS_RSTONUBR, .dma_burst_length = 16, - .init = macb_init, .usrio = &macb_default_usrio, }; @@ -5498,7 +5498,6 @@ static const struct macb_config sama7g5_gem_config = { MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII | MACB_CAPS_MIIONRGMII | MACB_CAPS_GEM_HAS_PTP, .dma_burst_length = 16, - .init = macb_init, .usrio = &sama7g5_usrio, }; @@ -5507,7 +5506,6 @@ static const struct macb_config sama7g5_emac_config = { MACB_CAPS_USRIO_HAS_CLKEN | MACB_CAPS_MIIONRGMII | MACB_CAPS_GEM_HAS_PTP, .dma_burst_length = 16, - .init = macb_init, .usrio = &sama7g5_usrio, }; @@ -5538,7 +5536,6 @@ static const struct macb_config raspberrypi_rp1_config = { MACB_CAPS_GEM_HAS_PTP | MACB_CAPS_EEE, .dma_burst_length = 16, - .init = macb_init, .usrio = &macb_default_usrio, .jumbo_max_len = 10240, }; @@ -5578,7 +5575,6 @@ static const struct macb_config default_gem_config = { MACB_CAPS_JUMBO | MACB_CAPS_GEM_HAS_PTP, .dma_burst_length = 16, - .init = macb_init, .usrio = &macb_default_usrio, .jumbo_max_len = 10240, }; @@ -5748,7 +5744,7 @@ static int macb_probe(struct platform_device *pdev) bp->phy_interface = interface; /* IP specific init */ - err = macb_config->init(pdev); + err = macb_init(pdev, macb_config); if (err) goto err_out_free_netdev; From 0ae998c4efd69fd5e331db7844b8cfaf07d47aea Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Tue, 10 Mar 2026 18:12:09 +0800 Subject: [PATCH 0445/1561] net: macb: Clean up the .usrio settings in macb_config instances All instances of macb_config currently have the .usrio set, but most of them use &macb_default_usrio. In fact, there is no need to duplicate this across all macb_config instances. Remove the .usrio setting from instances that use &macb_default_usrio, and ensure that the default is selected at runtime when no other value is explicitly set. Signed-off-by: Kevin Hao Link: https://patch.msgid.link/20260310-macb-cleanup-v1-3-928c1a91a7dc@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 27ec1f16c4f1..0524109d4753 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -5406,37 +5406,31 @@ static const struct macb_config fu540_c000_config = { .clk_init = fu540_c000_clk_init, .init = fu540_c000_init, .jumbo_max_len = 10240, - .usrio = &macb_default_usrio, }; static const struct macb_config at91sam9260_config = { .caps = MACB_CAPS_USRIO_HAS_CLKEN | MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII, - .usrio = &macb_default_usrio, }; static const struct macb_config sama5d3macb_config = { .caps = MACB_CAPS_SG_DISABLED | MACB_CAPS_USRIO_HAS_CLKEN | MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII, - .usrio = &macb_default_usrio, }; static const struct macb_config pc302gem_config = { .caps = MACB_CAPS_SG_DISABLED | MACB_CAPS_GIGABIT_MODE_AVAILABLE, .dma_burst_length = 16, - .usrio = &macb_default_usrio, }; static const struct macb_config sama5d2_config = { .caps = MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII | MACB_CAPS_JUMBO, .dma_burst_length = 16, .jumbo_max_len = 10240, - .usrio = &macb_default_usrio, }; static const struct macb_config sama5d29_config = { .caps = MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII | MACB_CAPS_GEM_HAS_PTP, .dma_burst_length = 16, - .usrio = &macb_default_usrio, }; static const struct macb_config sama5d3_config = { @@ -5444,25 +5438,21 @@ static const struct macb_config sama5d3_config = { MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII | MACB_CAPS_JUMBO, .dma_burst_length = 16, .jumbo_max_len = 10240, - .usrio = &macb_default_usrio, }; static const struct macb_config sama5d4_config = { .caps = MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII, .dma_burst_length = 4, - .usrio = &macb_default_usrio, }; static const struct macb_config emac_config = { .caps = MACB_CAPS_NEEDS_RSTONUBR | MACB_CAPS_MACB_IS_EMAC, .clk_init = at91ether_clk_init, .init = at91ether_init, - .usrio = &macb_default_usrio, }; static const struct macb_config np4_config = { .caps = MACB_CAPS_USRIO_DISABLED, - .usrio = &macb_default_usrio, }; static const struct macb_config zynqmp_config = { @@ -5472,14 +5462,12 @@ static const struct macb_config zynqmp_config = { .dma_burst_length = 16, .init = init_reset_optional, .jumbo_max_len = 10240, - .usrio = &macb_default_usrio, }; static const struct macb_config zynq_config = { .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_NO_GIGABIT_HALF | MACB_CAPS_NEEDS_RSTONUBR, .dma_burst_length = 16, - .usrio = &macb_default_usrio, }; static const struct macb_config mpfs_config = { @@ -5488,7 +5476,6 @@ static const struct macb_config mpfs_config = { MACB_CAPS_GEM_HAS_PTP, .dma_burst_length = 16, .init = init_reset_optional, - .usrio = &macb_default_usrio, .max_tx_length = 4040, /* Cadence Erratum 1686 */ .jumbo_max_len = 4040, }; @@ -5517,7 +5504,6 @@ static const struct macb_config versal_config = { .dma_burst_length = 16, .init = init_reset_optional, .jumbo_max_len = 10240, - .usrio = &macb_default_usrio, }; static const struct macb_config eyeq5_config = { @@ -5527,7 +5513,6 @@ static const struct macb_config eyeq5_config = { .dma_burst_length = 16, .init = eyeq5_init, .jumbo_max_len = 10240, - .usrio = &macb_default_usrio, }; static const struct macb_config raspberrypi_rp1_config = { @@ -5536,7 +5521,6 @@ static const struct macb_config raspberrypi_rp1_config = { MACB_CAPS_GEM_HAS_PTP | MACB_CAPS_EEE, .dma_burst_length = 16, - .usrio = &macb_default_usrio, .jumbo_max_len = 10240, }; @@ -5575,7 +5559,6 @@ static const struct macb_config default_gem_config = { MACB_CAPS_JUMBO | MACB_CAPS_GEM_HAS_PTP, .dma_burst_length = 16, - .usrio = &macb_default_usrio, .jumbo_max_len = 10240, }; @@ -5662,7 +5645,7 @@ static int macb_probe(struct platform_device *pdev) bp->wol = 0; device_set_wakeup_capable(&pdev->dev, 1); - bp->usrio = macb_config->usrio; + bp->usrio = macb_config->usrio ? : &macb_default_usrio; /* By default we set to partial store and forward mode for zynqmp. * Disable if not set in devicetree. From 82562972b85469e23fb787f78c1dea6ad6b16af4 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 10 Mar 2026 13:58:02 +0200 Subject: [PATCH 0446/1561] selftests: net: pass bpftrace timeout to cmd() The bpftrace() helper configures an interval based exit timer but does not propagate the timeout to the cmd object, which defaults to 5 seconds. Since the default BPFTRACE_TIMEOUT is 10 seconds, cmd.process() always raises a TimeoutExpired exception before bpftrace has a chance to exit gracefully. Pass timeout+5 to cmd() to allow bpftrace to complete gracefully. Note: this issue is masked by a bug in the way cmd() passes timeout, this is fixed in the next commit. Reviewed-by: Nimrod Oren Signed-off-by: Gal Pressman Link: https://patch.msgid.link/20260310115803.2521050-2-gal@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/lib/py/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py index 5ffd9fb2a8ce..a6d7fe15e6e4 100644 --- a/tools/testing/selftests/net/lib/py/utils.py +++ b/tools/testing/selftests/net/lib/py/utils.py @@ -258,8 +258,9 @@ def bpftrace(expr, json=None, ns=None, host=None, timeout=None): cmd_arr += ['-f', 'json', '-q'] if timeout: expr += ' interval:s:' + str(timeout) + ' { exit(); }' + timeout += 5 cmd_arr += ['-e', expr] - cmd_obj = cmd(cmd_arr, ns=ns, host=host, shell=False) + cmd_obj = cmd(cmd_arr, ns=ns, host=host, shell=False, timeout=timeout) if json: # bpftrace prints objects as lines ret = {} From f0bd19316663710de157e85afd62058312aa97e1 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 10 Mar 2026 13:58:03 +0200 Subject: [PATCH 0447/1561] selftests: net: fix timeout passed as positional argument to communicate() The cited commit refactored the hardcoded timeout=5 into a parameter, but dropped the keyword from the communicate() call. Since Popen.communicate()'s first positional argument is 'input' (not 'timeout'), the timeout value is silently treated as stdin input and the call never enforces a timeout. Pass timeout as a keyword argument to restore the intended behavior. Reviewed-by: Nimrod Oren Signed-off-by: Gal Pressman Link: https://patch.msgid.link/20260310115803.2521050-3-gal@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/lib/py/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py index a6d7fe15e6e4..17adc2b67ee0 100644 --- a/tools/testing/selftests/net/lib/py/utils.py +++ b/tools/testing/selftests/net/lib/py/utils.py @@ -93,7 +93,7 @@ class cmd: def _process_terminate(self, terminate, timeout): if terminate: self.proc.terminate() - stdout, stderr = self.proc.communicate(timeout) + stdout, stderr = self.proc.communicate(timeout=timeout) self.stdout = stdout.decode("utf-8") self.stderr = stderr.decode("utf-8") self.proc.stdout.close() From dc9902bbd480aae510b885b67cd30cd04cfce3a8 Mon Sep 17 00:00:00 2001 From: Wesley Atwell Date: Mon, 9 Mar 2026 19:26:04 -0600 Subject: [PATCH 0448/1561] tcp: use WRITE_ONCE() for tsoffset in tcp_v6_connect() Commit dd23c9f1e8d5 ("tcp: annotate data-races around tp->tsoffset") updated do_tcp_getsockopt() to read tp->tsoffset with READ_ONCE() for TCP_TIMESTAMP because another CPU may change it concurrently. tcp_v6_connect() still stores tp->tsoffset with a plain write. That store runs under lock_sock() via inet_stream_connect(), but the socket lock does not serialize a concurrent getsockopt(TCP_TIMESTAMP) from another task sharing the socket. Use WRITE_ONCE() for the tcp_v6_connect() store so the connect-time writer matches the lockless TCP_TIMESTAMP reader. This also makes the IPv6 path consistent with tcp_v4_connect(). Signed-off-by: Wesley Atwell Reviewed-by: Eric Dumazet Reviewed-by: Jiayuan Chen Link: https://patch.msgid.link/20260310012604.145661-1-atwellwea@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv6/tcp_ipv6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 074b83c9a551..8dc3874e8b92 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -325,7 +325,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr_unsized *uaddr, inet->inet_dport); if (!tp->write_seq) WRITE_ONCE(tp->write_seq, st.seq); - tp->tsoffset = st.ts_off; + WRITE_ONCE(tp->tsoffset, st.ts_off); } if (tcp_fastopen_defer_connect(sk, &err)) From 17edc4e820bf8b4c7737c1de86c267e6974d543a Mon Sep 17 00:00:00 2001 From: "Christophe Leroy (CS GROUP)" Date: Tue, 10 Mar 2026 12:33:36 +0100 Subject: [PATCH 0449/1561] net: Convert move_addr_to_user() to scoped user access move_addr_to_user() is a critical functions that was converted to masked user access by commit 1fb0e471611d ("net: remove one stac/clac pair from move_addr_to_user()") Convert it to scoped user access to simplify the code. Signed-off-by: Christophe Leroy (CS GROUP) Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/36d7f2e7f504d620c1b88526b25ebc89e3cb61d9.1773142315.git.chleroy@kernel.org Signed-off-by: Jakub Kicinski --- net/socket.c | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/net/socket.c b/net/socket.c index 68829d09bcf1..ade2ff5845a0 100644 --- a/net/socket.c +++ b/net/socket.c @@ -280,23 +280,18 @@ static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen, BUG_ON(klen > sizeof(struct sockaddr_storage)); - if (can_do_masked_user_access()) - ulen = masked_user_access_begin(ulen); - else if (!user_access_begin(ulen, 4)) - return -EFAULT; + scoped_user_rw_access_size(ulen, 4, efault_end) { + unsafe_get_user(len, ulen, efault_end); - unsafe_get_user(len, ulen, efault_end); - - if (len > klen) - len = klen; - /* - * "fromlen shall refer to the value before truncation.." - * 1003.1g - */ - if (len >= 0) - unsafe_put_user(klen, ulen, efault_end); - - user_access_end(); + if (len > klen) + len = klen; + /* + * "fromlen shall refer to the value before truncation.." + * 1003.1g + */ + if (len >= 0) + unsafe_put_user(klen, ulen, efault_end); + } if (len) { if (len < 0) @@ -309,7 +304,6 @@ static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen, return 0; efault_end: - user_access_end(); return -EFAULT; } From 0de607dc4fd80ede3b2a35e8a72f99c7a0bbc321 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 4 Mar 2026 23:00:27 +0000 Subject: [PATCH 0450/1561] vsock: add G2H fallback for CIDs not owned by H2G transport When no H2G transport is loaded, vsock currently routes all CIDs to the G2H transport (commit 65b422d9b61b ("vsock: forward all packets to the host when no H2G is registered"). Extend that existing behavior: when an H2G transport is loaded but does not claim a given CID, the connection falls back to G2H in the same way. This matters in environments like Nitro Enclaves, where an instance may run nested VMs via vhost-vsock (H2G) while also needing to reach sibling enclaves at higher CIDs through virtio-vsock-pci (G2H). With the old code, any CID > 2 was unconditionally routed to H2G when vhost was loaded, making those enclaves unreachable without setting VMADDR_FLAG_TO_HOST explicitly on every connect. Requiring every application to set VMADDR_FLAG_TO_HOST creates friction: tools like socat, iperf, and others would all need to learn about it. The flag was introduced 6 years ago and I am still not aware of any tool that supports it. Even if there was support, it would be cumbersome to use. The most natural experience is a single CID address space where H2G only wins for CIDs it actually owns, and everything else falls through to G2H, extending the behavior that already exists when H2G is absent. To give user space at least a hint that the kernel applied this logic, automatically set the VMADDR_FLAG_TO_HOST on the remote address so it can determine the path taken via getpeername(). Add a per-network namespace sysctl net.vsock.g2h_fallback (default 1). At 0 it forces strict routing: H2G always wins for CID > VMADDR_CID_HOST, or ENODEV if H2G is not loaded. Signed-off-by: Alexander Graf Tested-by: syzbot@syzkaller.appspotmail.com Reviewed-by: Stefano Garzarella Link: https://patch.msgid.link/20260304230027.59857-1-graf@amazon.com Signed-off-by: Paolo Abeni --- Documentation/admin-guide/sysctl/net.rst | 28 +++++++++++++++++++ drivers/vhost/vsock.c | 13 +++++++++ include/net/af_vsock.h | 9 ++++++ include/net/netns/vsock.h | 2 ++ net/vmw_vsock/af_vsock.c | 35 ++++++++++++++++++++---- net/vmw_vsock/virtio_transport.c | 7 +++++ 6 files changed, 89 insertions(+), 5 deletions(-) diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst index 3b2ad61995d4..0724a793798f 100644 --- a/Documentation/admin-guide/sysctl/net.rst +++ b/Documentation/admin-guide/sysctl/net.rst @@ -602,3 +602,31 @@ it does not modify the current namespace or any existing children. A namespace with ``ns_mode`` set to ``local`` cannot change ``child_ns_mode`` to ``global`` (returns ``-EPERM``). + +g2h_fallback +------------ + +Controls whether connections to CIDs not owned by the host-to-guest (H2G) +transport automatically fall back to the guest-to-host (G2H) transport. + +When enabled, if a connect targets a CID that the H2G transport (e.g. +vhost-vsock) does not serve, or if no H2G transport is loaded at all, the +connection is routed via the G2H transport (e.g. virtio-vsock) instead. This +allows a host running both nested VMs (via vhost-vsock) and sibling VMs +reachable through the hypervisor (e.g. Nitro Enclaves) to address both using +a single CID space, without requiring applications to set +``VMADDR_FLAG_TO_HOST``. + +When the fallback is taken, ``VMADDR_FLAG_TO_HOST`` is automatically set on +the remote address so that userspace can determine the path via +``getpeername()``. + +Note: With this sysctl enabled, user space that attempts to talk to a guest +CID which is not implemented by the H2G transport will create host vsock +traffic. Environments that rely on H2G-only isolation should set it to 0. + +Values: + + - 0 - Connections to CIDs <= 2 or with VMADDR_FLAG_TO_HOST use G2H; + all others use H2G (or fail with ENODEV if H2G is not loaded). + - 1 - Connections to CIDs not owned by H2G fall back to G2H. (default) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 054f7a718f50..1d8ec6bed53e 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -91,6 +91,18 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid, struct net *net) return NULL; } +static bool vhost_transport_has_remote_cid(struct vsock_sock *vsk, u32 cid) +{ + struct sock *sk = sk_vsock(vsk); + struct net *net = sock_net(sk); + bool found; + + rcu_read_lock(); + found = !!vhost_vsock_get(cid, net); + rcu_read_unlock(); + return found; +} + static void vhost_transport_do_send_pkt(struct vhost_vsock *vsock, struct vhost_virtqueue *vq) @@ -424,6 +436,7 @@ static struct virtio_transport vhost_transport = { .module = THIS_MODULE, .get_local_cid = vhost_transport_get_local_cid, + .has_remote_cid = vhost_transport_has_remote_cid, .init = virtio_transport_do_socket_init, .destruct = virtio_transport_destruct, diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index 533d8e75f7bb..4e40063adab4 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -179,6 +179,15 @@ struct vsock_transport { /* Addressing. */ u32 (*get_local_cid)(void); + /* Check if this transport serves a specific remote CID. + * For H2G transports: return true if the CID belongs to a registered + * guest. If not implemented, all CIDs > VMADDR_CID_HOST go to H2G. + * For G2H transports: return true if the transport can reach arbitrary + * CIDs via the hypervisor (i.e. supports the fallback overlay). VMCI + * does not implement this as it only serves CIDs 0 and 2. + */ + bool (*has_remote_cid)(struct vsock_sock *vsk, u32 remote_cid); + /* Read a single skb */ int (*read_skb)(struct vsock_sock *, skb_read_actor_t); diff --git a/include/net/netns/vsock.h b/include/net/netns/vsock.h index dc8cbe45f406..7f84aad92f57 100644 --- a/include/net/netns/vsock.h +++ b/include/net/netns/vsock.h @@ -20,5 +20,7 @@ struct netns_vsock { /* 0 = unlocked, 1 = locked to global, 2 = locked to local */ int child_ns_mode_locked; + + int g2h_fallback; }; #endif /* __NET_NET_NAMESPACE_VSOCK_H */ diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index f0ab2f13e9db..cc4b225250b9 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -545,9 +545,13 @@ static void vsock_deassign_transport(struct vsock_sock *vsk) * The vsk->remote_addr is used to decide which transport to use: * - remote CID == VMADDR_CID_LOCAL or g2h->local_cid or VMADDR_CID_HOST if * g2h is not loaded, will use local transport; - * - remote CID <= VMADDR_CID_HOST or h2g is not loaded or remote flags field - * includes VMADDR_FLAG_TO_HOST flag value, will use guest->host transport; - * - remote CID > VMADDR_CID_HOST will use host->guest transport; + * - remote CID <= VMADDR_CID_HOST or remote flags field includes + * VMADDR_FLAG_TO_HOST, will use guest->host transport; + * - remote CID > VMADDR_CID_HOST and h2g is loaded and h2g claims that CID, + * will use host->guest transport; + * - h2g not loaded or h2g does not claim that CID and g2h claims the CID via + * has_remote_cid, will use guest->host transport (when g2h_fallback=1) + * - anything else goes to h2g or returns -ENODEV if no h2g is available */ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) { @@ -581,11 +585,21 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) case SOCK_SEQPACKET: if (vsock_use_local_transport(remote_cid)) new_transport = transport_local; - else if (remote_cid <= VMADDR_CID_HOST || !transport_h2g || + else if (remote_cid <= VMADDR_CID_HOST || (remote_flags & VMADDR_FLAG_TO_HOST)) new_transport = transport_g2h; - else + else if (transport_h2g && + (!transport_h2g->has_remote_cid || + transport_h2g->has_remote_cid(vsk, remote_cid))) new_transport = transport_h2g; + else if (sock_net(sk)->vsock.g2h_fallback && + transport_g2h && transport_g2h->has_remote_cid && + transport_g2h->has_remote_cid(vsk, remote_cid)) { + vsk->remote_addr.svm_flags |= VMADDR_FLAG_TO_HOST; + new_transport = transport_g2h; + } else { + new_transport = transport_h2g; + } break; default: ret = -ESOCKTNOSUPPORT; @@ -2879,6 +2893,15 @@ static struct ctl_table vsock_table[] = { .mode = 0644, .proc_handler = vsock_net_child_mode_string }, + { + .procname = "g2h_fallback", + .data = &init_net.vsock.g2h_fallback, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, }; static int __net_init vsock_sysctl_register(struct net *net) @@ -2894,6 +2917,7 @@ static int __net_init vsock_sysctl_register(struct net *net) table[0].data = &net->vsock.mode; table[1].data = &net->vsock.child_ns_mode; + table[2].data = &net->vsock.g2h_fallback; } net->vsock.sysctl_hdr = register_net_sysctl_sz(net, "net/vsock", table, @@ -2928,6 +2952,7 @@ static void vsock_net_init(struct net *net) net->vsock.mode = vsock_net_child_mode(current->nsproxy->net_ns); net->vsock.child_ns_mode = net->vsock.mode; + net->vsock.g2h_fallback = 1; } static __net_init int vsock_sysctl_init_net(struct net *net) diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 77fe5b7b066c..57f2d6ec3ffc 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -547,11 +547,18 @@ bool virtio_transport_stream_allow(struct vsock_sock *vsk, u32 cid, u32 port) static bool virtio_transport_seqpacket_allow(struct vsock_sock *vsk, u32 remote_cid); +static bool virtio_transport_has_remote_cid(struct vsock_sock *vsk, u32 cid) +{ + /* The CID could be implemented by the host. Always assume it is. */ + return true; +} + static struct virtio_transport virtio_transport = { .transport = { .module = THIS_MODULE, .get_local_cid = virtio_transport_get_local_cid, + .has_remote_cid = virtio_transport_has_remote_cid, .init = virtio_transport_do_socket_init, .destruct = virtio_transport_destruct, From 15b5be9389bef46884d9f970b643fedeea19105c Mon Sep 17 00:00:00 2001 From: Fan Gong Date: Tue, 10 Mar 2026 09:04:49 +0800 Subject: [PATCH 0451/1561] hinic3: Add command queue detailed-response interfaces Add new detailed response interfaces for the hinic3 command queue (CMDQ), enhancing its functionality to handle commands requiring input and output buffer pairs. Co-developed-by: Zhu Yikai Signed-off-by: Zhu Yikai Signed-off-by: Fan Gong Link: https://patch.msgid.link/cc3cff8458aeb27b07749dc9dcee43c11c45a4c1.1773062356.git.zhuyikai1@h-partners.com Signed-off-by: Paolo Abeni --- .../net/ethernet/huawei/hinic3/hinic3_cmdq.c | 166 ++++++++++++++++-- .../net/ethernet/huawei/hinic3/hinic3_cmdq.h | 15 ++ 2 files changed, 169 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_cmdq.c b/drivers/net/ethernet/huawei/hinic3/hinic3_cmdq.c index bc5b80c31693..4e63280beea5 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_cmdq.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_cmdq.c @@ -61,6 +61,10 @@ #define CMDQ_DB_HEAD_SET(val, member) \ FIELD_PREP(CMDQ_DB_HEAD_##member##_MASK, val) +#define SAVED_DATA_ARM_MASK BIT(31) +#define SAVED_DATA_SET(val, member) \ + FIELD_PREP(SAVED_DATA_##member##_MASK, val) + #define CMDQ_CEQE_TYPE_MASK GENMASK(2, 0) #define CMDQ_CEQE_GET(val, member) \ FIELD_GET(CMDQ_CEQE_##member##_MASK, le32_to_cpu(val)) @@ -84,6 +88,10 @@ enum cmdq_data_format { CMDQ_DATA_DIRECT = 1, }; +enum cmdq_scmd_type { + CMDQ_SET_ARM_CMD = 2, +}; + enum cmdq_ctrl_sect_len { CMDQ_CTRL_SECT_LEN = 1, CMDQ_CTRL_DIRECT_SECT_LEN = 2, @@ -166,6 +174,11 @@ static void cmdq_clear_cmd_buf(struct hinic3_cmdq_cmd_info *cmd_info, hinic3_free_cmd_buf(hwdev, cmd_info->buf_in); cmd_info->buf_in = NULL; } + + if (cmd_info->buf_out) { + hinic3_free_cmd_buf(hwdev, cmd_info->buf_out); + cmd_info->buf_out = NULL; + } } static void clear_wqe_complete_bit(struct hinic3_cmdq *cmdq, @@ -189,6 +202,20 @@ static void clear_wqe_complete_bit(struct hinic3_cmdq *cmdq, hinic3_wq_put_wqebbs(&cmdq->wq, CMDQ_WQE_NUM_WQEBBS); } +static int cmdq_arm_ceq_handler(struct hinic3_cmdq *cmdq, + struct cmdq_wqe *wqe, u16 ci) +{ + struct cmdq_ctrl *ctrl = &wqe->wqe_scmd.ctrl; + __le32 ctrl_info = ctrl->ctrl_info; + + if (!CMDQ_WQE_COMPLETED(ctrl_info)) + return -EBUSY; + + clear_wqe_complete_bit(cmdq, wqe, ci); + + return 0; +} + static void cmdq_update_cmd_status(struct hinic3_cmdq *cmdq, u16 prod_idx, struct cmdq_wqe *wqe) { @@ -257,6 +284,11 @@ void hinic3_cmdq_ceq_handler(struct hinic3_hwdev *hwdev, __le32 ceqe_data) cmdq_clear_cmd_buf(cmd_info, hwdev); clear_wqe_complete_bit(cmdq, wqe, ci); break; + case HINIC3_CMD_TYPE_SET_ARM: + /* arm_bit was set until here */ + if (cmdq_arm_ceq_handler(cmdq, wqe, ci)) + return; + break; default: /* only arm bit is using scmd wqe, * the other wqe is lcmd @@ -283,6 +315,18 @@ void hinic3_cmdq_ceq_handler(struct hinic3_hwdev *hwdev, __le32 ceqe_data) } } +static int cmdq_params_valid(const struct hinic3_hwdev *hwdev, + const struct hinic3_cmd_buf *buf_in) +{ + if (le16_to_cpu(buf_in->size) > CMDQ_BUF_SIZE) { + dev_err(hwdev->dev, "Invalid CMDQ buffer size: 0x%x\n", + le16_to_cpu(buf_in->size)); + return -EINVAL; + } + + return 0; +} + static int wait_cmdqs_enable(struct hinic3_cmdqs *cmdqs) { unsigned long end; @@ -356,6 +400,7 @@ static void cmdq_prepare_wqe_ctrl(struct cmdq_wqe *wqe, u8 wrapped, enum cmdq_bufdesc_len buf_len) { struct cmdq_header *hdr = CMDQ_WQE_HEADER(wqe); + __le32 saved_data = hdr->saved_data; enum cmdq_ctrl_sect_len ctrl_len; struct cmdq_wqe_lcmd *wqe_lcmd; struct cmdq_wqe_scmd *wqe_scmd; @@ -386,6 +431,11 @@ static void cmdq_prepare_wqe_ctrl(struct cmdq_wqe *wqe, u8 wrapped, CMDQ_WQE_HDR_SET(3, COMPLETE_SECT_LEN) | CMDQ_WQE_HDR_SET(ctrl_len, CTRL_LEN) | CMDQ_WQE_HDR_SET(wrapped, HW_BUSY_BIT)); + + saved_data &= ~cpu_to_le32(SAVED_DATA_ARM_MASK); + if (cmd == CMDQ_SET_ARM_CMD && mod == MGMT_MOD_COMM) + saved_data |= cpu_to_le32(SAVED_DATA_SET(1, ARM)); + hdr->saved_data = saved_data; } static void cmdq_set_lcmd_wqe(struct cmdq_wqe *wqe, @@ -488,9 +538,10 @@ static int wait_cmdq_sync_cmd_completion(struct hinic3_cmdq *cmdq, return err; } -static int cmdq_sync_cmd_direct_resp(struct hinic3_cmdq *cmdq, u8 mod, u8 cmd, - struct hinic3_cmd_buf *buf_in, - __le64 *out_param) +static int cmdq_sync_cmd_exec(struct hinic3_cmdq *cmdq, u8 mod, u8 cmd, + struct hinic3_cmd_buf *buf_in, + struct hinic3_cmd_buf *buf_out, + __le64 *out_param, u8 cmd_type, u8 wqe_cmd) { struct hinic3_cmdq_cmd_info *cmd_info, saved_cmd_info; int cmpt_code = CMDQ_SEND_CMPT_CODE; @@ -520,31 +571,35 @@ static int cmdq_sync_cmd_direct_resp(struct hinic3_cmdq *cmdq, u8 mod, u8 cmd, cmd_info = &cmdq->cmd_infos[curr_prod_idx]; init_completion(&done); refcount_inc(&buf_in->ref_cnt); - cmd_info->cmd_type = HINIC3_CMD_TYPE_DIRECT_RESP; + if (buf_out) + refcount_inc(&buf_out->ref_cnt); + + cmd_info->cmd_type = cmd_type; cmd_info->done = &done; cmd_info->errcode = &errcode; cmd_info->direct_resp = out_param; cmd_info->cmpt_code = &cmpt_code; cmd_info->buf_in = buf_in; + if (buf_out) + cmd_info->buf_out = buf_out; + saved_cmd_info = *cmd_info; - cmdq_set_lcmd_wqe(&wqe, CMDQ_CMD_DIRECT_RESP, buf_in, NULL, + cmdq_set_lcmd_wqe(&wqe, wqe_cmd, buf_in, buf_out, wrapped, mod, cmd, curr_prod_idx); cmdq_wqe_fill(curr_wqe, &wqe); (cmd_info->cmdq_msg_id)++; curr_msg_id = cmd_info->cmdq_msg_id; - cmdq_set_db(cmdq, HINIC3_CMDQ_SYNC, next_prod_idx); + cmdq_set_db(cmdq, cmdq->cmdq_type, next_prod_idx); spin_unlock_bh(&cmdq->cmdq_lock); err = wait_cmdq_sync_cmd_completion(cmdq, cmd_info, &saved_cmd_info, curr_msg_id, curr_prod_idx, curr_wqe, CMDQ_CMD_TIMEOUT); - if (err) { + if (err) dev_err(cmdq->hwdev->dev, "Cmdq sync command timeout, mod: %u, cmd: %u, prod idx: 0x%x\n", mod, cmd, curr_prod_idx); - err = -ETIMEDOUT; - } if (cmpt_code == CMDQ_FORCE_STOP_CMPT_CODE) { dev_dbg(cmdq->hwdev->dev, @@ -557,22 +612,107 @@ static int cmdq_sync_cmd_direct_resp(struct hinic3_cmdq *cmdq, u8 mod, u8 cmd, return err ? err : errcode; } +static int cmdq_sync_cmd_direct_resp(struct hinic3_cmdq *cmdq, u8 mod, u8 cmd, + struct hinic3_cmd_buf *buf_in, + __le64 *out_param) +{ + return cmdq_sync_cmd_exec(cmdq, mod, cmd, buf_in, NULL, out_param, + HINIC3_CMD_TYPE_DIRECT_RESP, + CMDQ_CMD_DIRECT_RESP); +} + +static int cmdq_sync_cmd_detail_resp(struct hinic3_cmdq *cmdq, u8 mod, u8 cmd, + struct hinic3_cmd_buf *buf_in, + struct hinic3_cmd_buf *buf_out, + __le64 *out_param) +{ + return cmdq_sync_cmd_exec(cmdq, mod, cmd, buf_in, buf_out, out_param, + HINIC3_CMD_TYPE_SGE_RESP, + CMDQ_CMD_SGE_RESP); +} + +int hinic3_cmd_buf_pair_init(struct hinic3_hwdev *hwdev, + struct hinic3_cmd_buf_pair *pair) +{ + pair->in = hinic3_alloc_cmd_buf(hwdev); + if (!pair->in) + goto err_out; + + pair->out = hinic3_alloc_cmd_buf(hwdev); + if (!pair->out) + goto err_free_cmd_buf_in; + + return 0; + +err_free_cmd_buf_in: + hinic3_free_cmd_buf(hwdev, pair->in); +err_out: + return -ENOMEM; +} + +void hinic3_cmd_buf_pair_uninit(struct hinic3_hwdev *hwdev, + struct hinic3_cmd_buf_pair *pair) +{ + hinic3_free_cmd_buf(hwdev, pair->in); + hinic3_free_cmd_buf(hwdev, pair->out); +} + int hinic3_cmdq_direct_resp(struct hinic3_hwdev *hwdev, u8 mod, u8 cmd, struct hinic3_cmd_buf *buf_in, __le64 *out_param) { struct hinic3_cmdqs *cmdqs; int err; + err = cmdq_params_valid(hwdev, buf_in); + if (err) { + dev_err(hwdev->dev, "Invalid CMDQ parameters\n"); + goto err_out; + } cmdqs = hwdev->cmdqs; err = wait_cmdqs_enable(cmdqs); if (err) { dev_err(hwdev->dev, "Cmdq is disabled\n"); - return err; + goto err_out; } err = cmdq_sync_cmd_direct_resp(&cmdqs->cmdq[HINIC3_CMDQ_SYNC], mod, cmd, buf_in, out_param); + if (err) + goto err_out; + return 0; + +err_out: + return err; +} + +int hinic3_cmdq_detail_resp(struct hinic3_hwdev *hwdev, u8 mod, u8 cmd, + struct hinic3_cmd_buf *buf_in, + struct hinic3_cmd_buf *buf_out, __le64 *out_param) +{ + struct hinic3_cmdqs *cmdqs; + int err; + + err = cmdq_params_valid(hwdev, buf_in); + if (err) + goto err_out; + + cmdqs = hwdev->cmdqs; + + err = wait_cmdqs_enable(cmdqs); + if (err) { + dev_err(hwdev->dev, "Cmdq is disabled\n"); + goto err_out; + } + + err = cmdq_sync_cmd_detail_resp(&cmdqs->cmdq[HINIC3_CMDQ_SYNC], + mod, cmd, buf_in, buf_out, out_param); + if (err) + goto err_out; + + return 0; + +err_out: return err; } @@ -758,7 +898,8 @@ static int init_cmdqs(struct hinic3_hwdev *hwdev) static void cmdq_flush_sync_cmd(struct hinic3_cmdq_cmd_info *cmd_info) { - if (cmd_info->cmd_type != HINIC3_CMD_TYPE_DIRECT_RESP) + if (cmd_info->cmd_type != HINIC3_CMD_TYPE_DIRECT_RESP && + cmd_info->cmd_type != HINIC3_CMD_TYPE_SGE_RESP) return; cmd_info->cmd_type = HINIC3_CMD_TYPE_FORCE_STOP; @@ -785,7 +926,8 @@ static void hinic3_cmdq_flush_cmd(struct hinic3_cmdq *cmdq) while (cmdq_read_wqe(&cmdq->wq, &ci)) { hinic3_wq_put_wqebbs(&cmdq->wq, CMDQ_WQE_NUM_WQEBBS); cmd_info = &cmdq->cmd_infos[ci]; - if (cmd_info->cmd_type == HINIC3_CMD_TYPE_DIRECT_RESP) + if (cmd_info->cmd_type == HINIC3_CMD_TYPE_DIRECT_RESP || + cmd_info->cmd_type == HINIC3_CMD_TYPE_SGE_RESP) cmdq_flush_sync_cmd(cmd_info); } spin_unlock_bh(&cmdq->cmdq_lock); diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_cmdq.h b/drivers/net/ethernet/huawei/hinic3/hinic3_cmdq.h index f99c386a2780..29bad2fae7ba 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_cmdq.h +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_cmdq.h @@ -85,7 +85,9 @@ enum hinic3_cmdq_status { enum hinic3_cmdq_cmd_type { HINIC3_CMD_TYPE_NONE, + HINIC3_CMD_TYPE_SET_ARM, HINIC3_CMD_TYPE_DIRECT_RESP, + HINIC3_CMD_TYPE_SGE_RESP, HINIC3_CMD_TYPE_FAKE_TIMEOUT, HINIC3_CMD_TYPE_TIMEOUT, HINIC3_CMD_TYPE_FORCE_STOP, @@ -98,6 +100,11 @@ struct hinic3_cmd_buf { refcount_t ref_cnt; }; +struct hinic3_cmd_buf_pair { + struct hinic3_cmd_buf *in; + struct hinic3_cmd_buf *out; +}; + struct hinic3_cmdq_cmd_info { enum hinic3_cmdq_cmd_type cmd_type; struct completion *done; @@ -107,6 +114,7 @@ struct hinic3_cmdq_cmd_info { __le64 *direct_resp; u64 cmdq_msg_id; struct hinic3_cmd_buf *buf_in; + struct hinic3_cmd_buf *buf_out; }; struct hinic3_cmdq { @@ -146,8 +154,15 @@ void hinic3_free_cmd_buf(struct hinic3_hwdev *hwdev, struct hinic3_cmd_buf *cmd_buf); void hinic3_cmdq_ceq_handler(struct hinic3_hwdev *hwdev, __le32 ceqe_data); +int hinic3_cmd_buf_pair_init(struct hinic3_hwdev *hwdev, + struct hinic3_cmd_buf_pair *pair); +void hinic3_cmd_buf_pair_uninit(struct hinic3_hwdev *hwdev, + struct hinic3_cmd_buf_pair *pair); int hinic3_cmdq_direct_resp(struct hinic3_hwdev *hwdev, u8 mod, u8 cmd, struct hinic3_cmd_buf *buf_in, __le64 *out_param); +int hinic3_cmdq_detail_resp(struct hinic3_hwdev *hwdev, u8 mod, u8 cmd, + struct hinic3_cmd_buf *buf_in, + struct hinic3_cmd_buf *buf_out, __le64 *out_param); void hinic3_cmdq_flush_sync_cmd(struct hinic3_hwdev *hwdev); int hinic3_reinit_cmdq_ctxts(struct hinic3_hwdev *hwdev); From 678c5b3b6b22f2b9851058e1624156b982891ae8 Mon Sep 17 00:00:00 2001 From: Fan Gong Date: Tue, 10 Mar 2026 09:04:50 +0800 Subject: [PATCH 0452/1561] hinic3: Add Command Queue/Async Event Queue/Complete Event Queue/Mailbox dump interfaces Add dump interfaces for CMDQ, AEQ, CEQ and mailbox to enhance debugging capabilities. Dump the WQE header for CMDQ. Dump the detailed queue information for AEQ and CEQ. Dump the related register status for mailbox. Co-developed-by: Zhu Yikai Signed-off-by: Zhu Yikai Signed-off-by: Fan Gong Link: https://patch.msgid.link/1644c5021e2059594e878812339ea025ed677f71.1773062356.git.zhuyikai1@h-partners.com Signed-off-by: Paolo Abeni --- .../net/ethernet/huawei/hinic3/hinic3_cmdq.c | 19 ++++++ .../net/ethernet/huawei/hinic3/hinic3_csr.h | 2 + .../net/ethernet/huawei/hinic3/hinic3_eqs.c | 65 +++++++++++++++++++ .../net/ethernet/huawei/hinic3/hinic3_eqs.h | 5 ++ .../net/ethernet/huawei/hinic3/hinic3_mbox.c | 15 +++++ 5 files changed, 106 insertions(+) diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_cmdq.c b/drivers/net/ethernet/huawei/hinic3/hinic3_cmdq.c index 4e63280beea5..946e699cbe10 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_cmdq.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_cmdq.c @@ -6,12 +6,14 @@ #include #include "hinic3_cmdq.h" +#include "hinic3_eqs.h" #include "hinic3_hwdev.h" #include "hinic3_hwif.h" #include "hinic3_mbox.h" #define CMDQ_BUF_SIZE 2048 #define CMDQ_WQEBB_SIZE 64 +#define CMDQ_WQE_HEAD_LEN 32 #define CMDQ_CMD_TIMEOUT 5000 #define CMDQ_ENABLE_WAIT_TIMEOUT 300 @@ -114,6 +116,20 @@ enum cmdq_cmd_type { #define CMDQ_WQE_NUM_WQEBBS 1 +static void hinic3_dump_cmdq_wqe_head(struct hinic3_hwdev *hwdev, + struct cmdq_wqe *wqe) +{ + u32 *data = (u32 *)wqe; + u32 i; + + for (i = 0; i < (CMDQ_WQE_HEAD_LEN / sizeof(u32)); i += 0x4) { + dev_dbg(hwdev->dev, + "wqe data: 0x%08x, 0x%08x, 0x%08x, 0x%08x\n", + *(data + i), *(data + i + 0x1), *(data + i + 0x2), + *(data + i + 0x3)); + } +} + static struct cmdq_wqe *cmdq_read_wqe(struct hinic3_wq *wq, u16 *ci) { if (hinic3_wq_get_used(wq) == 0) @@ -279,6 +295,7 @@ void hinic3_cmdq_ceq_handler(struct hinic3_hwdev *hwdev, __le32 ceqe_data) case HINIC3_CMD_TYPE_TIMEOUT: dev_warn(hwdev->dev, "Cmdq timeout, q_id: %u, ci: %u\n", cmdq_type, ci); + hinic3_dump_cmdq_wqe_head(hwdev, wqe); fallthrough; case HINIC3_CMD_TYPE_FAKE_TIMEOUT: cmdq_clear_cmd_buf(cmd_info, hwdev); @@ -535,6 +552,8 @@ static int wait_cmdq_sync_cmd_completion(struct hinic3_cmdq *cmdq, clear_cmd_info(cmd_info, saved_cmd_info); spin_unlock_bh(&cmdq->cmdq_lock); + hinic3_dump_ceq_info(cmdq->hwdev); + return err; } diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_csr.h b/drivers/net/ethernet/huawei/hinic3/hinic3_csr.h index f7083a6e7df9..0e32ff34919e 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_csr.h +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_csr.h @@ -76,9 +76,11 @@ #define HINIC3_CSR_AEQ_CTRL_0_ADDR (HINIC3_CFG_REGS_FLAG + 0x200) #define HINIC3_CSR_AEQ_CTRL_1_ADDR (HINIC3_CFG_REGS_FLAG + 0x204) +#define HINIC3_CSR_AEQ_CONS_IDX_ADDR (HINIC3_CFG_REGS_FLAG + 0x208) #define HINIC3_CSR_AEQ_PROD_IDX_ADDR (HINIC3_CFG_REGS_FLAG + 0x20C) #define HINIC3_CSR_AEQ_CI_SIMPLE_INDIR_ADDR (HINIC3_CFG_REGS_FLAG + 0x50) +#define HINIC3_CSR_CEQ_CONS_IDX_ADDR (HINIC3_CFG_REGS_FLAG + 0x288) #define HINIC3_CSR_CEQ_PROD_IDX_ADDR (HINIC3_CFG_REGS_FLAG + 0x28c) #define HINIC3_CSR_CEQ_CI_SIMPLE_INDIR_ADDR (HINIC3_CFG_REGS_FLAG + 0x54) diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_eqs.c b/drivers/net/ethernet/huawei/hinic3/hinic3_eqs.c index 13a0c6b07660..b8ac1d7bd82b 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_eqs.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_eqs.c @@ -56,6 +56,10 @@ #define EQ_CI_SIMPLE_INDIR_SET(val, member) \ FIELD_PREP(EQ_CI_SIMPLE_INDIR_##member##_MASK, val) +#define EQ_CONS_IDX_REG_ADDR(eq) \ + (((eq)->type == HINIC3_AEQ) ? \ + HINIC3_CSR_AEQ_CONS_IDX_ADDR : HINIC3_CSR_CEQ_CONS_IDX_ADDR) + #define EQ_CI_SIMPLE_INDIR_REG_ADDR(eq) \ (((eq)->type == HINIC3_AEQ) ? \ HINIC3_CSR_AEQ_CI_SIMPLE_INDIR_ADDR : \ @@ -353,6 +357,7 @@ static irqreturn_t ceq_interrupt(int irq, void *data) struct hinic3_eq *ceq = data; int err; + ceq->soft_intr_jif = jiffies; /* clear resend timer counters */ hinic3_msix_intr_clear_resend_bit(ceq->hwdev, ceq->msix_entry_idx, EQ_MSIX_RESEND_TIMER_CLEAR); @@ -713,6 +718,39 @@ void hinic3_aeqs_free(struct hinic3_hwdev *hwdev) kfree(aeqs); } +void hinic3_dump_aeq_info(struct hinic3_hwdev *hwdev) +{ + const struct hinic3_aeq_elem *aeqe_pos; + u32 addr, ci, pi, ctrl0, idx; + struct hinic3_eq *eq; + int q_id; + + for (q_id = 0; q_id < hwdev->aeqs->num_aeqs; q_id++) { + eq = &hwdev->aeqs->aeq[q_id]; + /* Indirect access should set q_id first */ + hinic3_hwif_write_reg(eq->hwdev->hwif, + HINIC3_EQ_INDIR_IDX_ADDR(eq->type), + eq->q_id); + + addr = HINIC3_CSR_AEQ_CTRL_0_ADDR; + + ctrl0 = hinic3_hwif_read_reg(hwdev->hwif, addr); + + idx = hinic3_hwif_read_reg(hwdev->hwif, + HINIC3_EQ_INDIR_IDX_ADDR(eq->type)); + + addr = EQ_CONS_IDX_REG_ADDR(eq); + ci = hinic3_hwif_read_reg(hwdev->hwif, addr); + addr = EQ_PROD_IDX_REG_ADDR(eq); + pi = hinic3_hwif_read_reg(hwdev->hwif, addr); + aeqe_pos = get_curr_aeq_elem(eq); + dev_err(hwdev->dev, + "Aeq id: %d, idx: %u, ctrl0: 0x%08x, ci: 0x%08x, pi: 0x%x, work_state: 0x%x, wrap: %u, desc: 0x%x swci:0x%x\n", + q_id, idx, ctrl0, ci, pi, work_busy(&eq->aeq_work), + eq->wrapped, be32_to_cpu(aeqe_pos->desc), eq->cons_idx); + } +} + int hinic3_ceqs_init(struct hinic3_hwdev *hwdev, u16 num_ceqs, struct msix_entry *msix_entries) { @@ -773,3 +811,30 @@ void hinic3_ceqs_free(struct hinic3_hwdev *hwdev) kfree(ceqs); } + +void hinic3_dump_ceq_info(struct hinic3_hwdev *hwdev) +{ + struct hinic3_eq *eq; + u32 addr, ci, pi; + int q_id; + + for (q_id = 0; q_id < hwdev->ceqs->num_ceqs; q_id++) { + eq = &hwdev->ceqs->ceq[q_id]; + /* Indirect access should set q_id first */ + hinic3_hwif_write_reg(eq->hwdev->hwif, + HINIC3_EQ_INDIR_IDX_ADDR(eq->type), + eq->q_id); + + addr = EQ_CONS_IDX_REG_ADDR(eq); + ci = hinic3_hwif_read_reg(hwdev->hwif, addr); + addr = EQ_PROD_IDX_REG_ADDR(eq); + pi = hinic3_hwif_read_reg(hwdev->hwif, addr); + dev_err(hwdev->dev, + "Ceq id: %d, ci: 0x%08x, sw_ci: 0x%08x, pi: 0x%x, wrap: %u, ceqe: 0x%x\n", + q_id, ci, eq->cons_idx, pi, + eq->wrapped, be32_to_cpu(*get_curr_ceq_elem(eq))); + + dev_err(hwdev->dev, "Ceq last response soft interrupt time: %u\n", + jiffies_to_msecs(jiffies - eq->soft_intr_jif)); + } +} diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_eqs.h b/drivers/net/ethernet/huawei/hinic3/hinic3_eqs.h index 005a6e0745b3..c0fa237b270b 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_eqs.h +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_eqs.h @@ -56,6 +56,8 @@ struct hinic3_eq { u16 msix_entry_idx; char irq_name[HINIC3_EQ_IRQ_NAME_LEN]; struct work_struct aeq_work; + + u64 soft_intr_jif; }; struct hinic3_aeq_elem { @@ -110,6 +112,8 @@ int hinic3_aeq_register_cb(struct hinic3_hwdev *hwdev, hinic3_aeq_event_cb hwe_cb); void hinic3_aeq_unregister_cb(struct hinic3_hwdev *hwdev, enum hinic3_aeq_type event); +void hinic3_dump_aeq_info(struct hinic3_hwdev *hwdev); + int hinic3_ceqs_init(struct hinic3_hwdev *hwdev, u16 num_ceqs, struct msix_entry *msix_entries); void hinic3_ceqs_free(struct hinic3_hwdev *hwdev); @@ -118,5 +122,6 @@ int hinic3_ceq_register_cb(struct hinic3_hwdev *hwdev, hinic3_ceq_event_cb callback); void hinic3_ceq_unregister_cb(struct hinic3_hwdev *hwdev, enum hinic3_ceq_event event); +void hinic3_dump_ceq_info(struct hinic3_hwdev *hwdev); #endif diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.c b/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.c index 826fa8879a11..7d31e215b14f 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.c @@ -5,6 +5,7 @@ #include "hinic3_common.h" #include "hinic3_csr.h" +#include "hinic3_eqs.h" #include "hinic3_hwdev.h" #include "hinic3_hwif.h" #include "hinic3_mbox.h" @@ -616,6 +617,18 @@ static void write_mbox_msg_attr(struct hinic3_mbox *mbox, mbox_ctrl); } +static void hinic3_dump_mbox_reg(struct hinic3_hwdev *hwdev) +{ + u32 val; + + val = hinic3_hwif_read_reg(hwdev->hwif, + HINIC3_FUNC_CSR_MAILBOX_CONTROL_OFF); + dev_err(hwdev->dev, "Mailbox control reg: 0x%x\n", val); + val = hinic3_hwif_read_reg(hwdev->hwif, + HINIC3_FUNC_CSR_MAILBOX_INT_OFF); + dev_err(hwdev->dev, "Mailbox interrupt offset: 0x%x\n", val); +} + static u16 get_mbox_status(const struct hinic3_send_mbox *mbox) { __be64 *wb_status = mbox->wb_vaddr; @@ -670,6 +683,7 @@ static int send_mbox_seg(struct hinic3_mbox *mbox, __le64 header, if (err) { dev_err(hwdev->dev, "Send mailbox segment timeout, wb status: 0x%x\n", wb_status); + hinic3_dump_mbox_reg(hwdev); return err; } @@ -825,6 +839,7 @@ int hinic3_send_mbox_to_mgmt(struct hinic3_hwdev *hwdev, u8 mod, u16 cmd, if (wait_mbox_msg_completion(mbox, msg_params->timeout_ms)) { dev_err(hwdev->dev, "Send mbox msg timeout, msg_id: %u\n", msg_info.msg_id); + hinic3_dump_aeq_info(mbox->hwdev); err = -ETIMEDOUT; goto err_send; } From d69ee992fbf60dc691fed97bafcd2905c7e48832 Mon Sep 17 00:00:00 2001 From: Fan Gong Date: Tue, 10 Mar 2026 09:04:51 +0800 Subject: [PATCH 0453/1561] hinic3: Add chip_present_flag checks to prevent errors when card is absent chip_present_flag is added for driver to prevent errors when card does not exist. It has been added to multiple critical functions, including command queue, mailbox and network device operations, ensuring that the existence of the network card is verified before performing operations. Co-developed-by: Zhu Yikai Signed-off-by: Zhu Yikai Signed-off-by: Fan Gong Link: https://patch.msgid.link/3954f22df125f5e843aaa62953d7506eb66922ac.1773062356.git.zhuyikai1@h-partners.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/huawei/hinic3/hinic3_cmdq.c | 10 +++++++++- .../net/ethernet/huawei/hinic3/hinic3_common.c | 6 +++++- .../net/ethernet/huawei/hinic3/hinic3_common.h | 1 + .../net/ethernet/huawei/hinic3/hinic3_hw_comm.c | 7 +++++++ .../net/ethernet/huawei/hinic3/hinic3_hwdev.c | 5 +++++ .../net/ethernet/huawei/hinic3/hinic3_hwdev.h | 1 + drivers/net/ethernet/huawei/hinic3/hinic3_hwif.c | 2 ++ drivers/net/ethernet/huawei/hinic3/hinic3_mbox.c | 12 ++++++++++++ .../ethernet/huawei/hinic3/hinic3_netdev_ops.c | 16 ++++++++++------ 9 files changed, 52 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_cmdq.c b/drivers/net/ethernet/huawei/hinic3/hinic3_cmdq.c index 946e699cbe10..1b0ba244a68f 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_cmdq.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_cmdq.c @@ -353,7 +353,8 @@ static int wait_cmdqs_enable(struct hinic3_cmdqs *cmdqs) if (cmdqs->status & HINIC3_CMDQ_ENABLE) return 0; usleep_range(1000, 2000); - } while (time_before(jiffies, end) && !cmdqs->disable_flag); + } while (time_before(jiffies, end) && !cmdqs->disable_flag && + cmdqs->hwdev->chip_present_flag); cmdqs->disable_flag = 1; @@ -681,6 +682,10 @@ int hinic3_cmdq_direct_resp(struct hinic3_hwdev *hwdev, u8 mod, u8 cmd, { struct hinic3_cmdqs *cmdqs; int err; + + if (!hwdev->chip_present_flag) + return -ETIMEDOUT; + err = cmdq_params_valid(hwdev, buf_in); if (err) { dev_err(hwdev->dev, "Invalid CMDQ parameters\n"); @@ -712,6 +717,9 @@ int hinic3_cmdq_detail_resp(struct hinic3_hwdev *hwdev, u8 mod, u8 cmd, struct hinic3_cmdqs *cmdqs; int err; + if (!hwdev->chip_present_flag) + return -ETIMEDOUT; + err = cmdq_params_valid(hwdev, buf_in); if (err) goto err_out; diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_common.c b/drivers/net/ethernet/huawei/hinic3/hinic3_common.c index fe4778d152cf..b28576debdc8 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_common.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_common.c @@ -59,10 +59,14 @@ int hinic3_wait_for_timeout(void *priv_data, wait_cpl_handler handler, enum hinic3_wait_return ret; int err; - err = read_poll_timeout(handler, ret, ret == HINIC3_WAIT_PROCESS_CPL, + err = read_poll_timeout(handler, ret, + !(ret & HINIC3_WAIT_PROCESS_WAITING), wait_once_us, wait_total_ms * USEC_PER_MSEC, false, priv_data); + if (ret == HINIC3_WAIT_PROCESS_ERR) + return -EIO; + return err; } diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_common.h b/drivers/net/ethernet/huawei/hinic3/hinic3_common.h index a8fabfae90fb..c892439fa3cd 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_common.h +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_common.h @@ -21,6 +21,7 @@ struct hinic3_dma_addr_align { enum hinic3_wait_return { HINIC3_WAIT_PROCESS_CPL = 0, HINIC3_WAIT_PROCESS_WAITING = 1, + HINIC3_WAIT_PROCESS_ERR = 2, }; struct hinic3_sge { diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.c b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.c index ecfe6265954e..1defd6800790 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.c @@ -300,6 +300,10 @@ static enum hinic3_wait_return check_cmdq_stop_handler(void *priv_data) enum hinic3_cmdq_type cmdq_type; struct hinic3_cmdqs *cmdqs; + /* Stop waiting when card unpresent */ + if (!hwdev->chip_present_flag) + return HINIC3_WAIT_PROCESS_ERR; + cmdqs = hwdev->cmdqs; for (cmdq_type = 0; cmdq_type < cmdqs->cmdq_num; cmdq_type++) { if (!hinic3_cmdq_idle(&cmdqs->cmdq[cmdq_type])) @@ -347,6 +351,9 @@ int hinic3_func_rx_tx_flush(struct hinic3_hwdev *hwdev) int ret = 0; int err; + if (!hwdev->chip_present_flag) + return 0; + err = wait_cmdq_stop(hwdev); if (err) { dev_warn(hwdev->dev, "CMDQ is still working, CMDQ timeout value is unreasonable\n"); diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_hwdev.c b/drivers/net/ethernet/huawei/hinic3/hinic3_hwdev.c index 0074d0c6dbaf..04c0385b3344 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_hwdev.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_hwdev.c @@ -32,6 +32,9 @@ #define HINIC3_PCIE_PH_DISABLE 0 #define HINIC3_PCIE_MSIX_ATTR_ENTRY 0 +#define HINIC3_CHIP_PRESENT 1 +#define HINIC3_CHIP_ABSENT 0 + #define HINIC3_DEFAULT_EQ_MSIX_PENDING_LIMIT 0 #define HINIC3_DEFAULT_EQ_MSIX_COALESC_TIMER_CFG 0xFF #define HINIC3_DEFAULT_EQ_MSIX_RESEND_TIMER_CFG 7 @@ -545,6 +548,7 @@ int hinic3_init_hwdev(struct pci_dev *pdev) dev_err(hwdev->dev, "Failed to init hwif\n"); goto err_free_hwdev; } + hwdev->chip_present_flag = HINIC3_CHIP_PRESENT; hwdev->workq = alloc_workqueue(HINIC3_HWDEV_WQ_NAME, WQ_MEM_RECLAIM | WQ_PERCPU, HINIC3_WQ_MAX_REQ); @@ -621,6 +625,7 @@ void hinic3_set_api_stop(struct hinic3_hwdev *hwdev) struct hinic3_recv_msg *recv_resp_msg; struct hinic3_mbox *mbox; + hwdev->chip_present_flag = HINIC3_CHIP_ABSENT; spin_lock_bh(&hwdev->channel_lock); if (HINIC3_IS_PF(hwdev) && test_bit(HINIC3_HWDEV_MGMT_INITED, &hwdev->func_state)) { diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_hwdev.h b/drivers/net/ethernet/huawei/hinic3/hinic3_hwdev.h index 9686c2600b46..4276ac136464 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_hwdev.h +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_hwdev.h @@ -76,6 +76,7 @@ struct hinic3_hwdev { u32 wq_page_size; u8 max_cmdq; ulong func_state; + int chip_present_flag; }; struct hinic3_event_info { diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.c b/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.c index 771883174b3b..70d70556dca6 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.c @@ -97,6 +97,8 @@ static enum hinic3_wait_return check_hwif_ready_handler(void *priv_data) u32 attr1; attr1 = hinic3_hwif_read_reg(hwdev->hwif, HINIC3_CSR_FUNC_ATTR1_ADDR); + if (attr1 == HINIC3_PCIE_LINK_DOWN) + return HINIC3_WAIT_PROCESS_ERR; return HINIC3_AF1_GET(attr1, MGMT_INIT_STATUS) ? HINIC3_WAIT_PROCESS_CPL : HINIC3_WAIT_PROCESS_WAITING; diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.c b/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.c index 7d31e215b14f..c82370cf401d 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.c @@ -646,6 +646,9 @@ static enum hinic3_wait_return check_mbox_wb_status(void *priv_data) struct hinic3_mbox *mbox = priv_data; u16 wb_status; + if (!mbox->hwdev->chip_present_flag) + return HINIC3_WAIT_PROCESS_ERR; + wb_status = get_mbox_status(&mbox->send_mbox); return MBOX_STATUS_FINISHED(wb_status) ? @@ -788,6 +791,9 @@ static enum hinic3_wait_return check_mbox_msg_finish(void *priv_data) { struct hinic3_mbox *mbox = priv_data; + if (!mbox->hwdev->chip_present_flag) + return HINIC3_WAIT_PROCESS_ERR; + return (mbox->event_flag == MBOX_EVENT_SUCCESS) ? HINIC3_WAIT_PROCESS_CPL : HINIC3_WAIT_PROCESS_WAITING; } @@ -819,6 +825,9 @@ int hinic3_send_mbox_to_mgmt(struct hinic3_hwdev *hwdev, u8 mod, u16 cmd, u32 msg_len; int err; + if (!hwdev->chip_present_flag) + return -EPERM; + /* expect response message */ msg_desc = get_mbox_msg_desc(mbox, MBOX_MSG_RESP, MBOX_MGMT_FUNC_ID); mutex_lock(&mbox->mbox_send_lock); @@ -897,6 +906,9 @@ int hinic3_send_mbox_to_mgmt_no_ack(struct hinic3_hwdev *hwdev, u8 mod, u16 cmd, struct mbox_msg_info msg_info = {}; int err; + if (!hwdev->chip_present_flag) + return -EPERM; + mutex_lock(&mbox->mbox_send_lock); err = send_mbox_msg(mbox, mod, cmd, msg_params->buf_in, msg_params->in_size, MBOX_MGMT_FUNC_ID, diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_netdev_ops.c b/drivers/net/ethernet/huawei/hinic3/hinic3_netdev_ops.c index cabb8523f246..da73811641a9 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_netdev_ops.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_netdev_ops.c @@ -415,13 +415,17 @@ static void hinic3_vport_down(struct net_device *netdev) netif_carrier_off(netdev); netif_tx_disable(netdev); - glb_func_id = hinic3_global_func_id(nic_dev->hwdev); - hinic3_set_vport_enable(nic_dev->hwdev, glb_func_id, false); + if (nic_dev->hwdev->chip_present_flag) { + hinic3_maybe_set_port_state(netdev, false); - hinic3_flush_txqs(netdev); - /* wait to guarantee that no packets will be sent to host */ - msleep(100); - hinic3_flush_qps_res(nic_dev->hwdev); + glb_func_id = hinic3_global_func_id(nic_dev->hwdev); + hinic3_set_vport_enable(nic_dev->hwdev, glb_func_id, false); + + hinic3_flush_txqs(netdev); + /* wait to guarantee that no packets will be sent to host */ + msleep(100); + hinic3_flush_qps_res(nic_dev->hwdev); + } } static int hinic3_open(struct net_device *netdev) From 0f746fc5bc77cb7421ce3f6611bd770db8c4cba8 Mon Sep 17 00:00:00 2001 From: Fan Gong Date: Tue, 10 Mar 2026 09:04:52 +0800 Subject: [PATCH 0454/1561] hinic3: Add RX VLAN offload support Add vlan offload processing in RX process. Co-developed-by: Zhu Yikai Signed-off-by: Zhu Yikai Signed-off-by: Fan Gong Link: https://patch.msgid.link/22cf02a014c2beb7b5f92ab5e6de38c4dd928125.1773062356.git.zhuyikai1@h-partners.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/huawei/hinic3/hinic3_rx.c | 15 +++++++++++++++ drivers/net/ethernet/huawei/hinic3/hinic3_rx.h | 3 +++ 2 files changed, 18 insertions(+) diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_rx.c b/drivers/net/ethernet/huawei/hinic3/hinic3_rx.c index 1236ec233b7f..309ab5901379 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_rx.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_rx.c @@ -328,6 +328,7 @@ static void hinic3_rx_csum(struct hinic3_rxq *rxq, u32 offload_type, u32 ip_type = RQ_CQE_OFFOLAD_TYPE_GET(offload_type, IP_TYPE); u32 csum_err = RQ_CQE_STATUS_GET(status, CSUM_ERR); struct net_device *netdev = rxq->netdev; + bool l2_tunnel; if (!(netdev->features & NETIF_F_RXCSUM)) return; @@ -350,6 +351,12 @@ static void hinic3_rx_csum(struct hinic3_rxq *rxq, u32 offload_type, case HINIC3_RX_UDP_PKT: case HINIC3_RX_SCTP_PKT: skb->ip_summed = CHECKSUM_UNNECESSARY; + l2_tunnel = HINIC3_GET_RX_TUNNEL_PKT_FORMAT(offload_type) == + HINIC3_RX_PKT_FORMAT_VXLAN ? 1 : 0; + if (l2_tunnel) { + /* If we checked the outer header let the stack know */ + skb->csum_level = 1; + } break; default: skb->ip_summed = CHECKSUM_NONE; @@ -390,6 +397,14 @@ static int recv_one_pkt(struct hinic3_rxq *rxq, struct hinic3_rq_cqe *rx_cqe, offload_type = le32_to_cpu(rx_cqe->offload_type); hinic3_rx_csum(rxq, offload_type, status, skb); + if ((netdev->features & NETIF_F_HW_VLAN_CTAG_RX) && + RQ_CQE_OFFOLAD_TYPE_GET(offload_type, VLAN_EN)) { + u16 vid = RQ_CQE_SGE_GET(vlan_len, VLAN); + + /* if the packet is a vlan pkt, the vid may be 0 */ + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); + } + num_lro = RQ_CQE_STATUS_GET(status, NUM_LRO); if (num_lro) hinic3_lro_set_gso_params(skb, num_lro); diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_rx.h b/drivers/net/ethernet/huawei/hinic3/hinic3_rx.h index 31622e0a63d0..06d1b3299e7c 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_rx.h +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_rx.h @@ -15,6 +15,9 @@ #define RQ_CQE_OFFOLAD_TYPE_GET(val, member) \ FIELD_GET(RQ_CQE_OFFOLAD_TYPE_##member##_MASK, val) +#define HINIC3_GET_RX_TUNNEL_PKT_FORMAT(offload_type) \ + RQ_CQE_OFFOLAD_TYPE_GET(offload_type, TUNNEL_PKT_FORMAT) + #define RQ_CQE_SGE_VLAN_MASK GENMASK(15, 0) #define RQ_CQE_SGE_LEN_MASK GENMASK(31, 16) #define RQ_CQE_SGE_GET(val, member) \ From 2a76f900d17dcb9e8322770ac9bcae34517805b3 Mon Sep 17 00:00:00 2001 From: Fan Gong Date: Tue, 10 Mar 2026 09:04:53 +0800 Subject: [PATCH 0455/1561] hinic3: Add msg_send_lock for message sending concurrecy As send_mbox_msg is invoked by 3 functions: hinic3_send_mbox_to_mgmt, hinic3_response_mbox_to_mgmt and hinic3_send_mbox_to_mgmt_no_ack, only hinic3_response_mbox_to_mgmt does not has mutex and the other two has mbox->mbox_send_lock because their send actions are mutually exclusive. As hinic3_response_mbox_to_mgmt does not conflict with them in send actions but in mailbox resources, add the new mutex(msg_send_lock) in send_mbox_msg to ensure message concurrency. Besdies, in mbox_send_seg change FIELD_PREP to FIELD_GET in MBOX_STATUS_FINISHED and MBOX_STATUS_SUCCESS to be more reasonable. Co-developed-by: Zhu Yikai Signed-off-by: Zhu Yikai Signed-off-by: Fan Gong Link: https://patch.msgid.link/d83f7f6eb4b5e94642a558fab75d61292c347e48.1773062356.git.zhuyikai1@h-partners.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/huawei/hinic3/hinic3_mbox.c | 9 +++++++-- drivers/net/ethernet/huawei/hinic3/hinic3_mbox.h | 4 ++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.c b/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.c index c82370cf401d..51be726d9d2e 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.c @@ -51,9 +51,9 @@ #define MBOX_WB_STATUS_NOT_FINISHED 0x00 #define MBOX_STATUS_FINISHED(wb) \ - ((FIELD_PREP(MBOX_WB_STATUS_MASK, (wb))) != MBOX_WB_STATUS_NOT_FINISHED) + ((FIELD_GET(MBOX_WB_STATUS_MASK, (wb))) != MBOX_WB_STATUS_NOT_FINISHED) #define MBOX_STATUS_SUCCESS(wb) \ - ((FIELD_PREP(MBOX_WB_STATUS_MASK, (wb))) == \ + ((FIELD_GET(MBOX_WB_STATUS_MASK, (wb))) == \ MBOX_WB_STATUS_FINISHED_SUCCESS) #define MBOX_STATUS_ERRCODE(wb) \ ((wb) & MBOX_WB_ERROR_CODE_MASK) @@ -396,6 +396,7 @@ static int hinic3_mbox_pre_init(struct hinic3_hwdev *hwdev, { mbox->hwdev = hwdev; mutex_init(&mbox->mbox_send_lock); + mutex_init(&mbox->mbox_seg_send_lock); spin_lock_init(&mbox->mbox_lock); mbox->workq = create_singlethread_workqueue(HINIC3_MBOX_WQ_NAME); @@ -723,6 +724,8 @@ static int send_mbox_msg(struct hinic3_mbox *mbox, u8 mod, u16 cmd, else rsp_aeq_id = 0; + mutex_lock(&mbox->mbox_seg_send_lock); + if (dst_func == MBOX_MGMT_FUNC_ID && !(hwdev->features[0] & MBOX_COMM_F_MBOX_SEGMENT)) { err = mbox_prepare_dma_msg(mbox, ack_type, &dma_msg, @@ -776,6 +779,8 @@ static int send_mbox_msg(struct hinic3_mbox *mbox, u8 mod, u16 cmd, } err_send: + mutex_unlock(&mbox->mbox_seg_send_lock); + return err; } diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.h b/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.h index e26f22d1d564..30de0c129503 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.h +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.h @@ -114,6 +114,10 @@ struct hinic3_mbox { struct hinic3_hwdev *hwdev; /* lock for send mbox message and ack message */ struct mutex mbox_send_lock; + /* lock for send message transmission. + * The lock hierarchy is mbox_send_lock -> mbox_seg_send_lock. + */ + struct mutex mbox_seg_send_lock; struct hinic3_send_mbox send_mbox; struct mbox_dma_queue sync_msg_queue; struct mbox_dma_queue async_msg_queue; From 3d36efc28078ef314445b8445b174f63bdf9579f Mon Sep 17 00:00:00 2001 From: Fan Gong Date: Tue, 10 Mar 2026 09:04:54 +0800 Subject: [PATCH 0456/1561] hinic3: Add PF device support and function type validation Add PF device ID to support for PF devices in driver and enhance function type validation to ensure proper handling of both PF and VF. Co-developed-by: Zhu Yikai Signed-off-by: Zhu Yikai Signed-off-by: Fan Gong Link: https://patch.msgid.link/895cf7ac341c475e383aa8726039dc8ea3b96ffb.1773062356.git.zhuyikai1@h-partners.com Signed-off-by: Paolo Abeni --- .../net/ethernet/huawei/hinic3/hinic3_hwdev.c | 4 ++++ .../net/ethernet/huawei/hinic3/hinic3_hwif.c | 21 +++++++++++++++++-- .../net/ethernet/huawei/hinic3/hinic3_lld.c | 1 + .../net/ethernet/huawei/hinic3/hinic3_mbox.c | 3 ++- .../huawei/hinic3/hinic3_pci_id_tbl.h | 1 + 5 files changed, 27 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_hwdev.c b/drivers/net/ethernet/huawei/hinic3/hinic3_hwdev.c index 04c0385b3344..f44b3064ab2e 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_hwdev.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_hwdev.c @@ -434,6 +434,10 @@ err_free_ceqs: static void hinic3_free_cmdqs_channel(struct hinic3_hwdev *hwdev) { hinic3_comm_cmdqs_free(hwdev); + + hinic3_set_wq_page_size(hwdev, hinic3_global_func_id(hwdev), + HINIC3_MIN_PAGE_SIZE); + hinic3_ceqs_free(hwdev); } diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.c b/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.c index 70d70556dca6..90feaa225080 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.c @@ -70,11 +70,19 @@ #define HINIC3_PPF_ELECTION_GET(val, member) \ FIELD_GET(HINIC3_PPF_ELECTION_##member##_MASK, val) +#define HINIC3_GET_REG_FLAG(reg) ((reg) & (~(HINIC3_REGS_FLAG_MASK))) #define HINIC3_GET_REG_ADDR(reg) ((reg) & (HINIC3_REGS_FLAG_MASK)) static void __iomem *hinic3_reg_addr(struct hinic3_hwif *hwif, u32 reg) { - return hwif->cfg_regs_base + HINIC3_GET_REG_ADDR(reg); + void __iomem *addr; + + if (HINIC3_GET_REG_FLAG(reg) == HINIC3_MGMT_REGS_FLAG) + addr = hwif->mgmt_regs_base + HINIC3_GET_REG_ADDR(reg); + else + addr = hwif->cfg_regs_base + HINIC3_GET_REG_ADDR(reg); + + return addr; } u32 hinic3_hwif_read_reg(struct hinic3_hwif *hwif, u32 reg) @@ -137,6 +145,7 @@ static void set_hwif_attr(struct hinic3_func_attr *attr, u32 attr0, u32 attr1, static int init_hwif_attr(struct hinic3_hwdev *hwdev) { u32 attr0, attr1, attr2, attr3, attr6; + struct hinic3_func_attr *attr; struct hinic3_hwif *hwif; hwif = hwdev->hwif; @@ -160,7 +169,15 @@ static int init_hwif_attr(struct hinic3_hwdev *hwdev) if (attr6 == HINIC3_PCIE_LINK_DOWN) return -EFAULT; - set_hwif_attr(&hwif->attr, attr0, attr1, attr2, attr3, attr6); + attr = &hwif->attr; + set_hwif_attr(attr, attr0, attr1, attr2, attr3, attr6); + + if (attr->func_type != HINIC3_FUNC_TYPE_VF && + attr->func_type != HINIC3_FUNC_TYPE_PF) { + dev_err(hwdev->dev, "unexpected func_type %u\n", + attr->func_type); + return -EINVAL; + } if (!hwif->attr.num_ceqs) { dev_err(hwdev->dev, "Ceq num cfg in fw is zero\n"); diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_lld.c b/drivers/net/ethernet/huawei/hinic3/hinic3_lld.c index a8b89eeed753..f83d5bd1041e 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_lld.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_lld.c @@ -426,6 +426,7 @@ static void hinic3_remove(struct pci_dev *pdev) } static const struct pci_device_id hinic3_pci_table[] = { + {PCI_VDEVICE(HUAWEI, PCI_DEV_ID_HINIC3_PF), 0}, {PCI_VDEVICE(HUAWEI, PCI_DEV_ID_HINIC3_VF), 0}, {0, 0} diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.c b/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.c index 51be726d9d2e..c1bc64a50dd3 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.c @@ -462,7 +462,8 @@ void hinic3_free_mbox(struct hinic3_hwdev *hwdev) destroy_workqueue(mbox->workq); free_mbox_wb_status(mbox); - hinic3_uninit_func_mbox_msg_channel(hwdev); + if (HINIC3_IS_VF(hwdev)) + hinic3_uninit_func_mbox_msg_channel(hwdev); uninit_mgmt_msg_channel(mbox); kfree(mbox); } diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_pci_id_tbl.h b/drivers/net/ethernet/huawei/hinic3/hinic3_pci_id_tbl.h index 86c88d0bb4bd..02b2b0fbecc7 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_pci_id_tbl.h +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_pci_id_tbl.h @@ -4,6 +4,7 @@ #ifndef _HINIC3_PCI_ID_TBL_H_ #define _HINIC3_PCI_ID_TBL_H_ +#define PCI_DEV_ID_HINIC3_PF 0x0222 #define PCI_DEV_ID_HINIC3_VF 0x375F #endif From 33cf53672b6f386585998366c40369834f882ddb Mon Sep 17 00:00:00 2001 From: Fan Gong Date: Tue, 10 Mar 2026 09:04:55 +0800 Subject: [PATCH 0457/1561] hinic3: Add PF FLR wait and timeout handling Add a mechanism for PF to wait for the completion of FLR, ensuring hardware state consistency after an FLR event. Co-developed-by: Zhu Yikai Signed-off-by: Zhu Yikai Signed-off-by: Fan Gong Link: https://patch.msgid.link/7a1b21426fd4274831733aca962eb209b806f4bd.1773062356.git.zhuyikai1@h-partners.com Signed-off-by: Paolo Abeni --- .../ethernet/huawei/hinic3/hinic3_hw_comm.c | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.c b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.c index 1defd6800790..a6e4e9968334 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.c @@ -292,6 +292,32 @@ int hinic3_set_cmdq_depth(struct hinic3_hwdev *hwdev, u16 cmdq_depth) return 0; } +#define HINIC3_FLR_TIMEOUT 1000 + +static enum hinic3_wait_return hinic3_check_flr_finish_handler(void *priv_data) +{ + struct hinic3_hwdev *hwdev = priv_data; + struct hinic3_hwif *hwif = hwdev->hwif; + enum hinic3_pf_status status; + + if (!hwdev->chip_present_flag) + return HINIC3_WAIT_PROCESS_ERR; + + status = hinic3_get_pf_status(hwif); + if (status == HINIC3_PF_STATUS_FLR_FINISH_FLAG) { + hinic3_set_pf_status(hwif, HINIC3_PF_STATUS_ACTIVE_FLAG); + return HINIC3_WAIT_PROCESS_CPL; + } + + return HINIC3_WAIT_PROCESS_WAITING; +} + +static int hinic3_wait_for_flr_finish(struct hinic3_hwdev *hwdev) +{ + return hinic3_wait_for_timeout(hwdev, hinic3_check_flr_finish_handler, + HINIC3_FLR_TIMEOUT, 0xa * USEC_PER_MSEC); +} + #define HINIC3_WAIT_CMDQ_IDLE_TIMEOUT 5000 static enum hinic3_wait_return check_cmdq_stop_handler(void *priv_data) @@ -389,6 +415,14 @@ int hinic3_func_rx_tx_flush(struct hinic3_hwdev *hwdev) ret = err; } + if (HINIC3_FUNC_TYPE(hwdev) != HINIC3_FUNC_TYPE_VF) { + err = hinic3_wait_for_flr_finish(hwdev); + if (err) { + dev_warn(hwdev->dev, "Wait firmware FLR timeout\n"); + ret = err; + } + } + hinic3_toggle_doorbell(hwif, ENABLE_DOORBELL); err = hinic3_reinit_cmdq_ctxts(hwdev); From 330adcedd0035414b138635fd6b5f61f00cf419f Mon Sep 17 00:00:00 2001 From: Fan Gong Date: Tue, 10 Mar 2026 09:04:56 +0800 Subject: [PATCH 0458/1561] hinic3: Add PF/VF capability parsing and parameter validation Add the ability to parse PF and VF capabilities and validate related parameters(SQ & RQ). Co-developed-by: Zhu Yikai Signed-off-by: Zhu Yikai Signed-off-by: Fan Gong Link: https://patch.msgid.link/ac4733f2c0409bb778b4624ed1632dcb2ded6632.1773062356.git.zhuyikai1@h-partners.com Signed-off-by: Paolo Abeni --- .../ethernet/huawei/hinic3/hinic3_hw_cfg.c | 47 +++++++++++++++++-- .../ethernet/huawei/hinic3/hinic3_hw_cfg.h | 8 ++++ 2 files changed, 52 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_cfg.c b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_cfg.c index ca60cf4b6282..2dfa4fb39627 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_cfg.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_cfg.c @@ -17,6 +17,17 @@ static void hinic3_parse_pub_res_cap(struct hinic3_hwdev *hwdev, { cap->port_id = dev_cap->port_id; cap->supp_svcs_bitmap = dev_cap->svc_cap_en; + + cap->cos_valid_bitmap = dev_cap->valid_cos_bitmap; + cap->port_cos_valid_bitmap = dev_cap->port_cos_valid_bitmap; + + if (type != HINIC3_FUNC_TYPE_VF) + cap->max_vf = dev_cap->max_vf; + else + cap->max_vf = 0; + + dev_dbg(hwdev->dev, "Port_id: 0x%x, cos_bitmap: 0x%x, Max_vf: 0x%x\n", + cap->port_id, cap->cos_valid_bitmap, cap->max_vf); } static void hinic3_parse_l2nic_res_cap(struct hinic3_hwdev *hwdev, @@ -28,6 +39,13 @@ static void hinic3_parse_l2nic_res_cap(struct hinic3_hwdev *hwdev, nic_svc_cap->max_sqs = min(dev_cap->nic_max_sq_id + 1, HINIC3_CFG_MAX_QP); + + nic_svc_cap->max_rqs = min(dev_cap->nic_max_rq_id + 1, + HINIC3_CFG_MAX_QP); + nic_svc_cap->default_num_queues = dev_cap->nic_default_num_queues; + + dev_dbg(hwdev->dev, "L2nic resource capbility, max_sqs: 0x%x, max_rqs: 0x%x\n", + nic_svc_cap->max_sqs, nic_svc_cap->max_rqs); } static void hinic3_parse_dev_cap(struct hinic3_hwdev *hwdev, @@ -44,8 +62,8 @@ static void hinic3_parse_dev_cap(struct hinic3_hwdev *hwdev, hinic3_parse_l2nic_res_cap(hwdev, cap, dev_cap, type); } -static int get_cap_from_fw(struct hinic3_hwdev *hwdev, - enum hinic3_func_type type) +static int hinic3_get_cap_from_fw(struct hinic3_hwdev *hwdev, + enum hinic3_func_type type) { struct mgmt_msg_params msg_params = {}; struct cfg_cmd_dev_cap dev_cap = {}; @@ -69,6 +87,29 @@ static int get_cap_from_fw(struct hinic3_hwdev *hwdev, return 0; } +static int hinic3_get_dev_cap(struct hinic3_hwdev *hwdev) +{ + enum hinic3_func_type type = HINIC3_FUNC_TYPE(hwdev); + int err; + + switch (type) { + case HINIC3_FUNC_TYPE_PF: + case HINIC3_FUNC_TYPE_VF: + err = hinic3_get_cap_from_fw(hwdev, type); + if (err) { + dev_err(hwdev->dev, "Failed to get FW capability\n"); + return err; + } + break; + default: + dev_err(hwdev->dev, "Unsupported PCI Function type: %d\n", + type); + return -EINVAL; + } + + return 0; +} + static int hinic3_init_irq_info(struct hinic3_hwdev *hwdev) { struct hinic3_cfg_mgmt_info *cfg_mgmt = hwdev->cfg_mgmt; @@ -215,7 +256,7 @@ void hinic3_free_irq(struct hinic3_hwdev *hwdev, u32 irq_id) int hinic3_init_capability(struct hinic3_hwdev *hwdev) { - return get_cap_from_fw(hwdev, HINIC3_FUNC_TYPE_VF); + return hinic3_get_dev_cap(hwdev); } bool hinic3_support_nic(struct hinic3_hwdev *hwdev) diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_cfg.h b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_cfg.h index 58806199bf54..361c0b6a70f0 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_cfg.h +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_cfg.h @@ -26,6 +26,8 @@ struct hinic3_irq_info { struct hinic3_nic_service_cap { u16 max_sqs; + u16 max_rqs; + u16 default_num_queues; }; /* Device capabilities */ @@ -34,6 +36,12 @@ struct hinic3_dev_cap { u16 supp_svcs_bitmap; /* Physical port */ u8 port_id; + + u8 cos_valid_bitmap; + u8 port_cos_valid_bitmap; + /* max number of VFs that PF supports */ + u16 max_vf; + struct hinic3_nic_service_cap nic_svc_cap; }; From 00608d02ddf04b49c14801f4b0581b1b937bc766 Mon Sep 17 00:00:00 2001 From: Fan Gong Date: Tue, 10 Mar 2026 09:04:57 +0800 Subject: [PATCH 0459/1561] hinic3: Add ethtool basic ops Implement following ethtool callback function: .get_link_ksettings .get_drvinfo .get_msglevel .set_msglevel .get_link These callbacks allow users to utilize ethtool for detailed network configuration and monitoring. Co-developed-by: Zhu Yikai Signed-off-by: Zhu Yikai Signed-off-by: Fan Gong Link: https://patch.msgid.link/b56d490c2a06cae9541a0297d76b11d869f37161.1773062356.git.zhuyikai1@h-partners.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/huawei/hinic3/Makefile | 1 + .../ethernet/huawei/hinic3/hinic3_ethtool.c | 425 ++++++++++++++++++ .../ethernet/huawei/hinic3/hinic3_hw_comm.c | 28 ++ .../ethernet/huawei/hinic3/hinic3_hw_comm.h | 2 + .../ethernet/huawei/hinic3/hinic3_hw_intf.h | 12 + .../net/ethernet/huawei/hinic3/hinic3_main.c | 3 + .../huawei/hinic3/hinic3_mgmt_interface.h | 16 +- .../ethernet/huawei/hinic3/hinic3_nic_cfg.c | 77 ++++ .../ethernet/huawei/hinic3/hinic3_nic_cfg.h | 110 +++++ .../ethernet/huawei/hinic3/hinic3_nic_dev.h | 3 + .../net/ethernet/huawei/hinic3/hinic3_rss.c | 2 +- 11 files changed, 677 insertions(+), 2 deletions(-) create mode 100644 drivers/net/ethernet/huawei/hinic3/hinic3_ethtool.c diff --git a/drivers/net/ethernet/huawei/hinic3/Makefile b/drivers/net/ethernet/huawei/hinic3/Makefile index 26c05ecf31c9..4ebad3a4f943 100644 --- a/drivers/net/ethernet/huawei/hinic3/Makefile +++ b/drivers/net/ethernet/huawei/hinic3/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_HINIC3) += hinic3.o hinic3-objs := hinic3_cmdq.o \ hinic3_common.o \ hinic3_eqs.o \ + hinic3_ethtool.o \ hinic3_filter.o \ hinic3_hw_cfg.o \ hinic3_hw_comm.o \ diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_ethtool.c b/drivers/net/ethernet/huawei/hinic3/hinic3_ethtool.c new file mode 100644 index 000000000000..90fc16288de9 --- /dev/null +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_ethtool.c @@ -0,0 +1,425 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) Huawei Technologies Co., Ltd. 2026. All rights reserved. + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hinic3_lld.h" +#include "hinic3_hw_comm.h" +#include "hinic3_nic_dev.h" +#include "hinic3_nic_cfg.h" + +#define HINIC3_MGMT_VERSION_MAX_LEN 32 + +static void hinic3_get_drvinfo(struct net_device *netdev, + struct ethtool_drvinfo *info) +{ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + u8 mgmt_ver[HINIC3_MGMT_VERSION_MAX_LEN]; + struct pci_dev *pdev = nic_dev->pdev; + int err; + + strscpy(info->driver, HINIC3_NIC_DRV_NAME, sizeof(info->driver)); + strscpy(info->bus_info, pci_name(pdev), sizeof(info->bus_info)); + + err = hinic3_get_mgmt_version(nic_dev->hwdev, mgmt_ver, + HINIC3_MGMT_VERSION_MAX_LEN); + if (err) { + netdev_err(netdev, "Failed to get fw version\n"); + return; + } + + snprintf(info->fw_version, sizeof(info->fw_version), "%s", mgmt_ver); +} + +static u32 hinic3_get_msglevel(struct net_device *netdev) +{ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + + return nic_dev->msg_enable; +} + +static void hinic3_set_msglevel(struct net_device *netdev, u32 data) +{ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + + nic_dev->msg_enable = data; + + netdev_dbg(netdev, "Set message level: 0x%x\n", data); +} + +static const u32 hinic3_link_mode_ge[] = { + ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + ETHTOOL_LINK_MODE_1000baseKX_Full_BIT, + ETHTOOL_LINK_MODE_1000baseX_Full_BIT, +}; + +static const u32 hinic3_link_mode_10ge_base_r[] = { + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_10000baseR_FEC_BIT, + ETHTOOL_LINK_MODE_10000baseCR_Full_BIT, + ETHTOOL_LINK_MODE_10000baseSR_Full_BIT, + ETHTOOL_LINK_MODE_10000baseLR_Full_BIT, + ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT, +}; + +static const u32 hinic3_link_mode_25ge_base_r[] = { + ETHTOOL_LINK_MODE_25000baseCR_Full_BIT, + ETHTOOL_LINK_MODE_25000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_25000baseSR_Full_BIT, +}; + +static const u32 hinic3_link_mode_40ge_base_r4[] = { + ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT, + ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT, + ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT, + ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT, +}; + +static const u32 hinic3_link_mode_50ge_base_r[] = { + ETHTOOL_LINK_MODE_50000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseSR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseCR_Full_BIT, +}; + +static const u32 hinic3_link_mode_50ge_base_r2[] = { + ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT, + ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT, + ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT, +}; + +static const u32 hinic3_link_mode_100ge_base_r[] = { + ETHTOOL_LINK_MODE_100000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_100000baseSR_Full_BIT, + ETHTOOL_LINK_MODE_100000baseCR_Full_BIT, +}; + +static const u32 hinic3_link_mode_100ge_base_r2[] = { + ETHTOOL_LINK_MODE_100000baseKR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseSR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseCR2_Full_BIT, +}; + +static const u32 hinic3_link_mode_100ge_base_r4[] = { + ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT, +}; + +static const u32 hinic3_link_mode_200ge_base_r2[] = { + ETHTOOL_LINK_MODE_200000baseKR2_Full_BIT, + ETHTOOL_LINK_MODE_200000baseSR2_Full_BIT, + ETHTOOL_LINK_MODE_200000baseCR2_Full_BIT, +}; + +static const u32 hinic3_link_mode_200ge_base_r4[] = { + ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT, + ETHTOOL_LINK_MODE_200000baseSR4_Full_BIT, + ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT, +}; + +struct hw2ethtool_link_mode { + const u32 *link_mode_bit_arr; + u32 arr_size; + u32 speed; +}; + +static const struct hw2ethtool_link_mode + hw2ethtool_link_mode_table[LINK_MODE_MAX_NUMBERS] = { + [LINK_MODE_GE] = { + .link_mode_bit_arr = hinic3_link_mode_ge, + .arr_size = ARRAY_SIZE(hinic3_link_mode_ge), + .speed = SPEED_1000, + }, + [LINK_MODE_10GE_BASE_R] = { + .link_mode_bit_arr = hinic3_link_mode_10ge_base_r, + .arr_size = ARRAY_SIZE(hinic3_link_mode_10ge_base_r), + .speed = SPEED_10000, + }, + [LINK_MODE_25GE_BASE_R] = { + .link_mode_bit_arr = hinic3_link_mode_25ge_base_r, + .arr_size = ARRAY_SIZE(hinic3_link_mode_25ge_base_r), + .speed = SPEED_25000, + }, + [LINK_MODE_40GE_BASE_R4] = { + .link_mode_bit_arr = hinic3_link_mode_40ge_base_r4, + .arr_size = ARRAY_SIZE(hinic3_link_mode_40ge_base_r4), + .speed = SPEED_40000, + }, + [LINK_MODE_50GE_BASE_R] = { + .link_mode_bit_arr = hinic3_link_mode_50ge_base_r, + .arr_size = ARRAY_SIZE(hinic3_link_mode_50ge_base_r), + .speed = SPEED_50000, + }, + [LINK_MODE_50GE_BASE_R2] = { + .link_mode_bit_arr = hinic3_link_mode_50ge_base_r2, + .arr_size = ARRAY_SIZE(hinic3_link_mode_50ge_base_r2), + .speed = SPEED_50000, + }, + [LINK_MODE_100GE_BASE_R] = { + .link_mode_bit_arr = hinic3_link_mode_100ge_base_r, + .arr_size = ARRAY_SIZE(hinic3_link_mode_100ge_base_r), + .speed = SPEED_100000, + }, + [LINK_MODE_100GE_BASE_R2] = { + .link_mode_bit_arr = hinic3_link_mode_100ge_base_r2, + .arr_size = ARRAY_SIZE(hinic3_link_mode_100ge_base_r2), + .speed = SPEED_100000, + }, + [LINK_MODE_100GE_BASE_R4] = { + .link_mode_bit_arr = hinic3_link_mode_100ge_base_r4, + .arr_size = ARRAY_SIZE(hinic3_link_mode_100ge_base_r4), + .speed = SPEED_100000, + }, + [LINK_MODE_200GE_BASE_R2] = { + .link_mode_bit_arr = hinic3_link_mode_200ge_base_r2, + .arr_size = ARRAY_SIZE(hinic3_link_mode_200ge_base_r2), + .speed = SPEED_200000, + }, + [LINK_MODE_200GE_BASE_R4] = { + .link_mode_bit_arr = hinic3_link_mode_200ge_base_r4, + .arr_size = ARRAY_SIZE(hinic3_link_mode_200ge_base_r4), + .speed = SPEED_200000, + }, +}; + +#define GET_SUPPORTED_MODE 0 +#define GET_ADVERTISED_MODE 1 + +struct hinic3_link_settings { + __ETHTOOL_DECLARE_LINK_MODE_MASK(supported); + __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising); + + u32 speed; + u8 duplex; + u8 port; + u8 autoneg; +}; + +#define HINIC3_ADD_SUPPORTED_LINK_MODE(ecmd, mode) \ + set_bit(ETHTOOL_LINK_##mode##_BIT, (ecmd)->supported) +#define HINIC3_ADD_ADVERTISED_LINK_MODE(ecmd, mode) \ + set_bit(ETHTOOL_LINK_##mode##_BIT, (ecmd)->advertising) + +static void hinic3_add_speed_link_mode(unsigned long *bitmap, u32 mode) +{ + u32 i; + + for (i = 0; i < hw2ethtool_link_mode_table[mode].arr_size; i++) { + if (hw2ethtool_link_mode_table[mode].link_mode_bit_arr[i] >= + __ETHTOOL_LINK_MODE_MASK_NBITS) + continue; + + set_bit(hw2ethtool_link_mode_table[mode].link_mode_bit_arr[i], + bitmap); + } +} + +/* Related to enum mag_cmd_port_speed */ +static const u32 hw_to_ethtool_speed[] = { + (u32)SPEED_UNKNOWN, SPEED_10, SPEED_100, SPEED_1000, SPEED_10000, + SPEED_25000, SPEED_40000, SPEED_50000, SPEED_100000, SPEED_200000 +}; + +static void +hinic3_add_ethtool_link_mode(struct hinic3_link_settings *link_settings, + u32 hw_link_mode, u32 name) +{ + unsigned long *advertising_mask = link_settings->advertising; + unsigned long *supported_mask = link_settings->supported; + u32 link_mode; + + for (link_mode = 0; link_mode < LINK_MODE_MAX_NUMBERS; link_mode++) { + if (hw_link_mode & BIT(link_mode)) { + if (name == GET_SUPPORTED_MODE) + hinic3_add_speed_link_mode(supported_mask, + link_mode); + else + hinic3_add_speed_link_mode(advertising_mask, + link_mode); + } + } +} + +static void +hinic3_link_speed_set(struct net_device *netdev, + struct hinic3_link_settings *link_settings, + struct hinic3_nic_port_info *port_info) +{ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + bool link_status_up; + int err; + + if (port_info->supported_mode != LINK_MODE_UNKNOWN) + hinic3_add_ethtool_link_mode(link_settings, + port_info->supported_mode, + GET_SUPPORTED_MODE); + if (port_info->advertised_mode != LINK_MODE_UNKNOWN) + hinic3_add_ethtool_link_mode(link_settings, + port_info->advertised_mode, + GET_ADVERTISED_MODE); + + err = hinic3_get_link_status(nic_dev->hwdev, &link_status_up); + if (!err && link_status_up) { + link_settings->speed = + port_info->speed < ARRAY_SIZE(hw_to_ethtool_speed) ? + hw_to_ethtool_speed[port_info->speed] : + (u32)SPEED_UNKNOWN; + + link_settings->duplex = port_info->duplex; + } else { + link_settings->speed = (u32)SPEED_UNKNOWN; + link_settings->duplex = DUPLEX_UNKNOWN; + } +} + +static void +hinic3_link_port_type_set(struct hinic3_link_settings *link_settings, + u8 port_type) +{ + switch (port_type) { + case MAG_CMD_WIRE_TYPE_ELECTRIC: + HINIC3_ADD_SUPPORTED_LINK_MODE(link_settings, MODE_TP); + HINIC3_ADD_ADVERTISED_LINK_MODE(link_settings, MODE_TP); + link_settings->port = PORT_TP; + break; + + case MAG_CMD_WIRE_TYPE_AOC: + case MAG_CMD_WIRE_TYPE_MM: + case MAG_CMD_WIRE_TYPE_SM: + HINIC3_ADD_SUPPORTED_LINK_MODE(link_settings, MODE_FIBRE); + HINIC3_ADD_ADVERTISED_LINK_MODE(link_settings, MODE_FIBRE); + link_settings->port = PORT_FIBRE; + break; + + case MAG_CMD_WIRE_TYPE_COPPER: + HINIC3_ADD_SUPPORTED_LINK_MODE(link_settings, MODE_FIBRE); + HINIC3_ADD_ADVERTISED_LINK_MODE(link_settings, MODE_FIBRE); + link_settings->port = PORT_DA; + break; + + case MAG_CMD_WIRE_TYPE_BACKPLANE: + HINIC3_ADD_SUPPORTED_LINK_MODE(link_settings, MODE_Backplane); + HINIC3_ADD_ADVERTISED_LINK_MODE(link_settings, MODE_Backplane); + link_settings->port = PORT_NONE; + break; + + default: + link_settings->port = PORT_OTHER; + break; + } +} + +static int +hinic3_get_link_pause_settings(struct net_device *netdev, + struct hinic3_link_settings *link_settings) +{ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + struct hinic3_nic_pause_config nic_pause = {}; + int err; + + err = hinic3_get_pause_info(nic_dev, &nic_pause); + if (err) { + netdev_err(netdev, "Failed to get pause param from hw\n"); + return err; + } + + HINIC3_ADD_SUPPORTED_LINK_MODE(link_settings, MODE_Pause); + if (nic_pause.rx_pause && nic_pause.tx_pause) { + HINIC3_ADD_ADVERTISED_LINK_MODE(link_settings, MODE_Pause); + } else if (nic_pause.tx_pause) { + HINIC3_ADD_ADVERTISED_LINK_MODE(link_settings, + MODE_Asym_Pause); + } else if (nic_pause.rx_pause) { + HINIC3_ADD_ADVERTISED_LINK_MODE(link_settings, MODE_Pause); + HINIC3_ADD_ADVERTISED_LINK_MODE(link_settings, + MODE_Asym_Pause); + } + + return 0; +} + +static int +hinic3_get_link_settings(struct net_device *netdev, + struct hinic3_link_settings *link_settings) +{ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + struct hinic3_nic_port_info port_info = {}; + int err; + + err = hinic3_get_port_info(nic_dev->hwdev, &port_info); + if (err) { + netdev_err(netdev, "Failed to get port info\n"); + return err; + } + + hinic3_link_speed_set(netdev, link_settings, &port_info); + + hinic3_link_port_type_set(link_settings, port_info.port_type); + + link_settings->autoneg = port_info.autoneg_state == PORT_CFG_AN_ON ? + AUTONEG_ENABLE : AUTONEG_DISABLE; + if (port_info.autoneg_cap) + HINIC3_ADD_SUPPORTED_LINK_MODE(link_settings, MODE_Autoneg); + if (port_info.autoneg_state == PORT_CFG_AN_ON) + HINIC3_ADD_ADVERTISED_LINK_MODE(link_settings, MODE_Autoneg); + + if (!HINIC3_IS_VF(nic_dev->hwdev)) { + err = hinic3_get_link_pause_settings(netdev, link_settings); + if (err) + return err; + } + + return 0; +} + +static int +hinic3_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *link_settings) +{ + struct ethtool_link_settings *base = &link_settings->base; + struct hinic3_link_settings settings = {}; + int err; + + ethtool_link_ksettings_zero_link_mode(link_settings, supported); + ethtool_link_ksettings_zero_link_mode(link_settings, advertising); + + err = hinic3_get_link_settings(netdev, &settings); + if (err) + return err; + + bitmap_copy(link_settings->link_modes.supported, settings.supported, + __ETHTOOL_LINK_MODE_MASK_NBITS); + bitmap_copy(link_settings->link_modes.advertising, settings.advertising, + __ETHTOOL_LINK_MODE_MASK_NBITS); + + base->autoneg = settings.autoneg; + base->speed = settings.speed; + base->duplex = settings.duplex; + base->port = settings.port; + + return 0; +} + +static const struct ethtool_ops hinic3_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_PKT_RATE_RX_USECS, + .get_link_ksettings = hinic3_get_link_ksettings, + .get_drvinfo = hinic3_get_drvinfo, + .get_msglevel = hinic3_get_msglevel, + .set_msglevel = hinic3_set_msglevel, + .get_link = ethtool_op_get_link, +}; + +void hinic3_set_ethtool_ops(struct net_device *netdev) +{ + netdev->ethtool_ops = &hinic3_ethtool_ops; +} diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.c b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.c index a6e4e9968334..0dbf9cbe7f9a 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.c @@ -580,3 +580,31 @@ int hinic3_clean_root_ctxt(struct hinic3_hwdev *hwdev) return 0; } + +#define HINIC3_FW_VER_TYPE_MPU 1 + +int hinic3_get_mgmt_version(struct hinic3_hwdev *hwdev, u8 *mgmt_ver, + u8 version_size) +{ + struct comm_cmd_get_fw_version fw_ver = {}; + struct mgmt_msg_params msg_params = {}; + int err; + + fw_ver.fw_type = HINIC3_FW_VER_TYPE_MPU; + + mgmt_msg_params_init_default(&msg_params, &fw_ver, sizeof(fw_ver)); + + err = hinic3_send_mbox_to_mgmt(hwdev, MGMT_MOD_COMM, + COMM_CMD_GET_FW_VERSION, &msg_params); + + if (err || fw_ver.head.status) { + dev_err(hwdev->dev, + "Failed to get fw version, err: %d, status: 0x%x\n", + err, fw_ver.head.status); + return -EFAULT; + } + + snprintf(mgmt_ver, version_size, "%s", fw_ver.ver); + + return 0; +} diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.h b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.h index 8e4737c486b7..e672f9af5cb1 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.h +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.h @@ -49,5 +49,7 @@ void hinic3_sync_time_to_fw(struct hinic3_hwdev *hwdev); int hinic3_set_root_ctxt(struct hinic3_hwdev *hwdev, u32 rq_depth, u32 sq_depth, int rx_buf_sz); int hinic3_clean_root_ctxt(struct hinic3_hwdev *hwdev); +int hinic3_get_mgmt_version(struct hinic3_hwdev *hwdev, u8 *mgmt_ver, + u8 version_size); #endif diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_intf.h b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_intf.h index 329a9c464ff9..cfc9daa3034f 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_intf.h +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_intf.h @@ -114,6 +114,7 @@ enum comm_cmd { COMM_CMD_SET_DMA_ATTR = 25, /* Commands for obtaining information */ + COMM_CMD_GET_FW_VERSION = 60, COMM_CMD_SYNC_TIME = 62, COMM_CMD_SEND_BDF_INFO = 64, }; @@ -275,6 +276,17 @@ struct comm_cmd_bdf_info { u8 rsvd2[5]; }; +#define COMM_FW_VERSION_LEN 16 +#define COMM_FW_COMPILE_TIME_LEN 20 +struct comm_cmd_get_fw_version { + struct mgmt_msg_head head; + + u16 fw_type; + u16 rsvd1; + u8 ver[COMM_FW_VERSION_LEN]; + u8 time[COMM_FW_COMPILE_TIME_LEN]; +}; + /* Services supported by HW. HW uses these values when delivering events. * HW supports multiple services that are not yet supported by driver * (e.g. RoCE). diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_main.c b/drivers/net/ethernet/huawei/hinic3/hinic3_main.c index 6039fcf3c1dd..0a888fe4c975 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_main.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_main.c @@ -18,6 +18,7 @@ #define HINIC3_NIC_DRV_DESC "Intelligent Network Interface Card Driver" +#define HINIC3_DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_LINK) #define HINIC3_RX_BUF_LEN 2048 #define HINIC3_LRO_REPLENISH_THLD 256 #define HINIC3_NIC_DEV_WQ_NAME "hinic3_nic_dev_wq" @@ -143,6 +144,7 @@ static int hinic3_init_nic_dev(struct net_device *netdev, nic_dev->hwdev = hwdev; nic_dev->pdev = pdev; + nic_dev->msg_enable = HINIC3_DEFAULT_MSG_ENABLE; nic_dev->rx_buf_len = HINIC3_RX_BUF_LEN; nic_dev->lro_replenish_thld = HINIC3_LRO_REPLENISH_THLD; nic_dev->vlan_bitmap = kzalloc(HINIC3_VLAN_BITMAP_SIZE(nic_dev), @@ -241,6 +243,7 @@ static void hinic3_sw_uninit(struct net_device *netdev) static void hinic3_assign_netdev_ops(struct net_device *netdev) { hinic3_set_netdev_ops(netdev); + hinic3_set_ethtool_ops(netdev); } static void netdev_feature_init(struct net_device *netdev) diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_mgmt_interface.h b/drivers/net/ethernet/huawei/hinic3/hinic3_mgmt_interface.h index c0c87a8c2198..c5bca3c4af96 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_mgmt_interface.h +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_mgmt_interface.h @@ -183,7 +183,18 @@ struct l2nic_cmd_lro_timer { /* IEEE 802.1Qaz std */ #define L2NIC_DCB_COS_MAX 0x8 -struct l2nic_cmd_set_rss_ctx_tbl { +struct l2nic_cmd_pause_config { + struct mgmt_msg_head msg_head; + u8 port_id; + u8 opcode; + u16 rsvd1; + u8 auto_neg; + u8 rx_pause; + u8 tx_pause; + u8 rsvd2[5]; +}; + +struct l2nic_cmd_rss_ctx_tbl { struct mgmt_msg_head msg_head; u16 func_id; u16 rsvd1; @@ -238,6 +249,7 @@ enum l2nic_cmd { L2NIC_CMD_CFG_RSS_HASH_KEY = 63, L2NIC_CMD_CFG_RSS_HASH_ENGINE = 64, L2NIC_CMD_SET_RSS_CTX_TBL = 65, + L2NIC_CMD_CFG_PAUSE_INFO = 101, L2NIC_CMD_QOS_DCB_STATE = 110, L2NIC_CMD_FORCE_PKT_DROP = 113, L2NIC_CMD_MAX = 256, @@ -259,6 +271,8 @@ enum l2nic_ucode_cmd { enum mag_cmd { MAG_CMD_SET_PORT_ENABLE = 6, MAG_CMD_GET_LINK_STATUS = 7, + + MAG_CMD_GET_PORT_INFO = 153, }; /* firmware also use this cmd report link event to driver */ diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_nic_cfg.c b/drivers/net/ethernet/huawei/hinic3/hinic3_nic_cfg.c index 44abccf9cb29..de5a7984d2cb 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_nic_cfg.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_nic_cfg.c @@ -639,6 +639,39 @@ int hinic3_get_link_status(struct hinic3_hwdev *hwdev, bool *link_status_up) return 0; } +int hinic3_get_port_info(struct hinic3_hwdev *hwdev, + struct hinic3_nic_port_info *port_info) +{ + struct mag_cmd_get_port_info port_msg = {}; + struct mgmt_msg_params msg_params = {}; + int err; + + port_msg.port_id = hinic3_physical_port_id(hwdev); + + mgmt_msg_params_init_default(&msg_params, &port_msg, sizeof(port_msg)); + + err = hinic3_send_mbox_to_mgmt(hwdev, MGMT_MOD_HILINK, + MAG_CMD_GET_PORT_INFO, &msg_params); + + if (err || port_msg.head.status) { + dev_err(hwdev->dev, + "Failed to get port info, err: %d, status: 0x%x\n", + err, port_msg.head.status); + return -EFAULT; + } + + port_info->autoneg_cap = port_msg.an_support; + port_info->autoneg_state = port_msg.an_en; + port_info->duplex = port_msg.duplex; + port_info->port_type = port_msg.wire_type; + port_info->speed = port_msg.speed; + port_info->fec = port_msg.fec; + port_info->supported_mode = port_msg.supported_mode; + port_info->advertised_mode = port_msg.advertised_mode; + + return 0; +} + int hinic3_set_vport_enable(struct hinic3_hwdev *hwdev, u16 func_id, bool enable) { @@ -661,3 +694,47 @@ int hinic3_set_vport_enable(struct hinic3_hwdev *hwdev, u16 func_id, return 0; } + +static int hinic3_cfg_hw_pause(struct hinic3_hwdev *hwdev, u8 opcode, + struct hinic3_nic_pause_config *nic_pause) +{ + struct l2nic_cmd_pause_config pause_info = {}; + struct mgmt_msg_params msg_params = {}; + int err; + + pause_info.port_id = hinic3_physical_port_id(hwdev); + pause_info.opcode = opcode; + if (opcode == MGMT_MSG_CMD_OP_SET) { + pause_info.auto_neg = nic_pause->auto_neg; + pause_info.rx_pause = nic_pause->rx_pause; + pause_info.tx_pause = nic_pause->tx_pause; + } + + mgmt_msg_params_init_default(&msg_params, &pause_info, + sizeof(pause_info)); + + err = hinic3_send_mbox_to_mgmt(hwdev, MGMT_MOD_L2NIC, + L2NIC_CMD_CFG_PAUSE_INFO, &msg_params); + + if (err || pause_info.msg_head.status) { + dev_err(hwdev->dev, "Failed to %s pause info, err: %d, status: 0x%x\n", + opcode == MGMT_MSG_CMD_OP_SET ? "set" : "get", + err, pause_info.msg_head.status); + return -EFAULT; + } + + if (opcode == MGMT_MSG_CMD_OP_GET) { + nic_pause->auto_neg = pause_info.auto_neg; + nic_pause->rx_pause = pause_info.rx_pause; + nic_pause->tx_pause = pause_info.tx_pause; + } + + return 0; +} + +int hinic3_get_pause_info(struct hinic3_nic_dev *nic_dev, + struct hinic3_nic_pause_config *nic_pause) +{ + return hinic3_cfg_hw_pause(nic_dev->hwdev, MGMT_MSG_CMD_OP_GET, + nic_pause); +} diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_nic_cfg.h b/drivers/net/ethernet/huawei/hinic3/hinic3_nic_cfg.h index c32eaa886e17..5d52202a8d4e 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_nic_cfg.h +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_nic_cfg.h @@ -6,6 +6,7 @@ #include +#include "hinic3_hwif.h" #include "hinic3_hw_intf.h" #include "hinic3_mgmt_interface.h" @@ -35,6 +36,49 @@ struct hinic3_sq_attr { u64 ci_dma_base; }; +enum mag_cmd_port_an { + PORT_CFG_AN_ON = 1, +}; + +/* mag supported/advertised link mode bitmap */ +enum mag_cmd_link_mode { + LINK_MODE_GE = 0, + LINK_MODE_10GE_BASE_R = 1, + LINK_MODE_25GE_BASE_R = 2, + LINK_MODE_40GE_BASE_R4 = 3, + LINK_MODE_50GE_BASE_R = 4, + LINK_MODE_50GE_BASE_R2 = 5, + LINK_MODE_100GE_BASE_R = 6, + LINK_MODE_100GE_BASE_R2 = 7, + LINK_MODE_100GE_BASE_R4 = 8, + LINK_MODE_200GE_BASE_R2 = 9, + LINK_MODE_200GE_BASE_R4 = 10, + LINK_MODE_MAX_NUMBERS, + + LINK_MODE_UNKNOWN = 0xFFFF +}; + +struct mag_cmd_get_port_info { + struct mgmt_msg_head head; + + u8 port_id; + u8 rsvd0[3]; + + u8 wire_type; + u8 an_support; + u8 an_en; + u8 duplex; + + u8 speed; + u8 fec; + u8 lanes; + u8 rsvd1; + + u32 supported_mode; + u32 advertised_mode; + u8 rsvd2[8]; +}; + #define MAG_CMD_PORT_DISABLE 0x0 #define MAG_CMD_TX_ENABLE 0x1 #define MAG_CMD_RX_ENABLE 0x2 @@ -52,6 +96,39 @@ struct mag_cmd_set_port_enable { u8 rsvd1[3]; }; +/* xsfp wire type, refers to cmis protocol definition */ +enum mag_wire_type { + MAG_CMD_WIRE_TYPE_UNKNOWN = 0x0, + MAG_CMD_WIRE_TYPE_MM = 0x1, + MAG_CMD_WIRE_TYPE_SM = 0x2, + MAG_CMD_WIRE_TYPE_COPPER = 0x3, + MAG_CMD_WIRE_TYPE_ACC = 0x4, + MAG_CMD_WIRE_TYPE_BASET = 0x5, + MAG_CMD_WIRE_TYPE_AOC = 0x40, + MAG_CMD_WIRE_TYPE_ELECTRIC = 0x41, + MAG_CMD_WIRE_TYPE_BACKPLANE = 0x42 +}; + +#define XSFP_INFO_MAX_SIZE 640 +struct mag_cmd_get_xsfp_info { + struct mgmt_msg_head head; + + u8 port_id; + u8 wire_type; + u16 out_len; + u32 rsvd; + u8 sfp_info[XSFP_INFO_MAX_SIZE]; +}; + +struct mag_cmd_get_xsfp_present { + struct mgmt_msg_head head; + + u8 port_id; + /* 0:present, 1:absent */ + u8 abs_status; + u8 rsvd[2]; +}; + enum link_err_type { LINK_ERR_MODULE_UNRECOGENIZED, LINK_ERR_NUM, @@ -69,6 +146,34 @@ struct hinic3_port_module_event { enum link_err_type err_type; }; +struct hinic3_nic_port_info { + u8 port_type; + u8 autoneg_cap; + u8 autoneg_state; + u8 duplex; + u8 speed; + u8 fec; + u32 supported_mode; + u32 advertised_mode; +}; + +struct hinic3_nic_pause_config { + u8 auto_neg; + u8 rx_pause; + u8 tx_pause; +}; + +struct hinic3_nic_cfg { + /* Valid when pfc is disabled */ + bool pause_set; + struct hinic3_nic_pause_config nic_pause; + + u8 pfc_en; + u8 pfc_bitmap; + + struct hinic3_nic_port_info port_info; +}; + int hinic3_get_nic_feature_from_hw(struct hinic3_nic_dev *nic_dev); int hinic3_set_nic_feature_to_hw(struct hinic3_nic_dev *nic_dev); bool hinic3_test_support(struct hinic3_nic_dev *nic_dev, @@ -100,9 +205,14 @@ int hinic3_set_rx_mode(struct hinic3_hwdev *hwdev, u32 rx_mode); int hinic3_sync_dcb_state(struct hinic3_hwdev *hwdev, u8 op_code, u8 state); int hinic3_set_port_enable(struct hinic3_hwdev *hwdev, bool enable); int hinic3_get_link_status(struct hinic3_hwdev *hwdev, bool *link_status_up); +int hinic3_get_port_info(struct hinic3_hwdev *hwdev, + struct hinic3_nic_port_info *port_info); int hinic3_set_vport_enable(struct hinic3_hwdev *hwdev, u16 func_id, bool enable); int hinic3_add_vlan(struct hinic3_hwdev *hwdev, u16 vlan_id, u16 func_id); int hinic3_del_vlan(struct hinic3_hwdev *hwdev, u16 vlan_id, u16 func_id); +int hinic3_get_pause_info(struct hinic3_nic_dev *nic_dev, + struct hinic3_nic_pause_config *nic_pause); + #endif diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_nic_dev.h b/drivers/net/ethernet/huawei/hinic3/hinic3_nic_dev.h index 29189241f446..9502293ff710 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_nic_dev.h +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_nic_dev.h @@ -101,6 +101,7 @@ struct hinic3_nic_dev { struct hinic3_hwdev *hwdev; struct hinic3_nic_io *nic_io; + u32 msg_enable; u16 max_qps; u16 rx_buf_len; u32 lro_replenish_thld; @@ -148,4 +149,6 @@ void hinic3_qps_irq_uninit(struct net_device *netdev); void hinic3_set_rx_mode_work(struct work_struct *work); void hinic3_clean_mac_list_filter(struct net_device *netdev); +void hinic3_set_ethtool_ops(struct net_device *netdev); + #endif diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_rss.c b/drivers/net/ethernet/huawei/hinic3/hinic3_rss.c index 4ff1b2f79838..25db74d8c7dd 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_rss.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_rss.c @@ -132,7 +132,7 @@ static int hinic3_rss_set_indir_tbl(struct hinic3_hwdev *hwdev, static int hinic3_set_rss_type(struct hinic3_hwdev *hwdev, struct hinic3_rss_type rss_type) { - struct l2nic_cmd_set_rss_ctx_tbl ctx_tbl = {}; + struct l2nic_cmd_rss_ctx_tbl ctx_tbl = {}; struct mgmt_msg_params msg_params = {}; u32 ctx; int err; From 4320f1f111c587b8c6c9abc06f43e25bc10b670c Mon Sep 17 00:00:00 2001 From: Wojciech Slenska Date: Tue, 10 Mar 2026 12:22:30 +0100 Subject: [PATCH 0460/1561] dt-bindings: net: qcom,ipa: document qcm2290 compatible Document that ipa on qcm2290 uses version 4.2, the same as sc7180. Acked-by: Krzysztof Kozlowski Signed-off-by: Wojciech Slenska Link: https://patch.msgid.link/20260310112309.79261-2-wojciech.slenska@gmail.com Signed-off-by: Paolo Abeni --- Documentation/devicetree/bindings/net/qcom,ipa.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/net/qcom,ipa.yaml b/Documentation/devicetree/bindings/net/qcom,ipa.yaml index 4237e74041ef..e4bb627e1757 100644 --- a/Documentation/devicetree/bindings/net/qcom,ipa.yaml +++ b/Documentation/devicetree/bindings/net/qcom,ipa.yaml @@ -53,6 +53,10 @@ properties: - qcom,sm6350-ipa - qcom,sm8350-ipa - qcom,sm8550-ipa + - items: + - enum: + - qcom,qcm2290-ipa + - const: qcom,sc7180-ipa - items: - enum: - qcom,sm8650-ipa From 6f459eda8b60382efa0da2ca025c26a2018adc87 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 10 Mar 2026 12:44:51 +0000 Subject: [PATCH 0461/1561] tcp: add tcp_release_cb_cond() helper Majority of tcp_release_cb() calls do nothing at all. Provide tcp_release_cb_cond() helper so that release_sock() can avoid these calls. Also hint the compiler that __release_sock() and wake_up() are rarely called. $ scripts/bloat-o-meter -t vmlinux.old vmlinux.new add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-77 (-77) Function old new delta release_sock 258 181 -77 Total: Before=25235790, After=25235713, chg -0.00% Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260310124451.2280968-1-edumazet@google.com Signed-off-by: Paolo Abeni --- include/linux/tcp.h | 7 +++++++ include/net/tcp.h | 14 ++++++++++++++ net/core/sock.c | 14 ++++++++------ net/ipv4/tcp_output.c | 5 ----- 4 files changed, 29 insertions(+), 11 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index c44cf9ae8d16..bcebc4f07532 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -548,6 +548,13 @@ enum tsq_flags { TCPF_ACK_DEFERRED = BIT(TCP_ACK_DEFERRED), }; +/* Flags of interest for tcp_release_cb() */ +#define TCP_DEFERRED_ALL (TCPF_TSQ_DEFERRED | \ + TCPF_WRITE_TIMER_DEFERRED | \ + TCPF_DELACK_TIMER_DEFERRED | \ + TCPF_MTU_REDUCED_DEFERRED | \ + TCPF_ACK_DEFERRED) + #define tcp_sk(ptr) container_of_const(ptr, struct tcp_sock, inet_conn.icsk_inet.sk) /* Variant of tcp_sk() upgrading a const sock to a read/write tcp socket. diff --git a/include/net/tcp.h b/include/net/tcp.h index 9f0aee9e5d76..48dffcca0a71 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -375,7 +375,21 @@ int tcp_send_mss(struct sock *sk, int *size_goal, int flags); int tcp_wmem_schedule(struct sock *sk, int copy); void tcp_push(struct sock *sk, int flags, int mss_now, int nonagle, int size_goal); + void tcp_release_cb(struct sock *sk); + +static inline bool tcp_release_cb_cond(struct sock *sk) +{ +#ifdef CONFIG_INET + if (likely(sk->sk_prot->release_cb == tcp_release_cb)) { + if (unlikely(smp_load_acquire(&sk->sk_tsq_flags) & TCP_DEFERRED_ALL)) + tcp_release_cb(sk); + return true; + } +#endif + return false; +} + void tcp_wfree(struct sk_buff *skb); void tcp_write_timer_handler(struct sock *sk); void tcp_delack_timer_handler(struct sock *sk); diff --git a/net/core/sock.c b/net/core/sock.c index f4e2ff23d60e..fdaf66e6dc18 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3807,16 +3807,18 @@ EXPORT_SYMBOL(lock_sock_nested); void release_sock(struct sock *sk) { spin_lock_bh(&sk->sk_lock.slock); - if (sk->sk_backlog.tail) + + if (unlikely(sk->sk_backlog.tail)) __release_sock(sk); - if (sk->sk_prot->release_cb) - INDIRECT_CALL_INET_1(sk->sk_prot->release_cb, - tcp_release_cb, sk); - + if (sk->sk_prot->release_cb) { + if (!tcp_release_cb_cond(sk)) + sk->sk_prot->release_cb(sk); + } sock_release_ownership(sk); - if (waitqueue_active(&sk->sk_lock.wq)) + if (unlikely(waitqueue_active(&sk->sk_lock.wq))) wake_up(&sk->sk_lock.wq); + spin_unlock_bh(&sk->sk_lock.slock); } EXPORT_SYMBOL(release_sock); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a53802f28dd1..34a25ef61006 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1320,11 +1320,6 @@ static void tcp_tsq_workfn(struct work_struct *work) } } -#define TCP_DEFERRED_ALL (TCPF_TSQ_DEFERRED | \ - TCPF_WRITE_TIMER_DEFERRED | \ - TCPF_DELACK_TIMER_DEFERRED | \ - TCPF_MTU_REDUCED_DEFERRED | \ - TCPF_ACK_DEFERRED) /** * tcp_release_cb - tcp release_sock() callback * @sk: socket From 8e7adcf81564a3fe886a6270eea7558f063e5538 Mon Sep 17 00:00:00 2001 From: MD Danish Anwar Date: Tue, 10 Mar 2026 18:50:35 +0530 Subject: [PATCH 0462/1561] net: ti: icssg: Fix wrong macro used in RX classifier configuration The RX_CLASS_OR_REG macro is being used with RX_CLASS_OR_EN parameter when writing to the AND enable register. This should use RX_CLASS_AND_EN instead to properly configure the classifier AND enable register. Fix this by using the correct RX_CLASS_AND_EN macro parameter for RX_CLASS_OR_REG when configuring the PTP duplicate and HSR tag classifiers. Fixes: f56438a74d88 ("net: ti: icssg: Add HSR/PRP protocol frame filtering") Signed-off-by: MD Danish Anwar Link: https://patch.msgid.link/20260310132035.1299787-1-danishanwar@ti.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/icssg/icssg_classifier.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_classifier.c b/drivers/net/ethernet/ti/icssg/icssg_classifier.c index cde53d9aaa7b..87a8577093db 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_classifier.c +++ b/drivers/net/ethernet/ti/icssg/icssg_classifier.c @@ -517,7 +517,7 @@ void icssg_ft3_hsr_configurations(struct regmap *miig_rt, int slice, regmap_write(miig_rt, RX_CLASS_N_REG(slice, CLASSIFIER_PTP_DUP, RX_CLASS_AND_EN), RX_CLASS_DISABLE); - regmap_write(miig_rt, RX_CLASS_OR_REG(slice, CLASSIFIER_PTP_DUP, RX_CLASS_OR_EN), + regmap_write(miig_rt, RX_CLASS_OR_REG(slice, CLASSIFIER_PTP_DUP, RX_CLASS_AND_EN), RX_CLASS_OR_DUP_PTP); regmap_write(miig_rt, RX_CLASS_GATES_N_REG(slice, CLASSIFIER_PTP_DUP), RX_CLASS_GATE_PTP); @@ -525,7 +525,7 @@ void icssg_ft3_hsr_configurations(struct regmap *miig_rt, int slice, if (prueth->hsr_prp_version != PRP_V1) { regmap_write(miig_rt, RX_CLASS_N_REG(slice, CLASSIFIER_HSR_TAG, RX_CLASS_AND_EN), RX_CLASS_DISABLE); - regmap_write(miig_rt, RX_CLASS_OR_REG(slice, CLASSIFIER_HSR_TAG, RX_CLASS_OR_EN), + regmap_write(miig_rt, RX_CLASS_OR_REG(slice, CLASSIFIER_HSR_TAG, RX_CLASS_AND_EN), RX_CLASS_OR_HSR_TAG); regmap_write(miig_rt, RX_CLASS_GATES_N_REG(slice, CLASSIFIER_HSR_TAG), RX_CLASS_GATE_PTP); From 29ca18505d58fedf2388c303156107c4ed97197b Mon Sep 17 00:00:00 2001 From: Rosen Penev Date: Tue, 10 Mar 2026 12:46:46 -0700 Subject: [PATCH 0463/1561] net: xgbe: use device_get_mac_addr device_get_mac_addr is basically device_property_read_u8_array with an is_valid_ether_addr call. Allows just checking for ret. Remove XGBE_MAC_ADDR_PROPERTY. device_get_mac_addr supports more properties than just "mac-address". Signed-off-by: Rosen Penev Reviewed-by: Sai Krishna Link: https://patch.msgid.link/20260310194647.3794-1-rosenp@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/xgbe/xgbe-platform.c | 10 +++------- drivers/net/ethernet/amd/xgbe/xgbe.h | 1 - 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c index 98b03a3f3a95..bca0dbff80c1 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c @@ -252,13 +252,9 @@ static int xgbe_platform_probe(struct platform_device *pdev) dev_dbg(dev, "sir1_regs = %p\n", pdata->sir1_regs); /* Retrieve the MAC address */ - ret = device_property_read_u8_array(dev, XGBE_MAC_ADDR_PROPERTY, - pdata->mac_addr, - sizeof(pdata->mac_addr)); - if (ret || !is_valid_ether_addr(pdata->mac_addr)) { - dev_err(dev, "invalid %s property\n", XGBE_MAC_ADDR_PROPERTY); - if (!ret) - ret = -EINVAL; + ret = device_get_mac_address(dev, pdata->mac_addr); + if (ret) { + dev_err(dev, "invalid MAC address property\n"); goto err_io; } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index 5ee08e3c7d2b..60b7e53206d1 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -96,7 +96,6 @@ min_t(unsigned int, IEEE_8021QAZ_MAX_TCS, (_cnt)) /* Common property names */ -#define XGBE_MAC_ADDR_PROPERTY "mac-address" #define XGBE_PHY_MODE_PROPERTY "phy-mode" #define XGBE_DMA_IRQS_PROPERTY "amd,per-channel-interrupt" #define XGBE_SPEEDSET_PROPERTY "amd,speed-set" From 00699d944836576d2789eb0cf02d989b975375d7 Mon Sep 17 00:00:00 2001 From: ShravyaPanchagiri Date: Tue, 10 Mar 2026 22:04:50 -0500 Subject: [PATCH 0464/1561] docs: octeontx2: fix typo in documentation Fix spelling mistake "Crate" to "Create" in the documentation. Signed-off-by: ShravyaPanchagiri Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260311030450.8461-1-shravy112@gmail.com Signed-off-by: Jakub Kicinski --- .../networking/device_drivers/ethernet/marvell/octeontx2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst b/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst index a52850602cd8..c31c6c197cdb 100644 --- a/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst +++ b/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst @@ -323,7 +323,7 @@ Setup HTB offload # ethtool -K hw-tc-offload on -2. Crate htb root:: +2. Create htb root:: # tc qdisc add dev clsact # tc qdisc replace dev root handle 1: htb offload From 7c52f407f28d2e3746a931f88869b0169a09ed6a Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 11 Mar 2026 11:22:29 +0800 Subject: [PATCH 0465/1561] ynl: ethtool: remove duplicated unspec entry There is a duplicated unspec entry. Remove it. No user impact expected, found by inspection. Signed-off-by: Hangbin Liu Link: https://patch.msgid.link/20260311-b4-drop_dup_unspec-v1-1-e0dfa47b5981@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/ethtool.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 4707063af3b4..a05b5425b76a 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -306,10 +306,6 @@ attribute-sets: name: strings attr-cnt-name: __ethtool-a-strings-cnt attributes: - - - name: unspec - type: unused - value: 0 - name: unspec type: unused From 6e263aadbaf231bbb73e1fed048b3e3591f06264 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 11 Mar 2026 01:07:00 +0000 Subject: [PATCH 0466/1561] net: phy: vitesse: add inband caps and configuration Add support for VSC8662 reporting its inband capabilities, and also hook to configure the PHY's inband mode. This fixes a regression in the macb driver caused by commit 1338cfef1ff1 ("net: macb: fix SGMII with inband aneg disabled") Cc: stable+noautosel@kernel.org # neither this nor commit under fixes should be backported Reported-by: Conor Dooley Link: https://lore.kernel.org/r/20260304-nebulizer-rounding-40fbc81a2ba1@spud Signed-off-by: Russell King (Oracle) Tested-by: Conor Dooley Tested-by: Geert Uytterhoeven Fixes: 1338cfef1ff1b958 ("net: macb: fix SGMII with inband aneg disabled") Link: https://patch.msgid.link/E1w082O-0000000ChNc-1wDz@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/vitesse.c | 41 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/drivers/net/phy/vitesse.c b/drivers/net/phy/vitesse.c index b1b7bbba284e..1a430e832f66 100644 --- a/drivers/net/phy/vitesse.c +++ b/drivers/net/phy/vitesse.c @@ -62,6 +62,13 @@ /* Vitesse Extended Page Access Register */ #define MII_VSC82X4_EXT_PAGE_ACCESS 0x1f +/* Vitesse VSC8662 extended control register */ +#define VSC8662_EXT_CON1 0x17 +#define VSC8662_EXT_CON_MAC_AN BIT(13) + +#define VSC8662_MAC_AN 0x1b +#define VSC8662_MAC_AN_BYPASS BIT(13) + /* Vitesse VSC73XX Extended Control Register */ #define MII_VSC73XX_PHY_CTRL_EXT3 0x14 @@ -140,6 +147,38 @@ static int vsc824x_config_init(struct phy_device *phydev) return err; } +static unsigned int vsc8662_inband_caps(struct phy_device *phydev, + phy_interface_t interface) +{ + if (interface == PHY_INTERFACE_MODE_SGMII) + return LINK_INBAND_DISABLE | LINK_INBAND_ENABLE | + LINK_INBAND_BYPASS; + + return 0; +} + +static int vsc8662_config_inband(struct phy_device *phydev, unsigned int modes) +{ + u16 mask, set; + int ret; + + mask = VSC8662_MAC_AN_BYPASS; + set = modes & LINK_INBAND_BYPASS ? mask : 0; + ret = phy_modify(phydev, VSC8662_MAC_AN, mask, set); + if (ret < 0) + return ret; + + mask = VSC8662_EXT_CON_MAC_AN; + set = modes & (LINK_INBAND_ENABLE | LINK_INBAND_BYPASS) ? mask : 0; + + ret = phy_modify_changed(phydev, VSC8662_EXT_CON1, mask, set); + if (ret <= 0) + return ret; + + /* We need to soft-reset the PHY when changing VSC8662_EXT_CON_MAC_AN */ + return genphy_soft_reset(phydev); +} + #define VSC73XX_EXT_PAGE_ACCESS 0x1f static int vsc73xx_read_page(struct phy_device *phydev) @@ -649,6 +688,8 @@ static struct phy_driver vsc82xx_driver[] = { .phy_id_mask = 0x000ffff0, /* PHY_GBIT_FEATURES */ .config_init = &vsc824x_config_init, + .inband_caps = vsc8662_inband_caps, + .config_inband = vsc8662_config_inband, .config_aneg = &vsc82x4_config_aneg, .config_intr = &vsc82xx_config_intr, .handle_interrupt = &vsc82xx_handle_interrupt, From b93ec16310b4bfc14af12321257dd7237ef4cce9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 10 Mar 2026 20:28:36 -0700 Subject: [PATCH 0467/1561] genetlink: use maxattr of 0 for the reject policy Commit 4fa86555d1cd ("genetlink: piggy back on resv_op to default to a reject policy") added genl_policy_reject_all to ensure that ops without an explicit policy reject all attributes rather than silently accepting them. The reject policy had maxattr of 1. Passing info->attrs of size 2 may surprise families. Devlink, for instance, assumes that if info->attrs is set it's safe to access DEVLINK_ATTR_BUS_NAME (1) and DEVLINK_ATTR_DEV_NAME (2). Before plugging reject policies into split ops we need to make sure the genetlink code will not populate info->attrs if family had no explicit policy for the op. While even shared code paths within the families can figure out that given op has no policy fairly easily themselves, passing attrs with fixed size of 2 feels fairly useless and error prone. This change has no user-visible impact, reject attrs are not reported to the user space via getpolicy. We do have to remove the safety check in netlink_policy_dump_get_policy_idx() but it seems to have been there to catch likely faulty input, the code can handle maxattr = 0 just fine. Link: https://patch.msgid.link/20260311032839.417748-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/netlink/genetlink.c | 19 +++++++++---------- net/netlink/policy.c | 4 ++-- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index a23d4c51c089..c00f0586c8d6 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -92,10 +92,8 @@ static unsigned long mc_group_start = 0x3 | BIT(GENL_ID_CTRL) | static unsigned long *mc_groups = &mc_group_start; static unsigned long mc_groups_longs = 1; -/* We need the last attribute with non-zero ID therefore a 2-entry array */ static struct nla_policy genl_policy_reject_all[] = { { .type = NLA_REJECT }, - { .type = NLA_REJECT }, }; static int genl_ctrl_event(int event, const struct genl_family *family, @@ -106,13 +104,10 @@ static void genl_op_fill_in_reject_policy(const struct genl_family *family, struct genl_ops *op) { - BUILD_BUG_ON(ARRAY_SIZE(genl_policy_reject_all) - 1 != 1); - if (op->policy || op->cmd < family->resv_start_op) return; op->policy = genl_policy_reject_all; - op->maxattr = 1; } static void @@ -123,7 +118,6 @@ genl_op_fill_in_reject_policy_split(const struct genl_family *family, return; op->policy = genl_policy_reject_all; - op->maxattr = 1; } static const struct genl_family *genl_family_find_byid(unsigned int id) @@ -934,12 +928,17 @@ genl_family_rcv_msg_attrs_parse(const struct genl_family *family, struct nlattr **attrbuf; int err; - if (!ops->maxattr) + if (!ops->policy) return NULL; - attrbuf = kmalloc_objs(struct nlattr *, ops->maxattr + 1); - if (!attrbuf) - return ERR_PTR(-ENOMEM); + if (ops->maxattr) { + attrbuf = kmalloc_objs(struct nlattr *, ops->maxattr + 1); + if (!attrbuf) + return ERR_PTR(-ENOMEM); + } else { + /* Reject all policy, __nlmsg_parse() will just validate */ + attrbuf = NULL; + } err = __nlmsg_parse(nlh, hdrlen, attrbuf, ops->maxattr, ops->policy, validate, extack); diff --git a/net/netlink/policy.c b/net/netlink/policy.c index f39cd7cc4fb5..08b006c48f06 100644 --- a/net/netlink/policy.c +++ b/net/netlink/policy.c @@ -31,7 +31,7 @@ static int add_policy(struct netlink_policy_dump_state **statep, struct netlink_policy_dump_state *state = *statep; unsigned int old_n_alloc, n_alloc, i; - if (!policy || !maxtype) + if (!policy) return 0; for (i = 0; i < state->n_alloc; i++) { @@ -85,7 +85,7 @@ int netlink_policy_dump_get_policy_idx(struct netlink_policy_dump_state *state, { unsigned int i; - if (WARN_ON(!policy || !maxtype)) + if (WARN_ON(!policy)) return 0; for (i = 0; i < state->n_alloc; i++) { From e3a5b7f8ef2abe7b119c1806ee214d648289faf6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 10 Mar 2026 20:28:37 -0700 Subject: [PATCH 0468/1561] genetlink: apply reject policy for split ops on the dispatch path Commit 4fa86555d1cd ("genetlink: piggy back on resv_op to default to a reject policy") added genl_policy_reject_all to ensure that ops without an explicit policy reject all attributes rather than silently accepting them. This change was applied to net. When split ops were later introduced in net-next in commit b8fd60c36a44 ("genetlink: allow families to use split ops directly"), genl_op_fill_in_reject_policy_split() was added and called from genl_op_from_split() (used for policy dumping and registration). However, genl_get_cmd_split(), which is called for incoming messages, copies split_ops entries as-is without applying the reject policy. This means that split ops without policy accept all inputs. This looks like an omission / mistake made when splitting the changes between net and net-next. Let's try to re-introduce the checking. Not considering this a fix given the regression potential. If anyone reports issues we should probably fill in fake policies for specific ops rather than reverting this. Link: https://patch.msgid.link/20260311032839.417748-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/netlink/genetlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index c00f0586c8d6..d251d894afd4 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -244,6 +244,7 @@ genl_get_cmd_split(u32 cmd, u8 flag, const struct genl_family *family, if (family->split_ops[i].cmd == cmd && family->split_ops[i].flags & flag) { *op = family->split_ops[i]; + genl_op_fill_in_reject_policy_split(family, op); return 0; } From e911be835432b1e0fd70d1cc35127de189141f96 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 10 Mar 2026 20:28:38 -0700 Subject: [PATCH 0469/1561] selftests: net: make sure that Netlink rejects unknown attrs in dump Add a test case for rejecting attrs if policy is not set. dev_get dump has no input policy (accepts no attrs). Link: https://patch.msgid.link/20260311032839.417748-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/nl_netdev.py | 32 +++++++++++++++++++----- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py index 5c66421ab8aa..eff55c64a012 100755 --- a/tools/testing/selftests/net/nl_netdev.py +++ b/tools/testing/selftests/net/nl_netdev.py @@ -1,11 +1,15 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 -import time +""" +Tests for the netdev netlink family. +""" + +import errno from os import system -from lib.py import ksft_run, ksft_exit, ksft_pr -from lib.py import ksft_eq, ksft_ge, ksft_ne, ksft_busy_wait -from lib.py import NetdevFamily, NetdevSimDev, ip +from lib.py import ksft_run, ksft_exit +from lib.py import ksft_eq, ksft_ge, ksft_ne, ksft_raises, ksft_busy_wait +from lib.py import NetdevFamily, NetdevSimDev, NlError, ip def empty_check(nf) -> None: @@ -19,6 +23,15 @@ def lo_check(nf) -> None: ksft_eq(len(lo_info['xdp-rx-metadata-features']), 0) +def dev_dump_reject_attr(nf) -> None: + """Test that dev-get dump rejects attributes (no dump request policy).""" + with ksft_raises(NlError) as cm: + nf.dev_get({'ifindex': 1}, dump=True) + ksft_eq(cm.exception.nl_msg.error, -errno.EINVAL) + ksft_eq(cm.exception.nl_msg.extack['msg'], 'Unknown attribute type') + ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex') + + def napi_list_check(nf) -> None: with NetdevSimDev(queue_count=100) as nsimdev: nsim = nsimdev.nsims[0] @@ -243,9 +256,16 @@ def page_pool_check(nf) -> None: def main() -> None: + """ Ksft boiler plate main """ nf = NetdevFamily() - ksft_run([empty_check, lo_check, page_pool_check, napi_list_check, - dev_set_threaded, napi_set_threaded, nsim_rxq_reset_down], + ksft_run([empty_check, + lo_check, + dev_dump_reject_attr, + napi_list_check, + napi_set_threaded, + dev_set_threaded, + nsim_rxq_reset_down, + page_pool_check], args=(nf, )) ksft_exit() From c1f9a89b0c901266028e66cd8e6bdf54c8c3042e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 10 Mar 2026 20:28:39 -0700 Subject: [PATCH 0470/1561] selftests: net: add test for Netlink policy dumps Add validation for the nlctrl family, accessing family info and dumping policies. TAP version 13 1..4 ok 1 nl_nlctrl.getfamily_do ok 2 nl_nlctrl.getfamily_dump ok 3 nl_nlctrl.getpolicy_dump ok 4 nl_nlctrl.getpolicy_by_op # Totals: pass:4 fail:0 xfail:0 xpass:0 skip:0 error:0 Link: https://patch.msgid.link/20260311032839.417748-5-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 1 + .../testing/selftests/net/lib/py/__init__.py | 5 +- tools/testing/selftests/net/lib/py/ynl.py | 10 +- tools/testing/selftests/net/nl_nlctrl.py | 135 ++++++++++++++++++ 4 files changed, 148 insertions(+), 3 deletions(-) create mode 100755 tools/testing/selftests/net/nl_nlctrl.py diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index a24ea64e2ae8..6bced3ed798b 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -65,6 +65,7 @@ TEST_PROGS := \ netns-name.sh \ netns-sysctl.sh \ nl_netdev.py \ + nl_nlctrl.py \ pmtu.sh \ psock_snd.sh \ reuseaddr_ports_exhausted.sh \ diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py index 54e84282781c..e0c920041a58 100644 --- a/tools/testing/selftests/net/lib/py/__init__.py +++ b/tools/testing/selftests/net/lib/py/__init__.py @@ -15,7 +15,8 @@ from .nsim import NetdevSim, NetdevSimDev from .utils import CmdExitFailure, fd_read_timeout, cmd, bkg, defer, \ bpftool, ip, ethtool, bpftrace, rand_port, rand_ports, wait_port_listen, \ wait_file, tool -from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily, RtnlAddrFamily +from .ynl import NlError, NlctrlFamily, YnlFamily, \ + EthtoolFamily, NetdevFamily, RtnlFamily, RtnlAddrFamily from .ynl import NetshaperFamily, DevlinkFamily, PSPFamily, Netlink __all__ = ["KSRC", @@ -31,4 +32,4 @@ __all__ = ["KSRC", "NetdevSim", "NetdevSimDev", "NetshaperFamily", "DevlinkFamily", "PSPFamily", "NlError", "YnlFamily", "EthtoolFamily", "NetdevFamily", "RtnlFamily", - "RtnlAddrFamily", "Netlink"] + "NlctrlFamily", "RtnlAddrFamily", "Netlink"] diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py index e8aa97936f6c..2e567062aa6c 100644 --- a/tools/testing/selftests/net/lib/py/ynl.py +++ b/tools/testing/selftests/net/lib/py/ynl.py @@ -30,7 +30,8 @@ except ModuleNotFoundError as e: __all__ = [ "NlError", "NlPolicy", "Netlink", "YnlFamily", "SPEC_PATH", "EthtoolFamily", "RtnlFamily", "RtnlAddrFamily", - "NetdevFamily", "NetshaperFamily", "DevlinkFamily", "PSPFamily", + "NetdevFamily", "NetshaperFamily", "NlctrlFamily", "DevlinkFamily", + "PSPFamily", ] # @@ -63,6 +64,13 @@ class NetshaperFamily(YnlFamily): super().__init__((SPEC_PATH / Path('net_shaper.yaml')).as_posix(), schema='', recv_size=recv_size) + +class NlctrlFamily(YnlFamily): + def __init__(self, recv_size=0): + super().__init__((SPEC_PATH / Path('nlctrl.yaml')).as_posix(), + schema='', recv_size=recv_size) + + class DevlinkFamily(YnlFamily): def __init__(self, recv_size=0): super().__init__((SPEC_PATH / Path('devlink.yaml')).as_posix(), diff --git a/tools/testing/selftests/net/nl_nlctrl.py b/tools/testing/selftests/net/nl_nlctrl.py new file mode 100755 index 000000000000..d19e206c2c02 --- /dev/null +++ b/tools/testing/selftests/net/nl_nlctrl.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Tests for the nlctrl genetlink family (family info and policy dumps). +""" + +from lib.py import ksft_run, ksft_exit +from lib.py import ksft_eq, ksft_ge, ksft_true, ksft_in, ksft_not_in +from lib.py import NetdevFamily, EthtoolFamily, NlctrlFamily + + +def getfamily_do(ctrl) -> None: + """Query a single family by name and validate its ops.""" + fam = ctrl.getfamily({'family-name': 'netdev'}) + ksft_eq(fam['family-name'], 'netdev') + ksft_true(fam['family-id'] > 0) + + # The format of ops is quite odd, [{$idx: {"id"...}}, {$idx: {"id"...}}] + # Discard the indices and re-key by command id. + ops_by_id = {v['id']: v for op in fam['ops'] for v in op.values()} + ksft_eq(len(ops_by_id), len(fam['ops'])) + + # All ops should have a policy (either do or dump has one) + for op in ops_by_id.values(): + ksft_in('cmd-cap-haspol', op['flags'], + comment=f"op {op['id']} missing haspol") + + # dev-get (id 1) should support both do and dump + ksft_in('cmd-cap-do', ops_by_id[1]['flags']) + ksft_in('cmd-cap-dump', ops_by_id[1]['flags']) + + # qstats-get (id 12) is dump-only + ksft_not_in('cmd-cap-do', ops_by_id[12]['flags']) + ksft_in('cmd-cap-dump', ops_by_id[12]['flags']) + + # napi-set (id 14) is do-only and requires admin + ksft_in('cmd-cap-do', ops_by_id[14]['flags']) + ksft_not_in('cmd-cap-dump', ops_by_id[14]['flags']) + ksft_in('admin-perm', ops_by_id[14]['flags']) + + # Notification-only commands (dev-add/del/change-ntf etc.) must + # not appear in the ops list since they have no do/dump handlers. + for ntf_id in [2, 3, 4, 6, 7, 8]: + ksft_not_in(ntf_id, ops_by_id, + comment=f"ntf-only cmd {ntf_id} should not be in ops") + + +def getfamily_dump(ctrl) -> None: + """Dump all families and verify expected entries.""" + families = ctrl.getfamily({}, dump=True) + ksft_ge(len(families), 2) + + names = [f['family-name'] for f in families] + ksft_in('nlctrl', names, comment="nlctrl not found in family dump") + ksft_in('netdev', names, comment="netdev not found in family dump") + + +def getpolicy_dump(_ctrl) -> None: + """Dump policies for ops using get_policy() and validate results. + + Test with netdev (split ops) where do and dump can have different + policies, and with ethtool (full ops) where they always share one. + """ + # -- netdev (split ops) -- + ndev = NetdevFamily() + + # dev-get: do has a real policy with ifindex, dump has no policy + # (only the reject-all policy with maxattr=0) + pol = ndev.get_policy('dev-get', 'do') + ksft_in('ifindex', pol.attrs, + comment="dev-get do policy should have ifindex") + ksft_eq(pol.attrs.get('ifindex', {}).get('type'), 'u32') + + pol_dump = ndev.get_policy('dev-get', 'dump') + ksft_eq(len(pol_dump.attrs), 0, + comment="dev-get should not accept any attrs") + + # napi-get: both do and dump have real policies + pol_do = ndev.get_policy('napi-get', 'do') + ksft_ge(len(pol_do.attrs), 1) + + pol_dump = ndev.get_policy('napi-get', 'dump') + ksft_ge(len(pol_dump.attrs), 1) + + # -- ethtool (full ops) -- + et = EthtoolFamily() + + # strset-get (has both do and dump, full ops share policy) + pol_do = et.get_policy('strset-get', 'do') + ksft_ge(len(pol_do.attrs), 1, comment="strset-get should have a do policy") + + pol_dump = et.get_policy('strset-get', 'dump') + ksft_ge(len(pol_dump.attrs), 1, + comment="strset-get should have a dump policy") + + # Same policy means same attribute names + ksft_eq(set(pol_do.attrs.keys()), set(pol_dump.attrs.keys())) + + # linkinfo-set is do-only (SET command), no dump + pol_do = et.get_policy('linkinfo-set', 'do') + ksft_ge(len(pol_do.attrs), 1, + comment="linkinfo-set should have a do policy") + + pol_dump = et.get_policy('linkinfo-set', 'dump') + ksft_eq(pol_dump, None, + comment="linkinfo-set should not have a dump policy") + + +def getpolicy_by_op(_ctrl) -> None: + """Query policy for specific ops, check attr names are resolved.""" + ndev = NetdevFamily() + + # dev-get do policy should have named attributes from the spec + pol = ndev.get_policy('dev-get', 'do') + ksft_ge(len(pol.attrs), 1) + # All attr names should be resolved (no 'attr-N' fallbacks) + for name in pol.attrs: + ksft_true(not name.startswith('attr-'), + comment=f"unresolved attr name: {name}") + + +def main() -> None: + """ Ksft boiler plate main """ + ctrl = NlctrlFamily() + ksft_run([getfamily_do, + getfamily_dump, + getpolicy_dump, + getpolicy_by_op], + args=(ctrl, )) + ksft_exit() + + +if __name__ == "__main__": + main() From 15abbe7c82661209c1dc67c21903c07e2fff5aae Mon Sep 17 00:00:00 2001 From: Nimrod Oren Date: Mon, 9 Mar 2026 10:13:01 +0200 Subject: [PATCH 0471/1561] net: page_pool: scale alloc cache with PAGE_SIZE The current page_pool alloc-cache size and refill values were chosen to match the NAPI budget and to leave headroom for XDP_DROP recycling. These fixed values do not scale well with large pages, as they significantly increase a given page_pool's memory footprint. Scale these values to better balance memory footprint across page sizes, while keeping behavior on 4KB-page systems unchanged. Reviewed-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Nimrod Oren Link: https://patch.msgid.link/20260309081301.103152-1-noren@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/page_pool/types.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index cdd95477af7a..03da138722f5 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -44,6 +44,8 @@ * use-case. The NAPI budget is 64 packets. After a NAPI poll the RX * ring is usually refilled and the max consumed elements will be 64, * thus a natural max size of objects needed in the cache. + * The refill watermark is set to 64 for 4KB pages, + * and scales to balance its size in bytes across page sizes. * * Keeping room for more objects, is due to XDP_DROP use-case. As * XDP_DROP allows the opportunity to recycle objects directly into @@ -51,8 +53,15 @@ * cache is already full (or partly full) then the XDP_DROP recycles * would have to take a slower code path. */ -#define PP_ALLOC_CACHE_SIZE 128 +#if PAGE_SIZE >= SZ_64K +#define PP_ALLOC_CACHE_REFILL 4 +#elif PAGE_SIZE >= SZ_16K +#define PP_ALLOC_CACHE_REFILL 16 +#else #define PP_ALLOC_CACHE_REFILL 64 +#endif + +#define PP_ALLOC_CACHE_SIZE (PP_ALLOC_CACHE_REFILL * 2) struct pp_alloc_cache { u32 count; netmem_ref cache[PP_ALLOC_CACHE_SIZE]; From 08dc30de1a402fe88fd80592cf6c72c4c2ebbcbc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Mar 2026 19:13:40 +0000 Subject: [PATCH 0472/1561] net: add skb_defer_disable_key static key Add a static key to bypass skb_attempt_defer_free() steps if net.core.skb_defer_max is set to zero. Main benefit is the atomic_long_inc_return() avoidance. Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260311191340.1996888-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/net-sysfs.h | 1 + net/core/skbuff.c | 5 +++++ net/core/sysctl_net_core.c | 25 ++++++++++++++++++++++++- 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h index e938f25e8e86..38e2e3ffd0bd 100644 --- a/net/core/net-sysfs.h +++ b/net/core/net-sysfs.h @@ -13,4 +13,5 @@ int netdev_change_owner(struct net_device *, const struct net *net_old, extern struct mutex rps_default_mask_mutex; +DECLARE_STATIC_KEY_FALSE(skb_defer_disable_key); #endif diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 513cbfed19bc..3d6978dd0aa8 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -7256,6 +7256,8 @@ static void kfree_skb_napi_cache(struct sk_buff *skb) local_bh_enable(); } +DEFINE_STATIC_KEY_FALSE(skb_defer_disable_key); + /** * skb_attempt_defer_free - queue skb for remote freeing * @skb: buffer @@ -7272,6 +7274,9 @@ void skb_attempt_defer_free(struct sk_buff *skb) bool kick; int cpu; + if (static_branch_unlikely(&skb_defer_disable_key)) + goto nodefer; + /* zero copy notifications should not be delayed. */ if (skb_zcopy(skb)) goto nodefer; diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 502705e04649..b508618bfc12 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -349,6 +349,29 @@ static int proc_do_rss_key(const struct ctl_table *table, int write, return proc_dostring(&fake_table, write, buffer, lenp, ppos); } +static int proc_do_skb_defer_max(const struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + static DEFINE_MUTEX(skb_defer_max_mutex); + int ret, oval, nval; + + mutex_lock(&skb_defer_max_mutex); + + oval = !net_hotdata.sysctl_skb_defer_max; + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + nval = !net_hotdata.sysctl_skb_defer_max; + + if (nval != oval) { + if (nval) + static_branch_enable(&skb_defer_disable_key); + else + static_branch_disable(&skb_defer_disable_key); + } + + mutex_unlock(&skb_defer_max_mutex); + return ret; +} + #ifdef CONFIG_BPF_JIT static int proc_dointvec_minmax_bpf_enable(const struct ctl_table *table, int write, void *buffer, size_t *lenp, @@ -650,7 +673,7 @@ static struct ctl_table net_core_table[] = { .data = &net_hotdata.sysctl_skb_defer_max, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_do_skb_defer_max, .extra1 = SYSCTL_ZERO, }, }; From 886d56099d947443936298a8f98533768c5ad44b Mon Sep 17 00:00:00 2001 From: Kexin Sun Date: Wed, 11 Mar 2026 21:30:11 +0800 Subject: [PATCH 0473/1561] qlcnic: update outdated comment The function pci_unmap_page() was a compatibility wrapper around dma_unmap_page(), and was removed by commit 7968778914e5 ("PCI: Remove the deprecated pci-dma-compat.h API"). Update the comment accordingly. Assisted-by: unnamed:deepseek-v3.2 coccinelle Signed-off-by: Kexin Sun Link: https://patch.msgid.link/20260311133012.519-1-kexinsun@smail.nju.edu.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qlcnic/qlcnic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h index 3d0b5cd978cb..d7c8fadb49de 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h @@ -354,7 +354,7 @@ struct qlcnic_skb_frag { /* * There will be one qlcnic_buffer per skb packet. These will be - * used to save the dma info for pci_unmap_page() + * used to save the dma info for dma_unmap_page() */ struct qlcnic_cmd_buffer { struct sk_buff *skb; From 8f921f61005450589c0bc1a941a5ddde21d9aed9 Mon Sep 17 00:00:00 2001 From: Kexin Sun Date: Wed, 11 Mar 2026 21:35:19 +0800 Subject: [PATCH 0474/1561] netlink: update outdated comment The function netlink_clear_multicast_users() was removed as unused in commit 2173f8d953e7 ("netlink: cleanup tap related functions"). Update the comment in netlink_change_ngroups() to remove the stale reference, replacing it with a general description of the behavior while preserving the warning. Assisted-by: unnamed:deepseek-v3.2 coccinelle Signed-off-by: Kexin Sun Link: https://patch.msgid.link/20260311133519.688-1-kexinsun@smail.nju.edu.cn Signed-off-by: Jakub Kicinski --- net/netlink/af_netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 4d609d5cf406..018d33af25ad 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2108,8 +2108,8 @@ int __netlink_change_ngroups(struct sock *sk, unsigned int groups) * This changes the number of multicast groups that are available * on a certain netlink family. Note that it is not possible to * change the number of groups to below 32. Also note that it does - * not implicitly call netlink_clear_multicast_users() when the - * number of groups is reduced. + * not implicitly clear listeners from groups that are removed when + * the number of groups is reduced. * * @sk: The kernel netlink socket, as returned by netlink_kernel_create(). * @groups: The new number of groups. From e4b993f2bca78357b430170574f8de7bc7874088 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 3 Mar 2026 22:17:09 +0100 Subject: [PATCH 0475/1561] wifi: nl80211: split out UHR operation information The beacon doesn't contain the full UHR operation, a number of fields (such as NPCA) are only partially there. Add a new attribute to contain the full information, so it's available to the driver/mac80211. Link: https://patch.msgid.link/20260303221710.866bacf82639.Iafdf37fb0f4304bdcdb824977d61e17b38c47685@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 6 ++++++ net/wireless/nl80211.c | 26 ++++++++++++++++---------- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 0b7a06c2b9f7..67d764023988 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3001,6 +3001,10 @@ enum nl80211_commands { * interference detection is not performed on these sub-channels, their * corresponding bits are consistently set to zero. * + * @NL80211_ATTR_UHR_OPERATION: Full UHR Operation element, as it appears in + * association response etc., since it's abridged in the beacon. Used + * for START_AP etc. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3576,6 +3580,8 @@ enum nl80211_attrs { NL80211_ATTR_INCUMBENT_SIGNAL_INTERFERENCE_BITMAP, + NL80211_ATTR_UHR_OPERATION, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 699687a0caa9..3e867930e253 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -344,6 +344,17 @@ static int validate_uhr_capa(const struct nlattr *attr, return 0; } +static int validate_uhr_operation(const struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + const u8 *data = nla_data(attr); + unsigned int len = nla_len(attr); + + if (!ieee80211_uhr_oper_size_ok(data, len, false)) + return -EINVAL; + return 0; +} + /* policy for the attributes */ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR]; @@ -949,6 +960,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_UHR_CAPABILITY] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_uhr_capa, 255), [NL80211_ATTR_DISABLE_UHR] = { .type = NLA_FLAG }, + [NL80211_ATTR_UHR_OPERATION] = + NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_uhr_operation), }; /* policy for the key attributes */ @@ -6501,16 +6514,6 @@ static int nl80211_calculate_ap_params(struct cfg80211_ap_settings *params) return -EINVAL; } - cap = cfg80211_find_ext_elem(WLAN_EID_EXT_UHR_OPER, ies, ies_len); - if (cap) { - if (!cap->datalen) - return -EINVAL; - params->uhr_oper = (void *)(cap->data + 1); - if (!ieee80211_uhr_oper_size_ok((const u8 *)params->uhr_oper, - cap->datalen - 1, true)) - return -EINVAL; - } - return 0; } @@ -6952,6 +6955,9 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) if (err) goto out; + if (info->attrs[NL80211_ATTR_UHR_OPERATION]) + params->uhr_oper = nla_data(info->attrs[NL80211_ATTR_UHR_OPERATION]); + err = nl80211_validate_ap_phy_operation(params); if (err) goto out; From f2514ff78855c45fe93c82bdb45f7609dd70c637 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 3 Mar 2026 15:08:33 +0100 Subject: [PATCH 0476/1561] wifi: mac80211: validate HE 6 GHz operation when EHT is used When in strict mode, validate that the HE 6 GHz operation is valid even when EHT operation is used instead. This checks that APs are advertising correct information for HE clients, without testing with such clients. Reviewed-by: Ilan Peer Reviewed-by: Miriam Rachel Korenblit Link: https://patch.msgid.link/20260303150832.74b934163b99.If4d1db3f39c37900cf0d0f4669cd5f8b677daaa0@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 5ecd3d1b172d..479bb76ec114 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -216,6 +216,24 @@ ieee80211_determine_ap_chan(struct ieee80211_sub_if_data *sdata, return IEEE80211_CONN_MODE_LEGACY; } + if (eht_oper && ieee80211_hw_check(&sdata->local->hw, STRICT)) { + struct cfg80211_chan_def he_chandef = *chandef; + + if (!ieee80211_chandef_he_6ghz_oper(sdata->local, + he_oper, NULL, + &he_chandef)) { + sdata_info(sdata, + "bad HE operation in EHT AP\n"); + return IEEE80211_CONN_MODE_LEGACY; + } + + if (!cfg80211_chandef_compatible(chandef, + &he_chandef)) { + sdata_info(sdata, "HE/EHT incompatible\n"); + return IEEE80211_CONN_MODE_LEGACY; + } + } + if (mode <= IEEE80211_CONN_MODE_EHT) return mode; goto check_uhr; From ba9d121f85771cce38012d1bee59d7399250e4a5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 3 Mar 2026 15:26:17 +0100 Subject: [PATCH 0477/1561] wifi: mac80211: refactor chandef tracing macros We don't need to duplicate the macros, just make a generic one that gets the name prefix to be used, and use that to create the others. While at it, add the puncturing bitmap to the trace and simplify the ternary expressions. Reviewed-by: Miriam Rachel Korenblit Link: https://patch.msgid.link/20260303152641.ca32d70055f8.I8138a31ceb75715d928d807554288baccc33cd8c@changeid Signed-off-by: Johannes Berg --- net/mac80211/trace.h | 94 ++++++++++++++++++-------------------------- 1 file changed, 38 insertions(+), 56 deletions(-) diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index c04d4547e8f4..1f0c07eaad1b 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -2,7 +2,7 @@ /* * Portions of this file * Copyright(c) 2016-2017 Intel Deutschland GmbH - * Copyright (C) 2018 - 2024 Intel Corporation + * Copyright (C) 2018-2024, 2026 Intel Corporation */ #if !defined(__MAC80211_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ) @@ -37,64 +37,46 @@ #define VIF_PR_FMT " vif:%s(%d%s)" #define VIF_PR_ARG __get_str(vif_name), __entry->vif_type, __entry->p2p ? "/p2p" : "" -#define CHANDEF_ENTRY __field(u32, control_freq) \ - __field(u32, freq_offset) \ - __field(u32, chan_width) \ - __field(u32, center_freq1) \ - __field(u32, freq1_offset) \ - __field(u32, center_freq2) -#define CHANDEF_ASSIGN(c) \ - __entry->control_freq = (c) ? ((c)->chan ? (c)->chan->center_freq : 0) : 0; \ - __entry->freq_offset = (c) ? ((c)->chan ? (c)->chan->freq_offset : 0) : 0; \ - __entry->chan_width = (c) ? (c)->width : 0; \ - __entry->center_freq1 = (c) ? (c)->center_freq1 : 0; \ - __entry->freq1_offset = (c) ? (c)->freq1_offset : 0; \ - __entry->center_freq2 = (c) ? (c)->center_freq2 : 0; -#define CHANDEF_PR_FMT " chandef(%d.%03d MHz,width:%d,center: %d.%03d/%d MHz)" -#define CHANDEF_PR_ARG __entry->control_freq, __entry->freq_offset, __entry->chan_width, \ - __entry->center_freq1, __entry->freq1_offset, __entry->center_freq2 +#define __CHANDEF_ENTRY(n) \ + __field(u32, n##control_freq) \ + __field(u32, n##freq_offset) \ + __field(u32, n##chan_width) \ + __field(u32, n##center_freq1) \ + __field(u32, n##freq1_offset) \ + __field(u32, n##center_freq2) \ + __field(u16, n##punctured) +#define __CHANDEF_ASSIGN(n, c) \ + __entry->n##control_freq = (c) && (c)->chan ? \ + (c)->chan->center_freq : 0; \ + __entry->n##freq_offset = (c) && (c)->chan ? \ + (c)->chan->freq_offset : 0; \ + __entry->n##chan_width = (c) ? (c)->width : 0; \ + __entry->n##center_freq1 = (c) ? (c)->center_freq1 : 0; \ + __entry->n##freq1_offset = (c) ? (c)->freq1_offset : 0; \ + __entry->n##center_freq2 = (c) ? (c)->center_freq2 : 0; \ + __entry->n##punctured = (c) ? (c)->punctured : 0; +#define __CHANDEF_PR_FMT(n) \ + " " #n "(%d.%03d MHz,width:%d,center: %d.%03d/%d MHz, punct:0x%x)" +#define __CHANDEF_PR_ARG(n) \ + __entry->n##control_freq, __entry->n##freq_offset, \ + __entry->n##chan_width, __entry->n##center_freq1, \ + __entry->n##freq1_offset, __entry->n##center_freq2, \ + __entry->n##punctured -#define MIN_CHANDEF_ENTRY \ - __field(u32, min_control_freq) \ - __field(u32, min_freq_offset) \ - __field(u32, min_chan_width) \ - __field(u32, min_center_freq1) \ - __field(u32, min_freq1_offset) \ - __field(u32, min_center_freq2) +#define CHANDEF_ENTRY __CHANDEF_ENTRY() +#define CHANDEF_ASSIGN(c) __CHANDEF_ASSIGN(, c) +#define CHANDEF_PR_FMT __CHANDEF_PR_FMT(chandef) +#define CHANDEF_PR_ARG __CHANDEF_PR_ARG() -#define MIN_CHANDEF_ASSIGN(c) \ - __entry->min_control_freq = (c)->chan ? (c)->chan->center_freq : 0; \ - __entry->min_freq_offset = (c)->chan ? (c)->chan->freq_offset : 0; \ - __entry->min_chan_width = (c)->width; \ - __entry->min_center_freq1 = (c)->center_freq1; \ - __entry->min_freq1_offset = (c)->freq1_offset; \ - __entry->min_center_freq2 = (c)->center_freq2; -#define MIN_CHANDEF_PR_FMT " mindef(%d.%03d MHz,width:%d,center: %d.%03d/%d MHz)" -#define MIN_CHANDEF_PR_ARG __entry->min_control_freq, __entry->min_freq_offset, \ - __entry->min_chan_width, \ - __entry->min_center_freq1, __entry->min_freq1_offset, \ - __entry->min_center_freq2 +#define MIN_CHANDEF_ENTRY __CHANDEF_ENTRY(min) +#define MIN_CHANDEF_ASSIGN(c) __CHANDEF_ASSIGN(min, c) +#define MIN_CHANDEF_PR_FMT __CHANDEF_PR_FMT(mindef) +#define MIN_CHANDEF_PR_ARG __CHANDEF_PR_ARG(min) -#define AP_CHANDEF_ENTRY \ - __field(u32, ap_control_freq) \ - __field(u32, ap_freq_offset) \ - __field(u32, ap_chan_width) \ - __field(u32, ap_center_freq1) \ - __field(u32, ap_freq1_offset) \ - __field(u32, ap_center_freq2) - -#define AP_CHANDEF_ASSIGN(c) \ - __entry->ap_control_freq = (c)->chan ? (c)->chan->center_freq : 0;\ - __entry->ap_freq_offset = (c)->chan ? (c)->chan->freq_offset : 0;\ - __entry->ap_chan_width = (c)->chan ? (c)->width : 0; \ - __entry->ap_center_freq1 = (c)->chan ? (c)->center_freq1 : 0; \ - __entry->ap_freq1_offset = (c)->chan ? (c)->freq1_offset : 0; \ - __entry->ap_center_freq2 = (c)->chan ? (c)->center_freq2 : 0; -#define AP_CHANDEF_PR_FMT " ap(%d.%03d MHz,width:%d,center: %d.%03d/%d MHz)" -#define AP_CHANDEF_PR_ARG __entry->ap_control_freq, __entry->ap_freq_offset, \ - __entry->ap_chan_width, \ - __entry->ap_center_freq1, __entry->ap_freq1_offset, \ - __entry->ap_center_freq2 +#define AP_CHANDEF_ENTRY __CHANDEF_ENTRY(ap) +#define AP_CHANDEF_ASSIGN(c) __CHANDEF_ASSIGN(ap, c) +#define AP_CHANDEF_PR_FMT __CHANDEF_PR_FMT(ap) +#define AP_CHANDEF_PR_ARG __CHANDEF_PR_ARG(ap) #define CHANCTX_ENTRY CHANDEF_ENTRY \ MIN_CHANDEF_ENTRY \ From f932856649b07529d9dbd0f2f7fb7fcc5b929858 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 3 Mar 2026 15:26:18 +0100 Subject: [PATCH 0478/1561] wifi: mac80211: always use full chanctx compatible check For NPCA, we need to treat the channel request differently for AP and other interfaces (APs can share NPCA, under the assumption that userspace will set them up with the same BSS color.) This is difficult if we have to check against a chanreq made up from the chanctx, but this isn't a code path that needs to be highly optimised, so just always use the (originally) recheck functionality to check against all users of the chanctx. Link: https://patch.msgid.link/20260303152641.1a3ff6ead82b.I486f1a94b9a32e0b045815cbbb22679c8cef56e4@changeid Signed-off-by: Johannes Berg --- net/mac80211/chan.c | 90 ++++++++++++++++++++------------------------- 1 file changed, 39 insertions(+), 51 deletions(-) diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 4447cf03c41b..2f0c93f3ace6 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * mac80211 - channel management - * Copyright 2020 - 2025 Intel Corporation + * Copyright 2020-2026 Intel Corporation */ #include @@ -239,24 +239,45 @@ ieee80211_chanreq_compatible(const struct ieee80211_chan_req *a, return tmp; } +/* + * When checking for compatible, check against all the links using + * the chanctx (except the one passed that might be changing) to + * allow changes to the AP's bandwidth for wider bandwidth OFDMA + * purposes, which wouldn't be treated as compatible by checking + * against the chanctx's oper/ap chandefs. + */ static const struct ieee80211_chan_req * -ieee80211_chanctx_compatible(struct ieee80211_chanctx *ctx, +_ieee80211_chanctx_compatible(struct ieee80211_local *local, + struct ieee80211_link_data *skip_link, + struct ieee80211_chanctx *ctx, + const struct ieee80211_chan_req *req, + struct ieee80211_chan_req *tmp) +{ + const struct ieee80211_chan_req *ret = req; + struct ieee80211_chanctx_user_iter iter; + + lockdep_assert_wiphy(local->hw.wiphy); + + for_each_chanctx_user_all(local, ctx, &iter) { + if (iter.link && iter.link == skip_link) + continue; + + ret = ieee80211_chanreq_compatible(ret, iter.chanreq, tmp); + if (!ret) + return NULL; + } + + *tmp = *ret; + return tmp; +} + +static const struct ieee80211_chan_req * +ieee80211_chanctx_compatible(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx, const struct ieee80211_chan_req *req, struct ieee80211_chan_req *tmp) { - const struct ieee80211_chan_req *ret; - struct ieee80211_chan_req tmp2; - - *tmp = (struct ieee80211_chan_req){ - .oper = ctx->conf.def, - .ap = ctx->conf.ap, - }; - - ret = ieee80211_chanreq_compatible(tmp, req, &tmp2); - if (!ret) - return NULL; - *tmp = *ret; - return tmp; + return _ieee80211_chanctx_compatible(local, NULL, ctx, req, tmp); } static const struct ieee80211_chan_req * @@ -756,7 +777,8 @@ ieee80211_find_chanctx(struct ieee80211_local *local, if (ctx->mode == IEEE80211_CHANCTX_EXCLUSIVE) continue; - compat = ieee80211_chanctx_compatible(ctx, chanreq, &tmp); + compat = ieee80211_chanctx_compatible(local, ctx, chanreq, + &tmp); if (!compat) continue; @@ -2128,40 +2150,6 @@ int ieee80211_link_use_reserved_context(struct ieee80211_link_data *link) return 0; } -/* - * This is similar to ieee80211_chanctx_compatible(), but rechecks - * against all the links actually using it (except the one that's - * passed, since that one is changing). - * This is done in order to allow changes to the AP's bandwidth for - * wider bandwidth OFDMA purposes, which wouldn't be treated as - * compatible by ieee80211_chanctx_recheck() but is OK if the link - * requesting the update is the only one using it. - */ -static const struct ieee80211_chan_req * -ieee80211_chanctx_recheck(struct ieee80211_local *local, - struct ieee80211_link_data *skip_link, - struct ieee80211_chanctx *ctx, - const struct ieee80211_chan_req *req, - struct ieee80211_chan_req *tmp) -{ - const struct ieee80211_chan_req *ret = req; - struct ieee80211_chanctx_user_iter iter; - - lockdep_assert_wiphy(local->hw.wiphy); - - for_each_chanctx_user_all(local, ctx, &iter) { - if (iter.link == skip_link) - continue; - - ret = ieee80211_chanreq_compatible(ret, iter.chanreq, tmp); - if (!ret) - return NULL; - } - - *tmp = *ret; - return tmp; -} - int ieee80211_link_change_chanreq(struct ieee80211_link_data *link, const struct ieee80211_chan_req *chanreq, u64 *changed) @@ -2198,7 +2186,7 @@ int ieee80211_link_change_chanreq(struct ieee80211_link_data *link, ctx = container_of(conf, struct ieee80211_chanctx, conf); - compat = ieee80211_chanctx_recheck(local, link, ctx, chanreq, &tmp); + compat = _ieee80211_chanctx_compatible(local, link, ctx, chanreq, &tmp); if (!compat) return -EINVAL; From fd2905157d692b9dee39d27bccb7b255e57ba3f4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 3 Mar 2026 15:26:19 +0100 Subject: [PATCH 0479/1561] wifi: cfg80211: split control freq check from chandef check In order to introduce NPCA later, split the control frequency check out of cfg80211_chandef_valid(). Link: https://patch.msgid.link/20260303152641.11b31e4878a7.I534669506008e12ffcd6c115161777e528fdc838@changeid Signed-off-by: Johannes Berg --- net/wireless/chan.c | 98 +++++++++++++++++++++++++-------------------- 1 file changed, 54 insertions(+), 44 deletions(-) diff --git a/net/wireless/chan.c b/net/wireless/chan.c index d9d4e043bb39..e3c18a4392bb 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -6,7 +6,7 @@ * * Copyright 2009 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright 2018-2025 Intel Corporation + * Copyright 2018-2026 Intel Corporation */ #include @@ -339,6 +339,58 @@ static bool cfg80211_valid_center_freq(u32 center, return (center - bw / 2 - 5945) % step == 0; } +static bool +cfg80211_chandef_valid_control_freq(const struct cfg80211_chan_def *chandef, + u32 control_freq) +{ + switch (chandef->width) { + case NL80211_CHAN_WIDTH_5: + case NL80211_CHAN_WIDTH_10: + case NL80211_CHAN_WIDTH_20: + case NL80211_CHAN_WIDTH_20_NOHT: + case NL80211_CHAN_WIDTH_1: + case NL80211_CHAN_WIDTH_2: + case NL80211_CHAN_WIDTH_4: + case NL80211_CHAN_WIDTH_8: + case NL80211_CHAN_WIDTH_16: + /* checked separately */ + break; + case NL80211_CHAN_WIDTH_320: + if (chandef->center_freq1 == control_freq + 150 || + chandef->center_freq1 == control_freq + 130 || + chandef->center_freq1 == control_freq + 110 || + chandef->center_freq1 == control_freq + 90 || + chandef->center_freq1 == control_freq - 90 || + chandef->center_freq1 == control_freq - 110 || + chandef->center_freq1 == control_freq - 130 || + chandef->center_freq1 == control_freq - 150) + break; + fallthrough; + case NL80211_CHAN_WIDTH_160: + if (chandef->center_freq1 == control_freq + 70 || + chandef->center_freq1 == control_freq + 50 || + chandef->center_freq1 == control_freq - 50 || + chandef->center_freq1 == control_freq - 70) + break; + fallthrough; + case NL80211_CHAN_WIDTH_80P80: + case NL80211_CHAN_WIDTH_80: + if (chandef->center_freq1 == control_freq + 30 || + chandef->center_freq1 == control_freq - 30) + break; + fallthrough; + case NL80211_CHAN_WIDTH_40: + if (chandef->center_freq1 == control_freq + 10 || + chandef->center_freq1 == control_freq - 10) + break; + fallthrough; + default: + return false; + } + + return true; +} + bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef) { u32 control_freq, control_freq_khz, start_khz, end_khz; @@ -393,50 +445,8 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef) break; } - switch (chandef->width) { - case NL80211_CHAN_WIDTH_5: - case NL80211_CHAN_WIDTH_10: - case NL80211_CHAN_WIDTH_20: - case NL80211_CHAN_WIDTH_20_NOHT: - case NL80211_CHAN_WIDTH_1: - case NL80211_CHAN_WIDTH_2: - case NL80211_CHAN_WIDTH_4: - case NL80211_CHAN_WIDTH_8: - case NL80211_CHAN_WIDTH_16: - /* all checked above */ - break; - case NL80211_CHAN_WIDTH_320: - if (chandef->center_freq1 == control_freq + 150 || - chandef->center_freq1 == control_freq + 130 || - chandef->center_freq1 == control_freq + 110 || - chandef->center_freq1 == control_freq + 90 || - chandef->center_freq1 == control_freq - 90 || - chandef->center_freq1 == control_freq - 110 || - chandef->center_freq1 == control_freq - 130 || - chandef->center_freq1 == control_freq - 150) - break; - fallthrough; - case NL80211_CHAN_WIDTH_160: - if (chandef->center_freq1 == control_freq + 70 || - chandef->center_freq1 == control_freq + 50 || - chandef->center_freq1 == control_freq - 50 || - chandef->center_freq1 == control_freq - 70) - break; - fallthrough; - case NL80211_CHAN_WIDTH_80P80: - case NL80211_CHAN_WIDTH_80: - if (chandef->center_freq1 == control_freq + 30 || - chandef->center_freq1 == control_freq - 30) - break; - fallthrough; - case NL80211_CHAN_WIDTH_40: - if (chandef->center_freq1 == control_freq + 10 || - chandef->center_freq1 == control_freq - 10) - break; - fallthrough; - default: + if (!cfg80211_chandef_valid_control_freq(chandef, control_freq)) return false; - } if (!cfg80211_valid_center_freq(chandef->center_freq1, chandef->width)) return false; From 8bca522588e0aace011bf411bb0354e0ca17f144 Mon Sep 17 00:00:00 2001 From: Joshua Peisach Date: Sat, 7 Mar 2026 12:01:35 -0500 Subject: [PATCH 0480/1561] wifi: b43: use register definitions in nphy_op_software_rfkill MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces uses of hardcoded register addresses with proper definitions, for readability. Signed-off-by: Joshua Peisach Acked-by: Michael Büsch Link: https://patch.msgid.link/20260307170135.167460-1-jpeisach@ubuntu.com Signed-off-by: Johannes Berg --- drivers/net/wireless/broadcom/b43/phy_n.c | 24 +++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/broadcom/b43/phy_n.c b/drivers/net/wireless/broadcom/b43/phy_n.c index bbc30cbad0bb..22359bd9db47 100644 --- a/drivers/net/wireless/broadcom/b43/phy_n.c +++ b/drivers/net/wireless/broadcom/b43/phy_n.c @@ -6566,19 +6566,19 @@ static void b43_nphy_op_software_rfkill(struct b43_wldev *dev, b43_radio_mask(dev, 0x09, ~0x2); - b43_radio_write(dev, 0x204D, 0); - b43_radio_write(dev, 0x2053, 0); - b43_radio_write(dev, 0x2058, 0); - b43_radio_write(dev, 0x205E, 0); - b43_radio_mask(dev, 0x2062, ~0xF0); - b43_radio_write(dev, 0x2064, 0); + b43_radio_write(dev, B2056_TX0 | B2056_TX_PADA_BOOST_TUNE, 0); + b43_radio_write(dev, B2056_TX0 | B2056_TX_PADG_BOOST_TUNE, 0); + b43_radio_write(dev, B2056_TX0 | B2056_TX_PGAA_BOOST_TUNE, 0); + b43_radio_write(dev, B2056_TX0 | B2056_TX_PGAG_BOOST_TUNE, 0); + b43_radio_mask(dev, B2056_TX0 | B2056_TX_MIXA_BOOST_TUNE, ~0xF0); + b43_radio_write(dev, B2056_TX0 | B2056_TX_MIXG_BOOST_TUNE, 0); - b43_radio_write(dev, 0x304D, 0); - b43_radio_write(dev, 0x3053, 0); - b43_radio_write(dev, 0x3058, 0); - b43_radio_write(dev, 0x305E, 0); - b43_radio_mask(dev, 0x3062, ~0xF0); - b43_radio_write(dev, 0x3064, 0); + b43_radio_write(dev, B2056_TX1 | B2056_TX_PADA_BOOST_TUNE, 0); + b43_radio_write(dev, B2056_TX1 | B2056_TX_PADG_BOOST_TUNE, 0); + b43_radio_write(dev, B2056_TX1 | B2056_TX_PGAA_BOOST_TUNE, 0); + b43_radio_write(dev, B2056_TX1 | B2056_TX_PGAG_BOOST_TUNE, 0); + b43_radio_mask(dev, B2056_TX1 | B2056_TX_MIXA_BOOST_TUNE, ~0xF0); + b43_radio_write(dev, B2056_TX1 | B2056_TX_MIXG_BOOST_TUNE, 0); } } else { if (phy->rev >= 19) { From 84674b03d8bf3a850f023a98136c27909f0a2b61 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 9 Mar 2026 09:28:28 +0100 Subject: [PATCH 0481/1561] wifi: mac80211: Remove deleted sta links in ieee80211_ml_reconf_work() Delete stale station links announced in the reconfiguration IE transmitted by the AP in the beacon frames. Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260309-mac80211-reconf-remove-sta-link-v2-1-1582aac720c6@kernel.org Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 479bb76ec114..f279bdb03aca 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -7076,6 +7076,7 @@ static void ieee80211_ml_reconf_work(struct wiphy *wiphy, container_of(work, struct ieee80211_sub_if_data, u.mgd.ml_reconf_work.work); u16 new_valid_links, new_active_links, new_dormant_links; + struct sta_info *sta; int ret; if (!sdata->u.mgd.removed_links) @@ -7111,6 +7112,16 @@ static void ieee80211_ml_reconf_work(struct wiphy *wiphy, } } + sta = sta_info_get(sdata, sdata->vif.cfg.ap_addr); + if (sta) { + unsigned long removed_links = sdata->u.mgd.removed_links; + unsigned int link_id; + + for_each_set_bit(link_id, &removed_links, + IEEE80211_MLD_MAX_NUM_LINKS) + ieee80211_sta_free_link(sta, link_id); + } + new_dormant_links = sdata->vif.dormant_links & ~sdata->u.mgd.removed_links; ret = ieee80211_vif_set_links(sdata, new_valid_links, From a6d4291eae0409e63525d3b26f44feae6f6f4659 Mon Sep 17 00:00:00 2001 From: Lachlan Hodges Date: Thu, 12 Mar 2026 15:58:02 +1100 Subject: [PATCH 0482/1561] wifi: mac80211: don't use cfg80211_chandef_create() for default chandef cfg80211_chandef_create() is called universally to create the default chandef during hw registration, however it only really makes sense to be used for 2GHz, 5GHz, and 6GHz (and by extension the 'LC' band) as it relies on the channel type which is only relevant to those specific bands. To reduce some confusion, create a generic helper for creating the default chandef that makes sense for all supported bands. Suggested-by: Johannes Berg Signed-off-by: Lachlan Hodges Link: https://patch.msgid.link/20260312045804.362974-2-lachlan.hodges@morsemicro.com Signed-off-by: Johannes Berg --- net/mac80211/main.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index b0451f1c8e79..d1bb6353908d 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1118,6 +1118,19 @@ ieee80211_ifcomb_check(const struct ieee80211_iface_combination *c, int n_comb) return true; } +static void ieee80211_create_default_chandef(struct cfg80211_chan_def *chandef, + struct ieee80211_channel *chan) +{ + *chandef = (struct cfg80211_chan_def) { + .chan = chan, + .width = chan->band == NL80211_BAND_S1GHZ ? + NL80211_CHAN_WIDTH_1 : + NL80211_CHAN_WIDTH_20_NOHT, + .center_freq1 = chan->center_freq, + .freq1_offset = chan->freq_offset, + }; +} + int ieee80211_register_hw(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); @@ -1261,9 +1274,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) /* if none found then use the first anyway */ if (i == sband->n_channels) i = 0; - cfg80211_chandef_create(&dflt_chandef, - &sband->channels[i], - NL80211_CHAN_NO_HT); + ieee80211_create_default_chandef(&dflt_chandef, + &sband->channels[i]); /* init channel we're on */ local->monitor_chanreq.oper = dflt_chandef; if (local->emulate_chanctx) { From 92d77e06e73ca987b030cfed322e8421db1d6f41 Mon Sep 17 00:00:00 2001 From: Lachlan Hodges Date: Thu, 12 Mar 2026 15:58:03 +1100 Subject: [PATCH 0483/1561] wifi: cfg80211: restrict cfg80211_chandef_create() to only HT-based bands cfg80211_chandef_create() should only be used by bands that are HT-based and the chantype argument makes sense. Insert a WARN such that it isn't used on 60GHz and S1GHz bands and to catch any potential existing uses by those bands. Suggested-by: Johannes Berg Signed-off-by: Lachlan Hodges Link: https://patch.msgid.link/20260312045804.362974-3-lachlan.hodges@morsemicro.com Signed-off-by: Johannes Berg --- net/wireless/chan.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/wireless/chan.c b/net/wireless/chan.c index e3c18a4392bb..3e483fb27c4e 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -29,9 +29,11 @@ void cfg80211_chandef_create(struct cfg80211_chan_def *chandef, *chandef = (struct cfg80211_chan_def) { .chan = chan, - .freq1_offset = chan->freq_offset, }; + WARN_ON(chan->band == NL80211_BAND_60GHZ || + chan->band == NL80211_BAND_S1GHZ); + switch (chan_type) { case NL80211_CHAN_NO_HT: chandef->width = NL80211_CHAN_WIDTH_20_NOHT; From 7218d8e9d8485b08933d832615ca19760a8999cd Mon Sep 17 00:00:00 2001 From: Lachlan Hodges Date: Thu, 12 Mar 2026 15:58:04 +1100 Subject: [PATCH 0484/1561] wifi: cfg80211: check non-S1G width with S1G chandef It is not valid to have an S1G chandef with a non-S1G width. Enforce this during chandef validation. Signed-off-by: Lachlan Hodges Link: https://patch.msgid.link/20260312045804.362974-4-lachlan.hodges@morsemicro.com Signed-off-by: Johannes Berg --- net/wireless/chan.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 3e483fb27c4e..fa0764ede9c5 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -405,6 +405,14 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef) control_freq = chandef->chan->center_freq; + if (cfg80211_chandef_is_s1g(chandef) && + chandef->width != NL80211_CHAN_WIDTH_1 && + chandef->width != NL80211_CHAN_WIDTH_2 && + chandef->width != NL80211_CHAN_WIDTH_4 && + chandef->width != NL80211_CHAN_WIDTH_8 && + chandef->width != NL80211_CHAN_WIDTH_16) + return false; + switch (chandef->width) { case NL80211_CHAN_WIDTH_5: case NL80211_CHAN_WIDTH_10: From cb0caadb64ca0894c4a24e1a34841f260d462f90 Mon Sep 17 00:00:00 2001 From: Shayne Chen Date: Fri, 13 Mar 2026 14:21:49 +0800 Subject: [PATCH 0485/1561] wifi: ieee80211: fix definition of EHT-MCS 15 in MRU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the definition in IEEE Std 802.11be-2024, Table 9-417r, each bit indicates support for the transmission and reception of EHT-MCS 15 in: - B0: 52+26-tone and 106+26-tone MRUs. - B1: a 484+242-tone MRU if 80 MHz is supported. - B2: a 996+484-tone MRU and a 996+484+242-tone MRU if 160 MHz is supported. - B3: a 3×996-tone MRU if 320 MHz is supported. Fixes: 6239da18d2f9 ("wifi: mac80211: adjust EHT capa when lowering bandwidth") Signed-off-by: Shayne Chen Link: https://patch.msgid.link/20260313062150.3165433-1-shayne.chen@mediatek.com Signed-off-by: Johannes Berg --- include/linux/ieee80211-eht.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/ieee80211-eht.h b/include/linux/ieee80211-eht.h index f8e9f5d36d2a..a97b1d01f3ac 100644 --- a/include/linux/ieee80211-eht.h +++ b/include/linux/ieee80211-eht.h @@ -251,8 +251,8 @@ struct ieee80211_eht_operation_info { #define IEEE80211_EHT_PHY_CAP5_SUPP_EXTRA_EHT_LTF 0x40 #define IEEE80211_EHT_PHY_CAP6_MAX_NUM_SUPP_EHT_LTF_MASK 0x07 -#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_80MHZ 0x08 -#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_160MHZ 0x30 +#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_80MHZ 0x10 +#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_160MHZ 0x20 #define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_320MHZ 0x40 #define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_MASK 0x78 #define IEEE80211_EHT_PHY_CAP6_EHT_DUP_6GHZ_SUPP 0x80 From d3947aac97c3e57ee2f85fd1bef8e7674e609c45 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 13 Mar 2026 13:01:06 +0100 Subject: [PATCH 0486/1561] wifi: nl80211: reject S1G/60G with HT chantype This configuration doesn't make sense, neither S1G nor 60G have 20 or 40 MHz channel width. Reject it to not run into the new cfg80211_chandef_create() warning. Fixes: 92d77e06e73c ("wifi: cfg80211: restrict cfg80211_chandef_create() to only HT-based bands") Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 3e867930e253..d2ef13ab1a20 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3634,6 +3634,9 @@ static int _nl80211_parse_chandef(struct cfg80211_registered_device *rdev, case NL80211_CHAN_HT20: case NL80211_CHAN_HT40PLUS: case NL80211_CHAN_HT40MINUS: + if (chandef->chan->band == NL80211_BAND_60GHZ || + chandef->chan->band == NL80211_BAND_S1GHZ) + return -EINVAL; cfg80211_chandef_create(chandef, chandef->chan, chantype); /* user input for center_freq is incorrect */ From 86a41d957ba058932d58c2d7729451afe8625ce9 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 11 Mar 2026 05:19:48 +0000 Subject: [PATCH 0487/1561] udp: Make udp[46]_seq_show() static. Since commit a3d2599b2446 ("ipv{4,6}/udp{,lite}: simplify proc registration"), udp4_seq_show() and udp6_seq_show() are not used in net/ipv4/udplite.c and net/ipv6/udplite.c. Instead, udp_seq_ops and udp6_seq_ops are exposed to UDP-Lite. Let's make udp4_seq_show() and udp6_seq_show() static. udp_seq_ops and udp6_seq_ops are moved to udp_impl.h so that we can make them static when the header is removed. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260311052020.1213705-2-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/udp.h | 3 --- net/ipv4/udp.c | 3 +-- net/ipv4/udp_impl.h | 2 +- net/ipv6/udp.c | 3 +-- net/ipv6/udp_impl.h | 2 +- 5 files changed, 4 insertions(+), 9 deletions(-) diff --git a/include/net/udp.h b/include/net/udp.h index b648003e5792..f51a51c0e468 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -576,9 +576,6 @@ void *udp_seq_start(struct seq_file *seq, loff_t *pos); void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos); void udp_seq_stop(struct seq_file *seq, void *v); -extern const struct seq_operations udp_seq_ops; -extern const struct seq_operations udp6_seq_ops; - int udp4_proc_init(void); void udp4_proc_exit(void); #endif /* CONFIG_PROC_FS */ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 668a0284c3d5..1fcdc5482594 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -3443,7 +3443,7 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f, sk_drops_read(sp)); } -int udp4_seq_show(struct seq_file *seq, void *v) +static int udp4_seq_show(struct seq_file *seq, void *v) { seq_setwidth(seq, 127); if (v == SEQ_START_TOKEN) @@ -3753,7 +3753,6 @@ const struct seq_operations udp_seq_ops = { .stop = udp_seq_stop, .show = udp4_seq_show, }; -EXPORT_IPV6_MOD(udp_seq_ops); static struct udp_seq_afinfo udp4_seq_afinfo = { .family = AF_INET, diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index 17a6fa8b1409..0ca4384f9afa 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h @@ -22,6 +22,6 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags); void udp_destroy_sock(struct sock *sk); #ifdef CONFIG_PROC_FS -int udp4_seq_show(struct seq_file *seq, void *v); +extern const struct seq_operations udp_seq_ops; #endif #endif /* _UDP4_IMPL_H */ diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 5a3984e59c90..5fef1c226697 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1903,7 +1903,7 @@ int udpv6_getsockopt(struct sock *sk, int level, int optname, /* ------------------------------------------------------------------------ */ #ifdef CONFIG_PROC_FS -int udp6_seq_show(struct seq_file *seq, void *v) +static int udp6_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) { seq_puts(seq, IPV6_SEQ_DGRAM_HEADER); @@ -1924,7 +1924,6 @@ const struct seq_operations udp6_seq_ops = { .stop = udp_seq_stop, .show = udp6_seq_show, }; -EXPORT_SYMBOL(udp6_seq_ops); static struct udp_seq_afinfo udp6_seq_afinfo = { .family = AF_INET6, diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index 1bd4a573e1bb..525ea600228a 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -26,6 +26,6 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags); void udpv6_destroy_sock(struct sock *sk); #ifdef CONFIG_PROC_FS -int udp6_seq_show(struct seq_file *seq, void *v); +extern const struct seq_operations udp6_seq_ops; #endif #endif /* _UDP6_IMPL_H */ From 62554a51c5844feebe0466d8b31980e110b481de Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 11 Mar 2026 05:19:49 +0000 Subject: [PATCH 0488/1561] ipv6: Retire UDP-Lite. As announced in commit be28c14ac8bb ("udplite: Print deprecation notice."), it's time to deprecate UDP-Lite. As a first step, let's drop support for IPv6 UDP-Lite sockets. We will remove the remaining dead code gradually. Along with the removal of udplite.c, most of the functions exposed via udp_impl.h are made static. The prototypes of udpv6_sendmsg() and udpv6_recvmsg() are moved to udp.h, but only udpv6_recvmsg() has INDIRECT_CALLABLE_DECLARE() because udpv6_sendmsg() is exported for rxrpc since commit ed472b0c8783 ("rxrpc: Call udp_sendmsg() directly"). Also, udpv6_recvmsg() needs INDIRECT_CALLABLE_SCOPE for CONFIG_MITIGATION_RETPOLINE=n. Note that udplite.h is included temporarily for udplite_csum(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260311052020.1213705-3-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/ipv6.h | 2 - include/net/transp_v6.h | 3 - include/net/udp.h | 4 ++ net/ipv6/Makefile | 2 +- net/ipv6/af_inet6.c | 23 +------ net/ipv6/proc.c | 2 - net/ipv6/udp.c | 32 ++++----- net/ipv6/udp_impl.h | 31 --------- net/ipv6/udplite.c | 139 ---------------------------------------- net/rxrpc/output.c | 2 - 10 files changed, 23 insertions(+), 217 deletions(-) delete mode 100644 net/ipv6/udp_impl.h delete mode 100644 net/ipv6/udplite.c diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 1c0ce5151275..0958cc5c6ec3 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1179,8 +1179,6 @@ int tcp6_proc_init(struct net *net); void tcp6_proc_exit(struct net *net); int udp6_proc_init(struct net *net); void udp6_proc_exit(struct net *net); -int udplite6_proc_init(void); -void udplite6_proc_exit(void); int ipv6_misc_proc_init(void); void ipv6_misc_proc_exit(void); int snmp6_register_dev(struct inet6_dev *idev); diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h index 1a97e3f32029..c0a421fe0c2a 100644 --- a/include/net/transp_v6.h +++ b/include/net/transp_v6.h @@ -8,7 +8,6 @@ /* IPv6 transport protocols */ extern struct proto rawv6_prot; extern struct proto udpv6_prot; -extern struct proto udplitev6_prot; extern struct proto tcpv6_prot; extern struct proto pingv6_prot; @@ -28,8 +27,6 @@ int rawv6_init(void); void rawv6_exit(void); int udpv6_init(void); void udpv6_exit(void); -int udplitev6_init(void); -void udplitev6_exit(void); int tcpv6_init(void); void tcpv6_exit(void); diff --git a/include/net/udp.h b/include/net/udp.h index f51a51c0e468..05f63e9e00a7 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -282,6 +282,10 @@ typedef struct sock *(*udp_lookup_t)(const struct sk_buff *skb, __be16 sport, void udp_v6_early_demux(struct sk_buff *skb); INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *)); +int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); +INDIRECT_CALLABLE_DECLARE(int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, + size_t len, int flags)); + struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, netdev_features_t features, bool is_ipv6); diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 0492f1a0b491..2c9ce2ccbde1 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -7,7 +7,7 @@ obj-$(CONFIG_IPV6) += ipv6.o ipv6-y := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ addrlabel.o \ - route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ + route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o \ raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \ udp_offload.o seg6.o fib6_notifier.o rpl.o ioam6.o diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 03c175cbbdb6..61f7bc88526a 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -43,7 +43,6 @@ #include #include #include -#include #include #include #include @@ -636,8 +635,6 @@ int inet6_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) EXPORT_SYMBOL_GPL(inet6_compat_ioctl); #endif /* CONFIG_COMPAT */ -INDIRECT_CALLABLE_DECLARE(int udpv6_sendmsg(struct sock *, struct msghdr *, - size_t)); int inet6_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; @@ -652,8 +649,6 @@ int inet6_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) sk, msg, size); } -INDIRECT_CALLABLE_DECLARE(int udpv6_recvmsg(struct sock *, struct msghdr *, - size_t, int)); int inet6_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { @@ -1080,13 +1075,9 @@ static int __init inet6_init(void) if (err) goto out_unregister_tcp_proto; - err = proto_register(&udplitev6_prot, 1); - if (err) - goto out_unregister_udp_proto; - err = proto_register(&rawv6_prot, 1); if (err) - goto out_unregister_udplite_proto; + goto out_unregister_udp_proto; err = proto_register(&pingv6_prot, 1); if (err) @@ -1137,8 +1128,6 @@ static int __init inet6_init(void) err = -ENOMEM; if (raw6_proc_init()) goto proc_raw6_fail; - if (udplite6_proc_init()) - goto proc_udplite6_fail; if (ipv6_misc_proc_init()) goto proc_misc6_fail; if (if6_proc_init()) @@ -1174,10 +1163,6 @@ static int __init inet6_init(void) if (err) goto udpv6_fail; - err = udplitev6_init(); - if (err) - goto udplitev6_fail; - err = udpv6_offload_init(); if (err) goto udpv6_offload_fail; @@ -1248,8 +1233,6 @@ ipv6_packet_fail: tcpv6_fail: udpv6_offload_exit(); udpv6_offload_fail: - udplitev6_exit(); -udplitev6_fail: udpv6_exit(); udpv6_fail: ipv6_frag_exit(); @@ -1271,8 +1254,6 @@ ip6_route_fail: proc_if6_fail: ipv6_misc_proc_exit(); proc_misc6_fail: - udplite6_proc_exit(); -proc_udplite6_fail: raw6_proc_exit(); proc_raw6_fail: #endif @@ -1296,8 +1277,6 @@ out_unregister_ping_proto: proto_unregister(&pingv6_prot); out_unregister_raw_proto: proto_unregister(&rawv6_prot); -out_unregister_udplite_proto: - proto_unregister(&udplitev6_prot); out_unregister_udp_proto: proto_unregister(&udpv6_prot); out_unregister_tcp_proto: diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 73296f38c252..21bfc73152f0 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -39,8 +39,6 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v) sock_prot_inuse_get(net, &tcpv6_prot)); seq_printf(seq, "UDP6: inuse %d\n", sock_prot_inuse_get(net, &udpv6_prot)); - seq_printf(seq, "UDPLITE6: inuse %d\n", - sock_prot_inuse_get(net, &udplitev6_prot)); seq_printf(seq, "RAW6: inuse %d\n", sock_prot_inuse_get(net, &rawv6_prot)); seq_printf(seq, "FRAG6: inuse %u memory %lu\n", diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 5fef1c226697..aa859bb0527d 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -37,6 +37,7 @@ #include #include +#include #include #include #include @@ -57,7 +58,7 @@ #include #include #include -#include "udp_impl.h" +#include static void udpv6_destruct_sock(struct sock *sk) { @@ -65,7 +66,7 @@ static void udpv6_destruct_sock(struct sock *sk) inet6_sock_destruct(sk); } -int udpv6_init_sock(struct sock *sk) +static int udpv6_init_sock(struct sock *sk) { int res = udp_lib_init_sock(sk); @@ -95,7 +96,7 @@ u32 udp6_ehashfn(const struct net *net, udp6_ehash_secret + net_hash_mix(net)); } -int udp_v6_get_port(struct sock *sk, unsigned short snum) +static int udp_v6_get_port(struct sock *sk, unsigned short snum) { unsigned int hash2_nulladdr = ipv6_portaddr_hash(sock_net(sk), &in6addr_any, snum); @@ -107,7 +108,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum) return udp_lib_get_port(sk, snum, hash2_nulladdr); } -void udp_v6_rehash(struct sock *sk) +static void udp_v6_rehash(struct sock *sk) { u16 new_hash = ipv6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, @@ -464,6 +465,7 @@ static int udp6_skb_len(struct sk_buff *skb) * return it, otherwise we block. */ +INDIRECT_CALLABLE_SCOPE int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags) { @@ -700,9 +702,9 @@ out: return sk; } -int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - u8 type, u8 code, int offset, __be32 info, - struct udp_table *udptable) +static int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info, + struct udp_table *udptable) { struct ipv6_pinfo *np; const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; @@ -1115,8 +1117,8 @@ static int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto) return 0; } -int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, - int proto) +static int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, + int proto) { enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; const struct in6_addr *saddr, *daddr; @@ -1854,7 +1856,7 @@ static void udpv6_splice_eof(struct socket *sock) release_sock(sk); } -void udpv6_destroy_sock(struct sock *sk) +static void udpv6_destroy_sock(struct sock *sk) { struct udp_sock *up = udp_sk(sk); lock_sock(sk); @@ -1882,8 +1884,8 @@ void udpv6_destroy_sock(struct sock *sk) /* * Socket option code for UDP */ -int udpv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, - unsigned int optlen) +static int udpv6_setsockopt(struct sock *sk, int level, int optname, + sockptr_t optval, unsigned int optlen) { if (level == SOL_UDP || level == SOL_UDPLITE || level == SOL_SOCKET) return udp_lib_setsockopt(sk, level, optname, @@ -1892,8 +1894,8 @@ int udpv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, return ipv6_setsockopt(sk, level, optname, optval, optlen); } -int udpv6_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) +static int udpv6_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) { if (level == SOL_UDP || level == SOL_UDPLITE) return udp_lib_getsockopt(sk, level, optname, optval, optlen); @@ -1918,7 +1920,7 @@ static int udp6_seq_show(struct seq_file *seq, void *v) return 0; } -const struct seq_operations udp6_seq_ops = { +static const struct seq_operations udp6_seq_ops = { .start = udp_seq_start, .next = udp_seq_next, .stop = udp_seq_stop, diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h deleted file mode 100644 index 525ea600228a..000000000000 --- a/net/ipv6/udp_impl.h +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _UDP6_IMPL_H -#define _UDP6_IMPL_H -#include -#include -#include -#include -#include -#include -#include - -int __udp6_lib_rcv(struct sk_buff *, struct udp_table *, int); -int __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, u8, u8, int, - __be32, struct udp_table *); - -int udpv6_init_sock(struct sock *sk); -int udp_v6_get_port(struct sock *sk, unsigned short snum); -void udp_v6_rehash(struct sock *sk); - -int udpv6_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen); -int udpv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, - unsigned int optlen); -int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); -int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags); -void udpv6_destroy_sock(struct sock *sk); - -#ifdef CONFIG_PROC_FS -extern const struct seq_operations udp6_seq_ops; -#endif -#endif /* _UDP6_IMPL_H */ diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c deleted file mode 100644 index e867721cda4d..000000000000 --- a/net/ipv6/udplite.c +++ /dev/null @@ -1,139 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * UDPLITEv6 An implementation of the UDP-Lite protocol over IPv6. - * See also net/ipv4/udplite.c - * - * Authors: Gerrit Renker - * - * Changes: - * Fixes: - */ -#define pr_fmt(fmt) "UDPLite6: " fmt - -#include -#include -#include "udp_impl.h" - -static int udplitev6_sk_init(struct sock *sk) -{ - pr_warn_once("UDP-Lite is deprecated and scheduled to be removed in 2025, " - "please contact the netdev mailing list\n"); - return udpv6_init_sock(sk); -} - -static int udplitev6_rcv(struct sk_buff *skb) -{ - return __udp6_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE); -} - -static int udplitev6_err(struct sk_buff *skb, - struct inet6_skb_parm *opt, - u8 type, u8 code, int offset, __be32 info) -{ - return __udp6_lib_err(skb, opt, type, code, offset, info, - &udplite_table); -} - -static const struct inet6_protocol udplitev6_protocol = { - .handler = udplitev6_rcv, - .err_handler = udplitev6_err, - .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, -}; - -struct proto udplitev6_prot = { - .name = "UDPLITEv6", - .owner = THIS_MODULE, - .close = udp_lib_close, - .connect = ip6_datagram_connect, - .disconnect = udp_disconnect, - .ioctl = udp_ioctl, - .init = udplitev6_sk_init, - .destroy = udpv6_destroy_sock, - .setsockopt = udpv6_setsockopt, - .getsockopt = udpv6_getsockopt, - .sendmsg = udpv6_sendmsg, - .recvmsg = udpv6_recvmsg, - .hash = udp_lib_hash, - .unhash = udp_lib_unhash, - .rehash = udp_v6_rehash, - .get_port = udp_v6_get_port, - - .memory_allocated = &net_aligned_data.udp_memory_allocated, - .per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc, - - .sysctl_mem = sysctl_udp_mem, - .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min), - .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min), - .obj_size = sizeof(struct udp6_sock), - .ipv6_pinfo_offset = offsetof(struct udp6_sock, inet6), - .h.udp_table = &udplite_table, -}; - -static struct inet_protosw udplite6_protosw = { - .type = SOCK_DGRAM, - .protocol = IPPROTO_UDPLITE, - .prot = &udplitev6_prot, - .ops = &inet6_dgram_ops, - .flags = INET_PROTOSW_PERMANENT, -}; - -int __init udplitev6_init(void) -{ - int ret; - - ret = inet6_add_protocol(&udplitev6_protocol, IPPROTO_UDPLITE); - if (ret) - goto out; - - ret = inet6_register_protosw(&udplite6_protosw); - if (ret) - goto out_udplitev6_protocol; -out: - return ret; - -out_udplitev6_protocol: - inet6_del_protocol(&udplitev6_protocol, IPPROTO_UDPLITE); - goto out; -} - -void udplitev6_exit(void) -{ - inet6_unregister_protosw(&udplite6_protosw); - inet6_del_protocol(&udplitev6_protocol, IPPROTO_UDPLITE); -} - -#ifdef CONFIG_PROC_FS -static struct udp_seq_afinfo udplite6_seq_afinfo = { - .family = AF_INET6, - .udp_table = &udplite_table, -}; - -static int __net_init udplite6_proc_init_net(struct net *net) -{ - if (!proc_create_net_data("udplite6", 0444, net->proc_net, - &udp6_seq_ops, sizeof(struct udp_iter_state), - &udplite6_seq_afinfo)) - return -ENOMEM; - return 0; -} - -static void __net_exit udplite6_proc_exit_net(struct net *net) -{ - remove_proc_entry("udplite6", net->proc_net); -} - -static struct pernet_operations udplite6_net_ops = { - .init = udplite6_proc_init_net, - .exit = udplite6_proc_exit_net, -}; - -int __init udplite6_proc_init(void) -{ - return register_pernet_subsys(&udplite6_net_ops); -} - -void udplite6_proc_exit(void) -{ - unregister_pernet_subsys(&udplite6_net_ops); -} -#endif diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index d70db367e358..e5880116e087 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -16,8 +16,6 @@ #include #include "ar-internal.h" -extern int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); - ssize_t do_udp_sendmsg(struct socket *socket, struct msghdr *msg, size_t len) { struct sockaddr *sa = msg->msg_name; From 92586f02f8a52e0f02464445aa3d9697abe054fb Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 11 Mar 2026 05:19:50 +0000 Subject: [PATCH 0489/1561] ipv6: Remove UDP-Lite support for IPV6_ADDRFORM. We cannot create IPv6 UDP-Lite sockets anymore. Let's remove dead code in IPV6_ADDRFORM. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260311052020.1213705-4-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/ipv6_sockglue.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 02c4cab60c69..b4c977434c2e 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -45,7 +45,6 @@ #include #include #include -#include #include #include #include @@ -563,10 +562,8 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (sk->sk_type == SOCK_RAW) break; - if (sk->sk_protocol == IPPROTO_UDP || - sk->sk_protocol == IPPROTO_UDPLITE) { - struct udp_sock *up = udp_sk(sk); - if (up->pending == AF_INET6) { + if (sk->sk_protocol == IPPROTO_UDP) { + if (udp_sk(sk)->pending == AF_INET6) { retv = -EBUSY; break; } @@ -607,16 +604,11 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, WRITE_ONCE(sk->sk_family, PF_INET); tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); } else { - struct proto *prot = &udp_prot; - - if (sk->sk_protocol == IPPROTO_UDPLITE) - prot = &udplite_prot; - sock_prot_inuse_add(net, sk->sk_prot, -1); - sock_prot_inuse_add(net, prot, 1); + sock_prot_inuse_add(net, &udp_prot, 1); /* Paired with READ_ONCE(sk->sk_prot) in inet6_dgram_ops */ - WRITE_ONCE(sk->sk_prot, prot); + WRITE_ONCE(sk->sk_prot, &udp_prot); WRITE_ONCE(sk->sk_socket->ops, &inet_dgram_ops); WRITE_ONCE(sk->sk_family, PF_INET); } @@ -1098,7 +1090,6 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, switch (optname) { case IPV6_ADDRFORM: if (sk->sk_protocol != IPPROTO_UDP && - sk->sk_protocol != IPPROTO_UDPLITE && sk->sk_protocol != IPPROTO_TCP) return -ENOPROTOOPT; if (sk->sk_state != TCP_ESTABLISHED) From 56520b398e5e6ee129c6279d8649ca959765a0a0 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 11 Mar 2026 05:19:51 +0000 Subject: [PATCH 0490/1561] ipv4: Retire UDP-Lite. We have deprecated IPv6 UDP-Lite sockets. Let's drop support for IPv4 UDP-Lite sockets as well. Most of the changes are similar to the IPv6 patch: removing udplite.c and udp_impl.h, marking most functions in udp_impl.h as static, moving the prototype for udp_recvmsg() to udp.h, and adding INDIRECT_CALLABLE_SCOPE for it. In addition, the INET_DIAG support for UDP-Lite is dropped. We will remove the remaining dead code in the following patches. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260311052020.1213705-5-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 4 +- include/net/udp.h | 7 ++- include/net/udplite.h | 4 -- net/ipv4/Makefile | 2 +- net/ipv4/af_inet.c | 6 -- net/ipv4/proc.c | 3 - net/ipv4/udp.c | 33 ++++++----- net/ipv4/udp_bpf.c | 2 - net/ipv4/udp_diag.c | 47 +-------------- net/ipv4/udp_impl.h | 27 --------- net/ipv4/udplite.c | 135 ------------------------------------------ 11 files changed, 26 insertions(+), 244 deletions(-) delete mode 100644 net/ipv4/udp_impl.h delete mode 100644 net/ipv4/udplite.c diff --git a/include/net/sock.h b/include/net/sock.h index 6c3f1340e8ef..16a1b8895206 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -126,14 +126,14 @@ typedef __u64 __bitwise __addrpair; * @skc_bypass_prot_mem: bypass the per-protocol memory accounting for skb * @skc_bound_dev_if: bound device index if != 0 * @skc_bind_node: bind hash linkage for various protocol lookup tables - * @skc_portaddr_node: second hash linkage for UDP/UDP-Lite protocol + * @skc_portaddr_node: second hash linkage for UDP * @skc_prot: protocol handlers inside a network family * @skc_net: reference to the network namespace of this socket * @skc_v6_daddr: IPV6 destination address * @skc_v6_rcv_saddr: IPV6 source address * @skc_cookie: socket's cookie value * @skc_node: main hash linkage for various protocol lookup tables - * @skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol + * @skc_nulls_node: main hash linkage for TCP * @skc_tx_queue_mapping: tx queue number for this connection * @skc_rx_queue_mapping: rx queue number for this connection * @skc_flags: place holder for sk_flags diff --git a/include/net/udp.h b/include/net/udp.h index 05f63e9e00a7..39223e2692e9 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -105,7 +105,7 @@ struct udp_table { unsigned int log; }; extern struct udp_table udp_table; -void udp_table_init(struct udp_table *, const char *); + static inline struct udp_hslot *udp_hashslot(struct udp_table *table, const struct net *net, unsigned int num) @@ -312,7 +312,7 @@ static inline void udp_drops_inc(struct sock *sk) numa_drop_add(&udp_sk(sk)->drop_counters, 1); } -/* hash routines shared between UDPv4/6 and UDP-Litev4/6 */ +/* hash routines shared between UDPv4/6 */ static inline int udp_lib_hash(struct sock *sk) { BUG(); @@ -420,6 +420,8 @@ bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst); int udp_err(struct sk_buff *, u32); int udp_abort(struct sock *sk, int err); int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); +INDIRECT_CALLABLE_DECLARE(int udp_recvmsg(struct sock *sk, struct msghdr *msg, + size_t len, int flags)); void udp_splice_eof(struct socket *sock); int udp_push_pending_frames(struct sock *sk); void udp_flush_pending_frames(struct sock *sk); @@ -427,7 +429,6 @@ int udp_cmsg_send(struct sock *sk, struct msghdr *msg, u16 *gso_size); void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst); int udp_rcv(struct sk_buff *skb); int udp_ioctl(struct sock *sk, int cmd, int *karg); -int udp_init_sock(struct sock *sk); int udp_pre_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len); int __udp_disconnect(struct sock *sk, int flags); int udp_disconnect(struct sock *sk, int flags); diff --git a/include/net/udplite.h b/include/net/udplite.h index 786919d29f8d..fdd769745ac4 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -12,9 +12,6 @@ #define UDPLITE_SEND_CSCOV 10 /* sender partial coverage (as sent) */ #define UDPLITE_RECV_CSCOV 11 /* receiver partial coverage (threshold ) */ -extern struct proto udplite_prot; -extern struct udp_table udplite_table; - /* * Checksum computation is all in software, hence simpler getfrag. */ @@ -84,5 +81,4 @@ static inline __wsum udplite_csum(struct sk_buff *skb) return skb_checksum(skb, off, len, 0); } -void udplite4_register(void); #endif /* _UDPLITE_H */ diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 18108a6f0499..7f9f98813986 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -10,7 +10,7 @@ obj-y := route.o inetpeer.o protocol.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ tcp_minisocks.o tcp_cong.o tcp_metrics.o tcp_fastopen.o \ tcp_recovery.o tcp_ulp.o \ - tcp_offload.o tcp_plb.o datagram.o raw.o udp.o udplite.o \ + tcp_offload.o tcp_plb.o datagram.o raw.o udp.o \ udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o fib_trie.o fib_notifier.o \ inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o \ diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 4a112f0ee269..5d4bc4c1a731 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -104,7 +104,6 @@ #include #include #include -#include #include #include #include @@ -884,8 +883,6 @@ void inet_splice_eof(struct socket *sock) } EXPORT_SYMBOL_GPL(inet_splice_eof); -INDIRECT_CALLABLE_DECLARE(int udp_recvmsg(struct sock *, struct msghdr *, - size_t, int)); int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { @@ -1985,9 +1982,6 @@ static int __init inet_init(void) /* Setup UDP memory threshold */ udp_init(); - /* Add UDP-Lite (RFC 3828) */ - udplite4_register(); - raw_init(); ping_init(); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 974afc4ecbe2..cf51f8fcf34b 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -35,7 +35,6 @@ #include #include #include -#include #include #include #include @@ -65,8 +64,6 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "UDP: inuse %d mem %ld\n", sock_prot_inuse_get(net, &udp_prot), proto_memory_allocated(&udp_prot)); - seq_printf(seq, "UDPLITE: inuse %d\n", - sock_prot_inuse_get(net, &udplite_prot)); seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse_get(net, &raw_prot)); seq_printf(seq, "FRAG: inuse %u memory %lu\n", diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1fcdc5482594..b3f63a5ea2a9 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -98,8 +98,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -112,10 +114,10 @@ #include #include #include -#include "udp_impl.h" #include #include #include +#include #include #if IS_ENABLED(CONFIG_IPV6) #include @@ -229,7 +231,7 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot) } /** - * udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6 + * udp_lib_get_port - UDP port lookup for IPv4 and IPv6 * * @sk: socket struct in question * @snum: port number to look up @@ -353,7 +355,7 @@ fail: } EXPORT_IPV6_MOD(udp_lib_get_port); -int udp_v4_get_port(struct sock *sk, unsigned short snum) +static int udp_v4_get_port(struct sock *sk, unsigned short snum) { unsigned int hash2_nulladdr = ipv4_portaddr_hash(sock_net(sk), htonl(INADDR_ANY), snum); @@ -928,7 +930,7 @@ out: * to find the appropriate port. */ -int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) +static int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) { struct inet_sock *inet; const struct iphdr *iph = (const struct iphdr *)skb->data; @@ -1855,7 +1857,7 @@ static void udp_destruct_sock(struct sock *sk) inet_sock_destruct(sk); } -int udp_init_sock(struct sock *sk) +static int udp_init_sock(struct sock *sk) { int res = udp_lib_init_sock(sk); @@ -2070,6 +2072,7 @@ EXPORT_IPV6_MOD(udp_read_skb); * return it, otherwise we block. */ +INDIRECT_CALLABLE_SCOPE int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags) { struct inet_sock *inet = inet_sk(sk); @@ -2342,7 +2345,7 @@ void udp_lib_rehash(struct sock *sk, u16 newhash, u16 newhash4) } EXPORT_IPV6_MOD(udp_lib_rehash); -void udp_v4_rehash(struct sock *sk) +static void udp_v4_rehash(struct sock *sk) { u16 new_hash = ipv4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, @@ -2688,8 +2691,8 @@ static int udp_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb, * All we need to do is get the socket, and then do a checksum. */ -int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, - int proto) +static int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, + int proto) { struct sock *sk = NULL; struct udphdr *uh; @@ -2935,7 +2938,7 @@ int udp_rcv(struct sk_buff *skb) return __udp4_lib_rcv(skb, dev_net(skb->dev)->ipv4.udp_table, IPPROTO_UDP); } -void udp_destroy_sock(struct sock *sk) +static void udp_destroy_sock(struct sock *sk) { struct udp_sock *up = udp_sk(sk); bool slow = lock_sock_fast(sk); @@ -3125,8 +3128,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, } EXPORT_IPV6_MOD(udp_lib_setsockopt); -int udp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, - unsigned int optlen) +static int udp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, + unsigned int optlen) { if (level == SOL_UDP || level == SOL_UDPLITE || level == SOL_SOCKET) return udp_lib_setsockopt(sk, level, optname, @@ -3196,8 +3199,8 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, } EXPORT_IPV6_MOD(udp_lib_getsockopt); -int udp_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) +static int udp_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) { if (level == SOL_UDP || level == SOL_UDPLITE) return udp_lib_getsockopt(sk, level, optname, optval, optlen); @@ -3747,7 +3750,7 @@ static unsigned short seq_file_family(const struct seq_file *seq) return afinfo->family; } -const struct seq_operations udp_seq_ops = { +static const struct seq_operations udp_seq_ops = { .start = udp_seq_start, .next = udp_seq_next, .stop = udp_seq_stop, @@ -3806,7 +3809,7 @@ static int __init set_uhash_entries(char *str) } __setup("uhash_entries=", set_uhash_entries); -void __init udp_table_init(struct udp_table *table, const char *name) +static void __init udp_table_init(struct udp_table *table, const char *name) { unsigned int i, slot_size; diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c index d328a3078ae0..9f33b07b1481 100644 --- a/net/ipv4/udp_bpf.c +++ b/net/ipv4/udp_bpf.c @@ -7,8 +7,6 @@ #include #include -#include "udp_impl.h" - static struct proto *udpv6_prot_saved __read_mostly; static int sk_udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 6e491c720c90..a010d05062a0 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -10,7 +10,6 @@ #include #include #include -#include #include static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, @@ -224,12 +223,6 @@ static int udp_diag_destroy(struct sk_buff *in_skb, return __udp_diag_destroy(in_skb, req, sock_net(in_skb->sk)->ipv4.udp_table); } -static int udplite_diag_destroy(struct sk_buff *in_skb, - const struct inet_diag_req_v2 *req) -{ - return __udp_diag_destroy(in_skb, req, &udplite_table); -} - #endif static const struct inet_diag_handler udp_diag_handler = { @@ -244,50 +237,13 @@ static const struct inet_diag_handler udp_diag_handler = { #endif }; -static void udplite_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - const struct inet_diag_req_v2 *r) -{ - udp_dump(&udplite_table, skb, cb, r); -} - -static int udplite_diag_dump_one(struct netlink_callback *cb, - const struct inet_diag_req_v2 *req) -{ - return udp_dump_one(&udplite_table, cb, req); -} - -static const struct inet_diag_handler udplite_diag_handler = { - .owner = THIS_MODULE, - .dump = udplite_diag_dump, - .dump_one = udplite_diag_dump_one, - .idiag_get_info = udp_diag_get_info, - .idiag_type = IPPROTO_UDPLITE, - .idiag_info_size = 0, -#ifdef CONFIG_INET_DIAG_DESTROY - .destroy = udplite_diag_destroy, -#endif -}; - static int __init udp_diag_init(void) { - int err; - - err = inet_diag_register(&udp_diag_handler); - if (err) - goto out; - err = inet_diag_register(&udplite_diag_handler); - if (err) - goto out_lite; -out: - return err; -out_lite: - inet_diag_unregister(&udp_diag_handler); - goto out; + return inet_diag_register(&udp_diag_handler); } static void __exit udp_diag_exit(void) { - inet_diag_unregister(&udplite_diag_handler); inet_diag_unregister(&udp_diag_handler); } @@ -296,4 +252,3 @@ module_exit(udp_diag_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("UDP socket monitoring via SOCK_DIAG"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-17 /* AF_INET - IPPROTO_UDP */); -MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-136 /* AF_INET - IPPROTO_UDPLITE */); diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h deleted file mode 100644 index 0ca4384f9afa..000000000000 --- a/net/ipv4/udp_impl.h +++ /dev/null @@ -1,27 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _UDP4_IMPL_H -#define _UDP4_IMPL_H -#include -#include -#include -#include -#include - -int __udp4_lib_rcv(struct sk_buff *, struct udp_table *, int); -int __udp4_lib_err(struct sk_buff *, u32, struct udp_table *); - -int udp_v4_get_port(struct sock *sk, unsigned short snum); -void udp_v4_rehash(struct sock *sk); - -int udp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, - unsigned int optlen); -int udp_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen); - -int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags); -void udp_destroy_sock(struct sock *sk); - -#ifdef CONFIG_PROC_FS -extern const struct seq_operations udp_seq_ops; -#endif -#endif /* _UDP4_IMPL_H */ diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c deleted file mode 100644 index 826e9e79eb19..000000000000 --- a/net/ipv4/udplite.c +++ /dev/null @@ -1,135 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * UDPLITE An implementation of the UDP-Lite protocol (RFC 3828). - * - * Authors: Gerrit Renker - * - * Changes: - * Fixes: - */ - -#define pr_fmt(fmt) "UDPLite: " fmt - -#include -#include -#include "udp_impl.h" - -struct udp_table udplite_table __read_mostly; -EXPORT_SYMBOL(udplite_table); - -/* Designate sk as UDP-Lite socket */ -static int udplite_sk_init(struct sock *sk) -{ - pr_warn_once("UDP-Lite is deprecated and scheduled to be removed in 2025, " - "please contact the netdev mailing list\n"); - return udp_init_sock(sk); -} - -static int udplite_rcv(struct sk_buff *skb) -{ - return __udp4_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE); -} - -static int udplite_err(struct sk_buff *skb, u32 info) -{ - return __udp4_lib_err(skb, info, &udplite_table); -} - -static const struct net_protocol udplite_protocol = { - .handler = udplite_rcv, - .err_handler = udplite_err, - .no_policy = 1, -}; - -struct proto udplite_prot = { - .name = "UDP-Lite", - .owner = THIS_MODULE, - .close = udp_lib_close, - .connect = ip4_datagram_connect, - .disconnect = udp_disconnect, - .ioctl = udp_ioctl, - .init = udplite_sk_init, - .destroy = udp_destroy_sock, - .setsockopt = udp_setsockopt, - .getsockopt = udp_getsockopt, - .sendmsg = udp_sendmsg, - .recvmsg = udp_recvmsg, - .hash = udp_lib_hash, - .unhash = udp_lib_unhash, - .rehash = udp_v4_rehash, - .get_port = udp_v4_get_port, - - .memory_allocated = &net_aligned_data.udp_memory_allocated, - .per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc, - - .sysctl_mem = sysctl_udp_mem, - .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min), - .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min), - .obj_size = sizeof(struct udp_sock), - .h.udp_table = &udplite_table, -}; -EXPORT_SYMBOL(udplite_prot); - -static struct inet_protosw udplite4_protosw = { - .type = SOCK_DGRAM, - .protocol = IPPROTO_UDPLITE, - .prot = &udplite_prot, - .ops = &inet_dgram_ops, - .flags = INET_PROTOSW_PERMANENT, -}; - -#ifdef CONFIG_PROC_FS -static struct udp_seq_afinfo udplite4_seq_afinfo = { - .family = AF_INET, - .udp_table = &udplite_table, -}; - -static int __net_init udplite4_proc_init_net(struct net *net) -{ - if (!proc_create_net_data("udplite", 0444, net->proc_net, &udp_seq_ops, - sizeof(struct udp_iter_state), &udplite4_seq_afinfo)) - return -ENOMEM; - return 0; -} - -static void __net_exit udplite4_proc_exit_net(struct net *net) -{ - remove_proc_entry("udplite", net->proc_net); -} - -static struct pernet_operations udplite4_net_ops = { - .init = udplite4_proc_init_net, - .exit = udplite4_proc_exit_net, -}; - -static __init int udplite4_proc_init(void) -{ - return register_pernet_subsys(&udplite4_net_ops); -} -#else -static inline int udplite4_proc_init(void) -{ - return 0; -} -#endif - -void __init udplite4_register(void) -{ - udp_table_init(&udplite_table, "UDP-Lite"); - if (proto_register(&udplite_prot, 1)) - goto out_register_err; - - if (inet_add_protocol(&udplite_protocol, IPPROTO_UDPLITE) < 0) - goto out_unregister_proto; - - inet_register_protosw(&udplite4_protosw); - - if (udplite4_proc_init()) - pr_err("%s: Cannot register /proc!\n", __func__); - return; - -out_unregister_proto: - proto_unregister(&udplite_prot); -out_register_err: - pr_crit("%s: Cannot add UDP-Lite protocol\n", __func__); -} From 7accba6fd1ab60fb4f3a5c15c52d6fbb3af7f3a3 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 11 Mar 2026 05:19:52 +0000 Subject: [PATCH 0491/1561] udp: Remove UDP-Lite SNMP stats. Since UDP and UDP-Lite shared most of the code, we have had to check the protocol every time we increment SNMP stats. Now that the UDP-Lite paths are dead, let's remove UDP-Lite SNMP stats. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260311052020.1213705-6-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/netns/mib.h | 5 --- include/net/udp.h | 46 ++++++++++--------------- net/ipv4/af_inet.c | 6 ---- net/ipv4/proc.c | 13 ------- net/ipv4/udp.c | 75 ++++++++++++++++++----------------------- net/ipv6/af_inet6.c | 9 ++--- net/ipv6/proc.c | 14 -------- net/ipv6/udp.c | 48 +++++++++++--------------- 8 files changed, 71 insertions(+), 145 deletions(-) diff --git a/include/net/netns/mib.h b/include/net/netns/mib.h index 7e373664b1e7..dce05f8e6a33 100644 --- a/include/net/netns/mib.h +++ b/include/net/netns/mib.h @@ -28,11 +28,6 @@ struct netns_mib { DEFINE_SNMP_STAT(struct mptcp_mib, mptcp_statistics); #endif - DEFINE_SNMP_STAT(struct udp_mib, udplite_statistics); -#if IS_ENABLED(CONFIG_IPV6) - DEFINE_SNMP_STAT(struct udp_mib, udplite_stats_in6); -#endif - DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics); DEFINE_SNMP_STAT_ATOMIC(struct icmpmsg_mib, icmpmsg_statistics); #if IS_ENABLED(CONFIG_IPV6) diff --git a/include/net/udp.h b/include/net/udp.h index 39223e2692e9..264c10607d2e 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -529,38 +529,28 @@ static inline int copy_linear_skb(struct sk_buff *skb, int len, int off, } /* - * SNMP statistics for UDP and UDP-Lite + * SNMP statistics for UDP */ -#define UDP_INC_STATS(net, field, is_udplite) do { \ - if (unlikely(is_udplite)) SNMP_INC_STATS((net)->mib.udplite_statistics, field); \ - else SNMP_INC_STATS((net)->mib.udp_statistics, field); } while(0) -#define __UDP_INC_STATS(net, field, is_udplite) do { \ - if (unlikely(is_udplite)) __SNMP_INC_STATS((net)->mib.udplite_statistics, field); \ - else __SNMP_INC_STATS((net)->mib.udp_statistics, field); } while(0) - -#define __UDP6_INC_STATS(net, field, is_udplite) do { \ - if (unlikely(is_udplite)) __SNMP_INC_STATS((net)->mib.udplite_stats_in6, field); \ - else __SNMP_INC_STATS((net)->mib.udp_stats_in6, field); \ -} while(0) -#define UDP6_INC_STATS(net, field, __lite) do { \ - if (unlikely(__lite)) SNMP_INC_STATS((net)->mib.udplite_stats_in6, field); \ - else SNMP_INC_STATS((net)->mib.udp_stats_in6, field); \ -} while(0) +#define __UDP_INC_STATS(net, field) \ + __SNMP_INC_STATS((net)->mib.udp_statistics, field) +#define UDP_INC_STATS(net, field) \ + SNMP_INC_STATS((net)->mib.udp_statistics, field) +#define __UDP6_INC_STATS(net, field) \ + __SNMP_INC_STATS((net)->mib.udp_stats_in6, field) +#define UDP6_INC_STATS(net, field) \ + SNMP_INC_STATS((net)->mib.udp_stats_in6, field) #if IS_ENABLED(CONFIG_IPV6) -#define __UDPX_MIB(sk, ipv4) \ -({ \ - ipv4 ? (IS_UDPLITE(sk) ? sock_net(sk)->mib.udplite_statistics : \ - sock_net(sk)->mib.udp_statistics) : \ - (IS_UDPLITE(sk) ? sock_net(sk)->mib.udplite_stats_in6 : \ - sock_net(sk)->mib.udp_stats_in6); \ -}) +#define __UDPX_MIB(sk, ipv4) \ + ({ \ + ipv4 ? sock_net(sk)->mib.udp_statistics : \ + sock_net(sk)->mib.udp_stats_in6; \ + }) #else -#define __UDPX_MIB(sk, ipv4) \ -({ \ - IS_UDPLITE(sk) ? sock_net(sk)->mib.udplite_statistics : \ - sock_net(sk)->mib.udp_statistics; \ -}) +#define __UDPX_MIB(sk, ipv4) \ + ({ \ + sock_net(sk)->mib.udp_statistics; \ + }) #endif #define __UDPX_INC_STATS(sk, field) \ diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 5d4bc4c1a731..f98e46ae3e30 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1733,9 +1733,6 @@ static __net_init int ipv4_mib_init_net(struct net *net) net->mib.udp_statistics = alloc_percpu(struct udp_mib); if (!net->mib.udp_statistics) goto err_udp_mib; - net->mib.udplite_statistics = alloc_percpu(struct udp_mib); - if (!net->mib.udplite_statistics) - goto err_udplite_mib; net->mib.icmp_statistics = alloc_percpu(struct icmp_mib); if (!net->mib.icmp_statistics) goto err_icmp_mib; @@ -1749,8 +1746,6 @@ static __net_init int ipv4_mib_init_net(struct net *net) err_icmpmsg_mib: free_percpu(net->mib.icmp_statistics); err_icmp_mib: - free_percpu(net->mib.udplite_statistics); -err_udplite_mib: free_percpu(net->mib.udp_statistics); err_udp_mib: free_percpu(net->mib.net_statistics); @@ -1766,7 +1761,6 @@ static __net_exit void ipv4_mib_exit_net(struct net *net) { kfree(net->mib.icmpmsg_statistics); free_percpu(net->mib.icmp_statistics); - free_percpu(net->mib.udplite_statistics); free_percpu(net->mib.udp_statistics); free_percpu(net->mib.net_statistics); free_percpu(net->mib.ip_statistics); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index cf51f8fcf34b..bfc06d1713ec 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -444,19 +444,6 @@ static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v) for (i = 0; i < udp_cnt; i++) seq_printf(seq, " %lu", buff[i]); - memset(buff, 0, udp_cnt * sizeof(unsigned long)); - - /* the UDP and UDP-Lite MIBs are the same */ - seq_puts(seq, "\nUdpLite:"); - snmp_get_cpu_field_batch_cnt(buff, snmp4_udp_list, - udp_cnt, - net->mib.udplite_statistics); - for (i = 0; i < udp_cnt; i++) - seq_printf(seq, " %s", snmp4_udp_list[i].name); - seq_puts(seq, "\nUdpLite:"); - for (i = 0; i < udp_cnt; i++) - seq_printf(seq, " %lu", buff[i]); - seq_putc(seq, '\n'); return 0; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b3f63a5ea2a9..10082095e633 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1198,13 +1198,12 @@ send: if (unlikely(err)) { if (err == -ENOBUFS && !inet_test_bit(RECVERR, sk)) { - UDP_INC_STATS(sock_net(sk), - UDP_MIB_SNDBUFERRORS, is_udplite); + UDP_INC_STATS(sock_net(sk), UDP_MIB_SNDBUFERRORS); err = 0; } - } else - UDP_INC_STATS(sock_net(sk), - UDP_MIB_OUTDATAGRAMS, is_udplite); + } else { + UDP_INC_STATS(sock_net(sk), UDP_MIB_OUTDATAGRAMS); + } return err; } @@ -1535,10 +1534,9 @@ out_free: * things). We could add another new stat but at least for now that * seems like overkill. */ - if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { - UDP_INC_STATS(sock_net(sk), - UDP_MIB_SNDBUFERRORS, is_udplite); - } + if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) + UDP_INC_STATS(sock_net(sk), UDP_MIB_SNDBUFERRORS); + return err; do_confirm: @@ -1897,10 +1895,10 @@ static struct sk_buff *__first_packet_length(struct sock *sk, while ((skb = skb_peek(rcvq)) != NULL) { if (udp_lib_checksum_complete(skb)) { - __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, - IS_UDPLITE(sk)); - __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, - IS_UDPLITE(sk)); + struct net *net = sock_net(sk); + + __UDP_INC_STATS(net, UDP_MIB_CSUMERRORS); + __UDP_INC_STATS(net, UDP_MIB_INERRORS); udp_drops_inc(sk); __skb_unlink(skb, rcvq); *total += skb->truesize; @@ -2052,11 +2050,10 @@ try_again: return err; if (udp_lib_checksum_complete(skb)) { - int is_udplite = IS_UDPLITE(sk); struct net *net = sock_net(sk); - __UDP_INC_STATS(net, UDP_MIB_CSUMERRORS, is_udplite); - __UDP_INC_STATS(net, UDP_MIB_INERRORS, is_udplite); + __UDP_INC_STATS(net, UDP_MIB_CSUMERRORS); + __UDP_INC_STATS(net, UDP_MIB_INERRORS); udp_drops_inc(sk); kfree_skb_reason(skb, SKB_DROP_REASON_UDP_CSUM); goto try_again; @@ -2081,6 +2078,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags) unsigned int ulen, copied; int off, err, peeking = flags & MSG_PEEK; int is_udplite = IS_UDPLITE(sk); + struct net *net = sock_net(sk); bool checksum_valid = false; if (flags & MSG_ERRQUEUE) @@ -2128,16 +2126,14 @@ try_again: if (unlikely(err)) { if (!peeking) { udp_drops_inc(sk); - UDP_INC_STATS(sock_net(sk), - UDP_MIB_INERRORS, is_udplite); + UDP_INC_STATS(net, UDP_MIB_INERRORS); } kfree_skb(skb); return err; } if (!peeking) - UDP_INC_STATS(sock_net(sk), - UDP_MIB_INDATAGRAMS, is_udplite); + UDP_INC_STATS(net, UDP_MIB_INDATAGRAMS); sock_recv_cmsgs(msg, sk, skb); @@ -2170,8 +2166,8 @@ try_again: csum_copy_err: if (!__sk_queue_drop_skb(sk, &udp_sk(sk)->reader_queue, skb, flags, udp_skb_destructor)) { - UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); - UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + UDP_INC_STATS(net, UDP_MIB_CSUMERRORS); + UDP_INC_STATS(net, UDP_MIB_INERRORS); } kfree_skb_reason(skb, SKB_DROP_REASON_UDP_CSUM); @@ -2371,20 +2367,18 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) rc = __udp_enqueue_schedule_skb(sk, skb); if (rc < 0) { - int is_udplite = IS_UDPLITE(sk); + struct net *net = sock_net(sk); int drop_reason; /* Note that an ENOMEM error is charged twice */ if (rc == -ENOMEM) { - UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS, - is_udplite); + UDP_INC_STATS(net, UDP_MIB_RCVBUFERRORS); drop_reason = SKB_DROP_REASON_SOCKET_RCVBUFF; } else { - UDP_INC_STATS(sock_net(sk), UDP_MIB_MEMERRORS, - is_udplite); + UDP_INC_STATS(net, UDP_MIB_MEMERRORS); drop_reason = SKB_DROP_REASON_PROTO_MEM; } - UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + UDP_INC_STATS(net, UDP_MIB_INERRORS); trace_udp_fail_queue_rcv_skb(rc, sk, skb); sk_skb_reason_drop(sk, skb, drop_reason); return -1; @@ -2405,7 +2399,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) { enum skb_drop_reason drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; struct udp_sock *up = udp_sk(sk); - int is_udplite = IS_UDPLITE(sk); + struct net *net = sock_net(sk); /* * Charge it to the socket, dropping if the queue is full. @@ -2442,9 +2436,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) ret = encap_rcv(sk, skb); if (ret <= 0) { - __UDP_INC_STATS(sock_net(sk), - UDP_MIB_INDATAGRAMS, - is_udplite); + __UDP_INC_STATS(net, UDP_MIB_INDATAGRAMS); return -ret; } } @@ -2503,9 +2495,9 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) csum_error: drop_reason = SKB_DROP_REASON_UDP_CSUM; - __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); + __UDP_INC_STATS(net, UDP_MIB_CSUMERRORS); drop: - __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + __UDP_INC_STATS(net, UDP_MIB_INERRORS); udp_drops_inc(sk); sk_skb_reason_drop(sk, skb, drop_reason); return -1; @@ -2592,10 +2584,8 @@ start_lookup: if (unlikely(!nskb)) { udp_drops_inc(sk); - __UDP_INC_STATS(net, UDP_MIB_RCVBUFERRORS, - IS_UDPLITE(sk)); - __UDP_INC_STATS(net, UDP_MIB_INERRORS, - IS_UDPLITE(sk)); + __UDP_INC_STATS(net, UDP_MIB_RCVBUFERRORS); + __UDP_INC_STATS(net, UDP_MIB_INERRORS); continue; } if (udp_queue_rcv_skb(sk, nskb) > 0) @@ -2613,8 +2603,7 @@ start_lookup: consume_skb(skb); } else { kfree_skb(skb); - __UDP_INC_STATS(net, UDP_MIB_IGNOREDMULTI, - proto == IPPROTO_UDPLITE); + __UDP_INC_STATS(net, UDP_MIB_IGNOREDMULTI); } return 0; } @@ -2764,7 +2753,7 @@ no_sk: goto csum_error; drop_reason = SKB_DROP_REASON_NO_SOCKET; - __UDP_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); + __UDP_INC_STATS(net, UDP_MIB_NOPORTS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); /* @@ -2793,9 +2782,9 @@ csum_error: proto == IPPROTO_UDPLITE ? "Lite" : "", &saddr, ntohs(uh->source), &daddr, ntohs(uh->dest), ulen); - __UDP_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); + __UDP_INC_STATS(net, UDP_MIB_CSUMERRORS); drop: - __UDP_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); + __UDP_INC_STATS(net, UDP_MIB_INERRORS); sk_skb_reason_drop(sk, skb, drop_reason); return 0; } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 61f7bc88526a..eb9fff86baa1 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -886,9 +886,7 @@ static int __net_init ipv6_init_mibs(struct net *net) net->mib.udp_stats_in6 = alloc_percpu(struct udp_mib); if (!net->mib.udp_stats_in6) return -ENOMEM; - net->mib.udplite_stats_in6 = alloc_percpu(struct udp_mib); - if (!net->mib.udplite_stats_in6) - goto err_udplite_mib; + net->mib.ipv6_statistics = alloc_percpu(struct ipstats_mib); if (!net->mib.ipv6_statistics) goto err_ip_mib; @@ -899,10 +897,10 @@ static int __net_init ipv6_init_mibs(struct net *net) u64_stats_init(&af_inet6_stats->syncp); } - net->mib.icmpv6_statistics = alloc_percpu(struct icmpv6_mib); if (!net->mib.icmpv6_statistics) goto err_icmp_mib; + net->mib.icmpv6msg_statistics = kzalloc_obj(struct icmpv6msg_mib); if (!net->mib.icmpv6msg_statistics) goto err_icmpmsg_mib; @@ -913,8 +911,6 @@ err_icmpmsg_mib: err_icmp_mib: free_percpu(net->mib.ipv6_statistics); err_ip_mib: - free_percpu(net->mib.udplite_stats_in6); -err_udplite_mib: free_percpu(net->mib.udp_stats_in6); return -ENOMEM; } @@ -922,7 +918,6 @@ err_udplite_mib: static void ipv6_cleanup_mibs(struct net *net) { free_percpu(net->mib.udp_stats_in6); - free_percpu(net->mib.udplite_stats_in6); free_percpu(net->mib.ipv6_statistics); free_percpu(net->mib.icmpv6_statistics); kfree(net->mib.icmpv6msg_statistics); diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 21bfc73152f0..813013ca4e75 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -108,17 +108,6 @@ static const struct snmp_mib snmp6_udp6_list[] = { SNMP_MIB_ITEM("Udp6MemErrors", UDP_MIB_MEMERRORS), }; -static const struct snmp_mib snmp6_udplite6_list[] = { - SNMP_MIB_ITEM("UdpLite6InDatagrams", UDP_MIB_INDATAGRAMS), - SNMP_MIB_ITEM("UdpLite6NoPorts", UDP_MIB_NOPORTS), - SNMP_MIB_ITEM("UdpLite6InErrors", UDP_MIB_INERRORS), - SNMP_MIB_ITEM("UdpLite6OutDatagrams", UDP_MIB_OUTDATAGRAMS), - SNMP_MIB_ITEM("UdpLite6RcvbufErrors", UDP_MIB_RCVBUFERRORS), - SNMP_MIB_ITEM("UdpLite6SndbufErrors", UDP_MIB_SNDBUFERRORS), - SNMP_MIB_ITEM("UdpLite6InCsumErrors", UDP_MIB_CSUMERRORS), - SNMP_MIB_ITEM("UdpLite6MemErrors", UDP_MIB_MEMERRORS), -}; - static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, atomic_long_t *smib) { char name[32]; @@ -226,9 +215,6 @@ static int snmp6_seq_show(struct seq_file *seq, void *v) snmp6_seq_show_item(seq, net->mib.udp_stats_in6, NULL, snmp6_udp6_list, ARRAY_SIZE(snmp6_udp6_list)); - snmp6_seq_show_item(seq, net->mib.udplite_stats_in6, - NULL, snmp6_udplite6_list, - ARRAY_SIZE(snmp6_udplite6_list)); return 0; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index aa859bb0527d..07308b7156a6 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -796,20 +796,18 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) rc = __udp_enqueue_schedule_skb(sk, skb); if (rc < 0) { - int is_udplite = IS_UDPLITE(sk); enum skb_drop_reason drop_reason; + struct net *net = sock_net(sk); /* Note that an ENOMEM error is charged twice */ if (rc == -ENOMEM) { - UDP6_INC_STATS(sock_net(sk), - UDP_MIB_RCVBUFERRORS, is_udplite); + UDP6_INC_STATS(net, UDP_MIB_RCVBUFERRORS); drop_reason = SKB_DROP_REASON_SOCKET_RCVBUFF; } else { - UDP6_INC_STATS(sock_net(sk), - UDP_MIB_MEMERRORS, is_udplite); + UDP6_INC_STATS(net, UDP_MIB_MEMERRORS); drop_reason = SKB_DROP_REASON_PROTO_MEM; } - UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + UDP6_INC_STATS(net, UDP_MIB_INERRORS); trace_udp_fail_queue_rcv_skb(rc, sk, skb); sk_skb_reason_drop(sk, skb, drop_reason); return -1; @@ -830,7 +828,7 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) { enum skb_drop_reason drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; struct udp_sock *up = udp_sk(sk); - int is_udplite = IS_UDPLITE(sk); + struct net *net = sock_net(sk); if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { drop_reason = SKB_DROP_REASON_XFRM_POLICY; @@ -864,9 +862,7 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) ret = encap_rcv(sk, skb); if (ret <= 0) { - __UDP6_INC_STATS(sock_net(sk), - UDP_MIB_INDATAGRAMS, - is_udplite); + __UDP6_INC_STATS(net, UDP_MIB_INDATAGRAMS); return -ret; } } @@ -909,9 +905,9 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) csum_error: drop_reason = SKB_DROP_REASON_UDP_CSUM; - __UDP6_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); + __UDP6_INC_STATS(net, UDP_MIB_CSUMERRORS); drop: - __UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + __UDP6_INC_STATS(net, UDP_MIB_INERRORS); udp_drops_inc(sk); sk_skb_reason_drop(sk, skb, drop_reason); return -1; @@ -1018,10 +1014,8 @@ start_lookup: nskb = skb_clone(skb, GFP_ATOMIC); if (unlikely(!nskb)) { udp_drops_inc(sk); - __UDP6_INC_STATS(net, UDP_MIB_RCVBUFERRORS, - IS_UDPLITE(sk)); - __UDP6_INC_STATS(net, UDP_MIB_INERRORS, - IS_UDPLITE(sk)); + __UDP6_INC_STATS(net, UDP_MIB_RCVBUFERRORS); + __UDP6_INC_STATS(net, UDP_MIB_INERRORS); continue; } @@ -1040,8 +1034,7 @@ start_lookup: consume_skb(skb); } else { kfree_skb(skb); - __UDP6_INC_STATS(net, UDP_MIB_IGNOREDMULTI, - proto == IPPROTO_UDPLITE); + __UDP6_INC_STATS(net, UDP_MIB_IGNOREDMULTI); } return 0; } @@ -1213,7 +1206,7 @@ no_sk: if (udp_lib_checksum_complete(skb)) goto csum_error; - __UDP6_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); + __UDP6_INC_STATS(net, UDP_MIB_NOPORTS); icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); sk_skb_reason_drop(sk, skb, reason); @@ -1234,9 +1227,9 @@ report_csum_error: csum_error: if (reason == SKB_DROP_REASON_NOT_SPECIFIED) reason = SKB_DROP_REASON_UDP_CSUM; - __UDP6_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); + __UDP6_INC_STATS(net, UDP_MIB_CSUMERRORS); discard: - __UDP6_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); + __UDP6_INC_STATS(net, UDP_MIB_INERRORS); sk_skb_reason_drop(sk, skb, reason); return 0; } @@ -1490,13 +1483,11 @@ send: err = ip6_send_skb(skb); if (unlikely(err)) { if (err == -ENOBUFS && !inet6_test_bit(RECVERR6, sk)) { - UDP6_INC_STATS(sock_net(sk), - UDP_MIB_SNDBUFERRORS, is_udplite); + UDP6_INC_STATS(sock_net(sk), UDP_MIB_SNDBUFERRORS); err = 0; } } else { - UDP6_INC_STATS(sock_net(sk), - UDP_MIB_OUTDATAGRAMS, is_udplite); + UDP6_INC_STATS(sock_net(sk), UDP_MIB_OUTDATAGRAMS); } return err; } @@ -1826,10 +1817,9 @@ out_no_dst: * things). We could add another new stat but at least for now that * seems like overkill. */ - if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { - UDP6_INC_STATS(sock_net(sk), - UDP_MIB_SNDBUFERRORS, is_udplite); - } + if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) + UDP6_INC_STATS(sock_net(sk), UDP_MIB_SNDBUFERRORS); + return err; do_confirm: From b972cb5d397e0e9575d953a6c6b983c708de0326 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 11 Mar 2026 05:19:53 +0000 Subject: [PATCH 0492/1561] smack: Remove IPPROTO_UDPLITE support in security_sock_rcv_skb(). smack_socket_sock_rcv_skb() is registered as socket_sock_rcv_skb, which is called as security_sock_rcv_skb() in sk_filter_trim_cap(). Now that UDP-Lite is gone, let's remove the IPPROTO_UDPLITE support in smack_socket_sock_rcv_skb(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Acked-by: Casey Schaufler Link: https://patch.msgid.link/20260311052020.1213705-7-kuniyu@google.com Signed-off-by: Jakub Kicinski --- security/smack/smack_lsm.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 98af9d7b9434..e581d6465946 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -4176,7 +4176,6 @@ static int smk_skb_to_addr_ipv6(struct sk_buff *skb, struct sockaddr_in6 *sip) sip->sin6_port = th->source; break; case IPPROTO_UDP: - case IPPROTO_UDPLITE: uh = skb_header_pointer(skb, offset, sizeof(_udph), &_udph); if (uh != NULL) sip->sin6_port = uh->source; @@ -4301,8 +4300,7 @@ static int smack_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) #if IS_ENABLED(CONFIG_IPV6) case PF_INET6: proto = smk_skb_to_addr_ipv6(skb, &sadd); - if (proto != IPPROTO_UDP && proto != IPPROTO_UDPLITE && - proto != IPPROTO_TCP) + if (proto != IPPROTO_UDP && proto != IPPROTO_TCP) break; #ifdef SMACK_IPV6_SECMARK_LABELING skp = smack_from_skb(skb); From c2539d4f2df7a9889b71bad97b97ddfd9e47add1 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 11 Mar 2026 05:19:54 +0000 Subject: [PATCH 0493/1561] udp: Remove partial csum code in RX. UDP-Lite supports the partial checksum and the coverage is stored in the position of the length field of struct udphdr. In RX paths, udp4_csum_init() / udp6_csum_init() save the value in UDP_SKB_CB(skb)->cscov and set UDP_SKB_CB(skb)->partial_cov to 1 if the coverage is not full. The subsequent processing diverges depending on the value, but such paths are now dead. Also, these functions have some code guarded for UDP: * udp_unicast_rcv_skb / udp6_unicast_rcv_skb * __udp4_lib_rcv() and __udp6_lib_rcv(). Let's remove the partial csum code and the unnecessary guard for UDP-Lite in RX. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260311052020.1213705-8-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/udp.h | 17 ++------- include/net/udplite.h | 34 ----------------- net/ipv4/udp.c | 85 ++++++++----------------------------------- net/ipv6/udp.c | 82 ++++++++++------------------------------- 4 files changed, 39 insertions(+), 179 deletions(-) diff --git a/include/net/udp.h b/include/net/udp.h index 264c10607d2e..bc275cda9f8c 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -32,11 +32,9 @@ #include /** - * struct udp_skb_cb - UDP(-Lite) private variables + * struct udp_skb_cb - UDP private variables * * @header: private variables used by IPv4/IPv6 - * @cscov: checksum coverage length (UDP-Lite only) - * @partial_cov: if set indicates partial csum coverage */ struct udp_skb_cb { union { @@ -45,8 +43,6 @@ struct udp_skb_cb { struct inet6_skb_parm h6; #endif } header; - __u16 cscov; - __u8 partial_cov; }; #define UDP_SKB_CB(__skb) ((struct udp_skb_cb *)((__skb)->cb)) @@ -216,13 +212,11 @@ extern int sysctl_udp_wmem_min; struct sk_buff; /* - * Generic checksumming routines for UDP(-Lite) v4 and v6 + * Generic checksumming routines for UDP v4 and v6 */ static inline __sum16 __udp_lib_checksum_complete(struct sk_buff *skb) { - return (UDP_SKB_CB(skb)->cscov == skb->len ? - __skb_checksum_complete(skb) : - __skb_checksum_complete_head(skb, UDP_SKB_CB(skb)->cscov)); + return __skb_checksum_complete(skb); } static inline int udp_lib_checksum_complete(struct sk_buff *skb) @@ -273,7 +267,6 @@ static inline void udp_csum_pull_header(struct sk_buff *skb) skb->csum = csum_partial(skb->data, sizeof(struct udphdr), skb->csum); skb_pull_rcsum(skb, sizeof(struct udphdr)); - UDP_SKB_CB(skb)->cscov -= sizeof(struct udphdr); } typedef struct sock *(*udp_lookup_t)(const struct sk_buff *skb, __be16 sport, @@ -641,9 +634,6 @@ drop: static inline void udp_post_segment_fix_csum(struct sk_buff *skb) { - /* UDP-lite can't land here - no GRO */ - WARN_ON_ONCE(UDP_SKB_CB(skb)->partial_cov); - /* UDP packets generated with UDP_SEGMENT and traversing: * * UDP tunnel(xmit) -> veth (segmentation) -> veth (gro) -> UDP tunnel (rx) @@ -657,7 +647,6 @@ static inline void udp_post_segment_fix_csum(struct sk_buff *skb) * a valid csum after the segmentation. * Additionally fixup the UDP CB. */ - UDP_SKB_CB(skb)->cscov = skb->len; if (skb->ip_summed == CHECKSUM_NONE && !skb->csum_valid) skb->csum_valid = 1; } diff --git a/include/net/udplite.h b/include/net/udplite.h index fdd769745ac4..0456a14c993b 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -25,40 +25,6 @@ static __inline__ int udplite_getfrag(void *from, char *to, int offset, /* * Checksumming routines */ -static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh) -{ - u16 cscov; - - /* In UDPv4 a zero checksum means that the transmitter generated no - * checksum. UDP-Lite (like IPv6) mandates checksums, hence packets - * with a zero checksum field are illegal. */ - if (uh->check == 0) { - net_dbg_ratelimited("UDPLite: zeroed checksum field\n"); - return 1; - } - - cscov = ntohs(uh->len); - - if (cscov == 0) /* Indicates that full coverage is required. */ - ; - else if (cscov < 8 || cscov > skb->len) { - /* - * Coverage length violates RFC 3828: log and discard silently. - */ - net_dbg_ratelimited("UDPLite: bad csum coverage %d/%d\n", - cscov, skb->len); - return 1; - - } else if (cscov < skb->len) { - UDP_SKB_CB(skb)->partial_cov = 1; - UDP_SKB_CB(skb)->cscov = cscov; - if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->ip_summed = CHECKSUM_NONE; - skb->csum_valid = 0; - } - - return 0; -} /* Fast-path computation of checksum. Socket may not be locked. */ static inline __wsum udplite_csum(struct sk_buff *skb) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 10082095e633..d42fb9330c22 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2072,14 +2072,13 @@ EXPORT_IPV6_MOD(udp_read_skb); INDIRECT_CALLABLE_SCOPE int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags) { - struct inet_sock *inet = inet_sk(sk); DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); - struct sk_buff *skb; - unsigned int ulen, copied; int off, err, peeking = flags & MSG_PEEK; - int is_udplite = IS_UDPLITE(sk); + struct inet_sock *inet = inet_sk(sk); struct net *net = sock_net(sk); bool checksum_valid = false; + unsigned int ulen, copied; + struct sk_buff *skb; if (flags & MSG_ERRQUEUE) return ip_recv_error(sk, msg, len); @@ -2097,14 +2096,10 @@ try_again: else if (copied < ulen) msg->msg_flags |= MSG_TRUNC; - /* - * If checksum is needed at all, try to do it while copying the - * data. If the data is truncated, or if we only want a partial - * coverage checksum (UDP-Lite), do it before the copy. + /* If checksum is needed at all, try to do it while copying the + * data. If the data is truncated, do it before the copy. */ - - if (copied < ulen || peeking || - (is_udplite && UDP_SKB_CB(skb)->partial_cov)) { + if (copied < ulen || peeking) { checksum_valid = udp_skb_csum_unnecessary(skb) || !__udp_lib_checksum_complete(skb); if (!checksum_valid) @@ -2444,42 +2439,6 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) /* FALLTHROUGH -- it's a UDP Packet */ } - /* - * UDP-Lite specific tests, ignored on UDP sockets - */ - if (unlikely(udp_test_bit(UDPLITE_RECV_CC, sk) && - UDP_SKB_CB(skb)->partial_cov)) { - u16 pcrlen = READ_ONCE(up->pcrlen); - - /* - * MIB statistics other than incrementing the error count are - * disabled for the following two types of errors: these depend - * on the application settings, not on the functioning of the - * protocol stack as such. - * - * RFC 3828 here recommends (sec 3.3): "There should also be a - * way ... to ... at least let the receiving application block - * delivery of packets with coverage values less than a value - * provided by the application." - */ - if (pcrlen == 0) { /* full coverage was set */ - net_dbg_ratelimited("UDPLite: partial coverage %d while full coverage %d requested\n", - UDP_SKB_CB(skb)->cscov, skb->len); - goto drop; - } - /* The next case involves violating the min. coverage requested - * by the receiver. This is subtle: if receiver wants x and x is - * greater than the buffersize/MTU then receiver will complain - * that it wants x while sender emits packets of smaller size y. - * Therefore the above ...()->partial_cov statement is essential. - */ - if (UDP_SKB_CB(skb)->cscov < pcrlen) { - net_dbg_ratelimited("UDPLite: coverage %d too small, need min %d\n", - UDP_SKB_CB(skb)->cscov, pcrlen); - goto drop; - } - } - prefetch(&sk->sk_rmem_alloc); if (rcu_access_pointer(sk->sk_filter) && udp_lib_checksum_complete(skb)) @@ -2613,29 +2572,14 @@ start_lookup: * Otherwise, csum completion requires checksumming packet body, * including udp header and folding it to skb->csum. */ -static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, - int proto) +static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh) { int err; - UDP_SKB_CB(skb)->partial_cov = 0; - UDP_SKB_CB(skb)->cscov = skb->len; - - if (proto == IPPROTO_UDPLITE) { - err = udplite_checksum_init(skb, uh); - if (err) - return err; - - if (UDP_SKB_CB(skb)->partial_cov) { - skb->csum = inet_compute_pseudo(skb, proto); - return 0; - } - } - /* Note, we are only interested in != 0 or == 0, thus the * force to int. */ - err = (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, + err = (__force int)skb_checksum_init_zero_check(skb, IPPROTO_UDP, uh->check, inet_compute_pseudo); if (err) return err; @@ -2663,7 +2607,7 @@ static int udp_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb, { int ret; - if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk)) + if (inet_get_convert_csum(sk) && uh->check) skb_checksum_try_convert(skb, IPPROTO_UDP, inet_compute_pseudo); ret = udp_queue_rcv_skb(sk, skb); @@ -2708,14 +2652,17 @@ static int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (ulen > skb->len) goto short_packet; - if (proto == IPPROTO_UDP) { - /* UDP validates ulen. */ - if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen)) + if (ulen < sizeof(*uh)) + goto short_packet; + + if (ulen < skb->len) { + if (pskb_trim_rcsum(skb, ulen)) goto short_packet; + uh = udp_hdr(skb); } - if (udp4_csum_init(skb, uh, proto)) + if (udp4_csum_init(skb, uh)) goto csum_error; sk = inet_steal_sock(net, skb, sizeof(struct udphdr), saddr, uh->source, daddr, uh->dest, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 07308b7156a6..bf5430ea66f0 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -469,15 +469,13 @@ INDIRECT_CALLABLE_SCOPE int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags) { + int off, is_udp4, err, peeking = flags & MSG_PEEK; struct ipv6_pinfo *np = inet6_sk(sk); struct inet_sock *inet = inet_sk(sk); - struct sk_buff *skb; - unsigned int ulen, copied; - int off, err, peeking = flags & MSG_PEEK; - int is_udplite = IS_UDPLITE(sk); struct udp_mib __percpu *mib; bool checksum_valid = false; - int is_udp4; + unsigned int ulen, copied; + struct sk_buff *skb; if (flags & MSG_ERRQUEUE) return ipv6_recv_error(sk, msg, len); @@ -501,14 +499,10 @@ try_again: is_udp4 = (skb->protocol == htons(ETH_P_IP)); mib = __UDPX_MIB(sk, is_udp4); - /* - * If checksum is needed at all, try to do it while copying the - * data. If the data is truncated, or if we only want a partial - * coverage checksum (UDP-Lite), do it before the copy. + /* If checksum is needed at all, try to do it while copying the + * data. If the data is truncated, do it before the copy. */ - - if (copied < ulen || peeking || - (is_udplite && UDP_SKB_CB(skb)->partial_cov)) { + if (copied < ulen || peeking) { checksum_valid = udp_skb_csum_unnecessary(skb) || !__udp_lib_checksum_complete(skb); if (!checksum_valid) @@ -870,25 +864,6 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) /* FALLTHROUGH -- it's a UDP Packet */ } - /* - * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c). - */ - if (unlikely(udp_test_bit(UDPLITE_RECV_CC, sk) && - UDP_SKB_CB(skb)->partial_cov)) { - u16 pcrlen = READ_ONCE(up->pcrlen); - - if (pcrlen == 0) { /* full coverage was set */ - net_dbg_ratelimited("UDPLITE6: partial coverage %d while full coverage %d requested\n", - UDP_SKB_CB(skb)->cscov, skb->len); - goto drop; - } - if (UDP_SKB_CB(skb)->cscov < pcrlen) { - net_dbg_ratelimited("UDPLITE6: coverage %d too small, need min %d\n", - UDP_SKB_CB(skb)->cscov, pcrlen); - goto drop; - } - } - prefetch(&sk->sk_rmem_alloc); if (rcu_access_pointer(sk->sk_filter) && udp_lib_checksum_complete(skb)) @@ -1053,7 +1028,7 @@ static int udp6_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb, { int ret; - if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk)) + if (inet_get_convert_csum(sk) && uh->check) skb_checksum_try_convert(skb, IPPROTO_UDP, ip6_compute_pseudo); ret = udpv6_queue_rcv_skb(sk, skb); @@ -1064,24 +1039,10 @@ static int udp6_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb, return 0; } -static int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto) +static int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh) { int err; - UDP_SKB_CB(skb)->partial_cov = 0; - UDP_SKB_CB(skb)->cscov = skb->len; - - if (proto == IPPROTO_UDPLITE) { - err = udplite_checksum_init(skb, uh); - if (err) - return err; - - if (UDP_SKB_CB(skb)->partial_cov) { - skb->csum = ip6_compute_pseudo(skb, proto); - return 0; - } - } - /* To support RFC 6936 (allow zero checksum in UDP/IPV6 for tunnels) * we accept a checksum of zero here. When we find the socket * for the UDP packet we'll check if that socket allows zero checksum @@ -1090,7 +1051,7 @@ static int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto) * Note, we are only interested in != 0 or == 0, thus the * force to int. */ - err = (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, + err = (__force int)skb_checksum_init_zero_check(skb, IPPROTO_UDP, uh->check, ip6_compute_pseudo); if (err) return err; @@ -1132,26 +1093,23 @@ static int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (ulen > skb->len) goto short_packet; - if (proto == IPPROTO_UDP) { - /* UDP validates ulen. */ + /* Check for jumbo payload */ + if (ulen == 0) + ulen = skb->len; - /* Check for jumbo payload */ - if (ulen == 0) - ulen = skb->len; + if (ulen < sizeof(*uh)) + goto short_packet; - if (ulen < sizeof(*uh)) + if (ulen < skb->len) { + if (pskb_trim_rcsum(skb, ulen)) goto short_packet; - if (ulen < skb->len) { - if (pskb_trim_rcsum(skb, ulen)) - goto short_packet; - saddr = &ipv6_hdr(skb)->saddr; - daddr = &ipv6_hdr(skb)->daddr; - uh = udp_hdr(skb); - } + saddr = &ipv6_hdr(skb)->saddr; + daddr = &ipv6_hdr(skb)->daddr; + uh = udp_hdr(skb); } - if (udp6_csum_init(skb, uh, proto)) + if (udp6_csum_init(skb, uh)) goto csum_error; /* Check if the socket is already available, e.g. due to early demux */ From b2a1d719be4f8e9d970038ecd4db983f6e42d377 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 11 Mar 2026 05:19:55 +0000 Subject: [PATCH 0494/1561] udp: Remove partial csum code in TX. UDP TX paths also have some code for UDP-Lite partial checksum: * udplite_csum() in udp_send_skb() and udp_v6_send_skb() * udplite_getfrag() in udp_sendmsg() and udpv6_sendmsg() Let's remove such code. Now, we can use IPPROTO_UDP directly instead of sk->sk_protocol or fl6->flowi6_proto for csum_tcpudp_magic() and csum_ipv6_magic(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260311052020.1213705-9-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/udplite.h | 35 ------------------------- net/ipv4/udp.c | 60 +++++++++++++++++-------------------------- net/ipv6/udp.c | 56 +++++++++++++++++++--------------------- 3 files changed, 49 insertions(+), 102 deletions(-) diff --git a/include/net/udplite.h b/include/net/udplite.h index 0456a14c993b..6bfa1d6833d1 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -12,39 +12,4 @@ #define UDPLITE_SEND_CSCOV 10 /* sender partial coverage (as sent) */ #define UDPLITE_RECV_CSCOV 11 /* receiver partial coverage (threshold ) */ -/* - * Checksum computation is all in software, hence simpler getfrag. - */ -static __inline__ int udplite_getfrag(void *from, char *to, int offset, - int len, int odd, struct sk_buff *skb) -{ - struct msghdr *msg = from; - return copy_from_iter_full(to, len, &msg->msg_iter) ? 0 : -EFAULT; -} - -/* - * Checksumming routines - */ - -/* Fast-path computation of checksum. Socket may not be locked. */ -static inline __wsum udplite_csum(struct sk_buff *skb) -{ - const int off = skb_transport_offset(skb); - const struct sock *sk = skb->sk; - int len = skb->len - off; - - if (udp_test_bit(UDPLITE_SEND_CC, sk)) { - u16 pcslen = READ_ONCE(udp_sk(sk)->pcslen); - - if (pcslen < len) { - if (pcslen > 0) - len = pcslen; - udp_hdr(skb)->len = htons(pcslen); - } - } - skb->ip_summed = CHECKSUM_NONE; /* no HW support for checksumming */ - - return skb_checksum(skb, off, len, 0); -} - #endif /* _UDPLITE_H */ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d42fb9330c22..9a2c8ff96e83 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1120,20 +1120,19 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4, struct inet_cork *cork) { struct sock *sk = skb->sk; - struct inet_sock *inet = inet_sk(sk); + int offset, len, datalen; struct udphdr *uh; int err; - int is_udplite = IS_UDPLITE(sk); - int offset = skb_transport_offset(skb); - int len = skb->len - offset; - int datalen = len - sizeof(*uh); - __wsum csum = 0; + + offset = skb_transport_offset(skb); + len = skb->len - offset; + datalen = len - sizeof(*uh); /* * Create a UDP header */ uh = udp_hdr(skb); - uh->source = inet->inet_sport; + uh->source = inet_sk(sk)->inet_sport; uh->dest = fl4->fl4_dport; uh->len = htons(len); uh->check = 0; @@ -1154,7 +1153,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4, kfree_skb(skb); return -EINVAL; } - if (is_udplite || dst_xfrm(skb_dst(skb))) { + if (dst_xfrm(skb_dst(skb))) { kfree_skb(skb); return -EIO; } @@ -1170,26 +1169,18 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4, } } - if (is_udplite) /* UDP-Lite */ - csum = udplite_csum(skb); - - else if (sk->sk_no_check_tx) { /* UDP csum off */ - + if (sk->sk_no_check_tx) { /* UDP csum off */ skb->ip_summed = CHECKSUM_NONE; goto send; - } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ csum_partial: - udp4_hwcsum(skb, fl4->saddr, fl4->daddr); goto send; - - } else - csum = udp_csum(skb); + } /* add protocol-dependent pseudo-header */ uh->check = csum_tcpudp_magic(fl4->saddr, fl4->daddr, len, - sk->sk_protocol, csum); + IPPROTO_UDP, udp_csum(skb)); if (uh->check == 0) uh->check = CSUM_MANGLED_0; @@ -1270,26 +1261,23 @@ EXPORT_IPV6_MOD_GPL(udp_cmsg_send); int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { + int corkreq = udp_test_bit(CORK, sk) || msg->msg_flags & MSG_MORE; DEFINE_RAW_FLEX(struct ip_options_rcu, opt_copy, opt.__data, IP_OPTIONS_DATA_FIXED_SIZE); + DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name); + int ulen = len, free = 0, connected = 0; struct inet_sock *inet = inet_sk(sk); struct udp_sock *up = udp_sk(sk); - DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name); - struct flowi4 fl4_stack; - struct flowi4 *fl4; - int ulen = len; - struct ipcm_cookie ipc; - struct rtable *rt = NULL; - int free = 0; - int connected = 0; __be32 daddr, faddr, saddr; - u8 scope; - __be16 dport; - int err, is_udplite = IS_UDPLITE(sk); - int corkreq = udp_test_bit(CORK, sk) || msg->msg_flags & MSG_MORE; - int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); + struct rtable *rt = NULL; + struct flowi4 fl4_stack; + struct ipcm_cookie ipc; struct sk_buff *skb; + struct flowi4 *fl4; + __be16 dport; int uc_index; + u8 scope; + int err; if (len > 0xFFFF) return -EMSGSIZE; @@ -1301,8 +1289,6 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message compatibility */ return -EOPNOTSUPP; - getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; - fl4 = &inet->cork.fl.u.ip4; if (READ_ONCE(up->pending)) { /* @@ -1444,7 +1430,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) flowi4_init_output(fl4, ipc.oif, ipc.sockc.mark, ipc.tos & INET_DSCP_MASK, scope, - sk->sk_protocol, flow_flags, faddr, saddr, + IPPROTO_UDP, flow_flags, faddr, saddr, dport, inet->inet_sport, sk_uid(sk)); @@ -1478,7 +1464,7 @@ back_from_confirm: if (!corkreq) { struct inet_cork cork; - skb = ip_make_skb(sk, fl4, getfrag, msg, ulen, + skb = ip_make_skb(sk, fl4, ip_generic_getfrag, msg, ulen, sizeof(struct udphdr), &ipc, &rt, &cork, msg->msg_flags); err = PTR_ERR(skb); @@ -1509,7 +1495,7 @@ back_from_confirm: do_append_data: up->len += ulen; - err = ip_append_data(sk, fl4, getfrag, msg, ulen, + err = ip_append_data(sk, fl4, ip_generic_getfrag, msg, ulen, sizeof(struct udphdr), &ipc, &rt, corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); if (err) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index bf5430ea66f0..511e3f898be5 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1370,13 +1370,13 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, struct inet_cork *cork) { struct sock *sk = skb->sk; + int offset, len, datalen; struct udphdr *uh; int err = 0; - int is_udplite = IS_UDPLITE(sk); - __wsum csum = 0; - int offset = skb_transport_offset(skb); - int len = skb->len - offset; - int datalen = len - sizeof(*uh); + + offset = skb_transport_offset(skb); + len = skb->len - offset; + datalen = len - sizeof(*uh); /* * Create a UDP header @@ -1403,7 +1403,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, kfree_skb(skb); return -EINVAL; } - if (is_udplite || dst_xfrm(skb_dst(skb))) { + if (dst_xfrm(skb_dst(skb))) { kfree_skb(skb); return -EIO; } @@ -1419,21 +1419,18 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, } } - if (is_udplite) - csum = udplite_csum(skb); - else if (udp_get_no_check6_tx(sk)) { /* UDP csum disabled */ + if (udp_get_no_check6_tx(sk)) { /* UDP csum disabled */ skb->ip_summed = CHECKSUM_NONE; goto send; } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ csum_partial: udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr, len); goto send; - } else - csum = udp_csum(skb); + } /* add protocol-dependent pseudo-header */ uh->check = csum_ipv6_magic(&fl6->saddr, &fl6->daddr, - len, fl6->flowi6_proto, csum); + len, IPPROTO_UDP, udp_csum(skb)); if (uh->check == 0) uh->check = CSUM_MANGLED_0; @@ -1473,27 +1470,26 @@ out: int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { - struct ipv6_txoptions opt_space; - struct udp_sock *up = udp_sk(sk); + int corkreq = udp_test_bit(CORK, sk) || msg->msg_flags & MSG_MORE; + DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); + struct ipv6_txoptions *opt_to_free = NULL; + struct in6_addr *daddr, *final_p, final; + struct ip6_flowlabel *flowlabel = NULL; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); - DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); - struct in6_addr *daddr, *final_p, final; struct ipv6_txoptions *opt = NULL; - struct ipv6_txoptions *opt_to_free = NULL; - struct ip6_flowlabel *flowlabel = NULL; - struct inet_cork_full cork; - struct flowi6 *fl6 = &cork.fl.u.ip6; - struct dst_entry *dst; - struct ipcm6_cookie ipc6; + struct udp_sock *up = udp_sk(sk); + struct ipv6_txoptions opt_space; int addr_len = msg->msg_namelen; + struct inet_cork_full cork; + struct ipcm6_cookie ipc6; bool connected = false; + struct dst_entry *dst; + struct flowi6 *fl6; int ulen = len; - int corkreq = udp_test_bit(CORK, sk) || msg->msg_flags & MSG_MORE; int err; - int is_udplite = IS_UDPLITE(sk); - int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); + fl6 = &cork.fl.u.ip6; ipcm6_init_sk(&ipc6, sk); ipc6.gso_size = READ_ONCE(up->gso_size); @@ -1552,7 +1548,6 @@ do_udp_sendmsg: if (len > INT_MAX - sizeof(struct udphdr)) return -EMSGSIZE; - getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; if (READ_ONCE(up->pending)) { if (READ_ONCE(up->pending) == AF_INET) return udp_sendmsg(sk, msg, len); @@ -1654,7 +1649,7 @@ do_udp_sendmsg: opt = ipv6_fixup_options(&opt_space, opt); ipc6.opt = opt; - fl6->flowi6_proto = sk->sk_protocol; + fl6->flowi6_proto = IPPROTO_UDP; fl6->flowi6_mark = ipc6.sockc.mark; fl6->daddr = *daddr; if (ipv6_addr_any(&fl6->saddr) && !ipv6_addr_any(&np->saddr)) @@ -1721,7 +1716,7 @@ back_from_confirm: if (!corkreq) { struct sk_buff *skb; - skb = ip6_make_skb(sk, getfrag, msg, ulen, + skb = ip6_make_skb(sk, ip_generic_getfrag, msg, ulen, sizeof(struct udphdr), &ipc6, dst_rt6_info(dst), msg->msg_flags, &cork); @@ -1747,8 +1742,9 @@ back_from_confirm: do_append_data: up->len += ulen; - err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr), - &ipc6, fl6, dst_rt6_info(dst), + err = ip6_append_data(sk, ip_generic_getfrag, msg, ulen, + sizeof(struct udphdr), &ipc6, fl6, + dst_rt6_info(dst), corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); if (err) udp_v6_flush_pending_frames(sk); From 74f0cca1100b6d1f1ea28178435aff8078d06603 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 11 Mar 2026 05:19:56 +0000 Subject: [PATCH 0495/1561] udp: Remove UDPLITE_SEND_CSCOV and UDPLITE_RECV_CSCOV. UDP-Lite supports variable-length checksum and has two socket options, UDPLITE_SEND_CSCOV and UDPLITE_RECV_CSCOV, to control the checksum coverage. Let's remove the support. setsockopt(UDPLITE_SEND_CSCOV / UDPLITE_RECV_CSCOV) was only available for UDP-Lite and returned -ENOPROTOOPT for UDP. Now, the options are handled in ip_setsockopt() and ipv6_setsockopt(), which still return the same error. getsockopt(UDPLITE_SEND_CSCOV / UDPLITE_RECV_CSCOV) was available for UDP and always returned 0, meaning full checksum, but now -ENOPROTOOPT is returned. Given that getsockopt() is meaningless for UDP and even the options are not defined under include/uapi/, this should not be a problem. $ man 7 udplite ... BUGS Where glibc support is missing, the following definitions are needed: #define IPPROTO_UDPLITE 136 #define UDPLITE_SEND_CSCOV 10 #define UDPLITE_RECV_CSCOV 11 Signed-off-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260311052020.1213705-10-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/linux/udp.h | 10 +-------- include/net/udplite.h | 15 ------------- include/uapi/linux/udp.h | 2 ++ net/ipv4/udp.c | 46 ++-------------------------------------- net/ipv6/ip6_checksum.c | 2 +- net/ipv6/udp.c | 5 ++--- 6 files changed, 8 insertions(+), 72 deletions(-) delete mode 100644 include/net/udplite.h diff --git a/include/linux/udp.h b/include/linux/udp.h index 1cbf6b4d3aab..ce56ebcee5cb 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -40,8 +40,6 @@ enum { UDP_FLAGS_ACCEPT_FRAGLIST, UDP_FLAGS_ACCEPT_L4, UDP_FLAGS_ENCAP_ENABLED, /* This socket enabled encap */ - UDP_FLAGS_UDPLITE_SEND_CC, /* set via udplite setsockopt */ - UDP_FLAGS_UDPLITE_RECV_CC, /* set via udplite setsockopt */ }; /* per NUMA structure for lockless producer usage. */ @@ -74,11 +72,7 @@ struct udp_sock { */ __u16 len; /* total length of pending frames */ __u16 gso_size; - /* - * Fields specific to UDP-Lite. - */ - __u16 pcslen; - __u16 pcrlen; + /* * For encapsulation sockets. */ @@ -236,8 +230,6 @@ static inline void udp_allow_gso(struct sock *sk) hlist_nulls_for_each_entry_rcu(__up, node, list, udp_lrpa_node) #endif -#define IS_UDPLITE(__sk) (unlikely(__sk->sk_protocol == IPPROTO_UDPLITE)) - static inline struct sock *udp_tunnel_sk(const struct net *net, bool is_ipv6) { #if IS_ENABLED(CONFIG_NET_UDP_TUNNEL) diff --git a/include/net/udplite.h b/include/net/udplite.h deleted file mode 100644 index 6bfa1d6833d1..000000000000 --- a/include/net/udplite.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Definitions for the UDP-Lite (RFC 3828) code. - */ -#ifndef _UDPLITE_H -#define _UDPLITE_H - -#include -#include - -/* UDP-Lite socket options */ -#define UDPLITE_SEND_CSCOV 10 /* sender partial coverage (as sent) */ -#define UDPLITE_RECV_CSCOV 11 /* receiver partial coverage (threshold ) */ - -#endif /* _UDPLITE_H */ diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h index edca3e430305..877fb02df8fb 100644 --- a/include/uapi/linux/udp.h +++ b/include/uapi/linux/udp.h @@ -29,6 +29,8 @@ struct udphdr { /* UDP socket options */ #define UDP_CORK 1 /* Never send partially complete segments */ +/* Deprecated, reserved for UDPLITE_SEND_CSCOV 10 */ +/* Deprecated, reserved for UDPLITE_RECV_CSCOV 11 */ #define UDP_ENCAP 100 /* Set the socket to accept encapsulated packets */ #define UDP_NO_CHECK6_TX 101 /* Disable sending checksum for UDP6X */ #define UDP_NO_CHECK6_RX 102 /* Disable accepting checksum for UDP6 */ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 9a2c8ff96e83..d47ca721ef0d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -117,7 +117,6 @@ #include #include #include -#include #include #if IS_ENABLED(CONFIG_IPV6) #include @@ -2924,7 +2923,6 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, struct udp_sock *up = udp_sk(sk); int val, valbool; int err = 0; - int is_udplite = IS_UDPLITE(sk); if (level == SOL_SOCKET) { err = sk_setsockopt(sk, level, optname, optval, optlen); @@ -3011,36 +3009,6 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, sockopt_release_sock(sk); break; - /* - * UDP-Lite's partial checksum coverage (RFC 3828). - */ - /* The sender sets actual checksum coverage length via this option. - * The case coverage > packet length is handled by send module. */ - case UDPLITE_SEND_CSCOV: - if (!is_udplite) /* Disable the option on UDP sockets */ - return -ENOPROTOOPT; - if (val != 0 && val < 8) /* Illegal coverage: use default (8) */ - val = 8; - else if (val > USHRT_MAX) - val = USHRT_MAX; - WRITE_ONCE(up->pcslen, val); - udp_set_bit(UDPLITE_SEND_CC, sk); - break; - - /* The receiver specifies a minimum checksum coverage value. To make - * sense, this should be set to at least 8 (as done below). If zero is - * used, this again means full checksum coverage. */ - case UDPLITE_RECV_CSCOV: - if (!is_udplite) /* Disable the option on UDP sockets */ - return -ENOPROTOOPT; - if (val != 0 && val < 8) /* Avoid silly minimal values. */ - val = 8; - else if (val > USHRT_MAX) - val = USHRT_MAX; - WRITE_ONCE(up->pcrlen, val); - udp_set_bit(UDPLITE_RECV_CC, sk); - break; - default: err = -ENOPROTOOPT; break; @@ -3053,7 +3021,7 @@ EXPORT_IPV6_MOD(udp_lib_setsockopt); static int udp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) { - if (level == SOL_UDP || level == SOL_UDPLITE || level == SOL_SOCKET) + if (level == SOL_UDP || level == SOL_SOCKET) return udp_lib_setsockopt(sk, level, optname, optval, optlen, udp_push_pending_frames); @@ -3099,16 +3067,6 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, val = udp_test_bit(GRO_ENABLED, sk); break; - /* The following two cannot be changed on UDP sockets, the return is - * always 0 (which corresponds to the full checksum coverage of UDP). */ - case UDPLITE_SEND_CSCOV: - val = READ_ONCE(up->pcslen); - break; - - case UDPLITE_RECV_CSCOV: - val = READ_ONCE(up->pcrlen); - break; - default: return -ENOPROTOOPT; } @@ -3124,7 +3082,7 @@ EXPORT_IPV6_MOD(udp_lib_getsockopt); static int udp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { - if (level == SOL_UDP || level == SOL_UDPLITE) + if (level == SOL_UDP) return udp_lib_getsockopt(sk, level, optname, optval, optlen); return ip_getsockopt(sk, level, optname, optval, optlen); } diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c index 8bb68a0cdfd6..e1a594873675 100644 --- a/net/ipv6/ip6_checksum.c +++ b/net/ipv6/ip6_checksum.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include -#include #include #ifndef _HAVE_ARCH_IPV6_CSUM diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 511e3f898be5..c3d8b5ede164 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -58,7 +58,6 @@ #include #include #include -#include static void udpv6_destruct_sock(struct sock *sk) { @@ -1831,7 +1830,7 @@ static void udpv6_destroy_sock(struct sock *sk) static int udpv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) { - if (level == SOL_UDP || level == SOL_UDPLITE || level == SOL_SOCKET) + if (level == SOL_UDP || level == SOL_SOCKET) return udp_lib_setsockopt(sk, level, optname, optval, optlen, udp_v6_push_pending_frames); @@ -1841,7 +1840,7 @@ static int udpv6_setsockopt(struct sock *sk, int level, int optname, static int udpv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { - if (level == SOL_UDP || level == SOL_UDPLITE) + if (level == SOL_UDP) return udp_lib_getsockopt(sk, level, optname, optval, optlen); return ipv6_getsockopt(sk, level, optname, optval, optlen); } From 5c2738588621a4a53e3a1e87860abcaf9190194a Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 11 Mar 2026 05:19:57 +0000 Subject: [PATCH 0496/1561] udp: Remove struct proto.h.udp_table. Since UDP and UDP-Lite had dedicated socket hash tables for each, we have had to fetch them from different pointers. UDP always has its global or per-netns table in net->ipv4.udp_table and struct proto.h.udp_table is NULL. OTOH, UDP-Lite had only one global table in the pointer. We no longer use the field. Let's remove it and udp_get_table_prot(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260311052020.1213705-11-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 1 - net/ipv4/udp.c | 26 ++++++++++++-------------- net/ipv6/udp.c | 1 - 3 files changed, 12 insertions(+), 16 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 16a1b8895206..7d51ac9e7d9a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1392,7 +1392,6 @@ struct proto { union { struct inet_hashinfo *hashinfo; - struct udp_table *udp_table; struct raw_hashinfo *raw_hash; struct smc_hashinfo *smc_hash; } h; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d47ca721ef0d..a7ca727347ce 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -134,11 +134,6 @@ EXPORT_PER_CPU_SYMBOL_GPL(udp_memory_per_cpu_fw_alloc); #define MAX_UDP_PORTS 65536 #define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN_PERNET) -static struct udp_table *udp_get_table_prot(struct sock *sk) -{ - return sk->sk_prot->h.udp_table ? : sock_net(sk)->ipv4.udp_table; -} - static int udp_lib_lport_inuse(struct net *net, __u16 num, const struct udp_hslot *hslot, unsigned long *bitmap, @@ -240,11 +235,13 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot) int udp_lib_get_port(struct sock *sk, unsigned short snum, unsigned int hash2_nulladdr) { - struct udp_table *udptable = udp_get_table_prot(sk); struct udp_hslot *hslot, *hslot2; struct net *net = sock_net(sk); + struct udp_table *udptable; int error = -EADDRINUSE; + udptable = net->ipv4.udp_table; + if (!snum) { DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN); unsigned short first, last; @@ -2224,12 +2221,13 @@ EXPORT_IPV6_MOD(udp_disconnect); void udp_lib_unhash(struct sock *sk) { if (sk_hashed(sk)) { - struct udp_table *udptable = udp_get_table_prot(sk); struct udp_hslot *hslot, *hslot2; + struct net *net = sock_net(sk); + struct udp_table *udptable; sock_rps_delete_flow(sk); - hslot = udp_hashslot(udptable, sock_net(sk), - udp_sk(sk)->udp_port_hash); + udptable = net->ipv4.udp_table; + hslot = udp_hashslot(udptable, net, udp_sk(sk)->udp_port_hash); hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); spin_lock_bh(&hslot->lock); @@ -2238,7 +2236,7 @@ void udp_lib_unhash(struct sock *sk) if (sk_del_node_init_rcu(sk)) { hslot->count--; inet_sk(sk)->inet_num = 0; - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + sock_prot_inuse_add(net, sk->sk_prot, -1); spin_lock(&hslot2->lock); hlist_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); @@ -2258,11 +2256,12 @@ EXPORT_IPV6_MOD(udp_lib_unhash); void udp_lib_rehash(struct sock *sk, u16 newhash, u16 newhash4) { if (sk_hashed(sk)) { - struct udp_table *udptable = udp_get_table_prot(sk); struct udp_hslot *hslot, *hslot2, *nhslot2; + struct net *net = sock_net(sk); + struct udp_table *udptable; - hslot = udp_hashslot(udptable, sock_net(sk), - udp_sk(sk)->udp_port_hash); + udptable = net->ipv4.udp_table; + hslot = udp_hashslot(udptable, net, udp_sk(sk)->udp_port_hash); hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); nhslot2 = udp_hashslot2(udptable, newhash); @@ -3175,7 +3174,6 @@ struct proto udp_prot = { .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min), .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min), .obj_size = sizeof(struct udp_sock), - .h.udp_table = NULL, .diag_destroy = udp_abort, }; EXPORT_SYMBOL(udp_prot); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index c3d8b5ede164..5bddbf457b61 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1924,7 +1924,6 @@ struct proto udpv6_prot = { .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min), .obj_size = sizeof(struct udp6_sock), .ipv6_pinfo_offset = offsetof(struct udp6_sock, inet6), - .h.udp_table = NULL, .diag_destroy = udp_abort, }; From c570bd25d88a02c249be23850315435ec69808f5 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 11 Mar 2026 05:19:58 +0000 Subject: [PATCH 0497/1561] udp: Remove udp_table in struct udp_seq_afinfo. Since UDP and UDP-Lite had dedicated socket hash tables for each, we have had to fetch them from different pointers for procfs or bpf iterator. UDP always has its global or per-netns table in net->ipv4.udp_table and struct udp_seq_afinfo.udp_table is NULL. OTOH, UDP-Lite had only one global table in the pointer. We no longer use the field. Let's remove it and udp_get_table_seq(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260311052020.1213705-12-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/udp.h | 1 - net/ipv4/udp.c | 22 ++++------------------ net/ipv6/udp.c | 1 - 3 files changed, 4 insertions(+), 20 deletions(-) diff --git a/include/net/udp.h b/include/net/udp.h index bc275cda9f8c..76f401988353 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -552,7 +552,6 @@ static inline int copy_linear_skb(struct sk_buff *skb, int len, int off, #ifdef CONFIG_PROC_FS struct udp_seq_afinfo { sa_family_t family; - struct udp_table *udp_table; }; struct udp_iter_state { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index a7ca727347ce..14b372b211be 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -3194,19 +3194,6 @@ static bool seq_sk_match(struct seq_file *seq, const struct sock *sk) #ifdef CONFIG_BPF_SYSCALL static const struct seq_operations bpf_iter_udp_seq_ops; #endif -static struct udp_table *udp_get_table_seq(struct seq_file *seq, - struct net *net) -{ - const struct udp_seq_afinfo *afinfo; - -#ifdef CONFIG_BPF_SYSCALL - if (seq->op == &bpf_iter_udp_seq_ops) - return net->ipv4.udp_table; -#endif - - afinfo = pde_data(file_inode(seq->file)); - return afinfo->udp_table ? : net->ipv4.udp_table; -} static struct sock *udp_get_first(struct seq_file *seq, int start) { @@ -3215,7 +3202,7 @@ static struct sock *udp_get_first(struct seq_file *seq, int start) struct udp_table *udptable; struct sock *sk; - udptable = udp_get_table_seq(seq, net); + udptable = net->ipv4.udp_table; for (state->bucket = start; state->bucket <= udptable->mask; ++state->bucket) { @@ -3247,7 +3234,7 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) } while (sk && !seq_sk_match(seq, sk)); if (!sk) { - udptable = udp_get_table_seq(seq, net); + udptable = net->ipv4.udp_table; if (state->bucket <= udptable->mask) spin_unlock_bh(&udptable->hash[state->bucket].lock); @@ -3295,7 +3282,7 @@ void udp_seq_stop(struct seq_file *seq, void *v) struct udp_iter_state *state = seq->private; struct udp_table *udptable; - udptable = udp_get_table_seq(seq, seq_file_net(seq)); + udptable = seq_file_net(seq)->ipv4.udp_table; if (state->bucket <= udptable->mask) spin_unlock_bh(&udptable->hash[state->bucket].lock); @@ -3399,7 +3386,7 @@ static struct sock *bpf_iter_udp_batch(struct seq_file *seq) if (iter->cur_sk == iter->end_sk) state->bucket++; - udptable = udp_get_table_seq(seq, net); + udptable = net->ipv4.udp_table; again: /* New batch for the next bucket. @@ -3637,7 +3624,6 @@ static const struct seq_operations udp_seq_ops = { static struct udp_seq_afinfo udp4_seq_afinfo = { .family = AF_INET, - .udp_table = NULL, }; static int __net_init udp4_proc_init_net(struct net *net) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 5bddbf457b61..eeb77363a556 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1872,7 +1872,6 @@ static const struct seq_operations udp6_seq_ops = { static struct udp_seq_afinfo udp6_seq_afinfo = { .family = AF_INET6, - .udp_table = NULL, }; int __net_init udp6_proc_init(struct net *net) From 5a88b2810f267893e4b34f3044a9e11f952f03d9 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 11 Mar 2026 05:19:59 +0000 Subject: [PATCH 0498/1561] udp: Remove dead check in __udp[46]_lib_lookup() for BPF. BPF socket lookup for SO_REUSEPORT does not support UDP-Lite. In __udp4_lib_lookup() and __udp6_lib_lookup(), it checks if the passed udptable pointer is the same as net->ipv4.udp_table, which is only true for UDP. Now, the condition is always true. Let's remove the check. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260311052020.1213705-13-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/udp.c | 3 +-- net/ipv6/udp.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 14b372b211be..fd0bad44d111 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -699,8 +699,7 @@ struct sock *__udp4_lib_lookup(const struct net *net, __be32 saddr, goto done; /* Lookup redirect from BPF */ - if (static_branch_unlikely(&bpf_sk_lookup_enabled) && - udptable == net->ipv4.udp_table) { + if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { sk = inet_lookup_run_sk_lookup(net, IPPROTO_UDP, skb, sizeof(struct udphdr), saddr, sport, daddr, hnum, dif, udp_ehashfn); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index eeb77363a556..5ba399218d07 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -370,8 +370,7 @@ struct sock *__udp6_lib_lookup(const struct net *net, goto done; /* Lookup redirect from BPF */ - if (static_branch_unlikely(&bpf_sk_lookup_enabled) && - udptable == net->ipv4.udp_table) { + if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { sk = inet6_lookup_run_sk_lookup(net, IPPROTO_UDP, skb, sizeof(struct udphdr), saddr, sport, daddr, hnum, dif, udp6_ehashfn); From deffb85478a4076226f0213c7b9f7b7cf5dfe9f8 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 11 Mar 2026 05:20:00 +0000 Subject: [PATCH 0499/1561] udp: Don't pass udptable to IPv6 socket lookup functions. Since UDP and UDP-Lite had dedicated socket hash tables for each, we have had to pass the pointer down to many socket lookup functions. UDP-Lite gone, and we do not need to do that. Let's fetch net->ipv4.udp_table only where needed in IPv6 stack: __udp6_lib_lookup() and __udp6_lib_mcast_deliver(). __udp6_lib_err() is renamed to udpv6_err() as its wrapper is no longer needed. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260311052020.1213705-14-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/ipv6_stubs.h | 7 ++-- include/net/udp.h | 3 +- net/core/filter.c | 3 +- net/ipv4/udp_diag.c | 20 +++++----- net/ipv6/udp.c | 82 ++++++++++++++++++---------------------- net/ipv6/udp_offload.c | 3 +- 6 files changed, 53 insertions(+), 65 deletions(-) diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index d3013e721b14..907681cecde8 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -83,10 +83,9 @@ struct ipv6_bpf_stub { int (*inet6_bind)(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len, u32 flags); struct sock *(*udp6_lib_lookup)(const struct net *net, - const struct in6_addr *saddr, __be16 sport, - const struct in6_addr *daddr, __be16 dport, - int dif, int sdif, struct udp_table *tbl, - struct sk_buff *skb); + const struct in6_addr *saddr, __be16 sport, + const struct in6_addr *daddr, __be16 dport, + int dif, int sdif, struct sk_buff *skb); int (*ipv6_setsockopt)(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); int (*ipv6_getsockopt)(struct sock *sk, int level, int optname, diff --git a/include/net/udp.h b/include/net/udp.h index 76f401988353..adec74531ee1 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -449,8 +449,7 @@ struct sock *udp6_lib_lookup(const struct net *net, struct sock *__udp6_lib_lookup(const struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, - int dif, int sdif, struct udp_table *tbl, - struct sk_buff *skb); + int dif, int sdif, struct sk_buff *skb); struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb, __be16 sport, __be16 dport); int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor); diff --git a/net/core/filter.c b/net/core/filter.c index 78b548158fb0..b66d985785f7 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6904,8 +6904,7 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, sk = ipv6_bpf_stub->udp6_lib_lookup(net, src6, tuple->ipv6.sport, dst6, tuple->ipv6.dport, - dif, sdif, - net->ipv4.udp_table, NULL); + dif, sdif, NULL); #endif } diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index a010d05062a0..0899c60cce53 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -44,11 +44,11 @@ static int udp_dump_one(struct udp_table *tbl, #if IS_ENABLED(CONFIG_IPV6) else if (req->sdiag_family == AF_INET6) sk = __udp6_lib_lookup(net, - (struct in6_addr *)req->id.idiag_src, - req->id.idiag_sport, - (struct in6_addr *)req->id.idiag_dst, - req->id.idiag_dport, - req->id.idiag_if, 0, tbl, NULL); + (struct in6_addr *)req->id.idiag_src, + req->id.idiag_sport, + (struct in6_addr *)req->id.idiag_dst, + req->id.idiag_dport, + req->id.idiag_if, 0, NULL); #endif if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; @@ -185,11 +185,11 @@ static int __udp_diag_destroy(struct sk_buff *in_skb, else sk = __udp6_lib_lookup(net, - (struct in6_addr *)req->id.idiag_dst, - req->id.idiag_dport, - (struct in6_addr *)req->id.idiag_src, - req->id.idiag_sport, - req->id.idiag_if, 0, tbl, NULL); + (struct in6_addr *)req->id.idiag_dst, + req->id.idiag_dport, + (struct in6_addr *)req->id.idiag_src, + req->id.idiag_sport, + req->id.idiag_if, 0, NULL); } #endif else { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 5ba399218d07..dd958e2b552b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -344,9 +344,9 @@ static void udp6_hash4(struct sock *sk) struct sock *__udp6_lib_lookup(const struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, - int dif, int sdif, struct udp_table *udptable, - struct sk_buff *skb) + int dif, int sdif, struct sk_buff *skb) { + struct udp_table *udptable = net->ipv4.udp_table; unsigned short hnum = ntohs(dport); struct udp_hslot *hslot2; struct sock *result, *sk; @@ -406,14 +406,13 @@ done: EXPORT_SYMBOL_GPL(__udp6_lib_lookup); static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, - __be16 sport, __be16 dport, - struct udp_table *udptable) + __be16 sport, __be16 dport) { const struct ipv6hdr *iph = ipv6_hdr(skb); return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport, &iph->daddr, dport, inet6_iif(skb), - inet6_sdif(skb), udptable, skb); + inet6_sdif(skb), skb); } struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb, @@ -421,14 +420,12 @@ struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb, { const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation]; const struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + offset); - struct net *net = dev_net(skb->dev); int iif, sdif; inet6_get_iif_sdif(skb, &iif, &sdif); - return __udp6_lib_lookup(net, &iph->saddr, sport, - &iph->daddr, dport, iif, - sdif, net->ipv4.udp_table, NULL); + return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport, + &iph->daddr, dport, iif, sdif, NULL); } /* Must be called under rcu_read_lock(). @@ -440,8 +437,7 @@ struct sock *udp6_lib_lookup(const struct net *net, const struct in6_addr *saddr { struct sock *sk; - sk = __udp6_lib_lookup(net, saddr, sport, daddr, dport, - dif, 0, net->ipv4.udp_table, NULL); + sk = __udp6_lib_lookup(net, saddr, sport, daddr, dport, dif, 0, NULL); if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; return sk; @@ -642,7 +638,6 @@ static int __udp6_lib_err_encap_no_sk(struct sk_buff *skb, static struct sock *__udp6_lib_err_encap(struct net *net, const struct ipv6hdr *hdr, int offset, struct udphdr *uh, - struct udp_table *udptable, struct sock *sk, struct sk_buff *skb, struct inet6_skb_parm *opt, @@ -673,7 +668,7 @@ static struct sock *__udp6_lib_err_encap(struct net *net, sk = __udp6_lib_lookup(net, &hdr->daddr, uh->source, &hdr->saddr, uh->dest, - inet6_iif(skb), 0, udptable, skb); + inet6_iif(skb), 0, skb); if (sk) { up = udp_sk(sk); @@ -694,29 +689,28 @@ out: return sk; } -static int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - u8 type, u8 code, int offset, __be32 info, - struct udp_table *udptable) +static int udpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) { - struct ipv6_pinfo *np; const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; - const struct in6_addr *saddr = &hdr->saddr; - const struct in6_addr *daddr = seg6_get_daddr(skb, opt) ? : &hdr->daddr; - struct udphdr *uh = (struct udphdr *)(skb->data+offset); + struct udphdr *uh = (struct udphdr *)(skb->data + offset); + const struct in6_addr *saddr, *daddr; + struct net *net = dev_net(skb->dev); + struct ipv6_pinfo *np; bool tunnel = false; struct sock *sk; int harderr; int err; - struct net *net = dev_net(skb->dev); + daddr = seg6_get_daddr(skb, opt) ? : &hdr->daddr; + saddr = &hdr->saddr; sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, - inet6_iif(skb), inet6_sdif(skb), udptable, NULL); + inet6_iif(skb), inet6_sdif(skb), NULL); if (!sk || READ_ONCE(udp_sk(sk)->encap_type)) { /* No socket for error: try tunnels before discarding */ if (static_branch_unlikely(&udpv6_encap_needed_key)) { - sk = __udp6_lib_err_encap(net, hdr, offset, uh, - udptable, sk, skb, + sk = __udp6_lib_err_encap(net, hdr, offset, uh, sk, skb, opt, type, code, info); if (!sk) return 0; @@ -808,14 +802,6 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) return 0; } -static __inline__ int udpv6_err(struct sk_buff *skb, - struct inet6_skb_parm *opt, u8 type, - u8 code, int offset, __be32 info) -{ - return __udp6_lib_err(skb, opt, type, code, offset, info, - dev_net(skb->dev)->ipv4.udp_table); -} - static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) { enum skb_drop_reason drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; @@ -947,19 +933,27 @@ static void udp6_csum_zero_error(struct sk_buff *skb) * so we don't need to lock the hashes. */ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, - const struct in6_addr *saddr, const struct in6_addr *daddr, - struct udp_table *udptable, int proto) + const struct in6_addr *saddr, + const struct in6_addr *daddr, + int proto) { - struct sock *sk, *first = NULL; + struct udp_table *udptable = net->ipv4.udp_table; const struct udphdr *uh = udp_hdr(skb); + unsigned int hash2, hash2_any, offset; unsigned short hnum = ntohs(uh->dest); - struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum); - unsigned int offset = offsetof(typeof(*sk), sk_node); - unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10); - int dif = inet6_iif(skb); + struct sock *sk, *first = NULL; int sdif = inet6_sdif(skb); + int dif = inet6_iif(skb); struct hlist_node *node; + struct udp_hslot *hslot; struct sk_buff *nskb; + bool use_hash2; + + hash2_any = 0; + hash2 = 0; + hslot = udp_hashslot(udptable, net, hnum); + use_hash2 = hslot->count > 10; + offset = offsetof(typeof(*sk), sk_node); if (use_hash2) { hash2_any = ipv6_portaddr_hash(net, &in6addr_any, hnum) & @@ -1069,8 +1063,7 @@ static int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh) return 0; } -static int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, - int proto) +static int __udp6_lib_rcv(struct sk_buff *skb, int proto) { enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; const struct in6_addr *saddr, *daddr; @@ -1139,11 +1132,10 @@ static int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, * Multicast receive code */ if (ipv6_addr_is_multicast(daddr)) - return __udp6_lib_mcast_deliver(net, skb, - saddr, daddr, udptable, proto); + return __udp6_lib_mcast_deliver(net, skb, saddr, daddr, proto); /* Unicast */ - sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable); + sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest); if (sk) { if (!uh->check && !udp_get_no_check6_rx(sk)) goto report_csum_error; @@ -1261,7 +1253,7 @@ void udp_v6_early_demux(struct sk_buff *skb) INDIRECT_CALLABLE_SCOPE int udpv6_rcv(struct sk_buff *skb) { - return __udp6_lib_rcv(skb, dev_net(skb->dev)->ipv4.udp_table, IPPROTO_UDP); + return __udp6_lib_rcv(skb, IPPROTO_UDP); } /* diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index e003b8494dc0..778afc7453ce 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -128,8 +128,7 @@ static struct sock *udp6_gro_lookup_skb(struct sk_buff *skb, __be16 sport, inet6_get_iif_sdif(skb, &iif, &sdif); return __udp6_lib_lookup(net, &iph->saddr, sport, - &iph->daddr, dport, iif, - sdif, net->ipv4.udp_table, NULL); + &iph->daddr, dport, iif, sdif, NULL); } struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb) From 68aeb21ef0e183ff3675fb82e22573e959505f95 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 11 Mar 2026 05:20:01 +0000 Subject: [PATCH 0500/1561] udp: Don't pass udptable to IPv4 socket lookup functions. Since UDP and UDP-Lite had dedicated socket hash tables for each, we have had to pass the pointer down to many socket lookup functions. UDP-Lite gone, and we do not need to do that. Let's fetch net->ipv4.udp_table only where needed in IPv4 stack: __udp4_lib_lookup(), __udp4_lib_mcast_deliver(), and udp_diag_dump(). Some functions are renamed as the wrapper functions are no longer needed. __udp4_lib_err() -> udp_err() __udp_diag_destroy() -> udp_diag_destroy() udp_dump_one() -> udp_diag_dump_one() udp_dump() -> udp_diag_dump() Signed-off-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260311052020.1213705-15-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/udp.h | 5 ++- net/core/filter.c | 2 +- net/ipv4/udp.c | 77 +++++++++++++++++++----------------------- net/ipv4/udp_diag.c | 61 ++++++++++++--------------------- net/ipv4/udp_offload.c | 3 +- 5 files changed, 60 insertions(+), 88 deletions(-) diff --git a/include/net/udp.h b/include/net/udp.h index adec74531ee1..8262e2b215b4 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -437,9 +437,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, struct sock *udp4_lib_lookup(const struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif); struct sock *__udp4_lib_lookup(const struct net *net, __be32 saddr, - __be16 sport, - __be32 daddr, __be16 dport, int dif, int sdif, - struct udp_table *tbl, struct sk_buff *skb); + __be16 sport, __be32 daddr, __be16 dport, + int dif, int sdif, struct sk_buff *skb); struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb, __be16 sport, __be16 dport); struct sock *udp6_lib_lookup(const struct net *net, diff --git a/net/core/filter.c b/net/core/filter.c index b66d985785f7..2f023999f046 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6889,7 +6889,7 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, else sk = __udp4_lib_lookup(net, src4, tuple->ipv4.sport, dst4, tuple->ipv4.dport, - dif, sdif, net->ipv4.udp_table, NULL); + dif, sdif, NULL); #if IS_ENABLED(CONFIG_IPV6) } else { struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index fd0bad44d111..dff803e7d6a8 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -673,9 +673,10 @@ EXPORT_IPV6_MOD(udp4_hash4); * harder than this. -DaveM */ struct sock *__udp4_lib_lookup(const struct net *net, __be32 saddr, - __be16 sport, __be32 daddr, __be16 dport, int dif, - int sdif, struct udp_table *udptable, struct sk_buff *skb) + __be16 sport, __be32 daddr, __be16 dport, + int dif, int sdif, struct sk_buff *skb) { + struct udp_table *udptable = net->ipv4.udp_table; unsigned short hnum = ntohs(dport); struct udp_hslot *hslot2; struct sock *result, *sk; @@ -741,14 +742,13 @@ done: EXPORT_SYMBOL_GPL(__udp4_lib_lookup); static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, - __be16 sport, __be16 dport, - struct udp_table *udptable) + __be16 sport, __be16 dport) { const struct iphdr *iph = ip_hdr(skb); return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport, iph->daddr, dport, inet_iif(skb), - inet_sdif(skb), udptable, skb); + inet_sdif(skb), skb); } struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb, @@ -756,14 +756,12 @@ struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb, { const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation]; const struct iphdr *iph = (struct iphdr *)(skb->data + offset); - struct net *net = dev_net(skb->dev); int iif, sdif; inet_get_iif_sdif(skb, &iif, &sdif); - return __udp4_lib_lookup(net, iph->saddr, sport, - iph->daddr, dport, iif, - sdif, net->ipv4.udp_table, NULL); + return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport, + iph->daddr, dport, iif, sdif, NULL); } /* Must be called under rcu_read_lock(). @@ -775,8 +773,7 @@ struct sock *udp4_lib_lookup(const struct net *net, __be32 saddr, __be16 sport, { struct sock *sk; - sk = __udp4_lib_lookup(net, saddr, sport, daddr, dport, - dif, 0, net->ipv4.udp_table, NULL); + sk = __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, 0, NULL); if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; return sk; @@ -866,7 +863,6 @@ static int __udp4_lib_err_encap_no_sk(struct sk_buff *skb, u32 info) static struct sock *__udp4_lib_err_encap(struct net *net, const struct iphdr *iph, struct udphdr *uh, - struct udp_table *udptable, struct sock *sk, struct sk_buff *skb, u32 info) { @@ -894,8 +890,7 @@ static struct sock *__udp4_lib_err_encap(struct net *net, } sk = __udp4_lib_lookup(net, iph->daddr, uh->source, - iph->saddr, uh->dest, skb->dev->ifindex, 0, - udptable, NULL); + iph->saddr, uh->dest, skb->dev->ifindex, 0, NULL); if (sk) { up = udp_sk(sk); @@ -924,29 +919,28 @@ out: * header points to the first 8 bytes of the udp header. We need * to find the appropriate port. */ - -static int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) +int udp_err(struct sk_buff *skb, u32 info) { - struct inet_sock *inet; const struct iphdr *iph = (const struct iphdr *)skb->data; - struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2)); const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; + struct net *net = dev_net(skb->dev); + struct inet_sock *inet; bool tunnel = false; + struct udphdr *uh; struct sock *sk; int harderr; int err; - struct net *net = dev_net(skb->dev); + uh = (struct udphdr *)(skb->data + (iph->ihl << 2)); sk = __udp4_lib_lookup(net, iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex, - inet_sdif(skb), udptable, NULL); + inet_sdif(skb), NULL); if (!sk || READ_ONCE(udp_sk(sk)->encap_type)) { /* No socket for error: try tunnels before discarding */ if (static_branch_unlikely(&udp_encap_needed_key)) { - sk = __udp4_lib_err_encap(net, iph, uh, udptable, sk, skb, - info); + sk = __udp4_lib_err_encap(net, iph, uh, sk, skb, info); if (!sk) return 0; } else @@ -1019,11 +1013,6 @@ out: return 0; } -int udp_err(struct sk_buff *skb, u32 info) -{ - return __udp4_lib_err(skb, info, dev_net(skb->dev)->ipv4.udp_table); -} - /* * Throw away all pending data and cancel the corking. Socket is locked. */ @@ -2491,18 +2480,24 @@ EXPORT_IPV6_MOD(udp_sk_rx_dst_set); static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, struct udphdr *uh, __be32 saddr, __be32 daddr, - struct udp_table *udptable, int proto) { - struct sock *sk, *first = NULL; + struct udp_table *udptable = net->ipv4.udp_table; + unsigned int hash2, hash2_any, offset; unsigned short hnum = ntohs(uh->dest); - struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum); - unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10); - unsigned int offset = offsetof(typeof(*sk), sk_node); + struct sock *sk, *first = NULL; int dif = skb->dev->ifindex; int sdif = inet_sdif(skb); struct hlist_node *node; + struct udp_hslot *hslot; struct sk_buff *nskb; + bool use_hash2; + + hash2_any = 0; + hash2 = 0; + hslot = udp_hashslot(udptable, net, hnum); + use_hash2 = hslot->count > 10; + offset = offsetof(typeof(*sk), sk_node); if (use_hash2) { hash2_any = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum) & @@ -2607,15 +2602,14 @@ static int udp_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb, * All we need to do is get the socket, and then do a checksum. */ -static int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, - int proto) +static int __udp4_lib_rcv(struct sk_buff *skb, int proto) { - struct sock *sk = NULL; - struct udphdr *uh; - unsigned short ulen; struct rtable *rt = skb_rtable(skb); - __be32 saddr, daddr; struct net *net = dev_net(skb->dev); + struct sock *sk = NULL; + unsigned short ulen; + __be32 saddr, daddr; + struct udphdr *uh; bool refcounted; int drop_reason; @@ -2667,10 +2661,9 @@ static int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, } if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) - return __udp4_lib_mcast_deliver(net, skb, uh, - saddr, daddr, udptable, proto); + return __udp4_lib_mcast_deliver(net, skb, uh, saddr, daddr, proto); - sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable); + sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest); if (sk) return udp_unicast_rcv_skb(sk, skb, uh); no_sk: @@ -2854,7 +2847,7 @@ enum skb_drop_reason udp_v4_early_demux(struct sk_buff *skb) int udp_rcv(struct sk_buff *skb) { - return __udp4_lib_rcv(skb, dev_net(skb->dev)->ipv4.udp_table, IPPROTO_UDP); + return __udp4_lib_rcv(skb, IPPROTO_UDP); } static void udp_destroy_sock(struct sock *sk) diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 0899c60cce53..f4b24e628cf8 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -24,23 +24,24 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, net_admin); } -static int udp_dump_one(struct udp_table *tbl, - struct netlink_callback *cb, - const struct inet_diag_req_v2 *req) +static int udp_diag_dump_one(struct netlink_callback *cb, + const struct inet_diag_req_v2 *req) { struct sk_buff *in_skb = cb->skb; - int err; struct sock *sk = NULL; struct sk_buff *rep; - struct net *net = sock_net(in_skb->sk); + struct net *net; + int err; + + net = sock_net(in_skb->sk); rcu_read_lock(); if (req->sdiag_family == AF_INET) /* src and dst are swapped for historical reasons */ sk = __udp4_lib_lookup(net, - req->id.idiag_src[0], req->id.idiag_sport, - req->id.idiag_dst[0], req->id.idiag_dport, - req->id.idiag_if, 0, tbl, NULL); + req->id.idiag_src[0], req->id.idiag_sport, + req->id.idiag_dst[0], req->id.idiag_dport, + req->id.idiag_if, 0, NULL); #if IS_ENABLED(CONFIG_IPV6) else if (req->sdiag_family == AF_INET6) sk = __udp6_lib_lookup(net, @@ -85,14 +86,15 @@ out_nosk: return err; } -static void udp_dump(struct udp_table *table, struct sk_buff *skb, - struct netlink_callback *cb, - const struct inet_diag_req_v2 *r) +static void udp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, + const struct inet_diag_req_v2 *r) { bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); struct net *net = sock_net(skb->sk); int num, s_num, slot, s_slot; + struct udp_table *table; + table = net->ipv4.udp_table; s_slot = cb->args[0]; num = s_num = cb->args[1]; @@ -139,18 +141,6 @@ done: cb->args[1] = num; } -static void udp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - const struct inet_diag_req_v2 *r) -{ - udp_dump(sock_net(cb->skb->sk)->ipv4.udp_table, skb, cb, r); -} - -static int udp_diag_dump_one(struct netlink_callback *cb, - const struct inet_diag_req_v2 *req) -{ - return udp_dump_one(sock_net(cb->skb->sk)->ipv4.udp_table, cb, req); -} - static void udp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, void *info) { @@ -159,9 +149,8 @@ static void udp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, } #ifdef CONFIG_INET_DIAG_DESTROY -static int __udp_diag_destroy(struct sk_buff *in_skb, - const struct inet_diag_req_v2 *req, - struct udp_table *tbl) +static int udp_diag_destroy(struct sk_buff *in_skb, + const struct inet_diag_req_v2 *req) { struct net *net = sock_net(in_skb->sk); struct sock *sk; @@ -171,18 +160,17 @@ static int __udp_diag_destroy(struct sk_buff *in_skb, if (req->sdiag_family == AF_INET) sk = __udp4_lib_lookup(net, - req->id.idiag_dst[0], req->id.idiag_dport, - req->id.idiag_src[0], req->id.idiag_sport, - req->id.idiag_if, 0, tbl, NULL); + req->id.idiag_dst[0], req->id.idiag_dport, + req->id.idiag_src[0], req->id.idiag_sport, + req->id.idiag_if, 0, NULL); #if IS_ENABLED(CONFIG_IPV6) else if (req->sdiag_family == AF_INET6) { if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) && ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src)) sk = __udp4_lib_lookup(net, - req->id.idiag_dst[3], req->id.idiag_dport, - req->id.idiag_src[3], req->id.idiag_sport, - req->id.idiag_if, 0, tbl, NULL); - + req->id.idiag_dst[3], req->id.idiag_dport, + req->id.idiag_src[3], req->id.idiag_sport, + req->id.idiag_if, 0, NULL); else sk = __udp6_lib_lookup(net, (struct in6_addr *)req->id.idiag_dst, @@ -216,13 +204,6 @@ static int __udp_diag_destroy(struct sk_buff *in_skb, return err; } - -static int udp_diag_destroy(struct sk_buff *in_skb, - const struct inet_diag_req_v2 *req) -{ - return __udp_diag_destroy(in_skb, req, sock_net(in_skb->sk)->ipv4.udp_table); -} - #endif static const struct inet_diag_handler udp_diag_handler = { diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 6b1654c1ad4a..98e92da726b5 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -869,8 +869,7 @@ static struct sock *udp4_gro_lookup_skb(struct sk_buff *skb, __be16 sport, inet_get_iif_sdif(skb, &iif, &sdif); return __udp4_lib_lookup(net, iph->saddr, sport, - iph->daddr, dport, iif, - sdif, net->ipv4.udp_table, NULL); + iph->daddr, dport, iif, sdif, NULL); } INDIRECT_CALLABLE_SCOPE From 14ce9a47c5b7497193cb4fdf9980b847482bd289 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 11 Mar 2026 05:20:02 +0000 Subject: [PATCH 0501/1561] udp: Don't pass proto to __udp4_lib_rcv() and __udp6_lib_rcv(). UDP and UDP-Lite shared __udp4_lib_rcv() and __udp6_lib_rcv() by passing IPPROTO_UDP or IPPROTO_UDPLITE. Now, @proto is always IPPROTO_UDP. Let's not pass it and rename the functions accordingly. With this series removing a bunch of conditionals for UDP-Lite from the fast path, udp_rr with 20,000 flows sees a 10% increase in pps (13.3 Mpps -> 14.7 Mpps) on an AMD EPYC 7B12 (Zen 2) 64-Core Processor platform. [ With FDO, the baseline is much higher and the delta was ~3%, 20.1 Mpps -> 20.7 Mpps ] Before: $ nstat > /dev/null; sleep 1; nstat | grep Udp Udp6InDatagrams 14013408 0.0 Udp6OutDatagrams 14013128 0.0 After: $ nstat > /dev/null; sleep 1; nstat | grep Udp Udp6InDatagrams 15491971 0.0 Udp6OutDatagrams 15491671 0.0 $ ./scripts/bloat-o-meter vmlinux.before vmlinux.after add/remove: 13/75 grow/shrink: 11/75 up/down: 13777/-18401 (-4624) Function old new delta udp4_gro_receive 872 866 -6 udp6_gro_receive 910 903 -7 udp_rcv 32 1727 +1695 udpv6_rcv 32 1450 +1418 __udp4_lib_rcv 2045 - -2045 __udp6_lib_rcv 2084 - -2084 udp_unicast_rcv_skb 160 149 -11 udp6_unicast_rcv_skb 196 181 -15 __udp4_lib_mcast_deliver 925 846 -79 __udp6_lib_mcast_deliver 922 810 -112 __udp4_lib_lookup 973 969 -4 __udp6_lib_lookup 940 929 -11 __udp4_lib_lookup_skb 106 100 -6 __udp6_lib_lookup_skb 71 66 -5 udp4_lib_lookup_skb 132 127 -5 udp6_lib_lookup_skb 87 81 -6 udp_queue_rcv_skb 326 356 +30 udpv6_queue_rcv_skb 331 361 +30 udp_queue_rcv_one_skb 1233 914 -319 udpv6_queue_rcv_one_skb 1250 930 -320 __udp_enqueue_schedule_skb 1067 995 -72 udp_rcv_segment 520 480 -40 udp_post_segment_fix_csum 120 - -120 udp_lib_checksum_complete 200 84 -116 udp_err 27 1103 +1076 udpv6_err 36 1417 +1381 __udp4_lib_err 1112 - -1112 __udp6_lib_err 1448 - -1448 udp_recvmsg 1149 994 -155 udpv6_recvmsg 1349 1294 -55 udp_sendmsg 2730 2648 -82 udp_send_skb 909 681 -228 udpv6_sendmsg 3022 2861 -161 udp_v6_send_skb 1214 952 -262 ... Total: Before=18446744073748075501, After=18446744073748070877, chg -0.00% Signed-off-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260311052020.1213705-16-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/udp.c | 18 +++++------------- net/ipv6/udp.c | 15 ++++----------- 2 files changed, 9 insertions(+), 24 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index dff803e7d6a8..7e3f9fd9de19 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2479,8 +2479,7 @@ EXPORT_IPV6_MOD(udp_sk_rx_dst_set); */ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, struct udphdr *uh, - __be32 saddr, __be32 daddr, - int proto) + __be32 saddr, __be32 daddr) { struct udp_table *udptable = net->ipv4.udp_table; unsigned int hash2, hash2_any, offset; @@ -2602,7 +2601,7 @@ static int udp_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb, * All we need to do is get the socket, and then do a checksum. */ -static int __udp4_lib_rcv(struct sk_buff *skb, int proto) +int udp_rcv(struct sk_buff *skb) { struct rtable *rt = skb_rtable(skb); struct net *net = dev_net(skb->dev); @@ -2661,7 +2660,7 @@ static int __udp4_lib_rcv(struct sk_buff *skb, int proto) } if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) - return __udp4_lib_mcast_deliver(net, skb, uh, saddr, daddr, proto); + return __udp4_lib_mcast_deliver(net, skb, uh, saddr, daddr); sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest); if (sk) @@ -2688,8 +2687,7 @@ no_sk: short_packet: drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL; - net_dbg_ratelimited("UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n", - proto == IPPROTO_UDPLITE ? "Lite" : "", + net_dbg_ratelimited("UDP: short packet: From %pI4:%u %d/%d to %pI4:%u\n", &saddr, ntohs(uh->source), ulen, skb->len, &daddr, ntohs(uh->dest)); @@ -2701,8 +2699,7 @@ csum_error: * the network is concerned, anyway) as per 4.1.3.4 (MUST). */ drop_reason = SKB_DROP_REASON_UDP_CSUM; - net_dbg_ratelimited("UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n", - proto == IPPROTO_UDPLITE ? "Lite" : "", + net_dbg_ratelimited("UDP: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n", &saddr, ntohs(uh->source), &daddr, ntohs(uh->dest), ulen); __UDP_INC_STATS(net, UDP_MIB_CSUMERRORS); @@ -2845,11 +2842,6 @@ enum skb_drop_reason udp_v4_early_demux(struct sk_buff *skb) return SKB_NOT_DROPPED_YET; } -int udp_rcv(struct sk_buff *skb) -{ - return __udp4_lib_rcv(skb, IPPROTO_UDP); -} - static void udp_destroy_sock(struct sock *sk) { struct udp_sock *up = udp_sk(sk); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index dd958e2b552b..d7cf4c9508b2 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -934,8 +934,7 @@ static void udp6_csum_zero_error(struct sk_buff *skb) */ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, const struct in6_addr *saddr, - const struct in6_addr *daddr, - int proto) + const struct in6_addr *daddr) { struct udp_table *udptable = net->ipv4.udp_table; const struct udphdr *uh = udp_hdr(skb); @@ -1063,7 +1062,7 @@ static int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh) return 0; } -static int __udp6_lib_rcv(struct sk_buff *skb, int proto) +INDIRECT_CALLABLE_SCOPE int udpv6_rcv(struct sk_buff *skb) { enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; const struct in6_addr *saddr, *daddr; @@ -1132,7 +1131,7 @@ static int __udp6_lib_rcv(struct sk_buff *skb, int proto) * Multicast receive code */ if (ipv6_addr_is_multicast(daddr)) - return __udp6_lib_mcast_deliver(net, skb, saddr, daddr, proto); + return __udp6_lib_mcast_deliver(net, skb, saddr, daddr); /* Unicast */ sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest); @@ -1163,8 +1162,7 @@ no_sk: short_packet: if (reason == SKB_DROP_REASON_NOT_SPECIFIED) reason = SKB_DROP_REASON_PKT_TOO_SMALL; - net_dbg_ratelimited("UDP%sv6: short packet: From [%pI6c]:%u %d/%d to [%pI6c]:%u\n", - proto == IPPROTO_UDPLITE ? "-Lite" : "", + net_dbg_ratelimited("UDPv6: short packet: From [%pI6c]:%u %d/%d to [%pI6c]:%u\n", saddr, ntohs(uh->source), ulen, skb->len, daddr, ntohs(uh->dest)); @@ -1251,11 +1249,6 @@ void udp_v6_early_demux(struct sk_buff *skb) } } -INDIRECT_CALLABLE_SCOPE int udpv6_rcv(struct sk_buff *skb) -{ - return __udp6_lib_rcv(skb, IPPROTO_UDP); -} - /* * Throw away all pending data and cancel the corking. Socket is locked. */ From 9089c5f3c444ad6e9eb172e9375615ed0b0bc31c Mon Sep 17 00:00:00 2001 From: Sagi Maimon Date: Thu, 12 Mar 2026 10:20:03 +0200 Subject: [PATCH 0502/1561] ptp: ocp: Add support for Xilinx-based Adva TimeCard variant Add support for the Adva TimeCard model built on a Xilinx-based design. This patch enables detection and integration of the new hardware within the existing OCP timecard framework. The Xilinx variant relies on the shared driver infrastructure, requiring only small, targeted additions to accommodate its specific characteristics. Signed-off-by: Sagi Maimon Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20260312082009.249622-1-maimon.sagi@gmail.com Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_ocp.c | 365 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 337 insertions(+), 28 deletions(-) diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index d88ab2f86b1b..beacc2ffb166 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -35,6 +35,7 @@ #define PCI_VENDOR_ID_ADVA 0xad5a #define PCI_DEVICE_ID_ADVA_TIMECARD 0x0400 +#define PCI_DEVICE_ID_ADVA_TIMECARD_X1 0x0410 static struct class timecard_class = { .name = "timecard", @@ -72,6 +73,20 @@ struct ptp_ocp_servo_conf { u32 servo_drift_i; }; +/* + * Combined servo + board-variant parameters for ADVA boards. + * Embedded in the resource table .extra so a single ptp_ocp_adva_board_init() + * can handle both ADVA and ADVA-X1 without per-variant init functions. + */ +struct ptp_ocp_adva_info { + struct ptp_ocp_servo_conf servo; + u32 flash_start; + const struct ocp_sma_op *sma_op; + u8 signals_nr; + u8 freq_in_nr; + const struct ocp_attr_group *attr_groups; +}; + #define OCP_CTRL_ENABLE BIT(0) #define OCP_CTRL_ADJUST_TIME BIT(1) #define OCP_CTRL_ADJUST_OFFSET BIT(2) @@ -293,6 +308,20 @@ struct ocp_attr_group { const struct attribute_group *group; }; +struct ocp_selector { + const char *name; + int value; + u64 frequency; +}; + +struct ocp_sma_op { + const struct ocp_selector *tbl[2]; + void (*init)(struct ptp_ocp *bp); + u32 (*get)(struct ptp_ocp *bp, int sma_nr); + int (*set_inputs)(struct ptp_ocp *bp, int sma_nr, u32 val); + int (*set_output)(struct ptp_ocp *bp, int sma_nr, u32 val); +}; + #define OCP_CAP_BASIC BIT(0) #define OCP_CAP_SIGNAL BIT(1) #define OCP_CAP_FREQ BIT(2) @@ -420,12 +449,17 @@ static int ptp_ocp_art_board_init(struct ptp_ocp *bp, struct ocp_resource *r); static int ptp_ocp_adva_board_init(struct ptp_ocp *bp, struct ocp_resource *r); +static const struct ocp_sma_op ocp_adva_sma_op; +static const struct ocp_sma_op ocp_adva_x1_sma_op; + static const struct ocp_attr_group fb_timecard_groups[]; static const struct ocp_attr_group art_timecard_groups[]; static const struct ocp_attr_group adva_timecard_groups[]; +static const struct ocp_attr_group adva_timecard_x1_groups[]; + struct ptp_ocp_eeprom_map { u16 off; u16 len; @@ -1020,11 +1054,225 @@ static struct ocp_resource ocp_adva_resource[] = { }, { .setup = ptp_ocp_adva_board_init, - .extra = &(struct ptp_ocp_servo_conf) { - .servo_offset_p = 0xc000, - .servo_offset_i = 0x1000, - .servo_drift_p = 0, - .servo_drift_i = 0, + .extra = &(struct ptp_ocp_adva_info) { + .servo = { + .servo_offset_p = 0xc000, + .servo_offset_i = 0x1000, + .servo_drift_p = 0, + .servo_drift_i = 0, + }, + .flash_start = 0xA00000, + .sma_op = &ocp_adva_sma_op, + .signals_nr = 2, + .freq_in_nr = 2, + .attr_groups = adva_timecard_groups, + }, + }, + { } +}; + +static struct ocp_resource ocp_adva_x1_resource[] = { + { + OCP_MEM_RESOURCE(reg), + .offset = 0x01000000, .size = 0x10000, + }, + { + OCP_EXT_RESOURCE(ts0), + .offset = 0x01010000, .size = 0x10000, .irq_vec = 1, + .extra = &(struct ptp_ocp_ext_info) { + .index = 0, + .irq_fcn = ptp_ocp_ts_irq, + .enable = ptp_ocp_ts_enable, + }, + }, + { + OCP_EXT_RESOURCE(ts1), + .offset = 0x01020000, .size = 0x10000, .irq_vec = 2, + .extra = &(struct ptp_ocp_ext_info) { + .index = 1, + .irq_fcn = ptp_ocp_ts_irq, + .enable = ptp_ocp_ts_enable, + }, + }, + { + OCP_EXT_RESOURCE(ts2), + .offset = 0x01060000, .size = 0x10000, .irq_vec = 6, + .extra = &(struct ptp_ocp_ext_info) { + .index = 2, + .irq_fcn = ptp_ocp_ts_irq, + .enable = ptp_ocp_ts_enable, + }, + }, + { + OCP_EXT_RESOURCE(ts3), + .offset = 0x01110000, .size = 0x10000, .irq_vec = 15, + .extra = &(struct ptp_ocp_ext_info) { + .index = 3, + .irq_fcn = ptp_ocp_ts_irq, + .enable = ptp_ocp_ts_enable, + }, + }, + { + OCP_EXT_RESOURCE(ts4), + .offset = 0x01120000, .size = 0x10000, .irq_vec = 16, + .extra = &(struct ptp_ocp_ext_info) { + .index = 4, + .irq_fcn = ptp_ocp_ts_irq, + .enable = ptp_ocp_ts_enable, + }, + }, + /* Timestamp for PHC and/or PPS generator */ + { + OCP_EXT_RESOURCE(pps), + .offset = 0x010C0000, .size = 0x10000, .irq_vec = 0, + .extra = &(struct ptp_ocp_ext_info) { + .index = 5, + .irq_fcn = ptp_ocp_ts_irq, + .enable = ptp_ocp_ts_enable, + }, + }, + { + OCP_EXT_RESOURCE(signal_out[0]), + .offset = 0x010D0000, .size = 0x10000, .irq_vec = 11, + .extra = &(struct ptp_ocp_ext_info) { + .index = 1, + .irq_fcn = ptp_ocp_signal_irq, + .enable = ptp_ocp_signal_enable, + }, + }, + { + OCP_EXT_RESOURCE(signal_out[1]), + .offset = 0x010E0000, .size = 0x10000, .irq_vec = 12, + .extra = &(struct ptp_ocp_ext_info) { + .index = 2, + .irq_fcn = ptp_ocp_signal_irq, + .enable = ptp_ocp_signal_enable, + }, + }, + { + OCP_EXT_RESOURCE(signal_out[2]), + .offset = 0x010F0000, .size = 0x10000, .irq_vec = 13, + .extra = &(struct ptp_ocp_ext_info) { + .index = 3, + .irq_fcn = ptp_ocp_signal_irq, + .enable = ptp_ocp_signal_enable, + }, + }, + { + OCP_EXT_RESOURCE(signal_out[3]), + .offset = 0x01100000, .size = 0x10000, .irq_vec = 14, + .extra = &(struct ptp_ocp_ext_info) { + .index = 4, + .irq_fcn = ptp_ocp_signal_irq, + .enable = ptp_ocp_signal_enable, + }, + }, + { + OCP_MEM_RESOURCE(pps_to_ext), + .offset = 0x01030000, .size = 0x10000, + }, + { + OCP_MEM_RESOURCE(pps_to_clk), + .offset = 0x01040000, .size = 0x10000, + }, + { + OCP_MEM_RESOURCE(tod), + .offset = 0x01050000, .size = 0x10000, + }, + { + OCP_MEM_RESOURCE(image), + .offset = 0x00020000, .size = 0x1000, + }, + { + OCP_MEM_RESOURCE(pps_select), + .offset = 0x00130000, .size = 0x1000, + }, + { + OCP_MEM_RESOURCE(sma_map1), + .offset = 0x00140000, .size = 0x1000, + }, + { + OCP_MEM_RESOURCE(sma_map2), + .offset = 0x00220000, .size = 0x1000, + }, + { + OCP_SERIAL_RESOURCE(port[PORT_GNSS]), + .offset = 0x00160000 + 0x1000, .irq_vec = 3, + .extra = &(struct ptp_ocp_serial_port) { + .baud = 9600, + }, + }, + { + OCP_SERIAL_RESOURCE(port[PORT_MAC]), + .offset = 0x00180000 + 0x1000, .irq_vec = 5, + .extra = &(struct ptp_ocp_serial_port) { + .baud = 115200, + }, + }, + { + OCP_MEM_RESOURCE(freq_in[0]), + .offset = 0x01200000, .size = 0x10000, + }, + { + OCP_MEM_RESOURCE(freq_in[1]), + .offset = 0x01210000, .size = 0x10000, + }, + { + OCP_MEM_RESOURCE(freq_in[2]), + .offset = 0x01220000, .size = 0x10000, + }, + { + OCP_MEM_RESOURCE(freq_in[3]), + .offset = 0x01230000, .size = 0x10000, + }, + { + OCP_SPI_RESOURCE(spi_flash), + .offset = 0x00310000, .size = 0x10000, .irq_vec = 9, + .extra = &(struct ptp_ocp_flash_info) { + .name = "xilinx_spi", .pci_offset = 0, + .data_size = sizeof(struct xspi_platform_data), + .data = &(struct xspi_platform_data) { + .num_chipselect = 1, + .bits_per_word = 8, + .num_devices = 1, + .force_irq = true, + .devices = &(struct spi_board_info) { + .modalias = "spi-nor", + }, + }, + }, + }, + { + OCP_I2C_RESOURCE(i2c_ctrl), + .offset = 0x00150000, .size = 0x10000, .irq_vec = 7, + .extra = &(struct ptp_ocp_i2c_info) { + .name = "xiic-i2c", + .fixed_rate = 50000000, + .data_size = sizeof(struct xiic_i2c_platform_data), + .data = &(struct xiic_i2c_platform_data) { + .num_devices = 2, + .devices = (struct i2c_board_info[]) { + { I2C_BOARD_INFO("24c02", 0x50) }, + { I2C_BOARD_INFO("24mac402", 0x58), + .platform_data = "mac" }, + }, + }, + }, + }, + { + .setup = ptp_ocp_adva_board_init, + .extra = &(struct ptp_ocp_adva_info) { + .servo = { + .servo_offset_p = 0xc000, + .servo_offset_i = 0x1000, + .servo_drift_p = 0, + .servo_drift_i = 0, + }, + .flash_start = 0x1000000, + .sma_op = &ocp_adva_x1_sma_op, + .signals_nr = 4, + .freq_in_nr = 4, + .attr_groups = adva_timecard_x1_groups, }, }, { } @@ -1035,6 +1283,7 @@ static const struct pci_device_id ptp_ocp_pcidev_id[] = { { PCI_DEVICE_DATA(CELESTICA, TIMECARD, &ocp_fb_resource) }, { PCI_DEVICE_DATA(OROLIA, ARTCARD, &ocp_art_resource) }, { PCI_DEVICE_DATA(ADVA, TIMECARD, &ocp_adva_resource) }, + { PCI_DEVICE_DATA(ADVA, TIMECARD_X1, &ocp_adva_x1_resource) }, { } }; MODULE_DEVICE_TABLE(pci, ptp_ocp_pcidev_id); @@ -1042,12 +1291,6 @@ MODULE_DEVICE_TABLE(pci, ptp_ocp_pcidev_id); static DEFINE_MUTEX(ptp_ocp_lock); static DEFINE_IDR(ptp_ocp_idr); -struct ocp_selector { - const char *name; - int value; - u64 frequency; -}; - static const struct ocp_selector ptp_ocp_clock[] = { { .name = "NONE", .value = 0 }, { .name = "TOD", .value = 1 }, @@ -1137,12 +1380,32 @@ static const struct ocp_selector ptp_ocp_adva_sma_out[] = { { } }; -struct ocp_sma_op { - const struct ocp_selector *tbl[2]; - void (*init)(struct ptp_ocp *bp); - u32 (*get)(struct ptp_ocp *bp, int sma_nr); - int (*set_inputs)(struct ptp_ocp *bp, int sma_nr, u32 val); - int (*set_output)(struct ptp_ocp *bp, int sma_nr, u32 val); +static const struct ocp_selector ptp_ocp_adva_x1_sma_in[] = { + { .name = "PPS1", .value = 0x0001, .frequency = 1 }, + { .name = "TS1", .value = 0x0004, .frequency = 0 }, + { .name = "TS2", .value = 0x0008, .frequency = 0 }, + { .name = "TS3", .value = 0x0040, .frequency = 0 }, + { .name = "TS4", .value = 0x0080, .frequency = 0 }, + { .name = "FREQ1", .value = 0x0100, .frequency = 0 }, + { .name = "FREQ2", .value = 0x0200, .frequency = 0 }, + { .name = "FREQ3", .value = 0x0400, .frequency = 0 }, + { .name = "FREQ4", .value = 0x0800, .frequency = 0 }, + { .name = "None", .value = SMA_DISABLE, .frequency = 0 }, + { } +}; + +static const struct ocp_selector ptp_ocp_adva_x1_sma_out[] = { + { .name = "10Mhz", .value = 0x0000, .frequency = 10000000}, + { .name = "PHC", .value = 0x0001, .frequency = 1 }, + { .name = "MAC", .value = 0x0002, .frequency = 1 }, + { .name = "GNSS1", .value = 0x0004, .frequency = 1 }, + { .name = "GEN1", .value = 0x0040 }, + { .name = "GEN2", .value = 0x0080 }, + { .name = "GEN3", .value = 0x0100 }, + { .name = "GEN4", .value = 0x0200 }, + { .name = "GND", .value = 0x2000 }, + { .name = "VCC", .value = 0x4000 }, + { } }; static void @@ -2639,6 +2902,14 @@ static const struct ocp_sma_op ocp_adva_sma_op = { .set_output = ptp_ocp_sma_adva_set_output, }; +static const struct ocp_sma_op ocp_adva_x1_sma_op = { + .tbl = { ptp_ocp_adva_x1_sma_in, ptp_ocp_adva_x1_sma_out }, + .init = ptp_ocp_sma_fb_init, + .get = ptp_ocp_sma_fb_get, + .set_inputs = ptp_ocp_sma_adva_set_inputs, + .set_output = ptp_ocp_sma_adva_set_output, +}; + static int ptp_ocp_set_pins(struct ptp_ocp *bp) { @@ -2887,18 +3158,19 @@ ptp_ocp_art_board_init(struct ptp_ocp *bp, struct ocp_resource *r) return ptp_ocp_init_clock(bp, r->extra); } -/* ADVA specific board initializers; last "resource" registered. */ +/* ADVA board initializer; variant differences come from r->extra. */ static int ptp_ocp_adva_board_init(struct ptp_ocp *bp, struct ocp_resource *r) { - int err; + struct ptp_ocp_adva_info *info = r->extra; u32 version; + int err; - bp->flash_start = 0xA00000; - bp->eeprom_map = fb_eeprom_map; - bp->sma_op = &ocp_adva_sma_op; - bp->signals_nr = 2; - bp->freq_in_nr = 2; + bp->flash_start = info->flash_start; + bp->eeprom_map = fb_eeprom_map; + bp->sma_op = info->sma_op; + bp->signals_nr = info->signals_nr; + bp->freq_in_nr = info->freq_in_nr; version = ioread32(&bp->image->version); /* if lower 16 bits are empty, this is the fw loader. */ @@ -2906,15 +3178,15 @@ ptp_ocp_adva_board_init(struct ptp_ocp *bp, struct ocp_resource *r) version = version >> 16; bp->fw_loader = true; } - bp->fw_tag = 3; + bp->fw_tag = 3; bp->fw_version = version & 0xffff; - bp->fw_cap = OCP_CAP_BASIC | OCP_CAP_SIGNAL | OCP_CAP_FREQ; + bp->fw_cap = OCP_CAP_BASIC | OCP_CAP_SIGNAL | OCP_CAP_FREQ; ptp_ocp_tod_init(bp); ptp_ocp_nmea_out_init(bp); ptp_ocp_signal_init(bp); - err = ptp_ocp_attr_group_add(bp, adva_timecard_groups); + err = ptp_ocp_attr_group_add(bp, info->attr_groups); if (err) return err; @@ -2923,7 +3195,7 @@ ptp_ocp_adva_board_init(struct ptp_ocp *bp, struct ocp_resource *r) return err; ptp_ocp_sma_init(bp); - return ptp_ocp_init_clock(bp, r->extra); + return ptp_ocp_init_clock(bp, &info->servo); } static ssize_t @@ -3982,6 +4254,43 @@ static const struct ocp_attr_group adva_timecard_groups[] = { { }, }; +static struct attribute *adva_timecard_x1_attrs[] = { + &dev_attr_serialnum.attr, + &dev_attr_gnss_sync.attr, + &dev_attr_clock_source.attr, + &dev_attr_available_clock_sources.attr, + &dev_attr_sma1.attr, + &dev_attr_sma2.attr, + &dev_attr_sma3.attr, + &dev_attr_sma4.attr, + &dev_attr_available_sma_inputs.attr, + &dev_attr_available_sma_outputs.attr, + &dev_attr_clock_status_drift.attr, + &dev_attr_clock_status_offset.attr, + &dev_attr_ts_window_adjust.attr, + &dev_attr_utc_tai_offset.attr, + &dev_attr_tod_correction.attr, + NULL, +}; + +static const struct attribute_group adva_timecard_x1_group = { + .attrs = adva_timecard_x1_attrs, +}; + +static const struct ocp_attr_group adva_timecard_x1_groups[] = { + { .cap = OCP_CAP_BASIC, .group = &adva_timecard_x1_group }, + { .cap = OCP_CAP_BASIC, .group = &ptp_ocp_timecard_tty_group }, + { .cap = OCP_CAP_SIGNAL, .group = &fb_timecard_signal0_group }, + { .cap = OCP_CAP_SIGNAL, .group = &fb_timecard_signal1_group }, + { .cap = OCP_CAP_SIGNAL, .group = &fb_timecard_signal2_group }, + { .cap = OCP_CAP_SIGNAL, .group = &fb_timecard_signal3_group }, + { .cap = OCP_CAP_FREQ, .group = &fb_timecard_freq0_group }, + { .cap = OCP_CAP_FREQ, .group = &fb_timecard_freq1_group }, + { .cap = OCP_CAP_FREQ, .group = &fb_timecard_freq2_group }, + { .cap = OCP_CAP_FREQ, .group = &fb_timecard_freq3_group }, + { }, +}; + static void gpio_input_map(char *buf, struct ptp_ocp *bp, u16 map[][2], u16 bit, const char *def) From 0e24d17bd9668f9dad78ede6a0e8f13dab176682 Mon Sep 17 00:00:00 2001 From: Simon Baatz Date: Mon, 9 Mar 2026 09:02:26 +0100 Subject: [PATCH 0503/1561] tcp: implement RFC 7323 window retraction receiver requirements By default, the Linux TCP implementation does not shrink the advertised window (RFC 7323 calls this "window retraction") with the following exceptions: - When an incoming segment cannot be added due to the receive buffer running out of memory. Since commit 8c670bdfa58e ("tcp: correct handling of extreme memory squeeze") a zero window will be advertised in this case. It turns out that reaching the required memory pressure is easy when window scaling is in use. In the simplest case, sending a sufficient number of segments smaller than the scale factor to a receiver that does not read data is enough. - Commit b650d953cd39 ("tcp: enforce receive buffer memory limits by allowing the tcp window to shrink") addressed the "eating memory" problem by introducing a sysctl knob that allows shrinking the window before running out of memory. However, RFC 7323 does not only state that shrinking the window is necessary in some cases, it also formulates requirements for TCP implementations when doing so (Section 2.4). This commit addresses the receiver-side requirements: After retracting the window, the peer may have a snd_nxt that lies within a previously advertised window but is now beyond the retracted window. This means that all incoming segments (including pure ACKs) will be rejected until the application happens to read enough data to let the peer's snd_nxt be in window again (which may be never). To comply with RFC 7323, the receiver MUST honor any segment that would have been in window for any ACK sent by the receiver and, when window scaling is in effect, SHOULD track the maximum window sequence number it has advertised. This patch tracks that maximum window sequence number rcv_mwnd_seq throughout the connection and uses it in tcp_sequence() when deciding whether a segment is acceptable. rcv_mwnd_seq is updated together with rcv_wup and rcv_wnd in tcp_select_window(). If we count tcp_sequence() as fast path, it is read in the fast path. Therefore, rcv_mwnd_seq is put into rcv_wnd's cacheline group. The logic for handling received data in tcp_data_queue() is already sufficient and does not need to be updated. Signed-off-by: Simon Baatz Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260309-tcp_rfc7323_retract_wnd_rfc-v3-1-4c7f96b1ec69@gmail.com Signed-off-by: Jakub Kicinski --- .../networking/net_cachelines/tcp_sock.rst | 1 + include/linux/tcp.h | 3 +++ include/net/tcp.h | 22 +++++++++++++++++++ net/ipv4/tcp.c | 2 ++ net/ipv4/tcp_fastopen.c | 1 + net/ipv4/tcp_input.c | 10 ++++----- net/ipv4/tcp_minisocks.c | 1 + net/ipv4/tcp_output.c | 3 +++ .../net/packetdrill/tcp_rcv_big_endseq.pkt | 2 +- 9 files changed, 39 insertions(+), 6 deletions(-) diff --git a/Documentation/networking/net_cachelines/tcp_sock.rst b/Documentation/networking/net_cachelines/tcp_sock.rst index 563daea10d6c..fecf61166a54 100644 --- a/Documentation/networking/net_cachelines/tcp_sock.rst +++ b/Documentation/networking/net_cachelines/tcp_sock.rst @@ -121,6 +121,7 @@ u64 delivered_mstamp read_write u32 rate_delivered read_mostly tcp_rate_gen u32 rate_interval_us read_mostly rate_delivered,rate_app_limited u32 rcv_wnd read_write read_mostly tcp_select_window,tcp_receive_window,tcp_fast_path_check +u32 rcv_mwnd_seq read_write tcp_select_window u32 write_seq read_write tcp_rate_check_app_limited,tcp_write_queue_empty,tcp_skb_entail,forced_push,tcp_mark_push u32 notsent_lowat read_mostly tcp_stream_memory_free u32 pushed_seq read_write tcp_mark_push,forced_push diff --git a/include/linux/tcp.h b/include/linux/tcp.h index bcebc4f07532..6982f10e826b 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -316,6 +316,9 @@ struct tcp_sock { */ u32 app_limited; /* limited until "delivered" reaches this val */ u32 rcv_wnd; /* Current receiver window */ + u32 rcv_mwnd_seq; /* Maximum window sequence number (RFC 7323, + * section 2.4, receiver requirements) + */ u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ /* * Options received (usually on last packet, some only on SYN packets). diff --git a/include/net/tcp.h b/include/net/tcp.h index 48dffcca0a71..f87bdacb5a69 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -934,6 +934,28 @@ static inline u32 tcp_receive_window(const struct tcp_sock *tp) return (u32) win; } +/* Compute the maximum receive window we ever advertised. + * Rcv_nxt can be after the window if our peer push more data + * than the offered window. + */ +static inline u32 tcp_max_receive_window(const struct tcp_sock *tp) +{ + s32 win = tp->rcv_mwnd_seq - tp->rcv_nxt; + + if (win < 0) + win = 0; + return (u32) win; +} + +/* Check if we need to update the maximum receive window sequence number */ +static inline void tcp_update_max_rcv_wnd_seq(struct tcp_sock *tp) +{ + u32 wre = tp->rcv_wup + tp->rcv_wnd; + + if (after(wre, tp->rcv_mwnd_seq)) + tp->rcv_mwnd_seq = wre; +} + /* Choose a new window, without checks for shrinking, and without * scaling applied to the result. The caller does these things * if necessary. This is a "raw" window selection. diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ed6f6712f060..516087c622ad 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3561,6 +3561,7 @@ static int tcp_repair_set_window(struct tcp_sock *tp, sockptr_t optbuf, int len) tp->rcv_wnd = opt.rcv_wnd; tp->rcv_wup = opt.rcv_wup; + tp->rcv_mwnd_seq = opt.rcv_wup + opt.rcv_wnd; return 0; } @@ -5275,6 +5276,7 @@ static void __init tcp_struct_check(void) CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, received_ecn_bytes); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, app_limited); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_wnd); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_mwnd_seq); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_tstamp); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rx_opt); diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 9fdc19accafd..4e389d609f91 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -377,6 +377,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, tcp_rsk(req)->rcv_nxt = tp->rcv_nxt; tp->rcv_wup = tp->rcv_nxt; + tp->rcv_mwnd_seq = tp->rcv_wup + tp->rcv_wnd; /* tcp_conn_request() is sending the SYNACK, * and queues the child into listener accept queue. */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 71ac69b7b75e..2e1b23760815 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4808,20 +4808,18 @@ static enum skb_drop_reason tcp_sequence(const struct sock *sk, const struct tcphdr *th) { const struct tcp_sock *tp = tcp_sk(sk); - u32 seq_limit; if (before(end_seq, tp->rcv_wup)) return SKB_DROP_REASON_TCP_OLD_SEQUENCE; - seq_limit = tp->rcv_nxt + tcp_receive_window(tp); - if (unlikely(after(end_seq, seq_limit))) { + if (unlikely(after(end_seq, tp->rcv_nxt + tcp_max_receive_window(tp)))) { /* Some stacks are known to handle FIN incorrectly; allow the * FIN to extend beyond the window and check it in detail later. */ - if (!after(end_seq - th->fin, seq_limit)) + if (!after(end_seq - th->fin, tp->rcv_nxt + tcp_receive_window(tp))) return SKB_NOT_DROPPED_YET; - if (after(seq, seq_limit)) + if (after(seq, tp->rcv_nxt + tcp_max_receive_window(tp))) return SKB_DROP_REASON_TCP_INVALID_SEQUENCE; /* Only accept this packet if receive queue is empty. */ @@ -6903,6 +6901,7 @@ consume: */ WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1); tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1; + tp->rcv_mwnd_seq = tp->rcv_wup + tp->rcv_wnd; /* RFC1323: The window in SYN & SYN/ACK segments is * never scaled. @@ -7015,6 +7014,7 @@ consume: WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1); WRITE_ONCE(tp->copied_seq, tp->rcv_nxt); tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1; + tp->rcv_mwnd_seq = tp->rcv_wup + tp->rcv_wnd; /* RFC1323: The window in SYN & SYN/ACK segments is * never scaled. diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index dafb63b923d0..d350d794a959 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -604,6 +604,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->window_clamp = req->rsk_window_clamp; newtp->rcv_ssthresh = req->rsk_rcv_wnd; newtp->rcv_wnd = req->rsk_rcv_wnd; + newtp->rcv_mwnd_seq = newtp->rcv_wup + req->rsk_rcv_wnd; newtp->rx_opt.wscale_ok = ireq->wscale_ok; if (newtp->rx_opt.wscale_ok) { newtp->rx_opt.snd_wscale = ireq->snd_wscale; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 34a25ef61006..35c3b0ab5a0c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -293,6 +293,7 @@ static u16 tcp_select_window(struct sock *sk) tp->pred_flags = 0; tp->rcv_wnd = 0; tp->rcv_wup = tp->rcv_nxt; + tcp_update_max_rcv_wnd_seq(tp); return 0; } @@ -316,6 +317,7 @@ static u16 tcp_select_window(struct sock *sk) tp->rcv_wnd = new_win; tp->rcv_wup = tp->rcv_nxt; + tcp_update_max_rcv_wnd_seq(tp); /* Make sure we do not exceed the maximum possible * scaled window. @@ -4165,6 +4167,7 @@ static void tcp_connect_init(struct sock *sk) else tp->rcv_tstamp = tcp_jiffies32; tp->rcv_wup = tp->rcv_nxt; + tp->rcv_mwnd_seq = tp->rcv_nxt + tp->rcv_wnd; WRITE_ONCE(tp->copied_seq, tp->rcv_nxt); inet_csk(sk)->icsk_rto = tcp_timeout_init(sk); diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt index 6c0f32c40f19..12882be10f2e 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt @@ -36,7 +36,7 @@ +0 read(4, ..., 100000) = 4000 -// If queue is empty, accept a packet even if its end_seq is above wup + rcv_wnd +// If queue is empty, accept a packet even if its end_seq is above rcv_mwnd_seq +0 < P. 4001:54001(50000) ack 1 win 257 * > . 1:1(0) ack 54001 win 0 From 81714374a29cbaca7e23d9229296515027e355cb Mon Sep 17 00:00:00 2001 From: Simon Baatz Date: Mon, 9 Mar 2026 09:02:27 +0100 Subject: [PATCH 0504/1561] mptcp: keep rcv_mwnd_seq in sync with subflow rcv_wnd MPTCP shares a receive window across subflows and applies it at the subflow level by adjusting each subflow's rcv_wnd when needed. With the new TCP tracking of the maximum advertised window sequence, rcv_mwnd_seq must stay consistent with these subflow-level rcv_wnd adjustments. Signed-off-by: Simon Baatz Reviewed-by: Eric Dumazet Reviewed-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260309-tcp_rfc7323_retract_wnd_rfc-v3-2-4c7f96b1ec69@gmail.com Signed-off-by: Jakub Kicinski --- net/mptcp/options.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 43df4293f58b..8a1c5698983c 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -1076,6 +1076,7 @@ static void rwin_update(struct mptcp_sock *msk, struct sock *ssk, * resync. */ tp->rcv_wnd += mptcp_rcv_wnd - subflow->rcv_wnd_sent; + tcp_update_max_rcv_wnd_seq(tp); subflow->rcv_wnd_sent = mptcp_rcv_wnd; } @@ -1338,8 +1339,9 @@ raise_win: */ rcv_wnd_new = rcv_wnd_old; win = rcv_wnd_old - ack_seq; - tp->rcv_wnd = min_t(u64, win, U32_MAX); - new_win = tp->rcv_wnd; + new_win = min_t(u64, win, U32_MAX); + tp->rcv_wnd = new_win; + tcp_update_max_rcv_wnd_seq(tp); /* Make sure we do not exceed the maximum possible * scaled window. From e2b9c52a2b00ca754def5597cbc07ad401457974 Mon Sep 17 00:00:00 2001 From: Simon Baatz Date: Mon, 9 Mar 2026 09:02:28 +0100 Subject: [PATCH 0505/1561] tcp: increase LINUX_MIB_BEYOND_WINDOW for SKB_DROP_REASON_TCP_OVERWINDOW Since commit 9ca48d616ed7 ("tcp: do not accept packets beyond window"), the path leading to SKB_DROP_REASON_TCP_OVERWINDOW in tcp_data_queue() is probably dead. However, it can be reached now when tcp_max_receive_window() is larger than tcp_receive_window(). In that case, increment LINUX_MIB_BEYOND_WINDOW as done in tcp_sequence(). Signed-off-by: Simon Baatz Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260309-tcp_rfc7323_retract_wnd_rfc-v3-3-4c7f96b1ec69@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_input.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2e1b23760815..e6b2f4be7723 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5678,6 +5678,7 @@ drop: if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp))) { reason = SKB_DROP_REASON_TCP_OVERWINDOW; + NET_INC_STATS(sock_net(sk), LINUX_MIB_BEYOND_WINDOW); goto out_of_window; } From ec1adf8ecf9568a0581464b9a86603c9a4287ce6 Mon Sep 17 00:00:00 2001 From: Simon Baatz Date: Mon, 9 Mar 2026 09:02:29 +0100 Subject: [PATCH 0506/1561] selftests/net: packetdrill: add tcp_rcv_wnd_shrink_nomem.pkt This test verifies - the sequence number checks using the maximum advertised window sequence number and - the logic for handling received data in tcp_data_queue() for the cases: 1. The window is reduced to zero because of memory 2. The window grows again but still does not reach the originally advertised window Signed-off-by: Simon Baatz Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260309-tcp_rfc7323_retract_wnd_rfc-v3-4-4c7f96b1ec69@gmail.com Signed-off-by: Jakub Kicinski --- .../packetdrill/tcp_rcv_wnd_shrink_nomem.pkt | 132 ++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100644 tools/testing/selftests/net/packetdrill/tcp_rcv_wnd_shrink_nomem.pkt diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_wnd_shrink_nomem.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_wnd_shrink_nomem.pkt new file mode 100644 index 000000000000..a80eb55dc69a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_wnd_shrink_nomem.pkt @@ -0,0 +1,132 @@ +// SPDX-License-Identifier: GPL-2.0 +// When tcp_receive_window() < tcp_max_receive_window(), tcp_sequence() accepts +// packets that would be dropped under normal conditions (i.e. tcp_receive_window() +// equal to tcp_max_receive_window()). +// Test that such packets are handled as expected for RWIN == 0 and for RWIN > 0. + +--mss=1000 + +`./defaults.sh` + + 0 `nstat -n` + +// Establish a connection. + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [1000000], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 win 65535 + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + +// Put 1040000 bytes into the receive buffer + +0 < P. 1:65001(65000) ack 1 win 257 + * > . 1:1(0) ack 65001 + +0 < P. 65001:130001(65000) ack 1 win 257 + * > . 1:1(0) ack 130001 + +0 < P. 130001:195001(65000) ack 1 win 257 + * > . 1:1(0) ack 195001 + +0 < P. 195001:260001(65000) ack 1 win 257 + * > . 1:1(0) ack 260001 + +0 < P. 260001:325001(65000) ack 1 win 257 + * > . 1:1(0) ack 325001 + +0 < P. 325001:390001(65000) ack 1 win 257 + * > . 1:1(0) ack 390001 + +0 < P. 390001:455001(65000) ack 1 win 257 + * > . 1:1(0) ack 455001 + +0 < P. 455001:520001(65000) ack 1 win 257 + * > . 1:1(0) ack 520001 + +0 < P. 520001:585001(65000) ack 1 win 257 + * > . 1:1(0) ack 585001 + +0 < P. 585001:650001(65000) ack 1 win 257 + * > . 1:1(0) ack 650001 + +0 < P. 650001:715001(65000) ack 1 win 257 + * > . 1:1(0) ack 715001 + +0 < P. 715001:780001(65000) ack 1 win 257 + * > . 1:1(0) ack 780001 + +0 < P. 780001:845001(65000) ack 1 win 257 + * > . 1:1(0) ack 845001 + +0 < P. 845001:910001(65000) ack 1 win 257 + * > . 1:1(0) ack 910001 + +0 < P. 910001:975001(65000) ack 1 win 257 + * > . 1:1(0) ack 975001 + +0 < P. 975001:1040001(65000) ack 1 win 257 + * > . 1:1(0) ack 1040001 + +// Trigger an extreme memory squeeze by shrinking SO_RCVBUF + +0 setsockopt(4, SOL_SOCKET, SO_RCVBUF, [16000], 4) = 0 + + +0 < P. 1040001:1105001(65000) ack 1 win 257 + * > . 1:1(0) ack 1040001 win 0 +// Check LINUX_MIB_TCPRCVQDROP has been incremented + +0 `nstat -s | grep TcpExtTCPRcvQDrop| grep -q " 1 "` + +// RWIN == 0: rcv_wup = 1040001, rcv_wnd = 0, rcv_mwnd_seq > 1105001 (significantly larger, typically ~1970000) + +// Accept pure ack with seq in max adv. window + +0 write(4, ..., 1000) = 1000 + +0 > P. 1:1001(1000) ack 1040001 win 0 + +0 < . 1105001:1105001(0) ack 1001 win 257 + +// In order segment, in max adv. window -> drop (SKB_DROP_REASON_TCP_ZEROWINDOW) + +0 < P. 1040001:1041001(1000) ack 1001 win 257 + +0 > . 1001:1001(0) ack 1040001 win 0 +// Ooo partial segment, in max adv. window -> drop (SKB_DROP_REASON_TCP_ZEROWINDOW) + +0 < P. 1039001:1041001(2000) ack 1001 win 257 + +0 > . 1001:1001(0) ack 1040001 win 0 +// Check LINUX_MIB_TCPZEROWINDOWDROP has been incremented twice + +0 `nstat -s | grep TcpExtTCPZeroWindowDrop| grep -q " 2 "` + +// Ooo segment, in max adv. window -> drop (SKB_DROP_REASON_TCP_OVERWINDOW) + +0 < P. 1105001:1106001(1000) ack 1001 win 257 + +0 > . 1001:1001(0) ack 1040001 win 0 +// Ooo segment, beyond max adv. window -> drop (SKB_DROP_REASON_TCP_INVALID_SEQUENCE) + +0 < P. 2000001:2001001(1000) ack 1001 win 257 + +0 > . 1001:1001(0) ack 1040001 win 0 +// Check LINUX_MIB_BEYOND_WINDOW has been incremented twice + +0 `nstat -s | grep TcpExtBeyondWindow | grep -q " 2 "` + +// Read all data + +0 read(4, ..., 2000000) = 1040000 + * > . 1001:1001(0) ack 1040001 + +// RWIN > 0: rcv_wup = 1040001, 0 < rcv_wnd < 32000, rcv_mwnd_seq > 1105001 (significantly larger, typically ~1970000) + +// Accept pure ack with seq in max adv. window, beyond adv. window + +0 write(4, ..., 1000) = 1000 + +0 > P. 1001:2001(1000) ack 1040001 + +0 < . 1105001:1105001(0) ack 2001 win 257 + +// In order segment, in max adv. window, in adv. window -> accept +// Note: This also ensures that we cannot hit the empty queue exception in tcp_sequence() in the following tests + +0 < P. 1040001:1041001(1000) ack 2001 win 257 + * > . 2001:2001(0) ack 1041001 + +// Ooo partial segment, in adv. window -> accept + +0 < P. 1040001:1042001(2000) ack 2001 win 257 + +0 > . 2001:2001(0) ack 1042001 + +// Ooo segment, in max adv. window, beyond adv. window -> drop (SKB_DROP_REASON_TCP_OVERWINDOW) + +0 < P. 1105001:1106001(1000) ack 2001 win 257 + +0 > . 2001:2001(0) ack 1042001 +// Ooo segment, beyond max adv. window, beyond adv. window -> drop (SKB_DROP_REASON_TCP_INVALID_SEQUENCE) + +0 < P. 2000001:2001001(1000) ack 2001 win 257 + +0 > . 2001:2001(0) ack 1042001 +// Check LINUX_MIB_BEYOND_WINDOW has been incremented twice + +0 `nstat -s | grep TcpExtBeyondWindow | grep -q " 4 "` + +// We are allowed to go beyond the window and buffer with one packet + +0 < P. 1042001:1062001(20000) ack 2001 win 257 + * > . 2001:2001(0) ack 1062001 + +0 < P. 1062001:1082001(20000) ack 2001 win 257 + * > . 2001:2001(0) ack 1082001 win 0 + +// But not more: In order segment, in max adv. window -> drop (SKB_DROP_REASON_TCP_ZEROWINDOW) + +0 < P. 1082001:1083001(1000) ack 2001 win 257 + * > . 2001:2001(0) ack 1082001 +// Check LINUX_MIB_TCPZEROWINDOWDROP has been incremented again + +0 `nstat -s | grep TcpExtTCPZeroWindowDrop| grep -q " 3 "` From ba58b3e70b86cd64fe8bb9c52f1111667a448908 Mon Sep 17 00:00:00 2001 From: Simon Baatz Date: Mon, 9 Mar 2026 09:02:30 +0100 Subject: [PATCH 0507/1561] selftests/net: packetdrill: add tcp_rcv_wnd_shrink_allowed.pkt This test verifies the sequence number checks using the maximum advertised window sequence number when net.ipv4.tcp_shrink_window is enabled. Signed-off-by: Simon Baatz Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260309-tcp_rfc7323_retract_wnd_rfc-v3-5-4c7f96b1ec69@gmail.com Signed-off-by: Jakub Kicinski --- .../tcp_rcv_wnd_shrink_allowed.pkt | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 tools/testing/selftests/net/packetdrill/tcp_rcv_wnd_shrink_allowed.pkt diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_wnd_shrink_allowed.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_wnd_shrink_allowed.pkt new file mode 100644 index 000000000000..6af0e0eb183a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_wnd_shrink_allowed.pkt @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 + +--mss=1000 + +`./defaults.sh +sysctl -q net.ipv4.tcp_shrink_window=1 +sysctl -q net.ipv4.tcp_rmem="4096 32768 $((32*1024*1024))"` + + 0 `nstat -n` + +// Establish a connection. + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 < P. 1:10001(10000) ack 1 win 257 + * > . 1:1(0) ack 10001 win 15 + + +0 < P. 10001:11024(1023) ack 1 win 257 + * > . 1:1(0) ack 11024 win 13 + +// Max window seq advertised 10001 + 15*1024 = 25361, last advertised: 11024 + 13*1024 = 24336 + +// Segment beyond the max window is dropped + +0 < P. 11024:25362(14338) ack 1 win 257 + * > . 1:1(0) ack 11024 win 13 + +// Segment using the max window is accepted + +0 < P. 11024:25361(14337) ack 1 win 257 + * > . 1:1(0) ack 25361 win 0 + +// Check LINUX_MIB_BEYOND_WINDOW has been incremented once + +0 `nstat | grep TcpExtBeyondWindow | grep -q " 1 "` From 3eb371eddad0a47183dd4434de9c9190d0f721c5 Mon Sep 17 00:00:00 2001 From: Simon Baatz Date: Mon, 9 Mar 2026 09:02:31 +0100 Subject: [PATCH 0508/1561] selftests/net: packetdrill: add tcp_rcv_neg_window.pkt The test ensures we correctly apply the maximum advertised window limit when rcv_nxt advances past rcv_mwnd_seq, so that the "usable window" is properly clamped to zero rather than becoming negative. Signed-off-by: Simon Baatz Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260309-tcp_rfc7323_retract_wnd_rfc-v3-6-4c7f96b1ec69@gmail.com Signed-off-by: Jakub Kicinski --- .../net/packetdrill/tcp_rcv_neg_window.pkt | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 tools/testing/selftests/net/packetdrill/tcp_rcv_neg_window.pkt diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_neg_window.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_neg_window.pkt new file mode 100644 index 000000000000..15a9b4938f16 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_neg_window.pkt @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 + +--mss=1000 + +`./defaults.sh` + +// Establish a connection. + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [20000], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 win 18980 + +.1 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + +// A too big packet is accepted if the receive queue is empty + +0 < P. 1:20001(20000) ack 1 win 257 +// Send a RST immediately so that there is no rcv_wup/rcv_mwnd_seq update yet + +0 < R. 20001:20001(0) ack 1 win 257 + + +.1 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }% + From d15d3de94a4766fb43d7fe7a72ed0479fb268131 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 12 Mar 2026 20:18:23 +0000 Subject: [PATCH 0509/1561] net: dropreason: add SKB_DROP_REASON_RECURSION_LIMIT ip[6]tunnel_xmit() can drop packets if a too deep recursion level is detected. Add SKB_DROP_REASON_RECURSION_LIMIT drop reason. We will use this reason later in __dev_queue_xmit(). Signed-off-by: Eric Dumazet Reviewed-by: Joe Damato Link: https://patch.msgid.link/20260312201824.203093-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/dropreason-core.h | 3 +++ include/net/ip6_tunnel.h | 2 +- net/ipv4/ip_tunnel_core.c | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index 5c8c2eb3d2c5..de61dd5dbfd9 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -123,6 +123,7 @@ FN(PFMEMALLOC) \ FN(PSP_INPUT) \ FN(PSP_OUTPUT) \ + FN(RECURSION_LIMIT) \ FNe(MAX) /** @@ -582,6 +583,8 @@ enum skb_drop_reason { SKB_DROP_REASON_PSP_INPUT, /** @SKB_DROP_REASON_PSP_OUTPUT: PSP output checks failed */ SKB_DROP_REASON_PSP_OUTPUT, + /** @SKB_DROP_REASON_RECURSION_LIMIT: Dead loop on virtual device. */ + SKB_DROP_REASON_RECURSION_LIMIT, /** * @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which * shouldn't be used as a real 'reason' - only for tracing code gen diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 359b595f1df9..b99805ee2fd1 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -162,7 +162,7 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, dev->name); DEV_STATS_INC(dev, tx_errors); } - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_RECURSION_LIMIT); return; } diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 5683c328990f..f430d6f0463e 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -65,7 +65,7 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, DEV_STATS_INC(dev, tx_errors); } ip_rt_put(rt); - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_RECURSION_LIMIT); return; } From 045f977dd4ebdd3ad8e96cf684917adfc5805adb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 12 Mar 2026 20:18:24 +0000 Subject: [PATCH 0510/1561] net: plumb drop reasons to __dev_queue_xmit() Add drop reasons to __dev_queue_xmit(): - SKB_DROP_REASON_DEV_READY : device is not UP. - SKB_DROP_REASON_RECURSION_LIMIT : recursion limit on virtual device is hit. Also add an unlikely() for the SKB_DROP_REASON_DEV_READY case, and reduce indentation level. Signed-off-by: Eric Dumazet Reviewed-by: Joe Damato Link: https://patch.msgid.link/20260312201824.203093-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 103 +++++++++++++++++++++++++------------------------ 1 file changed, 53 insertions(+), 50 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index f48dc299e4b2..200d44883fc1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4745,9 +4745,10 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) { struct net_device *dev = skb->dev; struct netdev_queue *txq = NULL; - struct Qdisc *q; - int rc = -ENOMEM; + enum skb_drop_reason reason; + int cpu, rc = -ENOMEM; bool again = false; + struct Qdisc *q; skb_reset_mac_header(skb); skb_assert_len(skb); @@ -4816,59 +4817,61 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) * Check this and shot the lock. It is not prone from deadlocks. *Either shot noqueue qdisc, it is even simpler 8) */ - if (dev->flags & IFF_UP) { - int cpu = smp_processor_id(); /* ok because BHs are off */ - - if (!netif_tx_owned(txq, cpu)) { - bool is_list = false; - - if (dev_xmit_recursion()) - goto recursion_alert; - - skb = validate_xmit_skb(skb, dev, &again); - if (!skb) - goto out; - - HARD_TX_LOCK(dev, txq, cpu); - - if (!netif_xmit_stopped(txq)) { - is_list = !!skb->next; - - dev_xmit_recursion_inc(); - skb = dev_hard_start_xmit(skb, dev, txq, &rc); - dev_xmit_recursion_dec(); - - /* GSO segments a single SKB into - * a list of frames. TCP expects error - * to mean none of the data was sent. - */ - if (is_list) - rc = NETDEV_TX_OK; - } - HARD_TX_UNLOCK(dev, txq); - if (!skb) /* xmit completed */ - goto out; - - net_crit_ratelimited("Virtual device %s asks to queue packet!\n", - dev->name); - /* NETDEV_TX_BUSY or queue was stopped */ - if (!is_list) - rc = -ENETDOWN; - } else { - /* Recursion is detected! It is possible, - * unfortunately - */ -recursion_alert: - net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n", - dev->name); - rc = -ENETDOWN; - } + if (unlikely(!(dev->flags & IFF_UP))) { + reason = SKB_DROP_REASON_DEV_READY; + goto drop; } + cpu = smp_processor_id(); /* ok because BHs are off */ + + if (likely(!netif_tx_owned(txq, cpu))) { + bool is_list = false; + + if (dev_xmit_recursion()) + goto recursion_alert; + + skb = validate_xmit_skb(skb, dev, &again); + if (!skb) + goto out; + + HARD_TX_LOCK(dev, txq, cpu); + + if (!netif_xmit_stopped(txq)) { + is_list = !!skb->next; + + dev_xmit_recursion_inc(); + skb = dev_hard_start_xmit(skb, dev, txq, &rc); + dev_xmit_recursion_dec(); + + /* GSO segments a single SKB into a list of frames. + * TCP expects error to mean none of the data was sent. + */ + if (is_list) + rc = NETDEV_TX_OK; + } + HARD_TX_UNLOCK(dev, txq); + if (!skb) /* xmit completed */ + goto out; + + net_crit_ratelimited("Virtual device %s asks to queue packet!\n", + dev->name); + /* NETDEV_TX_BUSY or queue was stopped */ + if (!is_list) + rc = -ENETDOWN; + } else { + /* Recursion is detected! It is possible unfortunately. */ +recursion_alert: + net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n", + dev->name); + rc = -ENETDOWN; + } + + reason = SKB_DROP_REASON_RECURSION_LIMIT; +drop: rcu_read_unlock_bh(); dev_core_stats_tx_dropped_inc(dev); - kfree_skb_list(skb); + kfree_skb_list_reason(skb, reason); return rc; out: rcu_read_unlock_bh(); From 425abcea830c647ed9e11f669e05200a67e905e8 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 12 Mar 2026 23:59:22 +0000 Subject: [PATCH 0511/1561] hinic3: Fix spelling mistake "capbility" -> "capability" There is a spelling mistake in a dev_dbg message. Fix it. Signed-off-by: Colin Ian King Reviewed-by: Joe Damato Link: https://patch.msgid.link/20260312235922.3442120-1-colin.i.king@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/huawei/hinic3/hinic3_hw_cfg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_cfg.c b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_cfg.c index 2dfa4fb39627..c5dd53a8fa96 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_cfg.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_cfg.c @@ -44,7 +44,7 @@ static void hinic3_parse_l2nic_res_cap(struct hinic3_hwdev *hwdev, HINIC3_CFG_MAX_QP); nic_svc_cap->default_num_queues = dev_cap->nic_default_num_queues; - dev_dbg(hwdev->dev, "L2nic resource capbility, max_sqs: 0x%x, max_rqs: 0x%x\n", + dev_dbg(hwdev->dev, "L2nic resource capability, max_sqs: 0x%x, max_rqs: 0x%x\n", nic_svc_cap->max_sqs, nic_svc_cap->max_rqs); } From f4ce4922df8d1c3417f4bc21314ac7d5e4c92ab7 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Fri, 13 Mar 2026 19:24:54 +0800 Subject: [PATCH 0512/1561] net: enetc: remove stray semicolon Remove stray semicolon from ntmp_table_name(). Signed-off-by: Wei Fang Link: https://patch.msgid.link/20260313112454.427191-1-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/ntmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/enetc/ntmp.c b/drivers/net/ethernet/freescale/enetc/ntmp.c index 0c1d343253bf..703752995e93 100644 --- a/drivers/net/ethernet/freescale/enetc/ntmp.c +++ b/drivers/net/ethernet/freescale/enetc/ntmp.c @@ -227,7 +227,7 @@ static const char *ntmp_table_name(int tbl_id) return "RSS Table"; default: return "Unknown Table"; - }; + } } static int ntmp_delete_entry_by_id(struct ntmp_user *user, int tbl_id, From f807b5b9b89eb9220d034115c272c312251cbcac Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 12 Mar 2026 12:13:52 +0000 Subject: [PATCH 0513/1561] net: stmmac: avoid passing pci_dev The pci_dev is only used to provide the ethtool bus_info using pci_name(priv->plat->pdev). This is the same as dev_name(priv->device). Thus, rather than passing the pci_dev, make use of what we already have. To avoid unexpectedly exposing the device name through ethtool where it wasn't provided before, add a flag priv->plat->provide_bus_info to enable this, which only dwmac-intel needs to set. Signed-off-by: Russell King (Oracle) Reviewed-by: Simon Horman Link: https://patch.msgid.link/E1w0evI-0000000CzY7-1fyo@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 2 +- drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 5 ++--- include/linux/stmmac.h | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index 421c6c81ca5e..f621077c30a4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -589,7 +589,7 @@ static int intel_mgbe_common_data(struct pci_dev *pdev, int ret; int i; - plat->pdev = pdev; + plat->provide_bus_info = true; plat->phy_addr = -1; plat->clk_csr = STMMAC_CSR_250_300M; plat->core_type = DWMAC_CORE_GMAC4; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index c1e26965d9b5..92585d27ab88 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -312,10 +312,9 @@ static void stmmac_ethtool_getdrvinfo(struct net_device *dev, strscpy(info->driver, MAC100_ETHTOOL_NAME, sizeof(info->driver)); - if (priv->plat->pdev) { - strscpy(info->bus_info, pci_name(priv->plat->pdev), + if (priv->plat->provide_bus_info) + strscpy(info->bus_info, dev_name(priv->device), sizeof(info->bus_info)); - } } static int stmmac_ethtool_get_link_ksettings(struct net_device *dev, diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 937985276e6b..72febd246bdb 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -348,7 +348,7 @@ struct plat_stmmacenet_data { int rss_en; int mac_port_sel_speed; u8 vlan_fail_q; - struct pci_dev *pdev; + bool provide_bus_info; int int_snapshot_num; int msi_mac_vec; int msi_wol_vec; From fa8fca88714c3a4a74f972ed37328e2f0bbef9fa Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Thu, 12 Mar 2026 15:26:37 +0100 Subject: [PATCH 0514/1561] ipv4: validate IPV4_DEVCONF attributes properly As the IPV4_DEVCONF netlink attributes are not being validated, it is possible to use netlink to set read-only values like mc_forwarding. In addition, valid ranges are not being validated neither but that is less relevant as they aren't in sysctl. To avoid similar situations in the future, define a NLA policy for IPV4_DEVCONF attributes which are nested in IFLA_INET_CONF. Signed-off-by: Fernando Fernandez Mancera Link: https://patch.msgid.link/20260312142637.5704-1-fmancera@suse.de Signed-off-by: Jakub Kicinski --- net/ipv4/devinet.c | 55 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 45 insertions(+), 10 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 537bb6c315d2..58fe7cb69545 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -2063,12 +2063,50 @@ static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = { [IFLA_INET_CONF] = { .type = NLA_NESTED }, }; +static const struct nla_policy inet_devconf_policy[IPV4_DEVCONF_MAX + 1] = { + [IPV4_DEVCONF_FORWARDING] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_MC_FORWARDING] = { .type = NLA_REJECT }, + [IPV4_DEVCONF_PROXY_ARP] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_ACCEPT_REDIRECTS] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_SECURE_REDIRECTS] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_SEND_REDIRECTS] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_SHARED_MEDIA] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_RP_FILTER] = NLA_POLICY_RANGE(NLA_U32, 0, 2), + [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_BOOTP_RELAY] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_LOG_MARTIANS] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_TAG] = { .type = NLA_U32 }, + [IPV4_DEVCONF_ARPFILTER] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_MEDIUM_ID] = NLA_POLICY_MIN(NLA_S32, -1), + [IPV4_DEVCONF_NOXFRM] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_NOPOLICY] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_FORCE_IGMP_VERSION] = NLA_POLICY_RANGE(NLA_U32, 0, 3), + [IPV4_DEVCONF_ARP_ANNOUNCE] = NLA_POLICY_RANGE(NLA_U32, 0, 2), + [IPV4_DEVCONF_ARP_IGNORE] = NLA_POLICY_RANGE(NLA_U32, 0, 8), + [IPV4_DEVCONF_PROMOTE_SECONDARIES] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_ARP_ACCEPT] = NLA_POLICY_RANGE(NLA_U32, 0, 2), + [IPV4_DEVCONF_ARP_NOTIFY] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_ACCEPT_LOCAL] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_SRC_VMARK] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_PROXY_ARP_PVLAN] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_ROUTE_LOCALNET] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_BC_FORWARDING] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL] = { .type = NLA_U32 }, + [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL] = { .type = NLA_U32 }, + [IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN] = + NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = + NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_DROP_GRATUITOUS_ARP] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_ARP_EVICT_NOCARRIER] = NLA_POLICY_RANGE(NLA_U32, 0, 1), +}; + static int inet_validate_link_af(const struct net_device *dev, const struct nlattr *nla, struct netlink_ext_ack *extack) { - struct nlattr *a, *tb[IFLA_INET_MAX+1]; - int err, rem; + struct nlattr *tb[IFLA_INET_MAX + 1], *nested_tb[IPV4_DEVCONF_MAX + 1]; + int err; if (dev && !__in_dev_get_rtnl(dev)) return -EAFNOSUPPORT; @@ -2079,15 +2117,12 @@ static int inet_validate_link_af(const struct net_device *dev, return err; if (tb[IFLA_INET_CONF]) { - nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) { - int cfgid = nla_type(a); + err = nla_parse_nested(nested_tb, IPV4_DEVCONF_MAX, + tb[IFLA_INET_CONF], inet_devconf_policy, + extack); - if (nla_len(a) < 4) - return -EINVAL; - - if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX) - return -EINVAL; - } + if (err < 0) + return err; } return 0; From b87249aab4c0cbc8d9e4e745dcd1cb45ca29b8bd Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 11 Mar 2026 21:54:46 +0100 Subject: [PATCH 0515/1561] net: mdio: remove selecting FIXED_PHY for FWNODE_MDIO Fwnode MDIO has never used the fixed PHY code, therefore don't select symbol FIXED_PHY. Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/880ca62b-a5d3-4865-bbce-2d2210928239@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/mdio/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/mdio/Kconfig b/drivers/net/mdio/Kconfig index 44380378911b..53e2c047d804 100644 --- a/drivers/net/mdio/Kconfig +++ b/drivers/net/mdio/Kconfig @@ -7,7 +7,6 @@ if PHYLIB config FWNODE_MDIO def_tristate (ACPI || OF) || COMPILE_TEST - select FIXED_PHY help FWNODE MDIO bus (Ethernet PHY) accessors From a99f06e579a39be6113d37e84658a20b71c52e5f Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 12 Mar 2026 09:45:32 +0100 Subject: [PATCH 0516/1561] net: usb: cdc-ether: unify error handling in probe usbnet_generic_cdc_bind() is duplicating the error handling multiple times. That is bad. Unify it with jumps. V2: Update error logging with every cause a unique message Signed-off-by: Oliver Neukum Link: https://patch.msgid.link/20260312084612.1469853-1-oneukum@suse.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/cdc_ether.c | 54 ++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index a032c1ded406..a0a5740590b9 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -115,7 +115,7 @@ int usbnet_generic_cdc_bind(struct usbnet *dev, struct usb_interface *intf) int len = intf->cur_altsetting->extralen; struct usb_interface_descriptor *d; struct cdc_state *info = (void *) &dev->data; - int status; + int status = -ENODEV; int rndis; bool android_rndis_quirk = false; struct usb_driver *driver = driver_of(intf); @@ -169,10 +169,13 @@ int usbnet_generic_cdc_bind(struct usbnet *dev, struct usb_interface *intf) info->header = header.usb_cdc_header_desc; info->ether = header.usb_cdc_ether_desc; if (!info->u) { - if (rndis) + if (rndis) { goto skip; - else /* in that case a quirk is mandatory */ + } else { + /* in that case a quirk is mandatory */ + dev_err(&dev->udev->dev, "No union descriptors\n"); goto bad_desc; + } } /* we need a master/control interface (what we're * probed with) and a slave/data interface; union @@ -192,18 +195,20 @@ int usbnet_generic_cdc_bind(struct usbnet *dev, struct usb_interface *intf) android_rndis_quirk = true; goto skip; } + dev_err(&intf->dev, "bad CDC descriptors\n"); goto bad_desc; } if (info->control != intf) { - dev_dbg(&intf->dev, "bogus CDC Union\n"); /* Ambit USB Cable Modem (and maybe others) * interchanges master and slave interface. */ if (info->data == intf) { info->data = info->control; info->control = intf; - } else + } else { + dev_err(&intf->dev, "bogus CDC Union\n"); goto bad_desc; + } } /* some devices merge these - skip class check */ @@ -213,7 +218,7 @@ int usbnet_generic_cdc_bind(struct usbnet *dev, struct usb_interface *intf) /* a data interface altsetting does the real i/o */ d = &info->data->cur_altsetting->desc; if (d->bInterfaceClass != USB_CLASS_CDC_DATA) { - dev_dbg(&intf->dev, "slave class %u\n", d->bInterfaceClass); + dev_err(&intf->dev, "slave class %u\n", d->bInterfaceClass); goto bad_desc; } skip: @@ -227,7 +232,7 @@ skip: if (rndis && is_rndis(&intf->cur_altsetting->desc) && header.usb_cdc_acm_descriptor && header.usb_cdc_acm_descriptor->bmCapabilities) { - dev_dbg(&intf->dev, + dev_err(&intf->dev, "ACM capabilities %02x, not really RNDIS?\n", header.usb_cdc_acm_descriptor->bmCapabilities); goto bad_desc; @@ -242,14 +247,14 @@ skip: if (header.usb_cdc_mdlm_desc && memcmp(header.usb_cdc_mdlm_desc->bGUID, mbm_guid, 16)) { - dev_dbg(&intf->dev, "GUID doesn't match\n"); + dev_err(&intf->dev, "GUID doesn't match\n"); goto bad_desc; } if (header.usb_cdc_mdlm_detail_desc && header.usb_cdc_mdlm_detail_desc->bLength < (sizeof(struct usb_cdc_mdlm_detail_desc) + 1)) { - dev_dbg(&intf->dev, "Descriptor too short\n"); + dev_err(&intf->dev, "Descriptor too short\n"); goto bad_desc; } @@ -267,7 +272,7 @@ skip: info->control = usb_ifnum_to_if(dev->udev, 0); info->data = usb_ifnum_to_if(dev->udev, 1); if (!info->control || !info->data || info->control != intf) { - dev_dbg(&intf->dev, + dev_err(&intf->dev, "rndis: master #0/%p slave #1/%p\n", info->control, info->data); @@ -275,7 +280,7 @@ skip: } } else if (!info->header || (!rndis && !info->ether)) { - dev_dbg(&intf->dev, "missing cdc %s%s%sdescriptor\n", + dev_err(&intf->dev, "missing cdc %s%s%sdescriptor\n", info->header ? "" : "header ", info->u ? "" : "union ", info->ether ? "" : "ether "); @@ -287,16 +292,15 @@ skip: */ if (info->data != info->control) { status = usb_driver_claim_interface(driver, info->data, dev); - if (status < 0) - return status; + if (status < 0) { + dev_err(&intf->dev, "Second interface unclaimable\n"); + goto bad_desc; + } } status = usbnet_get_endpoints(dev, info->data); if (status < 0) { - /* ensure immediate exit from usbnet_disconnect */ - usb_set_intfdata(info->data, NULL); - if (info->data != info->control) - usb_driver_release_interface(driver, info->data); - return status; + dev_dbg(&intf->dev, "Mandatory endpoints missing\n"); + goto bail_out_and_release; } /* status endpoint: optional for CDC Ethernet, not RNDIS (or ACM) */ @@ -316,10 +320,9 @@ skip: } } if (rndis && !dev->status) { - dev_dbg(&intf->dev, "missing RNDIS status endpoint\n"); - usb_set_intfdata(info->data, NULL); - usb_driver_release_interface(driver, info->data); - return -ENODEV; + dev_err(&intf->dev, "missing RNDIS status endpoint\n"); + status = -ENODEV; + goto bail_out_and_release; } /* override ethtool_ops */ @@ -327,9 +330,12 @@ skip: return 0; +bail_out_and_release: + usb_set_intfdata(info->data, NULL); + if (info->data != info->control) + usb_driver_release_interface(driver, info->data); bad_desc: - dev_info(&dev->udev->dev, "bad CDC descriptors\n"); - return -ENODEV; + return status; } EXPORT_SYMBOL_GPL(usbnet_generic_cdc_bind); From 2d7bebc9dd79177f098187b0ddb0c357d4496c1c Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 9 Mar 2026 18:02:38 +0100 Subject: [PATCH 0517/1561] net: phy: move mdio_device reset handling functions in the code In preparation of a follow-up patch this moves reset-related functions in the code. Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/8ea1a929-33b8-49ee-afe6-355f5a7d2bd1@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/mdio_device.c | 162 +++++++++++++++++----------------- 1 file changed, 81 insertions(+), 81 deletions(-) diff --git a/drivers/net/phy/mdio_device.c b/drivers/net/phy/mdio_device.c index b8a5a1838196..da4fb7484c7c 100644 --- a/drivers/net/phy/mdio_device.c +++ b/drivers/net/phy/mdio_device.c @@ -24,6 +24,87 @@ #include #include "mdio-private.h" +/** + * mdio_device_register_reset - Read and initialize the reset properties of + * an mdio device + * @mdiodev: mdio_device structure + * + * Return: Zero if successful, negative error code on failure + */ +int mdio_device_register_reset(struct mdio_device *mdiodev) +{ + struct reset_control *reset; + + /* Deassert the optional reset signal */ + mdiodev->reset_gpio = gpiod_get_optional(&mdiodev->dev, + "reset", GPIOD_OUT_LOW); + if (IS_ERR(mdiodev->reset_gpio)) + return PTR_ERR(mdiodev->reset_gpio); + + if (mdiodev->reset_gpio) + gpiod_set_consumer_name(mdiodev->reset_gpio, "PHY reset"); + + reset = reset_control_get_optional_exclusive(&mdiodev->dev, "phy"); + if (IS_ERR(reset)) { + gpiod_put(mdiodev->reset_gpio); + mdiodev->reset_gpio = NULL; + return PTR_ERR(reset); + } + + mdiodev->reset_ctrl = reset; + + /* Read optional firmware properties */ + device_property_read_u32(&mdiodev->dev, "reset-assert-us", + &mdiodev->reset_assert_delay); + device_property_read_u32(&mdiodev->dev, "reset-deassert-us", + &mdiodev->reset_deassert_delay); + + return 0; +} + +/** + * mdio_device_unregister_reset - uninitialize the reset properties of + * an mdio device + * @mdiodev: mdio_device structure + */ +void mdio_device_unregister_reset(struct mdio_device *mdiodev) +{ + gpiod_put(mdiodev->reset_gpio); + mdiodev->reset_gpio = NULL; + reset_control_put(mdiodev->reset_ctrl); + mdiodev->reset_ctrl = NULL; + mdiodev->reset_assert_delay = 0; + mdiodev->reset_deassert_delay = 0; +} + +void mdio_device_reset(struct mdio_device *mdiodev, int value) +{ + unsigned int d; + + if (!mdiodev->reset_gpio && !mdiodev->reset_ctrl) + return; + + if (mdiodev->reset_state == value) + return; + + if (mdiodev->reset_gpio) + gpiod_set_value_cansleep(mdiodev->reset_gpio, value); + + if (mdiodev->reset_ctrl) { + if (value) + reset_control_assert(mdiodev->reset_ctrl); + else + reset_control_deassert(mdiodev->reset_ctrl); + } + + d = value ? mdiodev->reset_assert_delay : mdiodev->reset_deassert_delay; + if (d) + fsleep(d); + + mdiodev->reset_state = value; +} +EXPORT_SYMBOL(mdio_device_reset); + void mdio_device_free(struct mdio_device *mdiodev) { put_device(&mdiodev->dev); @@ -108,87 +189,6 @@ void mdio_device_remove(struct mdio_device *mdiodev) } EXPORT_SYMBOL(mdio_device_remove); -/** - * mdio_device_register_reset - Read and initialize the reset properties of - * an mdio device - * @mdiodev: mdio_device structure - * - * Return: Zero if successful, negative error code on failure - */ -int mdio_device_register_reset(struct mdio_device *mdiodev) -{ - struct reset_control *reset; - - /* Deassert the optional reset signal */ - mdiodev->reset_gpio = gpiod_get_optional(&mdiodev->dev, - "reset", GPIOD_OUT_LOW); - if (IS_ERR(mdiodev->reset_gpio)) - return PTR_ERR(mdiodev->reset_gpio); - - if (mdiodev->reset_gpio) - gpiod_set_consumer_name(mdiodev->reset_gpio, "PHY reset"); - - reset = reset_control_get_optional_exclusive(&mdiodev->dev, "phy"); - if (IS_ERR(reset)) { - gpiod_put(mdiodev->reset_gpio); - mdiodev->reset_gpio = NULL; - return PTR_ERR(reset); - } - - mdiodev->reset_ctrl = reset; - - /* Read optional firmware properties */ - device_property_read_u32(&mdiodev->dev, "reset-assert-us", - &mdiodev->reset_assert_delay); - device_property_read_u32(&mdiodev->dev, "reset-deassert-us", - &mdiodev->reset_deassert_delay); - - return 0; -} - -/** - * mdio_device_unregister_reset - uninitialize the reset properties of - * an mdio device - * @mdiodev: mdio_device structure - */ -void mdio_device_unregister_reset(struct mdio_device *mdiodev) -{ - gpiod_put(mdiodev->reset_gpio); - mdiodev->reset_gpio = NULL; - reset_control_put(mdiodev->reset_ctrl); - mdiodev->reset_ctrl = NULL; - mdiodev->reset_assert_delay = 0; - mdiodev->reset_deassert_delay = 0; -} - -void mdio_device_reset(struct mdio_device *mdiodev, int value) -{ - unsigned int d; - - if (!mdiodev->reset_gpio && !mdiodev->reset_ctrl) - return; - - if (mdiodev->reset_state == value) - return; - - if (mdiodev->reset_gpio) - gpiod_set_value_cansleep(mdiodev->reset_gpio, value); - - if (mdiodev->reset_ctrl) { - if (value) - reset_control_assert(mdiodev->reset_ctrl); - else - reset_control_deassert(mdiodev->reset_ctrl); - } - - d = value ? mdiodev->reset_assert_delay : mdiodev->reset_deassert_delay; - if (d) - fsleep(d); - - mdiodev->reset_state = value; -} -EXPORT_SYMBOL(mdio_device_reset); - /** * mdio_probe - probe an MDIO device * @dev: device to probe From 6df1459605cedd2112ebf660c77f42bb87d5c306 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 9 Mar 2026 18:03:31 +0100 Subject: [PATCH 0518/1561] net: phy: make mdio_device.c part of libphy This patch - makes mdio_device.c part of libphy - makes mdio_device_(un)register_reset() static - moves mdiobus_(un)register_device() from mdio_bus.c to mdio_device.c, stops exporting both functions and makes them private to phylib This further decouples the MDIO consumer functionality from libphy. Note: This makes MDIO driver registration part of phylib, therefore adjust Kconfig dependencies where needed. Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/c6dbf9b3-3ca0-434b-ad3a-71fe602ab809@gmail.com Signed-off-by: Jakub Kicinski --- drivers/clk/qcom/Kconfig | 2 +- drivers/net/phy/Makefile | 6 ++--- drivers/net/phy/mdio-private.h | 11 --------- drivers/net/phy/mdio_bus.c | 36 ---------------------------- drivers/net/phy/mdio_device.c | 39 ++++++++++++++++++++++++++++--- drivers/net/phy/phylib-internal.h | 4 ++++ drivers/phy/broadcom/Kconfig | 4 ++-- include/linux/mdio.h | 2 -- 8 files changed, 46 insertions(+), 58 deletions(-) delete mode 100644 drivers/net/phy/mdio-private.h diff --git a/drivers/clk/qcom/Kconfig b/drivers/clk/qcom/Kconfig index a8a86ea6bb74..a277c434d641 100644 --- a/drivers/clk/qcom/Kconfig +++ b/drivers/clk/qcom/Kconfig @@ -392,7 +392,7 @@ config IPQ_NSSCC_9574 config IPQ_NSSCC_QCA8K tristate "QCA8K(QCA8386 or QCA8084) NSS Clock Controller" - depends on MDIO_BUS + depends on PHYLIB help Support for NSS(Network SubSystem) clock controller on qca8386/qca8084 chip. diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index 3a34917adea7..8d262b4e2be2 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -3,8 +3,8 @@ libphy-y := phy.o phy-c45.o phy-core.o phy_device.o \ linkmode.o phy_link_topology.o \ - phy_caps.o mdio_bus_provider.o phy_port.o -mdio-bus-y += mdio_bus.o mdio_device.o + phy_caps.o mdio_bus_provider.o phy_port.o \ + mdio_device.o ifdef CONFIG_PHYLIB # built-in whenever PHYLIB is built-in or module @@ -15,7 +15,7 @@ libphy-$(CONFIG_SWPHY) += swphy.o libphy-$(CONFIG_LED_TRIGGER_PHY) += phy_led_triggers.o libphy-$(CONFIG_OPEN_ALLIANCE_HELPERS) += open_alliance_helpers.o -obj-$(CONFIG_MDIO_BUS) += mdio-bus.o +obj-$(CONFIG_MDIO_BUS) += mdio_bus.o obj-$(CONFIG_PHYLINK) += phylink.o obj-$(CONFIG_PHYLIB) += libphy.o obj-$(CONFIG_PHYLIB) += mdio_devres.o diff --git a/drivers/net/phy/mdio-private.h b/drivers/net/phy/mdio-private.h deleted file mode 100644 index 8bc6d9088af1..000000000000 --- a/drivers/net/phy/mdio-private.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef __MDIO_PRIVATE_H -#define __MDIO_PRIVATE_H - -/* MDIO internal helpers - */ - -int mdio_device_register_reset(struct mdio_device *mdiodev); -void mdio_device_unregister_reset(struct mdio_device *mdiodev); - -#endif /* __MDIO_PRIVATE_H */ diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 48c0447e6a8f..a30c679feeca 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -29,46 +29,10 @@ #include #include #include -#include "mdio-private.h" #define CREATE_TRACE_POINTS #include -int mdiobus_register_device(struct mdio_device *mdiodev) -{ - int err; - - if (mdiodev->bus->mdio_map[mdiodev->addr]) - return -EBUSY; - - if (mdiodev->flags & MDIO_DEVICE_FLAG_PHY) { - err = mdio_device_register_reset(mdiodev); - if (err) - return err; - - /* Assert the reset signal */ - mdio_device_reset(mdiodev, 1); - } - - mdiodev->bus->mdio_map[mdiodev->addr] = mdiodev; - - return 0; -} -EXPORT_SYMBOL(mdiobus_register_device); - -int mdiobus_unregister_device(struct mdio_device *mdiodev) -{ - if (mdiodev->bus->mdio_map[mdiodev->addr] != mdiodev) - return -EINVAL; - - mdio_device_unregister_reset(mdiodev); - - mdiodev->bus->mdio_map[mdiodev->addr] = NULL; - - return 0; -} -EXPORT_SYMBOL(mdiobus_unregister_device); - static struct mdio_device *mdiobus_find_device(struct mii_bus *bus, int addr) { bool addr_valid = addr >= 0 && addr < ARRAY_SIZE(bus->mdio_map); diff --git a/drivers/net/phy/mdio_device.c b/drivers/net/phy/mdio_device.c index da4fb7484c7c..56080d3d2d25 100644 --- a/drivers/net/phy/mdio_device.c +++ b/drivers/net/phy/mdio_device.c @@ -22,7 +22,7 @@ #include #include #include -#include "mdio-private.h" +#include "phylib-internal.h" /** * mdio_device_register_reset - Read and initialize the reset properties of @@ -31,7 +31,7 @@ * * Return: Zero if successful, negative error code on failure */ -int mdio_device_register_reset(struct mdio_device *mdiodev) +static int mdio_device_register_reset(struct mdio_device *mdiodev) { struct reset_control *reset; @@ -67,7 +67,7 @@ int mdio_device_register_reset(struct mdio_device *mdiodev) * an mdio device * @mdiodev: mdio_device structure */ -void mdio_device_unregister_reset(struct mdio_device *mdiodev) +static void mdio_device_unregister_reset(struct mdio_device *mdiodev) { gpiod_put(mdiodev->reset_gpio); mdiodev->reset_gpio = NULL; @@ -189,6 +189,39 @@ void mdio_device_remove(struct mdio_device *mdiodev) } EXPORT_SYMBOL(mdio_device_remove); +int mdiobus_register_device(struct mdio_device *mdiodev) +{ + int err; + + if (mdiodev->bus->mdio_map[mdiodev->addr]) + return -EBUSY; + + if (mdiodev->flags & MDIO_DEVICE_FLAG_PHY) { + err = mdio_device_register_reset(mdiodev); + if (err) + return err; + + /* Assert the reset signal */ + mdio_device_reset(mdiodev, 1); + } + + mdiodev->bus->mdio_map[mdiodev->addr] = mdiodev; + + return 0; +} + +int mdiobus_unregister_device(struct mdio_device *mdiodev) +{ + if (mdiodev->bus->mdio_map[mdiodev->addr] != mdiodev) + return -EINVAL; + + mdio_device_unregister_reset(mdiodev); + + mdiodev->bus->mdio_map[mdiodev->addr] = NULL; + + return 0; +} + /** * mdio_probe - probe an MDIO device * @dev: device to probe diff --git a/drivers/net/phy/phylib-internal.h b/drivers/net/phy/phylib-internal.h index dc9592c6bb8e..bfb1aa823868 100644 --- a/drivers/net/phy/phylib-internal.h +++ b/drivers/net/phy/phylib-internal.h @@ -6,6 +6,7 @@ #ifndef __PHYLIB_INTERNAL_H #define __PHYLIB_INTERNAL_H +struct mdio_device; struct phy_device; /* @@ -20,6 +21,9 @@ void of_set_phy_timing_role(struct phy_device *phydev); int phy_speed_down_core(struct phy_device *phydev); void phy_check_downshift(struct phy_device *phydev); +int mdiobus_register_device(struct mdio_device *mdiodev); +int mdiobus_unregister_device(struct mdio_device *mdiodev); + int genphy_c45_read_eee_adv(struct phy_device *phydev, unsigned long *adv); #endif /* __PHYLIB_INTERNAL_H */ diff --git a/drivers/phy/broadcom/Kconfig b/drivers/phy/broadcom/Kconfig index 1d89a2fd9b79..46371a8940d7 100644 --- a/drivers/phy/broadcom/Kconfig +++ b/drivers/phy/broadcom/Kconfig @@ -52,7 +52,7 @@ config PHY_BCM_NS_USB3 tristate "Broadcom Northstar USB 3.0 PHY Driver" depends on ARCH_BCM_IPROC || COMPILE_TEST depends on HAS_IOMEM && OF - depends on MDIO_BUS + depends on PHYLIB select GENERIC_PHY help Enable this to support Broadcom USB 3.0 PHY connected to the USB @@ -60,7 +60,7 @@ config PHY_BCM_NS_USB3 config PHY_NS2_PCIE tristate "Broadcom Northstar2 PCIe PHY driver" - depends on (OF && MDIO_BUS_MUX_BCM_IPROC) || (COMPILE_TEST && MDIO_BUS) + depends on (OF && MDIO_BUS_MUX_BCM_IPROC) || (COMPILE_TEST && PHYLIB) select GENERIC_PHY default ARCH_BCM_IPROC help diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 5d1203b9af20..f4f9d9609448 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -688,8 +688,6 @@ static inline int mdiodev_c45_write(struct mdio_device *mdiodev, u32 devad, val); } -int mdiobus_register_device(struct mdio_device *mdiodev); -int mdiobus_unregister_device(struct mdio_device *mdiodev); bool mdiobus_is_registered_device(struct mii_bus *bus, int addr); struct phy_device *mdiobus_get_phy(struct mii_bus *bus, int addr); From b69ceb387aca23126421ed676a312576cc3e0aee Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 9 Mar 2026 18:04:08 +0100 Subject: [PATCH 0519/1561] net: phy: move (of_)mdio_find_bus to mdio_bus_provider.c Functionality outside libphy shouldn't access mdio_bus_class directly. So move both functions to the provider side. This is a step towards making mdio_bus_class private to libphy. Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/b6161c64-68ac-4524-82ec-5b7d81b86dbc@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/mdio_bus.c | 44 ----------------------------- drivers/net/phy/mdio_bus_provider.c | 44 +++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 44 deletions(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index a30c679feeca..c9a495390d26 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -276,50 +276,6 @@ const struct class mdio_bus_class = { }; EXPORT_SYMBOL_GPL(mdio_bus_class); -/** - * mdio_find_bus - Given the name of a mdiobus, find the mii_bus. - * @mdio_name: The name of a mdiobus. - * - * Return: a reference to the mii_bus, or NULL if none found. The - * embedded struct device will have its reference count incremented, - * and this must be put_deviced'ed once the bus is finished with. - */ -struct mii_bus *mdio_find_bus(const char *mdio_name) -{ - struct device *d; - - d = class_find_device_by_name(&mdio_bus_class, mdio_name); - return d ? to_mii_bus(d) : NULL; -} -EXPORT_SYMBOL(mdio_find_bus); - -#if IS_ENABLED(CONFIG_OF_MDIO) -/** - * of_mdio_find_bus - Given an mii_bus node, find the mii_bus. - * @mdio_bus_np: Pointer to the mii_bus. - * - * Return: a reference to the mii_bus, or NULL if none found. The - * embedded struct device will have its reference count incremented, - * and this must be put once the bus is finished with. - * - * Because the association of a device_node and mii_bus is made via - * of_mdiobus_register(), the mii_bus cannot be found before it is - * registered with of_mdiobus_register(). - * - */ -struct mii_bus *of_mdio_find_bus(struct device_node *mdio_bus_np) -{ - struct device *d; - - if (!mdio_bus_np) - return NULL; - - d = class_find_device_by_of_node(&mdio_bus_class, mdio_bus_np); - return d ? to_mii_bus(d) : NULL; -} -EXPORT_SYMBOL(of_mdio_find_bus); -#endif - static void mdiobus_stats_acct(struct mdio_bus_stats *stats, bool op, int ret) { preempt_disable(); diff --git a/drivers/net/phy/mdio_bus_provider.c b/drivers/net/phy/mdio_bus_provider.c index 4b0637405740..d50fe6eb4b02 100644 --- a/drivers/net/phy/mdio_bus_provider.c +++ b/drivers/net/phy/mdio_bus_provider.c @@ -440,3 +440,47 @@ void mdiobus_free(struct mii_bus *bus) put_device(&bus->dev); } EXPORT_SYMBOL(mdiobus_free); + +/** + * mdio_find_bus - Given the name of a mdiobus, find the mii_bus. + * @mdio_name: The name of a mdiobus. + * + * Return: a reference to the mii_bus, or NULL if none found. The + * embedded struct device will have its reference count incremented, + * and this must be put_deviced'ed once the bus is finished with. + */ +struct mii_bus *mdio_find_bus(const char *mdio_name) +{ + struct device *d; + + d = class_find_device_by_name(&mdio_bus_class, mdio_name); + return d ? to_mii_bus(d) : NULL; +} +EXPORT_SYMBOL(mdio_find_bus); + +#if IS_ENABLED(CONFIG_OF_MDIO) +/** + * of_mdio_find_bus - Given an mii_bus node, find the mii_bus. + * @mdio_bus_np: Pointer to the mii_bus. + * + * Return: a reference to the mii_bus, or NULL if none found. The + * embedded struct device will have its reference count incremented, + * and this must be put once the bus is finished with. + * + * Because the association of a device_node and mii_bus is made via + * of_mdiobus_register(), the mii_bus cannot be found before it is + * registered with of_mdiobus_register(). + * + */ +struct mii_bus *of_mdio_find_bus(struct device_node *mdio_bus_np) +{ + struct device *d; + + if (!mdio_bus_np) + return NULL; + + d = class_find_device_by_of_node(&mdio_bus_class, mdio_bus_np); + return d ? to_mii_bus(d) : NULL; +} +EXPORT_SYMBOL(of_mdio_find_bus); +#endif From 25b23d82831870b8581abe6a24970ffd95675bc7 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 9 Mar 2026 18:04:46 +0100 Subject: [PATCH 0520/1561] net: phy: move registering mdio_bus_class and mdio_bus_type to libphy The MDIO consumer side shouldn't register class and bus_type. Therefore move this to libphy. Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/b15b378a-fda2-44b9-9d63-bf82919b71b2@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/mdio_bus.c | 23 ----------------------- drivers/net/phy/phy_device.c | 13 +++++++++++++ 2 files changed, 13 insertions(+), 23 deletions(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index c9a495390d26..9fb473326027 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -905,28 +905,5 @@ const struct bus_type mdio_bus_type = { }; EXPORT_SYMBOL(mdio_bus_type); -static int __init mdio_bus_init(void) -{ - int ret; - - ret = class_register(&mdio_bus_class); - if (!ret) { - ret = bus_register(&mdio_bus_type); - if (ret) - class_unregister(&mdio_bus_class); - } - - return ret; -} - -static void __exit mdio_bus_exit(void) -{ - class_unregister(&mdio_bus_class); - bus_unregister(&mdio_bus_type); -} - -subsys_initcall(mdio_bus_init); -module_exit(mdio_bus_exit); - MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("MDIO bus/device layer"); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index d1cbcfc3d2a6..0edff47478c2 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -3913,6 +3913,14 @@ static int __init phy_init(void) { int rc; + rc = class_register(&mdio_bus_class); + if (rc) + return rc; + + rc = bus_register(&mdio_bus_type); + if (rc) + goto err_class; + rtnl_lock(); ethtool_set_ethtool_phy_ops(&phy_ethtool_phy_ops); phylib_register_stubs(); @@ -3941,6 +3949,9 @@ err_ethtool_phy_ops: phylib_unregister_stubs(); ethtool_set_ethtool_phy_ops(NULL); rtnl_unlock(); + bus_unregister(&mdio_bus_type); +err_class: + class_unregister(&mdio_bus_class); return rc; } @@ -3953,6 +3964,8 @@ static void __exit phy_exit(void) phylib_unregister_stubs(); ethtool_set_ethtool_phy_ops(NULL); rtnl_unlock(); + bus_unregister(&mdio_bus_type); + class_unregister(&mdio_bus_class); } subsys_initcall(phy_init); From c4399af5e55658e832779b256d8458323011f983 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 9 Mar 2026 18:06:00 +0100 Subject: [PATCH 0521/1561] net: phy: move remaining provider code to mdio_bus_provider.c This moves definition of mdio_bus class and bus_type to the provider side, what allows to make them private to libphy. As a prerequisite MDIO statistics handling is moved to the provider side as well. Note: This patch causes a checkpatch error "Macros with complex values should be enclosed in parentheses" for MDIO_BUS_STATS_ADDR_ATTR_GROUP. I consider this a false positive here, in addition the patch just moves existing code. Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/47b85676-b349-4aa0-a5ef-cd37769a4c69@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/mdio_bus.c | 282 ---------------------------- drivers/net/phy/mdio_bus_provider.c | 275 +++++++++++++++++++++++++++ drivers/net/phy/phylib-internal.h | 3 + include/linux/phy.h | 3 - 4 files changed, 278 insertions(+), 285 deletions(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 9fb473326027..00d0e4159e9b 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -10,20 +10,14 @@ #include #include -#include #include -#include #include #include #include #include #include #include -#include -#include -#include #include -#include #include #include #include @@ -64,218 +58,6 @@ bool mdiobus_is_registered_device(struct mii_bus *bus, int addr) } EXPORT_SYMBOL(mdiobus_is_registered_device); -/** - * mdiobus_release - mii_bus device release callback - * @d: the target struct device that contains the mii_bus - * - * Description: called when the last reference to an mii_bus is - * dropped, to free the underlying memory. - */ -static void mdiobus_release(struct device *d) -{ - struct mii_bus *bus = to_mii_bus(d); - - WARN(bus->state != MDIOBUS_RELEASED && - /* for compatibility with error handling in drivers */ - bus->state != MDIOBUS_ALLOCATED, - "%s: not in RELEASED or ALLOCATED state\n", - bus->id); - - if (bus->state == MDIOBUS_RELEASED) - fwnode_handle_put(dev_fwnode(d)); - - kfree(bus); -} - -struct mdio_bus_stat_attr { - struct device_attribute attr; - int address; - unsigned int field_offset; -}; - -static struct mdio_bus_stat_attr *to_sattr(struct device_attribute *attr) -{ - return container_of(attr, struct mdio_bus_stat_attr, attr); -} - -static u64 mdio_bus_get_stat(struct mdio_bus_stats *s, unsigned int offset) -{ - const u64_stats_t *stats = (const void *)s + offset; - unsigned int start; - u64 val = 0; - - do { - start = u64_stats_fetch_begin(&s->syncp); - val = u64_stats_read(stats); - } while (u64_stats_fetch_retry(&s->syncp, start)); - - return val; -} - -static ssize_t mdio_bus_stat_field_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct mdio_bus_stat_attr *sattr = to_sattr(attr); - struct mii_bus *bus = to_mii_bus(dev); - u64 val = 0; - - if (sattr->address < 0) { - /* get global stats */ - for (int i = 0; i < PHY_MAX_ADDR; i++) - val += mdio_bus_get_stat(&bus->stats[i], - sattr->field_offset); - } else { - val = mdio_bus_get_stat(&bus->stats[sattr->address], - sattr->field_offset); - } - - return sysfs_emit(buf, "%llu\n", val); -} - -static ssize_t mdio_bus_device_stat_field_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct mdio_bus_stat_attr *sattr = to_sattr(attr); - struct mdio_device *mdiodev = to_mdio_device(dev); - struct mii_bus *bus = mdiodev->bus; - int addr = mdiodev->addr; - u64 val; - - val = mdio_bus_get_stat(&bus->stats[addr], sattr->field_offset); - - return sysfs_emit(buf, "%llu\n", val); -} - -#define MDIO_BUS_STATS_ATTR(field) \ -static const struct mdio_bus_stat_attr dev_attr_mdio_bus_##field = { \ - .attr = __ATTR(field, 0444, mdio_bus_stat_field_show, NULL), \ - .address = -1, \ - .field_offset = offsetof(struct mdio_bus_stats, field), \ -}; \ -static const struct mdio_bus_stat_attr dev_attr_mdio_bus_device_##field = { \ - .attr = __ATTR(field, 0444, mdio_bus_device_stat_field_show, NULL), \ - .field_offset = offsetof(struct mdio_bus_stats, field), \ -} - -MDIO_BUS_STATS_ATTR(transfers); -MDIO_BUS_STATS_ATTR(errors); -MDIO_BUS_STATS_ATTR(writes); -MDIO_BUS_STATS_ATTR(reads); - -#define MDIO_BUS_STATS_ADDR_ATTR_DECL(field, addr, file) \ -static const struct mdio_bus_stat_attr \ -dev_attr_mdio_bus_addr_##field##_##addr = { \ - .attr = { .attr = { .name = file, .mode = 0444 }, \ - .show = mdio_bus_stat_field_show, \ - }, \ - .address = addr, \ - .field_offset = offsetof(struct mdio_bus_stats, field), \ -} - -#define MDIO_BUS_STATS_ADDR_ATTR(field, addr) \ - MDIO_BUS_STATS_ADDR_ATTR_DECL(field, addr, \ - __stringify(field) "_" __stringify(addr)) - -#define MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(addr) \ - MDIO_BUS_STATS_ADDR_ATTR(transfers, addr); \ - MDIO_BUS_STATS_ADDR_ATTR(errors, addr); \ - MDIO_BUS_STATS_ADDR_ATTR(writes, addr); \ - MDIO_BUS_STATS_ADDR_ATTR(reads, addr) \ - -MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(0); -MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(1); -MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(2); -MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(3); -MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(4); -MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(5); -MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(6); -MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(7); -MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(8); -MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(9); -MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(10); -MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(11); -MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(12); -MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(13); -MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(14); -MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(15); -MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(16); -MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(17); -MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(18); -MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(19); -MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(20); -MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(21); -MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(22); -MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(23); -MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(24); -MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(25); -MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(26); -MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(27); -MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(28); -MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(29); -MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(30); -MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(31); - -#define MDIO_BUS_STATS_ADDR_ATTR_GROUP(addr) \ - &dev_attr_mdio_bus_addr_transfers_##addr.attr.attr, \ - &dev_attr_mdio_bus_addr_errors_##addr.attr.attr, \ - &dev_attr_mdio_bus_addr_writes_##addr.attr.attr, \ - &dev_attr_mdio_bus_addr_reads_##addr.attr.attr \ - -static const struct attribute *const mdio_bus_statistics_attrs[] = { - &dev_attr_mdio_bus_transfers.attr.attr, - &dev_attr_mdio_bus_errors.attr.attr, - &dev_attr_mdio_bus_writes.attr.attr, - &dev_attr_mdio_bus_reads.attr.attr, - MDIO_BUS_STATS_ADDR_ATTR_GROUP(0), - MDIO_BUS_STATS_ADDR_ATTR_GROUP(1), - MDIO_BUS_STATS_ADDR_ATTR_GROUP(2), - MDIO_BUS_STATS_ADDR_ATTR_GROUP(3), - MDIO_BUS_STATS_ADDR_ATTR_GROUP(4), - MDIO_BUS_STATS_ADDR_ATTR_GROUP(5), - MDIO_BUS_STATS_ADDR_ATTR_GROUP(6), - MDIO_BUS_STATS_ADDR_ATTR_GROUP(7), - MDIO_BUS_STATS_ADDR_ATTR_GROUP(8), - MDIO_BUS_STATS_ADDR_ATTR_GROUP(9), - MDIO_BUS_STATS_ADDR_ATTR_GROUP(10), - MDIO_BUS_STATS_ADDR_ATTR_GROUP(11), - MDIO_BUS_STATS_ADDR_ATTR_GROUP(12), - MDIO_BUS_STATS_ADDR_ATTR_GROUP(13), - MDIO_BUS_STATS_ADDR_ATTR_GROUP(14), - MDIO_BUS_STATS_ADDR_ATTR_GROUP(15), - MDIO_BUS_STATS_ADDR_ATTR_GROUP(16), - MDIO_BUS_STATS_ADDR_ATTR_GROUP(17), - MDIO_BUS_STATS_ADDR_ATTR_GROUP(18), - MDIO_BUS_STATS_ADDR_ATTR_GROUP(19), - MDIO_BUS_STATS_ADDR_ATTR_GROUP(20), - MDIO_BUS_STATS_ADDR_ATTR_GROUP(21), - MDIO_BUS_STATS_ADDR_ATTR_GROUP(22), - MDIO_BUS_STATS_ADDR_ATTR_GROUP(23), - MDIO_BUS_STATS_ADDR_ATTR_GROUP(24), - MDIO_BUS_STATS_ADDR_ATTR_GROUP(25), - MDIO_BUS_STATS_ADDR_ATTR_GROUP(26), - MDIO_BUS_STATS_ADDR_ATTR_GROUP(27), - MDIO_BUS_STATS_ADDR_ATTR_GROUP(28), - MDIO_BUS_STATS_ADDR_ATTR_GROUP(29), - MDIO_BUS_STATS_ADDR_ATTR_GROUP(30), - MDIO_BUS_STATS_ADDR_ATTR_GROUP(31), - NULL, -}; - -static const struct attribute_group mdio_bus_statistics_group = { - .name = "statistics", - .attrs_const = mdio_bus_statistics_attrs, -}; -__ATTRIBUTE_GROUPS(mdio_bus_statistics); - -const struct class mdio_bus_class = { - .name = "mdio_bus", - .dev_release = mdiobus_release, - .dev_groups = mdio_bus_statistics_groups, -}; -EXPORT_SYMBOL_GPL(mdio_bus_class); - static void mdiobus_stats_acct(struct mdio_bus_stats *stats, bool op, int ret) { preempt_disable(); @@ -841,69 +623,5 @@ int mdiobus_c45_modify_changed(struct mii_bus *bus, int addr, int devad, } EXPORT_SYMBOL_GPL(mdiobus_c45_modify_changed); -/** - * mdio_bus_match - determine if given MDIO driver supports the given - * MDIO device - * @dev: target MDIO device - * @drv: given MDIO driver - * - * Return: 1 if the driver supports the device, 0 otherwise - * - * Description: This may require calling the devices own match function, - * since different classes of MDIO devices have different match criteria. - */ -static int mdio_bus_match(struct device *dev, const struct device_driver *drv) -{ - const struct mdio_driver *mdiodrv = to_mdio_driver(drv); - struct mdio_device *mdio = to_mdio_device(dev); - - /* Both the driver and device must type-match */ - if (!(mdiodrv->mdiodrv.flags & MDIO_DEVICE_IS_PHY) != - !(mdio->flags & MDIO_DEVICE_FLAG_PHY)) - return 0; - - if (of_driver_match_device(dev, drv)) - return 1; - - if (mdio->bus_match) - return mdio->bus_match(dev, drv); - - return 0; -} - -static int mdio_uevent(const struct device *dev, struct kobj_uevent_env *env) -{ - int rc; - - /* Some devices have extra OF data and an OF-style MODALIAS */ - rc = of_device_uevent_modalias(dev, env); - if (rc != -ENODEV) - return rc; - - return 0; -} - -static const struct attribute *const mdio_bus_device_statistics_attrs[] = { - &dev_attr_mdio_bus_device_transfers.attr.attr, - &dev_attr_mdio_bus_device_errors.attr.attr, - &dev_attr_mdio_bus_device_writes.attr.attr, - &dev_attr_mdio_bus_device_reads.attr.attr, - NULL, -}; - -static const struct attribute_group mdio_bus_device_statistics_group = { - .name = "statistics", - .attrs_const = mdio_bus_device_statistics_attrs, -}; -__ATTRIBUTE_GROUPS(mdio_bus_device_statistics); - -const struct bus_type mdio_bus_type = { - .name = "mdio_bus", - .dev_groups = mdio_bus_device_statistics_groups, - .match = mdio_bus_match, - .uevent = mdio_uevent, -}; -EXPORT_SYMBOL(mdio_bus_type); - MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("MDIO bus/device layer"); diff --git a/drivers/net/phy/mdio_bus_provider.c b/drivers/net/phy/mdio_bus_provider.c index d50fe6eb4b02..041576eba47a 100644 --- a/drivers/net/phy/mdio_bus_provider.c +++ b/drivers/net/phy/mdio_bus_provider.c @@ -28,6 +28,281 @@ #include #include #include +#include "phylib-internal.h" + +/** + * mdiobus_release - mii_bus device release callback + * @d: the target struct device that contains the mii_bus + * + * Description: called when the last reference to an mii_bus is + * dropped, to free the underlying memory. + */ +static void mdiobus_release(struct device *d) +{ + struct mii_bus *bus = to_mii_bus(d); + + WARN(bus->state != MDIOBUS_RELEASED && + /* for compatibility with error handling in drivers */ + bus->state != MDIOBUS_ALLOCATED, + "%s: not in RELEASED or ALLOCATED state\n", + bus->id); + + if (bus->state == MDIOBUS_RELEASED) + fwnode_handle_put(dev_fwnode(d)); + + kfree(bus); +} + +struct mdio_bus_stat_attr { + struct device_attribute attr; + int address; + unsigned int field_offset; +}; + +static struct mdio_bus_stat_attr *to_sattr(struct device_attribute *attr) +{ + return container_of(attr, struct mdio_bus_stat_attr, attr); +} + +static u64 mdio_bus_get_stat(struct mdio_bus_stats *s, unsigned int offset) +{ + const u64_stats_t *stats = (const void *)s + offset; + unsigned int start; + u64 val = 0; + + do { + start = u64_stats_fetch_begin(&s->syncp); + val = u64_stats_read(stats); + } while (u64_stats_fetch_retry(&s->syncp, start)); + + return val; +} + +static ssize_t mdio_bus_stat_field_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mdio_bus_stat_attr *sattr = to_sattr(attr); + struct mii_bus *bus = to_mii_bus(dev); + u64 val = 0; + + if (sattr->address < 0) { + /* get global stats */ + for (int i = 0; i < PHY_MAX_ADDR; i++) + val += mdio_bus_get_stat(&bus->stats[i], + sattr->field_offset); + } else { + val = mdio_bus_get_stat(&bus->stats[sattr->address], + sattr->field_offset); + } + + return sysfs_emit(buf, "%llu\n", val); +} + +static ssize_t mdio_bus_device_stat_field_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mdio_bus_stat_attr *sattr = to_sattr(attr); + struct mdio_device *mdiodev = to_mdio_device(dev); + struct mii_bus *bus = mdiodev->bus; + int addr = mdiodev->addr; + u64 val; + + val = mdio_bus_get_stat(&bus->stats[addr], sattr->field_offset); + + return sysfs_emit(buf, "%llu\n", val); +} + +#define MDIO_BUS_STATS_ATTR(field) \ +static const struct mdio_bus_stat_attr dev_attr_mdio_bus_##field = { \ + .attr = __ATTR(field, 0444, mdio_bus_stat_field_show, NULL), \ + .address = -1, \ + .field_offset = offsetof(struct mdio_bus_stats, field), \ +}; \ +static const struct mdio_bus_stat_attr dev_attr_mdio_bus_device_##field = { \ + .attr = __ATTR(field, 0444, mdio_bus_device_stat_field_show, NULL), \ + .field_offset = offsetof(struct mdio_bus_stats, field), \ +} + +MDIO_BUS_STATS_ATTR(transfers); +MDIO_BUS_STATS_ATTR(errors); +MDIO_BUS_STATS_ATTR(writes); +MDIO_BUS_STATS_ATTR(reads); + +#define MDIO_BUS_STATS_ADDR_ATTR_DECL(field, addr, file) \ +static const struct mdio_bus_stat_attr \ +dev_attr_mdio_bus_addr_##field##_##addr = { \ + .attr = { .attr = { .name = file, .mode = 0444 }, \ + .show = mdio_bus_stat_field_show, \ + }, \ + .address = addr, \ + .field_offset = offsetof(struct mdio_bus_stats, field), \ +} + +#define MDIO_BUS_STATS_ADDR_ATTR(field, addr) \ + MDIO_BUS_STATS_ADDR_ATTR_DECL(field, addr, \ + __stringify(field) "_" __stringify(addr)) + +#define MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(addr) \ + MDIO_BUS_STATS_ADDR_ATTR(transfers, addr); \ + MDIO_BUS_STATS_ADDR_ATTR(errors, addr); \ + MDIO_BUS_STATS_ADDR_ATTR(writes, addr); \ + MDIO_BUS_STATS_ADDR_ATTR(reads, addr) \ + +MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(0); +MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(1); +MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(2); +MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(3); +MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(4); +MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(5); +MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(6); +MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(7); +MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(8); +MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(9); +MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(10); +MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(11); +MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(12); +MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(13); +MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(14); +MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(15); +MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(16); +MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(17); +MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(18); +MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(19); +MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(20); +MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(21); +MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(22); +MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(23); +MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(24); +MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(25); +MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(26); +MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(27); +MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(28); +MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(29); +MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(30); +MDIO_BUS_STATS_ADDR_ATTR_GROUP_DECL(31); + +#define MDIO_BUS_STATS_ADDR_ATTR_GROUP(addr) \ + &(dev_attr_mdio_bus_addr_transfers_##addr).attr.attr, \ + &(dev_attr_mdio_bus_addr_errors_##addr).attr.attr, \ + &(dev_attr_mdio_bus_addr_writes_##addr).attr.attr, \ + &(dev_attr_mdio_bus_addr_reads_##addr).attr.attr \ + +static const struct attribute *const mdio_bus_statistics_attrs[] = { + &dev_attr_mdio_bus_transfers.attr.attr, + &dev_attr_mdio_bus_errors.attr.attr, + &dev_attr_mdio_bus_writes.attr.attr, + &dev_attr_mdio_bus_reads.attr.attr, + MDIO_BUS_STATS_ADDR_ATTR_GROUP(0), + MDIO_BUS_STATS_ADDR_ATTR_GROUP(1), + MDIO_BUS_STATS_ADDR_ATTR_GROUP(2), + MDIO_BUS_STATS_ADDR_ATTR_GROUP(3), + MDIO_BUS_STATS_ADDR_ATTR_GROUP(4), + MDIO_BUS_STATS_ADDR_ATTR_GROUP(5), + MDIO_BUS_STATS_ADDR_ATTR_GROUP(6), + MDIO_BUS_STATS_ADDR_ATTR_GROUP(7), + MDIO_BUS_STATS_ADDR_ATTR_GROUP(8), + MDIO_BUS_STATS_ADDR_ATTR_GROUP(9), + MDIO_BUS_STATS_ADDR_ATTR_GROUP(10), + MDIO_BUS_STATS_ADDR_ATTR_GROUP(11), + MDIO_BUS_STATS_ADDR_ATTR_GROUP(12), + MDIO_BUS_STATS_ADDR_ATTR_GROUP(13), + MDIO_BUS_STATS_ADDR_ATTR_GROUP(14), + MDIO_BUS_STATS_ADDR_ATTR_GROUP(15), + MDIO_BUS_STATS_ADDR_ATTR_GROUP(16), + MDIO_BUS_STATS_ADDR_ATTR_GROUP(17), + MDIO_BUS_STATS_ADDR_ATTR_GROUP(18), + MDIO_BUS_STATS_ADDR_ATTR_GROUP(19), + MDIO_BUS_STATS_ADDR_ATTR_GROUP(20), + MDIO_BUS_STATS_ADDR_ATTR_GROUP(21), + MDIO_BUS_STATS_ADDR_ATTR_GROUP(22), + MDIO_BUS_STATS_ADDR_ATTR_GROUP(23), + MDIO_BUS_STATS_ADDR_ATTR_GROUP(24), + MDIO_BUS_STATS_ADDR_ATTR_GROUP(25), + MDIO_BUS_STATS_ADDR_ATTR_GROUP(26), + MDIO_BUS_STATS_ADDR_ATTR_GROUP(27), + MDIO_BUS_STATS_ADDR_ATTR_GROUP(28), + MDIO_BUS_STATS_ADDR_ATTR_GROUP(29), + MDIO_BUS_STATS_ADDR_ATTR_GROUP(30), + MDIO_BUS_STATS_ADDR_ATTR_GROUP(31), + NULL, +}; + +static const struct attribute_group mdio_bus_statistics_group = { + .name = "statistics", + .attrs_const = mdio_bus_statistics_attrs, +}; +__ATTRIBUTE_GROUPS(mdio_bus_statistics); + +const struct class mdio_bus_class = { + .name = "mdio_bus", + .dev_release = mdiobus_release, + .dev_groups = mdio_bus_statistics_groups, +}; + +/** + * mdio_bus_match - determine if given MDIO driver supports the given + * MDIO device + * @dev: target MDIO device + * @drv: given MDIO driver + * + * Return: 1 if the driver supports the device, 0 otherwise + * + * Description: This may require calling the devices own match function, + * since different classes of MDIO devices have different match criteria. + */ +static int mdio_bus_match(struct device *dev, const struct device_driver *drv) +{ + const struct mdio_driver *mdiodrv = to_mdio_driver(drv); + struct mdio_device *mdio = to_mdio_device(dev); + + /* Both the driver and device must type-match */ + if (!(mdiodrv->mdiodrv.flags & MDIO_DEVICE_IS_PHY) != + !(mdio->flags & MDIO_DEVICE_FLAG_PHY)) + return 0; + + if (of_driver_match_device(dev, drv)) + return 1; + + if (mdio->bus_match) + return mdio->bus_match(dev, drv); + + return 0; +} + +static int mdio_uevent(const struct device *dev, struct kobj_uevent_env *env) +{ + int rc; + + /* Some devices have extra OF data and an OF-style MODALIAS */ + rc = of_device_uevent_modalias(dev, env); + if (rc != -ENODEV) + return rc; + + return 0; +} + +static const struct attribute *const mdio_bus_device_statistics_attrs[] = { + &dev_attr_mdio_bus_device_transfers.attr.attr, + &dev_attr_mdio_bus_device_errors.attr.attr, + &dev_attr_mdio_bus_device_writes.attr.attr, + &dev_attr_mdio_bus_device_reads.attr.attr, + NULL, +}; + +static const struct attribute_group mdio_bus_device_statistics_group = { + .name = "statistics", + .attrs_const = mdio_bus_device_statistics_attrs, +}; +__ATTRIBUTE_GROUPS(mdio_bus_device_statistics); + +const struct bus_type mdio_bus_type = { + .name = "mdio_bus", + .dev_groups = mdio_bus_device_statistics_groups, + .match = mdio_bus_match, + .uevent = mdio_uevent, +}; /** * mdiobus_alloc_size - allocate a mii_bus structure diff --git a/drivers/net/phy/phylib-internal.h b/drivers/net/phy/phylib-internal.h index bfb1aa823868..664ed7faa518 100644 --- a/drivers/net/phy/phylib-internal.h +++ b/drivers/net/phy/phylib-internal.h @@ -9,6 +9,9 @@ struct mdio_device; struct phy_device; +extern const struct bus_type mdio_bus_type; +extern const struct class mdio_bus_class; + /* * phy_supported_speeds - return all speeds currently supported by a PHY device */ diff --git a/include/linux/phy.h b/include/linux/phy.h index e9b0d7427b0e..5de4b172cd0b 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -2446,9 +2446,6 @@ int __phy_hwtstamp_set(struct phy_device *phydev, struct phy_port *phy_get_sfp_port(struct phy_device *phydev); -extern const struct bus_type mdio_bus_type; -extern const struct class mdio_bus_class; - /** * phy_module_driver() - Helper macro for registering PHY drivers * @__phy_drivers: array of PHY drivers to register From 68deca0f0f4bba8c5278340c7c142500171d5f9b Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 12 Mar 2026 11:03:55 +0100 Subject: [PATCH 0522/1561] devlink: expose devlink instance index over netlink Each devlink instance has an internally assigned index used for xarray storage. Expose it as a new DEVLINK_ATTR_INDEX uint attribute alongside the existing bus_name and dev_name handle. Signed-off-by: Jiri Pirko Link: https://patch.msgid.link/20260312100407.551173-2-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/devlink.yaml | 5 +++++ include/uapi/linux/devlink.h | 2 ++ net/devlink/devl_internal.h | 2 ++ net/devlink/port.c | 1 + 4 files changed, 10 insertions(+) diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml index 837112da6738..1bed67a0eefb 100644 --- a/Documentation/netlink/specs/devlink.yaml +++ b/Documentation/netlink/specs/devlink.yaml @@ -867,6 +867,10 @@ attribute-sets: type: flag doc: Request restoring parameter to its default value. value: 183 + - + name: index + type: uint + doc: Unique devlink instance index. - name: dl-dev-stats subset-of: devlink @@ -1311,6 +1315,7 @@ operations: attributes: - bus-name - dev-name + - index - reload-failed - dev-stats dump: diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index e7d6b6d13470..1ba3436db4ae 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -642,6 +642,8 @@ enum devlink_attr { DEVLINK_ATTR_PARAM_VALUE_DEFAULT, /* dynamic */ DEVLINK_ATTR_PARAM_RESET_DEFAULT, /* flag */ + DEVLINK_ATTR_INDEX, /* uint */ + /* Add new attributes above here, update the spec in * Documentation/netlink/specs/devlink.yaml and re-generate * net/devlink/netlink_gen.c. diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h index 1377864383bc..31fa98af418e 100644 --- a/net/devlink/devl_internal.h +++ b/net/devlink/devl_internal.h @@ -178,6 +178,8 @@ devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink) return -EMSGSIZE; if (nla_put_string(msg, DEVLINK_ATTR_DEV_NAME, dev_name(devlink->dev))) return -EMSGSIZE; + if (nla_put_uint(msg, DEVLINK_ATTR_INDEX, devlink->index)) + return -EMSGSIZE; return 0; } diff --git a/net/devlink/port.c b/net/devlink/port.c index 93d8a25bb920..1ff609571ea4 100644 --- a/net/devlink/port.c +++ b/net/devlink/port.c @@ -222,6 +222,7 @@ size_t devlink_nl_port_handle_size(struct devlink_port *devlink_port) return nla_total_size(strlen(devlink->dev->bus->name) + 1) /* DEVLINK_ATTR_BUS_NAME */ + nla_total_size(strlen(dev_name(devlink->dev)) + 1) /* DEVLINK_ATTR_DEV_NAME */ + + nla_total_size(8) /* DEVLINK_ATTR_INDEX */ + nla_total_size(4); /* DEVLINK_ATTR_PORT_INDEX */ } From 0f5531879afbf904f19a15b39f687a9ec47a82cc Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 12 Mar 2026 11:03:56 +0100 Subject: [PATCH 0523/1561] devlink: add helpers to get bus_name/dev_name Introduce devlink_bus_name() and devlink_dev_name() helpers and convert all direct accesses to devlink->dev->bus->name and dev_name(devlink->dev) to use them. This prepares for dev-less devlink instances where these helpers will be extended to handle the missing device. Signed-off-by: Jiri Pirko Link: https://patch.msgid.link/20260312100407.551173-3-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 2 ++ include/trace/events/devlink.h | 24 ++++++++++++------------ net/devlink/core.c | 12 ++++++++++++ net/devlink/devl_internal.h | 8 ++++---- net/devlink/netlink.c | 4 ++-- net/devlink/port.c | 4 ++-- 6 files changed, 34 insertions(+), 20 deletions(-) diff --git a/include/net/devlink.h b/include/net/devlink.h index cb839e0435a1..0afb0958b910 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1611,6 +1611,8 @@ struct devlink_ops { void *devlink_priv(struct devlink *devlink); struct devlink *priv_to_devlink(void *priv); struct device *devlink_to_dev(const struct devlink *devlink); +const char *devlink_bus_name(const struct devlink *devlink); +const char *devlink_dev_name(const struct devlink *devlink); /* Devlink instance explicit locking */ void devl_lock(struct devlink *devlink); diff --git a/include/trace/events/devlink.h b/include/trace/events/devlink.h index f241e204fe6b..32304ce9ad15 100644 --- a/include/trace/events/devlink.h +++ b/include/trace/events/devlink.h @@ -21,8 +21,8 @@ TRACE_EVENT(devlink_hwmsg, TP_ARGS(devlink, incoming, type, buf, len), TP_STRUCT__entry( - __string(bus_name, devlink_to_dev(devlink)->bus->name) - __string(dev_name, dev_name(devlink_to_dev(devlink))) + __string(bus_name, devlink_bus_name(devlink)) + __string(dev_name, devlink_dev_name(devlink)) __string(driver_name, devlink_to_dev(devlink)->driver->name) __field(bool, incoming) __field(unsigned long, type) @@ -55,8 +55,8 @@ TRACE_EVENT(devlink_hwerr, TP_ARGS(devlink, err, msg), TP_STRUCT__entry( - __string(bus_name, devlink_to_dev(devlink)->bus->name) - __string(dev_name, dev_name(devlink_to_dev(devlink))) + __string(bus_name, devlink_bus_name(devlink)) + __string(dev_name, devlink_dev_name(devlink)) __string(driver_name, devlink_to_dev(devlink)->driver->name) __field(int, err) __string(msg, msg) @@ -85,8 +85,8 @@ TRACE_EVENT(devlink_health_report, TP_ARGS(devlink, reporter_name, msg), TP_STRUCT__entry( - __string(bus_name, devlink_to_dev(devlink)->bus->name) - __string(dev_name, dev_name(devlink_to_dev(devlink))) + __string(bus_name, devlink_bus_name(devlink)) + __string(dev_name, devlink_dev_name(devlink)) __string(driver_name, devlink_to_dev(devlink)->driver->name) __string(reporter_name, reporter_name) __string(msg, msg) @@ -116,8 +116,8 @@ TRACE_EVENT(devlink_health_recover_aborted, TP_ARGS(devlink, reporter_name, health_state, time_since_last_recover), TP_STRUCT__entry( - __string(bus_name, devlink_to_dev(devlink)->bus->name) - __string(dev_name, dev_name(devlink_to_dev(devlink))) + __string(bus_name, devlink_bus_name(devlink)) + __string(dev_name, devlink_dev_name(devlink)) __string(driver_name, devlink_to_dev(devlink)->driver->name) __string(reporter_name, reporter_name) __field(bool, health_state) @@ -150,8 +150,8 @@ TRACE_EVENT(devlink_health_reporter_state_update, TP_ARGS(devlink, reporter_name, new_state), TP_STRUCT__entry( - __string(bus_name, devlink_to_dev(devlink)->bus->name) - __string(dev_name, dev_name(devlink_to_dev(devlink))) + __string(bus_name, devlink_bus_name(devlink)) + __string(dev_name, devlink_dev_name(devlink)) __string(driver_name, devlink_to_dev(devlink)->driver->name) __string(reporter_name, reporter_name) __field(u8, new_state) @@ -181,8 +181,8 @@ TRACE_EVENT(devlink_trap_report, TP_ARGS(devlink, skb, metadata), TP_STRUCT__entry( - __string(bus_name, devlink_to_dev(devlink)->bus->name) - __string(dev_name, dev_name(devlink_to_dev(devlink))) + __string(bus_name, devlink_bus_name(devlink)) + __string(dev_name, devlink_dev_name(devlink)) __string(driver_name, devlink_to_dev(devlink)->driver->name) __string(trap_name, metadata->trap_name) __string(trap_group_name, metadata->trap_group_name) diff --git a/net/devlink/core.c b/net/devlink/core.c index d8e509a669bf..63709c132a7c 100644 --- a/net/devlink/core.c +++ b/net/devlink/core.c @@ -248,6 +248,18 @@ struct device *devlink_to_dev(const struct devlink *devlink) } EXPORT_SYMBOL_GPL(devlink_to_dev); +const char *devlink_bus_name(const struct devlink *devlink) +{ + return devlink->dev->bus->name; +} +EXPORT_SYMBOL_GPL(devlink_bus_name); + +const char *devlink_dev_name(const struct devlink *devlink) +{ + return dev_name(devlink->dev); +} +EXPORT_SYMBOL_GPL(devlink_dev_name); + struct net *devlink_net(const struct devlink *devlink) { return read_pnet(&devlink->_net); diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h index 31fa98af418e..1b770de0313e 100644 --- a/net/devlink/devl_internal.h +++ b/net/devlink/devl_internal.h @@ -174,9 +174,9 @@ devlink_dump_state(struct netlink_callback *cb) static inline int devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink) { - if (nla_put_string(msg, DEVLINK_ATTR_BUS_NAME, devlink->dev->bus->name)) + if (nla_put_string(msg, DEVLINK_ATTR_BUS_NAME, devlink_bus_name(devlink))) return -EMSGSIZE; - if (nla_put_string(msg, DEVLINK_ATTR_DEV_NAME, dev_name(devlink->dev))) + if (nla_put_string(msg, DEVLINK_ATTR_DEV_NAME, devlink_dev_name(devlink))) return -EMSGSIZE; if (nla_put_uint(msg, DEVLINK_ATTR_INDEX, devlink->index)) return -EMSGSIZE; @@ -211,8 +211,8 @@ static inline void devlink_nl_obj_desc_init(struct devlink_obj_desc *desc, struct devlink *devlink) { memset(desc, 0, sizeof(*desc)); - desc->bus_name = devlink->dev->bus->name; - desc->dev_name = dev_name(devlink->dev); + desc->bus_name = devlink_bus_name(devlink); + desc->dev_name = devlink_dev_name(devlink); } static inline void devlink_nl_obj_desc_port_set(struct devlink_obj_desc *desc, diff --git a/net/devlink/netlink.c b/net/devlink/netlink.c index 593605c1b1ef..56817b85a3f9 100644 --- a/net/devlink/netlink.c +++ b/net/devlink/netlink.c @@ -193,8 +193,8 @@ devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs, devname = nla_data(attrs[DEVLINK_ATTR_DEV_NAME]); devlinks_xa_for_each_registered_get(net, index, devlink) { - if (strcmp(devlink->dev->bus->name, busname) == 0 && - strcmp(dev_name(devlink->dev), devname) == 0) { + if (strcmp(devlink_bus_name(devlink), busname) == 0 && + strcmp(devlink_dev_name(devlink), devname) == 0) { devl_dev_lock(devlink, dev_lock); if (devl_is_registered(devlink)) return devlink; diff --git a/net/devlink/port.c b/net/devlink/port.c index 1ff609571ea4..fa3e1597711b 100644 --- a/net/devlink/port.c +++ b/net/devlink/port.c @@ -220,8 +220,8 @@ size_t devlink_nl_port_handle_size(struct devlink_port *devlink_port) { struct devlink *devlink = devlink_port->devlink; - return nla_total_size(strlen(devlink->dev->bus->name) + 1) /* DEVLINK_ATTR_BUS_NAME */ - + nla_total_size(strlen(dev_name(devlink->dev)) + 1) /* DEVLINK_ATTR_DEV_NAME */ + return nla_total_size(strlen(devlink_bus_name(devlink)) + 1) /* DEVLINK_ATTR_BUS_NAME */ + + nla_total_size(strlen(devlink_dev_name(devlink)) + 1) /* DEVLINK_ATTR_DEV_NAME */ + nla_total_size(8) /* DEVLINK_ATTR_INDEX */ + nla_total_size(4); /* DEVLINK_ATTR_PORT_INDEX */ } From e2e3666fd3609bfc03c42bd8544be8cbd080d24d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 12 Mar 2026 11:03:57 +0100 Subject: [PATCH 0524/1561] devlink: avoid extra iterations when found devlink is not registered Since the one found is not registered, very unlikely another one with the same bus_name/dev_name is going to be found. Stop right away and prepare common "found" path for the follow-up patch. Signed-off-by: Jiri Pirko Link: https://patch.msgid.link/20260312100407.551173-4-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- net/devlink/netlink.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/net/devlink/netlink.c b/net/devlink/netlink.c index 56817b85a3f9..7b205f677b7a 100644 --- a/net/devlink/netlink.c +++ b/net/devlink/netlink.c @@ -194,16 +194,20 @@ devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs, devlinks_xa_for_each_registered_get(net, index, devlink) { if (strcmp(devlink_bus_name(devlink), busname) == 0 && - strcmp(devlink_dev_name(devlink), devname) == 0) { - devl_dev_lock(devlink, dev_lock); - if (devl_is_registered(devlink)) - return devlink; - devl_dev_unlock(devlink, dev_lock); - } + strcmp(devlink_dev_name(devlink), devname) == 0) + goto found; devlink_put(devlink); } return ERR_PTR(-ENODEV); + +found: + devl_dev_lock(devlink, dev_lock); + if (devl_is_registered(devlink)) + return devlink; + devl_dev_unlock(devlink, dev_lock); + devlink_put(devlink); + return ERR_PTR(-ENODEV); } static int __devlink_nl_pre_doit(struct sk_buff *skb, struct genl_info *info, From d85a8af57da871964c60eb5b9ab121a6c31e3bd3 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 12 Mar 2026 11:03:58 +0100 Subject: [PATCH 0525/1561] devlink: allow to use devlink index as a command handle Currently devlink instances are addressed bus_name/dev_name tuple. Allow the newly introduced DEVLINK_ATTR_INDEX to be used as an alternative handle for all devlink commands. When DEVLINK_ATTR_INDEX is present in the request, use it for a direct xarray lookup instead of iterating over all instances comparing bus_name/dev_name strings. Signed-off-by: Jiri Pirko Link: https://patch.msgid.link/20260312100407.551173-5-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/devlink.yaml | 53 ++++ net/devlink/core.c | 16 +- net/devlink/devl_internal.h | 1 + net/devlink/netlink.c | 14 +- net/devlink/netlink_gen.c | 355 ++++++++++++++--------- 5 files changed, 296 insertions(+), 143 deletions(-) diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml index 1bed67a0eefb..b495d56b9137 100644 --- a/Documentation/netlink/specs/devlink.yaml +++ b/Documentation/netlink/specs/devlink.yaml @@ -871,6 +871,8 @@ attribute-sets: name: index type: uint doc: Unique devlink instance index. + checks: + max: u32-max - name: dl-dev-stats subset-of: devlink @@ -1310,6 +1312,7 @@ operations: attributes: &dev-id-attrs - bus-name - dev-name + - index reply: &get-reply value: 3 attributes: @@ -1334,6 +1337,7 @@ operations: attributes: &port-id-attrs - bus-name - dev-name + - index - port-index reply: value: 7 @@ -1358,6 +1362,7 @@ operations: attributes: - bus-name - dev-name + - index - port-index - port-type - port-function @@ -1375,6 +1380,7 @@ operations: attributes: - bus-name - dev-name + - index - port-index - port-flavour - port-pci-pf-number @@ -1409,6 +1415,7 @@ operations: attributes: - bus-name - dev-name + - index - port-index - port-split-count @@ -1437,6 +1444,7 @@ operations: attributes: &sb-id-attrs - bus-name - dev-name + - index - sb-index reply: &sb-get-reply value: 13 @@ -1459,6 +1467,7 @@ operations: attributes: &sb-pool-id-attrs - bus-name - dev-name + - index - sb-index - sb-pool-index reply: &sb-pool-get-reply @@ -1482,6 +1491,7 @@ operations: attributes: - bus-name - dev-name + - index - sb-index - sb-pool-index - sb-pool-threshold-type @@ -1500,6 +1510,7 @@ operations: attributes: &sb-port-pool-id-attrs - bus-name - dev-name + - index - port-index - sb-index - sb-pool-index @@ -1524,6 +1535,7 @@ operations: attributes: - bus-name - dev-name + - index - port-index - sb-index - sb-pool-index @@ -1542,6 +1554,7 @@ operations: attributes: &sb-tc-pool-bind-id-attrs - bus-name - dev-name + - index - port-index - sb-index - sb-pool-type @@ -1567,6 +1580,7 @@ operations: attributes: - bus-name - dev-name + - index - port-index - sb-index - sb-pool-index @@ -1588,6 +1602,7 @@ operations: attributes: - bus-name - dev-name + - index - sb-index - @@ -1603,6 +1618,7 @@ operations: attributes: - bus-name - dev-name + - index - sb-index - @@ -1621,6 +1637,7 @@ operations: attributes: &eswitch-attrs - bus-name - dev-name + - index - eswitch-mode - eswitch-inline-mode - eswitch-encap-mode @@ -1649,12 +1666,14 @@ operations: attributes: - bus-name - dev-name + - index - dpipe-table-name reply: value: 31 attributes: - bus-name - dev-name + - index - dpipe-tables - @@ -1669,11 +1688,13 @@ operations: attributes: - bus-name - dev-name + - index - dpipe-table-name reply: attributes: - bus-name - dev-name + - index - dpipe-entries - @@ -1688,10 +1709,12 @@ operations: attributes: - bus-name - dev-name + - index reply: attributes: - bus-name - dev-name + - index - dpipe-headers - @@ -1707,6 +1730,7 @@ operations: attributes: - bus-name - dev-name + - index - dpipe-table-name - dpipe-table-counters-enabled @@ -1723,6 +1747,7 @@ operations: attributes: - bus-name - dev-name + - index - resource-id - resource-size @@ -1738,11 +1763,13 @@ operations: attributes: - bus-name - dev-name + - index reply: value: 36 attributes: - bus-name - dev-name + - index - resource-list - @@ -1758,6 +1785,7 @@ operations: attributes: - bus-name - dev-name + - index - reload-action - reload-limits - netns-pid @@ -1767,6 +1795,7 @@ operations: attributes: - bus-name - dev-name + - index - reload-actions-performed - @@ -1781,6 +1810,7 @@ operations: attributes: ¶m-id-attrs - bus-name - dev-name + - index - param-name reply: ¶m-get-reply attributes: *param-id-attrs @@ -1802,6 +1832,7 @@ operations: attributes: - bus-name - dev-name + - index - param-name - param-type # param-value-data is missing here as the type is variable @@ -1821,6 +1852,7 @@ operations: attributes: ®ion-id-attrs - bus-name - dev-name + - index - port-index - region-name reply: ®ion-get-reply @@ -1845,6 +1877,7 @@ operations: attributes: ®ion-snapshot-id-attrs - bus-name - dev-name + - index - port-index - region-name - region-snapshot-id @@ -1875,6 +1908,7 @@ operations: attributes: - bus-name - dev-name + - index - port-index - region-name - region-snapshot-id @@ -1886,6 +1920,7 @@ operations: attributes: - bus-name - dev-name + - index - port-index - region-name @@ -1935,6 +1970,7 @@ operations: attributes: - bus-name - dev-name + - index - info-driver-name - info-serial-number - info-version-fixed @@ -1956,6 +1992,7 @@ operations: attributes: &health-reporter-id-attrs - bus-name - dev-name + - index - port-index - health-reporter-name reply: &health-reporter-get-reply @@ -1978,6 +2015,7 @@ operations: attributes: - bus-name - dev-name + - index - port-index - health-reporter-name - health-reporter-graceful-period @@ -2048,6 +2086,7 @@ operations: attributes: - bus-name - dev-name + - index - flash-update-file-name - flash-update-component - flash-update-overwrite-mask @@ -2065,6 +2104,7 @@ operations: attributes: &trap-id-attrs - bus-name - dev-name + - index - trap-name reply: &trap-get-reply value: 63 @@ -2087,6 +2127,7 @@ operations: attributes: - bus-name - dev-name + - index - trap-name - trap-action @@ -2103,6 +2144,7 @@ operations: attributes: &trap-group-id-attrs - bus-name - dev-name + - index - trap-group-name reply: &trap-group-get-reply value: 67 @@ -2125,6 +2167,7 @@ operations: attributes: - bus-name - dev-name + - index - trap-group-name - trap-action - trap-policer-id @@ -2142,6 +2185,7 @@ operations: attributes: &trap-policer-id-attrs - bus-name - dev-name + - index - trap-policer-id reply: &trap-policer-get-reply value: 71 @@ -2164,6 +2208,7 @@ operations: attributes: - bus-name - dev-name + - index - trap-policer-id - trap-policer-rate - trap-policer-burst @@ -2194,6 +2239,7 @@ operations: attributes: &rate-id-attrs - bus-name - dev-name + - index - port-index - rate-node-name reply: &rate-get-reply @@ -2217,6 +2263,7 @@ operations: attributes: - bus-name - dev-name + - index - rate-node-name - rate-tx-share - rate-tx-max @@ -2238,6 +2285,7 @@ operations: attributes: - bus-name - dev-name + - index - rate-node-name - rate-tx-share - rate-tx-max @@ -2259,6 +2307,7 @@ operations: attributes: - bus-name - dev-name + - index - rate-node-name - @@ -2274,6 +2323,7 @@ operations: attributes: &linecard-id-attrs - bus-name - dev-name + - index - linecard-index reply: &linecard-get-reply value: 80 @@ -2296,6 +2346,7 @@ operations: attributes: - bus-name - dev-name + - index - linecard-index - linecard-type @@ -2329,6 +2380,7 @@ operations: attributes: - bus-name - dev-name + - index - selftests - @@ -2340,4 +2392,5 @@ operations: attributes: - bus-name - dev-name + - index - port-index diff --git a/net/devlink/core.c b/net/devlink/core.c index 63709c132a7c..237558abcd63 100644 --- a/net/devlink/core.c +++ b/net/devlink/core.c @@ -333,13 +333,15 @@ void devlink_put(struct devlink *devlink) queue_rcu_work(system_percpu_wq, &devlink->rwork); } -struct devlink *devlinks_xa_find_get(struct net *net, unsigned long *indexp) +static struct devlink *__devlinks_xa_find_get(struct net *net, + unsigned long *indexp, + unsigned long end) { struct devlink *devlink = NULL; rcu_read_lock(); retry: - devlink = xa_find(&devlinks, indexp, ULONG_MAX, DEVLINK_REGISTERED); + devlink = xa_find(&devlinks, indexp, end, DEVLINK_REGISTERED); if (!devlink) goto unlock; @@ -358,6 +360,16 @@ next: goto retry; } +struct devlink *devlinks_xa_find_get(struct net *net, unsigned long *indexp) +{ + return __devlinks_xa_find_get(net, indexp, ULONG_MAX); +} + +struct devlink *devlinks_xa_lookup_get(struct net *net, unsigned long index) +{ + return __devlinks_xa_find_get(net, &index, index); +} + /** * devl_register - Register devlink instance * @devlink: devlink diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h index 1b770de0313e..395832ed4477 100644 --- a/net/devlink/devl_internal.h +++ b/net/devlink/devl_internal.h @@ -90,6 +90,7 @@ extern struct genl_family devlink_nl_family; for (index = 0; (devlink = devlinks_xa_find_get(net, &index)); index++) struct devlink *devlinks_xa_find_get(struct net *net, unsigned long *indexp); +struct devlink *devlinks_xa_lookup_get(struct net *net, unsigned long index); static inline bool __devl_is_registered(struct devlink *devlink) { diff --git a/net/devlink/netlink.c b/net/devlink/netlink.c index 7b205f677b7a..9cba40285de4 100644 --- a/net/devlink/netlink.c +++ b/net/devlink/netlink.c @@ -186,6 +186,17 @@ devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs, char *busname; char *devname; + if (attrs[DEVLINK_ATTR_INDEX]) { + if (attrs[DEVLINK_ATTR_BUS_NAME] || + attrs[DEVLINK_ATTR_DEV_NAME]) + return ERR_PTR(-EINVAL); + index = nla_get_u32(attrs[DEVLINK_ATTR_INDEX]); + devlink = devlinks_xa_lookup_get(net, index); + if (!devlink) + return ERR_PTR(-ENODEV); + goto found; + } + if (!attrs[DEVLINK_ATTR_BUS_NAME] || !attrs[DEVLINK_ATTR_DEV_NAME]) return ERR_PTR(-EINVAL); @@ -356,7 +367,8 @@ int devlink_nl_dumpit(struct sk_buff *msg, struct netlink_callback *cb, int flags = NLM_F_MULTI; if (attrs && - (attrs[DEVLINK_ATTR_BUS_NAME] || attrs[DEVLINK_ATTR_DEV_NAME])) + (attrs[DEVLINK_ATTR_BUS_NAME] || attrs[DEVLINK_ATTR_DEV_NAME] || + attrs[DEVLINK_ATTR_INDEX])) return devlink_nl_inst_single_dumpit(msg, cb, flags, dump_one, attrs); else diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c index f4c61c2b4f22..eb35e80e01d1 100644 --- a/net/devlink/netlink_gen.c +++ b/net/devlink/netlink_gen.c @@ -11,6 +11,11 @@ #include +/* Integer value ranges */ +static const struct netlink_range_validation devlink_attr_index_range = { + .max = U32_MAX, +}; + /* Sparse enums validation callbacks */ static int devlink_attr_param_type_validate(const struct nlattr *attr, @@ -56,37 +61,42 @@ const struct nla_policy devlink_dl_selftest_id_nl_policy[DEVLINK_ATTR_SELFTEST_I }; /* DEVLINK_CMD_GET - do */ -static const struct nla_policy devlink_get_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { +static const struct nla_policy devlink_get_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), }; /* DEVLINK_CMD_PORT_GET - do */ -static const struct nla_policy devlink_port_get_do_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = { +static const struct nla_policy devlink_port_get_do_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, }; /* DEVLINK_CMD_PORT_GET - dump */ -static const struct nla_policy devlink_port_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { +static const struct nla_policy devlink_port_get_dump_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), }; /* DEVLINK_CMD_PORT_SET - do */ -static const struct nla_policy devlink_port_set_nl_policy[DEVLINK_ATTR_PORT_FUNCTION + 1] = { +static const struct nla_policy devlink_port_set_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_PORT_TYPE] = NLA_POLICY_MAX(NLA_U16, 3), [DEVLINK_ATTR_PORT_FUNCTION] = NLA_POLICY_NESTED(devlink_dl_port_function_nl_policy), }; /* DEVLINK_CMD_PORT_NEW - do */ -static const struct nla_policy devlink_port_new_nl_policy[DEVLINK_ATTR_PORT_PCI_SF_NUMBER + 1] = { +static const struct nla_policy devlink_port_new_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_PORT_FLAVOUR] = NLA_POLICY_MAX(NLA_U16, 7), [DEVLINK_ATTR_PORT_PCI_PF_NUMBER] = { .type = NLA_U16, }, @@ -95,58 +105,66 @@ static const struct nla_policy devlink_port_new_nl_policy[DEVLINK_ATTR_PORT_PCI_ }; /* DEVLINK_CMD_PORT_DEL - do */ -static const struct nla_policy devlink_port_del_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = { +static const struct nla_policy devlink_port_del_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, }; /* DEVLINK_CMD_PORT_SPLIT - do */ -static const struct nla_policy devlink_port_split_nl_policy[DEVLINK_ATTR_PORT_SPLIT_COUNT + 1] = { +static const struct nla_policy devlink_port_split_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_PORT_SPLIT_COUNT] = { .type = NLA_U32, }, }; /* DEVLINK_CMD_PORT_UNSPLIT - do */ -static const struct nla_policy devlink_port_unsplit_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = { +static const struct nla_policy devlink_port_unsplit_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, }; /* DEVLINK_CMD_SB_GET - do */ -static const struct nla_policy devlink_sb_get_do_nl_policy[DEVLINK_ATTR_SB_INDEX + 1] = { +static const struct nla_policy devlink_sb_get_do_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, }, }; /* DEVLINK_CMD_SB_GET - dump */ -static const struct nla_policy devlink_sb_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { +static const struct nla_policy devlink_sb_get_dump_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), }; /* DEVLINK_CMD_SB_POOL_GET - do */ -static const struct nla_policy devlink_sb_pool_get_do_nl_policy[DEVLINK_ATTR_SB_POOL_INDEX + 1] = { +static const struct nla_policy devlink_sb_pool_get_do_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16, }, }; /* DEVLINK_CMD_SB_POOL_GET - dump */ -static const struct nla_policy devlink_sb_pool_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { +static const struct nla_policy devlink_sb_pool_get_dump_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), }; /* DEVLINK_CMD_SB_POOL_SET - do */ -static const struct nla_policy devlink_sb_pool_set_nl_policy[DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE + 1] = { +static const struct nla_policy devlink_sb_pool_set_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16, }, [DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE] = NLA_POLICY_MAX(NLA_U8, 1), @@ -154,24 +172,27 @@ static const struct nla_policy devlink_sb_pool_set_nl_policy[DEVLINK_ATTR_SB_POO }; /* DEVLINK_CMD_SB_PORT_POOL_GET - do */ -static const struct nla_policy devlink_sb_port_pool_get_do_nl_policy[DEVLINK_ATTR_SB_POOL_INDEX + 1] = { +static const struct nla_policy devlink_sb_port_pool_get_do_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16, }, }; /* DEVLINK_CMD_SB_PORT_POOL_GET - dump */ -static const struct nla_policy devlink_sb_port_pool_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { +static const struct nla_policy devlink_sb_port_pool_get_dump_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), }; /* DEVLINK_CMD_SB_PORT_POOL_SET - do */ -static const struct nla_policy devlink_sb_port_pool_set_nl_policy[DEVLINK_ATTR_SB_THRESHOLD + 1] = { +static const struct nla_policy devlink_sb_port_pool_set_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16, }, @@ -179,9 +200,10 @@ static const struct nla_policy devlink_sb_port_pool_set_nl_policy[DEVLINK_ATTR_S }; /* DEVLINK_CMD_SB_TC_POOL_BIND_GET - do */ -static const struct nla_policy devlink_sb_tc_pool_bind_get_do_nl_policy[DEVLINK_ATTR_SB_TC_INDEX + 1] = { +static const struct nla_policy devlink_sb_tc_pool_bind_get_do_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_SB_POOL_TYPE] = NLA_POLICY_MAX(NLA_U8, 1), @@ -189,15 +211,17 @@ static const struct nla_policy devlink_sb_tc_pool_bind_get_do_nl_policy[DEVLINK_ }; /* DEVLINK_CMD_SB_TC_POOL_BIND_GET - dump */ -static const struct nla_policy devlink_sb_tc_pool_bind_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { +static const struct nla_policy devlink_sb_tc_pool_bind_get_dump_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), }; /* DEVLINK_CMD_SB_TC_POOL_BIND_SET - do */ -static const struct nla_policy devlink_sb_tc_pool_bind_set_nl_policy[DEVLINK_ATTR_SB_TC_INDEX + 1] = { +static const struct nla_policy devlink_sb_tc_pool_bind_set_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16, }, @@ -207,80 +231,91 @@ static const struct nla_policy devlink_sb_tc_pool_bind_set_nl_policy[DEVLINK_ATT }; /* DEVLINK_CMD_SB_OCC_SNAPSHOT - do */ -static const struct nla_policy devlink_sb_occ_snapshot_nl_policy[DEVLINK_ATTR_SB_INDEX + 1] = { +static const struct nla_policy devlink_sb_occ_snapshot_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, }, }; /* DEVLINK_CMD_SB_OCC_MAX_CLEAR - do */ -static const struct nla_policy devlink_sb_occ_max_clear_nl_policy[DEVLINK_ATTR_SB_INDEX + 1] = { +static const struct nla_policy devlink_sb_occ_max_clear_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, }, }; /* DEVLINK_CMD_ESWITCH_GET - do */ -static const struct nla_policy devlink_eswitch_get_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { +static const struct nla_policy devlink_eswitch_get_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), }; /* DEVLINK_CMD_ESWITCH_SET - do */ -static const struct nla_policy devlink_eswitch_set_nl_policy[DEVLINK_ATTR_ESWITCH_ENCAP_MODE + 1] = { +static const struct nla_policy devlink_eswitch_set_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_ESWITCH_MODE] = NLA_POLICY_MAX(NLA_U16, 2), [DEVLINK_ATTR_ESWITCH_INLINE_MODE] = NLA_POLICY_MAX(NLA_U8, 3), [DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = NLA_POLICY_MAX(NLA_U8, 1), }; /* DEVLINK_CMD_DPIPE_TABLE_GET - do */ -static const struct nla_policy devlink_dpipe_table_get_nl_policy[DEVLINK_ATTR_DPIPE_TABLE_NAME + 1] = { +static const struct nla_policy devlink_dpipe_table_get_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING, }, }; /* DEVLINK_CMD_DPIPE_ENTRIES_GET - do */ -static const struct nla_policy devlink_dpipe_entries_get_nl_policy[DEVLINK_ATTR_DPIPE_TABLE_NAME + 1] = { +static const struct nla_policy devlink_dpipe_entries_get_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING, }, }; /* DEVLINK_CMD_DPIPE_HEADERS_GET - do */ -static const struct nla_policy devlink_dpipe_headers_get_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { +static const struct nla_policy devlink_dpipe_headers_get_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), }; /* DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET - do */ -static const struct nla_policy devlink_dpipe_table_counters_set_nl_policy[DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED + 1] = { +static const struct nla_policy devlink_dpipe_table_counters_set_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED] = { .type = NLA_U8, }, }; /* DEVLINK_CMD_RESOURCE_SET - do */ -static const struct nla_policy devlink_resource_set_nl_policy[DEVLINK_ATTR_RESOURCE_SIZE + 1] = { +static const struct nla_policy devlink_resource_set_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_RESOURCE_ID] = { .type = NLA_U64, }, [DEVLINK_ATTR_RESOURCE_SIZE] = { .type = NLA_U64, }, }; /* DEVLINK_CMD_RESOURCE_DUMP - do */ -static const struct nla_policy devlink_resource_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { +static const struct nla_policy devlink_resource_dump_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), }; /* DEVLINK_CMD_RELOAD - do */ -static const struct nla_policy devlink_reload_nl_policy[DEVLINK_ATTR_RELOAD_LIMITS + 1] = { +static const struct nla_policy devlink_reload_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_RELOAD_ACTION] = NLA_POLICY_RANGE(NLA_U8, 1, 2), [DEVLINK_ATTR_RELOAD_LIMITS] = NLA_POLICY_BITFIELD32(6), [DEVLINK_ATTR_NETNS_PID] = { .type = NLA_U32, }, @@ -289,22 +324,25 @@ static const struct nla_policy devlink_reload_nl_policy[DEVLINK_ATTR_RELOAD_LIMI }; /* DEVLINK_CMD_PARAM_GET - do */ -static const struct nla_policy devlink_param_get_do_nl_policy[DEVLINK_ATTR_PARAM_NAME + 1] = { +static const struct nla_policy devlink_param_get_do_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PARAM_NAME] = { .type = NLA_NUL_STRING, }, }; /* DEVLINK_CMD_PARAM_GET - dump */ -static const struct nla_policy devlink_param_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { +static const struct nla_policy devlink_param_get_dump_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), }; /* DEVLINK_CMD_PARAM_SET - do */ -static const struct nla_policy devlink_param_set_nl_policy[DEVLINK_ATTR_PARAM_RESET_DEFAULT + 1] = { +static const struct nla_policy devlink_param_set_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PARAM_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_PARAM_TYPE] = NLA_POLICY_VALIDATE_FN(NLA_U8, &devlink_attr_param_type_validate), [DEVLINK_ATTR_PARAM_VALUE_CMODE] = NLA_POLICY_MAX(NLA_U8, 2), @@ -312,41 +350,46 @@ static const struct nla_policy devlink_param_set_nl_policy[DEVLINK_ATTR_PARAM_RE }; /* DEVLINK_CMD_REGION_GET - do */ -static const struct nla_policy devlink_region_get_do_nl_policy[DEVLINK_ATTR_REGION_NAME + 1] = { +static const struct nla_policy devlink_region_get_do_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING, }, }; /* DEVLINK_CMD_REGION_GET - dump */ -static const struct nla_policy devlink_region_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { +static const struct nla_policy devlink_region_get_dump_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), }; /* DEVLINK_CMD_REGION_NEW - do */ -static const struct nla_policy devlink_region_new_nl_policy[DEVLINK_ATTR_REGION_SNAPSHOT_ID + 1] = { +static const struct nla_policy devlink_region_new_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32, }, }; /* DEVLINK_CMD_REGION_DEL - do */ -static const struct nla_policy devlink_region_del_nl_policy[DEVLINK_ATTR_REGION_SNAPSHOT_ID + 1] = { +static const struct nla_policy devlink_region_del_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32, }, }; /* DEVLINK_CMD_REGION_READ - dump */ -static const struct nla_policy devlink_region_read_nl_policy[DEVLINK_ATTR_REGION_DIRECT + 1] = { +static const struct nla_policy devlink_region_read_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32, }, @@ -356,44 +399,50 @@ static const struct nla_policy devlink_region_read_nl_policy[DEVLINK_ATTR_REGION }; /* DEVLINK_CMD_PORT_PARAM_GET - do */ -static const struct nla_policy devlink_port_param_get_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = { +static const struct nla_policy devlink_port_param_get_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, }; /* DEVLINK_CMD_PORT_PARAM_SET - do */ -static const struct nla_policy devlink_port_param_set_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = { +static const struct nla_policy devlink_port_param_set_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, }; /* DEVLINK_CMD_INFO_GET - do */ -static const struct nla_policy devlink_info_get_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { +static const struct nla_policy devlink_info_get_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), }; /* DEVLINK_CMD_HEALTH_REPORTER_GET - do */ -static const struct nla_policy devlink_health_reporter_get_do_nl_policy[DEVLINK_ATTR_HEALTH_REPORTER_NAME + 1] = { +static const struct nla_policy devlink_health_reporter_get_do_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING, }, }; /* DEVLINK_CMD_HEALTH_REPORTER_GET - dump */ -static const struct nla_policy devlink_health_reporter_get_dump_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = { +static const struct nla_policy devlink_health_reporter_get_dump_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, }; /* DEVLINK_CMD_HEALTH_REPORTER_SET - do */ -static const struct nla_policy devlink_health_reporter_set_nl_policy[DEVLINK_ATTR_HEALTH_REPORTER_BURST_PERIOD + 1] = { +static const struct nla_policy devlink_health_reporter_set_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64, }, @@ -403,137 +452,155 @@ static const struct nla_policy devlink_health_reporter_set_nl_policy[DEVLINK_ATT }; /* DEVLINK_CMD_HEALTH_REPORTER_RECOVER - do */ -static const struct nla_policy devlink_health_reporter_recover_nl_policy[DEVLINK_ATTR_HEALTH_REPORTER_NAME + 1] = { +static const struct nla_policy devlink_health_reporter_recover_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING, }, }; /* DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE - do */ -static const struct nla_policy devlink_health_reporter_diagnose_nl_policy[DEVLINK_ATTR_HEALTH_REPORTER_NAME + 1] = { +static const struct nla_policy devlink_health_reporter_diagnose_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING, }, }; /* DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET - dump */ -static const struct nla_policy devlink_health_reporter_dump_get_nl_policy[DEVLINK_ATTR_HEALTH_REPORTER_NAME + 1] = { +static const struct nla_policy devlink_health_reporter_dump_get_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING, }, }; /* DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR - do */ -static const struct nla_policy devlink_health_reporter_dump_clear_nl_policy[DEVLINK_ATTR_HEALTH_REPORTER_NAME + 1] = { +static const struct nla_policy devlink_health_reporter_dump_clear_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING, }, }; /* DEVLINK_CMD_FLASH_UPDATE - do */ -static const struct nla_policy devlink_flash_update_nl_policy[DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK + 1] = { +static const struct nla_policy devlink_flash_update_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_FLASH_UPDATE_COMPONENT] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK] = NLA_POLICY_BITFIELD32(3), }; /* DEVLINK_CMD_TRAP_GET - do */ -static const struct nla_policy devlink_trap_get_do_nl_policy[DEVLINK_ATTR_TRAP_NAME + 1] = { +static const struct nla_policy devlink_trap_get_do_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_TRAP_NAME] = { .type = NLA_NUL_STRING, }, }; /* DEVLINK_CMD_TRAP_GET - dump */ -static const struct nla_policy devlink_trap_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { +static const struct nla_policy devlink_trap_get_dump_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), }; /* DEVLINK_CMD_TRAP_SET - do */ -static const struct nla_policy devlink_trap_set_nl_policy[DEVLINK_ATTR_TRAP_ACTION + 1] = { +static const struct nla_policy devlink_trap_set_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_TRAP_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_TRAP_ACTION] = NLA_POLICY_MAX(NLA_U8, 2), }; /* DEVLINK_CMD_TRAP_GROUP_GET - do */ -static const struct nla_policy devlink_trap_group_get_do_nl_policy[DEVLINK_ATTR_TRAP_GROUP_NAME + 1] = { +static const struct nla_policy devlink_trap_group_get_do_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_TRAP_GROUP_NAME] = { .type = NLA_NUL_STRING, }, }; /* DEVLINK_CMD_TRAP_GROUP_GET - dump */ -static const struct nla_policy devlink_trap_group_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { +static const struct nla_policy devlink_trap_group_get_dump_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), }; /* DEVLINK_CMD_TRAP_GROUP_SET - do */ -static const struct nla_policy devlink_trap_group_set_nl_policy[DEVLINK_ATTR_TRAP_POLICER_ID + 1] = { +static const struct nla_policy devlink_trap_group_set_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_TRAP_GROUP_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_TRAP_ACTION] = NLA_POLICY_MAX(NLA_U8, 2), [DEVLINK_ATTR_TRAP_POLICER_ID] = { .type = NLA_U32, }, }; /* DEVLINK_CMD_TRAP_POLICER_GET - do */ -static const struct nla_policy devlink_trap_policer_get_do_nl_policy[DEVLINK_ATTR_TRAP_POLICER_ID + 1] = { +static const struct nla_policy devlink_trap_policer_get_do_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_TRAP_POLICER_ID] = { .type = NLA_U32, }, }; /* DEVLINK_CMD_TRAP_POLICER_GET - dump */ -static const struct nla_policy devlink_trap_policer_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { +static const struct nla_policy devlink_trap_policer_get_dump_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), }; /* DEVLINK_CMD_TRAP_POLICER_SET - do */ -static const struct nla_policy devlink_trap_policer_set_nl_policy[DEVLINK_ATTR_TRAP_POLICER_BURST + 1] = { +static const struct nla_policy devlink_trap_policer_set_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_TRAP_POLICER_ID] = { .type = NLA_U32, }, [DEVLINK_ATTR_TRAP_POLICER_RATE] = { .type = NLA_U64, }, [DEVLINK_ATTR_TRAP_POLICER_BURST] = { .type = NLA_U64, }, }; /* DEVLINK_CMD_HEALTH_REPORTER_TEST - do */ -static const struct nla_policy devlink_health_reporter_test_nl_policy[DEVLINK_ATTR_HEALTH_REPORTER_NAME + 1] = { +static const struct nla_policy devlink_health_reporter_test_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING, }, }; /* DEVLINK_CMD_RATE_GET - do */ -static const struct nla_policy devlink_rate_get_do_nl_policy[DEVLINK_ATTR_RATE_NODE_NAME + 1] = { +static const struct nla_policy devlink_rate_get_do_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING, }, }; /* DEVLINK_CMD_RATE_GET - dump */ -static const struct nla_policy devlink_rate_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { +static const struct nla_policy devlink_rate_get_dump_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), }; /* DEVLINK_CMD_RATE_SET - do */ -static const struct nla_policy devlink_rate_set_nl_policy[DEVLINK_ATTR_RATE_TC_BWS + 1] = { +static const struct nla_policy devlink_rate_set_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_RATE_TX_SHARE] = { .type = NLA_U64, }, [DEVLINK_ATTR_RATE_TX_MAX] = { .type = NLA_U64, }, @@ -544,9 +611,10 @@ static const struct nla_policy devlink_rate_set_nl_policy[DEVLINK_ATTR_RATE_TC_B }; /* DEVLINK_CMD_RATE_NEW - do */ -static const struct nla_policy devlink_rate_new_nl_policy[DEVLINK_ATTR_RATE_TC_BWS + 1] = { +static const struct nla_policy devlink_rate_new_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_RATE_TX_SHARE] = { .type = NLA_U64, }, [DEVLINK_ATTR_RATE_TX_MAX] = { .type = NLA_U64, }, @@ -557,50 +625,57 @@ static const struct nla_policy devlink_rate_new_nl_policy[DEVLINK_ATTR_RATE_TC_B }; /* DEVLINK_CMD_RATE_DEL - do */ -static const struct nla_policy devlink_rate_del_nl_policy[DEVLINK_ATTR_RATE_NODE_NAME + 1] = { +static const struct nla_policy devlink_rate_del_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING, }, }; /* DEVLINK_CMD_LINECARD_GET - do */ -static const struct nla_policy devlink_linecard_get_do_nl_policy[DEVLINK_ATTR_LINECARD_INDEX + 1] = { +static const struct nla_policy devlink_linecard_get_do_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_LINECARD_INDEX] = { .type = NLA_U32, }, }; /* DEVLINK_CMD_LINECARD_GET - dump */ -static const struct nla_policy devlink_linecard_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { +static const struct nla_policy devlink_linecard_get_dump_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), }; /* DEVLINK_CMD_LINECARD_SET - do */ -static const struct nla_policy devlink_linecard_set_nl_policy[DEVLINK_ATTR_LINECARD_TYPE + 1] = { +static const struct nla_policy devlink_linecard_set_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_LINECARD_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_LINECARD_TYPE] = { .type = NLA_NUL_STRING, }, }; /* DEVLINK_CMD_SELFTESTS_GET - do */ -static const struct nla_policy devlink_selftests_get_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { +static const struct nla_policy devlink_selftests_get_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), }; /* DEVLINK_CMD_SELFTESTS_RUN - do */ -static const struct nla_policy devlink_selftests_run_nl_policy[DEVLINK_ATTR_SELFTESTS + 1] = { +static const struct nla_policy devlink_selftests_run_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_SELFTESTS] = NLA_POLICY_NESTED(devlink_dl_selftest_id_nl_policy), }; /* DEVLINK_CMD_NOTIFY_FILTER_SET - do */ -static const struct nla_policy devlink_notify_filter_set_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = { +static const struct nla_policy devlink_notify_filter_set_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, }; @@ -613,7 +688,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_get_nl_policy, - .maxattr = DEVLINK_ATTR_DEV_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { @@ -629,14 +704,14 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_port_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_port_get_do_nl_policy, - .maxattr = DEVLINK_ATTR_PORT_INDEX, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { .cmd = DEVLINK_CMD_PORT_GET, .dumpit = devlink_nl_port_get_dumpit, .policy = devlink_port_get_dump_nl_policy, - .maxattr = DEVLINK_ATTR_DEV_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DUMP, }, { @@ -646,7 +721,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_port_set_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_port_set_nl_policy, - .maxattr = DEVLINK_ATTR_PORT_FUNCTION, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -656,7 +731,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_port_new_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_port_new_nl_policy, - .maxattr = DEVLINK_ATTR_PORT_PCI_SF_NUMBER, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -666,7 +741,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_port_del_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_port_del_nl_policy, - .maxattr = DEVLINK_ATTR_PORT_INDEX, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -676,7 +751,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_port_split_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_port_split_nl_policy, - .maxattr = DEVLINK_ATTR_PORT_SPLIT_COUNT, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -686,7 +761,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_port_unsplit_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_port_unsplit_nl_policy, - .maxattr = DEVLINK_ATTR_PORT_INDEX, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -696,14 +771,14 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_sb_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_sb_get_do_nl_policy, - .maxattr = DEVLINK_ATTR_SB_INDEX, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { .cmd = DEVLINK_CMD_SB_GET, .dumpit = devlink_nl_sb_get_dumpit, .policy = devlink_sb_get_dump_nl_policy, - .maxattr = DEVLINK_ATTR_DEV_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DUMP, }, { @@ -713,14 +788,14 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_sb_pool_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_sb_pool_get_do_nl_policy, - .maxattr = DEVLINK_ATTR_SB_POOL_INDEX, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { .cmd = DEVLINK_CMD_SB_POOL_GET, .dumpit = devlink_nl_sb_pool_get_dumpit, .policy = devlink_sb_pool_get_dump_nl_policy, - .maxattr = DEVLINK_ATTR_DEV_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DUMP, }, { @@ -730,7 +805,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_sb_pool_set_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_sb_pool_set_nl_policy, - .maxattr = DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -740,14 +815,14 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_sb_port_pool_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_sb_port_pool_get_do_nl_policy, - .maxattr = DEVLINK_ATTR_SB_POOL_INDEX, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { .cmd = DEVLINK_CMD_SB_PORT_POOL_GET, .dumpit = devlink_nl_sb_port_pool_get_dumpit, .policy = devlink_sb_port_pool_get_dump_nl_policy, - .maxattr = DEVLINK_ATTR_DEV_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DUMP, }, { @@ -757,7 +832,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_sb_port_pool_set_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_sb_port_pool_set_nl_policy, - .maxattr = DEVLINK_ATTR_SB_THRESHOLD, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -767,14 +842,14 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_sb_tc_pool_bind_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_sb_tc_pool_bind_get_do_nl_policy, - .maxattr = DEVLINK_ATTR_SB_TC_INDEX, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET, .dumpit = devlink_nl_sb_tc_pool_bind_get_dumpit, .policy = devlink_sb_tc_pool_bind_get_dump_nl_policy, - .maxattr = DEVLINK_ATTR_DEV_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DUMP, }, { @@ -784,7 +859,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_sb_tc_pool_bind_set_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_sb_tc_pool_bind_set_nl_policy, - .maxattr = DEVLINK_ATTR_SB_TC_INDEX, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -794,7 +869,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_sb_occ_snapshot_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_sb_occ_snapshot_nl_policy, - .maxattr = DEVLINK_ATTR_SB_INDEX, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -804,7 +879,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_sb_occ_max_clear_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_sb_occ_max_clear_nl_policy, - .maxattr = DEVLINK_ATTR_SB_INDEX, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -814,7 +889,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_eswitch_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_eswitch_get_nl_policy, - .maxattr = DEVLINK_ATTR_DEV_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -824,7 +899,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_eswitch_set_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_eswitch_set_nl_policy, - .maxattr = DEVLINK_ATTR_ESWITCH_ENCAP_MODE, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -834,7 +909,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_dpipe_table_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_dpipe_table_get_nl_policy, - .maxattr = DEVLINK_ATTR_DPIPE_TABLE_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { @@ -844,7 +919,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_dpipe_entries_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_dpipe_entries_get_nl_policy, - .maxattr = DEVLINK_ATTR_DPIPE_TABLE_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { @@ -854,7 +929,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_dpipe_headers_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_dpipe_headers_get_nl_policy, - .maxattr = DEVLINK_ATTR_DEV_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { @@ -864,7 +939,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_dpipe_table_counters_set_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_dpipe_table_counters_set_nl_policy, - .maxattr = DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -874,7 +949,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_resource_set_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_resource_set_nl_policy, - .maxattr = DEVLINK_ATTR_RESOURCE_SIZE, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -884,7 +959,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_resource_dump_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_resource_dump_nl_policy, - .maxattr = DEVLINK_ATTR_DEV_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { @@ -894,7 +969,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_reload_doit, .post_doit = devlink_nl_post_doit_dev_lock, .policy = devlink_reload_nl_policy, - .maxattr = DEVLINK_ATTR_RELOAD_LIMITS, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -904,14 +979,14 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_param_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_param_get_do_nl_policy, - .maxattr = DEVLINK_ATTR_PARAM_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { .cmd = DEVLINK_CMD_PARAM_GET, .dumpit = devlink_nl_param_get_dumpit, .policy = devlink_param_get_dump_nl_policy, - .maxattr = DEVLINK_ATTR_DEV_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DUMP, }, { @@ -921,7 +996,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_param_set_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_param_set_nl_policy, - .maxattr = DEVLINK_ATTR_PARAM_RESET_DEFAULT, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -931,14 +1006,14 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_region_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_region_get_do_nl_policy, - .maxattr = DEVLINK_ATTR_REGION_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { .cmd = DEVLINK_CMD_REGION_GET, .dumpit = devlink_nl_region_get_dumpit, .policy = devlink_region_get_dump_nl_policy, - .maxattr = DEVLINK_ATTR_DEV_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DUMP, }, { @@ -948,7 +1023,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_region_new_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_region_new_nl_policy, - .maxattr = DEVLINK_ATTR_REGION_SNAPSHOT_ID, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -958,7 +1033,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_region_del_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_region_del_nl_policy, - .maxattr = DEVLINK_ATTR_REGION_SNAPSHOT_ID, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -966,7 +1041,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .validate = GENL_DONT_VALIDATE_DUMP_STRICT, .dumpit = devlink_nl_region_read_dumpit, .policy = devlink_region_read_nl_policy, - .maxattr = DEVLINK_ATTR_REGION_DIRECT, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DUMP, }, { @@ -976,7 +1051,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_port_param_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_port_param_get_nl_policy, - .maxattr = DEVLINK_ATTR_PORT_INDEX, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { @@ -992,7 +1067,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_port_param_set_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_port_param_set_nl_policy, - .maxattr = DEVLINK_ATTR_PORT_INDEX, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -1002,7 +1077,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_info_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_info_get_nl_policy, - .maxattr = DEVLINK_ATTR_DEV_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { @@ -1018,14 +1093,14 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_health_reporter_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_health_reporter_get_do_nl_policy, - .maxattr = DEVLINK_ATTR_HEALTH_REPORTER_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_GET, .dumpit = devlink_nl_health_reporter_get_dumpit, .policy = devlink_health_reporter_get_dump_nl_policy, - .maxattr = DEVLINK_ATTR_PORT_INDEX, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DUMP, }, { @@ -1035,7 +1110,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_health_reporter_set_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_health_reporter_set_nl_policy, - .maxattr = DEVLINK_ATTR_HEALTH_REPORTER_BURST_PERIOD, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -1045,7 +1120,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_health_reporter_recover_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_health_reporter_recover_nl_policy, - .maxattr = DEVLINK_ATTR_HEALTH_REPORTER_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -1055,7 +1130,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_health_reporter_diagnose_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_health_reporter_diagnose_nl_policy, - .maxattr = DEVLINK_ATTR_HEALTH_REPORTER_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -1063,7 +1138,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .validate = GENL_DONT_VALIDATE_DUMP_STRICT, .dumpit = devlink_nl_health_reporter_dump_get_dumpit, .policy = devlink_health_reporter_dump_get_nl_policy, - .maxattr = DEVLINK_ATTR_HEALTH_REPORTER_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DUMP, }, { @@ -1073,7 +1148,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_health_reporter_dump_clear_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_health_reporter_dump_clear_nl_policy, - .maxattr = DEVLINK_ATTR_HEALTH_REPORTER_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -1083,7 +1158,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_flash_update_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_flash_update_nl_policy, - .maxattr = DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -1093,14 +1168,14 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_trap_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_trap_get_do_nl_policy, - .maxattr = DEVLINK_ATTR_TRAP_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { .cmd = DEVLINK_CMD_TRAP_GET, .dumpit = devlink_nl_trap_get_dumpit, .policy = devlink_trap_get_dump_nl_policy, - .maxattr = DEVLINK_ATTR_DEV_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DUMP, }, { @@ -1110,7 +1185,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_trap_set_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_trap_set_nl_policy, - .maxattr = DEVLINK_ATTR_TRAP_ACTION, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -1120,14 +1195,14 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_trap_group_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_trap_group_get_do_nl_policy, - .maxattr = DEVLINK_ATTR_TRAP_GROUP_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { .cmd = DEVLINK_CMD_TRAP_GROUP_GET, .dumpit = devlink_nl_trap_group_get_dumpit, .policy = devlink_trap_group_get_dump_nl_policy, - .maxattr = DEVLINK_ATTR_DEV_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DUMP, }, { @@ -1137,7 +1212,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_trap_group_set_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_trap_group_set_nl_policy, - .maxattr = DEVLINK_ATTR_TRAP_POLICER_ID, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -1147,14 +1222,14 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_trap_policer_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_trap_policer_get_do_nl_policy, - .maxattr = DEVLINK_ATTR_TRAP_POLICER_ID, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { .cmd = DEVLINK_CMD_TRAP_POLICER_GET, .dumpit = devlink_nl_trap_policer_get_dumpit, .policy = devlink_trap_policer_get_dump_nl_policy, - .maxattr = DEVLINK_ATTR_DEV_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DUMP, }, { @@ -1164,7 +1239,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_trap_policer_set_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_trap_policer_set_nl_policy, - .maxattr = DEVLINK_ATTR_TRAP_POLICER_BURST, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -1174,7 +1249,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_health_reporter_test_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_health_reporter_test_nl_policy, - .maxattr = DEVLINK_ATTR_HEALTH_REPORTER_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -1184,14 +1259,14 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_rate_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_rate_get_do_nl_policy, - .maxattr = DEVLINK_ATTR_RATE_NODE_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { .cmd = DEVLINK_CMD_RATE_GET, .dumpit = devlink_nl_rate_get_dumpit, .policy = devlink_rate_get_dump_nl_policy, - .maxattr = DEVLINK_ATTR_DEV_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DUMP, }, { @@ -1201,7 +1276,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_rate_set_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_rate_set_nl_policy, - .maxattr = DEVLINK_ATTR_RATE_TC_BWS, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -1211,7 +1286,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_rate_new_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_rate_new_nl_policy, - .maxattr = DEVLINK_ATTR_RATE_TC_BWS, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -1221,7 +1296,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_rate_del_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_rate_del_nl_policy, - .maxattr = DEVLINK_ATTR_RATE_NODE_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -1231,14 +1306,14 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_linecard_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_linecard_get_do_nl_policy, - .maxattr = DEVLINK_ATTR_LINECARD_INDEX, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { .cmd = DEVLINK_CMD_LINECARD_GET, .dumpit = devlink_nl_linecard_get_dumpit, .policy = devlink_linecard_get_dump_nl_policy, - .maxattr = DEVLINK_ATTR_DEV_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DUMP, }, { @@ -1248,7 +1323,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_linecard_set_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_linecard_set_nl_policy, - .maxattr = DEVLINK_ATTR_LINECARD_TYPE, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -1258,7 +1333,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_selftests_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_selftests_get_nl_policy, - .maxattr = DEVLINK_ATTR_DEV_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { @@ -1274,14 +1349,14 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_selftests_run_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_selftests_run_nl_policy, - .maxattr = DEVLINK_ATTR_SELFTESTS, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { .cmd = DEVLINK_CMD_NOTIFY_FILTER_SET, .doit = devlink_nl_notify_filter_set_doit, .policy = devlink_notify_filter_set_nl_policy, - .maxattr = DEVLINK_ATTR_PORT_INDEX, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, }; From 725d5fdb7b9c01d9e7079682acf998703762475b Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 12 Mar 2026 11:03:59 +0100 Subject: [PATCH 0526/1561] devlink: support index-based lookup via bus_name/dev_name handle Devlink instances without a backing device use bus_name "devlink_index" and dev_name set to the decimal index string. When user space sends this handle, detect the pattern and perform a direct xarray lookup by index instead of iterating all instances. Signed-off-by: Jiri Pirko Link: https://patch.msgid.link/20260312100407.551173-6-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- include/uapi/linux/devlink.h | 2 ++ net/devlink/netlink.c | 9 +++++++++ 2 files changed, 11 insertions(+) diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 1ba3436db4ae..7de2d8cc862f 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -19,6 +19,8 @@ #define DEVLINK_GENL_VERSION 0x1 #define DEVLINK_GENL_MCGRP_CONFIG_NAME "config" +#define DEVLINK_INDEX_BUS_NAME "devlink_index" + enum devlink_command { /* don't change the order or add anything between, this is ABI! */ DEVLINK_CMD_UNSPEC, diff --git a/net/devlink/netlink.c b/net/devlink/netlink.c index 9cba40285de4..fa38fca22fe4 100644 --- a/net/devlink/netlink.c +++ b/net/devlink/netlink.c @@ -203,6 +203,15 @@ devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs, busname = nla_data(attrs[DEVLINK_ATTR_BUS_NAME]); devname = nla_data(attrs[DEVLINK_ATTR_DEV_NAME]); + if (!strcmp(busname, DEVLINK_INDEX_BUS_NAME)) { + if (kstrtoul(devname, 10, &index)) + return ERR_PTR(-ENODEV); + devlink = devlinks_xa_lookup_get(net, index); + if (!devlink) + return ERR_PTR(-ENODEV); + goto found; + } + devlinks_xa_for_each_registered_get(net, index, devlink) { if (strcmp(devlink_bus_name(devlink), busname) == 0 && strcmp(devlink_dev_name(devlink), devname) == 0) From 089aeb4f2218ee0d7b53930e9f04a061240ce0f0 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 12 Mar 2026 11:04:00 +0100 Subject: [PATCH 0527/1561] devlink: support index-based notification filtering Extend the notification filter descriptor with devlink_index so that userspace can filter notifications by devlink instance index in addition to bus_name/dev_name. Signed-off-by: Jiri Pirko Link: https://patch.msgid.link/20260312100407.551173-7-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- net/devlink/devl_internal.h | 4 ++++ net/devlink/netlink.c | 11 ++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h index 395832ed4477..f0ebfb936770 100644 --- a/net/devlink/devl_internal.h +++ b/net/devlink/devl_internal.h @@ -205,6 +205,8 @@ struct devlink_obj_desc { const char *dev_name; unsigned int port_index; bool port_index_valid; + unsigned int devlink_index; + bool devlink_index_valid; long data[]; }; @@ -214,6 +216,8 @@ static inline void devlink_nl_obj_desc_init(struct devlink_obj_desc *desc, memset(desc, 0, sizeof(*desc)); desc->bus_name = devlink_bus_name(devlink); desc->dev_name = devlink_dev_name(devlink); + desc->devlink_index = devlink->index; + desc->devlink_index_valid = true; } static inline void devlink_nl_obj_desc_port_set(struct devlink_obj_desc *desc, diff --git a/net/devlink/netlink.c b/net/devlink/netlink.c index fa38fca22fe4..32ddbe244cb7 100644 --- a/net/devlink/netlink.c +++ b/net/devlink/netlink.c @@ -73,13 +73,19 @@ int devlink_nl_notify_filter_set_doit(struct sk_buff *skb, flt->dev_name = pos; } + if (attrs[DEVLINK_ATTR_INDEX]) { + flt->devlink_index = nla_get_uint(attrs[DEVLINK_ATTR_INDEX]); + flt->devlink_index_valid = true; + } + if (attrs[DEVLINK_ATTR_PORT_INDEX]) { flt->port_index = nla_get_u32(attrs[DEVLINK_ATTR_PORT_INDEX]); flt->port_index_valid = true; } /* Don't attach empty filter. */ - if (!flt->bus_name && !flt->dev_name && !flt->port_index_valid) { + if (!flt->bus_name && !flt->dev_name && + !flt->devlink_index_valid && !flt->port_index_valid) { kfree(flt); flt = NULL; } @@ -100,6 +106,9 @@ int devlink_nl_notify_filter_set_doit(struct sk_buff *skb, static bool devlink_obj_desc_match(const struct devlink_obj_desc *desc, const struct devlink_obj_desc *flt) { + if (desc->devlink_index_valid && flt->devlink_index_valid && + desc->devlink_index != flt->devlink_index) + return false; if (desc->bus_name && flt->bus_name && strcmp(desc->bus_name, flt->bus_name)) return false; From eb32a6310a7bcc6b26087a1a93981d3593aa17ea Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 12 Mar 2026 11:04:01 +0100 Subject: [PATCH 0528/1561] devlink: introduce __devlink_alloc() with dev driver pointer Introduce __devlink_alloc() as an internal devlink allocator that accepts a struct device_driver pointer and stores it in the devlink instance. This allows internal devlink code (e.g. shared instances) to associate a driver with a devlink instance without need to pass dev pointer. Signed-off-by: Jiri Pirko Link: https://patch.msgid.link/20260312100407.551173-8-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- net/devlink/core.c | 40 ++++++++++++++++++++++--------------- net/devlink/devl_internal.h | 5 +++++ 2 files changed, 29 insertions(+), 16 deletions(-) diff --git a/net/devlink/core.c b/net/devlink/core.c index 237558abcd63..fcb73d3e56aa 100644 --- a/net/devlink/core.c +++ b/net/devlink/core.c @@ -418,27 +418,15 @@ void devlink_unregister(struct devlink *devlink) } EXPORT_SYMBOL_GPL(devlink_unregister); -/** - * devlink_alloc_ns - Allocate new devlink instance resources - * in specific namespace - * - * @ops: ops - * @priv_size: size of user private data - * @net: net namespace - * @dev: parent device - * - * Allocate new devlink instance resources, including devlink index - * and name. - */ -struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, - size_t priv_size, struct net *net, - struct device *dev) +struct devlink *__devlink_alloc(const struct devlink_ops *ops, size_t priv_size, + struct net *net, struct device *dev, + const struct device_driver *dev_driver) { struct devlink *devlink; static u32 last_id; int ret; - WARN_ON(!ops || !dev); + WARN_ON(!ops || !dev || !dev_driver); if (!devlink_reload_actions_valid(ops)) return NULL; @@ -453,6 +441,7 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, devlink->dev = get_device(dev); devlink->ops = ops; + devlink->dev_driver = dev_driver; xa_init_flags(&devlink->ports, XA_FLAGS_ALLOC); xa_init_flags(&devlink->params, XA_FLAGS_ALLOC); xa_init_flags(&devlink->snapshot_ids, XA_FLAGS_ALLOC); @@ -480,6 +469,25 @@ err_xa_alloc: kvfree(devlink); return NULL; } + +/** + * devlink_alloc_ns - Allocate new devlink instance resources + * in specific namespace + * + * @ops: ops + * @priv_size: size of user private data + * @net: net namespace + * @dev: parent device + * + * Allocate new devlink instance resources, including devlink index + * and name. + */ +struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, + size_t priv_size, struct net *net, + struct device *dev) +{ + return __devlink_alloc(ops, priv_size, net, dev, dev->driver); +} EXPORT_SYMBOL_GPL(devlink_alloc_ns); /** diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h index f0ebfb936770..3cc7e696e0fd 100644 --- a/net/devlink/devl_internal.h +++ b/net/devlink/devl_internal.h @@ -49,6 +49,7 @@ struct devlink { struct xarray snapshot_ids; struct devlink_dev_stats stats; struct device *dev; + const struct device_driver *dev_driver; possible_net_t _net; /* Serializes access to devlink instance specific objects such as * port, sb, dpipe, resource, params, region, traps and more. @@ -66,6 +67,10 @@ struct devlink { extern struct xarray devlinks; extern struct genl_family devlink_nl_family; +struct devlink *__devlink_alloc(const struct devlink_ops *ops, size_t priv_size, + struct net *net, struct device *dev, + const struct device_driver *dev_driver); + /* devlink instances are open to the access from the user space after * devlink_register() call. Such logical barrier allows us to have certain * expectations related to locking. From 20b0f383aae7d26990a769d52b4d5c0e570e659c Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 12 Mar 2026 11:04:02 +0100 Subject: [PATCH 0529/1561] devlink: add devlink_dev_driver_name() helper and use it in trace events In preparation to dev-less devlinks, add devlink_dev_driver_name() that returns the driver name stored in devlink struct, and use it in all trace events. Signed-off-by: Jiri Pirko Link: https://patch.msgid.link/20260312100407.551173-9-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 1 + include/trace/events/devlink.h | 12 ++++++------ net/devlink/core.c | 6 ++++++ 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/include/net/devlink.h b/include/net/devlink.h index 0afb0958b910..45dec7067a8e 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1613,6 +1613,7 @@ struct devlink *priv_to_devlink(void *priv); struct device *devlink_to_dev(const struct devlink *devlink); const char *devlink_bus_name(const struct devlink *devlink); const char *devlink_dev_name(const struct devlink *devlink); +const char *devlink_dev_driver_name(const struct devlink *devlink); /* Devlink instance explicit locking */ void devl_lock(struct devlink *devlink); diff --git a/include/trace/events/devlink.h b/include/trace/events/devlink.h index 32304ce9ad15..4f8edf77dfbe 100644 --- a/include/trace/events/devlink.h +++ b/include/trace/events/devlink.h @@ -23,7 +23,7 @@ TRACE_EVENT(devlink_hwmsg, TP_STRUCT__entry( __string(bus_name, devlink_bus_name(devlink)) __string(dev_name, devlink_dev_name(devlink)) - __string(driver_name, devlink_to_dev(devlink)->driver->name) + __string(driver_name, devlink_dev_driver_name(devlink)) __field(bool, incoming) __field(unsigned long, type) __dynamic_array(u8, buf, len) @@ -57,7 +57,7 @@ TRACE_EVENT(devlink_hwerr, TP_STRUCT__entry( __string(bus_name, devlink_bus_name(devlink)) __string(dev_name, devlink_dev_name(devlink)) - __string(driver_name, devlink_to_dev(devlink)->driver->name) + __string(driver_name, devlink_dev_driver_name(devlink)) __field(int, err) __string(msg, msg) ), @@ -87,7 +87,7 @@ TRACE_EVENT(devlink_health_report, TP_STRUCT__entry( __string(bus_name, devlink_bus_name(devlink)) __string(dev_name, devlink_dev_name(devlink)) - __string(driver_name, devlink_to_dev(devlink)->driver->name) + __string(driver_name, devlink_dev_driver_name(devlink)) __string(reporter_name, reporter_name) __string(msg, msg) ), @@ -118,7 +118,7 @@ TRACE_EVENT(devlink_health_recover_aborted, TP_STRUCT__entry( __string(bus_name, devlink_bus_name(devlink)) __string(dev_name, devlink_dev_name(devlink)) - __string(driver_name, devlink_to_dev(devlink)->driver->name) + __string(driver_name, devlink_dev_driver_name(devlink)) __string(reporter_name, reporter_name) __field(bool, health_state) __field(u64, time_since_last_recover) @@ -152,7 +152,7 @@ TRACE_EVENT(devlink_health_reporter_state_update, TP_STRUCT__entry( __string(bus_name, devlink_bus_name(devlink)) __string(dev_name, devlink_dev_name(devlink)) - __string(driver_name, devlink_to_dev(devlink)->driver->name) + __string(driver_name, devlink_dev_driver_name(devlink)) __string(reporter_name, reporter_name) __field(u8, new_state) ), @@ -183,7 +183,7 @@ TRACE_EVENT(devlink_trap_report, TP_STRUCT__entry( __string(bus_name, devlink_bus_name(devlink)) __string(dev_name, devlink_dev_name(devlink)) - __string(driver_name, devlink_to_dev(devlink)->driver->name) + __string(driver_name, devlink_dev_driver_name(devlink)) __string(trap_name, metadata->trap_name) __string(trap_group_name, metadata->trap_group_name) __array(char, input_dev_name, IFNAMSIZ) diff --git a/net/devlink/core.c b/net/devlink/core.c index fcb73d3e56aa..34eb06d88544 100644 --- a/net/devlink/core.c +++ b/net/devlink/core.c @@ -260,6 +260,12 @@ const char *devlink_dev_name(const struct devlink *devlink) } EXPORT_SYMBOL_GPL(devlink_dev_name); +const char *devlink_dev_driver_name(const struct devlink *devlink) +{ + return devlink->dev_driver->name; +} +EXPORT_SYMBOL_GPL(devlink_dev_driver_name); + struct net *devlink_net(const struct devlink *devlink) { return read_pnet(&devlink->_net); From 104733e1303efcec97c9a581adabbc03c6679006 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 12 Mar 2026 11:04:03 +0100 Subject: [PATCH 0530/1561] devlink: add devl_warn() helper and use it in port warnings Introduce devl_warn() macro that uses dev_warn() when a backing device is available and falls back to pr_warn() otherwise. Convert all dev_warn() calls in port.c to use it, preparing for devlink instances without a backing device. Signed-off-by: Jiri Pirko Link: https://patch.msgid.link/20260312100407.551173-10-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- net/devlink/devl_internal.h | 9 +++++++++ net/devlink/port.c | 14 +++++++------- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h index 3cc7e696e0fd..cb2ffef1ac2d 100644 --- a/net/devlink/devl_internal.h +++ b/net/devlink/devl_internal.h @@ -71,6 +71,15 @@ struct devlink *__devlink_alloc(const struct devlink_ops *ops, size_t priv_size, struct net *net, struct device *dev, const struct device_driver *dev_driver); +#define devl_warn(devlink, format, args...) \ + do { \ + if ((devlink)->dev) \ + dev_warn((devlink)->dev, format, ##args); \ + else \ + pr_warn("devlink (%s): " format, \ + devlink_dev_name(devlink), ##args); \ + } while (0) + /* devlink instances are open to the access from the user space after * devlink_register() call. Such logical barrier allows us to have certain * expectations related to locking. diff --git a/net/devlink/port.c b/net/devlink/port.c index fa3e1597711b..7fcd1d3ed44c 100644 --- a/net/devlink/port.c +++ b/net/devlink/port.c @@ -976,7 +976,7 @@ static void devlink_port_type_warn(struct work_struct *work) struct devlink_port *port = container_of(to_delayed_work(work), struct devlink_port, type_warn_dw); - dev_warn(port->devlink->dev, "Type was not set for devlink port."); + devl_warn(port->devlink, "Type was not set for devlink port."); } static bool devlink_port_type_should_warn(struct devlink_port *devlink_port) @@ -1242,9 +1242,9 @@ static void __devlink_port_type_set(struct devlink_port *devlink_port, */ void devlink_port_type_eth_set(struct devlink_port *devlink_port) { - dev_warn(devlink_port->devlink->dev, - "devlink port type for port %d set to Ethernet without a software interface reference, device type not supported by the kernel?\n", - devlink_port->index); + devl_warn(devlink_port->devlink, + "devlink port type for port %d set to Ethernet without a software interface reference, device type not supported by the kernel?\n", + devlink_port->index); __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_ETH, NULL); } EXPORT_SYMBOL_GPL(devlink_port_type_eth_set); @@ -1273,9 +1273,9 @@ EXPORT_SYMBOL_GPL(devlink_port_type_ib_set); void devlink_port_type_clear(struct devlink_port *devlink_port) { if (devlink_port->type == DEVLINK_PORT_TYPE_ETH) - dev_warn(devlink_port->devlink->dev, - "devlink port type for port %d cleared without a software interface reference, device type not supported by the kernel?\n", - devlink_port->index); + devl_warn(devlink_port->devlink, + "devlink port type for port %d cleared without a software interface reference, device type not supported by the kernel?\n", + devlink_port->index); __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_NOTSET, NULL); } EXPORT_SYMBOL_GPL(devlink_port_type_clear); From a4c6d53e5fd61829f707b7a723dd2937ed67c803 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 12 Mar 2026 11:04:04 +0100 Subject: [PATCH 0531/1561] devlink: allow devlink instance allocation without a backing device Allow devlink_alloc_ns() to be called with dev=NULL to support device-less devlink instances. When dev is NULL, the instance is identified over netlink using "devlink_index" as bus_name and the decimal index value as dev_name. Signed-off-by: Jiri Pirko Link: https://patch.msgid.link/20260312100407.551173-11-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- net/devlink/core.c | 23 ++++++++++++++++++----- net/devlink/dev.c | 8 ++++---- net/devlink/devl_internal.h | 5 +++-- 3 files changed, 25 insertions(+), 11 deletions(-) diff --git a/net/devlink/core.c b/net/devlink/core.c index 34eb06d88544..eeb6a71f5f56 100644 --- a/net/devlink/core.c +++ b/net/devlink/core.c @@ -250,13 +250,13 @@ EXPORT_SYMBOL_GPL(devlink_to_dev); const char *devlink_bus_name(const struct devlink *devlink) { - return devlink->dev->bus->name; + return devlink->dev ? devlink->dev->bus->name : DEVLINK_INDEX_BUS_NAME; } EXPORT_SYMBOL_GPL(devlink_bus_name); const char *devlink_dev_name(const struct devlink *devlink) { - return dev_name(devlink->dev); + return devlink->dev ? dev_name(devlink->dev) : devlink->dev_name_index; } EXPORT_SYMBOL_GPL(devlink_dev_name); @@ -329,7 +329,10 @@ static void devlink_release(struct work_struct *work) mutex_destroy(&devlink->lock); lockdep_unregister_key(&devlink->lock_key); - put_device(devlink->dev); + if (devlink->dev) + put_device(devlink->dev); + else + kfree(devlink->dev_name_index); kvfree(devlink); } @@ -432,7 +435,7 @@ struct devlink *__devlink_alloc(const struct devlink_ops *ops, size_t priv_size, static u32 last_id; int ret; - WARN_ON(!ops || !dev || !dev_driver); + WARN_ON(!ops || !dev_driver); if (!devlink_reload_actions_valid(ops)) return NULL; @@ -445,7 +448,14 @@ struct devlink *__devlink_alloc(const struct devlink_ops *ops, size_t priv_size, if (ret < 0) goto err_xa_alloc; - devlink->dev = get_device(dev); + if (dev) { + devlink->dev = get_device(dev); + } else { + devlink->dev_name_index = kasprintf(GFP_KERNEL, "%u", devlink->index); + if (!devlink->dev_name_index) + goto err_kasprintf; + } + devlink->ops = ops; devlink->dev_driver = dev_driver; xa_init_flags(&devlink->ports, XA_FLAGS_ALLOC); @@ -471,6 +481,8 @@ struct devlink *__devlink_alloc(const struct devlink_ops *ops, size_t priv_size, return devlink; +err_kasprintf: + xa_erase(&devlinks, devlink->index); err_xa_alloc: kvfree(devlink); return NULL; @@ -492,6 +504,7 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, size_t priv_size, struct net *net, struct device *dev) { + WARN_ON(!dev); return __devlink_alloc(ops, priv_size, net, dev, dev->driver); } EXPORT_SYMBOL_GPL(devlink_alloc_ns); diff --git a/net/devlink/dev.c b/net/devlink/dev.c index e3a36de4f4ae..57b2b8f03543 100644 --- a/net/devlink/dev.c +++ b/net/devlink/dev.c @@ -453,7 +453,8 @@ int devlink_reload(struct devlink *devlink, struct net *dest_net, * (e.g., PCI reset) and to close possible races between these * operations and probe/remove. */ - device_lock_assert(devlink->dev); + if (devlink->dev) + device_lock_assert(devlink->dev); memcpy(remote_reload_stats, devlink->stats.remote_reload_stats, sizeof(remote_reload_stats)); @@ -854,7 +855,7 @@ int devlink_info_version_running_put_ext(struct devlink_info_req *req, } EXPORT_SYMBOL_GPL(devlink_info_version_running_put_ext); -static int devlink_nl_driver_info_get(struct device_driver *drv, +static int devlink_nl_driver_info_get(const struct device_driver *drv, struct devlink_info_req *req) { if (!drv) @@ -872,7 +873,6 @@ devlink_nl_info_fill(struct sk_buff *msg, struct devlink *devlink, enum devlink_command cmd, u32 portid, u32 seq, int flags, struct netlink_ext_ack *extack) { - struct device *dev = devlink_to_dev(devlink); struct devlink_info_req req = {}; void *hdr; int err; @@ -892,7 +892,7 @@ devlink_nl_info_fill(struct sk_buff *msg, struct devlink *devlink, goto err_cancel_msg; } - err = devlink_nl_driver_info_get(dev->driver, &req); + err = devlink_nl_driver_info_get(devlink->dev_driver, &req); if (err) goto err_cancel_msg; diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h index cb2ffef1ac2d..7dfb7cdd2d23 100644 --- a/net/devlink/devl_internal.h +++ b/net/devlink/devl_internal.h @@ -49,6 +49,7 @@ struct devlink { struct xarray snapshot_ids; struct devlink_dev_stats stats; struct device *dev; + const char *dev_name_index; const struct device_driver *dev_driver; possible_net_t _net; /* Serializes access to devlink instance specific objects such as @@ -119,7 +120,7 @@ static inline bool devl_is_registered(struct devlink *devlink) static inline void devl_dev_lock(struct devlink *devlink, bool dev_lock) { - if (dev_lock) + if (dev_lock && devlink->dev) device_lock(devlink->dev); devl_lock(devlink); } @@ -127,7 +128,7 @@ static inline void devl_dev_lock(struct devlink *devlink, bool dev_lock) static inline void devl_dev_unlock(struct devlink *devlink, bool dev_lock) { devl_unlock(devlink); - if (dev_lock) + if (dev_lock && devlink->dev) device_unlock(devlink->dev); } From 1850e76b38049548ecb03c62bb10d40b94eecaac Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 12 Mar 2026 11:04:05 +0100 Subject: [PATCH 0532/1561] devlink: introduce shared devlink instance for PFs on same chip Multiple PFs may reside on the same physical chip, running a single firmware. Some of the resources and configurations may be shared among these PFs. Currently, there is no good object to pin the configuration knobs on. Introduce a shared devlink instance, instantiated upon probe of the first PF and removed during remove of the last PF. The shared devlink instance is not backed by any device device, as there is no PCI device related to it. The implementation uses reference counting to manage the lifecycle: each PF that probes calls devlink_shd_get() to get or create the shared instance, and calls devlink_shd_put() when it removes. The shared instance is automatically destroyed when the last PF removes. Example: pci/0000:08:00.0: index 0 nested_devlink: auxiliary/mlx5_core.eth.0 devlink_index/1: index 1 nested_devlink: pci/0000:08:00.0 pci/0000:08:00.1 auxiliary/mlx5_core.eth.0: index 2 pci/0000:08:00.1: index 3 nested_devlink: auxiliary/mlx5_core.eth.1 auxiliary/mlx5_core.eth.1: index 4 Signed-off-by: Jiri Pirko Link: https://patch.msgid.link/20260312100407.551173-12-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 7 ++ net/devlink/Makefile | 2 +- net/devlink/sh_dev.c | 161 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 169 insertions(+), 1 deletion(-) create mode 100644 net/devlink/sh_dev.c diff --git a/include/net/devlink.h b/include/net/devlink.h index 45dec7067a8e..3038af6ec017 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1647,6 +1647,13 @@ void devlink_register(struct devlink *devlink); void devlink_unregister(struct devlink *devlink); void devlink_free(struct devlink *devlink); +struct devlink *devlink_shd_get(const char *id, + const struct devlink_ops *ops, + size_t priv_size, + const struct device_driver *driver); +void devlink_shd_put(struct devlink *devlink); +void *devlink_shd_get_priv(struct devlink *devlink); + /** * struct devlink_port_ops - Port operations * @port_split: Callback used to split the port into multiple ones. diff --git a/net/devlink/Makefile b/net/devlink/Makefile index 000da622116a..8f2adb5e5836 100644 --- a/net/devlink/Makefile +++ b/net/devlink/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 obj-y := core.o netlink.o netlink_gen.o dev.o port.o sb.o dpipe.o \ - resource.o param.o region.o health.o trap.o rate.o linecard.o + resource.o param.o region.o health.o trap.o rate.o linecard.o sh_dev.o diff --git a/net/devlink/sh_dev.c b/net/devlink/sh_dev.c new file mode 100644 index 000000000000..85acce97e788 --- /dev/null +++ b/net/devlink/sh_dev.c @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include + +#include "devl_internal.h" + +static LIST_HEAD(shd_list); +static DEFINE_MUTEX(shd_mutex); /* Protects shd_list and shd->list */ + +/* This structure represents a shared devlink instance, + * there is one created per identifier (e.g., serial number). + */ +struct devlink_shd { + struct list_head list; /* Node in shd list */ + const char *id; /* Identifier string (e.g., serial number) */ + refcount_t refcount; /* Reference count */ + size_t priv_size; /* Size of driver private data */ + char priv[] __aligned(NETDEV_ALIGN) __counted_by(priv_size); +}; + +static struct devlink_shd *devlink_shd_lookup(const char *id) +{ + struct devlink_shd *shd; + + list_for_each_entry(shd, &shd_list, list) { + if (!strcmp(shd->id, id)) + return shd; + } + + return NULL; +} + +static struct devlink_shd *devlink_shd_create(const char *id, + const struct devlink_ops *ops, + size_t priv_size, + const struct device_driver *driver) +{ + struct devlink_shd *shd; + struct devlink *devlink; + + devlink = __devlink_alloc(ops, sizeof(struct devlink_shd) + priv_size, + &init_net, NULL, driver); + if (!devlink) + return NULL; + shd = devlink_priv(devlink); + + shd->id = kstrdup(id, GFP_KERNEL); + if (!shd->id) + goto err_devlink_free; + shd->priv_size = priv_size; + refcount_set(&shd->refcount, 1); + + devl_lock(devlink); + devl_register(devlink); + devl_unlock(devlink); + + list_add_tail(&shd->list, &shd_list); + + return shd; + +err_devlink_free: + devlink_free(devlink); + return NULL; +} + +static void devlink_shd_destroy(struct devlink_shd *shd) +{ + struct devlink *devlink = priv_to_devlink(shd); + + list_del(&shd->list); + devl_lock(devlink); + devl_unregister(devlink); + devl_unlock(devlink); + kfree(shd->id); + devlink_free(devlink); +} + +/** + * devlink_shd_get - Get or create a shared devlink instance + * @id: Identifier string (e.g., serial number) for the shared instance + * @ops: Devlink operations structure + * @priv_size: Size of private data structure + * @driver: Driver associated with the shared devlink instance + * + * Get an existing shared devlink instance identified by @id, or create + * a new one if it doesn't exist. Return the devlink instance with a + * reference held. The caller must call devlink_shd_put() when done. + * + * All callers sharing the same @id must pass identical @ops, @priv_size + * and @driver. A mismatch triggers a warning and returns NULL. + * + * Return: Pointer to the shared devlink instance on success, + * NULL on failure + */ +struct devlink *devlink_shd_get(const char *id, + const struct devlink_ops *ops, + size_t priv_size, + const struct device_driver *driver) +{ + struct devlink *devlink; + struct devlink_shd *shd; + + mutex_lock(&shd_mutex); + + shd = devlink_shd_lookup(id); + if (!shd) { + shd = devlink_shd_create(id, ops, priv_size, driver); + goto unlock; + } + + devlink = priv_to_devlink(shd); + if (WARN_ON_ONCE(devlink->ops != ops || + shd->priv_size != priv_size || + devlink->dev_driver != driver)) { + shd = NULL; + goto unlock; + } + refcount_inc(&shd->refcount); + +unlock: + mutex_unlock(&shd_mutex); + return shd ? priv_to_devlink(shd) : NULL; +} +EXPORT_SYMBOL_GPL(devlink_shd_get); + +/** + * devlink_shd_put - Release a reference on a shared devlink instance + * @devlink: Shared devlink instance + * + * Release a reference on a shared devlink instance obtained via + * devlink_shd_get(). + */ +void devlink_shd_put(struct devlink *devlink) +{ + struct devlink_shd *shd; + + mutex_lock(&shd_mutex); + shd = devlink_priv(devlink); + if (refcount_dec_and_test(&shd->refcount)) + devlink_shd_destroy(shd); + mutex_unlock(&shd_mutex); +} +EXPORT_SYMBOL_GPL(devlink_shd_put); + +/** + * devlink_shd_get_priv - Get private data from shared devlink instance + * @devlink: Devlink instance + * + * Returns a pointer to the driver's private data structure within + * the shared devlink instance. + * + * Return: Pointer to private data + */ +void *devlink_shd_get_priv(struct devlink *devlink) +{ + struct devlink_shd *shd = devlink_priv(devlink); + + return shd->priv; +} +EXPORT_SYMBOL_GPL(devlink_shd_get_priv); From 63fff8c0f7025a838f1afab5fc8e6ad989e4823e Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 12 Mar 2026 11:04:06 +0100 Subject: [PATCH 0533/1561] documentation: networking: add shared devlink documentation Document shared devlink instances for multiple PFs on the same chip. Signed-off-by: Jiri Pirko Link: https://patch.msgid.link/20260312100407.551173-13-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- .../networking/devlink/devlink-shared.rst | 97 +++++++++++++++++++ Documentation/networking/devlink/index.rst | 1 + 2 files changed, 98 insertions(+) create mode 100644 Documentation/networking/devlink/devlink-shared.rst diff --git a/Documentation/networking/devlink/devlink-shared.rst b/Documentation/networking/devlink/devlink-shared.rst new file mode 100644 index 000000000000..16bf6a7d25d9 --- /dev/null +++ b/Documentation/networking/devlink/devlink-shared.rst @@ -0,0 +1,97 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======================== +Devlink Shared Instances +======================== + +Overview +======== + +Shared devlink instances allow multiple physical functions (PFs) on the same +chip to share a devlink instance for chip-wide operations. + +Multiple PFs may reside on the same physical chip, running a single firmware. +Some of the resources and configurations may be shared among these PFs. The +shared devlink instance provides an object to pin configuration knobs on. + +There are two possible usage models: + +1. The shared devlink instance is used alongside individual PF devlink + instances, providing chip-wide configuration in addition to per-PF + configuration. +2. The shared devlink instance is the only devlink instance, without + per-PF instances. + +It is up to the driver to decide which usage model to use. + +The shared devlink instance is not backed by any struct *device*. + +Implementation +============== + +Architecture +------------ + +The implementation uses: + +* **Chip identification**: PFs are grouped by chip using a driver-specific identifier +* **Shared instance management**: Global list of shared instances with reference counting + +API Functions +------------- + +The following functions are provided for managing shared devlink instances: + +* ``devlink_shd_get()``: Get or create a shared devlink instance identified by a string ID +* ``devlink_shd_put()``: Release a reference on a shared devlink instance +* ``devlink_shd_get_priv()``: Get private data from shared devlink instance + +Initialization Flow +------------------- + +1. **PF calls shared devlink init** during driver probe +2. **Chip identification** using driver-specific method to determine device identity +3. **Get or create shared instance** using ``devlink_shd_get()``: + + * The function looks up existing instance by identifier + * If none exists, creates new instance: + - Allocates and registers devlink instance + - Adds to global shared instances list + - Increments reference count + +4. **Set nested devlink instance** for the PF devlink instance using + ``devl_nested_devlink_set()`` before registering the PF devlink instance + +Cleanup Flow +------------ + +1. **Cleanup** when PF is removed +2. **Call** ``devlink_shd_put()`` to release reference (decrements reference count) +3. **Shared instance is automatically destroyed** when the last PF removes (reference count reaches zero) + +Chip Identification +------------------- + +PFs belonging to the same chip are identified using a driver-specific method. +The driver is free to choose any identifier that is suitable for determining +whether two PFs are part of the same device. Examples include: + +* **PCI VPD serial numbers**: Extract from PCI VPD +* **Device tree properties**: Read chip identifier from device tree +* **Other hardware-specific identifiers**: Any unique identifier that groups PFs by chip + +Locking +------- + +A global mutex (``shd_mutex``) protects the shared instances list during registration/deregistration. + +Similarly to other nested devlink instance relationships, devlink lock of +the shared instance should be always taken after the devlink lock of PF. + +Reference Counting +------------------ + +Each shared devlink instance maintains a reference count (``refcount_t refcount``). +The reference count is incremented when ``devlink_shd_get()`` is called and decremented +when ``devlink_shd_put()`` is called. When the reference count reaches zero, the shared +instance is automatically destroyed. diff --git a/Documentation/networking/devlink/index.rst b/Documentation/networking/devlink/index.rst index 35b12a2bfeba..f7ba7dcf477d 100644 --- a/Documentation/networking/devlink/index.rst +++ b/Documentation/networking/devlink/index.rst @@ -68,6 +68,7 @@ general. devlink-resource devlink-selftests devlink-trap + devlink-shared Driver-specific documentation ----------------------------- From 2a8c8a03f306e21a0ea74c93d4332119557f4575 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 12 Mar 2026 11:04:07 +0100 Subject: [PATCH 0534/1561] net/mlx5: Add a shared devlink instance for PFs on same chip Use the previously introduced shared devlink infrastructure to create a shared devlink instance for mlx5 PFs that reside on the same physical chip. The shared instance is identified by the chip's serial number extracted from PCI VPD (V3 keyword, with fallback to serial number for older devices). Each PF that probes calls mlx5_shd_init() which extracts the chip serial number and uses devlink_shd_get() to get or create the shared instance. When a PF is removed, mlx5_shd_uninit() calls devlink_shd_put() to release the reference. The shared instance is automatically destroyed when the last PF is removed. Make the PF devlink instances nested in this shared devlink instance, allowing userspace to identify which PFs belong to the same physical chip. Example: pci/0000:08:00.0: index 0 nested_devlink: auxiliary/mlx5_core.eth.0 devlink_index/1: index 1 nested_devlink: pci/0000:08:00.0 pci/0000:08:00.1 auxiliary/mlx5_core.eth.0: index 2 pci/0000:08:00.1: index 3 nested_devlink: auxiliary/mlx5_core.eth.1 auxiliary/mlx5_core.eth.1: index 4 Signed-off-by: Jiri Pirko Link: https://patch.msgid.link/20260312100407.551173-14-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/Makefile | 5 +- .../net/ethernet/mellanox/mlx5/core/main.c | 17 ++++++ .../ethernet/mellanox/mlx5/core/sh_devlink.c | 61 +++++++++++++++++++ .../ethernet/mellanox/mlx5/core/sh_devlink.h | 12 ++++ include/linux/mlx5/driver.h | 1 + 5 files changed, 94 insertions(+), 2 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.h diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 8ffa286a18f5..d39fe9c4a87c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -16,8 +16,9 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \ fs_counters.o fs_ft_pool.o rl.o lag/debugfs.o lag/lag.o dev.o events.o wq.o lib/gid.o \ lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \ - diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o diag/reporter_vnic.o \ - fw_reset.o qos.o lib/tout.o lib/aso.o wc.o fs_pool.o lib/nv_param.o + diag/fw_tracer.o diag/crdump.o devlink.o sh_devlink.o diag/rsc_dump.o \ + diag/reporter_vnic.o fw_reset.o qos.o lib/tout.o lib/aso.o wc.o fs_pool.o \ + lib/nv_param.o # # Netdev basic diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index fdc3ba20912e..1c35c3fc3bb3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -74,6 +74,7 @@ #include "mlx5_irq.h" #include "hwmon.h" #include "lag/lag.h" +#include "sh_devlink.h" MODULE_AUTHOR("Eli Cohen "); MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver"); @@ -1520,10 +1521,16 @@ int mlx5_init_one(struct mlx5_core_dev *dev) int err; devl_lock(devlink); + if (dev->shd) { + err = devl_nested_devlink_set(dev->shd, devlink); + if (err) + goto unlock; + } devl_register(devlink); err = mlx5_init_one_devl_locked(dev); if (err) devl_unregister(devlink); +unlock: devl_unlock(devlink); return err; } @@ -2005,6 +2012,13 @@ static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id) goto pci_init_err; } + err = mlx5_shd_init(dev); + if (err) { + mlx5_core_err(dev, "mlx5_shd_init failed with error code %d\n", + err); + goto shd_init_err; + } + err = mlx5_init_one(dev); if (err) { mlx5_core_err(dev, "mlx5_init_one failed with error code %d\n", @@ -2018,6 +2032,8 @@ static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id) return 0; err_init_one: + mlx5_shd_uninit(dev); +shd_init_err: mlx5_pci_close(dev); pci_init_err: mlx5_mdev_uninit(dev); @@ -2039,6 +2055,7 @@ static void remove_one(struct pci_dev *pdev) mlx5_drain_health_wq(dev); mlx5_sriov_disable(pdev, false); mlx5_uninit_one(dev); + mlx5_shd_uninit(dev); mlx5_pci_close(dev); mlx5_mdev_uninit(dev); mlx5_adev_idx_free(dev->priv.adev_idx); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.c new file mode 100644 index 000000000000..bc33f95302df --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include +#include + +#include "sh_devlink.h" + +static const struct devlink_ops mlx5_shd_ops = { +}; + +int mlx5_shd_init(struct mlx5_core_dev *dev) +{ + u8 *vpd_data __free(kfree) = NULL; + struct pci_dev *pdev = dev->pdev; + unsigned int vpd_size, kw_len; + struct devlink *devlink; + char *sn, *end; + int start; + int err; + + if (!mlx5_core_is_pf(dev)) + return 0; + + vpd_data = pci_vpd_alloc(pdev, &vpd_size); + if (IS_ERR(vpd_data)) { + err = PTR_ERR(vpd_data); + return err == -ENODEV ? 0 : err; + } + start = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size, "V3", &kw_len); + if (start < 0) { + /* Fall-back to SN for older devices. */ + start = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size, + PCI_VPD_RO_KEYWORD_SERIALNO, &kw_len); + if (start < 0) + return -ENOENT; + } + sn = kstrndup(vpd_data + start, kw_len, GFP_KERNEL); + if (!sn) + return -ENOMEM; + /* Firmware may return spaces at the end of the string, strip it. */ + end = strchrnul(sn, ' '); + *end = '\0'; + + /* Get or create shared devlink instance */ + devlink = devlink_shd_get(sn, &mlx5_shd_ops, 0, pdev->dev.driver); + kfree(sn); + if (!devlink) + return -ENOMEM; + + dev->shd = devlink; + return 0; +} + +void mlx5_shd_uninit(struct mlx5_core_dev *dev) +{ + if (!dev->shd) + return; + + devlink_shd_put(dev->shd); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.h new file mode 100644 index 000000000000..8ab8d6940227 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_SH_DEVLINK_H__ +#define __MLX5_SH_DEVLINK_H__ + +#include + +int mlx5_shd_init(struct mlx5_core_dev *dev); +void mlx5_shd_uninit(struct mlx5_core_dev *dev); + +#endif /* __MLX5_SH_DEVLINK_H__ */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 04dcd09f7517..1268fcf35ec7 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -798,6 +798,7 @@ struct mlx5_core_dev { enum mlx5_wc_state wc_state; /* sync write combining state */ struct mutex wc_state_lock; + struct devlink *shd; }; struct mlx5_db { From 4686679a14d269d4f02533228e82de56f432bae5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 13 Mar 2026 11:54:29 +0000 Subject: [PATCH 0535/1561] selftests/net: packetdrill: add tcp_disorder_fin_in_FIN_WAIT.pkt Commit 795a7dfbc3d9 ("net: tcp: accept old ack during closing") was fixing an old bug, add a test to make sure we won't break this case in future kernels. Signed-off-by: Eric Dumazet Cc: Menglong Dong Link: https://patch.msgid.link/20260313115429.3365751-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- .../tcp_disorder_fin_in_FIN_WAIT.pkt | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 tools/testing/selftests/net/packetdrill/tcp_disorder_fin_in_FIN_WAIT.pkt diff --git a/tools/testing/selftests/net/packetdrill/tcp_disorder_fin_in_FIN_WAIT.pkt b/tools/testing/selftests/net/packetdrill/tcp_disorder_fin_in_FIN_WAIT.pkt new file mode 100644 index 000000000000..336cbf7815c8 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_disorder_fin_in_FIN_WAIT.pkt @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Check fix in 795a7dfbc3d9 ("net: tcp: accept old ack during closing") + +// Set up config. +`./defaults.sh` + +// Initialize a server socket. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 65535 + * > S. 0:0(0) ack 1 + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 shutdown(4, SHUT_WR) = 0 + * > F. 1:1(0) ack 1 + +// We expect to receive one ACK. +// But what happens if a FIN was already in transmt and received out-of-order ? + + +0 < . 2:2(0) ack 2 win 257 + +// This FIN packet was sent before the prior ACK (see ack 1). + +0 < F. 1:1(0) ack 1 win 257 + +// Even if the FIN is received out-of-order, we should ACK it. + + * > . 2:2(0) ack 2 From 12589892f41c4c645c80ef9f036f7451a6045624 Mon Sep 17 00:00:00 2001 From: Cai Xinchen Date: Thu, 12 Mar 2026 06:59:06 +0000 Subject: [PATCH 0536/1561] dpaa2: add independent dependencies for FSL_DPAA2_SWITCH Since the commit 84cba72956fd ("dpaa2-switch: integrate the MAC endpoint support") included dpaa2-mac.o in the driver, but it didn't select PCS_LYNX, PHYLINK and FSL_XGMAC_MDIO. it will lead to link error, such as undefined reference to `phylink_ethtool_ksettings_set' undefined reference to `lynx_pcs_create_fwnode' And the same reason as the commit d2624e70a2f53 ("dpaa2-eth: select XGMAC_MDIO for MDIO bus support"), enable the FSL_XGMAC_MDIO Kconfig option in order to have MDIO access to internal and external PHYs. Because dpaa2-switch uses fsl_mc_driver APIs, add depends on FSL_MC_BUS && FSL_MC_DPIO as FSL_DPAA2_SWITCH do. FSL_XGMAC_MDIO and FSL_MC_BUS depend on OF, thus the dependence of FSL_MC_BUS can satisfy FSL_XGMAC_MDIO's OF requirement. Fixes: 84cba72956fd ("dpaa2-switch: integrate the MAC endpoint support") Suggested-by: Ioana Ciornei Signed-off-by: Cai Xinchen Link: https://patch.msgid.link/20260312065907.476663-2-caixinchen1@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/dpaa2/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/freescale/dpaa2/Kconfig b/drivers/net/ethernet/freescale/dpaa2/Kconfig index d029b69c3f18..36280e5d99e1 100644 --- a/drivers/net/ethernet/freescale/dpaa2/Kconfig +++ b/drivers/net/ethernet/freescale/dpaa2/Kconfig @@ -34,6 +34,10 @@ config FSL_DPAA2_SWITCH tristate "Freescale DPAA2 Ethernet Switch" depends on BRIDGE || BRIDGE=n depends on NET_SWITCHDEV + depends on FSL_MC_BUS && FSL_MC_DPIO + select PHYLINK + select PCS_LYNX + select FSL_XGMAC_MDIO help Driver for Freescale DPAA2 Ethernet Switch. This driver manages switch objects discovered on the Freeescale MC bus. From 97daf00745f7f9f261b0e91418de6e79d7826c36 Mon Sep 17 00:00:00 2001 From: Cai Xinchen Date: Thu, 12 Mar 2026 06:59:07 +0000 Subject: [PATCH 0537/1561] dpaa2: compile dpaa2 even CONFIG_FSL_DPAA2_ETH=n CONFIG_FSL_DPAA2_ETH and CONFIG_FSL_DPAA2_SWITCH are not associated, but the compilation of FSL_DPAA2_SWITCH depends on the compilation of the dpaa2 folder. The files controlled by CONFIG_FSL_DPAA2_SWITCH in the dpaa2 folder are not controlled by CONFIG_FSL_DPAA2_ETH, except for the files controlled by CONFIG_FSL_DPAA2_SWITCH. Therefore, removing the restriction will not affect the compilation of the files in the directory. Fixes: f48298d3fbfaa ("staging: dpaa2-switch: move the driver out of staging") Suggested-by: Ioana Ciornei Signed-off-by: Cai Xinchen Link: https://patch.msgid.link/20260312065907.476663-3-caixinchen1@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/Makefile b/drivers/net/ethernet/freescale/Makefile index de7b31842233..d0a259e47960 100644 --- a/drivers/net/ethernet/freescale/Makefile +++ b/drivers/net/ethernet/freescale/Makefile @@ -22,6 +22,5 @@ ucc_geth_driver-objs := ucc_geth.o ucc_geth_ethtool.o obj-$(CONFIG_FSL_FMAN) += fman/ obj-$(CONFIG_FSL_DPAA_ETH) += dpaa/ -obj-$(CONFIG_FSL_DPAA2_ETH) += dpaa2/ - +obj-y += dpaa2/ obj-y += enetc/ From 2a585b2efb48a86cd32a953ba84cf1557a655b40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=2E=20Neusch=C3=A4fer?= Date: Tue, 3 Mar 2026 18:49:25 +0100 Subject: [PATCH 0538/1561] wifi: rtl8xxxu: Mark RTL8188ETV (0bda:0179) as tested MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This WiFi chip appears in the "TCU Fernsehfee 3.0" set-top box. I have tested that it works in WPA2-PSK-CCMP client mode. Cc: Jes.Sorensen@gmail.com Signed-off-by: J. Neuschäfer Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260303-rtl8xxx-tested-v1-1-291a38a6ff2f@posteo.net --- drivers/net/wireless/realtek/rtl8xxxu/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/core.c b/drivers/net/wireless/realtek/rtl8xxxu/core.c index 794187d28caa..b4efc6f00a37 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/core.c @@ -7703,6 +7703,7 @@ static int rtl8xxxu_probe(struct usb_interface *interface, switch (id->idVendor) { case USB_VENDOR_ID_REALTEK: switch(id->idProduct) { + case 0x0179: case 0x1724: case 0x8176: case 0x8178: From f8a2fc809bfeb49130709b31a4d357a049f28547 Mon Sep 17 00:00:00 2001 From: Yi Cong Date: Fri, 6 Mar 2026 15:16:27 +0800 Subject: [PATCH 0539/1561] wifi: rtl8xxxu: fix potential use of uninitialized value The local variables 'mcs' and 'nss' in rtl8xxxu_update_ra_report() are passed to rtl8xxxu_desc_to_mcsrate() as output parameters. If the helper function encounters an unhandled rate index, it may return without setting these values, leading to the use of uninitialized stack data. Remove the helper rtl8xxxu_desc_to_mcsrate() and inline the logic into rtl8xxxu_update_ra_report(). This fixes the use of uninitialized 'mcs' and 'nss' variables for legacy rates. The new implementation explicitly handles: - Legacy rates: Set bitrate only. - HT rates (MCS0-15): Set MCS flags, index, and NSS (1 or 2) directly. - Invalid rates: Return early. Fixes: 7de16123d9e2 ("wifi: rtl8xxxu: Introduce rtl8xxxu_update_ra_report") Cc: stable@vger.kernel.org Suggested-by: Ping-Ke Shih Signed-off-by: Yi Cong Link: https://lore.kernel.org/all/96e31963da0c42dcb52ce44f818963d7@realtek.com/ Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260306071627.56501-1-cong.yi@linux.dev --- drivers/net/wireless/realtek/rtl8xxxu/core.c | 28 ++++++-------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/core.c b/drivers/net/wireless/realtek/rtl8xxxu/core.c index b4efc6f00a37..d1b1474cba67 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/core.c @@ -4697,20 +4697,6 @@ static const struct ieee80211_rate rtl8xxxu_legacy_ratetable[] = { {.bitrate = 540, .hw_value = 0x0b,}, }; -static void rtl8xxxu_desc_to_mcsrate(u16 rate, u8 *mcs, u8 *nss) -{ - if (rate <= DESC_RATE_54M) - return; - - if (rate >= DESC_RATE_MCS0 && rate <= DESC_RATE_MCS15) { - if (rate < DESC_RATE_MCS8) - *nss = 1; - else - *nss = 2; - *mcs = rate - DESC_RATE_MCS0; - } -} - static void rtl8xxxu_set_basic_rates(struct rtl8xxxu_priv *priv, u32 rate_cfg) { struct ieee80211_hw *hw = priv->hw; @@ -4820,23 +4806,25 @@ static void rtl8xxxu_set_aifs(struct rtl8xxxu_priv *priv, u8 slot_time) void rtl8xxxu_update_ra_report(struct rtl8xxxu_ra_report *rarpt, u8 rate, u8 sgi, u8 bw) { - u8 mcs, nss; - rarpt->txrate.flags = 0; if (rate <= DESC_RATE_54M) { rarpt->txrate.legacy = rtl8xxxu_legacy_ratetable[rate].bitrate; - } else { - rtl8xxxu_desc_to_mcsrate(rate, &mcs, &nss); + } else if (rate >= DESC_RATE_MCS0 && rate <= DESC_RATE_MCS15) { rarpt->txrate.flags |= RATE_INFO_FLAGS_MCS; + if (rate < DESC_RATE_MCS8) + rarpt->txrate.nss = 1; + else + rarpt->txrate.nss = 2; - rarpt->txrate.mcs = mcs; - rarpt->txrate.nss = nss; + rarpt->txrate.mcs = rate - DESC_RATE_MCS0; if (sgi) rarpt->txrate.flags |= RATE_INFO_FLAGS_SHORT_GI; rarpt->txrate.bw = bw; + } else { + return; } rarpt->bit_rate = cfg80211_calculate_bitrate(&rarpt->txrate); From 68e7d359a5eeea75197626c800d082955032e30a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 6 Mar 2026 09:51:42 +0100 Subject: [PATCH 0540/1561] wifi: rtl818x: drop redundant device reference Driver core holds a reference to the USB interface and its parent USB device while the interface is bound to a driver and there is no need to take additional references unless the structures are needed after disconnect. Drop the redundant device reference to reduce cargo culting, make it easier to spot drivers where an extra reference is needed, and reduce the risk of memory leaks when drivers fail to release it. Signed-off-by: Johan Hovold Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260306085144.12064-17-johan@kernel.org --- drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c b/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c index f7e0f6573180..1d21c468a236 100644 --- a/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c +++ b/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c @@ -1475,8 +1475,6 @@ static int rtl8187_probe(struct usb_interface *intf, usb_set_intfdata(intf, dev); priv->udev = udev; - usb_get_dev(udev); - skb_queue_head_init(&priv->rx_queue); BUILD_BUG_ON(sizeof(priv->channels) != sizeof(rtl818x_channels)); @@ -1663,7 +1661,6 @@ static int rtl8187_probe(struct usb_interface *intf, err_free_dmabuf: kfree(priv->io_dmabuf); usb_set_intfdata(intf, NULL); - usb_put_dev(udev); err_free_dev: ieee80211_free_hw(dev); return err; @@ -1685,7 +1682,6 @@ static void rtl8187_disconnect(struct usb_interface *intf) priv = dev->priv; usb_reset_device(priv->udev); - usb_put_dev(interface_to_usbdev(intf)); kfree(priv->io_dmabuf); ieee80211_free_hw(dev); } From 711b8add39294393a086b5c0e51f0c8d79a89f4d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 6 Mar 2026 09:51:43 +0100 Subject: [PATCH 0541/1561] wifi: rtl8xxxu: drop redundant device reference Driver core holds a reference to the USB interface and its parent USB device while the interface is bound to a driver and there is no need to take additional references unless the structures are needed after disconnect. Drop the redundant device reference to reduce cargo culting, make it easier to spot drivers where an extra reference is needed, and reduce the risk of memory leaks when drivers fail to release it. Signed-off-by: Johan Hovold Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260306085144.12064-18-johan@kernel.org --- drivers/net/wireless/realtek/rtl8xxxu/core.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/core.c b/drivers/net/wireless/realtek/rtl8xxxu/core.c index d1b1474cba67..cb6c8859a918 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/core.c @@ -7686,7 +7686,7 @@ static int rtl8xxxu_probe(struct usb_interface *interface, int ret; int untested = 1; - udev = usb_get_dev(interface_to_usbdev(interface)); + udev = interface_to_usbdev(interface); switch (id->idVendor) { case USB_VENDOR_ID_REALTEK: @@ -7745,10 +7745,8 @@ static int rtl8xxxu_probe(struct usb_interface *interface, } hw = ieee80211_alloc_hw(sizeof(struct rtl8xxxu_priv), &rtl8xxxu_ops); - if (!hw) { - ret = -ENOMEM; - goto err_put_dev; - } + if (!hw) + return -ENOMEM; priv = hw->priv; priv->hw = hw; @@ -7890,8 +7888,6 @@ err_set_intfdata: mutex_destroy(&priv->h2c_mutex); ieee80211_free_hw(hw); -err_put_dev: - usb_put_dev(udev); return ret; } @@ -7924,7 +7920,6 @@ static void rtl8xxxu_disconnect(struct usb_interface *interface) "Device still attached, trying to reset\n"); usb_reset_device(priv->udev); } - usb_put_dev(priv->udev); ieee80211_free_hw(hw); } From bbb15e71156cd9f5e1869eee7207a06ea8e96c39 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 6 Mar 2026 09:51:44 +0100 Subject: [PATCH 0542/1561] wifi: rtw88: fix device leak on probe failure Driver core holds a reference to the USB interface and its parent USB device while the interface is bound to a driver and there is no need to take additional references unless the structures are needed after disconnect. This driver takes a reference to the USB device during probe but does not to release it on all probe errors (e.g. when descriptor parsing fails). Drop the redundant device reference to fix the leak, reduce cargo culting, make it easier to spot drivers where an extra reference is needed, and reduce the risk of further memory leaks. Fixes: a82dfd33d123 ("wifi: rtw88: Add common USB chip support") Reported-by: Greg Kroah-Hartman Link: https://lore.kernel.org/netdev/2026022319-turbofan-darkened-206d@gregkh/ Cc: stable@vger.kernel.org # 6.2 Cc: Sascha Hauer Signed-off-by: Johan Hovold Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260306085144.12064-19-johan@kernel.org --- drivers/net/wireless/realtek/rtw88/usb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw88/usb.c b/drivers/net/wireless/realtek/rtw88/usb.c index 433b06c8d8a6..718940ebba31 100644 --- a/drivers/net/wireless/realtek/rtw88/usb.c +++ b/drivers/net/wireless/realtek/rtw88/usb.c @@ -1041,7 +1041,7 @@ static int rtw_usb_intf_init(struct rtw_dev *rtwdev, struct usb_interface *intf) { struct rtw_usb *rtwusb = rtw_get_usb_priv(rtwdev); - struct usb_device *udev = usb_get_dev(interface_to_usbdev(intf)); + struct usb_device *udev = interface_to_usbdev(intf); int ret; rtwusb->udev = udev; @@ -1067,7 +1067,6 @@ static void rtw_usb_intf_deinit(struct rtw_dev *rtwdev, { struct rtw_usb *rtwusb = rtw_get_usb_priv(rtwdev); - usb_put_dev(rtwusb->udev); kfree(rtwusb->usb_data); usb_set_intfdata(intf, NULL); } From c2a21f35cea43c3287a4f314b7aab22ea7234122 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 6 Mar 2026 10:32:06 +0100 Subject: [PATCH 0543/1561] wifi: rtw89: drop redundant device reference Driver core holds a reference to the USB interface and its parent USB device while the interface is bound to a driver and there is no need to take additional references unless the structures are needed after disconnect. Drop the redundant device reference to reduce cargo culting, make it easier to spot drivers where an extra reference is needed, and reduce the risk of memory leaks when drivers fail to release it. Signed-off-by: Johan Hovold Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260306093206.21081-1-johan@kernel.org --- drivers/net/wireless/realtek/rtw89/usb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/usb.c b/drivers/net/wireless/realtek/rtw89/usb.c index da1b7ce8089e..64f0c0ec5ed4 100644 --- a/drivers/net/wireless/realtek/rtw89/usb.c +++ b/drivers/net/wireless/realtek/rtw89/usb.c @@ -935,7 +935,7 @@ static int rtw89_usb_intf_init(struct rtw89_dev *rtwdev, if (!rtwusb->vendor_req_buf) return -ENOMEM; - rtwusb->udev = usb_get_dev(interface_to_usbdev(intf)); + rtwusb->udev = interface_to_usbdev(intf); usb_set_intfdata(intf, rtwdev->hw); @@ -949,7 +949,6 @@ static void rtw89_usb_intf_deinit(struct rtw89_dev *rtwdev, { struct rtw89_usb *rtwusb = rtw89_usb_priv(rtwdev); - usb_put_dev(rtwusb->udev); kfree(rtwusb->vendor_req_buf); usb_set_intfdata(intf, NULL); } From fc25b384fc0bb983fe383e8392b6c2f4aafec77d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 9 Mar 2026 09:33:36 +0100 Subject: [PATCH 0544/1561] wifi: rtlwifi: usb: drop redundant device reference Driver core holds a reference to the USB interface and its parent USB device while the interface is bound to a driver and there is no need to take additional references unless the structures are needed after disconnect. Drop the redundant device reference to reduce cargo culting, make it easier to spot drivers where an extra reference is needed, and reduce the risk of memory leaks when drivers fail to release it. Signed-off-by: Johan Hovold Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260309083336.16397-1-johan@kernel.org --- drivers/net/wireless/realtek/rtlwifi/usb.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/usb.c b/drivers/net/wireless/realtek/rtlwifi/usb.c index d35ed56d6db9..9a64df9eed39 100644 --- a/drivers/net/wireless/realtek/rtlwifi/usb.c +++ b/drivers/net/wireless/realtek/rtlwifi/usb.c @@ -986,7 +986,6 @@ int rtl_usb_probe(struct usb_interface *intf, init_completion(&rtlpriv->firmware_loading_complete); SET_IEEE80211_DEV(hw, &intf->dev); udev = interface_to_usbdev(intf); - usb_get_dev(udev); usb_priv = rtl_usbpriv(hw); memset(usb_priv, 0, sizeof(*usb_priv)); usb_priv->dev.intf = intf; @@ -1038,7 +1037,6 @@ error_out: rtl_deinit_core(hw); error_out2: _rtl_usb_io_handler_release(hw); - usb_put_dev(udev); kfree(rtlpriv->usb_data); ieee80211_free_hw(hw); return -ENODEV; @@ -1050,7 +1048,6 @@ void rtl_usb_disconnect(struct usb_interface *intf) struct ieee80211_hw *hw = usb_get_intfdata(intf); struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_mac *rtlmac = rtl_mac(rtl_priv(hw)); - struct rtl_usb *rtlusb = rtl_usbdev(rtl_usbpriv(hw)); if (unlikely(!rtlpriv)) return; @@ -1072,7 +1069,6 @@ void rtl_usb_disconnect(struct usb_interface *intf) kfree(rtlpriv->usb_data); rtlpriv->cfg->ops->deinit_sw_vars(hw); _rtl_usb_io_handler_release(hw); - usb_put_dev(rtlusb->udev); usb_set_intfdata(intf, NULL); ieee80211_free_hw(hw); } From e1b6b5af5d1725c9474baf3846f717a738f7e000 Mon Sep 17 00:00:00 2001 From: Jaime Saguillo Revilla Date: Sun, 8 Mar 2026 11:18:50 +0000 Subject: [PATCH 0545/1561] wifi: rtlwifi: rtl8192d: fix typo in H2C wait counter names Rename local variables in rtl92d_fill_h2c_cmd() from wait_writeh2c_limmit/wait_h2c_limmit to wait_writeh2c_limit/wait_h2c_limit. No functional change. Signed-off-by: Jaime Saguillo Revilla Acked-by: Ping-Ke Shih Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260308111850.20420-1-jaime.saguillo@gmail.com --- .../wireless/realtek/rtlwifi/rtl8192d/fw_common.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192d/fw_common.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192d/fw_common.c index aa54dbde6ea8..a255a5061d77 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192d/fw_common.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192d/fw_common.c @@ -212,9 +212,9 @@ void rtl92d_fill_h2c_cmd(struct ieee80211_hw *hw, struct rtl_priv *rtlpriv = rtl_priv(hw); u8 boxcontent[4], boxextcontent[2]; u16 box_reg = 0, box_extreg = 0; - u8 wait_writeh2c_limmit = 100; + u8 wait_writeh2c_limit = 100; bool bwrite_success = false; - u8 wait_h2c_limmit = 100; + u8 wait_h2c_limit = 100; u32 h2c_waitcounter = 0; bool isfw_read = false; unsigned long flag; @@ -261,8 +261,8 @@ void rtl92d_fill_h2c_cmd(struct ieee80211_hw *hw, } while (!bwrite_success) { - wait_writeh2c_limmit--; - if (wait_writeh2c_limmit == 0) { + wait_writeh2c_limit--; + if (wait_writeh2c_limit == 0) { pr_err("Write H2C fail because no trigger for FW INT!\n"); break; } @@ -278,8 +278,8 @@ void rtl92d_fill_h2c_cmd(struct ieee80211_hw *hw, isfw_read = _rtl92d_check_fw_read_last_h2c(hw, boxnum); while (!isfw_read) { - wait_h2c_limmit--; - if (wait_h2c_limmit == 0) { + wait_h2c_limit--; + if (wait_h2c_limit == 0) { rtl_dbg(rtlpriv, COMP_CMD, DBG_LOUD, "Waiting too long for FW read clear HMEBox(%d)!\n", boxnum); From 177520960c012cf223002279c6454726ea6fa73e Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Tue, 10 Mar 2026 16:01:35 +0800 Subject: [PATCH 0546/1561] wifi: rtw89: pci: update SER parameters for suspend/resume In suspend mode, SER timer unit is different from normal mode. Set proper value to prevent expected SER happened during suspend. Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260310080146.31113-3-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/pci.h | 1 + drivers/net/wireless/realtek/rtw89/pci_be.c | 52 +++++++++++++++++++-- 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/pci.h b/drivers/net/wireless/realtek/rtw89/pci.h index ccfa6d33623a..dc488d9a8884 100644 --- a/drivers/net/wireless/realtek/rtw89/pci.h +++ b/drivers/net/wireless/realtek/rtw89/pci.h @@ -1016,6 +1016,7 @@ #define B_BE_PL1_IGNORE_HOT_RST BIT(30) #define B_BE_PL1_TIMER_UNIT_MASK GENMASK(19, 17) #define PCIE_SER_TIMER_UNIT 0x2 +#define PCIE_SER_WOW_TIMER_UNIT 0x4 #define B_BE_PL1_TIMER_CLEAR BIT(0) #define R_BE_REG_PL1_MASK 0x34B0 diff --git a/drivers/net/wireless/realtek/rtw89/pci_be.c b/drivers/net/wireless/realtek/rtw89/pci_be.c index 114f40c6c31b..dea01d9e57fd 100644 --- a/drivers/net/wireless/realtek/rtw89/pci_be.c +++ b/drivers/net/wireless/realtek/rtw89/pci_be.c @@ -721,12 +721,24 @@ static int __maybe_unused rtw89_pci_suspend_be(struct device *dev) { struct ieee80211_hw *hw = dev_get_drvdata(dev); struct rtw89_dev *rtwdev = hw->priv; + u32 val32; rtw89_write32_set(rtwdev, R_BE_RSV_CTRL, B_BE_WLOCK_1C_BIT6); rtw89_write32_set(rtwdev, R_BE_RSV_CTRL, B_BE_R_DIS_PRST); rtw89_write32_clr(rtwdev, R_BE_RSV_CTRL, B_BE_WLOCK_1C_BIT6); rtw89_write32_set(rtwdev, R_BE_PCIE_FRZ_CLK, B_BE_PCIE_FRZ_REG_RST); - rtw89_write32_clr(rtwdev, R_BE_REG_PL1_MASK, B_BE_SER_PM_MASTER_IMR); + + val32 = rtw89_read32(rtwdev, R_BE_SER_PL1_CTRL); + if (val32 & B_BE_PL1_SER_PL1_EN) { + val32 = u32_replace_bits(val32, PCIE_SER_WOW_TIMER_UNIT, + B_BE_PL1_TIMER_UNIT_MASK); + rtw89_write32(rtwdev, R_BE_SER_PL1_CTRL, val32); + + if (rtwdev->chip->chip_id == RTL8922A) + rtw89_write32_clr(rtwdev, R_BE_REG_PL1_MASK, + B_BE_SER_PM_MASTER_IMR); + } + return 0; } @@ -735,12 +747,19 @@ static int __maybe_unused rtw89_pci_resume_be(struct device *dev) struct ieee80211_hw *hw = dev_get_drvdata(dev); struct rtw89_dev *rtwdev = hw->priv; u32 polling; + u32 val32; + u16 val16; int ret; rtw89_write32_set(rtwdev, R_BE_RSV_CTRL, B_BE_WLOCK_1C_BIT6); rtw89_write32_clr(rtwdev, R_BE_RSV_CTRL, B_BE_R_DIS_PRST); rtw89_write32_clr(rtwdev, R_BE_RSV_CTRL, B_BE_WLOCK_1C_BIT6); rtw89_write32_clr(rtwdev, R_BE_PCIE_FRZ_CLK, B_BE_PCIE_FRZ_REG_RST); + + val32 = rtw89_read32(rtwdev, R_BE_SER_PL1_CTRL); + if (!(val32 & B_BE_PL1_SER_PL1_EN)) + goto clear_phy_isr; + rtw89_write32_clr(rtwdev, R_BE_SER_PL1_CTRL, B_BE_PL1_SER_PL1_EN); ret = read_poll_timeout_atomic(rtw89_read32, polling, !polling, 1, 1000, @@ -748,8 +767,35 @@ static int __maybe_unused rtw89_pci_resume_be(struct device *dev) if (ret) rtw89_warn(rtwdev, "[ERR] PCIE SER clear polling fail\n"); - rtw89_write32_set(rtwdev, R_BE_SER_PL1_CTRL, B_BE_PL1_SER_PL1_EN); - rtw89_write32_set(rtwdev, R_BE_REG_PL1_MASK, B_BE_SER_PM_MASTER_IMR); + if (rtwdev->chip->chip_id == RTL8922A) + rtw89_write32_set(rtwdev, R_BE_REG_PL1_MASK, + B_BE_SER_PM_MASTER_IMR | B_BE_SER_PCLKREQ_ACK_MASK); + + val32 = rtw89_read32(rtwdev, R_BE_SER_PL1_CTRL); + val32 = u32_replace_bits(val32, PCIE_SER_TIMER_UNIT, B_BE_PL1_TIMER_UNIT_MASK); + val32 |= B_BE_PL1_SER_PL1_EN; + rtw89_write32(rtwdev, R_BE_SER_PL1_CTRL, val32); + +clear_phy_isr: + if (rtwdev->chip->chip_id == RTL8922D) { + val16 = rtw89_read16(rtwdev, RAC_DIRECT_OFFESET_L0_G2 + + RAC_ANA41 * RAC_MULT); + if (val16 & PHY_ERR_FLAG_EN) { + rtw89_write16_clr(rtwdev, RAC_DIRECT_OFFESET_L0_G2 + + RAC_ANA41 * RAC_MULT, PHY_ERR_FLAG_EN); + rtw89_write16_set(rtwdev, RAC_DIRECT_OFFESET_L0_G2 + + RAC_ANA41 * RAC_MULT, PHY_ERR_FLAG_EN); + } + + val16 = rtw89_read16(rtwdev, RAC_DIRECT_OFFESET_L0_G1 + + RAC_ANA41 * RAC_MULT); + if (val16 & PHY_ERR_FLAG_EN) { + rtw89_write16_clr(rtwdev, RAC_DIRECT_OFFESET_L0_G1 + + RAC_ANA41 * RAC_MULT, PHY_ERR_FLAG_EN); + rtw89_write16_set(rtwdev, RAC_DIRECT_OFFESET_L0_G1 + + RAC_ANA41 * RAC_MULT, PHY_ERR_FLAG_EN); + } + } rtw89_pci_basic_cfg(rtwdev, true); From 9a38ef92aaa2d3c02ae1f6f1cacc3d3a8cf19db6 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Tue, 10 Mar 2026 16:01:36 +0800 Subject: [PATCH 0547/1561] wifi: rtw89: mac: remove A-die off setting for RTL8852C and RTL8922A Fix timing issue of A-die off followed by XTAL off. Otherwise, device might get lost potentially. Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260310080146.31113-4-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/rtw8852c.c | 2 +- drivers/net/wireless/realtek/rtw89/rtw8922a.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852c.c b/drivers/net/wireless/realtek/rtw89/rtw8852c.c index e62a7288c8aa..7ea0a8905282 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852c.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852c.c @@ -463,7 +463,7 @@ static int rtw8852c_pwr_off_func(struct rtw89_dev *rtwdev) else if (rtwdev->hci.type == RTW89_HCI_TYPE_USB) rtw89_write32_clr(rtwdev, R_AX_SYS_PW_CTRL, B_AX_SOP_EDSWR); - rtw89_write32_set(rtwdev, R_AX_SYS_PW_CTRL, B_AX_XTAL_OFF_A_DIE); + rtw89_write32_clr(rtwdev, R_AX_SYS_PW_CTRL, B_AX_XTAL_OFF_A_DIE); rtw89_write32_set(rtwdev, R_AX_SYS_SWR_CTRL1, B_AX_SYM_CTRL_SPS_PWMFREQ); rtw89_write32_mask(rtwdev, R_AX_SPS_DIG_ON_CTRL0, B_AX_REG_ZCDC_H_MASK, 0x3); diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922a.c b/drivers/net/wireless/realtek/rtw89/rtw8922a.c index 36ef36110602..a489aaf90f23 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8922a.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8922a.c @@ -492,7 +492,7 @@ static int rtw8922a_pwr_off_func(struct rtw89_dev *rtwdev) return ret; rtw89_write32(rtwdev, R_BE_WLLPS_CTRL, 0x0000A1B2); - rtw89_write32_set(rtwdev, R_BE_SYS_PW_CTRL, B_BE_XTAL_OFF_A_DIE); + rtw89_write32_clr(rtwdev, R_BE_SYS_PW_CTRL, B_BE_XTAL_OFF_A_DIE); rtw89_write32_set(rtwdev, R_BE_SYS_PW_CTRL, B_BE_APFM_SWLPS); rtw89_write32(rtwdev, R_BE_UDM1, 0); From 84f5e0eaf84ec55edfc053e858d58af074e3fcb4 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Tue, 10 Mar 2026 16:01:37 +0800 Subject: [PATCH 0548/1561] wifi: rtw89: phy: limit AMPDU number for RA try rate When RA (Rate Adaptive) does try higher rate, a TRY bit is flagged, and hardware will reference registers configured by this patch as maximum number of AMPDU. To prevent aggregate too many MPDU over peer's capability causing loss in peer side, set the minimum values across all stations and TID since there is single one register per hardware band. Consider MLD case, a BA session can run across two hardware bands, so set the same value as well. Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260310080146.31113-5-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/mac.c | 2 ++ drivers/net/wireless/realtek/rtw89/mac.h | 1 + drivers/net/wireless/realtek/rtw89/mac80211.c | 3 +++ drivers/net/wireless/realtek/rtw89/mac_be.c | 2 ++ drivers/net/wireless/realtek/rtw89/phy.c | 27 +++++++++++++++++++ drivers/net/wireless/realtek/rtw89/phy.h | 1 + 6 files changed, 36 insertions(+) diff --git a/drivers/net/wireless/realtek/rtw89/mac.c b/drivers/net/wireless/realtek/rtw89/mac.c index 234928613ef4..a292a14394b2 100644 --- a/drivers/net/wireless/realtek/rtw89/mac.c +++ b/drivers/net/wireless/realtek/rtw89/mac.c @@ -7319,6 +7319,8 @@ const struct rtw89_mac_gen_def rtw89_mac_gen_ax = { }, .wow_ctrl = {.addr = R_AX_WOW_CTRL, .mask = B_AX_WOW_WOWEN,}, .agg_limit = {.addr = R_AX_AMPDU_AGG_LIMIT, .mask = B_AX_AMPDU_MAX_TIME_MASK,}, + .ra_agg_limit = {.addr = R_AX_AMPDU_AGG_LIMIT, + .mask = B_AX_RA_TRY_RATE_AGG_LMT_MASK,}, .txcnt_limit = {.addr = R_AX_TXCNT, .mask = B_AX_L_TXCNT_LMT_MASK,}, .check_mac_en = rtw89_mac_check_mac_en_ax, diff --git a/drivers/net/wireless/realtek/rtw89/mac.h b/drivers/net/wireless/realtek/rtw89/mac.h index e6b715b95409..9c77bfaa34ee 100644 --- a/drivers/net/wireless/realtek/rtw89/mac.h +++ b/drivers/net/wireless/realtek/rtw89/mac.h @@ -1037,6 +1037,7 @@ struct rtw89_mac_gen_def { struct rtw89_reg_def narrow_bw_ru_dis; struct rtw89_reg_def wow_ctrl; struct rtw89_reg_def agg_limit; + struct rtw89_reg_def ra_agg_limit; struct rtw89_reg_def txcnt_limit; int (*check_mac_en)(struct rtw89_dev *rtwdev, u8 band, diff --git a/drivers/net/wireless/realtek/rtw89/mac80211.c b/drivers/net/wireless/realtek/rtw89/mac80211.c index 1ef73bfc40d1..cd8e2c8de888 100644 --- a/drivers/net/wireless/realtek/rtw89/mac80211.c +++ b/drivers/net/wireless/realtek/rtw89/mac80211.c @@ -1005,6 +1005,8 @@ static int rtw89_ops_ampdu_action(struct ieee80211_hw *hw, clear_bit(tid, rtwsta->ampdu_map); rtw89_chip_h2c_ampdu_cmac_tbl(rtwdev, rtwvif, rtwsta); ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid); + rtw89_leave_ps_mode(rtwdev); + rtw89_phy_ra_recalc_agg_limit(rtwdev); break; case IEEE80211_AMPDU_TX_OPERATIONAL: set_bit(RTW89_TXQ_F_AMPDU, &rtwtxq->flags); @@ -1013,6 +1015,7 @@ static int rtw89_ops_ampdu_action(struct ieee80211_hw *hw, set_bit(tid, rtwsta->ampdu_map); rtw89_leave_ps_mode(rtwdev); rtw89_chip_h2c_ampdu_cmac_tbl(rtwdev, rtwvif, rtwsta); + rtw89_phy_ra_recalc_agg_limit(rtwdev); break; case IEEE80211_AMPDU_RX_START: rtw89_chip_h2c_ba_cam(rtwdev, rtwsta, true, params); diff --git a/drivers/net/wireless/realtek/rtw89/mac_be.c b/drivers/net/wireless/realtek/rtw89/mac_be.c index dc66b1ee851a..39a28fd27412 100644 --- a/drivers/net/wireless/realtek/rtw89/mac_be.c +++ b/drivers/net/wireless/realtek/rtw89/mac_be.c @@ -3193,6 +3193,8 @@ const struct rtw89_mac_gen_def rtw89_mac_gen_be = { }, .wow_ctrl = {.addr = R_BE_WOW_CTRL, .mask = B_BE_WOW_WOWEN,}, .agg_limit = {.addr = R_BE_AMPDU_AGG_LIMIT, .mask = B_BE_AMPDU_MAX_TIME_MASK,}, + .ra_agg_limit = {.addr = R_BE_AMPDU_AGG_LIMIT, + .mask = B_BE_RA_TRY_RATE_AGG_LMT_MASK,}, .txcnt_limit = {.addr = R_BE_TXCNT, .mask = B_BE_L_TXCNT_LMT_MASK,}, .check_mac_en = rtw89_mac_check_mac_en_be, diff --git a/drivers/net/wireless/realtek/rtw89/phy.c b/drivers/net/wireless/realtek/rtw89/phy.c index 74f5d5562848..0fa4d8d791f1 100644 --- a/drivers/net/wireless/realtek/rtw89/phy.c +++ b/drivers/net/wireless/realtek/rtw89/phy.c @@ -775,6 +775,33 @@ void rtw89_phy_ra_assoc(struct rtw89_dev *rtwdev, struct rtw89_sta_link *rtwsta_ rtw89_fw_h2c_ra(rtwdev, ra, csi); } +void rtw89_phy_ra_recalc_agg_limit(struct rtw89_dev *rtwdev) +{ + const struct rtw89_mac_gen_def *mac = rtwdev->chip->mac_def; + const struct rtw89_reg_def *ra_limit = &mac->ra_agg_limit; + struct ieee80211_sta *sta; + struct rtw89_sta *rtwsta; + u16 agg_num = U16_MAX; + u8 tid; + + for_each_station(sta, rtwdev->hw) { + rtwsta = sta_to_rtwsta(sta); + + for_each_set_bit(tid, rtwsta->ampdu_map, IEEE80211_NUM_TIDS) + agg_num = min(agg_num, rtwsta->ampdu_params[tid].agg_num); + } + + if (agg_num == U16_MAX) + agg_num = 0x3F; + else + agg_num = clamp(agg_num, 1, 256) - 1; + + rtw89_write32_idx(rtwdev, ra_limit->addr, ra_limit->mask, agg_num, RTW89_MAC_0); + if (!rtwdev->dbcc_en) + return; + rtw89_write32_idx(rtwdev, ra_limit->addr, ra_limit->mask, agg_num, RTW89_MAC_1); +} + u8 rtw89_phy_get_txsc(struct rtw89_dev *rtwdev, const struct rtw89_chan *chan, enum rtw89_bandwidth dbw) diff --git a/drivers/net/wireless/realtek/rtw89/phy.h b/drivers/net/wireless/realtek/rtw89/phy.h index 094c7e45f254..bde419edf744 100644 --- a/drivers/net/wireless/realtek/rtw89/phy.h +++ b/drivers/net/wireless/realtek/rtw89/phy.h @@ -1006,6 +1006,7 @@ void rtw89_phy_ra_update_sta(struct rtw89_dev *rtwdev, struct ieee80211_sta *sta void rtw89_phy_ra_update_sta_link(struct rtw89_dev *rtwdev, struct rtw89_sta_link *rtwsta_link, u32 changed); +void rtw89_phy_ra_recalc_agg_limit(struct rtw89_dev *rtwdev); void rtw89_phy_rate_pattern_vif(struct rtw89_dev *rtwdev, struct ieee80211_vif *vif, const struct cfg80211_bitrate_mask *mask); From be28b2c4eed490c95c2ca2ebacbf7795912b3ec6 Mon Sep 17 00:00:00 2001 From: Zong-Zhe Yang Date: Tue, 10 Mar 2026 16:01:38 +0800 Subject: [PATCH 0549/1561] wifi: rtw89: move disabling dynamic mechanism functions to core Some dynamic mechanism (DM) may need to be disabled during some normal processes rather than debugging. For example, should not do MLSR switch during SCAN/ROC or even MCC. So, move the disabling DM functions to core for impending uses. No logic changes. Signed-off-by: Zong-Zhe Yang Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260310080146.31113-6-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/core.c | 29 ++++++++++++++++++ drivers/net/wireless/realtek/rtw89/core.h | 3 ++ drivers/net/wireless/realtek/rtw89/debug.c | 35 ++-------------------- 3 files changed, 35 insertions(+), 32 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/core.c b/drivers/net/wireless/realtek/rtw89/core.c index 9d9b91570989..81004ef18168 100644 --- a/drivers/net/wireless/realtek/rtw89/core.c +++ b/drivers/net/wireless/realtek/rtw89/core.c @@ -4713,6 +4713,35 @@ static void rtw89_track_work(struct wiphy *wiphy, struct wiphy_work *work) rtw89_enter_lps_track(rtwdev); } +void rtw89_core_dm_disable_cfg(struct rtw89_dev *rtwdev, u32 new) +{ + struct rtw89_hal *hal = &rtwdev->hal; + u32 old = hal->disabled_dm_bitmap; + + if (new == old) + return; + + hal->disabled_dm_bitmap = new; + + rtw89_debug(rtwdev, RTW89_DBG_STATE, "Disable DM: 0x%x -> 0x%x\n", old, new); +} + +void rtw89_core_dm_disable_set(struct rtw89_dev *rtwdev, enum rtw89_dm_type type) +{ + struct rtw89_hal *hal = &rtwdev->hal; + u32 cur = hal->disabled_dm_bitmap; + + rtw89_core_dm_disable_cfg(rtwdev, cur | BIT(type)); +} + +void rtw89_core_dm_disable_clr(struct rtw89_dev *rtwdev, enum rtw89_dm_type type) +{ + struct rtw89_hal *hal = &rtwdev->hal; + u32 cur = hal->disabled_dm_bitmap; + + rtw89_core_dm_disable_cfg(rtwdev, cur & ~BIT(type)); +} + u8 rtw89_core_acquire_bit_map(unsigned long *addr, unsigned long size) { unsigned long bit; diff --git a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h index cf0cc718f41c..05f8ad6d3034 100644 --- a/drivers/net/wireless/realtek/rtw89/core.h +++ b/drivers/net/wireless/realtek/rtw89/core.h @@ -7862,5 +7862,8 @@ void rtw89_core_update_p2p_ps(struct rtw89_dev *rtwdev, void rtw89_core_ntfy_btc_event(struct rtw89_dev *rtwdev, enum rtw89_btc_hmsg event); int rtw89_core_mlsr_switch(struct rtw89_dev *rtwdev, struct rtw89_vif *rtwvif, unsigned int link_id); +void rtw89_core_dm_disable_cfg(struct rtw89_dev *rtwdev, u32 new); +void rtw89_core_dm_disable_set(struct rtw89_dev *rtwdev, enum rtw89_dm_type type); +void rtw89_core_dm_disable_clr(struct rtw89_dev *rtwdev, enum rtw89_dm_type type); #endif diff --git a/drivers/net/wireless/realtek/rtw89/debug.c b/drivers/net/wireless/realtek/rtw89/debug.c index 3e16ed2c4570..aee0f25e036a 100644 --- a/drivers/net/wireless/realtek/rtw89/debug.c +++ b/drivers/net/wireless/realtek/rtw89/debug.c @@ -4333,35 +4333,6 @@ static ssize_t rtw89_debug_priv_stations_get(struct rtw89_dev *rtwdev, return p - buf; } -static void rtw89_debug_disable_dm_cfg_bmap(struct rtw89_dev *rtwdev, u32 new) -{ - struct rtw89_hal *hal = &rtwdev->hal; - u32 old = hal->disabled_dm_bitmap; - - if (new == old) - return; - - hal->disabled_dm_bitmap = new; - - rtw89_debug(rtwdev, RTW89_DBG_STATE, "Disable DM: 0x%x -> 0x%x\n", old, new); -} - -static void rtw89_debug_disable_dm_set_flag(struct rtw89_dev *rtwdev, u8 flag) -{ - struct rtw89_hal *hal = &rtwdev->hal; - u32 cur = hal->disabled_dm_bitmap; - - rtw89_debug_disable_dm_cfg_bmap(rtwdev, cur | BIT(flag)); -} - -static void rtw89_debug_disable_dm_clr_flag(struct rtw89_dev *rtwdev, u8 flag) -{ - struct rtw89_hal *hal = &rtwdev->hal; - u32 cur = hal->disabled_dm_bitmap; - - rtw89_debug_disable_dm_cfg_bmap(rtwdev, cur & ~BIT(flag)); -} - #define DM_INFO(type) {RTW89_DM_ ## type, #type} static const struct rtw89_disabled_dm_info { @@ -4412,7 +4383,7 @@ rtw89_debug_priv_disable_dm_set(struct rtw89_dev *rtwdev, if (ret) return -EINVAL; - rtw89_debug_disable_dm_cfg_bmap(rtwdev, conf); + rtw89_core_dm_disable_cfg(rtwdev, conf); return count; } @@ -4475,7 +4446,7 @@ rtw89_debug_priv_mlo_mode_set(struct rtw89_dev *rtwdev, if (num != 2) return -EINVAL; - rtw89_debug_disable_dm_set_flag(rtwdev, RTW89_DM_MLO); + rtw89_core_dm_disable_set(rtwdev, RTW89_DM_MLO); rtw89_debug(rtwdev, RTW89_DBG_STATE, "Set MLO mode to %x\n", mlo_mode); @@ -4485,7 +4456,7 @@ rtw89_debug_priv_mlo_mode_set(struct rtw89_dev *rtwdev, break; default: rtw89_debug(rtwdev, RTW89_DBG_STATE, "Unsupported MLO mode\n"); - rtw89_debug_disable_dm_clr_flag(rtwdev, RTW89_DM_MLO); + rtw89_core_dm_disable_clr(rtwdev, RTW89_DM_MLO); return -EOPNOTSUPP; } From 4516621686cb3c628c2094f38e622a94958b335e Mon Sep 17 00:00:00 2001 From: Zong-Zhe Yang Date: Tue, 10 Mar 2026 16:01:39 +0800 Subject: [PATCH 0550/1561] wifi: rtw89: tweak settings of TX power and channel for Wi-Fi 7 The support_mlo flag depends on FW features, so it's determined at runtime. Since Wi-Fi 7 chip now needs to initialize second HW band, if support_mlo is not allowed, second HW band might act without settings of TX power and channel. So, set that for Wi-Fi 7 chip. Signed-off-by: Zong-Zhe Yang Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260310080146.31113-7-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/core.c b/drivers/net/wireless/realtek/rtw89/core.c index 81004ef18168..6ab99a3577f3 100644 --- a/drivers/net/wireless/realtek/rtw89/core.c +++ b/drivers/net/wireless/realtek/rtw89/core.c @@ -463,7 +463,7 @@ void rtw89_core_set_chip_txpwr(struct rtw89_dev *rtwdev) chan = rtw89_mgnt_chan_get(rtwdev, 0); __rtw89_core_set_chip_txpwr(rtwdev, chan, RTW89_PHY_0); - if (!rtwdev->support_mlo) + if (rtwdev->chip->chip_gen == RTW89_CHIP_AX) return; chan = rtw89_mgnt_chan_get(rtwdev, 1); @@ -558,7 +558,7 @@ int rtw89_set_channel(struct rtw89_dev *rtwdev) chan = rtw89_mgnt_chan_get(rtwdev, 0); __rtw89_set_channel(rtwdev, chan, RTW89_MAC_0, RTW89_PHY_0); - if (!rtwdev->support_mlo) + if (rtwdev->chip->chip_gen == RTW89_CHIP_AX) return 0; chan = rtw89_mgnt_chan_get(rtwdev, 1); From 2fed8de4eb98764e559189eb3bac9d3f2954cb95 Mon Sep 17 00:00:00 2001 From: Zong-Zhe Yang Date: Tue, 10 Mar 2026 16:01:40 +0800 Subject: [PATCH 0551/1561] wifi: rtw89: chan: simplify link handling related to ROC The original channel is swapped out for the target channel during ROC. And, all vifs/links accessing the original channel will be marked with off-channel. So, it doesn't seem necessary for chan.c to determine which link instance it is. Signed-off-by: Zong-Zhe Yang Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260310080146.31113-8-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/chan.c | 11 ----------- drivers/net/wireless/realtek/rtw89/core.h | 1 - 2 files changed, 12 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/chan.c b/drivers/net/wireless/realtek/rtw89/chan.c index 9b2f6f0a00fd..def9e4f3af59 100644 --- a/drivers/net/wireless/realtek/rtw89/chan.c +++ b/drivers/net/wireless/realtek/rtw89/chan.c @@ -276,7 +276,6 @@ void rtw89_config_roc_chandef(struct rtw89_dev *rtwdev, } hal->roc_chandef = *chandef; - hal->roc_link_index = rtw89_vif_link_inst_get_index(rtwvif_link); } else { cur = atomic_cmpxchg(&hal->roc_chanctx_idx, idx, RTW89_CHANCTX_IDLE); @@ -389,7 +388,6 @@ const struct rtw89_chan *__rtw89_mgnt_chan_get(struct rtw89_dev *rtwdev, struct rtw89_hal *hal = &rtwdev->hal; struct rtw89_entity_mgnt *mgnt = &hal->entity_mgnt; enum rtw89_chanctx_idx chanctx_idx; - enum rtw89_chanctx_idx roc_idx; enum rtw89_entity_mode mode; u8 role_index; @@ -419,15 +417,6 @@ const struct rtw89_chan *__rtw89_mgnt_chan_get(struct rtw89_dev *rtwdev, if (chanctx_idx == RTW89_CHANCTX_IDLE) goto dflt; - roc_idx = atomic_read(&hal->roc_chanctx_idx); - if (roc_idx != RTW89_CHANCTX_IDLE) { - /* ROC is ongoing (given ROC runs on @hal->roc_link_index). - * If @link_index is the same, get the ongoing ROC chanctx. - */ - if (link_index == hal->roc_link_index) - chanctx_idx = roc_idx; - } - return rtw89_chan_get(rtwdev, chanctx_idx); dflt: diff --git a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h index 05f8ad6d3034..01573150ab3c 100644 --- a/drivers/net/wireless/realtek/rtw89/core.h +++ b/drivers/net/wireless/realtek/rtw89/core.h @@ -5170,7 +5170,6 @@ struct rtw89_hal { bool no_eht; atomic_t roc_chanctx_idx; - u8 roc_link_index; DECLARE_BITMAP(changes, NUM_OF_RTW89_CHANCTX_CHANGES); DECLARE_BITMAP(entity_map, NUM_OF_RTW89_CHANCTX); From cf3cd3687d8a9ff2940d97dc649c53c62458d3e9 Mon Sep 17 00:00:00 2001 From: Zong-Zhe Yang Date: Tue, 10 Mar 2026 16:01:41 +0800 Subject: [PATCH 0552/1561] wifi: rtw89: chan: recalc MLO DBCC mode based on current entity mode Since MLD vif can do MLSR switch, it may not always run on HW band 0. But when preparing MCC for MLD + P2P, P2P vif needs to use HW band 0 to handle connection, i.e. uses of HW bands may be different by vif. The current major role/vif can be indicated through entity mode. So, based on it, recalculate MLO DBCC mode to change use of HW band. Signed-off-by: Zong-Zhe Yang Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260310080146.31113-9-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/chan.c | 61 +++++++++++++++-------- 1 file changed, 41 insertions(+), 20 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/chan.c b/drivers/net/wireless/realtek/rtw89/chan.c index def9e4f3af59..ceb399fc2b94 100644 --- a/drivers/net/wireless/realtek/rtw89/chan.c +++ b/drivers/net/wireless/realtek/rtw89/chan.c @@ -381,6 +381,23 @@ static void rtw89_normalize_link_chanctx(struct rtw89_dev *rtwdev, rtw89_swap_chanctx(rtwdev, rtwvif_link->chanctx_idx, cur->chanctx_idx); } +static u8 rtw89_entity_role_get_index(struct rtw89_dev *rtwdev) +{ + enum rtw89_entity_mode mode; + + mode = rtw89_get_entity_mode(rtwdev); + switch (mode) { + default: + WARN(1, "Invalid ent mode: %d\n", mode); + fallthrough; + case RTW89_ENTITY_MODE_SCC_OR_SMLD: + case RTW89_ENTITY_MODE_MCC: + return 0; + case RTW89_ENTITY_MODE_MCC_PREPARE: + return 1; + } +} + const struct rtw89_chan *__rtw89_mgnt_chan_get(struct rtw89_dev *rtwdev, const char *caller_message, u8 link_index, bool nullchk) @@ -388,7 +405,6 @@ const struct rtw89_chan *__rtw89_mgnt_chan_get(struct rtw89_dev *rtwdev, struct rtw89_hal *hal = &rtwdev->hal; struct rtw89_entity_mgnt *mgnt = &hal->entity_mgnt; enum rtw89_chanctx_idx chanctx_idx; - enum rtw89_entity_mode mode; u8 role_index; lockdep_assert_wiphy(rtwdev->hw->wiphy); @@ -399,19 +415,7 @@ const struct rtw89_chan *__rtw89_mgnt_chan_get(struct rtw89_dev *rtwdev, goto dflt; } - mode = rtw89_get_entity_mode(rtwdev); - switch (mode) { - case RTW89_ENTITY_MODE_SCC_OR_SMLD: - case RTW89_ENTITY_MODE_MCC: - role_index = 0; - break; - case RTW89_ENTITY_MODE_MCC_PREPARE: - role_index = 1; - break; - default: - WARN(1, "Invalid ent mode: %d\n", mode); - goto dflt; - } + role_index = rtw89_entity_role_get_index(rtwdev); chanctx_idx = mgnt->chanctx_tbl[role_index][link_index]; if (chanctx_idx == RTW89_CHANCTX_IDLE) @@ -479,10 +483,28 @@ rtw89_entity_sel_mlo_dbcc_mode(struct rtw89_dev *rtwdev, u8 active_hws) } } -static -void rtw89_entity_recalc_mlo_dbcc_mode(struct rtw89_dev *rtwdev, u8 active_hws) +static void rtw89_entity_recalc_mlo_dbcc_mode(struct rtw89_dev *rtwdev) { + struct rtw89_entity_mgnt *mgnt = &rtwdev->hal.entity_mgnt; enum rtw89_mlo_dbcc_mode mode; + struct rtw89_vif *role; + u8 active_hws = 0; + u8 ridx; + + ridx = rtw89_entity_role_get_index(rtwdev); + role = mgnt->active_roles[ridx]; + if (role) { + struct rtw89_vif_link *link; + int i; + + for (i = 0; i < role->links_inst_valid_num; i++) { + link = rtw89_vif_get_link_inst(role, i); + if (!link || !link->chanctx_assigned) + continue; + + active_hws |= BIT(i); + } + } mode = rtw89_entity_sel_mlo_dbcc_mode(rtwdev, active_hws); rtwdev->mlo_dbcc_mode = mode; @@ -496,7 +518,6 @@ static void rtw89_entity_recalc_mgnt_roles(struct rtw89_dev *rtwdev) struct rtw89_entity_mgnt *mgnt = &hal->entity_mgnt; struct rtw89_vif_link *link; struct rtw89_vif *role; - u8 active_hws = 0; u8 pos = 0; int i, j; @@ -545,13 +566,10 @@ fill: continue; mgnt->chanctx_tbl[pos][i] = link->chanctx_idx; - active_hws |= BIT(i); } mgnt->active_roles[pos++] = role; } - - rtw89_entity_recalc_mlo_dbcc_mode(rtwdev, active_hws); } enum rtw89_entity_mode rtw89_entity_recalc(struct rtw89_dev *rtwdev) @@ -621,6 +639,9 @@ enum rtw89_entity_mode rtw89_entity_recalc(struct rtw89_dev *rtwdev) return rtw89_get_entity_mode(rtwdev); rtw89_set_entity_mode(rtwdev, mode); + + rtw89_entity_recalc_mlo_dbcc_mode(rtwdev); + return mode; } From 72dbc78594a19b78467d7a5ea67a0b557e505667 Mon Sep 17 00:00:00 2001 From: Chin-Yen Lee Date: Tue, 10 Mar 2026 16:01:42 +0800 Subject: [PATCH 0553/1561] wifi: rtw89: wow: add retry for ensuring packet are processed Before entering WoWLAN mode, the driver must ensure that all received packets have been processed to prevent packet loss. Consequently, a retry mechanism has been implemented to guarantee completion. Signed-off-by: Chin-Yen Lee Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260310080146.31113-10-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/mac.c | 15 ++++++++++++++- drivers/net/wireless/realtek/rtw89/mac.h | 1 + 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtw89/mac.c b/drivers/net/wireless/realtek/rtw89/mac.c index a292a14394b2..35fd18fe6470 100644 --- a/drivers/net/wireless/realtek/rtw89/mac.c +++ b/drivers/net/wireless/realtek/rtw89/mac.c @@ -7183,7 +7183,7 @@ int rtw89_mac_ptk_drop_by_band_and_wait(struct rtw89_dev *rtwdev, return ret; } -int rtw89_mac_cpu_io_rx(struct rtw89_dev *rtwdev, bool wow_enable) +static int _rtw89_mac_cpu_io_rx(struct rtw89_dev *rtwdev, bool wow_enable) { struct rtw89_mac_h2c_info h2c_info = {}; struct rtw89_mac_c2h_info c2h_info = {}; @@ -7206,6 +7206,19 @@ int rtw89_mac_cpu_io_rx(struct rtw89_dev *rtwdev, bool wow_enable) return ret; } +int rtw89_mac_cpu_io_rx(struct rtw89_dev *rtwdev, bool wow_enable) +{ + int i, ret; + + for (i = 0; i < CPU_IO_RX_RETRY_CNT; i++) { + ret = _rtw89_mac_cpu_io_rx(rtwdev, wow_enable); + if (!ret) + return 0; + } + + return ret; +} + static int rtw89_wow_config_mac_ax(struct rtw89_dev *rtwdev, bool enable_wow) { const struct rtw89_mac_gen_def *mac = rtwdev->chip->mac_def; diff --git a/drivers/net/wireless/realtek/rtw89/mac.h b/drivers/net/wireless/realtek/rtw89/mac.h index 9c77bfaa34ee..88a877556cb3 100644 --- a/drivers/net/wireless/realtek/rtw89/mac.h +++ b/drivers/net/wireless/realtek/rtw89/mac.h @@ -17,6 +17,7 @@ #define BSSID_CAM_ENT_SIZE 0x08 #define HFC_PAGE_UNIT 64 #define RPWM_TRY_CNT 3 +#define CPU_IO_RX_RETRY_CNT 3 enum rtw89_mac_hwmod_sel { RTW89_DMAC_SEL = 0, From 829b89c2b08ff376eb4c5edc66363ea09ba99138 Mon Sep 17 00:00:00 2001 From: Zong-Zhe Yang Date: Tue, 10 Mar 2026 16:01:43 +0800 Subject: [PATCH 0554/1561] wifi: rtw89: replace RF mutex with wiphy lock assertion Now, stack has introduced wiphy lock. And, the normal paths calling RF read/write should be under wiphy lock. So, replace RF mutex with wiphy lock assertion. Besides, in dbgfs paths, add the corresponding lock option. Signed-off-by: Zong-Zhe Yang Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260310080146.31113-11-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/core.c | 2 -- drivers/net/wireless/realtek/rtw89/core.h | 14 ++++---------- drivers/net/wireless/realtek/rtw89/debug.c | 6 +++--- 3 files changed, 7 insertions(+), 15 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/core.c b/drivers/net/wireless/realtek/rtw89/core.c index 6ab99a3577f3..18dbf3664f0a 100644 --- a/drivers/net/wireless/realtek/rtw89/core.c +++ b/drivers/net/wireless/realtek/rtw89/core.c @@ -6147,7 +6147,6 @@ int rtw89_core_init(struct rtw89_dev *rtwdev) return -ENOMEM; spin_lock_init(&rtwdev->ba_lock); spin_lock_init(&rtwdev->rpwm_lock); - mutex_init(&rtwdev->rf_mutex); rtwdev->total_sta_assoc = 0; rtw89_init_wait(&rtwdev->mcc.wait); @@ -6206,7 +6205,6 @@ void rtw89_core_deinit(struct rtw89_dev *rtwdev) __rtw89_fw_free_all_early_h2c(rtwdev); destroy_workqueue(rtwdev->txq_wq); - mutex_destroy(&rtwdev->rf_mutex); } EXPORT_SYMBOL(rtw89_core_deinit); diff --git a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h index 01573150ab3c..ce04ecaa3a5e 100644 --- a/drivers/net/wireless/realtek/rtw89/core.h +++ b/drivers/net/wireless/realtek/rtw89/core.h @@ -6182,8 +6182,6 @@ struct rtw89_dev { refcount_t refcount_ap_info; struct list_head rtwvifs_list; - /* used to protect rf read write */ - struct mutex rf_mutex; struct workqueue_struct *txq_wq; struct work_struct txq_work; struct delayed_work txq_reinvoke_work; @@ -6808,22 +6806,18 @@ static inline u32 rtw89_read_rf(struct rtw89_dev *rtwdev, enum rtw89_rf_path rf_path, u32 addr, u32 mask) { - u32 val; + lockdep_assert_wiphy(rtwdev->hw->wiphy); - mutex_lock(&rtwdev->rf_mutex); - val = rtwdev->chip->ops->read_rf(rtwdev, rf_path, addr, mask); - mutex_unlock(&rtwdev->rf_mutex); - - return val; + return rtwdev->chip->ops->read_rf(rtwdev, rf_path, addr, mask); } static inline void rtw89_write_rf(struct rtw89_dev *rtwdev, enum rtw89_rf_path rf_path, u32 addr, u32 mask, u32 data) { - mutex_lock(&rtwdev->rf_mutex); + lockdep_assert_wiphy(rtwdev->hw->wiphy); + rtwdev->chip->ops->write_rf(rtwdev, rf_path, addr, mask, data); - mutex_unlock(&rtwdev->rf_mutex); } static inline u32 rtw89_read32_pci_cfg(struct rtw89_dev *rtwdev, u32 addr) diff --git a/drivers/net/wireless/realtek/rtw89/debug.c b/drivers/net/wireless/realtek/rtw89/debug.c index aee0f25e036a..d461ffc6dc9e 100644 --- a/drivers/net/wireless/realtek/rtw89/debug.c +++ b/drivers/net/wireless/realtek/rtw89/debug.c @@ -4859,9 +4859,9 @@ rtw89_debug_priv_beacon_info_get(struct rtw89_dev *rtwdev, static const struct rtw89_debugfs rtw89_debugfs_templ = { .read_reg = rtw89_debug_priv_select_and_get(read_reg), .write_reg = rtw89_debug_priv_set(write_reg), - .read_rf = rtw89_debug_priv_select_and_get(read_rf), - .write_rf = rtw89_debug_priv_set(write_rf), - .rf_reg_dump = rtw89_debug_priv_get(rf_reg_dump, RSIZE_8K), + .read_rf = rtw89_debug_priv_select_and_get(read_rf, RLOCK), + .write_rf = rtw89_debug_priv_set(write_rf, WLOCK), + .rf_reg_dump = rtw89_debug_priv_get(rf_reg_dump, RSIZE_8K, RLOCK), .txpwr_table = rtw89_debug_priv_get(txpwr_table, RSIZE_20K, RLOCK), .mac_reg_dump = rtw89_debug_priv_select_and_get(mac_reg_dump, RSIZE_128K), .mac_mem_dump = rtw89_debug_priv_select_and_get(mac_mem_dump, RSIZE_16K, RLOCK), From bda294ed0ed05ada2a832b19a55dd4a6fa72b1a1 Mon Sep 17 00:00:00 2001 From: Po-Hao Huang Date: Tue, 10 Mar 2026 16:01:44 +0800 Subject: [PATCH 0555/1561] wifi: rtw89: Drop malformed AMPDU frames with abnormal PN Fix connection issue caused by AMPDU frames with abnormal PN patterns (out-of-order packets with correct MPDU sequence numbers but paired with abnormal PN values, which is next PN of previous in-order packet). This is causing packet drops, low throughput and disconnections. It is observed in fields with some specific AP firmwares. Do this workaround for better interoperability since some APs could never receive a proper FW update. Signed-off-by: Po-Hao Huang Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260310080146.31113-12-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/core.c | 115 ++++++++++++++++++ drivers/net/wireless/realtek/rtw89/core.h | 11 ++ drivers/net/wireless/realtek/rtw89/mac80211.c | 3 + drivers/net/wireless/realtek/rtw89/util.h | 17 +++ drivers/net/wireless/realtek/rtw89/wow.c | 2 + drivers/net/wireless/realtek/rtw89/wow.h | 7 -- 6 files changed, 148 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/core.c b/drivers/net/wireless/realtek/rtw89/core.c index 18dbf3664f0a..9d3f651798ff 100644 --- a/drivers/net/wireless/realtek/rtw89/core.c +++ b/drivers/net/wireless/realtek/rtw89/core.c @@ -3272,6 +3272,114 @@ out: rcu_read_unlock(); } +static void __rtw89_core_tid_rx_stats_reset(struct rtw89_tid_stats *tid_stats) +{ + tid_stats->last_pn = -1LL; + tid_stats->last_sn = IEEE80211_SN_MASK; +} + +void rtw89_core_tid_rx_stats_ctrl(struct rtw89_dev *rtwdev, struct rtw89_sta *rtwsta, + struct ieee80211_ampdu_params *params, bool enable) +{ + struct rtw89_tid_stats *tid_stats; + u16 tid = params->tid; + + tid_stats = &rtwsta->tid_rx_stats[tid]; + + if (enable) { + __rtw89_core_tid_rx_stats_reset(tid_stats); + tid_stats->started = true; + } else { + tid_stats->started = false; + } +} + +void rtw89_core_tid_rx_stats_reset(struct rtw89_dev *rtwdev) +{ + struct rtw89_tid_stats *tid_stats; + struct ieee80211_sta *sta; + struct rtw89_sta *rtwsta; + u16 tid; + + for_each_station(sta, rtwdev->hw) { + rtwsta = sta_to_rtwsta(sta); + + for (tid = 0; tid < IEEE80211_NUM_TIDS; tid++) { + tid_stats = &rtwsta->tid_rx_stats[tid]; + + if (!tid_stats->started) + continue; + + __rtw89_core_tid_rx_stats_reset(tid_stats); + } + } +} + +static bool rtw89_core_skb_pn_valid(struct rtw89_dev *rtwdev, + struct rtw89_rx_desc_info *desc_info, + struct sk_buff *skb) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + const struct rtw89_chip_info *chip = rtwdev->chip; + struct rtw89_sta_link *rtwsta_link; + struct rtw89_tid_stats *tid_stats; + struct rtw89_sta *rtwsta; + u8 tid, *ccmp_hdr_ptr; + s64 pn, last_pn; + u16 mpdu_sn; + int hdrlen; + + if (chip->chip_gen != RTW89_CHIP_AX) + return true; + + if (!ieee80211_is_data_qos(hdr->frame_control)) + return true; + + if (!desc_info->hw_dec || !desc_info->addr1_match) + return true; + + guard(rcu)(); + + rtwsta_link = rtw89_assoc_link_rcu_dereference(rtwdev, desc_info->mac_id); + if (!rtwsta_link) + return true; + + rtwsta = rtwsta_link->rtwsta; + tid = ieee80211_get_tid(hdr); + tid_stats = &rtwsta->tid_rx_stats[tid]; + + if (!tid_stats->started) + return true; + + switch (desc_info->sec_type) { + case RTW89_SEC_KEY_TYPE_CCMP128: + case RTW89_SEC_KEY_TYPE_CCMP256: + case RTW89_SEC_KEY_TYPE_GCMP128: + case RTW89_SEC_KEY_TYPE_GCMP256: + mpdu_sn = ieee80211_get_sn(hdr); + hdrlen = ieee80211_hdrlen(hdr->frame_control); + ccmp_hdr_ptr = skb->data + hdrlen; + ccmp_hdr2pn(&pn, ccmp_hdr_ptr); + last_pn = tid_stats->last_pn; + + if (pn > last_pn) { + if (ieee80211_sn_less(mpdu_sn, tid_stats->last_sn)) { + dev_kfree_skb_any(skb); + + return false; + } + + tid_stats->last_sn = mpdu_sn; + tid_stats->last_pn = pn; + } + break; + default: + break; + } + + return true; +} + static void rtw89_core_rx_to_mac80211(struct rtw89_dev *rtwdev, struct rtw89_rx_phy_ppdu *phy_ppdu, struct rtw89_rx_desc_info *desc_info, @@ -3421,6 +3529,7 @@ void rtw89_core_query_rxdesc(struct rtw89_dev *rtwdev, desc_info->sec_cam_id = le32_get_bits(rxd_l->dword5, AX_RXD_SEC_CAM_IDX_MASK); desc_info->mac_id = le32_get_bits(rxd_l->dword5, AX_RXD_MAC_ID_MASK); desc_info->rx_pl_id = le32_get_bits(rxd_l->dword5, AX_RXD_RX_PL_ID_MASK); + desc_info->sec_type = le32_get_bits(rxd_l->dword7, AX_RXD_SEC_TYPE_MASK); } EXPORT_SYMBOL(rtw89_core_query_rxdesc); @@ -3450,6 +3559,7 @@ void rtw89_core_query_rxdesc_v2(struct rtw89_dev *rtwdev, desc_info->mac_id = le32_get_bits(rxd_s->dword2, BE_RXD_MAC_ID_MASK); desc_info->addr_cam_valid = le32_get_bits(rxd_s->dword2, BE_RXD_ADDR_CAM_VLD); + desc_info->sec_type = le32_get_bits(rxd_s->dword3, BE_RXD_SEC_TYPE_MASK); desc_info->icv_err = le32_get_bits(rxd_s->dword3, BE_RXD_ICV_ERR); desc_info->crc32_err = le32_get_bits(rxd_s->dword3, BE_RXD_CRC32_ERR); desc_info->hw_dec = le32_get_bits(rxd_s->dword3, BE_RXD_HW_DEC); @@ -3523,6 +3633,7 @@ void rtw89_core_query_rxdesc_v3(struct rtw89_dev *rtwdev, desc_info->mac_id = le32_get_bits(rxd_s->dword2, BE_RXD_MAC_ID_V1); desc_info->addr_cam_valid = le32_get_bits(rxd_s->dword2, BE_RXD_ADDR_CAM_VLD); + desc_info->sec_type = le32_get_bits(rxd_s->dword3, BE_RXD_SEC_TYPE_MASK); desc_info->icv_err = le32_get_bits(rxd_s->dword3, BE_RXD_ICV_ERR); desc_info->crc32_err = le32_get_bits(rxd_s->dword3, BE_RXD_CRC32_ERR); desc_info->hw_dec = le32_get_bits(rxd_s->dword3, BE_RXD_HW_DEC); @@ -3802,6 +3913,10 @@ void rtw89_core_rx(struct rtw89_dev *rtwdev, memset(rx_status, 0, sizeof(*rx_status)); rtw89_core_update_rx_status(rtwdev, skb, desc_info, rx_status); rtw89_core_rx_pkt_hdl(rtwdev, skb, desc_info); + + if (!rtw89_core_skb_pn_valid(rtwdev, desc_info, skb)) + return; + if (desc_info->long_rxdesc && BIT(desc_info->frame_type) & PPDU_FILTER_BITMAP) skb_queue_tail(&ppdu_sts->rx_queue[band], skb); diff --git a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h index ce04ecaa3a5e..94e4faf70e12 100644 --- a/drivers/net/wireless/realtek/rtw89/core.h +++ b/drivers/net/wireless/realtek/rtw89/core.h @@ -1126,6 +1126,7 @@ struct rtw89_rx_desc_info { bool addr_cam_valid; u8 addr_cam_id; u8 sec_cam_id; + u8 sec_type; u8 mac_id; u16 offset; u16 rxd_len; @@ -6153,6 +6154,12 @@ struct rtw89_beacon_track_info { u32 tbtt_diff_th; }; +struct rtw89_tid_stats { + s64 last_pn; + u16 last_sn; + bool started; +}; + struct rtw89_dev { struct ieee80211_hw *hw; struct device *dev; @@ -6359,6 +6366,7 @@ struct rtw89_sta { struct sk_buff_head roc_queue; struct rtw89_ampdu_params ampdu_params[IEEE80211_NUM_TIDS]; + struct rtw89_tid_stats tid_rx_stats[IEEE80211_NUM_TIDS]; DECLARE_BITMAP(ampdu_map, IEEE80211_NUM_TIDS); DECLARE_BITMAP(pairwise_sec_cam_map, RTW89_MAX_SEC_CAM_NUM); @@ -7769,6 +7777,9 @@ int rtw89_core_sta_link_remove(struct rtw89_dev *rtwdev, void rtw89_core_set_tid_config(struct rtw89_dev *rtwdev, struct ieee80211_sta *sta, struct cfg80211_tid_config *tid_config); +void rtw89_core_tid_rx_stats_ctrl(struct rtw89_dev *rtwdev, struct rtw89_sta *rtwsta, + struct ieee80211_ampdu_params *params, bool enable); +void rtw89_core_tid_rx_stats_reset(struct rtw89_dev *rtwdev); void rtw89_core_rfkill_poll(struct rtw89_dev *rtwdev, bool force); void rtw89_check_quirks(struct rtw89_dev *rtwdev, const struct dmi_system_id *quirks); int rtw89_core_init(struct rtw89_dev *rtwdev); diff --git a/drivers/net/wireless/realtek/rtw89/mac80211.c b/drivers/net/wireless/realtek/rtw89/mac80211.c index cd8e2c8de888..501c3af1da01 100644 --- a/drivers/net/wireless/realtek/rtw89/mac80211.c +++ b/drivers/net/wireless/realtek/rtw89/mac80211.c @@ -964,6 +964,7 @@ static int rtw89_ops_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, rtw89_err(rtwdev, "failed to add key to sec cam\n"); return ret; } + rtw89_core_tid_rx_stats_reset(rtwdev); break; case DISABLE_KEY: flush_work(&rtwdev->txq_work); @@ -1018,9 +1019,11 @@ static int rtw89_ops_ampdu_action(struct ieee80211_hw *hw, rtw89_phy_ra_recalc_agg_limit(rtwdev); break; case IEEE80211_AMPDU_RX_START: + rtw89_core_tid_rx_stats_ctrl(rtwdev, rtwsta, params, true); rtw89_chip_h2c_ba_cam(rtwdev, rtwsta, true, params); break; case IEEE80211_AMPDU_RX_STOP: + rtw89_core_tid_rx_stats_ctrl(rtwdev, rtwsta, params, false); rtw89_chip_h2c_ba_cam(rtwdev, rtwsta, false, params); break; default: diff --git a/drivers/net/wireless/realtek/rtw89/util.h b/drivers/net/wireless/realtek/rtw89/util.h index bd08495301e4..c16e7a7f8bc9 100644 --- a/drivers/net/wireless/realtek/rtw89/util.h +++ b/drivers/net/wireless/realtek/rtw89/util.h @@ -6,6 +6,13 @@ #include "core.h" +#define RTW89_KEY_PN_0 GENMASK_ULL(7, 0) +#define RTW89_KEY_PN_1 GENMASK_ULL(15, 8) +#define RTW89_KEY_PN_2 GENMASK_ULL(23, 16) +#define RTW89_KEY_PN_3 GENMASK_ULL(31, 24) +#define RTW89_KEY_PN_4 GENMASK_ULL(39, 32) +#define RTW89_KEY_PN_5 GENMASK_ULL(47, 40) + #define rtw89_iterate_vifs_bh(rtwdev, iterator, data) \ ieee80211_iterate_active_interfaces_atomic((rtwdev)->hw, \ IEEE80211_IFACE_ITER_NORMAL, iterator, data) @@ -73,6 +80,16 @@ static inline void ether_addr_copy_mask(u8 *dst, const u8 *src, u8 mask) } } +static inline void ccmp_hdr2pn(s64 *pn, const u8 *hdr) +{ + *pn = u64_encode_bits(hdr[0], RTW89_KEY_PN_0) | + u64_encode_bits(hdr[1], RTW89_KEY_PN_1) | + u64_encode_bits(hdr[4], RTW89_KEY_PN_2) | + u64_encode_bits(hdr[5], RTW89_KEY_PN_3) | + u64_encode_bits(hdr[6], RTW89_KEY_PN_4) | + u64_encode_bits(hdr[7], RTW89_KEY_PN_5); +} + s32 rtw89_linear_to_db_quarter(u64 val); s32 rtw89_linear_to_db(u64 val); u64 rtw89_db_quarter_to_linear(s32 db); diff --git a/drivers/net/wireless/realtek/rtw89/wow.c b/drivers/net/wireless/realtek/rtw89/wow.c index 368e08826f1e..8dadd8df4fc6 100644 --- a/drivers/net/wireless/realtek/rtw89/wow.c +++ b/drivers/net/wireless/realtek/rtw89/wow.c @@ -1741,6 +1741,8 @@ static int rtw89_wow_disable(struct rtw89_dev *rtwdev) rtw89_wow_leave_ps(rtwdev, false); + rtw89_core_tid_rx_stats_reset(rtwdev); + ret = rtw89_wow_fw_stop(rtwdev); if (ret) { rtw89_err(rtwdev, "wow: failed to swap to normal fw\n"); diff --git a/drivers/net/wireless/realtek/rtw89/wow.h b/drivers/net/wireless/realtek/rtw89/wow.h index 71e07f482174..d7e67632efeb 100644 --- a/drivers/net/wireless/realtek/rtw89/wow.h +++ b/drivers/net/wireless/realtek/rtw89/wow.h @@ -8,13 +8,6 @@ #define RTW89_KEY_TKIP_PN_IV16 GENMASK_ULL(15, 0) #define RTW89_KEY_TKIP_PN_IV32 GENMASK_ULL(47, 16) -#define RTW89_KEY_PN_0 GENMASK_ULL(7, 0) -#define RTW89_KEY_PN_1 GENMASK_ULL(15, 8) -#define RTW89_KEY_PN_2 GENMASK_ULL(23, 16) -#define RTW89_KEY_PN_3 GENMASK_ULL(31, 24) -#define RTW89_KEY_PN_4 GENMASK_ULL(39, 32) -#define RTW89_KEY_PN_5 GENMASK_ULL(47, 40) - #define RTW89_IGTK_IPN_0 GENMASK_ULL(7, 0) #define RTW89_IGTK_IPN_1 GENMASK_ULL(15, 8) #define RTW89_IGTK_IPN_2 GENMASK_ULL(23, 16) From 45ba9226b1081fe3292c1087de9db6d592c38de8 Mon Sep 17 00:00:00 2001 From: Po-Hao Huang Date: Tue, 10 Mar 2026 16:01:45 +0800 Subject: [PATCH 0556/1561] wifi: rtw89: Recalculate station aggregates when AMSDU length changes for MLO links Currently, AMSDU length is updated per-link for MLO but not propagated to the station aggregates, causing suboptimal TX throughput. This change ensures station aggregates are recalculated when any link's AMSDU length changes. Signed-off-by: Po-Hao Huang Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260310080146.31113-13-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/phy.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/phy.c b/drivers/net/wireless/realtek/rtw89/phy.c index 0fa4d8d791f1..3a241738ac06 100644 --- a/drivers/net/wireless/realtek/rtw89/phy.c +++ b/drivers/net/wireless/realtek/rtw89/phy.c @@ -3213,7 +3213,8 @@ struct rtw89_phy_iter_ra_data { static void __rtw89_phy_c2h_ra_rpt_iter(struct rtw89_sta_link *rtwsta_link, struct ieee80211_link_sta *link_sta, - struct rtw89_phy_iter_ra_data *ra_data) + struct rtw89_phy_iter_ra_data *ra_data, + bool *changed) { struct rtw89_dev *rtwdev = ra_data->rtwdev; const struct rtw89_c2h_ra_rpt *c2h = @@ -3222,7 +3223,7 @@ static void __rtw89_phy_c2h_ra_rpt_iter(struct rtw89_sta_link *rtwsta_link, const struct rtw89_chip_info *chip = rtwdev->chip; bool format_v1 = chip->chip_gen == RTW89_CHIP_BE; u8 mode, rate, bw, giltf, mac_id; - u16 legacy_bitrate; + u16 legacy_bitrate, amsdu_len; bool valid; u8 mcs = 0; u8 t; @@ -3319,7 +3320,13 @@ static void __rtw89_phy_c2h_ra_rpt_iter(struct rtw89_sta_link *rtwsta_link, u16_encode_bits(mode, RTW89_HW_RATE_MASK_MOD) | u16_encode_bits(rate, RTW89_HW_RATE_MASK_VAL); ra_report->might_fallback_legacy = mcs <= 2; - link_sta->agg.max_rc_amsdu_len = get_max_amsdu_len(rtwdev, ra_report); + + amsdu_len = get_max_amsdu_len(rtwdev, ra_report); + if (link_sta->agg.max_rc_amsdu_len != amsdu_len) { + link_sta->agg.max_rc_amsdu_len = amsdu_len; + *changed = true; + } + rtwsta_link->max_agg_wait = link_sta->agg.max_rc_amsdu_len / 1500 - 1; } @@ -3330,14 +3337,18 @@ static void rtw89_phy_c2h_ra_rpt_iter(void *data, struct ieee80211_sta *sta) struct rtw89_sta_link *rtwsta_link; struct ieee80211_link_sta *link_sta; unsigned int link_id; + bool changed = false; rcu_read_lock(); rtw89_sta_for_each_link(rtwsta, rtwsta_link, link_id) { link_sta = rtw89_sta_rcu_dereference_link(rtwsta_link, false); - __rtw89_phy_c2h_ra_rpt_iter(rtwsta_link, link_sta, ra_data); + __rtw89_phy_c2h_ra_rpt_iter(rtwsta_link, link_sta, ra_data, &changed); } + if (changed) + ieee80211_sta_recalc_aggregates(sta); + rcu_read_unlock(); } From a1488456f70655f7a95f846c8fbadbe61daf292c Mon Sep 17 00:00:00 2001 From: Zong-Zhe Yang Date: Tue, 10 Mar 2026 16:01:46 +0800 Subject: [PATCH 0557/1561] wifi: rtw89: debug: simulate Wi-Fi 7 SER L0/L1 without PS mode Current triggers of Wi-Fi 7 SER (system error recovery) L0/L1 simulation don't yet guarantee working with PS mode. So, leave PS mode first before triggering them for now. Signed-off-by: Zong-Zhe Yang Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260310080146.31113-14-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/debug.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/realtek/rtw89/debug.c b/drivers/net/wireless/realtek/rtw89/debug.c index d461ffc6dc9e..82849d109cc3 100644 --- a/drivers/net/wireless/realtek/rtw89/debug.c +++ b/drivers/net/wireless/realtek/rtw89/debug.c @@ -3552,6 +3552,8 @@ static int rtw89_dbg_trigger_l1_error_by_halt_h2c_be(struct rtw89_dev *rtwdev) if (!test_bit(RTW89_FLAG_FW_RDY, rtwdev->flags)) return -EBUSY; + rtw89_leave_ps_mode(rtwdev); + rtw89_write32_set(rtwdev, R_BE_FW_TRIGGER_IDCT_ISR, B_BE_DMAC_FW_TRIG_IDCT | B_BE_DMAC_FW_ERR_IDCT_IMR); @@ -3654,6 +3656,8 @@ static int rtw89_dbg_trigger_l0_error_by_halt_h2c_be(struct rtw89_dev *rtwdev) if (!test_bit(RTW89_FLAG_FW_RDY, rtwdev->flags)) return -EBUSY; + rtw89_leave_ps_mode(rtwdev); + rtw89_write32_set(rtwdev, R_BE_CMAC_FW_TRIGGER_IDCT_ISR, B_BE_CMAC_FW_TRIG_IDCT | B_BE_CMAC_FW_ERR_IDCT_IMR); From e570593b568f74b8d8367d094400d71bc398118f Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Tue, 10 Mar 2026 08:16:12 -0700 Subject: [PATCH 0558/1561] wifi: ath12k: Clean up the WMI Unit Test command interface Currently, ath12k_wmi_send_unit_test_cmd() provides the interface to send a Unit Test command to firmware. The payload for the command is passed in two separate parameters, struct wmi_unit_test_cmd ut_cmd and u32 *test_args. This interface is strange in that it passes the ut_cmd structure by value instead of by reference. But even worse, this presents an interface that is not endian clean since the ut_cmd structure is defined in little endian format while the test_args array is defined to be in cpu endian format. Furthermore, the implementation of this function passes the test_args directly to the firmware, without performing cpu_to_le32() conversion, and hence this functionality will not work correctly on big endian platforms. In order to fix these issues, introduce a new wmi_unit_test_arg structure which defines all of the parameters needed by the Unit Test command in a single structure using cpu endian. Update ath12k_wmi_send_unit_test_cmd() to take a pointer to this structure and perform all cpu_to_le32() conversions needed while forming the firmware command. Update the only existing Unit Test function, ath12k_wmi_simulate_radar(), to properly fill and pass this new structure to ath12k_wmi_send_unit_test_cmd(). Compile tested only. Reviewed-by: Rameshkumar Sundaram Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20260310-ath12k-unit-test-cleanup-v1-1-03e3df56f903@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/wmi.c | 58 +++++++++++++-------------- drivers/net/wireless/ath/ath12k/wmi.h | 11 +++++ 2 files changed, 38 insertions(+), 31 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index 8e13c3ec1cc7..8379aa8fe091 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -10027,50 +10027,46 @@ static int ath12k_connect_pdev_htc_service(struct ath12k_base *ab, static int ath12k_wmi_send_unit_test_cmd(struct ath12k *ar, - struct wmi_unit_test_cmd ut_cmd, - u32 *test_args) + const struct wmi_unit_test_arg *ut) { struct ath12k_wmi_pdev *wmi = ar->wmi; struct wmi_unit_test_cmd *cmd; + int buf_len, arg_len; struct sk_buff *skb; struct wmi_tlv *tlv; + __le32 *ut_cmd_args; void *ptr; - u32 *ut_cmd_args; - int buf_len, arg_len; int ret; int i; - arg_len = sizeof(u32) * le32_to_cpu(ut_cmd.num_args); - buf_len = sizeof(ut_cmd) + arg_len + TLV_HDR_SIZE; + arg_len = sizeof(*ut_cmd_args) * ut->num_args; + buf_len = sizeof(*cmd) + arg_len + TLV_HDR_SIZE; skb = ath12k_wmi_alloc_skb(wmi->wmi_ab, buf_len); if (!skb) return -ENOMEM; - cmd = (struct wmi_unit_test_cmd *)skb->data; + ptr = skb->data; + cmd = ptr; cmd->tlv_header = ath12k_wmi_tlv_cmd_hdr(WMI_TAG_UNIT_TEST_CMD, - sizeof(ut_cmd)); - - cmd->vdev_id = ut_cmd.vdev_id; - cmd->module_id = ut_cmd.module_id; - cmd->num_args = ut_cmd.num_args; - cmd->diag_token = ut_cmd.diag_token; - - ptr = skb->data + sizeof(ut_cmd); + sizeof(*cmd)); + cmd->vdev_id = cpu_to_le32(ut->vdev_id); + cmd->module_id = cpu_to_le32(ut->module_id); + cmd->num_args = cpu_to_le32(ut->num_args); + cmd->diag_token = cpu_to_le32(ut->diag_token); + ptr += sizeof(*cmd); tlv = ptr; tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_UINT32, arg_len); ptr += TLV_HDR_SIZE; - ut_cmd_args = ptr; - for (i = 0; i < le32_to_cpu(ut_cmd.num_args); i++) - ut_cmd_args[i] = test_args[i]; + for (i = 0; i < ut->num_args; i++) + ut_cmd_args[i] = cpu_to_le32(ut->args[i]); ath12k_dbg(ar->ab, ATH12K_DBG_WMI, "WMI unit test : module %d vdev %d n_args %d token %d\n", - cmd->module_id, cmd->vdev_id, cmd->num_args, - cmd->diag_token); + ut->module_id, ut->vdev_id, ut->num_args, ut->diag_token); ret = ath12k_wmi_cmd_send(wmi, skb, WMI_UNIT_TEST_CMDID); @@ -10086,8 +10082,7 @@ ath12k_wmi_send_unit_test_cmd(struct ath12k *ar, int ath12k_wmi_simulate_radar(struct ath12k *ar) { struct ath12k_link_vif *arvif; - u32 dfs_args[DFS_MAX_TEST_ARGS]; - struct wmi_unit_test_cmd wmi_ut; + struct wmi_unit_test_arg wmi_ut = {}; bool arvif_found = false; list_for_each_entry(arvif, &ar->arvifs, list) { @@ -10100,22 +10095,23 @@ int ath12k_wmi_simulate_radar(struct ath12k *ar) if (!arvif_found) return -EINVAL; - dfs_args[DFS_TEST_CMDID] = 0; - dfs_args[DFS_TEST_PDEV_ID] = ar->pdev->pdev_id; - /* Currently we could pass segment_id(b0 - b1), chirp(b2) + wmi_ut.args[DFS_TEST_CMDID] = 0; + wmi_ut.args[DFS_TEST_PDEV_ID] = ar->pdev->pdev_id; + /* + * Currently we could pass segment_id(b0 - b1), chirp(b2) * freq offset (b3 - b10) to unit test. For simulation * purpose this can be set to 0 which is valid. */ - dfs_args[DFS_TEST_RADAR_PARAM] = 0; + wmi_ut.args[DFS_TEST_RADAR_PARAM] = 0; - wmi_ut.vdev_id = cpu_to_le32(arvif->vdev_id); - wmi_ut.module_id = cpu_to_le32(DFS_UNIT_TEST_MODULE); - wmi_ut.num_args = cpu_to_le32(DFS_MAX_TEST_ARGS); - wmi_ut.diag_token = cpu_to_le32(DFS_UNIT_TEST_TOKEN); + wmi_ut.vdev_id = arvif->vdev_id; + wmi_ut.module_id = DFS_UNIT_TEST_MODULE; + wmi_ut.num_args = DFS_MAX_TEST_ARGS; + wmi_ut.diag_token = DFS_UNIT_TEST_TOKEN; ath12k_dbg(ar->ab, ATH12K_DBG_REG, "Triggering Radar Simulation\n"); - return ath12k_wmi_send_unit_test_cmd(ar, wmi_ut, dfs_args); + return ath12k_wmi_send_unit_test_cmd(ar, &wmi_ut); } int ath12k_wmi_send_tpc_stats_request(struct ath12k *ar, diff --git a/drivers/net/wireless/ath/ath12k/wmi.h b/drivers/net/wireless/ath/ath12k/wmi.h index 0bf0a7941cd3..8d766dd5b304 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.h +++ b/drivers/net/wireless/ath/ath12k/wmi.h @@ -4210,6 +4210,17 @@ struct wmi_dfs_unit_test_arg { u32 radar_param; }; +/* update if another test command requires more */ +#define WMI_UNIT_TEST_ARGS_MAX DFS_MAX_TEST_ARGS + +struct wmi_unit_test_arg { + u32 vdev_id; + u32 module_id; + u32 diag_token; + u32 num_args; + u32 args[WMI_UNIT_TEST_ARGS_MAX]; +}; + struct wmi_unit_test_cmd { __le32 tlv_header; __le32 vdev_id; From 7bbb578fc43e7dcb8690cfc98844bd67bc311e8a Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Tue, 10 Mar 2026 08:16:13 -0700 Subject: [PATCH 0559/1561] wifi: ath12k: Remove unused DFS Unit Test definitions The following are unused, so remove them: struct wmi_dfs_unit_test_arg macro DFS_PHYERR_UNIT_TEST_CMD Compile tested only. Reviewed-by: Rameshkumar Sundaram Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20260310-ath12k-unit-test-cleanup-v1-2-03e3df56f903@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/wmi.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/wmi.h b/drivers/net/wireless/ath/ath12k/wmi.h index 8d766dd5b304..5ba9b7d3a888 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.h +++ b/drivers/net/wireless/ath/ath12k/wmi.h @@ -4193,7 +4193,6 @@ struct wmi_addba_clear_resp_cmd { struct ath12k_wmi_mac_addr_params peer_macaddr; } __packed; -#define DFS_PHYERR_UNIT_TEST_CMD 0 #define DFS_UNIT_TEST_MODULE 0x2b #define DFS_UNIT_TEST_TOKEN 0xAA @@ -4204,12 +4203,6 @@ enum dfs_test_args_idx { DFS_MAX_TEST_ARGS, }; -struct wmi_dfs_unit_test_arg { - u32 cmd_id; - u32 pdev_id; - u32 radar_param; -}; - /* update if another test command requires more */ #define WMI_UNIT_TEST_ARGS_MAX DFS_MAX_TEST_ARGS From f8e761655997cc0ee434fb5f35570d2e93d3a707 Mon Sep 17 00:00:00 2001 From: Alexei Lazar Date: Mon, 9 Mar 2026 11:34:27 +0200 Subject: [PATCH 0560/1561] net/mlx5: Add IFC bits for shared headroom pool PBMC support Add hardware interface definitions for shared headroom pool (SHP) in port buffer management: - shp_pbmc_pbsr_support: capability bit in PCAM enhanced features indicating device support for shared headroom pool in PBMC/PBSR. - shared_headroom_pool: buffer entry in PBMC register (pbmc_reg_bits) for the shared headroom pool configuration, reusing the bufferx layout; reduce trailing reserved region accordingly. Signed-off-by: Alexei Lazar Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260309093435.1850724-2-tariqt@nvidia.com Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index a3948b36820d..a76c54bf1927 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -10845,7 +10845,9 @@ struct mlx5_ifc_pcam_enhanced_features_bits { u8 fec_200G_per_lane_in_pplm[0x1]; u8 reserved_at_1e[0x2a]; u8 fec_100G_per_lane_in_pplm[0x1]; - u8 reserved_at_49[0xa]; + u8 reserved_at_49[0x2]; + u8 shp_pbmc_pbsr_support[0x1]; + u8 reserved_at_4c[0x7]; u8 buffer_ownership[0x1]; u8 resereved_at_54[0x14]; u8 fec_50G_per_lane_in_pplm[0x1]; @@ -12090,8 +12092,9 @@ struct mlx5_ifc_pbmc_reg_bits { u8 port_buffer_size[0x10]; struct mlx5_ifc_bufferx_reg_bits buffer[10]; + struct mlx5_ifc_bufferx_reg_bits shared_headroom_pool; - u8 reserved_at_2e0[0x80]; + u8 reserved_at_320[0x40]; }; struct mlx5_ifc_sbpr_reg_bits { From 691dffc7255e740bc3df1c68b50b36786aadeb3a Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Mon, 9 Mar 2026 11:34:28 +0200 Subject: [PATCH 0561/1561] net/mlx5: Add silent mode set/query and VHCA RX IFC bits Update the mlx5 IFC headers with newly defined capability and command-layout bits: - Add silent_mode_query and rename silent_mode to silent_mode_set cap fields. - Add forward_vhca_rx and MLX5_IFC_FLOW_DESTINATION_TYPE_VHCA_RX. - Expose silent mode fields in the L2 table query command structures. Update the SD support check to use the new capability name (silent_mode_set) to match the updated IFC definition. Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260309093435.1850724-3-tariqt@nvidia.com Signed-off-by: Leon Romanovsky --- .../net/ethernet/mellanox/mlx5/core/fs_cmd.c | 2 +- .../net/ethernet/mellanox/mlx5/core/lib/sd.c | 2 +- include/linux/mlx5/mlx5_ifc.h | 19 ++++++++++++++----- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index c348ee62cd3a..16b28028609d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -1183,7 +1183,7 @@ int mlx5_fs_cmd_set_l2table_entry_silent(struct mlx5_core_dev *dev, u8 silent_mo { u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)] = {}; - if (silent_mode && !MLX5_CAP_GEN(dev, silent_mode)) + if (silent_mode && !MLX5_CAP_GEN(dev, silent_mode_set)) return -EOPNOTSUPP; MLX5_SET(set_l2_table_entry_in, in, opcode, MLX5_CMD_OP_SET_L2_TABLE_ENTRY); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c index 954942ad93c5..762c783156b4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c @@ -107,7 +107,7 @@ static bool mlx5_sd_is_supported(struct mlx5_core_dev *dev, u8 host_buses) /* Disconnect secondaries from the network */ if (!MLX5_CAP_GEN(dev, eswitch_manager)) return false; - if (!MLX5_CAP_GEN(dev, silent_mode)) + if (!MLX5_CAP_GEN(dev, silent_mode_set)) return false; /* RX steering from primary to secondaries */ diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index a76c54bf1927..8fa4fb3d36cf 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -469,7 +469,8 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 table_miss_action_domain[0x1]; u8 termination_table[0x1]; u8 reformat_and_fwd_to_table[0x1]; - u8 reserved_at_1a[0x2]; + u8 forward_vhca_rx[0x1]; + u8 reserved_at_1b[0x1]; u8 ipsec_encrypt[0x1]; u8 ipsec_decrypt[0x1]; u8 sw_owner_v2[0x1]; @@ -2012,12 +2013,14 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 disable_local_lb_mc[0x1]; u8 log_min_hairpin_wq_data_sz[0x5]; u8 reserved_at_3e8[0x1]; - u8 silent_mode[0x1]; + u8 silent_mode_set[0x1]; u8 vhca_state[0x1]; u8 log_max_vlan_list[0x5]; u8 reserved_at_3f0[0x3]; u8 log_max_current_mc_list[0x5]; - u8 reserved_at_3f8[0x3]; + u8 reserved_at_3f8[0x1]; + u8 silent_mode_query[0x1]; + u8 reserved_at_3fa[0x1]; u8 log_max_current_uc_list[0x5]; u8 general_obj_types[0x40]; @@ -2279,6 +2282,7 @@ enum mlx5_ifc_flow_destination_type { MLX5_IFC_FLOW_DESTINATION_TYPE_VPORT = 0x0, MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_TABLE = 0x1, MLX5_IFC_FLOW_DESTINATION_TYPE_TIR = 0x2, + MLX5_IFC_FLOW_DESTINATION_TYPE_VHCA_RX = 0x4, MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_SAMPLER = 0x6, MLX5_IFC_FLOW_DESTINATION_TYPE_UPLINK = 0x8, MLX5_IFC_FLOW_DESTINATION_TYPE_TABLE_TYPE = 0xA, @@ -6265,7 +6269,9 @@ struct mlx5_ifc_query_l2_table_entry_out_bits { u8 reserved_at_40[0xa0]; - u8 reserved_at_e0[0x13]; + u8 reserved_at_e0[0x11]; + u8 silent_mode[0x1]; + u8 reserved_at_f2[0x1]; u8 vlan_valid[0x1]; u8 vlan[0xc]; @@ -6281,7 +6287,10 @@ struct mlx5_ifc_query_l2_table_entry_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x60]; + u8 reserved_at_40[0x40]; + + u8 silent_mode_query[0x1]; + u8 reserved_at_81[0x1f]; u8 reserved_at_a0[0x8]; u8 table_index[0x18]; From 91e9f3e7b626579b0393151545b80e50e2bffdca Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Mon, 9 Mar 2026 11:34:29 +0200 Subject: [PATCH 0562/1561] net/mlx5: LAG, replace pf array with xarray Replace the fixed-size array with a dynamic xarray. This commit changes: - Adds mlx5_lag_pf() helper for consistent xarray access - Converts all direct pf[] accesses to use the new helper/macro - Dynamically allocates lag_func entries via xa_store/xa_load No functional changes intended. This prepares the LAG infrastructure for future flexibility in device indexing. Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260309093435.1850724-4-tariqt@nvidia.com Signed-off-by: Leon Romanovsky --- .../ethernet/mellanox/mlx5/core/lag/debugfs.c | 3 +- .../net/ethernet/mellanox/mlx5/core/lag/lag.c | 300 ++++++++++++------ .../net/ethernet/mellanox/mlx5/core/lag/lag.h | 8 +- .../net/ethernet/mellanox/mlx5/core/lag/mp.c | 20 +- .../ethernet/mellanox/mlx5/core/lag/mpesw.c | 12 +- .../mellanox/mlx5/core/lag/port_sel.c | 20 +- 6 files changed, 243 insertions(+), 120 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c index 62b6faa4276a..37de4be0e620 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c @@ -145,7 +145,8 @@ static int members_show(struct seq_file *file, void *priv) ldev = mlx5_lag_dev(dev); mutex_lock(&ldev->lock); mlx5_ldev_for_each(i, 0, ldev) - seq_printf(file, "%s\n", dev_name(ldev->pf[i].dev->device)); + seq_printf(file, "%s\n", + dev_name(mlx5_lag_pf(ldev, i)->dev->device)); mutex_unlock(&ldev->lock); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 044adfdf9aa2..81b1f84f902e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -232,6 +232,7 @@ static void mlx5_do_bond_work(struct work_struct *work); static void mlx5_ldev_free(struct kref *ref) { struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref); + struct lag_func *pf; struct net *net; int i; @@ -241,13 +242,16 @@ static void mlx5_ldev_free(struct kref *ref) } mlx5_ldev_for_each(i, 0, ldev) { - if (ldev->pf[i].dev && - ldev->pf[i].port_change_nb.nb.notifier_call) { - struct mlx5_nb *nb = &ldev->pf[i].port_change_nb; + pf = mlx5_lag_pf(ldev, i); + if (pf->port_change_nb.nb.notifier_call) { + struct mlx5_nb *nb = &pf->port_change_nb; - mlx5_eq_notifier_unregister(ldev->pf[i].dev, nb); + mlx5_eq_notifier_unregister(pf->dev, nb); } + xa_erase(&ldev->pfs, i); + kfree(pf); } + xa_destroy(&ldev->pfs); mlx5_lag_mp_cleanup(ldev); cancel_delayed_work_sync(&ldev->bond_work); @@ -284,6 +288,7 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev) kref_init(&ldev->ref); mutex_init(&ldev->lock); + xa_init(&ldev->pfs); INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work); INIT_WORK(&ldev->speed_update_work, mlx5_mpesw_speed_update_work); @@ -309,11 +314,14 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev) int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, struct net_device *ndev) { + struct lag_func *pf; int i; - mlx5_ldev_for_each(i, 0, ldev) - if (ldev->pf[i].netdev == ndev) + mlx5_ldev_for_each(i, 0, ldev) { + pf = mlx5_lag_pf(ldev, i); + if (pf->netdev == ndev) return i; + } return -ENOENT; } @@ -349,14 +357,17 @@ int mlx5_lag_num_devs(struct mlx5_lag *ldev) int mlx5_lag_num_netdevs(struct mlx5_lag *ldev) { + struct lag_func *pf; int i, num = 0; if (!ldev) return 0; - mlx5_ldev_for_each(i, 0, ldev) - if (ldev->pf[i].netdev) + mlx5_ldev_for_each(i, 0, ldev) { + pf = mlx5_lag_pf(ldev, i); + if (pf->netdev) num++; + } return num; } @@ -424,25 +435,30 @@ static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev) { + struct lag_func *pf; int i; - mlx5_ldev_for_each(i, 0, ldev) - if (ldev->pf[i].has_drop) + mlx5_ldev_for_each(i, 0, ldev) { + pf = mlx5_lag_pf(ldev, i); + if (pf->has_drop) return true; + } return false; } static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev) { + struct lag_func *pf; int i; mlx5_ldev_for_each(i, 0, ldev) { - if (!ldev->pf[i].has_drop) + pf = mlx5_lag_pf(ldev, i); + if (!pf->has_drop) continue; - mlx5_esw_acl_ingress_vport_drop_rule_destroy(ldev->pf[i].dev->priv.eswitch, + mlx5_esw_acl_ingress_vport_drop_rule_destroy(pf->dev->priv.eswitch, MLX5_VPORT_UPLINK); - ldev->pf[i].has_drop = false; + pf->has_drop = false; } } @@ -451,6 +467,7 @@ static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev, { u8 disabled_ports[MLX5_MAX_PORTS] = {}; struct mlx5_core_dev *dev; + struct lag_func *pf; int disabled_index; int num_disabled; int err; @@ -468,11 +485,12 @@ static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev, for (i = 0; i < num_disabled; i++) { disabled_index = disabled_ports[i]; - dev = ldev->pf[disabled_index].dev; + pf = mlx5_lag_pf(ldev, disabled_index); + dev = pf->dev; err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch, MLX5_VPORT_UPLINK); if (!err) - ldev->pf[disabled_index].has_drop = true; + pf->has_drop = true; else mlx5_core_err(dev, "Failed to create lag drop rule, error: %d", err); @@ -504,7 +522,7 @@ static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports) if (idx < 0) return -EINVAL; - dev0 = ldev->pf[idx].dev; + dev0 = mlx5_lag_pf(ldev, idx)->dev; if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) { ret = mlx5_lag_port_sel_modify(ldev, ports); if (ret || @@ -521,6 +539,7 @@ static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports) static struct net_device *mlx5_lag_active_backup_get_netdev(struct mlx5_core_dev *dev) { struct net_device *ndev = NULL; + struct lag_func *pf; struct mlx5_lag *ldev; unsigned long flags; int i, last_idx; @@ -531,14 +550,17 @@ static struct net_device *mlx5_lag_active_backup_get_netdev(struct mlx5_core_dev if (!ldev) goto unlock; - mlx5_ldev_for_each(i, 0, ldev) + mlx5_ldev_for_each(i, 0, ldev) { + pf = mlx5_lag_pf(ldev, i); if (ldev->tracker.netdev_state[i].tx_enabled) - ndev = ldev->pf[i].netdev; + ndev = pf->netdev; + } if (!ndev) { last_idx = mlx5_lag_get_dev_index_by_seq(ldev, ldev->ports - 1); if (last_idx < 0) goto unlock; - ndev = ldev->pf[last_idx].netdev; + pf = mlx5_lag_pf(ldev, last_idx); + ndev = pf->netdev; } dev_hold(ndev); @@ -563,7 +585,7 @@ void mlx5_modify_lag(struct mlx5_lag *ldev, if (first_idx < 0) return; - dev0 = ldev->pf[first_idx].dev; + dev0 = mlx5_lag_pf(ldev, first_idx)->dev; mlx5_infer_tx_affinity_mapping(tracker, ldev, ldev->buckets, ports); mlx5_ldev_for_each(i, 0, ldev) { @@ -615,7 +637,7 @@ static int mlx5_lag_set_port_sel_mode(struct mlx5_lag *ldev, mode == MLX5_LAG_MODE_MULTIPATH) return 0; - dev0 = ldev->pf[first_idx].dev; + dev0 = mlx5_lag_pf(ldev, first_idx)->dev; if (!MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table)) { if (ldev->ports > 2) @@ -670,10 +692,12 @@ static int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev) if (first_idx < 0) return -EINVAL; - dev0 = ldev->pf[first_idx].dev; + dev0 = mlx5_lag_pf(ldev, first_idx)->dev; master_esw = dev0->priv.eswitch; mlx5_ldev_for_each(i, first_idx + 1, ldev) { - struct mlx5_eswitch *slave_esw = ldev->pf[i].dev->priv.eswitch; + struct mlx5_eswitch *slave_esw; + + slave_esw = mlx5_lag_pf(ldev, i)->dev->priv.eswitch; err = mlx5_eswitch_offloads_single_fdb_add_one(master_esw, slave_esw, ldev->ports); @@ -684,7 +708,7 @@ static int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev) err: mlx5_ldev_for_each_reverse(j, i, first_idx + 1, ldev) mlx5_eswitch_offloads_single_fdb_del_one(master_esw, - ldev->pf[j].dev->priv.eswitch); + mlx5_lag_pf(ldev, j)->dev->priv.eswitch); return err; } @@ -702,7 +726,7 @@ static int mlx5_create_lag(struct mlx5_lag *ldev, if (first_idx < 0) return -EINVAL; - dev0 = ldev->pf[first_idx].dev; + dev0 = mlx5_lag_pf(ldev, first_idx)->dev; if (tracker) mlx5_lag_print_mapping(dev0, ldev, tracker, flags); mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n", @@ -749,7 +773,7 @@ int mlx5_activate_lag(struct mlx5_lag *ldev, if (first_idx < 0) return -EINVAL; - dev0 = ldev->pf[first_idx].dev; + dev0 = mlx5_lag_pf(ldev, first_idx)->dev; err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags); if (err) return err; @@ -805,7 +829,7 @@ int mlx5_deactivate_lag(struct mlx5_lag *ldev) if (first_idx < 0) return -EINVAL; - dev0 = ldev->pf[first_idx].dev; + dev0 = mlx5_lag_pf(ldev, first_idx)->dev; master_esw = dev0->priv.eswitch; ldev->mode = MLX5_LAG_MODE_NONE; ldev->mode_flags = 0; @@ -814,7 +838,7 @@ int mlx5_deactivate_lag(struct mlx5_lag *ldev) if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) { mlx5_ldev_for_each(i, first_idx + 1, ldev) mlx5_eswitch_offloads_single_fdb_del_one(master_esw, - ldev->pf[i].dev->priv.eswitch); + mlx5_lag_pf(ldev, i)->dev->priv.eswitch); clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags); } @@ -849,6 +873,7 @@ bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) struct mlx5_core_dev *dev; u8 mode; #endif + struct lag_func *pf; bool roce_support; int i; @@ -857,55 +882,66 @@ bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) #ifdef CONFIG_MLX5_ESWITCH mlx5_ldev_for_each(i, 0, ldev) { - dev = ldev->pf[i].dev; + pf = mlx5_lag_pf(ldev, i); + dev = pf->dev; if (mlx5_eswitch_num_vfs(dev->priv.eswitch) && !is_mdev_switchdev_mode(dev)) return false; } - dev = ldev->pf[first_idx].dev; + pf = mlx5_lag_pf(ldev, first_idx); + dev = pf->dev; mode = mlx5_eswitch_mode(dev); - mlx5_ldev_for_each(i, 0, ldev) - if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode) + mlx5_ldev_for_each(i, 0, ldev) { + pf = mlx5_lag_pf(ldev, i); + if (mlx5_eswitch_mode(pf->dev) != mode) return false; + } #else - mlx5_ldev_for_each(i, 0, ldev) - if (mlx5_sriov_is_enabled(ldev->pf[i].dev)) + mlx5_ldev_for_each(i, 0, ldev) { + pf = mlx5_lag_pf(ldev, i); + if (mlx5_sriov_is_enabled(pf->dev)) return false; + } #endif - roce_support = mlx5_get_roce_state(ldev->pf[first_idx].dev); - mlx5_ldev_for_each(i, first_idx + 1, ldev) - if (mlx5_get_roce_state(ldev->pf[i].dev) != roce_support) + pf = mlx5_lag_pf(ldev, first_idx); + roce_support = mlx5_get_roce_state(pf->dev); + mlx5_ldev_for_each(i, first_idx + 1, ldev) { + pf = mlx5_lag_pf(ldev, i); + if (mlx5_get_roce_state(pf->dev) != roce_support) return false; + } return true; } void mlx5_lag_add_devices(struct mlx5_lag *ldev) { + struct lag_func *pf; int i; mlx5_ldev_for_each(i, 0, ldev) { - if (ldev->pf[i].dev->priv.flags & - MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV) + pf = mlx5_lag_pf(ldev, i); + if (pf->dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV) continue; - ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; - mlx5_rescan_drivers_locked(ldev->pf[i].dev); + pf->dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; + mlx5_rescan_drivers_locked(pf->dev); } } void mlx5_lag_remove_devices(struct mlx5_lag *ldev) { + struct lag_func *pf; int i; mlx5_ldev_for_each(i, 0, ldev) { - if (ldev->pf[i].dev->priv.flags & - MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV) + pf = mlx5_lag_pf(ldev, i); + if (pf->dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV) continue; - ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; - mlx5_rescan_drivers_locked(ldev->pf[i].dev); + pf->dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; + mlx5_rescan_drivers_locked(pf->dev); } } @@ -921,7 +957,7 @@ void mlx5_disable_lag(struct mlx5_lag *ldev) if (idx < 0) return; - dev0 = ldev->pf[idx].dev; + dev0 = mlx5_lag_pf(ldev, idx)->dev; roce_lag = __mlx5_lag_is_roce(ldev); if (shared_fdb) { @@ -932,7 +968,7 @@ void mlx5_disable_lag(struct mlx5_lag *ldev) mlx5_rescan_drivers_locked(dev0); } mlx5_ldev_for_each(i, idx + 1, ldev) - mlx5_nic_vport_disable_roce(ldev->pf[i].dev); + mlx5_nic_vport_disable_roce(mlx5_lag_pf(ldev, i)->dev); } err = mlx5_deactivate_lag(ldev); @@ -944,8 +980,8 @@ void mlx5_disable_lag(struct mlx5_lag *ldev) if (shared_fdb) mlx5_ldev_for_each(i, 0, ldev) - if (!(ldev->pf[i].dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) - mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch); + if (!(mlx5_lag_pf(ldev, i)->dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) + mlx5_eswitch_reload_ib_reps(mlx5_lag_pf(ldev, i)->dev->priv.eswitch); } bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev) @@ -958,7 +994,7 @@ bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev) return false; mlx5_ldev_for_each(i, idx + 1, ldev) { - dev = ldev->pf[i].dev; + dev = mlx5_lag_pf(ldev, i)->dev; if (is_mdev_switchdev_mode(dev) && mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) && MLX5_CAP_GEN(dev, lag_native_fdb_selection) && @@ -969,7 +1005,7 @@ bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev) return false; } - dev = ldev->pf[idx].dev; + dev = mlx5_lag_pf(ldev, idx)->dev; if (is_mdev_switchdev_mode(dev) && mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) && mlx5_esw_offloads_devcom_is_ready(dev->priv.eswitch) && @@ -983,14 +1019,19 @@ bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev) static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev) { bool roce_lag = true; + struct lag_func *pf; int i; - mlx5_ldev_for_each(i, 0, ldev) - roce_lag = roce_lag && !mlx5_sriov_is_enabled(ldev->pf[i].dev); + mlx5_ldev_for_each(i, 0, ldev) { + pf = mlx5_lag_pf(ldev, i); + roce_lag = roce_lag && !mlx5_sriov_is_enabled(pf->dev); + } #ifdef CONFIG_MLX5_ESWITCH - mlx5_ldev_for_each(i, 0, ldev) - roce_lag = roce_lag && is_mdev_legacy_mode(ldev->pf[i].dev); + mlx5_ldev_for_each(i, 0, ldev) { + pf = mlx5_lag_pf(ldev, i); + roce_lag = roce_lag && is_mdev_legacy_mode(pf->dev); + } #endif return roce_lag; @@ -1014,13 +1055,17 @@ mlx5_lag_sum_devices_speed(struct mlx5_lag *ldev, u32 *sum_speed, int (*get_speed)(struct mlx5_core_dev *, u32 *)) { struct mlx5_core_dev *pf_mdev; + struct lag_func *pf; int pf_idx; u32 speed; int ret; *sum_speed = 0; mlx5_ldev_for_each(pf_idx, 0, ldev) { - pf_mdev = ldev->pf[pf_idx].dev; + pf = mlx5_lag_pf(ldev, pf_idx); + if (!pf) + continue; + pf_mdev = pf->dev; if (!pf_mdev) continue; @@ -1086,6 +1131,7 @@ static void mlx5_lag_modify_device_vports_speed(struct mlx5_core_dev *mdev, void mlx5_lag_set_vports_agg_speed(struct mlx5_lag *ldev) { struct mlx5_core_dev *mdev; + struct lag_func *pf; u32 speed; int pf_idx; @@ -1105,7 +1151,10 @@ void mlx5_lag_set_vports_agg_speed(struct mlx5_lag *ldev) speed = speed / MLX5_MAX_TX_SPEED_UNIT; mlx5_ldev_for_each(pf_idx, 0, ldev) { - mdev = ldev->pf[pf_idx].dev; + pf = mlx5_lag_pf(ldev, pf_idx); + if (!pf) + continue; + mdev = pf->dev; if (!mdev) continue; @@ -1116,12 +1165,16 @@ void mlx5_lag_set_vports_agg_speed(struct mlx5_lag *ldev) void mlx5_lag_reset_vports_speed(struct mlx5_lag *ldev) { struct mlx5_core_dev *mdev; + struct lag_func *pf; u32 speed; int pf_idx; int ret; mlx5_ldev_for_each(pf_idx, 0, ldev) { - mdev = ldev->pf[pf_idx].dev; + pf = mlx5_lag_pf(ldev, pf_idx); + if (!pf) + continue; + mdev = pf->dev; if (!mdev) continue; @@ -1152,7 +1205,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) if (idx < 0) return; - dev0 = ldev->pf[idx].dev; + dev0 = mlx5_lag_pf(ldev, idx)->dev; if (!mlx5_lag_is_ready(ldev)) { do_bond = false; } else { @@ -1182,16 +1235,19 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) mlx5_lag_add_devices(ldev); if (shared_fdb) { mlx5_ldev_for_each(i, 0, ldev) - mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch); + mlx5_eswitch_reload_ib_reps(mlx5_lag_pf(ldev, i)->dev->priv.eswitch); } return; } else if (roce_lag) { + struct mlx5_core_dev *dev; + dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; mlx5_rescan_drivers_locked(dev0); mlx5_ldev_for_each(i, idx + 1, ldev) { - if (mlx5_get_roce_state(ldev->pf[i].dev)) - mlx5_nic_vport_enable_roce(ldev->pf[i].dev); + dev = mlx5_lag_pf(ldev, i)->dev; + if (mlx5_get_roce_state(dev)) + mlx5_nic_vport_enable_roce(dev); } } else if (shared_fdb) { int i; @@ -1200,7 +1256,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) mlx5_rescan_drivers_locked(dev0); mlx5_ldev_for_each(i, 0, ldev) { - err = mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch); + err = mlx5_eswitch_reload_ib_reps(mlx5_lag_pf(ldev, i)->dev->priv.eswitch); if (err) break; } @@ -1211,7 +1267,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) mlx5_deactivate_lag(ldev); mlx5_lag_add_devices(ldev); mlx5_ldev_for_each(i, 0, ldev) - mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch); + mlx5_eswitch_reload_ib_reps(mlx5_lag_pf(ldev, i)->dev->priv.eswitch); mlx5_core_err(dev0, "Failed to enable lag\n"); return; } @@ -1243,12 +1299,15 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) struct mlx5_devcom_comp_dev *mlx5_lag_get_devcom_comp(struct mlx5_lag *ldev) { struct mlx5_devcom_comp_dev *devcom = NULL; + struct lag_func *pf; int i; mutex_lock(&ldev->lock); i = mlx5_get_next_ldev_func(ldev, 0); - if (i < MLX5_MAX_PORTS) - devcom = ldev->pf[i].dev->priv.hca_devcom_comp; + if (i < MLX5_MAX_PORTS) { + pf = mlx5_lag_pf(ldev, i); + devcom = pf->dev->priv.hca_devcom_comp; + } mutex_unlock(&ldev->lock); return devcom; } @@ -1297,6 +1356,7 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, struct netdev_lag_upper_info *lag_upper_info = NULL; bool is_bonded, is_in_lag, mode_supported; bool has_inactive = 0; + struct lag_func *pf; struct slave *slave; u8 bond_status = 0; int num_slaves = 0; @@ -1317,7 +1377,8 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, rcu_read_lock(); for_each_netdev_in_bond_rcu(upper, ndev_tmp) { mlx5_ldev_for_each(i, 0, ldev) { - if (ldev->pf[i].netdev == ndev_tmp) { + pf = mlx5_lag_pf(ldev, i); + if (pf->netdev == ndev_tmp) { idx++; break; } @@ -1538,10 +1599,12 @@ static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev, struct net_device *netdev) { unsigned int fn = mlx5_get_dev_index(dev); + struct lag_func *pf; unsigned long flags; spin_lock_irqsave(&lag_lock, flags); - ldev->pf[fn].netdev = netdev; + pf = mlx5_lag_pf(ldev, fn); + pf->netdev = netdev; ldev->tracker.netdev_state[fn].link_up = 0; ldev->tracker.netdev_state[fn].tx_enabled = 0; spin_unlock_irqrestore(&lag_lock, flags); @@ -1550,46 +1613,69 @@ static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev, static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev, struct net_device *netdev) { + struct lag_func *pf; unsigned long flags; int i; spin_lock_irqsave(&lag_lock, flags); mlx5_ldev_for_each(i, 0, ldev) { - if (ldev->pf[i].netdev == netdev) { - ldev->pf[i].netdev = NULL; + pf = mlx5_lag_pf(ldev, i); + if (pf->netdev == netdev) { + pf->netdev = NULL; break; } } spin_unlock_irqrestore(&lag_lock, flags); } -static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev, +static int mlx5_ldev_add_mdev(struct mlx5_lag *ldev, struct mlx5_core_dev *dev) { unsigned int fn = mlx5_get_dev_index(dev); + struct lag_func *pf; + int err; - ldev->pf[fn].dev = dev; + pf = xa_load(&ldev->pfs, fn); + if (!pf) { + pf = kzalloc_obj(*pf); + if (!pf) + return -ENOMEM; + + err = xa_err(xa_store(&ldev->pfs, fn, pf, GFP_KERNEL)); + if (err) { + kfree(pf); + return err; + } + } + + pf->dev = dev; dev->priv.lag = ldev; - MLX5_NB_INIT(&ldev->pf[fn].port_change_nb, + MLX5_NB_INIT(&pf->port_change_nb, mlx5_lag_mpesw_port_change_event, PORT_CHANGE); - mlx5_eq_notifier_register(dev, &ldev->pf[fn].port_change_nb); + mlx5_eq_notifier_register(dev, &pf->port_change_nb); + + return 0; } static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev, struct mlx5_core_dev *dev) { + struct lag_func *pf; int fn; fn = mlx5_get_dev_index(dev); - if (ldev->pf[fn].dev != dev) + pf = xa_load(&ldev->pfs, fn); + if (!pf || pf->dev != dev) return; - if (ldev->pf[fn].port_change_nb.nb.notifier_call) - mlx5_eq_notifier_unregister(dev, &ldev->pf[fn].port_change_nb); + if (pf->port_change_nb.nb.notifier_call) + mlx5_eq_notifier_unregister(dev, &pf->port_change_nb); - ldev->pf[fn].dev = NULL; + pf->dev = NULL; dev->priv.lag = NULL; + xa_erase(&ldev->pfs, fn); + kfree(pf); } /* Must be called with HCA devcom component lock held */ @@ -1598,6 +1684,7 @@ static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev) struct mlx5_devcom_comp_dev *pos = NULL; struct mlx5_lag *ldev = NULL; struct mlx5_core_dev *tmp_dev; + int err; tmp_dev = mlx5_devcom_get_next_peer_data(dev->priv.hca_devcom_comp, &pos); if (tmp_dev) @@ -1609,7 +1696,12 @@ static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev) mlx5_core_err(dev, "Failed to alloc lag dev\n"); return 0; } - mlx5_ldev_add_mdev(ldev, dev); + err = mlx5_ldev_add_mdev(ldev, dev); + if (err) { + mlx5_core_err(dev, "Failed to add mdev to lag dev\n"); + mlx5_ldev_put(ldev); + return 0; + } return 0; } @@ -1619,7 +1711,12 @@ static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev) return -EAGAIN; } mlx5_ldev_get(ldev); - mlx5_ldev_add_mdev(ldev, dev); + err = mlx5_ldev_add_mdev(ldev, dev); + if (err) { + mlx5_ldev_put(ldev); + mutex_unlock(&ldev->lock); + return err; + } mutex_unlock(&ldev->lock); return 0; @@ -1746,21 +1843,25 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, int mlx5_get_pre_ldev_func(struct mlx5_lag *ldev, int start_idx, int end_idx) { + struct lag_func *pf; int i; - for (i = start_idx; i >= end_idx; i--) - if (ldev->pf[i].dev) + for (i = start_idx; i >= end_idx; i--) { + pf = xa_load(&ldev->pfs, i); + if (pf && pf->dev) return i; + } return -1; } int mlx5_get_next_ldev_func(struct mlx5_lag *ldev, int start_idx) { - int i; + struct lag_func *pf; + unsigned long idx; - for (i = start_idx; i < MLX5_MAX_PORTS; i++) - if (ldev->pf[i].dev) - return i; + xa_for_each_start(&ldev->pfs, idx, pf, start_idx) + if (pf->dev) + return idx; return MLX5_MAX_PORTS; } @@ -1814,13 +1915,17 @@ bool mlx5_lag_is_master(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev; unsigned long flags; + struct lag_func *pf; bool res = false; int idx; spin_lock_irqsave(&lag_lock, flags); ldev = mlx5_lag_dev(dev); idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); - res = ldev && __mlx5_lag_is_active(ldev) && idx >= 0 && dev == ldev->pf[idx].dev; + if (ldev && __mlx5_lag_is_active(ldev) && idx >= 0) { + pf = mlx5_lag_pf(ldev, idx); + res = pf && dev == pf->dev; + } spin_unlock_irqrestore(&lag_lock, flags); return res; @@ -1899,6 +2004,7 @@ u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, { struct mlx5_lag *ldev; unsigned long flags; + struct lag_func *pf; u8 port = 0; int i; @@ -1908,7 +2014,8 @@ u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, goto unlock; mlx5_ldev_for_each(i, 0, ldev) { - if (ldev->pf[i].netdev == slave) { + pf = mlx5_lag_pf(ldev, i); + if (pf->netdev == slave) { port = i; break; } @@ -1939,6 +2046,7 @@ struct mlx5_core_dev *mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev *dev, int struct mlx5_core_dev *peer_dev = NULL; struct mlx5_lag *ldev; unsigned long flags; + struct lag_func *pf; int idx; spin_lock_irqsave(&lag_lock, flags); @@ -1948,9 +2056,11 @@ struct mlx5_core_dev *mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev *dev, int if (*i == MLX5_MAX_PORTS) goto unlock; - mlx5_ldev_for_each(idx, *i, ldev) - if (ldev->pf[idx].dev != dev) + mlx5_ldev_for_each(idx, *i, ldev) { + pf = mlx5_lag_pf(ldev, idx); + if (pf->dev != dev) break; + } if (idx == MLX5_MAX_PORTS) { *i = idx; @@ -1958,7 +2068,8 @@ struct mlx5_core_dev *mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev *dev, int } *i = idx + 1; - peer_dev = ldev->pf[idx].dev; + pf = mlx5_lag_pf(ldev, idx); + peer_dev = pf->dev; unlock: spin_unlock_irqrestore(&lag_lock, flags); @@ -1976,6 +2087,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, int ret = 0, i, j, idx = 0; struct mlx5_lag *ldev; unsigned long flags; + struct lag_func *pf; int num_ports; void *out; @@ -1995,8 +2107,10 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, ldev = mlx5_lag_dev(dev); if (ldev && __mlx5_lag_is_active(ldev)) { num_ports = ldev->ports; - mlx5_ldev_for_each(i, 0, ldev) - mdev[idx++] = ldev->pf[i].dev; + mlx5_ldev_for_each(i, 0, ldev) { + pf = mlx5_lag_pf(ldev, i); + mdev[idx++] = pf->dev; + } } else { num_ports = 1; mdev[MLX5_LAG_P1] = dev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h index be1afece5fdc..09758871b3da 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h @@ -64,7 +64,7 @@ struct mlx5_lag { int mode_changes_in_progress; u8 v2p_map[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS]; struct kref ref; - struct lag_func pf[MLX5_MAX_PORTS]; + struct xarray pfs; struct lag_tracker tracker; struct workqueue_struct *wq; struct delayed_work bond_work; @@ -84,6 +84,12 @@ mlx5_lag_dev(struct mlx5_core_dev *dev) return dev->priv.lag; } +static inline struct lag_func * +mlx5_lag_pf(struct mlx5_lag *ldev, unsigned int idx) +{ + return xa_load(&ldev->pfs, idx); +} + static inline bool __mlx5_lag_is_active(struct mlx5_lag *ldev) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c index c4c2bf33ef35..f42e051fa7e7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c @@ -29,8 +29,8 @@ static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev) if (ldev->ports > MLX5_LAG_MULTIPATH_OFFLOADS_SUPPORTED_PORTS) return false; - return mlx5_esw_multipath_prereq(ldev->pf[idx0].dev, - ldev->pf[idx1].dev); + return mlx5_esw_multipath_prereq(mlx5_lag_pf(ldev, idx0)->dev, + mlx5_lag_pf(ldev, idx1)->dev); } bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev) @@ -80,18 +80,18 @@ static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev, tracker.netdev_state[idx1].link_up = true; break; default: - mlx5_core_warn(ldev->pf[idx0].dev, + mlx5_core_warn(mlx5_lag_pf(ldev, idx0)->dev, "Invalid affinity port %d", port); return; } if (tracker.netdev_state[idx0].tx_enabled) - mlx5_notifier_call_chain(ldev->pf[idx0].dev->priv.events, + mlx5_notifier_call_chain(mlx5_lag_pf(ldev, idx0)->dev->priv.events, MLX5_DEV_EVENT_PORT_AFFINITY, (void *)0); if (tracker.netdev_state[idx1].tx_enabled) - mlx5_notifier_call_chain(ldev->pf[idx1].dev->priv.events, + mlx5_notifier_call_chain(mlx5_lag_pf(ldev, idx1)->dev->priv.events, MLX5_DEV_EVENT_PORT_AFFINITY, (void *)0); @@ -146,7 +146,7 @@ mlx5_lag_get_next_fib_dev(struct mlx5_lag *ldev, fib_dev = fib_info_nh(fi, i)->fib_nh_dev; ldev_idx = mlx5_lag_dev_get_netdev_idx(ldev, fib_dev); if (ldev_idx >= 0) - return ldev->pf[ldev_idx].netdev; + return mlx5_lag_pf(ldev, ldev_idx)->netdev; } return NULL; @@ -178,7 +178,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event, mp->fib.dst_len <= fen_info->dst_len && !(mp->fib.dst_len == fen_info->dst_len && fi->fib_priority < mp->fib.priority)) { - mlx5_core_dbg(ldev->pf[idx].dev, + mlx5_core_dbg(mlx5_lag_pf(ldev, idx)->dev, "Multipath entry with lower priority was rejected\n"); return; } @@ -194,7 +194,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event, } if (nh_dev0 == nh_dev1) { - mlx5_core_warn(ldev->pf[idx].dev, + mlx5_core_warn(mlx5_lag_pf(ldev, idx)->dev, "Multipath offload doesn't support routes with multiple nexthops of the same device"); return; } @@ -203,7 +203,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event, if (__mlx5_lag_is_active(ldev)) { mlx5_ldev_for_each(i, 0, ldev) { dev_idx++; - if (ldev->pf[i].netdev == nh_dev0) + if (mlx5_lag_pf(ldev, i)->netdev == nh_dev0) break; } mlx5_lag_set_port_affinity(ldev, dev_idx); @@ -240,7 +240,7 @@ static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev, /* nh added/removed */ if (event == FIB_EVENT_NH_DEL) { mlx5_ldev_for_each(i, 0, ldev) { - if (ldev->pf[i].netdev == fib_nh->fib_nh_dev) + if (mlx5_lag_pf(ldev, i)->netdev == fib_nh->fib_nh_dev) break; dev_idx++; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c index 74d5c2ed14ff..0e7d206cd594 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c @@ -16,7 +16,7 @@ static void mlx5_mpesw_metadata_cleanup(struct mlx5_lag *ldev) int i; mlx5_ldev_for_each(i, 0, ldev) { - dev = ldev->pf[i].dev; + dev = mlx5_lag_pf(ldev, i)->dev; esw = dev->priv.eswitch; pf_metadata = ldev->lag_mpesw.pf_metadata[i]; if (!pf_metadata) @@ -37,7 +37,7 @@ static int mlx5_mpesw_metadata_set(struct mlx5_lag *ldev) int i, err; mlx5_ldev_for_each(i, 0, ldev) { - dev = ldev->pf[i].dev; + dev = mlx5_lag_pf(ldev, i)->dev; esw = dev->priv.eswitch; pf_metadata = mlx5_esw_match_metadata_alloc(esw); if (!pf_metadata) { @@ -53,7 +53,7 @@ static int mlx5_mpesw_metadata_set(struct mlx5_lag *ldev) } mlx5_ldev_for_each(i, 0, ldev) { - dev = ldev->pf[i].dev; + dev = mlx5_lag_pf(ldev, i)->dev; mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_MULTIPORT_ESW, (void *)0); } @@ -82,7 +82,7 @@ static int mlx5_lag_enable_mpesw(struct mlx5_lag *ldev) if (idx < 0) return -EINVAL; - dev0 = ldev->pf[idx].dev; + dev0 = mlx5_lag_pf(ldev, idx)->dev; if (mlx5_eswitch_mode(dev0) != MLX5_ESWITCH_OFFLOADS || !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table) || !MLX5_CAP_GEN(dev0, create_lag_when_not_master_up) || @@ -105,7 +105,7 @@ static int mlx5_lag_enable_mpesw(struct mlx5_lag *ldev) dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; mlx5_rescan_drivers_locked(dev0); mlx5_ldev_for_each(i, 0, ldev) { - err = mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch); + err = mlx5_eswitch_reload_ib_reps(mlx5_lag_pf(ldev, i)->dev->priv.eswitch); if (err) goto err_rescan_drivers; } @@ -121,7 +121,7 @@ err_rescan_drivers: err_add_devices: mlx5_lag_add_devices(ldev); mlx5_ldev_for_each(i, 0, ldev) - mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch); + mlx5_eswitch_reload_ib_reps(mlx5_lag_pf(ldev, i)->dev->priv.eswitch); mlx5_mpesw_metadata_cleanup(ldev); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c index 16c7d16215c4..7e9e3e81977d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c @@ -50,7 +50,7 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev, if (first_idx < 0) return -EINVAL; - dev = ldev->pf[first_idx].dev; + dev = mlx5_lag_pf(ldev, first_idx)->dev; ft_attr.max_fte = ldev->ports * ldev->buckets; ft_attr.level = MLX5_LAG_FT_LEVEL_DEFINER; @@ -84,8 +84,9 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev, idx = i * ldev->buckets + j; affinity = ports[idx]; - dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[affinity - 1].dev, - vhca_id); + dest.vport.vhca_id = + MLX5_CAP_GEN(mlx5_lag_pf(ldev, affinity - 1)->dev, + vhca_id); lag_definer->rules[idx] = mlx5_add_flow_rules(lag_definer->ft, NULL, &flow_act, &dest, 1); @@ -307,7 +308,7 @@ mlx5_lag_create_definer(struct mlx5_lag *ldev, enum netdev_lag_hash hash, if (first_idx < 0) return ERR_PTR(-EINVAL); - dev = ldev->pf[first_idx].dev; + dev = mlx5_lag_pf(ldev, first_idx)->dev; lag_definer = kzalloc_obj(*lag_definer); if (!lag_definer) return ERR_PTR(-ENOMEM); @@ -356,7 +357,7 @@ static void mlx5_lag_destroy_definer(struct mlx5_lag *ldev, if (first_idx < 0) return; - dev = ldev->pf[first_idx].dev; + dev = mlx5_lag_pf(ldev, first_idx)->dev; mlx5_ldev_for_each(i, first_idx, ldev) { for (j = 0; j < ldev->buckets; j++) { idx = i * ldev->buckets + j; @@ -520,7 +521,7 @@ static int mlx5_lag_create_ttc_table(struct mlx5_lag *ldev) if (first_idx < 0) return -EINVAL; - dev = ldev->pf[first_idx].dev; + dev = mlx5_lag_pf(ldev, first_idx)->dev; mlx5_lag_set_outer_ttc_params(ldev, &ttc_params); port_sel->outer.ttc = mlx5_create_ttc_table(dev, &ttc_params); return PTR_ERR_OR_ZERO(port_sel->outer.ttc); @@ -536,7 +537,7 @@ static int mlx5_lag_create_inner_ttc_table(struct mlx5_lag *ldev) if (first_idx < 0) return -EINVAL; - dev = ldev->pf[first_idx].dev; + dev = mlx5_lag_pf(ldev, first_idx)->dev; mlx5_lag_set_inner_ttc_params(ldev, &ttc_params); port_sel->inner.ttc = mlx5_create_inner_ttc_table(dev, &ttc_params); return PTR_ERR_OR_ZERO(port_sel->inner.ttc); @@ -594,8 +595,9 @@ static int __mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev, if (ldev->v2p_map[idx] == ports[idx]) continue; - dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[ports[idx] - 1].dev, - vhca_id); + dest.vport.vhca_id = + MLX5_CAP_GEN(mlx5_lag_pf(ldev, ports[idx] - 1)->dev, + vhca_id); err = mlx5_modify_rule_destination(def->rules[idx], &dest, NULL); if (err) return err; From 2b204cdb12068c7087f44a6a6d0af6f71fd72237 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Mon, 9 Mar 2026 11:34:30 +0200 Subject: [PATCH 0563/1561] net/mlx5: LAG, use xa_alloc to manage LAG device indices Replace the use of mlx5_get_dev_index() for xarray indexing with xa_alloc() to dynamically allocate indices. This decouples the LAG xarray index from the physical device index. Update mlx5_ldev_add_netdev/remove_mdev to find entries by dev pointer and replace mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1) calls with mlx5_lag_get_master_idx() where appropriate. No functional changes intended Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260309093435.1850724-5-tariqt@nvidia.com Signed-off-by: Leon Romanovsky --- .../net/ethernet/mellanox/mlx5/core/lag/lag.c | 244 ++++++++++++++---- .../net/ethernet/mellanox/mlx5/core/lag/lag.h | 29 +++ .../ethernet/mellanox/mlx5/core/lag/mpesw.c | 3 +- .../mellanox/mlx5/core/lag/port_sel.c | 12 +- 4 files changed, 231 insertions(+), 57 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 81b1f84f902e..4beee64c937a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -288,7 +288,7 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev) kref_init(&ldev->ref); mutex_init(&ldev->lock); - xa_init(&ldev->pfs); + xa_init_flags(&ldev->pfs, XA_FLAGS_ALLOC); INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work); INIT_WORK(&ldev->speed_update_work, mlx5_mpesw_speed_update_work); @@ -326,14 +326,42 @@ int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, return -ENOENT; } -int mlx5_lag_get_dev_index_by_seq(struct mlx5_lag *ldev, int seq) +static int mlx5_lag_get_master_idx(struct mlx5_lag *ldev) { - int i, num = 0; + unsigned long idx = 0; + void *entry; if (!ldev) return -ENOENT; + entry = xa_find(&ldev->pfs, &idx, U8_MAX, MLX5_LAG_XA_MARK_MASTER); + if (!entry) + return -ENOENT; + + return (int)idx; +} + +int mlx5_lag_get_dev_index_by_seq(struct mlx5_lag *ldev, int seq) +{ + int master_idx, i, num = 0; + + if (!ldev) + return -ENOENT; + + master_idx = mlx5_lag_get_master_idx(ldev); + + /* If seq 0 is requested and there's a primary PF, return it */ + if (master_idx >= 0) { + if (seq == 0) + return master_idx; + num++; + } + mlx5_ldev_for_each(i, 0, ldev) { + /* Skip the primary PF in the loop */ + if (i == master_idx) + continue; + if (num == seq) return i; num++; @@ -341,6 +369,75 @@ int mlx5_lag_get_dev_index_by_seq(struct mlx5_lag *ldev, int seq) return -ENOENT; } +/* Devcom events for LAG master marking */ +#define LAG_DEVCOM_PAIR (0) +#define LAG_DEVCOM_UNPAIR (1) + +static void mlx5_lag_mark_master(struct mlx5_lag *ldev) +{ + int lowest_dev_idx = INT_MAX; + struct lag_func *pf; + int master_xa_idx = -1; + int dev_idx; + int i; + + mlx5_ldev_for_each(i, 0, ldev) { + pf = mlx5_lag_pf(ldev, i); + dev_idx = mlx5_get_dev_index(pf->dev); + if (dev_idx < lowest_dev_idx) { + lowest_dev_idx = dev_idx; + master_xa_idx = i; + } + } + + if (master_xa_idx >= 0) + xa_set_mark(&ldev->pfs, master_xa_idx, MLX5_LAG_XA_MARK_MASTER); +} + +static void mlx5_lag_clear_master(struct mlx5_lag *ldev) +{ + unsigned long idx = 0; + void *entry; + + entry = xa_find(&ldev->pfs, &idx, U8_MAX, MLX5_LAG_XA_MARK_MASTER); + if (!entry) + return; + + xa_clear_mark(&ldev->pfs, idx, MLX5_LAG_XA_MARK_MASTER); +} + +/* Devcom event handler to manage LAG master marking */ +static int mlx5_lag_devcom_event(int event, void *my_data, void *event_data) +{ + struct mlx5_core_dev *dev = my_data; + struct mlx5_lag *ldev; + int idx; + + ldev = mlx5_lag_dev(dev); + if (!ldev) + return 0; + + mutex_lock(&ldev->lock); + switch (event) { + case LAG_DEVCOM_PAIR: + /* No need to mark more than once */ + idx = mlx5_lag_get_master_idx(ldev); + if (idx >= 0) + break; + /* Check if all LAG ports are now registered */ + if (mlx5_lag_num_devs(ldev) == ldev->ports) + mlx5_lag_mark_master(ldev); + break; + + case LAG_DEVCOM_UNPAIR: + /* Clear master mark when a device is removed */ + mlx5_lag_clear_master(ldev); + break; + } + mutex_unlock(&ldev->lock); + return 0; +} + int mlx5_lag_num_devs(struct mlx5_lag *ldev) { int i, num = 0; @@ -411,11 +508,12 @@ static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, /* Use native mapping by default where each port's buckets * point the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc + * ports[] values are 1-indexed device indices for FW. */ mlx5_ldev_for_each(i, 0, ldev) { for (j = 0; j < buckets; j++) { idx = i * buckets + j; - ports[idx] = i + 1; + ports[idx] = mlx5_lag_xa_to_dev_idx(ldev, i) + 1; } } @@ -427,8 +525,12 @@ static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, /* Go over the disabled ports and for each assign a random active port */ for (i = 0; i < disabled_ports_num; i++) { for (j = 0; j < buckets; j++) { + int rand_xa_idx; + get_random_bytes(&rand, 4); - ports[disabled[i] * buckets + j] = enabled[rand % enabled_ports_num] + 1; + rand_xa_idx = enabled[rand % enabled_ports_num]; + ports[disabled[i] * buckets + j] = + mlx5_lag_xa_to_dev_idx(ldev, rand_xa_idx) + 1; } } } @@ -683,20 +785,23 @@ char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags) static int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev) { - int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); + int master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); struct mlx5_eswitch *master_esw; struct mlx5_core_dev *dev0; int i, j; int err; - if (first_idx < 0) + if (master_idx < 0) return -EINVAL; - dev0 = mlx5_lag_pf(ldev, first_idx)->dev; + dev0 = mlx5_lag_pf(ldev, master_idx)->dev; master_esw = dev0->priv.eswitch; - mlx5_ldev_for_each(i, first_idx + 1, ldev) { + mlx5_ldev_for_each(i, 0, ldev) { struct mlx5_eswitch *slave_esw; + if (i == master_idx) + continue; + slave_esw = mlx5_lag_pf(ldev, i)->dev->priv.eswitch; err = mlx5_eswitch_offloads_single_fdb_add_one(master_esw, @@ -706,9 +811,12 @@ static int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev) } return 0; err: - mlx5_ldev_for_each_reverse(j, i, first_idx + 1, ldev) + mlx5_ldev_for_each_reverse(j, i, 0, ldev) { + if (j == master_idx) + continue; mlx5_eswitch_offloads_single_fdb_del_one(master_esw, mlx5_lag_pf(ldev, j)->dev->priv.eswitch); + } return err; } @@ -717,8 +825,8 @@ static int mlx5_create_lag(struct mlx5_lag *ldev, enum mlx5_lag_mode mode, unsigned long flags) { - bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags); int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); + bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags); u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {}; struct mlx5_core_dev *dev0; int err; @@ -764,16 +872,17 @@ int mlx5_activate_lag(struct mlx5_lag *ldev, enum mlx5_lag_mode mode, bool shared_fdb) { - int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); bool roce_lag = mode == MLX5_LAG_MODE_ROCE; struct mlx5_core_dev *dev0; unsigned long flags = 0; + int master_idx; int err; - if (first_idx < 0) + master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); + if (master_idx < 0) return -EINVAL; - dev0 = mlx5_lag_pf(ldev, first_idx)->dev; + dev0 = mlx5_lag_pf(ldev, master_idx)->dev; err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags); if (err) return err; @@ -817,7 +926,7 @@ int mlx5_activate_lag(struct mlx5_lag *ldev, int mlx5_deactivate_lag(struct mlx5_lag *ldev) { - int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); + int master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {}; bool roce_lag = __mlx5_lag_is_roce(ldev); unsigned long flags = ldev->mode_flags; @@ -826,19 +935,22 @@ int mlx5_deactivate_lag(struct mlx5_lag *ldev) int err; int i; - if (first_idx < 0) + if (master_idx < 0) return -EINVAL; - dev0 = mlx5_lag_pf(ldev, first_idx)->dev; + dev0 = mlx5_lag_pf(ldev, master_idx)->dev; master_esw = dev0->priv.eswitch; ldev->mode = MLX5_LAG_MODE_NONE; ldev->mode_flags = 0; mlx5_lag_mp_reset(ldev); if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) { - mlx5_ldev_for_each(i, first_idx + 1, ldev) + mlx5_ldev_for_each(i, 0, ldev) { + if (i == master_idx) + continue; mlx5_eswitch_offloads_single_fdb_del_one(master_esw, mlx5_lag_pf(ldev, i)->dev->priv.eswitch); + } clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags); } @@ -868,7 +980,7 @@ int mlx5_deactivate_lag(struct mlx5_lag *ldev) bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) { - int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); + int master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); #ifdef CONFIG_MLX5_ESWITCH struct mlx5_core_dev *dev; u8 mode; @@ -877,7 +989,7 @@ bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) bool roce_support; int i; - if (first_idx < 0 || mlx5_lag_num_devs(ldev) != ldev->ports) + if (master_idx < 0 || mlx5_lag_num_devs(ldev) != ldev->ports) return false; #ifdef CONFIG_MLX5_ESWITCH @@ -888,7 +1000,7 @@ bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) return false; } - pf = mlx5_lag_pf(ldev, first_idx); + pf = mlx5_lag_pf(ldev, master_idx); dev = pf->dev; mode = mlx5_eswitch_mode(dev); mlx5_ldev_for_each(i, 0, ldev) { @@ -904,9 +1016,11 @@ bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) return false; } #endif - pf = mlx5_lag_pf(ldev, first_idx); + pf = mlx5_lag_pf(ldev, master_idx); roce_support = mlx5_get_roce_state(pf->dev); - mlx5_ldev_for_each(i, first_idx + 1, ldev) { + mlx5_ldev_for_each(i, 0, ldev) { + if (i == master_idx) + continue; pf = mlx5_lag_pf(ldev, i); if (mlx5_get_roce_state(pf->dev) != roce_support) return false; @@ -967,8 +1081,11 @@ void mlx5_disable_lag(struct mlx5_lag *ldev) dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; mlx5_rescan_drivers_locked(dev0); } - mlx5_ldev_for_each(i, idx + 1, ldev) + mlx5_ldev_for_each(i, 0, ldev) { + if (i == idx) + continue; mlx5_nic_vport_disable_roce(mlx5_lag_pf(ldev, i)->dev); + } } err = mlx5_deactivate_lag(ldev); @@ -986,14 +1103,18 @@ void mlx5_disable_lag(struct mlx5_lag *ldev) bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev) { - int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); struct mlx5_core_dev *dev; + bool ret = false; + int idx; int i; + idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); if (idx < 0) return false; - mlx5_ldev_for_each(i, idx + 1, ldev) { + mlx5_ldev_for_each(i, 0, ldev) { + if (i == idx) + continue; dev = mlx5_lag_pf(ldev, i)->dev; if (is_mdev_switchdev_mode(dev) && mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) && @@ -1011,9 +1132,9 @@ bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev) mlx5_esw_offloads_devcom_is_ready(dev->priv.eswitch) && MLX5_CAP_ESW(dev, esw_shared_ingress_acl) && mlx5_eswitch_get_npeers(dev->priv.eswitch) == MLX5_CAP_GEN(dev, num_lag_ports) - 1) - return true; + ret = true; - return false; + return ret; } static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev) @@ -1239,12 +1360,16 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) } return; - } else if (roce_lag) { + } + + if (roce_lag) { struct mlx5_core_dev *dev; dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; mlx5_rescan_drivers_locked(dev0); - mlx5_ldev_for_each(i, idx + 1, ldev) { + mlx5_ldev_for_each(i, 0, ldev) { + if (i == idx) + continue; dev = mlx5_lag_pf(ldev, i)->dev; if (mlx5_get_roce_state(dev)) mlx5_nic_vport_enable_roce(dev); @@ -1598,15 +1723,21 @@ static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev, struct mlx5_core_dev *dev, struct net_device *netdev) { - unsigned int fn = mlx5_get_dev_index(dev); struct lag_func *pf; unsigned long flags; + int i; spin_lock_irqsave(&lag_lock, flags); - pf = mlx5_lag_pf(ldev, fn); - pf->netdev = netdev; - ldev->tracker.netdev_state[fn].link_up = 0; - ldev->tracker.netdev_state[fn].tx_enabled = 0; + /* Find pf entry by matching dev pointer */ + mlx5_ldev_for_each(i, 0, ldev) { + pf = mlx5_lag_pf(ldev, i); + if (pf->dev == dev) { + pf->netdev = netdev; + ldev->tracker.netdev_state[i].link_up = 0; + ldev->tracker.netdev_state[i].tx_enabled = 0; + break; + } + } spin_unlock_irqrestore(&lag_lock, flags); } @@ -1631,23 +1762,22 @@ static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev, static int mlx5_ldev_add_mdev(struct mlx5_lag *ldev, struct mlx5_core_dev *dev) { - unsigned int fn = mlx5_get_dev_index(dev); struct lag_func *pf; + u32 idx; int err; - pf = xa_load(&ldev->pfs, fn); - if (!pf) { - pf = kzalloc_obj(*pf); - if (!pf) - return -ENOMEM; + pf = kzalloc_obj(*pf); + if (!pf) + return -ENOMEM; - err = xa_err(xa_store(&ldev->pfs, fn, pf, GFP_KERNEL)); - if (err) { - kfree(pf); - return err; - } + err = xa_alloc(&ldev->pfs, &idx, pf, XA_LIMIT(0, MLX5_MAX_PORTS - 1), + GFP_KERNEL); + if (err) { + kfree(pf); + return err; } + pf->idx = idx; pf->dev = dev; dev->priv.lag = ldev; @@ -1662,11 +1792,14 @@ static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev, struct mlx5_core_dev *dev) { struct lag_func *pf; - int fn; + int i; - fn = mlx5_get_dev_index(dev); - pf = xa_load(&ldev->pfs, fn); - if (!pf || pf->dev != dev) + mlx5_ldev_for_each(i, 0, ldev) { + pf = mlx5_lag_pf(ldev, i); + if (pf->dev == dev) + break; + } + if (i >= MLX5_MAX_PORTS) return; if (pf->port_change_nb.nb.notifier_call) @@ -1674,7 +1807,7 @@ static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev, pf->dev = NULL; dev->priv.lag = NULL; - xa_erase(&ldev->pfs, fn); + xa_erase(&ldev->pfs, pf->idx); kfree(pf); } @@ -1744,7 +1877,8 @@ static int mlx5_lag_register_hca_devcom_comp(struct mlx5_core_dev *dev) dev->priv.hca_devcom_comp = mlx5_devcom_register_component(dev->priv.devc, MLX5_DEVCOM_HCA_PORTS, - &attr, NULL, dev); + &attr, mlx5_lag_devcom_event, + dev); if (!dev->priv.hca_devcom_comp) { mlx5_core_err(dev, "Failed to register devcom HCA component."); @@ -1775,6 +1909,9 @@ recheck: } mlx5_ldev_remove_mdev(ldev, dev); mutex_unlock(&ldev->lock); + /* Send devcom event to notify peers that a device is being removed */ + mlx5_devcom_send_event(dev->priv.hca_devcom_comp, + LAG_DEVCOM_UNPAIR, LAG_DEVCOM_UNPAIR, dev); mlx5_lag_unregister_hca_devcom_comp(dev); mlx5_ldev_put(ldev); } @@ -1798,6 +1935,9 @@ recheck: msleep(100); goto recheck; } + /* Send devcom event to notify peers that a device was added */ + mlx5_devcom_send_event(dev->priv.hca_devcom_comp, + LAG_DEVCOM_PAIR, LAG_DEVCOM_UNPAIR, dev); mlx5_ldev_add_debugfs(dev); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h index 09758871b3da..30cbd61768f8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h @@ -7,6 +7,12 @@ #include #define MLX5_LAG_MAX_HASH_BUCKETS 16 +/* XArray mark for the LAG master device + * (device with lowest mlx5_get_dev_index). + * Note: XA_MARK_0 is reserved by XA_FLAGS_ALLOC for free-slot tracking. + */ +#define MLX5_LAG_XA_MARK_MASTER XA_MARK_1 + #include "mlx5_core.h" #include "mp.h" #include "port_sel.h" @@ -39,6 +45,7 @@ struct lag_func { struct mlx5_core_dev *dev; struct net_device *netdev; bool has_drop; + unsigned int idx; /* xarray index assigned by LAG */ struct mlx5_nb port_change_nb; }; @@ -90,6 +97,28 @@ mlx5_lag_pf(struct mlx5_lag *ldev, unsigned int idx) return xa_load(&ldev->pfs, idx); } +/* Get device index (mlx5_get_dev_index) from xarray index */ +static inline int mlx5_lag_xa_to_dev_idx(struct mlx5_lag *ldev, int xa_idx) +{ + struct lag_func *pf = mlx5_lag_pf(ldev, xa_idx); + + return pf ? mlx5_get_dev_index(pf->dev) : -ENOENT; +} + +/* Find lag_func by device index (reverse lookup from mlx5_get_dev_index) */ +static inline struct lag_func * +mlx5_lag_pf_by_dev_idx(struct mlx5_lag *ldev, int dev_idx) +{ + struct lag_func *pf; + unsigned long idx; + + xa_for_each(&ldev->pfs, idx, pf) { + if (mlx5_get_dev_index(pf->dev) == dev_idx) + return pf; + } + return NULL; +} + static inline bool __mlx5_lag_is_active(struct mlx5_lag *ldev) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c index 0e7d206cd594..5eea12a6887a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c @@ -67,9 +67,9 @@ err_metadata: static int mlx5_lag_enable_mpesw(struct mlx5_lag *ldev) { + int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); struct mlx5_core_dev *dev0; int err; - int idx; int i; if (ldev->mode == MLX5_LAG_MODE_MPESW) @@ -78,7 +78,6 @@ static int mlx5_lag_enable_mpesw(struct mlx5_lag *ldev) if (ldev->mode != MLX5_LAG_MODE_NONE) return -EINVAL; - idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); if (idx < 0) return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c index 7e9e3e81977d..2a034b2a3eee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c @@ -84,8 +84,11 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev, idx = i * ldev->buckets + j; affinity = ports[idx]; + /* affinity is 1-indexed device index, + * use reverse lookup. + */ dest.vport.vhca_id = - MLX5_CAP_GEN(mlx5_lag_pf(ldev, affinity - 1)->dev, + MLX5_CAP_GEN(mlx5_lag_pf_by_dev_idx(ldev, affinity - 1)->dev, vhca_id); lag_definer->rules[idx] = mlx5_add_flow_rules(lag_definer->ft, NULL, &flow_act, @@ -358,7 +361,7 @@ static void mlx5_lag_destroy_definer(struct mlx5_lag *ldev, return; dev = mlx5_lag_pf(ldev, first_idx)->dev; - mlx5_ldev_for_each(i, first_idx, ldev) { + mlx5_ldev_for_each(i, 0, ldev) { for (j = 0; j < ldev->buckets; j++) { idx = i * ldev->buckets + j; mlx5_del_flow_rules(lag_definer->rules[idx]); @@ -595,8 +598,11 @@ static int __mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev, if (ldev->v2p_map[idx] == ports[idx]) continue; + /* ports[] contains 1-indexed device indices, + * use reverse lookup. + */ dest.vport.vhca_id = - MLX5_CAP_GEN(mlx5_lag_pf(ldev, ports[idx] - 1)->dev, + MLX5_CAP_GEN(mlx5_lag_pf_by_dev_idx(ldev, ports[idx] - 1)->dev, vhca_id); err = mlx5_modify_rule_destination(def->rules[idx], &dest, NULL); if (err) From da0349d0ffc7b831a589b1fcec59d9d94aa10e55 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Mon, 9 Mar 2026 11:34:31 +0200 Subject: [PATCH 0564/1561] net/mlx5: E-switch, modify peer miss rule index to vhca_id Replace the fixed-size array peer_miss_rules[MLX5_MAX_PORTS], indexed by physical function index, with an xarray indexed by vhca_id. This decouples peer_miss_rules from mlx5_get_dev_index(), removing the dependency on a PF-derived index and the arbitrary MLX5_MAX_PORTS bounds check. Using vhca_id as the key simplifies insertion/removal logic and scales better across multi-peer topologies. Initialize and destroy the xarray alongside the existing esw->paired xarray in mlx5_esw_offloads_devcom_init/cleanup respectively. Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260309093435.1850724-6-tariqt@nvidia.com Signed-off-by: Leon Romanovsky --- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 2 +- .../mellanox/mlx5/core/eswitch_offloads.c | 20 +++++++++---------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 6841caef02d1..96309a732d50 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -273,7 +273,7 @@ struct mlx5_eswitch_fdb { struct mlx5_flow_group *send_to_vport_grp; struct mlx5_flow_group *send_to_vport_meta_grp; struct mlx5_flow_group *peer_miss_grp; - struct mlx5_flow_handle **peer_miss_rules[MLX5_MAX_PORTS]; + struct xarray peer_miss_rules; struct mlx5_flow_group *miss_grp; struct mlx5_flow_handle **send_to_vport_meta_rules; struct mlx5_flow_handle *miss_rule_uni; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 1366f6e489bd..90e6f97bdf4a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1190,7 +1190,7 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, struct mlx5_flow_handle *flow; struct mlx5_vport *peer_vport; struct mlx5_flow_spec *spec; - int err, pfindex; + int err; unsigned long i; void *misc; @@ -1274,14 +1274,10 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, } } - pfindex = mlx5_get_dev_index(peer_dev); - if (pfindex >= MLX5_MAX_PORTS) { - esw_warn(esw->dev, "Peer dev index(%d) is over the max num defined(%d)\n", - pfindex, MLX5_MAX_PORTS); - err = -EINVAL; + err = xa_insert(&esw->fdb_table.offloads.peer_miss_rules, + MLX5_CAP_GEN(peer_dev, vhca_id), flows, GFP_KERNEL); + if (err) goto add_ec_vf_flow_err; - } - esw->fdb_table.offloads.peer_miss_rules[pfindex] = flows; kvfree(spec); return 0; @@ -1323,12 +1319,13 @@ static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw, struct mlx5_core_dev *peer_dev) { struct mlx5_eswitch *peer_esw = peer_dev->priv.eswitch; - u16 peer_index = mlx5_get_dev_index(peer_dev); + u16 peer_vhca_id = MLX5_CAP_GEN(peer_dev, vhca_id); struct mlx5_flow_handle **flows; struct mlx5_vport *peer_vport; unsigned long i; - flows = esw->fdb_table.offloads.peer_miss_rules[peer_index]; + flows = xa_erase(&esw->fdb_table.offloads.peer_miss_rules, + peer_vhca_id); if (!flows) return; @@ -1353,7 +1350,6 @@ static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw, } kvfree(flows); - esw->fdb_table.offloads.peer_miss_rules[peer_index] = NULL; } static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) @@ -3250,6 +3246,7 @@ void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw, return; xa_init(&esw->paired); + xa_init(&esw->fdb_table.offloads.peer_miss_rules); esw->num_peers = 0; esw->devcom = mlx5_devcom_register_component(esw->dev->priv.devc, MLX5_DEVCOM_ESW_OFFLOADS, @@ -3277,6 +3274,7 @@ void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) mlx5_devcom_unregister_component(esw->devcom); xa_destroy(&esw->paired); + xa_destroy(&esw->fdb_table.offloads.peer_miss_rules); esw->devcom = NULL; } From 971b28accc09436fe6a6d5afd667dcbfb3ed7e03 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Mon, 9 Mar 2026 11:34:32 +0200 Subject: [PATCH 0565/1561] net/mlx5: LAG, replace mlx5_get_dev_index with LAG sequence number Introduce mlx5_lag_get_dev_seq() which returns a device's sequence number within the LAG: master is always 0, remaining devices numbered sequentially. This provides a stable index for peer flow tracking and vport ordering without depending on native_port_num. Replace mlx5_get_dev_index() usage in en_tc.c (peer flow array indexing) and ib_rep.c (vport index ordering) with the new API. Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260309093435.1850724-7-tariqt@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/ib_rep.c | 4 ++- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 9 ++--- .../net/ethernet/mellanox/mlx5/core/lag/lag.c | 34 +++++++++++++++++++ include/linux/mlx5/lag.h | 11 ++++++ 4 files changed, 53 insertions(+), 5 deletions(-) create mode 100644 include/linux/mlx5/lag.h diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index 621834d75205..df8f049c5806 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -3,6 +3,7 @@ * Copyright (c) 2018 Mellanox Technologies. All rights reserved. */ +#include #include #include "ib_rep.h" #include "srq.h" @@ -134,7 +135,8 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) /* Only 1 ib port is the representor for all uplinks */ peer_n_ports--; - if (mlx5_get_dev_index(peer_dev) < mlx5_get_dev_index(dev)) + if (mlx5_lag_get_dev_seq(peer_dev) < + mlx5_lag_get_dev_seq(dev)) vport_index += peer_n_ports; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 1434b65d4746..397a93584fd6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -2131,7 +2132,7 @@ static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow, mutex_unlock(&esw->offloads.peer_mutex); list_for_each_entry_safe(peer_flow, tmp, &flow->peer_flows, peer_flows) { - if (peer_index != mlx5_get_dev_index(peer_flow->priv->mdev)) + if (peer_index != mlx5_lag_get_dev_seq(peer_flow->priv->mdev)) continue; list_del(&peer_flow->peer_flows); @@ -2154,7 +2155,7 @@ static void mlx5e_tc_del_fdb_peers_flow(struct mlx5e_tc_flow *flow) devcom = flow->priv->mdev->priv.eswitch->devcom; mlx5_devcom_for_each_peer_entry(devcom, peer_esw, pos) { - i = mlx5_get_dev_index(peer_esw->dev); + i = mlx5_lag_get_dev_seq(peer_esw->dev); mlx5e_tc_del_fdb_peer_flow(flow, i); } } @@ -4584,7 +4585,7 @@ static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f, struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr; struct mlx5e_tc_flow_parse_attr *parse_attr; - int i = mlx5_get_dev_index(peer_esw->dev); + int i = mlx5_lag_get_dev_seq(peer_esw->dev); struct mlx5e_rep_priv *peer_urpriv; struct mlx5e_tc_flow *peer_flow; struct mlx5_core_dev *in_mdev; @@ -5525,7 +5526,7 @@ void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw) devcom = esw->devcom; mlx5_devcom_for_each_peer_entry(devcom, peer_esw, pos) { - i = mlx5_get_dev_index(peer_esw->dev); + i = mlx5_lag_get_dev_seq(peer_esw->dev); list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows[i], peer[i]) mlx5e_tc_del_fdb_peers_flow(flow); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 4beee64c937a..51ec8f61ecbb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "lib/mlx5.h" #include "lib/devcom.h" #include "mlx5_core.h" @@ -369,6 +370,39 @@ int mlx5_lag_get_dev_index_by_seq(struct mlx5_lag *ldev, int seq) return -ENOENT; } +/* Reverse of mlx5_lag_get_dev_index_by_seq: given a device, return its + * sequence number in the LAG. Master is always 0, others numbered + * sequentially starting from 1. + */ +int mlx5_lag_get_dev_seq(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev = mlx5_lag_dev(dev); + int master_idx, i, num = 1; + struct lag_func *pf; + + if (!ldev) + return -ENOENT; + + master_idx = mlx5_lag_get_master_idx(ldev); + if (master_idx < 0) + return -ENOENT; + + pf = mlx5_lag_pf(ldev, master_idx); + if (pf && pf->dev == dev) + return 0; + + mlx5_ldev_for_each(i, 0, ldev) { + if (i == master_idx) + continue; + pf = mlx5_lag_pf(ldev, i); + if (pf->dev == dev) + return num; + num++; + } + return -ENOENT; +} +EXPORT_SYMBOL(mlx5_lag_get_dev_seq); + /* Devcom events for LAG master marking */ #define LAG_DEVCOM_PAIR (0) #define LAG_DEVCOM_UNPAIR (1) diff --git a/include/linux/mlx5/lag.h b/include/linux/mlx5/lag.h new file mode 100644 index 000000000000..d370dfd19055 --- /dev/null +++ b/include/linux/mlx5/lag.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_LAG_API_H__ +#define __MLX5_LAG_API_H__ + +struct mlx5_core_dev; + +int mlx5_lag_get_dev_seq(struct mlx5_core_dev *dev); + +#endif /* __MLX5_LAG_API_H__ */ From 0bc9059fab6365feaf95cc9a796a3d381915a70f Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Mon, 9 Mar 2026 11:34:33 +0200 Subject: [PATCH 0566/1561] net/mlx5: Add VHCA RX flow destination support for FW steering Introduce MLX5_FLOW_DESTINATION_TYPE_VHCA_RX as a new flow steering destination type. Wire the new destination through flow steering command setup by mapping it to MLX5_IFC_FLOW_DESTINATION_TYPE_VHCA_RX and passing the vhca id, extend forward-destination validation to accept it, and teach the flow steering tracepoint formatter to print rx_vhca_id. Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260309093435.1850724-8-tariqt@nvidia.com Signed-off-by: Leon Romanovsky --- .../net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c | 3 +++ drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 4 ++++ drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 7 +++++-- include/linux/mlx5/fs.h | 4 ++++ 4 files changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c index 6d73127b7217..2cf1d3825def 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c @@ -282,6 +282,9 @@ const char *parse_fs_dst(struct trace_seq *p, case MLX5_FLOW_DESTINATION_TYPE_NONE: trace_seq_printf(p, "none\n"); break; + case MLX5_FLOW_DESTINATION_TYPE_VHCA_RX: + trace_seq_printf(p, "rx_vhca_id=%u\n", dst->vhca.id); + break; } trace_seq_putc(p, 0); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 16b28028609d..1cd4cd898ec2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -716,6 +716,10 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, id = dst->dest_attr.ft->id; ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_TABLE_TYPE; break; + case MLX5_FLOW_DESTINATION_TYPE_VHCA_RX: + id = dst->dest_attr.vhca.id; + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_VHCA_RX; + break; default: id = dst->dest_attr.tir_num; ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_TIR; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 2c3544880a30..003d211306a7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -503,7 +503,8 @@ static bool is_fwd_dest_type(enum mlx5_flow_destination_type type) type == MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER || type == MLX5_FLOW_DESTINATION_TYPE_TIR || type == MLX5_FLOW_DESTINATION_TYPE_RANGE || - type == MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE; + type == MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE || + type == MLX5_FLOW_DESTINATION_TYPE_VHCA_RX; } static bool check_valid_spec(const struct mlx5_flow_spec *spec) @@ -1890,7 +1891,9 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, d1->range.hit_ft == d2->range.hit_ft && d1->range.miss_ft == d2->range.miss_ft && d1->range.min == d2->range.min && - d1->range.max == d2->range.max)) + d1->range.max == d2->range.max) || + (d1->type == MLX5_FLOW_DESTINATION_TYPE_VHCA_RX && + d1->vhca.id == d2->vhca.id)) return true; } diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 9cadb1d5e6df..02064424e868 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -55,6 +55,7 @@ enum mlx5_flow_destination_type { MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM, MLX5_FLOW_DESTINATION_TYPE_RANGE, MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE, + MLX5_FLOW_DESTINATION_TYPE_VHCA_RX, }; enum { @@ -189,6 +190,9 @@ struct mlx5_flow_destination { u32 ft_num; struct mlx5_flow_table *ft; struct mlx5_fc *counter; + struct { + u16 id; + } vhca; struct { u16 num; u16 vhca_id; From d6c9b4de8109a3b4ca9c6c6b7c5fbc42cfeff9ae Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Mon, 9 Mar 2026 11:34:34 +0200 Subject: [PATCH 0567/1561] {net/RDMA}/mlx5: Add LAG demux table API and vport demux rules Downstream patches will introduce SW-only LAG (e.g. shared_fdb without HW LAG). In this mode the firmware cannot create the LAG demux table, but vport demuxing is still required. Move LAG demux flow-table ownership to the LAG layer and introduce APIs to init/cleanup the demux table and add/delete per-vport rules. Adjust the RDMA driver to use the new APIs. In this mode, the LAG layer will create a flow group that matches vport metadata. Vports that are not native to the LAG master eswitch add the demux rule during IB representor load and remove it on unload. The demux rule forward traffic from said vports to their native eswitch manager via a new dest type - MLX5_FLOW_DESTINATION_TYPE_VHCA_RX. Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260309093435.1850724-9-tariqt@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/ib_rep.c | 20 ++- drivers/infiniband/hw/mlx5/main.c | 21 +-- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 - .../net/ethernet/mellanox/mlx5/core/eswitch.h | 12 ++ .../mellanox/mlx5/core/eswitch_offloads.c | 83 +++++++++- .../net/ethernet/mellanox/mlx5/core/fs_core.c | 10 +- .../net/ethernet/mellanox/mlx5/core/lag/lag.c | 152 ++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/lag/lag.h | 12 ++ include/linux/mlx5/fs.h | 6 +- include/linux/mlx5/lag.h | 12 +- 10 files changed, 301 insertions(+), 28 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index df8f049c5806..1709b628702e 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -10,11 +10,13 @@ static int mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, + struct mlx5_core_dev *rep_dev, struct mlx5_eswitch_rep *rep, int vport_index) { struct mlx5_ib_dev *ibdev; struct net_device *ndev; + int ret; ibdev = mlx5_eswitch_uplink_get_proto_dev(dev->priv.eswitch, REP_IB); if (!ibdev) @@ -24,7 +26,17 @@ mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, rep->rep_data[REP_IB].priv = ibdev; ndev = mlx5_ib_get_rep_netdev(rep->esw, rep->vport); - return ib_device_set_netdev(&ibdev->ib_dev, ndev, vport_index + 1); + ret = ib_device_set_netdev(&ibdev->ib_dev, ndev, vport_index + 1); + if (ret) + return ret; + + /* Only Vports that are not native to the LAG master eswitch need to add + * demux rule. + */ + if (mlx5_eswitch_get_total_vports(dev) > vport_index) + return 0; + + return mlx5_lag_demux_rule_add(rep_dev, rep->vport, vport_index); } static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev); @@ -131,7 +143,7 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) if (mlx5_lag_is_master(peer_dev)) lag_master = peer_dev; - else if (!mlx5_lag_is_mpesw(dev)) + else if (!mlx5_lag_is_mpesw(peer_dev)) /* Only 1 ib port is the representor for all uplinks */ peer_n_ports--; @@ -145,7 +157,7 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) if (rep->vport == MLX5_VPORT_UPLINK && !new_uplink) profile = &raw_eth_profile; else - return mlx5_ib_set_vport_rep(lag_master, rep, vport_index); + return mlx5_ib_set_vport_rep(lag_master, dev, rep, vport_index); if (mlx5_lag_is_shared_fdb(dev)) { ret = mlx5_ib_take_transport(lag_master); @@ -233,6 +245,8 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep) vport_index = i; } + mlx5_lag_demux_rule_del(mdev, vport_index); + port = &dev->port[vport_index]; ib_device_set_netdev(&dev->ib_dev, NULL, vport_index + 1); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 635002e684a5..9fb0629978bd 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -3678,12 +3679,12 @@ static void mlx5e_lag_event_unregister(struct mlx5_ib_dev *dev) static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev) { + struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_core_dev *mdev = dev->mdev; - struct mlx5_flow_namespace *ns = mlx5_get_flow_namespace(mdev, - MLX5_FLOW_NAMESPACE_LAG); - struct mlx5_flow_table *ft; + struct mlx5_flow_namespace *ns; int err; + ns = mlx5_get_flow_namespace(mdev, MLX5_FLOW_NAMESPACE_LAG); if (!ns || !mlx5_lag_is_active(mdev)) return 0; @@ -3691,14 +3692,15 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev) if (err) return err; - ft = mlx5_create_lag_demux_flow_table(ns, 0, 0); - if (IS_ERR(ft)) { - err = PTR_ERR(ft); + ft_attr.level = 0; + ft_attr.prio = 0; + ft_attr.max_fte = dev->num_ports; + + err = mlx5_lag_demux_init(mdev, &ft_attr); + if (err) goto err_destroy_vport_lag; - } mlx5e_lag_event_register(dev); - dev->flow_db->lag_demux_ft = ft; dev->lag_ports = mlx5_lag_get_num_ports(mdev); dev->lag_active = true; return 0; @@ -3716,8 +3718,7 @@ static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev) dev->lag_active = false; mlx5e_lag_event_unregister(dev); - mlx5_destroy_flow_table(dev->flow_db->lag_demux_ft); - dev->flow_db->lag_demux_ft = NULL; + mlx5_lag_demux_cleanup(mdev); mlx5_cmd_destroy_vport_lag(mdev); } diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 4f4114d95130..3fc31415e107 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -306,7 +306,6 @@ struct mlx5_ib_flow_db { struct mlx5_ib_flow_prio rdma_rx[MLX5_IB_NUM_FLOW_FT]; struct mlx5_ib_flow_prio rdma_tx[MLX5_IB_NUM_FLOW_FT]; struct mlx5_ib_flow_prio opfcs[MLX5_IB_OPCOUNTER_MAX]; - struct mlx5_flow_table *lag_demux_ft; struct mlx5_ib_flow_prio *rdma_transport_rx[MLX5_RDMA_TRANSPORT_BYPASS_PRIO]; struct mlx5_ib_flow_prio *rdma_transport_tx[MLX5_RDMA_TRANSPORT_BYPASS_PRIO]; /* Protect flow steering bypass flow tables diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 96309a732d50..9b729789537f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -940,6 +940,12 @@ int mlx5_esw_ipsec_vf_packet_offload_supported(struct mlx5_core_dev *dev, u16 vport_num); bool mlx5_esw_host_functions_enabled(const struct mlx5_core_dev *dev); void mlx5_eswitch_safe_aux_devs_remove(struct mlx5_core_dev *dev); +struct mlx5_flow_group * +mlx5_esw_lag_demux_fg_create(struct mlx5_eswitch *esw, + struct mlx5_flow_table *ft); +struct mlx5_flow_handle * +mlx5_esw_lag_demux_rule_create(struct mlx5_eswitch *esw, u16 vport_num, + struct mlx5_flow_table *lag_ft); #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } @@ -1025,6 +1031,12 @@ mlx5_esw_vport_vhca_id(struct mlx5_eswitch *esw, u16 vportn, u16 *vhca_id) static inline void mlx5_eswitch_safe_aux_devs_remove(struct mlx5_core_dev *dev) {} +static inline struct mlx5_flow_handle * +mlx5_esw_lag_demux_rule_create(struct mlx5_eswitch *esw, u16 vport_num, + struct mlx5_flow_table *lag_ft) +{ + return ERR_PTR(-EOPNOTSUPP); +} #endif /* CONFIG_MLX5_ESWITCH */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 90e6f97bdf4a..f98837470f39 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1459,6 +1459,83 @@ esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag) return flow_rule; } +struct mlx5_flow_group * +mlx5_esw_lag_demux_fg_create(struct mlx5_eswitch *esw, + struct mlx5_flow_table *ft) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *fg; + void *match_criteria; + void *flow_group_in; + + if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) + return ERR_PTR(-EOPNOTSUPP); + + if (IS_ERR(ft)) + return ERR_CAST(ft); + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return ERR_PTR(-ENOMEM); + + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, + match_criteria); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS_2); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + ft->max_fte - 1); + + MLX5_SET(fte_match_param, match_criteria, + misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); + + fg = mlx5_create_flow_group(ft, flow_group_in); + kvfree(flow_group_in); + if (IS_ERR(fg)) + esw_warn(esw->dev, "Can't create LAG demux flow group\n"); + + return fg; +} + +struct mlx5_flow_handle * +mlx5_esw_lag_demux_rule_create(struct mlx5_eswitch *esw, u16 vport_num, + struct mlx5_flow_table *lag_ft) +{ + struct mlx5_flow_spec *spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *ret; + void *misc; + + if (!spec) + return ERR_PTR(-ENOMEM); + + if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) { + kvfree(spec); + return ERR_PTR(-EOPNOTSUPP); + } + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(esw, vport_num)); + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + dest.type = MLX5_FLOW_DESTINATION_TYPE_VHCA_RX; + dest.vhca.id = MLX5_CAP_GEN(esw->dev, vhca_id); + + ret = mlx5_add_flow_rules(lag_ft, spec, &flow_act, &dest, 1); + kvfree(spec); + return ret; +} + #define MAX_PF_SQ 256 #define MAX_SQ_NVPORTS 32 @@ -2047,7 +2124,8 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw) if (IS_ERR(g)) { err = PTR_ERR(g); - mlx5_core_warn(esw->dev, "Failed to create vport rx group err %d\n", err); + esw_warn(esw->dev, "Failed to create vport rx group err %d\n", + err); goto out; } @@ -2092,7 +2170,8 @@ static int esw_create_vport_rx_drop_group(struct mlx5_eswitch *esw) if (IS_ERR(g)) { err = PTR_ERR(g); - mlx5_core_warn(esw->dev, "Failed to create vport rx drop group err %d\n", err); + esw_warn(esw->dev, + "Failed to create vport rx drop group err %d\n", err); goto out; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 003d211306a7..61a6ba1e49dd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1438,15 +1438,9 @@ mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns, struct mlx5_flow_table* mlx5_create_lag_demux_flow_table(struct mlx5_flow_namespace *ns, - int prio, u32 level) + struct mlx5_flow_table_attr *ft_attr) { - struct mlx5_flow_table_attr ft_attr = {}; - - ft_attr.level = level; - ft_attr.prio = prio; - ft_attr.max_fte = 1; - - return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_LAG_DEMUX, 0); + return __mlx5_create_flow_table(ns, ft_attr, FS_FT_OP_MOD_LAG_DEMUX, 0); } EXPORT_SYMBOL(mlx5_create_lag_demux_flow_table); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 51ec8f61ecbb..449e4bd86c06 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -1471,6 +1471,158 @@ struct mlx5_devcom_comp_dev *mlx5_lag_get_devcom_comp(struct mlx5_lag *ldev) return devcom; } +static int mlx5_lag_demux_ft_fg_init(struct mlx5_core_dev *dev, + struct mlx5_flow_table_attr *ft_attr, + struct mlx5_lag *ldev) +{ +#ifdef CONFIG_MLX5_ESWITCH + struct mlx5_flow_namespace *ns; + struct mlx5_flow_group *fg; + int err; + + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_LAG); + if (!ns) + return 0; + + ldev->lag_demux_ft = mlx5_create_flow_table(ns, ft_attr); + if (IS_ERR(ldev->lag_demux_ft)) + return PTR_ERR(ldev->lag_demux_ft); + + fg = mlx5_esw_lag_demux_fg_create(dev->priv.eswitch, + ldev->lag_demux_ft); + if (IS_ERR(fg)) { + err = PTR_ERR(fg); + mlx5_destroy_flow_table(ldev->lag_demux_ft); + ldev->lag_demux_ft = NULL; + return err; + } + + ldev->lag_demux_fg = fg; + return 0; +#else + return -EOPNOTSUPP; +#endif +} + +static int mlx5_lag_demux_fw_init(struct mlx5_core_dev *dev, + struct mlx5_flow_table_attr *ft_attr, + struct mlx5_lag *ldev) +{ + struct mlx5_flow_namespace *ns; + int err; + + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_LAG); + if (!ns) + return 0; + + ldev->lag_demux_fg = NULL; + ft_attr->max_fte = 1; + ldev->lag_demux_ft = mlx5_create_lag_demux_flow_table(ns, ft_attr); + if (IS_ERR(ldev->lag_demux_ft)) { + err = PTR_ERR(ldev->lag_demux_ft); + ldev->lag_demux_ft = NULL; + return err; + } + + return 0; +} + +int mlx5_lag_demux_init(struct mlx5_core_dev *dev, + struct mlx5_flow_table_attr *ft_attr) +{ + struct mlx5_lag *ldev; + + if (!ft_attr) + return -EINVAL; + + ldev = mlx5_lag_dev(dev); + if (!ldev) + return -ENODEV; + + xa_init(&ldev->lag_demux_rules); + + if (mlx5_get_sd(dev)) + return mlx5_lag_demux_ft_fg_init(dev, ft_attr, ldev); + + return mlx5_lag_demux_fw_init(dev, ft_attr, ldev); +} +EXPORT_SYMBOL(mlx5_lag_demux_init); + +void mlx5_lag_demux_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_handle *rule; + struct mlx5_lag *ldev; + unsigned long vport_num; + + ldev = mlx5_lag_dev(dev); + if (!ldev) + return; + + xa_for_each(&ldev->lag_demux_rules, vport_num, rule) + mlx5_del_flow_rules(rule); + xa_destroy(&ldev->lag_demux_rules); + + if (ldev->lag_demux_fg) + mlx5_destroy_flow_group(ldev->lag_demux_fg); + if (ldev->lag_demux_ft) + mlx5_destroy_flow_table(ldev->lag_demux_ft); + ldev->lag_demux_fg = NULL; + ldev->lag_demux_ft = NULL; +} +EXPORT_SYMBOL(mlx5_lag_demux_cleanup); + +int mlx5_lag_demux_rule_add(struct mlx5_core_dev *vport_dev, u16 vport_num, + int index) +{ + struct mlx5_flow_handle *rule; + struct mlx5_lag *ldev; + int err; + + ldev = mlx5_lag_dev(vport_dev); + if (!ldev || !ldev->lag_demux_fg) + return 0; + + if (xa_load(&ldev->lag_demux_rules, index)) + return 0; + + rule = mlx5_esw_lag_demux_rule_create(vport_dev->priv.eswitch, + vport_num, ldev->lag_demux_ft); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_warn(vport_dev, + "Failed to create LAG demux rule for vport %u, err %d\n", + vport_num, err); + return err; + } + + err = xa_err(xa_store(&ldev->lag_demux_rules, index, rule, + GFP_KERNEL)); + if (err) { + mlx5_del_flow_rules(rule); + mlx5_core_warn(vport_dev, + "Failed to store LAG demux rule for vport %u, err %d\n", + vport_num, err); + } + + return err; +} +EXPORT_SYMBOL(mlx5_lag_demux_rule_add); + +void mlx5_lag_demux_rule_del(struct mlx5_core_dev *dev, int index) +{ + struct mlx5_flow_handle *rule; + struct mlx5_lag *ldev; + + ldev = mlx5_lag_dev(dev); + if (!ldev || !ldev->lag_demux_fg) + return; + + rule = xa_erase(&ldev->lag_demux_rules, index); + if (rule) + mlx5_del_flow_rules(rule); +} +EXPORT_SYMBOL(mlx5_lag_demux_rule_del); + static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay) { queue_delayed_work(ldev->wq, &ldev->bond_work, delay); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h index 30cbd61768f8..6c911374f409 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h @@ -5,6 +5,9 @@ #define __MLX5_LAG_H__ #include +#include +#include +#include #define MLX5_LAG_MAX_HASH_BUCKETS 16 /* XArray mark for the LAG master device @@ -83,6 +86,9 @@ struct mlx5_lag { /* Protect lag fields/state changes */ struct mutex lock; struct lag_mpesw lag_mpesw; + struct mlx5_flow_table *lag_demux_ft; + struct mlx5_flow_group *lag_demux_fg; + struct xarray lag_demux_rules; }; static inline struct mlx5_lag * @@ -133,6 +139,12 @@ mlx5_lag_is_ready(struct mlx5_lag *ldev) bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev); bool mlx5_lag_check_prereq(struct mlx5_lag *ldev); +int mlx5_lag_demux_init(struct mlx5_core_dev *dev, + struct mlx5_flow_table_attr *ft_attr); +void mlx5_lag_demux_cleanup(struct mlx5_core_dev *dev); +int mlx5_lag_demux_rule_add(struct mlx5_core_dev *dev, u16 vport_num, + int vport_index); +void mlx5_lag_demux_rule_del(struct mlx5_core_dev *dev, int vport_index); void mlx5_modify_lag(struct mlx5_lag *ldev, struct lag_tracker *tracker); int mlx5_activate_lag(struct mlx5_lag *ldev, diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 02064424e868..d8f3b7ef319e 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -252,9 +252,9 @@ mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, struct mlx5_flow_table * mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns, struct mlx5_flow_table_attr *ft_attr, u16 vport); -struct mlx5_flow_table *mlx5_create_lag_demux_flow_table( - struct mlx5_flow_namespace *ns, - int prio, u32 level); +struct mlx5_flow_table * +mlx5_create_lag_demux_flow_table(struct mlx5_flow_namespace *ns, + struct mlx5_flow_table_attr *ft_attr); int mlx5_destroy_flow_table(struct mlx5_flow_table *ft); /* inbox should be set with the following values: diff --git a/include/linux/mlx5/lag.h b/include/linux/mlx5/lag.h index d370dfd19055..ab9f754664e5 100644 --- a/include/linux/mlx5/lag.h +++ b/include/linux/mlx5/lag.h @@ -4,8 +4,18 @@ #ifndef __MLX5_LAG_API_H__ #define __MLX5_LAG_API_H__ -struct mlx5_core_dev; +#include +struct mlx5_core_dev; +struct mlx5_flow_table; +struct mlx5_flow_table_attr; + +int mlx5_lag_demux_init(struct mlx5_core_dev *dev, + struct mlx5_flow_table_attr *ft_attr); +void mlx5_lag_demux_cleanup(struct mlx5_core_dev *dev); +int mlx5_lag_demux_rule_add(struct mlx5_core_dev *dev, u16 vport_num, + int vport_index); +void mlx5_lag_demux_rule_del(struct mlx5_core_dev *dev, int vport_index); int mlx5_lag_get_dev_seq(struct mlx5_core_dev *dev); #endif /* __MLX5_LAG_API_H__ */ From 4dd2115f43594da5271a1aa34fde6719b4259047 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 9 Mar 2026 11:34:35 +0200 Subject: [PATCH 0568/1561] net/mlx5: Expose MLX5_UMR_ALIGN definition Expose HW constant value in a shared header, to be used by core/EN drivers. Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260309093435.1850724-10-tariqt@nvidia.com Reviewed-by: Dragos Tatulea Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mr.c | 1 - include/linux/mlx5/device.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 665323b90b64..ff56948597dd 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -51,7 +51,6 @@ enum { }; #define MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS 4 -#define MLX5_UMR_ALIGN 2048 static void create_mkey_callback(int status, struct mlx5_async_work *context); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 25c6b42140b2..07a25f264292 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -293,6 +293,7 @@ enum { MLX5_UMR_INLINE = (1 << 7), }; +#define MLX5_UMR_ALIGN (2048) #define MLX5_UMR_FLEX_ALIGNMENT 0x40 #define MLX5_UMR_MTT_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_FLEX_ALIGNMENT / sizeof(struct mlx5_mtt)) #define MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_FLEX_ALIGNMENT / sizeof(struct mlx5_klm)) From 24fbd3967f3fdaad5f93e0d35ae870ed25fb2c3a Mon Sep 17 00:00:00 2001 From: Vishwanath Seshagiri Date: Tue, 10 Mar 2026 11:31:04 -0700 Subject: [PATCH 0569/1561] virtio_net: add page_pool support for buffer allocation Use page_pool for RX buffer allocation in mergeable and small buffer modes to enable page recycling and avoid repeated page allocator calls. skb_mark_for_recycle() enables page reuse in the network stack. Big packets mode is unchanged because it uses page->private for linked list chaining of multiple pages per buffer, which conflicts with page_pool's internal use of page->private. Implement conditional DMA premapping using virtqueue_dma_dev(): - When non-NULL (vhost, virtio-pci): use PP_FLAG_DMA_MAP with page_pool handling DMA mapping, submit via virtqueue_add_inbuf_premapped() - When NULL (VDUSE, direct physical): page_pool handles allocation only, submit via virtqueue_add_inbuf_ctx() This preserves the DMA premapping optimization from commit 31f3cd4e5756b ("virtio-net: rq submits premapped per-buffer") while adding page_pool support as a prerequisite for future zero-copy features (devmem TCP, io_uring ZCRX). Page pools are created in probe and destroyed in remove (not open/close), following existing driver behavior where RX buffers remain in virtqueues across interface state changes. Signed-off-by: Vishwanath Seshagiri Acked-by: Jason Wang Acked-by: Michael S. Tsirkin Link: https://patch.msgid.link/20260310183107.2822016-1-vishs@meta.com Signed-off-by: Jakub Kicinski --- drivers/net/Kconfig | 1 + drivers/net/virtio_net.c | 497 ++++++++++++++++++++------------------- 2 files changed, 251 insertions(+), 247 deletions(-) diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 17108c359216..b2fd90466bab 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -452,6 +452,7 @@ config VIRTIO_NET depends on VIRTIO select NET_FAILOVER select DIMLIB + select PAGE_POOL help This is the virtual network driver for virtio. It can be used with QEMU based VMMs (like KVM or Xen). Say Y or M. diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 811b90da15a9..022f60728721 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -26,6 +26,7 @@ #include #include #include +#include static int napi_weight = NAPI_POLL_WEIGHT; module_param(napi_weight, int, 0444); @@ -290,14 +291,6 @@ struct virtnet_interrupt_coalesce { u32 max_usecs; }; -/* The dma information of pages allocated at a time. */ -struct virtnet_rq_dma { - dma_addr_t addr; - u32 ref; - u16 len; - u16 need_sync; -}; - /* Internal representation of a send virtqueue */ struct send_queue { /* Virtqueue associated with this send _queue */ @@ -356,8 +349,10 @@ struct receive_queue { /* Average packet length for mergeable receive buffers. */ struct ewma_pkt_len mrg_avg_pkt_len; - /* Page frag for packet buffer allocation. */ - struct page_frag alloc_frag; + struct page_pool *page_pool; + + /* True if page_pool handles DMA mapping via PP_FLAG_DMA_MAP */ + bool use_page_pool_dma; /* RX: fragments + linear part + virtio header */ struct scatterlist sg[MAX_SKB_FRAGS + 2]; @@ -370,9 +365,6 @@ struct receive_queue { struct xdp_rxq_info xdp_rxq; - /* Record the last dma info to free after new pages is allocated. */ - struct virtnet_rq_dma *last_dma; - struct xsk_buff_pool *xsk_pool; /* xdp rxq used by xsk */ @@ -521,11 +513,14 @@ static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp, struct virtnet_rq_stats *stats); static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq, struct sk_buff *skb, u8 flags); -static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb, +static struct sk_buff *virtnet_skb_append_frag(struct receive_queue *rq, + struct sk_buff *head_skb, struct sk_buff *curr_skb, struct page *page, void *buf, int len, int truesize); static void virtnet_xsk_completed(struct send_queue *sq, int num); +static void free_unused_bufs(struct virtnet_info *vi); +static void virtnet_del_vqs(struct virtnet_info *vi); enum virtnet_xmit_type { VIRTNET_XMIT_TYPE_SKB, @@ -709,12 +704,10 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask) static void virtnet_rq_free_buf(struct virtnet_info *vi, struct receive_queue *rq, void *buf) { - if (vi->mergeable_rx_bufs) - put_page(virt_to_head_page(buf)); - else if (vi->big_packets) + if (!rq->page_pool) give_pages(rq, buf); else - put_page(virt_to_head_page(buf)); + page_pool_put_page(rq->page_pool, virt_to_head_page(buf), -1, false); } static void enable_rx_mode_work(struct virtnet_info *vi) @@ -876,10 +869,16 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, skb = virtnet_build_skb(buf, truesize, p - buf, len); if (unlikely(!skb)) return NULL; + /* Big packets mode chains pages via page->private, which is + * incompatible with the way page_pool uses page->private. + * Currently, big packets mode doesn't use page pools. + */ + if (!rq->page_pool) { + page = (struct page *)page->private; + if (page) + give_pages(rq, page); + } - page = (struct page *)page->private; - if (page) - give_pages(rq, page); goto ok; } @@ -925,133 +924,16 @@ ok: hdr = skb_vnet_common_hdr(skb); memcpy(hdr, hdr_p, hdr_len); if (page_to_free) - put_page(page_to_free); + page_pool_put_page(rq->page_pool, page_to_free, -1, true); return skb; } -static void virtnet_rq_unmap(struct receive_queue *rq, void *buf, u32 len) -{ - struct virtnet_info *vi = rq->vq->vdev->priv; - struct page *page = virt_to_head_page(buf); - struct virtnet_rq_dma *dma; - void *head; - int offset; - - BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs); - - head = page_address(page); - - dma = head; - - --dma->ref; - - if (dma->need_sync && len) { - offset = buf - (head + sizeof(*dma)); - - virtqueue_map_sync_single_range_for_cpu(rq->vq, dma->addr, - offset, len, - DMA_FROM_DEVICE); - } - - if (dma->ref) - return; - - virtqueue_unmap_single_attrs(rq->vq, dma->addr, dma->len, - DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); - put_page(page); -} - static void *virtnet_rq_get_buf(struct receive_queue *rq, u32 *len, void **ctx) { - struct virtnet_info *vi = rq->vq->vdev->priv; - void *buf; + BUG_ON(!rq->page_pool); - BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs); - - buf = virtqueue_get_buf_ctx(rq->vq, len, ctx); - if (buf) - virtnet_rq_unmap(rq, buf, *len); - - return buf; -} - -static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *buf, u32 len) -{ - struct virtnet_info *vi = rq->vq->vdev->priv; - struct virtnet_rq_dma *dma; - dma_addr_t addr; - u32 offset; - void *head; - - BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs); - - head = page_address(rq->alloc_frag.page); - - offset = buf - head; - - dma = head; - - addr = dma->addr - sizeof(*dma) + offset; - - sg_init_table(rq->sg, 1); - sg_fill_dma(rq->sg, addr, len); -} - -static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp) -{ - struct page_frag *alloc_frag = &rq->alloc_frag; - struct virtnet_info *vi = rq->vq->vdev->priv; - struct virtnet_rq_dma *dma; - void *buf, *head; - dma_addr_t addr; - - BUG_ON(vi->big_packets && !vi->mergeable_rx_bufs); - - head = page_address(alloc_frag->page); - - dma = head; - - /* new pages */ - if (!alloc_frag->offset) { - if (rq->last_dma) { - /* Now, the new page is allocated, the last dma - * will not be used. So the dma can be unmapped - * if the ref is 0. - */ - virtnet_rq_unmap(rq, rq->last_dma, 0); - rq->last_dma = NULL; - } - - dma->len = alloc_frag->size - sizeof(*dma); - - addr = virtqueue_map_single_attrs(rq->vq, dma + 1, - dma->len, DMA_FROM_DEVICE, 0); - if (virtqueue_map_mapping_error(rq->vq, addr)) - return NULL; - - dma->addr = addr; - dma->need_sync = virtqueue_map_need_sync(rq->vq, addr); - - /* Add a reference to dma to prevent the entire dma from - * being released during error handling. This reference - * will be freed after the pages are no longer used. - */ - get_page(alloc_frag->page); - dma->ref = 1; - alloc_frag->offset = sizeof(*dma); - - rq->last_dma = dma; - } - - ++dma->ref; - - buf = head + alloc_frag->offset; - - get_page(alloc_frag->page); - alloc_frag->offset += size; - - return buf; + return virtqueue_get_buf_ctx(rq->vq, len, ctx); } static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf) @@ -1067,9 +949,6 @@ static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf) return; } - if (!vi->big_packets || vi->mergeable_rx_bufs) - virtnet_rq_unmap(rq, buf, 0); - virtnet_rq_free_buf(vi, rq, buf); } @@ -1335,7 +1214,7 @@ static int xsk_append_merge_buffer(struct virtnet_info *vi, truesize = len; - curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page, + curr_skb = virtnet_skb_append_frag(rq, head_skb, curr_skb, page, buf, len, truesize); if (!curr_skb) { put_page(page); @@ -1790,7 +1669,7 @@ out: return ret; } -static void put_xdp_frags(struct xdp_buff *xdp) +static void put_xdp_frags(struct receive_queue *rq, struct xdp_buff *xdp) { struct skb_shared_info *shinfo; struct page *xdp_page; @@ -1800,7 +1679,7 @@ static void put_xdp_frags(struct xdp_buff *xdp) shinfo = xdp_get_shared_info_from_buff(xdp); for (i = 0; i < shinfo->nr_frags; i++) { xdp_page = skb_frag_page(&shinfo->frags[i]); - put_page(xdp_page); + page_pool_put_page(rq->page_pool, xdp_page, -1, true); } } } @@ -1892,7 +1771,7 @@ static struct page *xdp_linearize_page(struct net_device *dev, if (page_off + *len + tailroom > PAGE_SIZE) return NULL; - page = alloc_page(GFP_ATOMIC); + page = page_pool_alloc_pages(rq->page_pool, GFP_ATOMIC); if (!page) return NULL; @@ -1915,8 +1794,12 @@ static struct page *xdp_linearize_page(struct net_device *dev, p = virt_to_head_page(buf); off = buf - page_address(p); + if (rq->use_page_pool_dma) + page_pool_dma_sync_for_cpu(rq->page_pool, p, + off, buflen); + if (check_mergeable_len(dev, ctx, buflen)) { - put_page(p); + page_pool_put_page(rq->page_pool, p, -1, true); goto err_buf; } @@ -1924,38 +1807,36 @@ static struct page *xdp_linearize_page(struct net_device *dev, * is sending packet larger than the MTU. */ if ((page_off + buflen + tailroom) > PAGE_SIZE) { - put_page(p); + page_pool_put_page(rq->page_pool, p, -1, true); goto err_buf; } memcpy(page_address(page) + page_off, page_address(p) + off, buflen); page_off += buflen; - put_page(p); + page_pool_put_page(rq->page_pool, p, -1, true); } /* Headroom does not contribute to packet length */ *len = page_off - XDP_PACKET_HEADROOM; return page; err_buf: - __free_pages(page, 0); + page_pool_put_page(rq->page_pool, page, -1, true); return NULL; } static struct sk_buff *receive_small_build_skb(struct virtnet_info *vi, unsigned int xdp_headroom, void *buf, - unsigned int len) + unsigned int len, + unsigned int buflen) { unsigned int header_offset; unsigned int headroom; - unsigned int buflen; struct sk_buff *skb; header_offset = VIRTNET_RX_PAD + xdp_headroom; headroom = vi->hdr_len + header_offset; - buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); skb = virtnet_build_skb(buf, buflen, headroom, len); if (unlikely(!skb)) @@ -1974,6 +1855,7 @@ static struct sk_buff *receive_small_xdp(struct net_device *dev, void *buf, unsigned int xdp_headroom, unsigned int len, + unsigned int buflen, unsigned int *xdp_xmit, struct virtnet_rq_stats *stats) { @@ -1982,7 +1864,6 @@ static struct sk_buff *receive_small_xdp(struct net_device *dev, struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset; struct page *page = virt_to_head_page(buf); struct page *xdp_page; - unsigned int buflen; struct xdp_buff xdp; struct sk_buff *skb; unsigned int metasize = 0; @@ -1995,9 +1876,6 @@ static struct sk_buff *receive_small_xdp(struct net_device *dev, if (unlikely(hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) goto err_xdp; - buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) { int offset = buf - page_address(page) + header_offset; unsigned int tlen = len + vi->hdr_len; @@ -2015,7 +1893,7 @@ static struct sk_buff *receive_small_xdp(struct net_device *dev, goto err_xdp; buf = page_address(xdp_page); - put_page(page); + page_pool_put_page(rq->page_pool, page, -1, true); page = xdp_page; } @@ -2047,13 +1925,15 @@ static struct sk_buff *receive_small_xdp(struct net_device *dev, if (metasize) skb_metadata_set(skb, metasize); + skb_mark_for_recycle(skb); + return skb; err_xdp: u64_stats_inc(&stats->xdp_drops); err: u64_stats_inc(&stats->drops); - put_page(page); + page_pool_put_page(rq->page_pool, page, -1, true); xdp_xmit: return NULL; } @@ -2066,7 +1946,8 @@ static struct sk_buff *receive_small(struct net_device *dev, unsigned int *xdp_xmit, struct virtnet_rq_stats *stats) { - unsigned int xdp_headroom = (unsigned long)ctx; + unsigned int xdp_headroom = mergeable_ctx_to_headroom(ctx); + unsigned int buflen = mergeable_ctx_to_truesize(ctx); struct page *page = virt_to_head_page(buf); struct sk_buff *skb; @@ -2075,6 +1956,13 @@ static struct sk_buff *receive_small(struct net_device *dev, */ buf -= VIRTNET_RX_PAD + xdp_headroom; + if (rq->use_page_pool_dma) { + int offset = buf - page_address(page) + + VIRTNET_RX_PAD + xdp_headroom; + + page_pool_dma_sync_for_cpu(rq->page_pool, page, offset, len); + } + len -= vi->hdr_len; u64_stats_add(&stats->bytes, len); @@ -2092,21 +1980,23 @@ static struct sk_buff *receive_small(struct net_device *dev, xdp_prog = rcu_dereference(rq->xdp_prog); if (xdp_prog) { skb = receive_small_xdp(dev, vi, rq, xdp_prog, buf, - xdp_headroom, len, xdp_xmit, - stats); + xdp_headroom, len, buflen, + xdp_xmit, stats); rcu_read_unlock(); return skb; } rcu_read_unlock(); } - skb = receive_small_build_skb(vi, xdp_headroom, buf, len); - if (likely(skb)) + skb = receive_small_build_skb(vi, xdp_headroom, buf, len, buflen); + if (likely(skb)) { + skb_mark_for_recycle(skb); return skb; + } err: u64_stats_inc(&stats->drops); - put_page(page); + page_pool_put_page(rq->page_pool, page, -1, true); return NULL; } @@ -2161,7 +2051,7 @@ static void mergeable_buf_free(struct receive_queue *rq, int num_buf, } u64_stats_add(&stats->bytes, len); page = virt_to_head_page(buf); - put_page(page); + page_pool_put_page(rq->page_pool, page, -1, true); } } @@ -2271,8 +2161,12 @@ static int virtnet_build_xdp_buff_mrg(struct net_device *dev, page = virt_to_head_page(buf); offset = buf - page_address(page); + if (rq->use_page_pool_dma) + page_pool_dma_sync_for_cpu(rq->page_pool, page, + offset, len); + if (check_mergeable_len(dev, ctx, len)) { - put_page(page); + page_pool_put_page(rq->page_pool, page, -1, true); goto err; } @@ -2291,7 +2185,7 @@ static int virtnet_build_xdp_buff_mrg(struct net_device *dev, return 0; err: - put_xdp_frags(xdp); + put_xdp_frags(rq, xdp); return -EINVAL; } @@ -2356,7 +2250,7 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi, if (*len + xdp_room > PAGE_SIZE) return NULL; - xdp_page = alloc_page(GFP_ATOMIC); + xdp_page = page_pool_alloc_pages(rq->page_pool, GFP_ATOMIC); if (!xdp_page) return NULL; @@ -2366,7 +2260,7 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi, *frame_sz = PAGE_SIZE; - put_page(*page); + page_pool_put_page(rq->page_pool, *page, -1, true); *page = xdp_page; @@ -2412,6 +2306,8 @@ static struct sk_buff *receive_mergeable_xdp(struct net_device *dev, head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz); if (unlikely(!head_skb)) break; + + skb_mark_for_recycle(head_skb); return head_skb; case XDP_TX: @@ -2422,10 +2318,10 @@ static struct sk_buff *receive_mergeable_xdp(struct net_device *dev, break; } - put_xdp_frags(&xdp); + put_xdp_frags(rq, &xdp); err_xdp: - put_page(page); + page_pool_put_page(rq->page_pool, page, -1, true); mergeable_buf_free(rq, num_buf, dev, stats); u64_stats_inc(&stats->xdp_drops); @@ -2433,7 +2329,8 @@ err_xdp: return NULL; } -static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb, +static struct sk_buff *virtnet_skb_append_frag(struct receive_queue *rq, + struct sk_buff *head_skb, struct sk_buff *curr_skb, struct page *page, void *buf, int len, int truesize) @@ -2448,6 +2345,9 @@ static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb, if (unlikely(!nskb)) return NULL; + if (head_skb->pp_recycle) + skb_mark_for_recycle(nskb); + if (curr_skb == head_skb) skb_shinfo(curr_skb)->frag_list = nskb; else @@ -2465,7 +2365,10 @@ static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb, offset = buf - page_address(page); if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) { - put_page(page); + if (head_skb->pp_recycle) + page_pool_put_page(rq->page_pool, page, -1, true); + else + put_page(page); skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1, len, truesize); } else { @@ -2494,6 +2397,10 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, unsigned int headroom = mergeable_ctx_to_headroom(ctx); head_skb = NULL; + + if (rq->use_page_pool_dma) + page_pool_dma_sync_for_cpu(rq->page_pool, page, offset, len); + u64_stats_add(&stats->bytes, len - vi->hdr_len); if (check_mergeable_len(dev, ctx, len)) @@ -2518,6 +2425,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, if (unlikely(!curr_skb)) goto err_skb; + + skb_mark_for_recycle(head_skb); while (--num_buf) { buf = virtnet_rq_get_buf(rq, &len, &ctx); if (unlikely(!buf)) { @@ -2532,11 +2441,17 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, u64_stats_add(&stats->bytes, len); page = virt_to_head_page(buf); + if (rq->use_page_pool_dma) { + offset = buf - page_address(page); + page_pool_dma_sync_for_cpu(rq->page_pool, page, + offset, len); + } + if (check_mergeable_len(dev, ctx, len)) goto err_skb; truesize = mergeable_ctx_to_truesize(ctx); - curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page, + curr_skb = virtnet_skb_append_frag(rq, head_skb, curr_skb, page, buf, len, truesize); if (!curr_skb) goto err_skb; @@ -2546,7 +2461,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, return head_skb; err_skb: - put_page(page); + page_pool_put_page(rq->page_pool, page, -1, true); mergeable_buf_free(rq, num_buf, dev, stats); err_buf: @@ -2677,40 +2592,54 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, virtnet_receive_done(vi, rq, skb, flags); } -/* Unlike mergeable buffers, all buffers are allocated to the - * same size, except for the headroom. For this reason we do - * not need to use mergeable_len_to_ctx here - it is enough - * to store the headroom as the context ignoring the truesize. +static int virtnet_rq_submit(struct receive_queue *rq, char *buf, + int len, void *ctx, gfp_t gfp) +{ + if (rq->use_page_pool_dma) { + struct page *page = virt_to_head_page(buf); + dma_addr_t addr = page_pool_get_dma_addr(page) + + (buf - (char *)page_address(page)); + + sg_init_table(rq->sg, 1); + sg_fill_dma(rq->sg, addr, len); + return virtqueue_add_inbuf_premapped(rq->vq, rq->sg, 1, + buf, ctx, gfp); + } + + sg_init_one(rq->sg, buf, len); + return virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp); +} + +/* With page_pool, the actual allocation may exceed the requested size + * when the remaining page fragment can't fit another buffer. Encode + * the actual allocation size in ctx so build_skb() gets the correct + * buflen for truesize accounting. */ static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq, gfp_t gfp) { - char *buf; unsigned int xdp_headroom = virtnet_get_headroom(vi); - void *ctx = (void *)(unsigned long)xdp_headroom; - int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom; + unsigned int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom; + unsigned int alloc_len; + char *buf; + void *ctx; int err; len = SKB_DATA_ALIGN(len) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - if (unlikely(!skb_page_frag_refill(len, &rq->alloc_frag, gfp))) - return -ENOMEM; - - buf = virtnet_rq_alloc(rq, len, gfp); + alloc_len = len; + buf = page_pool_alloc_va(rq->page_pool, &alloc_len, gfp); if (unlikely(!buf)) return -ENOMEM; buf += VIRTNET_RX_PAD + xdp_headroom; - virtnet_rq_init_one_sg(rq, buf, vi->hdr_len + GOOD_PACKET_LEN); - - err = virtqueue_add_inbuf_premapped(rq->vq, rq->sg, 1, buf, ctx, gfp); - if (err < 0) { - virtnet_rq_unmap(rq, buf, 0); - put_page(virt_to_head_page(buf)); - } + ctx = mergeable_len_to_ctx(alloc_len, xdp_headroom); + err = virtnet_rq_submit(rq, buf, vi->hdr_len + GOOD_PACKET_LEN, ctx, gfp); + if (err < 0) + page_pool_put_page(rq->page_pool, virt_to_head_page(buf), -1, false); return err; } @@ -2783,13 +2712,12 @@ static unsigned int get_mergeable_buf_len(struct receive_queue *rq, static int add_recvbuf_mergeable(struct virtnet_info *vi, struct receive_queue *rq, gfp_t gfp) { - struct page_frag *alloc_frag = &rq->alloc_frag; unsigned int headroom = virtnet_get_headroom(vi); unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); - unsigned int len, hole; - void *ctx; + unsigned int len, alloc_len; char *buf; + void *ctx; int err; /* Extra tailroom is needed to satisfy XDP's assumption. This @@ -2798,39 +2726,22 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi, */ len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room); - if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp))) - return -ENOMEM; - - if (!alloc_frag->offset && len + room + sizeof(struct virtnet_rq_dma) > alloc_frag->size) - len -= sizeof(struct virtnet_rq_dma); - - buf = virtnet_rq_alloc(rq, len + room, gfp); + alloc_len = len + room; + buf = page_pool_alloc_va(rq->page_pool, &alloc_len, gfp); if (unlikely(!buf)) return -ENOMEM; buf += headroom; /* advance address leaving hole at front of pkt */ - hole = alloc_frag->size - alloc_frag->offset; - if (hole < len + room) { - /* To avoid internal fragmentation, if there is very likely not - * enough space for another buffer, add the remaining space to - * the current buffer. - * XDP core assumes that frame_size of xdp_buff and the length - * of the frag are PAGE_SIZE, so we disable the hole mechanism. - */ - if (!headroom) - len += hole; - alloc_frag->offset += hole; - } - virtnet_rq_init_one_sg(rq, buf, len); + if (!headroom) + len = alloc_len - room; ctx = mergeable_len_to_ctx(len + room, headroom); - err = virtqueue_add_inbuf_premapped(rq->vq, rq->sg, 1, buf, ctx, gfp); - if (err < 0) { - virtnet_rq_unmap(rq, buf, 0); - put_page(virt_to_head_page(buf)); - } + err = virtnet_rq_submit(rq, buf, len, ctx, gfp); + + if (err < 0) + page_pool_put_page(rq->page_pool, virt_to_head_page(buf), -1, false); return err; } @@ -2984,7 +2895,7 @@ static int virtnet_receive_packets(struct virtnet_info *vi, int packets = 0; void *buf; - if (!vi->big_packets || vi->mergeable_rx_bufs) { + if (rq->page_pool) { void *ctx; while (packets < budget && (buf = virtnet_rq_get_buf(rq, &len, &ctx))) { @@ -3149,7 +3060,10 @@ static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index) return err; err = xdp_rxq_info_reg_mem_model(&vi->rq[qp_index].xdp_rxq, - MEM_TYPE_PAGE_SHARED, NULL); + vi->rq[qp_index].page_pool ? + MEM_TYPE_PAGE_POOL : + MEM_TYPE_PAGE_SHARED, + vi->rq[qp_index].page_pool); if (err < 0) goto err_xdp_reg_mem_model; @@ -3189,6 +3103,82 @@ static void virtnet_update_settings(struct virtnet_info *vi) vi->duplex = duplex; } +static int virtnet_create_page_pools(struct virtnet_info *vi) +{ + int i, err; + + if (vi->big_packets && !vi->mergeable_rx_bufs) + return 0; + + for (i = 0; i < vi->max_queue_pairs; i++) { + struct receive_queue *rq = &vi->rq[i]; + struct page_pool_params pp_params = { 0 }; + struct device *dma_dev; + + if (rq->page_pool) + continue; + + if (rq->xsk_pool) + continue; + + pp_params.order = 0; + pp_params.pool_size = virtqueue_get_vring_size(rq->vq); + pp_params.nid = dev_to_node(vi->vdev->dev.parent); + pp_params.netdev = vi->dev; + pp_params.napi = &rq->napi; + + /* Use page_pool DMA mapping if backend supports DMA API. + * DMA_SYNC_DEV is needed for non-coherent archs on recycle. + */ + dma_dev = virtqueue_dma_dev(rq->vq); + if (dma_dev) { + pp_params.dev = dma_dev; + pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV; + pp_params.dma_dir = DMA_FROM_DEVICE; + pp_params.max_len = PAGE_SIZE; + pp_params.offset = 0; + rq->use_page_pool_dma = true; + } else { + /* No DMA API (e.g., VDUSE): page_pool for allocation only. */ + pp_params.flags = 0; + rq->use_page_pool_dma = false; + } + + rq->page_pool = page_pool_create(&pp_params); + if (IS_ERR(rq->page_pool)) { + err = PTR_ERR(rq->page_pool); + rq->page_pool = NULL; + goto err_cleanup; + } + } + return 0; + +err_cleanup: + while (--i >= 0) { + struct receive_queue *rq = &vi->rq[i]; + + if (rq->page_pool) { + page_pool_destroy(rq->page_pool); + rq->page_pool = NULL; + } + } + return err; +} + +static void virtnet_destroy_page_pools(struct virtnet_info *vi) +{ + int i; + + for (i = 0; i < vi->max_queue_pairs; i++) { + struct receive_queue *rq = &vi->rq[i]; + + if (rq->page_pool) { + page_pool_destroy(rq->page_pool); + rq->page_pool = NULL; + } + } +} + static int virtnet_open(struct net_device *dev) { struct virtnet_info *vi = netdev_priv(dev); @@ -5736,6 +5726,10 @@ static int virtnet_restore_up(struct virtio_device *vdev) if (err) return err; + err = virtnet_create_page_pools(vi); + if (err) + goto err_del_vqs; + virtio_device_ready(vdev); enable_rx_mode_work(vi); @@ -5745,12 +5739,24 @@ static int virtnet_restore_up(struct virtio_device *vdev) err = virtnet_open(vi->dev); rtnl_unlock(); if (err) - return err; + goto err_destroy_pools; } netif_tx_lock_bh(vi->dev); netif_device_attach(vi->dev); netif_tx_unlock_bh(vi->dev); + return 0; + +err_destroy_pools: + virtio_reset_device(vdev); + free_unused_bufs(vi); + virtnet_destroy_page_pools(vi); + virtnet_del_vqs(vi); + return err; + +err_del_vqs: + virtio_reset_device(vdev); + virtnet_del_vqs(vi); return err; } @@ -5878,7 +5884,7 @@ static int virtnet_xsk_pool_enable(struct net_device *dev, /* In big_packets mode, xdp cannot work, so there is no need to * initialize xsk of rq. */ - if (vi->big_packets && !vi->mergeable_rx_bufs) + if (!vi->rq[qid].page_pool) return -ENOENT; if (qid >= vi->curr_queue_pairs) @@ -6308,17 +6314,6 @@ static void free_receive_bufs(struct virtnet_info *vi) rtnl_unlock(); } -static void free_receive_page_frags(struct virtnet_info *vi) -{ - int i; - for (i = 0; i < vi->max_queue_pairs; i++) - if (vi->rq[i].alloc_frag.page) { - if (vi->rq[i].last_dma) - virtnet_rq_unmap(&vi->rq[i], vi->rq[i].last_dma, 0); - put_page(vi->rq[i].alloc_frag.page); - } -} - static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf) { struct virtnet_info *vi = vq->vdev->priv; @@ -6422,7 +6417,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi) vqs_info = kzalloc_objs(*vqs_info, total_vqs); if (!vqs_info) goto err_vqs_info; - if (!vi->big_packets || vi->mergeable_rx_bufs) { + if (vi->mergeable_rx_bufs || !vi->big_packets) { ctx = kzalloc_objs(*ctx, total_vqs); if (!ctx) goto err_ctx; @@ -6462,10 +6457,8 @@ static int virtnet_find_vqs(struct virtnet_info *vi) vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq); vi->sq[i].vq = vqs[txq2vq(i)]; } - /* run here: ret == 0. */ - err_find: kfree(ctx); err_ctx: @@ -6966,6 +6959,14 @@ static int virtnet_probe(struct virtio_device *vdev) goto free; } + /* Create page pools for receive queues. + * Page pools are created at probe time so they can be used + * with premapped DMA addresses throughout the device lifetime. + */ + err = virtnet_create_page_pools(vi); + if (err) + goto free_irq_moder; + #ifdef CONFIG_SYSFS if (vi->mergeable_rx_bufs) dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group; @@ -6979,7 +6980,7 @@ static int virtnet_probe(struct virtio_device *vdev) vi->failover = net_failover_create(vi->dev); if (IS_ERR(vi->failover)) { err = PTR_ERR(vi->failover); - goto free_vqs; + goto free_page_pools; } } @@ -7096,9 +7097,11 @@ free_unregister_netdev: unregister_netdev(dev); free_failover: net_failover_destroy(vi->failover); -free_vqs: +free_page_pools: + virtnet_destroy_page_pools(vi); +free_irq_moder: + virtnet_free_irq_moder(vi); virtio_reset_device(vdev); - free_receive_page_frags(vi); virtnet_del_vqs(vi); free: free_netdev(dev); @@ -7123,7 +7126,7 @@ static void remove_vq_common(struct virtnet_info *vi) free_receive_bufs(vi); - free_receive_page_frags(vi); + virtnet_destroy_page_pools(vi); virtnet_del_vqs(vi); } From 45339c237c6a9ef916061521314acae0a414a6df Mon Sep 17 00:00:00 2001 From: MD Danish Anwar Date: Wed, 11 Mar 2026 13:59:23 +0530 Subject: [PATCH 0570/1561] net: ti: icssg-prueth: Add HSR multicast FDB port membership management In HSR offload mode, multicast addresses can be added via HSR master (hsr0) or directly to slave ports (eth1/eth2). The FDB must track port membership: P0 (0x1) for HSR master, P1 (0x2) for slave port 1, and P2 (0x4) for slave port 2. When the same address is added from multiple paths, memberships must accumulate. Implement a hybrid approach using __dev_mc_sync() callbacks to track basic add/delete operations, checking netdev_hw_addr->synced to distinguish HSR-synced addresses from direct additions. Post-process to handle overlapping memberships by checking refcount: - refcount=2 with synced=1: HSR only (P0) - refcount>=3 with synced=1: HSR + direct (P0|P1/P2) - synced=0 with P0 set: HSR removed, clean up orphaned P0 On add operations, accumulate new membership with existing ports. On delete operations, remove only the specific port and clean up orphaned P0 bits if needed. Add error handling for icssg_fdb_lookup() which can return negative error codes (e.g., -ETIMEDOUT). On lookup failure in add/delete path, default to no existing membership. In the post-processing path, skip the address update to avoid corrupting FDB entries with garbage values. VLAN Interface Handling: Add support for multicast addresses added to VLAN interfaces on the HSR master (e.g., hsr0.7). These addresses require P0 (HSR master) bit to be set along with the port bits, since VLAN-tagged packets use separate FDB entries per VLAN ID. Without P0, the HSR master would not receive multicast packets on VLAN interfaces. Track whether the add/del operation came from a VLAN interface path and set P0 when in HSR offload mode with VLAN interfaces. Update orphaned P0 cleanup logic to preserve P0 for VLAN interfaces. Signed-off-by: MD Danish Anwar Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260311082923.2962937-1-danishanwar@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 249 +++++++++++++++++-- 1 file changed, 233 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index 372e55803168..591be5c8056b 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -669,28 +669,133 @@ static int icssg_prueth_del_mcast(struct net_device *ndev, const u8 *addr) return 0; } -static void icssg_prueth_hsr_fdb_add_del(struct prueth_emac *emac, - const u8 *addr, u8 vid, bool add) +/** + * icssg_is_addr_synced - Check if address is synced from HSR master + * @ndev: network device + * @addr: multicast MAC address + * + * Checks if the address is synced from HSR master (hsr0) via + * dev_mc_sync_multiple() or added directly to the slave port. + * + * Return: true if synced from HSR master, false if added directly + */ +static bool icssg_is_addr_synced(struct net_device *ndev, const u8 *addr) { - icssg_fdb_add_del(emac, addr, vid, - ICSSG_FDB_ENTRY_P0_MEMBERSHIP | - ICSSG_FDB_ENTRY_P1_MEMBERSHIP | - ICSSG_FDB_ENTRY_P2_MEMBERSHIP | - ICSSG_FDB_ENTRY_BLOCK, add); + struct netdev_hw_addr *ha; + bool is_synced = false; - if (add) - icssg_vtbl_modify(emac, vid, BIT(emac->port_id), - BIT(emac->port_id), add); + netif_addr_lock_bh(ndev); + netdev_for_each_mc_addr(ha, ndev) { + if (ether_addr_equal(ha->addr, addr)) { + is_synced = ha->synced; + break; + } + } + netif_addr_unlock_bh(ndev); + + return is_synced; +} + +/** + * icssg_hsr_fdb_update - Update FDB and VLAN table for HSR multicast + * @emac: PRUETH EMAC private data + * @addr: multicast MAC address + * @vid: VLAN ID + * @port_membership: port membership bitmap (P0=0x1, P1=0x2, P2=0x4) + * @add: true to add entry, false to delete + * + * Updates both FDB and VLAN table entries for the given address. + */ +static void icssg_hsr_fdb_update(struct prueth_emac *emac, const u8 *addr, + u8 vid, u8 port_membership, bool add) +{ + icssg_fdb_add_del(emac, addr, vid, port_membership, add); + icssg_vtbl_modify(emac, vid, BIT(emac->port_id), + BIT(emac->port_id), add); +} + +/** + * icssg_prueth_hsr_fdb_add_del - Manage FDB port membership for HSR multicast + * @emac: PRUETH EMAC private data + * @addr: multicast MAC address + * @vid: VLAN ID + * @is_synced: true if synced from HSR master, false if added directly + * @is_vlan_path: true if called from VLAN interface path, false for direct path + * @add: true to add/update, false to remove + * + * Manages FDB port membership bits (P0/P1/P2) for multicast addresses. + * On add: accumulates membership with existing ports. + * On delete: removes only the specific port, cleans up orphaned P0 if needed. + */ +static void icssg_prueth_hsr_fdb_add_del(struct prueth_emac *emac, + const u8 *addr, u8 vid, + bool is_synced, bool is_vlan_path, + bool add) +{ + int existing_membership, other_port_membership; + u8 port_membership; + + /* Set P0 (HSR master) bit when: + * - Address is synced from HSR master (is_synced=true), OR + * - In HSR offload mode AND it's a VLAN interface (is_vlan_path=true) + * This ensures VLAN interfaces on HSR master (hsr0.X) also get P0 set. + */ + if (is_synced || (emac->prueth->is_hsr_offload_mode && is_vlan_path)) + port_membership = ICSSG_FDB_ENTRY_P0_MEMBERSHIP | BIT(emac->port_id); + else + port_membership = BIT(emac->port_id); + existing_membership = icssg_fdb_lookup(emac, addr, vid); + if (existing_membership < 0) { + netdev_dbg(emac->ndev, + "FDB lookup failed for %pM: %d, assuming no existing entry\n", + addr, existing_membership); + existing_membership = 0; + } + + if (add) { + /* Accumulate with existing membership */ + port_membership |= existing_membership; + + netdev_dbg(emac->ndev, + "FDB add %pM: P%d%s -> membership 0x%x\n", + addr, emac->port_id, is_synced ? "+P0" : "", + port_membership); + + icssg_hsr_fdb_update(emac, addr, vid, port_membership, true); + } else { + other_port_membership = existing_membership & ~port_membership; + + /* Clean up orphaned P0 if neither HSR synced nor VLAN path */ + if (!is_synced && !is_vlan_path && + (existing_membership & ICSSG_FDB_ENTRY_P0_MEMBERSHIP)) + other_port_membership &= + ~ICSSG_FDB_ENTRY_P0_MEMBERSHIP; + + netdev_dbg(emac->ndev, + "FDB del %pM: 0x%x -> 0x%x\n", + addr, existing_membership, other_port_membership); + + icssg_hsr_fdb_update(emac, addr, vid, port_membership, false); + + if (other_port_membership) + icssg_hsr_fdb_update(emac, addr, vid, + other_port_membership, true); + } } static int icssg_prueth_hsr_add_mcast(struct net_device *ndev, const u8 *addr) { struct net_device *real_dev, *port_dev; + bool is_synced, is_vlan_path; struct prueth_emac *emac; u8 vlan_id, i; - vlan_id = is_vlan_dev(ndev) ? vlan_dev_vlan_id(ndev) : PRUETH_DFLT_VLAN_HSR; - real_dev = is_vlan_dev(ndev) ? vlan_dev_real_dev(ndev) : ndev; + is_vlan_path = is_vlan_dev(ndev); + vlan_id = is_vlan_path ? vlan_dev_vlan_id(ndev) : PRUETH_DFLT_VLAN_HSR; + real_dev = is_vlan_path ? vlan_dev_real_dev(ndev) : ndev; + + /* Check if this address is synced from HSR master */ + is_synced = icssg_is_addr_synced(ndev, addr); if (is_hsr_master(real_dev)) { for (i = HSR_PT_SLAVE_A; i < HSR_PT_INTERLINK; i++) { @@ -701,12 +806,14 @@ static int icssg_prueth_hsr_add_mcast(struct net_device *ndev, const u8 *addr) return -EINVAL; } icssg_prueth_hsr_fdb_add_del(emac, addr, vlan_id, + is_synced, is_vlan_path, true); dev_put(port_dev); } } else { emac = netdev_priv(real_dev); - icssg_prueth_hsr_fdb_add_del(emac, addr, vlan_id, true); + icssg_prueth_hsr_fdb_add_del(emac, addr, vlan_id, + is_synced, is_vlan_path, true); } return 0; @@ -715,11 +822,16 @@ static int icssg_prueth_hsr_add_mcast(struct net_device *ndev, const u8 *addr) static int icssg_prueth_hsr_del_mcast(struct net_device *ndev, const u8 *addr) { struct net_device *real_dev, *port_dev; + bool is_synced, is_vlan_path; struct prueth_emac *emac; u8 vlan_id, i; - vlan_id = is_vlan_dev(ndev) ? vlan_dev_vlan_id(ndev) : PRUETH_DFLT_VLAN_HSR; - real_dev = is_vlan_dev(ndev) ? vlan_dev_real_dev(ndev) : ndev; + is_vlan_path = is_vlan_dev(ndev); + vlan_id = is_vlan_path ? vlan_dev_vlan_id(ndev) : PRUETH_DFLT_VLAN_HSR; + real_dev = is_vlan_path ? vlan_dev_real_dev(ndev) : ndev; + + /* Check if this address was synced from HSR master */ + is_synced = icssg_is_addr_synced(ndev, addr); if (is_hsr_master(real_dev)) { for (i = HSR_PT_SLAVE_A; i < HSR_PT_INTERLINK; i++) { @@ -730,12 +842,14 @@ static int icssg_prueth_hsr_del_mcast(struct net_device *ndev, const u8 *addr) return -EINVAL; } icssg_prueth_hsr_fdb_add_del(emac, addr, vlan_id, + is_synced, is_vlan_path, false); dev_put(port_dev); } } else { emac = netdev_priv(real_dev); - icssg_prueth_hsr_fdb_add_del(emac, addr, vlan_id, false); + icssg_prueth_hsr_fdb_add_del(emac, addr, vlan_id, + is_synced, is_vlan_path, false); } return 0; @@ -1059,6 +1173,104 @@ static int emac_ndo_stop(struct net_device *ndev) return 0; } +/** + * icssg_hsr_handle_multicast_sync() - Handle HSR multicast overlapping memberships + * @emac: PRUETH EMAC private structure + * + * Post-process multicast addresses to handle overlapping memberships where + * the same address is added from multiple paths (hsr0 + eth1). The kernel + * increments refcount without triggering callbacks, so manually check refcount + * to detect and update FDB port membership accordingly. + */ +static void icssg_hsr_handle_multicast_sync(struct prueth_emac *emac) +{ + struct hsr_mcast_update { + struct list_head list; + u8 addr[ETH_ALEN]; + int refcount; + bool synced; + }; + struct hsr_mcast_update *update, *tmp; + struct net_device *ndev = emac->ndev; + u8 vlan_id = PRUETH_DFLT_VLAN_HSR; + struct netdev_hw_addr *ha; + int existing_membership; + LIST_HEAD(update_list); + u8 port_membership; + + /* Handle overlapping memberships: When the same address + * is added from multiple paths (hsr0 + eth1), kernel + * increments refcount without triggering callbacks. + * Manually check refcount to detect: + * - refcount=2 + synced: HSR only, membership = P0 + * - refcount>=3 + synced: HSR + direct, membership = P0|Px + * - !synced but P0 set: HSR removed, clean up orphaned P0 + * + * Collect addresses to update while holding the lock, then + * process them after releasing the lock to avoid sleeping + * while atomic (icssg_fdb_lookup/update can sleep). + */ + netif_addr_lock_bh(ndev); + netdev_for_each_mc_addr(ha, ndev) { + /* Queue this address for processing. + * We need to check existing_membership via FDB lookup + * (which can sleep), so defer that until after unlock. + * Save synced/refcount to reproduce original logic. + */ + update = kmalloc_obj(*update, GFP_ATOMIC); + if (!update) + continue; + + ether_addr_copy(update->addr, ha->addr); + update->synced = ha->synced; + update->refcount = ha->refcount; + list_add_tail(&update->list, &update_list); + } + netif_addr_unlock_bh(ndev); + + /* Process collected addresses outside spinlock */ + list_for_each_entry_safe(update, tmp, &update_list, list) { + existing_membership = icssg_fdb_lookup(emac, + update->addr, + vlan_id); + if (existing_membership < 0) { + netdev_dbg(ndev, + "FDB lookup failed for %pM: %d, skipping\n", + update->addr, existing_membership); + list_del(&update->list); + kfree(update); + continue; + } + + port_membership = existing_membership; + if (update->synced) { + port_membership |= + ICSSG_FDB_ENTRY_P0_MEMBERSHIP; + if (update->refcount >= 3) + port_membership |= BIT(emac->port_id); + else + port_membership &= ~BIT(emac->port_id); + } else if (existing_membership & + ICSSG_FDB_ENTRY_P0_MEMBERSHIP) { + port_membership &= + ~ICSSG_FDB_ENTRY_P0_MEMBERSHIP; + } + + if (existing_membership != port_membership) { + netdev_dbg(ndev, "Update %pM: 0x%x -> 0x%x\n", + update->addr, existing_membership, + port_membership); + + icssg_hsr_fdb_update(emac, update->addr, + vlan_id, port_membership, + true); + } + + list_del(&update->list); + kfree(update); + } +} + static void emac_ndo_set_rx_mode_work(struct work_struct *work) { struct prueth_emac *emac = container_of(work, struct prueth_emac, rx_mode_work); @@ -1085,8 +1297,13 @@ static void emac_ndo_set_rx_mode_work(struct work_struct *work) } if (emac->prueth->is_hsr_offload_mode) { + /* Track basic add/delete via callbacks */ __dev_mc_sync(ndev, icssg_prueth_hsr_add_mcast, icssg_prueth_hsr_del_mcast); + + /* Handle overlapping memberships */ + icssg_hsr_handle_multicast_sync(emac); + if (rtnl_trylock()) { vlan_for_each(emac->prueth->hsr_dev, icssg_update_vlan_mcast, emac); From 6a33a706265daa3a0d92fece0baf6f2c3915f1cd Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 15 Mar 2026 09:00:38 -0700 Subject: [PATCH 0571/1561] selftests: net: py: give bpftrace more time to start After commit under Fixes debug runners in the CI hit the following: # subprocess.TimeoutExpired: Command '['bpftrace', '-f', 'json', '-q', '-e', 'kprobe:netpoll_poll_dev { @hits = count(); } interval:s:10 { exit(); }']' timed out after 15 seconds # # Exception| net.lib.py.ksft.KsftFailEx: bpftrace failed to run!?: {} in netpoll_basic.py >10% of the time. Let's give bpftool more time to start, it can take a while on a debug kernel. Fixes: 82562972b854 ("selftests: net: pass bpftrace timeout to cmd()") Reviewed-by: Breno Leitao Reviewed-by: Petr Machata Reviewed-by: Nimrod Oren Link: https://patch.msgid.link/20260315160038.3187730-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/lib/py/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py index 17adc2b67ee0..6c44a3d2bbf7 100644 --- a/tools/testing/selftests/net/lib/py/utils.py +++ b/tools/testing/selftests/net/lib/py/utils.py @@ -258,7 +258,7 @@ def bpftrace(expr, json=None, ns=None, host=None, timeout=None): cmd_arr += ['-f', 'json', '-q'] if timeout: expr += ' interval:s:' + str(timeout) + ' { exit(); }' - timeout += 5 + timeout += 20 cmd_arr += ['-e', expr] cmd_obj = cmd(cmd_arr, ns=ns, host=host, shell=False, timeout=timeout) if json: From 854587e69ef3b7a14b4380d9b99e18693bb9a07b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 13 Mar 2026 12:03:46 +0000 Subject: [PATCH 0572/1561] tcp: improve inet6_ehashfn() entropy Instead of only using the 32 low order bits of the local address, use all of them. Xor net_hash_mix(net) with the 32 high order bits of the local address so that we can use __jhash_mix() three times. If we were hashing 4 extra bytes, we would need one __jhash_final() which is a bit expensive. Using net_hash_mix() at the beginning allows better register allocation. We no longer use a cascade of two jhash and inet6_ehash_secret, this was dubious/weak. Add a comment explaining why @lport is not part of the jhash computation. $ scripts/bloat-o-meter -t vmlinux.0 vmlinux.1 add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-24 (-24) Function old new delta inet6_ehashfn 330 306 -24 Total: Before=24855958, After=24855934, chg -0.00% Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260313120346.3378811-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/inet6_hashtables.c | 40 ++++++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 72bc68fef48a..109fbf620ad7 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -35,13 +35,43 @@ u32 inet6_ehashfn(const struct net *net, const struct in6_addr *laddr, const u16 lport, const struct in6_addr *faddr, const __be16 fport) { - u32 lhash, fhash; + u32 a, b, c; - lhash = (__force u32)laddr->s6_addr32[3]; - fhash = __ipv6_addr_jhash(faddr, tcp_ipv6_hash_secret); + /* + * Please look at jhash() implementation for reference. + * Hash laddr + faddr + lport/fport + net_hash_mix. + * Notes: + * We combine laddr[0] (high order 32 bits of local address) + * with net_hash_mix() to avoid using __jhash_final(a, b, c). + * + * We do not include JHASH_INITVAL + 36 contribution + * to initial values of a, b, c. + */ - return lport + __inet6_ehashfn(lhash, 0, fhash, fport, - inet6_ehash_secret + net_hash_mix(net)); + a = b = c = tcp_ipv6_hash_secret; + + a += (__force u32)laddr->s6_addr32[0] ^ net_hash_mix(net); + b += (__force u32)laddr->s6_addr32[1]; + c += (__force u32)laddr->s6_addr32[2]; + __jhash_mix(a, b, c); + + a += (__force u32)laddr->s6_addr32[3]; + b += (__force u32)faddr->s6_addr32[0]; + c += (__force u32)faddr->s6_addr32[1]; + __jhash_mix(a, b, c); + + a += (__force u32)faddr->s6_addr32[2]; + b += (__force u32)faddr->s6_addr32[3]; + c += (__force u32)fport; + __jhash_mix(a, b, c); + + /* Note: We need to add @lport instead of fully hashing it. + * See commits 9544d60a2605 ("inet: change lport contribution + * to inet_ehashfn() and inet6_ehashfn()") and d4438ce68bf1 + * ("inet: call inet6_ehashfn() once from inet6_hash_connect()") + * for references. + */ + return lport + c; } EXPORT_SYMBOL_GPL(inet6_ehashfn); From cc6421acd97f2a386516a16129d00254588bd9ad Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Fri, 13 Mar 2026 12:19:31 +0100 Subject: [PATCH 0573/1561] xsk: remove repeated defines Seems we have been carrying around repeated defines for unaligned mode logic. Remove redundant ones. Signed-off-by: Maciej Fijalkowski Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20260313111931.438911-1-maciej.fijalkowski@intel.com Signed-off-by: Jakub Kicinski --- include/net/xsk_buff_pool.h | 7 ------- net/xdp/xsk.h | 7 ------- 2 files changed, 14 deletions(-) diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index 0b1abdb99c9e..ccb3b350001f 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -174,13 +174,6 @@ static inline void xp_dma_sync_for_device(struct xsk_buff_pool *pool, dma_sync_single_for_device(pool->dev, dma, size, DMA_BIDIRECTIONAL); } -/* Masks for xdp_umem_page flags. - * The low 12-bits of the addr will be 0 since this is the page address, so we - * can use them for flags. - */ -#define XSK_NEXT_PG_CONTIG_SHIFT 0 -#define XSK_NEXT_PG_CONTIG_MASK BIT_ULL(XSK_NEXT_PG_CONTIG_SHIFT) - static inline bool xp_desc_crosses_non_contig_pg(struct xsk_buff_pool *pool, u64 addr, u32 len) { diff --git a/net/xdp/xsk.h b/net/xdp/xsk.h index a4bc4749faac..7c811b5cce76 100644 --- a/net/xdp/xsk.h +++ b/net/xdp/xsk.h @@ -4,13 +4,6 @@ #ifndef XSK_H_ #define XSK_H_ -/* Masks for xdp_umem_page flags. - * The low 12-bits of the addr will be 0 since this is the page address, so we - * can use them for flags. - */ -#define XSK_NEXT_PG_CONTIG_SHIFT 0 -#define XSK_NEXT_PG_CONTIG_MASK BIT_ULL(XSK_NEXT_PG_CONTIG_SHIFT) - struct xdp_ring_offset_v1 { __u64 producer; __u64 consumer; From a31bbe5ca2f8265f9c7dbc78f1787b88a1ad1775 Mon Sep 17 00:00:00 2001 From: "Jan Petrous (OSS)" Date: Fri, 13 Mar 2026 08:13:32 +0100 Subject: [PATCH 0574/1561] net: stmmac: platform: read channels irq Read IRQ resources for all rx/tx channels, to allow Multi-IRQ mode for platform glue drivers. Reviewed-by: Matthias Brugger Signed-off-by: Jan Petrous (OSS) Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20260313-dwmac_multi_irq-v12-1-b5c9d0aa13d6@oss.nxp.com Signed-off-by: Jakub Kicinski --- .../ethernet/stmicro/stmmac/stmmac_platform.c | 57 ++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 3b514a702612..59aac0afc609 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -695,9 +695,47 @@ struct clk *stmmac_pltfr_find_clk(struct plat_stmmacenet_data *plat_dat, } EXPORT_SYMBOL_GPL(stmmac_pltfr_find_clk); +/** + * stmmac_pltfr_get_irq_array - Read per-channel IRQs from platform device + * @pdev: platform device + * @fmt: IRQ name format string (e.g., "tx-queue-%d") + * @irqs: array to store IRQ numbers + * @num: maximum number of IRQs to read + * + * Return: 0 on success, -EPROBE_DEFER if IRQ is deferred, -EINVAL on error. + * Missing IRQs are set to 0 and iteration stops at first missing IRQ. + */ +static int stmmac_pltfr_get_irq_array(struct platform_device *pdev, + const char *fmt, int *irqs, size_t num) +{ + char name[16]; + int i; + + for (i = 0; i < num; i++) { + if (snprintf(name, sizeof(name), fmt, i) >= sizeof(name)) + return -EINVAL; + + irqs[i] = platform_get_irq_byname_optional(pdev, name); + if (irqs[i] == -EPROBE_DEFER) + return -EPROBE_DEFER; + + if (irqs[i] <= 0) { + dev_dbg(&pdev->dev, "IRQ %s not found\n", name); + + /* Stop silently on first unset irq */ + irqs[i] = 0; + break; + } + } + + return 0; +} + int stmmac_get_platform_resources(struct platform_device *pdev, struct stmmac_resources *stmmac_res) { + int ret; + memset(stmmac_res, 0, sizeof(*stmmac_res)); /* Get IRQ information early to have an ability to ask for deferred @@ -733,7 +771,24 @@ int stmmac_get_platform_resources(struct platform_device *pdev, stmmac_res->addr = devm_platform_ioremap_resource(pdev, 0); - return PTR_ERR_OR_ZERO(stmmac_res->addr); + if (IS_ERR(stmmac_res->addr)) + return PTR_ERR(stmmac_res->addr); + + /* TX channels irq */ + ret = stmmac_pltfr_get_irq_array(pdev, "tx-queue-%d", + stmmac_res->tx_irq, + MTL_MAX_TX_QUEUES); + if (ret) + return ret; + + /* RX channels irq */ + ret = stmmac_pltfr_get_irq_array(pdev, "rx-queue-%d", + stmmac_res->rx_irq, + MTL_MAX_RX_QUEUES); + if (ret) + return ret; + + return 0; } EXPORT_SYMBOL_GPL(stmmac_get_platform_resources); From cc7a3435dfadb7469082c6b09018fb2b9cbdeda1 Mon Sep 17 00:00:00 2001 From: "Jan Petrous (OSS)" Date: Fri, 13 Mar 2026 08:13:34 +0100 Subject: [PATCH 0575/1561] dt-bindings: net: nxp,s32-dwmac: Declare per-queue interrupts The DWMAC IP on NXP S32G/R SoCs has connected queue-based IRQ lines, set them to allow using Multi-IRQ mode. Reviewed-by: Matthias Brugger Acked-by: Conor Dooley Signed-off-by: Jan Petrous (OSS) Link: https://patch.msgid.link/20260313-dwmac_multi_irq-v12-3-b5c9d0aa13d6@oss.nxp.com Signed-off-by: Jakub Kicinski --- .../bindings/net/nxp,s32-dwmac.yaml | 47 +++++++++++++++++-- 1 file changed, 42 insertions(+), 5 deletions(-) diff --git a/Documentation/devicetree/bindings/net/nxp,s32-dwmac.yaml b/Documentation/devicetree/bindings/net/nxp,s32-dwmac.yaml index 1b2934f3c87c..753a04941659 100644 --- a/Documentation/devicetree/bindings/net/nxp,s32-dwmac.yaml +++ b/Documentation/devicetree/bindings/net/nxp,s32-dwmac.yaml @@ -1,5 +1,5 @@ # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) -# Copyright 2021-2024 NXP +# Copyright 2021-2026 NXP %YAML 1.2 --- $id: http://devicetree.org/schemas/net/nxp,s32-dwmac.yaml# @@ -16,6 +16,8 @@ description: the SoC S32R45 has two instances. The devices can use RGMII/RMII/MII interface over Pinctrl device or the output can be routed to the embedded SerDes for SGMII connectivity. + The DWMAC instances have connected all RX/TX queues interrupts, + enabling load balancing of data traffic across all CPU cores. properties: compatible: @@ -45,10 +47,25 @@ properties: FlexTimer Modules connect to GMAC_0. interrupts: - maxItems: 1 + minItems: 1 + maxItems: 11 interrupt-names: - const: macirq + oneOf: + - items: + - const: macirq + - items: + - const: macirq + - const: tx-queue-0 + - const: rx-queue-0 + - const: tx-queue-1 + - const: rx-queue-1 + - const: tx-queue-2 + - const: rx-queue-2 + - const: tx-queue-3 + - const: rx-queue-3 + - const: tx-queue-4 + - const: rx-queue-4 clocks: items: @@ -88,8 +105,28 @@ examples: <0x0 0x4007c004 0x0 0x4>; /* GMAC_0_CTRL_STS */ nxp,phy-sel = <&gpr 0x4>; interrupt-parent = <&gic>; - interrupts = ; - interrupt-names = "macirq"; + interrupts = , + /* CHN 0: tx, rx */ + , + , + /* CHN 1: tx, rx */ + , + , + /* CHN 2: tx, rx */ + , + , + /* CHN 3: tx, rx */ + , + , + /* CHN 4: tx, rx */ + , + ; + interrupt-names = "macirq", + "tx-queue-0", "rx-queue-0", + "tx-queue-1", "rx-queue-1", + "tx-queue-2", "rx-queue-2", + "tx-queue-3", "rx-queue-3", + "tx-queue-4", "rx-queue-4"; snps,mtl-rx-config = <&mtl_rx_setup>; snps,mtl-tx-config = <&mtl_tx_setup>; clocks = <&clks 24>, <&clks 17>, <&clks 16>, <&clks 15>; From 66ccb4f1d20551969e4aff9128f239c195b3e543 Mon Sep 17 00:00:00 2001 From: "Jan Petrous (OSS)" Date: Fri, 13 Mar 2026 08:13:35 +0100 Subject: [PATCH 0576/1561] stmmac: s32: enable support for Multi-IRQ mode Based on previous changes in platform driver, the vendor glue driver can enable Multi-IRQ mode, if needed. To get enabled Multi-IRQ mode for dwmac-s32, the driver checks: 1) property of 'snps,mtl-xx-config' subnode defines 'snps,xx-queues-to-use' bigger then one, ie: ethernet@4033c000 { compatible = "nxp,s32g2-dwmac"; ... snps,mtl-rx-config = <&mtl_rx_setup>; ... mtl_rx_setup: rx-queues-config { snps,rx-queues-to-use = <2>; }; 2) queue based IRQs are set, ie: ethernet@4033c000 { compatible = "nxp,s32g2-dwmac"; ... interrupts = , /* CHN 0: tx, rx */ , , /* CHN 1: tx, rx */ , ; interrupt-names = "macirq", "tx-queue-0", "rx-queue-0", "tx-queue-1", "rx-queue-1"; If those prerequisites are met, the driver switches to Multi-IRQ mode, using per-queue IRQs for rx/tx data pathr: [ 1.387045] s32-dwmac 4033c000.ethernet: Multi-IRQ mode (per queue IRQs) selected Now the driver owns all queues IRQs: root@s32g399aevb3:~# grep eth /proc/interrupts 29: 0 0 0 0 0 0 0 0 GICv3 89 Level eth0:mac 30: 0 0 0 0 0 0 0 0 GICv3 91 Level eth0:rx-0 31: 0 0 0 0 0 0 0 0 GICv3 93 Level eth0:rx-1 32: 0 0 0 0 0 0 0 0 GICv3 95 Level eth0:rx-2 33: 0 0 0 0 0 0 0 0 GICv3 97 Level eth0:rx-3 34: 0 0 0 0 0 0 0 0 GICv3 99 Level eth0:rx-4 35: 0 0 0 0 0 0 0 0 GICv3 90 Level eth0:tx-0 36: 0 0 0 0 0 0 0 0 GICv3 92 Level eth0:tx-1 37: 0 0 0 0 0 0 0 0 GICv3 94 Level eth0:tx-2 38: 0 0 0 0 0 0 0 0 GICv3 96 Level eth0:tx-3 39: 0 0 0 0 0 0 0 0 GICv3 98 Level eth0:tx-4 Otherwise, if one of the prerequisite don't met, the driver continue with MAC IRQ mode: [ 1.387045] s32-dwmac 4033c000.ethernet: MAC IRQ mode selected And only MAC IRQ will be attached: root@s32g399aevb3:~# grep eth /proc/interrupts 29: 0 0 0 0 0 0 0 0 GICv3 89 Level eth0:mac What represents the original MAC IRQ mode and is fully backward compatible. Reviewed-by: Matthias Brugger Signed-off-by: Jan Petrous (OSS) Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20260313-dwmac_multi_irq-v12-4-b5c9d0aa13d6@oss.nxp.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac-s32.c | 36 ++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c index 48fceadc55b1..024d8e10e918 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c @@ -2,7 +2,7 @@ /* * NXP S32G/R GMAC glue layer * - * Copyright 2019-2024 NXP + * Copyright 2019-2026 NXP * */ @@ -110,6 +110,37 @@ static void s32_gmac_exit(struct device *dev, void *priv) clk_disable_unprepare(gmac->rx_clk); } +static void s32_gmac_setup_multi_irq(struct device *dev, + struct plat_stmmacenet_data *plat, + struct stmmac_resources *res) +{ + int i; + + /* RX IRQs */ + for (i = 0; i < plat->rx_queues_to_use; i++) { + if (res->rx_irq[i] <= 0) { + dev_dbg(dev, "Missing RX queue %d interrupt\n", i); + goto mac_irq_mode; + } + } + + /* TX IRQs */ + for (i = 0; i < plat->tx_queues_to_use; i++) { + if (res->tx_irq[i] <= 0) { + dev_dbg(dev, "Missing TX queue %d interrupt\n", i); + goto mac_irq_mode; + } + } + + plat->flags |= STMMAC_FLAG_MULTI_MSI_EN; + dev_info(dev, "Multi-IRQ mode (per queue IRQs) selected\n"); + return; + +mac_irq_mode: + plat->flags &= ~STMMAC_FLAG_MULTI_MSI_EN; + dev_info(dev, "MAC IRQ mode selected\n"); +} + static int s32_dwmac_probe(struct platform_device *pdev) { struct plat_stmmacenet_data *plat; @@ -165,6 +196,9 @@ static int s32_dwmac_probe(struct platform_device *pdev) plat->core_type = DWMAC_CORE_GMAC4; plat->pmt = true; plat->flags |= STMMAC_FLAG_SPH_DISABLE; + + s32_gmac_setup_multi_irq(dev, plat, &res); + plat->rx_fifo_size = 20480; plat->tx_fifo_size = 20480; From dab177cbea3491354cc3c0b268602d8902501233 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 13 Mar 2026 11:51:58 +0000 Subject: [PATCH 0577/1561] net: stmmac: move MSI data out of struct stmmac_priv Only three platforms supprt MSIs, which means having all the strings and interrupt arrays always allocated wastes space. None of this data is performance critical - this data is only used when requesting and releasing the MSI interrupts. Move the MSI data out of struct stmmac_priv into its own separately allocated structure, and move its initialisation to a separate function. This removes 768 bytes from struct stmmac_priv. Link: https://lore.kernel.org/r/aYtq4ypxXTvn_Is6@shell.armlinux.org.uk Reviewed-by: Florian Bezdeka Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w113e-0000000DDwc-2oRv@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 31 +++--- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 102 +++++++++++------- 2 files changed, 81 insertions(+), 52 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index bba9bb9c95bf..7a66edba8f66 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -243,6 +243,23 @@ struct stmmac_est { u32 max_sdu[MTL_MAX_TX_QUEUES]; }; +struct stmmac_msi { + int sfty_ce_irq; + int sfty_ue_irq; + int rx_irq[MTL_MAX_RX_QUEUES]; + int tx_irq[MTL_MAX_TX_QUEUES]; + + /*irq name */ + char int_name_mac[IFNAMSIZ + 9]; + char int_name_wol[IFNAMSIZ + 9]; + char int_name_lpi[IFNAMSIZ + 9]; + char int_name_sfty[IFNAMSIZ + 10]; + char int_name_sfty_ce[IFNAMSIZ + 10]; + char int_name_sfty_ue[IFNAMSIZ + 10]; + char int_name_rx_irq[MTL_MAX_RX_QUEUES][IFNAMSIZ + 14]; + char int_name_tx_irq[MTL_MAX_TX_QUEUES][IFNAMSIZ + 18]; +}; + struct stmmac_priv { /* Frequently used values are kept adjacent for cache effect */ u32 tx_coal_frames[MTL_MAX_TX_QUEUES]; @@ -329,19 +346,7 @@ struct stmmac_priv { unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; unsigned int num_double_vlans; int sfty_irq; - int sfty_ce_irq; - int sfty_ue_irq; - int rx_irq[MTL_MAX_RX_QUEUES]; - int tx_irq[MTL_MAX_TX_QUEUES]; - /*irq name */ - char int_name_mac[IFNAMSIZ + 9]; - char int_name_wol[IFNAMSIZ + 9]; - char int_name_lpi[IFNAMSIZ + 9]; - char int_name_sfty[IFNAMSIZ + 10]; - char int_name_sfty_ce[IFNAMSIZ + 10]; - char int_name_sfty_ue[IFNAMSIZ + 10]; - char int_name_rx_irq[MTL_MAX_RX_QUEUES][IFNAMSIZ + 14]; - char int_name_tx_irq[MTL_MAX_TX_QUEUES][IFNAMSIZ + 18]; + struct stmmac_msi *msi; #ifdef CONFIG_DEBUG_FS struct dentry *dbgfs_dir; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 11150bddd872..124d7a00f9f0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3725,6 +3725,7 @@ static void stmmac_free_irq(struct net_device *dev, enum request_irq_err irq_err, int irq_idx) { struct stmmac_priv *priv = netdev_priv(dev); + struct stmmac_msi *msi = priv->msi; int j; switch (irq_err) { @@ -3732,28 +3733,30 @@ static void stmmac_free_irq(struct net_device *dev, irq_idx = priv->plat->tx_queues_to_use; fallthrough; case REQ_IRQ_ERR_TX: - for (j = irq_idx - 1; j >= 0; j--) { - if (priv->tx_irq[j] > 0) { - irq_set_affinity_hint(priv->tx_irq[j], NULL); - free_irq(priv->tx_irq[j], &priv->dma_conf.tx_queue[j]); + for (j = irq_idx - 1; msi && j >= 0; j--) { + if (msi->tx_irq[j] > 0) { + irq_set_affinity_hint(msi->tx_irq[j], NULL); + free_irq(msi->tx_irq[j], + &priv->dma_conf.tx_queue[j]); } } irq_idx = priv->plat->rx_queues_to_use; fallthrough; case REQ_IRQ_ERR_RX: - for (j = irq_idx - 1; j >= 0; j--) { - if (priv->rx_irq[j] > 0) { - irq_set_affinity_hint(priv->rx_irq[j], NULL); - free_irq(priv->rx_irq[j], &priv->dma_conf.rx_queue[j]); + for (j = irq_idx - 1; msi && j >= 0; j--) { + if (msi->rx_irq[j] > 0) { + irq_set_affinity_hint(msi->rx_irq[j], NULL); + free_irq(msi->rx_irq[j], + &priv->dma_conf.rx_queue[j]); } } - if (priv->sfty_ue_irq > 0 && priv->sfty_ue_irq != dev->irq) - free_irq(priv->sfty_ue_irq, dev); + if (msi && msi->sfty_ue_irq > 0 && msi->sfty_ue_irq != dev->irq) + free_irq(msi->sfty_ue_irq, dev); fallthrough; case REQ_IRQ_ERR_SFTY_UE: - if (priv->sfty_ce_irq > 0 && priv->sfty_ce_irq != dev->irq) - free_irq(priv->sfty_ce_irq, dev); + if (msi && msi->sfty_ce_irq > 0 && msi->sfty_ce_irq != dev->irq) + free_irq(msi->sfty_ce_irq, dev); fallthrough; case REQ_IRQ_ERR_SFTY_CE: if (priv->wol_irq > 0 && priv->wol_irq != dev->irq) @@ -3773,9 +3776,30 @@ static void stmmac_free_irq(struct net_device *dev, } } +static int stmmac_msi_init(struct stmmac_priv *priv, + struct stmmac_resources *res) +{ + int i; + + priv->msi = devm_kmalloc(priv->device, sizeof(*priv->msi), GFP_KERNEL); + if (!priv->msi) + return -ENOMEM; + + priv->msi->sfty_ce_irq = res->sfty_ce_irq; + priv->msi->sfty_ue_irq = res->sfty_ue_irq; + + for (i = 0; i < MTL_MAX_RX_QUEUES; i++) + priv->msi->rx_irq[i] = res->rx_irq[i]; + for (i = 0; i < MTL_MAX_TX_QUEUES; i++) + priv->msi->tx_irq[i] = res->tx_irq[i]; + + return 0; +} + static int stmmac_request_irq_multi_msi(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); + struct stmmac_msi *msi = priv->msi; enum request_irq_err irq_err; int irq_idx = 0; char *int_name; @@ -3783,7 +3807,7 @@ static int stmmac_request_irq_multi_msi(struct net_device *dev) int i; /* For common interrupt */ - int_name = priv->int_name_mac; + int_name = msi->int_name_mac; sprintf(int_name, "%s:%s", dev->name, "mac"); ret = request_irq(dev->irq, stmmac_mac_interrupt, 0, int_name, dev); @@ -3799,7 +3823,7 @@ static int stmmac_request_irq_multi_msi(struct net_device *dev) * is used for WoL */ if (priv->wol_irq > 0 && priv->wol_irq != dev->irq) { - int_name = priv->int_name_wol; + int_name = msi->int_name_wol; sprintf(int_name, "%s:%s", dev->name, "wol"); ret = request_irq(priv->wol_irq, stmmac_mac_interrupt, @@ -3817,7 +3841,7 @@ static int stmmac_request_irq_multi_msi(struct net_device *dev) * Error line in case of another line is used */ if (priv->sfty_irq > 0 && priv->sfty_irq != dev->irq) { - int_name = priv->int_name_sfty; + int_name = msi->int_name_sfty; sprintf(int_name, "%s:%s", dev->name, "safety"); ret = request_irq(priv->sfty_irq, stmmac_safety_interrupt, 0, int_name, dev); @@ -3833,16 +3857,16 @@ static int stmmac_request_irq_multi_msi(struct net_device *dev) /* Request the Safety Feature Correctible Error line in * case of another line is used */ - if (priv->sfty_ce_irq > 0 && priv->sfty_ce_irq != dev->irq) { - int_name = priv->int_name_sfty_ce; + if (msi->sfty_ce_irq > 0 && msi->sfty_ce_irq != dev->irq) { + int_name = msi->int_name_sfty_ce; sprintf(int_name, "%s:%s", dev->name, "safety-ce"); - ret = request_irq(priv->sfty_ce_irq, + ret = request_irq(msi->sfty_ce_irq, stmmac_safety_interrupt, 0, int_name, dev); if (unlikely(ret < 0)) { netdev_err(priv->dev, "%s: alloc sfty ce MSI %d (error: %d)\n", - __func__, priv->sfty_ce_irq, ret); + __func__, msi->sfty_ce_irq, ret); irq_err = REQ_IRQ_ERR_SFTY_CE; goto irq_error; } @@ -3851,16 +3875,16 @@ static int stmmac_request_irq_multi_msi(struct net_device *dev) /* Request the Safety Feature Uncorrectible Error line in * case of another line is used */ - if (priv->sfty_ue_irq > 0 && priv->sfty_ue_irq != dev->irq) { - int_name = priv->int_name_sfty_ue; + if (msi->sfty_ue_irq > 0 && msi->sfty_ue_irq != dev->irq) { + int_name = msi->int_name_sfty_ue; sprintf(int_name, "%s:%s", dev->name, "safety-ue"); - ret = request_irq(priv->sfty_ue_irq, + ret = request_irq(msi->sfty_ue_irq, stmmac_safety_interrupt, 0, int_name, dev); if (unlikely(ret < 0)) { netdev_err(priv->dev, "%s: alloc sfty ue MSI %d (error: %d)\n", - __func__, priv->sfty_ue_irq, ret); + __func__, msi->sfty_ue_irq, ret); irq_err = REQ_IRQ_ERR_SFTY_UE; goto irq_error; } @@ -3870,23 +3894,23 @@ static int stmmac_request_irq_multi_msi(struct net_device *dev) for (i = 0; i < priv->plat->rx_queues_to_use; i++) { if (i >= MTL_MAX_RX_QUEUES) break; - if (priv->rx_irq[i] == 0) + if (msi->rx_irq[i] == 0) continue; - int_name = priv->int_name_rx_irq[i]; + int_name = msi->int_name_rx_irq[i]; sprintf(int_name, "%s:%s-%d", dev->name, "rx", i); - ret = request_irq(priv->rx_irq[i], + ret = request_irq(msi->rx_irq[i], stmmac_msi_intr_rx, 0, int_name, &priv->dma_conf.rx_queue[i]); if (unlikely(ret < 0)) { netdev_err(priv->dev, "%s: alloc rx-%d MSI %d (error: %d)\n", - __func__, i, priv->rx_irq[i], ret); + __func__, i, msi->rx_irq[i], ret); irq_err = REQ_IRQ_ERR_RX; irq_idx = i; goto irq_error; } - irq_set_affinity_hint(priv->rx_irq[i], + irq_set_affinity_hint(msi->rx_irq[i], cpumask_of(i % num_online_cpus())); } @@ -3894,23 +3918,23 @@ static int stmmac_request_irq_multi_msi(struct net_device *dev) for (i = 0; i < priv->plat->tx_queues_to_use; i++) { if (i >= MTL_MAX_TX_QUEUES) break; - if (priv->tx_irq[i] == 0) + if (msi->tx_irq[i] == 0) continue; - int_name = priv->int_name_tx_irq[i]; + int_name = msi->int_name_tx_irq[i]; sprintf(int_name, "%s:%s-%d", dev->name, "tx", i); - ret = request_irq(priv->tx_irq[i], + ret = request_irq(msi->tx_irq[i], stmmac_msi_intr_tx, 0, int_name, &priv->dma_conf.tx_queue[i]); if (unlikely(ret < 0)) { netdev_err(priv->dev, "%s: alloc tx-%d MSI %d (error: %d)\n", - __func__, i, priv->tx_irq[i], ret); + __func__, i, msi->tx_irq[i], ret); irq_err = REQ_IRQ_ERR_TX; irq_idx = i; goto irq_error; } - irq_set_affinity_hint(priv->tx_irq[i], + irq_set_affinity_hint(msi->tx_irq[i], cpumask_of(i % num_online_cpus())); } @@ -7806,12 +7830,12 @@ static int __stmmac_dvr_probe(struct device *device, priv->dev->irq = res->irq; priv->wol_irq = res->wol_irq; priv->sfty_irq = res->sfty_irq; - priv->sfty_ce_irq = res->sfty_ce_irq; - priv->sfty_ue_irq = res->sfty_ue_irq; - for (i = 0; i < MTL_MAX_RX_QUEUES; i++) - priv->rx_irq[i] = res->rx_irq[i]; - for (i = 0; i < MTL_MAX_TX_QUEUES; i++) - priv->tx_irq[i] = res->tx_irq[i]; + + if (priv->plat->flags & STMMAC_FLAG_MULTI_MSI_EN) { + ret = stmmac_msi_init(priv, res); + if (ret) + return ret; + } if (!is_zero_ether_addr(res->mac)) eth_hw_addr_set(priv->dev, res->mac); From 348baefbb635cbb448e154f38c93657d4cf23936 Mon Sep 17 00:00:00 2001 From: Charles Perry Date: Fri, 13 Mar 2026 07:21:39 -0700 Subject: [PATCH 0578/1561] net: macb: set default_an_inband to true for SGMII Most platforms using GEM in SGMII mode use in-band autonegotiation because it is on by default in GEM's 1G PCS and is always on since commit e276e5e40e92 ("net: macb: Disable PCS auto-negotiation for SGMII fixed-link mode"). Leave it on if possible using the "default_an_inband" flag of "struct phylink_config" so that platforms that lack in-band autonegotiation configurability at the PHY do not break with commit 1338cfef1ff1 ("net: macb: fix SGMII with inband aneg disabled") which will turn off in-band autoneg for non hot pluggable PHYs. Once the majority of the PHY drivers that support SGMII have the ->config_inband() callback, this commit could be reverted so that non hot pluggable PHY use outband negotiation with macb, like its the case for other MACs. Fixes: 1338cfef1ff1 ("net: macb: fix SGMII with inband aneg disabled") Reported-by: Conor Dooley Closes: https://lore.kernel.org/r/20260304-nebulizer-rounding-40fbc81a2ba1@spud Signed-off-by: Charles Perry Tested-by: Conor Dooley Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20260313142140.4040647-1-charles.perry@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 5e27e0e87a55..63105a5f46a7 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1029,6 +1029,12 @@ static int macb_mii_probe(struct net_device *dev) if (bp->phy_interface == PHY_INTERFACE_MODE_SGMII) { bp->phylink_config.poll_fixed_state = true; bp->phylink_config.get_fixed_state = macb_get_pcs_fixed_state; + /* The PCSAUTONEG bit in PCSCNTRL is on out of reset. Setting + * default_an_inband to true tells phylink to turn it off only + * if necessary (e.g. a fixed link or a PHY that doesn't support + * inband). + */ + bp->phylink_config.default_an_inband = true; } bp->phylink_config.mac_capabilities = MAC_ASYM_PAUSE | From b513dde96c0ac19874175360bde9ad13811a8120 Mon Sep 17 00:00:00 2001 From: Cedric Jehasse Date: Wed, 11 Mar 2026 11:46:18 +0100 Subject: [PATCH 0579/1561] net/sched: cls_flower: remove unions from fl_flow_key When creating a flower classifier with an ipv4 address the flow_dissector has both FLOW_DISSECTOR_KEY_IPV4_ADDRS and FLOW_DISSECTOR_KEY_IPV6_ADDRS bits set in used_keys. This happens because ipv4/ipv6 fields are a union and FL_KEY_SET_IF_MASKED() will interpret either being set as both. Removing the unions fixes this behavior without needing special handling for union fields. Example of a command that caused FLOW_DISSECTOR_KEY_IPV4_ADDRS and FLOW_DISSECTOR_KEY_IPV6_ADDRS to be set: tc filter add dev p1 ingress protocol ip flower skip_sw \ dst_ip 224.0.1.129 action trap Signed-off-by: Cedric Jehasse Link: https://patch.msgid.link/20260311-net-next-mv88e6xxx-tcam-v8-1-32dd5ba30002@luminex.be Signed-off-by: Paolo Abeni --- net/sched/cls_flower.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 26070c892305..88f8a32fab2b 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -59,18 +59,14 @@ struct fl_flow_key { struct flow_dissector_key_eth_addrs eth; struct flow_dissector_key_vlan vlan; struct flow_dissector_key_vlan cvlan; - union { - struct flow_dissector_key_ipv4_addrs ipv4; - struct flow_dissector_key_ipv6_addrs ipv6; - }; + struct flow_dissector_key_ipv4_addrs ipv4; + struct flow_dissector_key_ipv6_addrs ipv6; struct flow_dissector_key_ports tp; struct flow_dissector_key_icmp icmp; struct flow_dissector_key_arp arp; struct flow_dissector_key_keyid enc_key_id; - union { - struct flow_dissector_key_ipv4_addrs enc_ipv4; - struct flow_dissector_key_ipv6_addrs enc_ipv6; - }; + struct flow_dissector_key_ipv4_addrs enc_ipv4; + struct flow_dissector_key_ipv6_addrs enc_ipv6; struct flow_dissector_key_ports enc_tp; struct flow_dissector_key_mpls mpls; struct flow_dissector_key_tcp tcp; From 639f1dcfde5540a8fafa8458d3e6be05207a68db Mon Sep 17 00:00:00 2001 From: Cedric Jehasse Date: Wed, 11 Mar 2026 11:46:19 +0100 Subject: [PATCH 0580/1561] net: dsa: mv88e6xxx: Add partial support for TCAM entries This patch adds partial Ternary Content Addressable Memory (TCAM) for the mv88e6390 and mv88e6393 family of switches. TCAM entries allow the switch to match the first 48 or 96 bytes of a frame and take actions on matched frames. This patch introduces a subset of the available TCAM functionality. Matching on ip addresses/protocol and trapping to the cpu. Eg. to trap traffic with destination ip 224.0.1.129 to the cpu: tc qdisc add dev p1 clsact tc filter add dev p1 ingress protocol ip flower skip_sw \ dst_ip 224.0.1.129 action trap Signed-off-by: Cedric Jehasse Link: https://patch.msgid.link/20260311-net-next-mv88e6xxx-tcam-v8-2-32dd5ba30002@luminex.be Signed-off-by: Paolo Abeni --- drivers/net/dsa/mv88e6xxx/Makefile | 2 + drivers/net/dsa/mv88e6xxx/chip.c | 35 +++ drivers/net/dsa/mv88e6xxx/chip.h | 52 +++++ drivers/net/dsa/mv88e6xxx/port.c | 28 ++- drivers/net/dsa/mv88e6xxx/port.h | 7 +- drivers/net/dsa/mv88e6xxx/tcam.c | 338 +++++++++++++++++++++++++++ drivers/net/dsa/mv88e6xxx/tcam.h | 41 ++++ drivers/net/dsa/mv88e6xxx/tcflower.c | 167 +++++++++++++ drivers/net/dsa/mv88e6xxx/tcflower.h | 14 ++ 9 files changed, 682 insertions(+), 2 deletions(-) create mode 100644 drivers/net/dsa/mv88e6xxx/tcam.c create mode 100644 drivers/net/dsa/mv88e6xxx/tcam.h create mode 100644 drivers/net/dsa/mv88e6xxx/tcflower.c create mode 100644 drivers/net/dsa/mv88e6xxx/tcflower.h diff --git a/drivers/net/dsa/mv88e6xxx/Makefile b/drivers/net/dsa/mv88e6xxx/Makefile index dd961081d631..b0b08c6f159c 100644 --- a/drivers/net/dsa/mv88e6xxx/Makefile +++ b/drivers/net/dsa/mv88e6xxx/Makefile @@ -21,6 +21,8 @@ mv88e6xxx-objs += serdes.o mv88e6xxx-objs += smi.o mv88e6xxx-objs += switchdev.o mv88e6xxx-objs += trace.o +mv88e6xxx-objs += tcflower.o +mv88e6xxx-objs += tcam.o # for tracing framework to find trace.h CFLAGS_trace.o := -I$(src) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 6fcd7181116a..8ca5fd40df92 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -43,6 +43,8 @@ #include "ptp.h" #include "serdes.h" #include "smi.h" +#include "tcam.h" +#include "tcflower.h" static void assert_reg_lock(struct mv88e6xxx_chip *chip) { @@ -3560,6 +3562,11 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) if (err) return err; } + if (chip->info->ops->port_enable_tcam) { + err = chip->info->ops->port_enable_tcam(chip, port); + if (err) + return err; + } if (chip->info->ops->port_tag_remap) { err = chip->info->ops->port_tag_remap(chip, port); @@ -3938,6 +3945,14 @@ static int mv88e6xxx_mdios_register(struct mv88e6xxx_chip *chip) return 0; } +static int mv88e6xxx_tcam_setup(struct mv88e6xxx_chip *chip) +{ + if (!mv88e6xxx_has_tcam(chip)) + return 0; + + return chip->info->ops->tcam_ops->flush_tcam(chip); +} + static void mv88e6xxx_teardown(struct dsa_switch *ds) { struct mv88e6xxx_chip *chip = ds->priv; @@ -3947,6 +3962,7 @@ static void mv88e6xxx_teardown(struct dsa_switch *ds) mv88e6xxx_teardown_devlink_regions_global(ds); mv88e6xxx_hwtstamp_free(chip); mv88e6xxx_ptp_free(chip); + mv88e6xxx_flower_teardown(chip); mv88e6xxx_mdios_unregister(chip); } @@ -4083,6 +4099,10 @@ static int mv88e6xxx_setup(struct dsa_switch *ds) if (err) goto unlock; + err = mv88e6xxx_tcam_setup(chip); + if (err) + goto unlock; + unlock: mv88e6xxx_reg_unlock(chip); @@ -5134,6 +5154,7 @@ static const struct mv88e6xxx_ops mv88e6290_ops = { .ptp_ops = &mv88e6390_ptp_ops, .phylink_get_caps = mv88e6390_phylink_get_caps, .pcs_ops = &mv88e6390_pcs_ops, + .tcam_ops = &mv88e6390_tcam_ops, }; static const struct mv88e6xxx_ops mv88e6320_ops = { @@ -5525,6 +5546,7 @@ static const struct mv88e6xxx_ops mv88e6390_ops = { .serdes_get_regs = mv88e6390_serdes_get_regs, .phylink_get_caps = mv88e6390_phylink_get_caps, .pcs_ops = &mv88e6390_pcs_ops, + .tcam_ops = &mv88e6390_tcam_ops, }; static const struct mv88e6xxx_ops mv88e6390x_ops = { @@ -5621,6 +5643,7 @@ static const struct mv88e6xxx_ops mv88e6393x_ops = { .port_set_cmode = mv88e6393x_port_set_cmode, .port_setup_message_port = mv88e6xxx_setup_message_port, .port_set_upstream_port = mv88e6393x_port_set_upstream_port, + .port_enable_tcam = mv88e6xxx_port_enable_tcam, .stats_snapshot = mv88e6390_g1_stats_snapshot, .stats_set_histogram = mv88e6390_g1_stats_set_histogram, .stats_get_sset_count = mv88e6320_stats_get_sset_count, @@ -5652,6 +5675,7 @@ static const struct mv88e6xxx_ops mv88e6393x_ops = { .ptp_ops = &mv88e6352_ptp_ops, .phylink_get_caps = mv88e6393x_phylink_get_caps, .pcs_ops = &mv88e6393x_pcs_ops, + .tcam_ops = &mv88e6393_tcam_ops, }; static const struct mv88e6xxx_info mv88e6xxx_table[] = { @@ -6125,6 +6149,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 11, /* 10 + Z80 */ .num_internal_phys = 8, + .num_tcam_entries = 256, .internal_phys_offset = 1, .max_vid = 8191, .max_sid = 63, @@ -6132,6 +6157,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, + .tcam_addr = 0x1f, .age_time_coeff = 3750, .g1_irqs = 10, .g2_irqs = 14, @@ -6227,12 +6253,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_ports = 11, /* 10 + Z80 */ .num_internal_phys = 9, .num_gpio = 16, + .num_tcam_entries = 256, .max_vid = 8191, .max_sid = 63, .port_base_addr = 0x0, .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, + .tcam_addr = 0x1f, .age_time_coeff = 3750, .g1_irqs = 9, .g2_irqs = 14, @@ -6439,12 +6467,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_ports = 11, /* 10 + Z80 */ .num_internal_phys = 9, .num_gpio = 16, + .num_tcam_entries = 256, .max_vid = 8191, .max_sid = 63, .port_base_addr = 0x0, .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, + .tcam_addr = 0x1f, .age_time_coeff = 3750, .g1_irqs = 9, .g2_irqs = 14, @@ -6490,6 +6520,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 11, /* 10 + Z80 */ .num_internal_phys = 8, + .num_tcam_entries = 256, .internal_phys_offset = 1, .max_vid = 8191, .max_sid = 63, @@ -6497,6 +6528,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, + .tcam_addr = 0x1f, .age_time_coeff = 3750, .g1_irqs = 10, .g2_irqs = 14, @@ -6589,6 +6621,7 @@ static struct mv88e6xxx_chip *mv88e6xxx_alloc_chip(struct device *dev) INIT_LIST_HEAD(&chip->mdios); idr_init(&chip->policies); INIT_LIST_HEAD(&chip->msts); + INIT_LIST_HEAD(&chip->tcam.entries); return chip; } @@ -7184,6 +7217,8 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = { .port_hwtstamp_get = mv88e6xxx_port_hwtstamp_get, .port_txtstamp = mv88e6xxx_port_txtstamp, .port_rxtstamp = mv88e6xxx_port_rxtstamp, + .cls_flower_add = mv88e6xxx_cls_flower_add, + .cls_flower_del = mv88e6xxx_cls_flower_del, .get_ts_info = mv88e6xxx_get_ts_info, .devlink_param_get = mv88e6xxx_devlink_param_get, .devlink_param_set = mv88e6xxx_devlink_param_set, diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h index e073446ee7d0..2b235ac2c5df 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.h +++ b/drivers/net/dsa/mv88e6xxx/chip.h @@ -135,12 +135,14 @@ struct mv88e6xxx_info { unsigned int num_ports; unsigned int num_internal_phys; unsigned int num_gpio; + unsigned int num_tcam_entries; unsigned int max_vid; unsigned int max_sid; unsigned int port_base_addr; unsigned int phy_base_addr; unsigned int global1_addr; unsigned int global2_addr; + unsigned int tcam_addr; unsigned int age_time_coeff; unsigned int g1_irqs; unsigned int g2_irqs; @@ -210,6 +212,7 @@ struct mv88e6xxx_avb_ops; struct mv88e6xxx_ptp_ops; struct mv88e6xxx_pcs_ops; struct mv88e6xxx_cc_coeffs; +struct mv88e6xxx_tcam_ops; struct mv88e6xxx_irq { u16 masked; @@ -339,6 +342,10 @@ struct mv88e6xxx_hw_stat { int type; }; +struct mv88e6xxx_tcam { + struct list_head entries; +}; + struct mv88e6xxx_chip { const struct mv88e6xxx_info *info; @@ -444,6 +451,35 @@ struct mv88e6xxx_chip { /* FID map */ DECLARE_BITMAP(fid_bitmap, MV88E6XXX_N_FID); + + /* TCAM entries */ + struct mv88e6xxx_tcam tcam; +}; + +#define TCAM_MATCH_SIZE 96 + +struct mv88e6xxx_tcam_key { + u16 spv; + u16 spv_mask; + + u8 frame_data[TCAM_MATCH_SIZE]; + u8 frame_mask[TCAM_MATCH_SIZE]; +}; + +struct mv88e6xxx_tcam_action { + u8 dpv_mode; + u16 dpv; +}; + +struct mv88e6xxx_tcam_entry { + struct list_head list; + unsigned long cookie; + u32 prio; + u8 hw_idx; + + struct mv88e6xxx_tcam_key key; + struct mv88e6xxx_tcam_action action; + }; struct mv88e6xxx_bus_ops { @@ -678,6 +714,11 @@ struct mv88e6xxx_ops { /* Max Frame Size */ int (*set_max_frame_size)(struct mv88e6xxx_chip *chip, int mtu); + + int (*port_enable_tcam)(struct mv88e6xxx_chip *chip, int port); + + /* Ternary Content Addressable Memory operations */ + const struct mv88e6xxx_tcam_ops *tcam_ops; }; struct mv88e6xxx_irq_ops { @@ -752,6 +793,12 @@ struct mv88e6xxx_pcs_ops { }; +struct mv88e6xxx_tcam_ops { + int (*entry_add)(struct mv88e6xxx_chip *chip, + struct mv88e6xxx_tcam_entry *entry, u8 idx); + int (*flush_tcam)(struct mv88e6xxx_chip *chip); +}; + static inline bool mv88e6xxx_has_stu(struct mv88e6xxx_chip *chip) { return chip->info->max_sid > 0 && @@ -769,6 +816,11 @@ static inline bool mv88e6xxx_has_lag(struct mv88e6xxx_chip *chip) return !!chip->info->global2_addr; } +static inline bool mv88e6xxx_has_tcam(struct mv88e6xxx_chip *chip) +{ + return !!chip->info->tcam_addr; +} + static inline unsigned int mv88e6xxx_num_databases(struct mv88e6xxx_chip *chip) { return chip->info->num_databases; diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c index 66b1b7277281..49cd82930b7a 100644 --- a/drivers/net/dsa/mv88e6xxx/port.c +++ b/drivers/net/dsa/mv88e6xxx/port.c @@ -1380,7 +1380,33 @@ int mv88e6xxx_port_disable_learn_limit(struct mv88e6xxx_chip *chip, int port) int mv88e6xxx_port_disable_pri_override(struct mv88e6xxx_chip *chip, int port) { - return mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_PRI_OVERRIDE, 0); + u16 reg; + int err; + + err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_PRI_OVERRIDE, + ®); + if (err) + return err; + + reg &= MV88E6XXX_PORT_PRI_OVERRIDE_TCAM_MODE_MASK; + return mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_PRI_OVERRIDE, + reg); +} + +int mv88e6xxx_port_enable_tcam(struct mv88e6xxx_chip *chip, int port) +{ + u16 reg; + int err; + + err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_PRI_OVERRIDE, + ®); + if (err) + return err; + + reg &= ~MV88E6XXX_PORT_PRI_OVERRIDE_TCAM_MODE_MASK; + reg |= MV88E6XXX_PORT_PRI_OVERRIDE_TCAM_MODE_96_BYTE; + return mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_PRI_OVERRIDE, + reg); } /* Offset 0x0E: Policy & MGMT Control Register for FAMILY 6191X 6193X 6393X */ diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h index c1d2f99efb1c..a2492cf4d920 100644 --- a/drivers/net/dsa/mv88e6xxx/port.h +++ b/drivers/net/dsa/mv88e6xxx/port.h @@ -254,7 +254,10 @@ #define MV88E6XXX_PORT_ATU_CTL 0x0c /* Offset 0x0D: Priority Override Register */ -#define MV88E6XXX_PORT_PRI_OVERRIDE 0x0d +#define MV88E6XXX_PORT_PRI_OVERRIDE 0x0d +#define MV88E6XXX_PORT_PRI_OVERRIDE_TCAM_MODE_MASK 0x0003 +#define MV88E6XXX_PORT_PRI_OVERRIDE_TCAM_MODE_48_BYTE 0x0001 +#define MV88E6XXX_PORT_PRI_OVERRIDE_TCAM_MODE_96_BYTE 0x0002 /* Offset 0x0E: Policy Control Register */ #define MV88E6XXX_PORT_POLICY_CTL 0x0e @@ -608,4 +611,6 @@ int mv88e6xxx_port_hidden_wait(struct mv88e6xxx_chip *chip); int mv88e6xxx_port_hidden_read(struct mv88e6xxx_chip *chip, int block, int port, int reg, u16 *val); +int mv88e6xxx_port_enable_tcam(struct mv88e6xxx_chip *chip, int port); + #endif /* _MV88E6XXX_PORT_H */ diff --git a/drivers/net/dsa/mv88e6xxx/tcam.c b/drivers/net/dsa/mv88e6xxx/tcam.c new file mode 100644 index 000000000000..fc16bab8e772 --- /dev/null +++ b/drivers/net/dsa/mv88e6xxx/tcam.c @@ -0,0 +1,338 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Marvell 88E6xxx Switch TCAM support + * + * Copyright (c) 2026 Luminex Network Intelligence + */ + +#include "linux/list.h" + +#include "chip.h" +#include "tcam.h" + +/* TCAM operatation register */ +#define MV88E6XXX_TCAM_OP 0x00 +#define MV88E6XXX_TCAM_OP_BUSY 0x8000 +#define MV88E6XXX_TCAM_OP_OP_MASK 0x7000 +#define MV88E6XXX_TCAM_OP_OP_FLUSH_ALL 0x1000 +#define MV88E6XXX_TCAM_OP_OP_FLUSH 0x2000 +#define MV88E6XXX_TCAM_OP_OP_LOAD 0x3000 +#define MV88E6XXX_TCAM_OP_OP_GET_NEXT 0x4000 +#define MV88E6XXX_TCAM_OP_OP_READ 0x5000 + +/* TCAM extension register */ +#define MV88E6XXX_TCAM_EXTENSION 0x01 + +/* TCAM keys register 1 */ +#define MV88E6XXX_TCAM_KEYS1 0x02 +#define MV88E6XXX_TCAM_KEYS1_FT_MASK 0xC000 +#define MV88E6XXX_TCAM_KEYS1_SPV_MASK 0x0007 +#define MV88E6XXX_TCAM_KEYS1_SPV_MASK_MASK 0x0700 + +/* TCAM keys register 2 */ +#define MV88E6XXX_TCAM_KEYS2 0x03 +#define MV88E6XXX_TCAM_KEYS2_SPV_MASK 0x00ff +#define MV88E6XXX_TCAM_KEYS2_SPV_MASK_MASK 0xff00 + +#define MV88E6XXX_TCAM_ING_ACT3 0x04 +#define MV88E6XXX_TCAM_ING_ACT3_SF 0x0800 +#define MV88E6XXX_TCAM_ING_ACT3_DPV_MASK 0x07ff + +#define MV88E6XXX_TCAM_ING_ACT5 0x06 +#define MV88E6XXX_TCAM_ING_ACT5_DPV_MODE_MASK 0xc000 + +static int mv88e6xxx_tcam_write(struct mv88e6xxx_chip *chip, int reg, u16 val) +{ + return mv88e6xxx_write(chip, chip->info->tcam_addr, reg, val); +} + +static int mv88e6xxx_tcam_wait(struct mv88e6xxx_chip *chip) +{ + int bit = __bf_shf(MV88E6XXX_TCAM_OP_BUSY); + + return mv88e6xxx_wait_bit(chip, chip->info->tcam_addr, + MV88E6XXX_TCAM_OP, bit, 0); +} + +static int mv88e6xxx_tcam_read_page(struct mv88e6xxx_chip *chip, u8 page, + u8 entry) + +{ + u16 val = MV88E6XXX_TCAM_OP_BUSY | MV88E6XXX_TCAM_OP_OP_READ | + (page & 0x3) << 10 | entry; + int err; + + err = mv88e6xxx_tcam_write(chip, MV88E6XXX_TCAM_OP, val); + if (err) + return err; + + return mv88e6xxx_tcam_wait(chip); +} + +static int mv88e6xxx_tcam_load_page(struct mv88e6xxx_chip *chip, u8 page, + u8 entry) +{ + u16 val = MV88E6XXX_TCAM_OP_BUSY | MV88E6XXX_TCAM_OP_OP_LOAD | + (page & 0x3) << 10 | entry; + int err; + + err = mv88e6xxx_tcam_write(chip, MV88E6XXX_TCAM_OP, val); + if (err) + return err; + + return mv88e6xxx_tcam_wait(chip); +} + +static int mv88e6xxx_tcam_flush_entry(struct mv88e6xxx_chip *chip, u8 entry) +{ + u16 val = MV88E6XXX_TCAM_OP_BUSY | MV88E6XXX_TCAM_OP_OP_FLUSH | entry; + int err; + + err = mv88e6xxx_tcam_write(chip, MV88E6XXX_TCAM_OP, val); + if (err) + return err; + + return mv88e6xxx_tcam_wait(chip); +} + +static int mv88e6xxx_tcam_flush_all(struct mv88e6xxx_chip *chip) +{ + u16 val = MV88E6XXX_TCAM_OP_BUSY | MV88E6XXX_TCAM_OP_OP_FLUSH_ALL; + int err; + + err = mv88e6xxx_tcam_write(chip, MV88E6XXX_TCAM_OP, val); + if (err) + return err; + + return mv88e6xxx_tcam_wait(chip); +} + +struct mv88e6xxx_tcam_entry * +mv88e6xxx_tcam_entry_find(struct mv88e6xxx_chip *chip, unsigned long cookie) +{ + struct mv88e6xxx_tcam_entry *entry; + + list_for_each_entry(entry, &chip->tcam.entries, list) + if (entry->cookie == cookie) + return entry; + + return NULL; +} + +/* insert tcam entry in ordered list and move existing entries if necessary */ +static int mv88e6xxx_tcam_insert_entry(struct mv88e6xxx_chip *chip, + struct mv88e6xxx_tcam_entry *entry) +{ + struct mv88e6xxx_tcam_entry *elem; + struct list_head *hpos; + int err; + + list_for_each_prev(hpos, &chip->tcam.entries) { + u8 move_idx; + + elem = list_entry(hpos, struct mv88e6xxx_tcam_entry, list); + if (entry->prio >= elem->prio) + break; + + move_idx = elem->hw_idx + 1; + + err = mv88e6xxx_tcam_flush_entry(chip, move_idx); + if (err) + return err; + + err = chip->info->ops->tcam_ops->entry_add(chip, elem, + move_idx); + if (err) + return err; + + elem->hw_idx = move_idx; + } + + if (list_is_head(hpos, &chip->tcam.entries)) { + entry->hw_idx = 0; + } else { + elem = list_entry(hpos, struct mv88e6xxx_tcam_entry, list); + entry->hw_idx = elem->hw_idx + 1; + } + list_add(&entry->list, hpos); + return 0; +} + +int mv88e6xxx_tcam_entry_add(struct mv88e6xxx_chip *chip, + struct mv88e6xxx_tcam_entry *entry) +{ + int err; + struct mv88e6xxx_tcam_entry *last; + + last = list_last_entry_or_null(&chip->tcam.entries, + struct mv88e6xxx_tcam_entry, list); + if (last && last->hw_idx == chip->info->num_tcam_entries - 1) + return -ENOSPC; + + err = mv88e6xxx_tcam_insert_entry(chip, entry); + if (err) + return err; + + err = mv88e6xxx_tcam_flush_entry(chip, entry->hw_idx); + if (err) + goto unlink_out; + + err = chip->info->ops->tcam_ops->entry_add(chip, entry, entry->hw_idx); + if (err) + goto unlink_out; + + return 0; + +unlink_out: + list_del(&entry->list); + return err; +} + +int mv88e6xxx_tcam_entry_del(struct mv88e6xxx_chip *chip, + struct mv88e6xxx_tcam_entry *entry) +{ + struct mv88e6xxx_tcam_entry *elem = entry; + u8 move_idx = entry->hw_idx; + int err; + + err = mv88e6xxx_tcam_flush_entry(chip, entry->hw_idx); + if (err) + return err; + + /* move entries that come after the deleted entry forward */ + list_for_each_entry_continue(elem, &chip->tcam.entries, list) { + u8 tmp_idx = elem->hw_idx; + + err = mv88e6xxx_tcam_flush_entry(chip, move_idx); + if (err) + break; + + err = chip->info->ops->tcam_ops->entry_add(chip, elem, + move_idx); + if (err) + break; + + elem->hw_idx = move_idx; + move_idx = tmp_idx; + + /* flush the last entry after moving entries */ + if (list_is_last(&elem->list, &chip->tcam.entries)) + err = mv88e6xxx_tcam_flush_entry(chip, tmp_idx); + } + + list_del(&entry->list); + kfree(entry); + return err; +} + +static int mv88e6390_tcam_entry_add(struct mv88e6xxx_chip *chip, + struct mv88e6xxx_tcam_entry *entry, u8 idx) +{ + int err = 0; + int i; + u16 val, spv_mask, spv; + + err = mv88e6xxx_tcam_read_page(chip, 2, idx); + if (err) + return err; + if (entry->action.dpv_mode != 0) { + val = MV88E6XXX_TCAM_ING_ACT3_SF | + (entry->action.dpv & MV88E6XXX_TCAM_ING_ACT3_DPV_MASK); + + err = mv88e6xxx_tcam_write(chip, MV88E6XXX_TCAM_ING_ACT3, val); + if (err) + return err; + + val = entry->action.dpv_mode << 14; + err = mv88e6xxx_tcam_write(chip, MV88E6XXX_TCAM_ING_ACT5, val); + if (err) + return err; + } + err = mv88e6xxx_tcam_load_page(chip, 2, idx); + if (err) + return err; + + err = mv88e6xxx_tcam_read_page(chip, 1, idx); + if (err) + return err; + + for (i = PAGE0_MATCH_SIZE; + i < PAGE0_MATCH_SIZE + PAGE1_MATCH_SIZE; i++) { + if (entry->key.frame_mask[i]) { + val = entry->key.frame_mask[i] << 8 | + entry->key.frame_data[i]; + + err = mv88e6xxx_tcam_write(chip, + i - PAGE0_MATCH_SIZE + 2, + val); + if (err) + return err; + } + } + err = mv88e6xxx_tcam_load_page(chip, 1, idx); + if (err) + return err; + + err = mv88e6xxx_tcam_read_page(chip, 0, idx); + if (err) + return err; + + for (i = 0; i < PAGE0_MATCH_SIZE; i++) { + if (entry->key.frame_mask[i]) { + val = entry->key.frame_mask[i] << 8 | + entry->key.frame_data[i]; + + err = mv88e6xxx_tcam_write(chip, i + 6, val); + if (err) + return err; + } + } + + spv_mask = entry->key.spv_mask & mv88e6xxx_port_mask(chip); + spv = entry->key.spv & mv88e6xxx_port_mask(chip); + /* frame type mask bits must be set for a valid entry */ + val = MV88E6XXX_TCAM_KEYS1_FT_MASK | + (spv_mask & MV88E6XXX_TCAM_KEYS1_SPV_MASK_MASK) | + ((spv >> 8) & MV88E6XXX_TCAM_KEYS1_SPV_MASK); + err = mv88e6xxx_tcam_write(chip, MV88E6XXX_TCAM_KEYS1, val); + if (err) + return err; + + val = ((spv_mask << 8) & MV88E6XXX_TCAM_KEYS2_SPV_MASK_MASK) | + (spv & MV88E6XXX_TCAM_KEYS2_SPV_MASK); + err = mv88e6xxx_tcam_write(chip, MV88E6XXX_TCAM_KEYS2, val); + if (err) + return err; + + err = mv88e6xxx_tcam_load_page(chip, 0, idx); + if (err) + return err; + + entry->hw_idx = idx; + return 0; +} + +static int mv88e6393_tcam_entry_add(struct mv88e6xxx_chip *chip, + struct mv88e6xxx_tcam_entry *entry, u8 idx) +{ + int err; + + /* select block 0 port 0, then adding an entry is the same as 6390 as + * other blocks aren't used at the moment + */ + err = mv88e6xxx_tcam_write(chip, MV88E6XXX_TCAM_EXTENSION, 0x00); + if (err) + return err; + + return mv88e6390_tcam_entry_add(chip, entry, idx); +} + +const struct mv88e6xxx_tcam_ops mv88e6390_tcam_ops = { + .entry_add = mv88e6390_tcam_entry_add, + .flush_tcam = mv88e6xxx_tcam_flush_all, +}; + +const struct mv88e6xxx_tcam_ops mv88e6393_tcam_ops = { + .entry_add = mv88e6393_tcam_entry_add, + .flush_tcam = mv88e6xxx_tcam_flush_all, +}; diff --git a/drivers/net/dsa/mv88e6xxx/tcam.h b/drivers/net/dsa/mv88e6xxx/tcam.h new file mode 100644 index 000000000000..d82bc9bc0768 --- /dev/null +++ b/drivers/net/dsa/mv88e6xxx/tcam.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/* + * Copyright (c) 2026 Luminex Network Intelligence + */ +#ifndef _MV88E6XXX_TCAM_H_ +#define _MV88E6XXX_TCAM_H_ + +#define PAGE0_MATCH_SIZE 22 +#define PAGE1_MATCH_SIZE 26 + +#define DPV_MODE_NOP 0x0 +#define DPV_MODE_AND 0x1 +#define DPV_MODE_OR 0x2 +#define DPV_MODE_REPLACE 0x3 + +int mv88e6xxx_tcam_entry_add(struct mv88e6xxx_chip *chip, + struct mv88e6xxx_tcam_entry *entry); +int mv88e6xxx_tcam_entry_del(struct mv88e6xxx_chip *chip, + struct mv88e6xxx_tcam_entry *entry); +struct mv88e6xxx_tcam_entry * +mv88e6xxx_tcam_entry_find(struct mv88e6xxx_chip *chip, unsigned long cookie); +#define mv88e6xxx_tcam_match_set(key, _offset, data, mask) \ + do { \ + typeof(_offset) (offset) = (_offset); \ + BUILD_BUG_ON((offset) + sizeof((data)) > TCAM_MATCH_SIZE); \ + __mv88e6xxx_tcam_match_set(key, offset, sizeof(data), \ + (u8 *)&(data), (u8 *)&(mask)); \ + } while (0) + +static inline void __mv88e6xxx_tcam_match_set(struct mv88e6xxx_tcam_key *key, + unsigned int offset, size_t size, + u8 *data, u8 *mask) +{ + memcpy(&key->frame_data[offset], data, size); + memcpy(&key->frame_mask[offset], mask, size); +} + +extern const struct mv88e6xxx_tcam_ops mv88e6390_tcam_ops; +extern const struct mv88e6xxx_tcam_ops mv88e6393_tcam_ops; +#endif diff --git a/drivers/net/dsa/mv88e6xxx/tcflower.c b/drivers/net/dsa/mv88e6xxx/tcflower.c new file mode 100644 index 000000000000..d67604a55b9f --- /dev/null +++ b/drivers/net/dsa/mv88e6xxx/tcflower.c @@ -0,0 +1,167 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Marvell 88E6xxx Switch flower support + * + * Copyright (c) 2026 Luminex Network Intelligence + */ + +#include "chip.h" +#include "tcflower.h" +#include "tcam.h" + +#define MV88E6XXX_ETHTYPE_OFFSET 16 +#define MV88E6XXX_IP_PROTO_OFFSET 27 +#define MV88E6XXX_IPV4_SRC_OFFSET 30 +#define MV88E6XXX_IPV4_DST_OFFSET 34 + +static int mv88e6xxx_flower_parse_key(struct mv88e6xxx_chip *chip, + struct netlink_ext_ack *extack, + struct flow_cls_offload *cls, + struct mv88e6xxx_tcam_key *key) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(cls); + struct flow_dissector *dissector = rule->match.dissector; + u16 addr_type = 0; + + if (dissector->used_keys & + ~(BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) | + BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) | + BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS))) { + NL_SET_ERR_MSG_MOD(extack, + "Unsupported keys used"); + return -EOPNOTSUPP; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_match_control match; + + flow_rule_match_control(rule, &match); + addr_type = match.key->addr_type; + + if (flow_rule_has_control_flags(match.mask->flags, + cls->common.extack)) + return -EOPNOTSUPP; + } + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + + flow_rule_match_basic(rule, &match); + mv88e6xxx_tcam_match_set(key, MV88E6XXX_ETHTYPE_OFFSET, + match.key->n_proto, + match.mask->n_proto); + mv88e6xxx_tcam_match_set(key, MV88E6XXX_IP_PROTO_OFFSET, + match.key->ip_proto, + match.mask->ip_proto); + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + struct flow_match_ipv4_addrs match; + + flow_rule_match_ipv4_addrs(cls->rule, &match); + mv88e6xxx_tcam_match_set(key, MV88E6XXX_IPV4_SRC_OFFSET, + match.key->src, + match.mask->src); + mv88e6xxx_tcam_match_set(key, MV88E6XXX_IPV4_DST_OFFSET, + match.key->dst, + match.mask->dst); + } + + return 0; +} + +int mv88e6xxx_cls_flower_add(struct dsa_switch *ds, int port, + struct flow_cls_offload *cls, bool ingress) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(cls); + struct netlink_ext_ack *extack = cls->common.extack; + struct mv88e6xxx_chip *chip = ds->priv; + struct mv88e6xxx_tcam_key key = { 0 }; + const struct flow_action_entry *act; + unsigned long cookie = cls->cookie; + struct mv88e6xxx_tcam_entry *entry; + int err, i; + + if (!mv88e6xxx_has_tcam(chip)) { + NL_SET_ERR_MSG_MOD(extack, "hardware offload not supported"); + return -EOPNOTSUPP; + } + + err = mv88e6xxx_flower_parse_key(chip, extack, cls, &key); + if (err) + return err; + + mv88e6xxx_reg_lock(chip); + entry = mv88e6xxx_tcam_entry_find(chip, cookie); + if (entry) { + err = -EEXIST; + goto err_unlock; + } + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + err = -ENOMEM; + goto err_unlock; + } + + entry->cookie = cookie; + entry->prio = cls->common.prio; + entry->key = key; + + flow_action_for_each(i, act, &rule->action) { + switch (act->id) { + case FLOW_ACTION_TRAP: { + int cpu = dsa_upstream_port(ds, port); + + entry->action.dpv_mode = DPV_MODE_REPLACE; + entry->action.dpv = BIT(cpu); + break; + } + default: + NL_SET_ERR_MSG_MOD(extack, "action not supported"); + err = -EOPNOTSUPP; + goto err_free_entry; + } + } + + entry->key.spv = BIT(port); + entry->key.spv_mask = mv88e6xxx_port_mask(chip); + + err = mv88e6xxx_tcam_entry_add(chip, entry); + if (err) + goto err_free_entry; + + mv88e6xxx_reg_unlock(chip); + return 0; + +err_free_entry: + kfree(entry); +err_unlock: + mv88e6xxx_reg_unlock(chip); + return err; +} + +int mv88e6xxx_cls_flower_del(struct dsa_switch *ds, int port, + struct flow_cls_offload *cls, bool ingress) +{ + struct mv88e6xxx_chip *chip = ds->priv; + struct mv88e6xxx_tcam_entry *entry; + int err = 0; + + mv88e6xxx_reg_lock(chip); + entry = mv88e6xxx_tcam_entry_find(chip, cls->cookie); + + if (entry) + err = mv88e6xxx_tcam_entry_del(chip, entry); + mv88e6xxx_reg_unlock(chip); + return err; +} + +void mv88e6xxx_flower_teardown(struct mv88e6xxx_chip *chip) +{ + struct mv88e6xxx_tcam_entry *pos, *n; + + list_for_each_entry_safe(pos, n, &chip->tcam.entries, list) { + list_del(&pos->list); + kfree(pos); + } +} diff --git a/drivers/net/dsa/mv88e6xxx/tcflower.h b/drivers/net/dsa/mv88e6xxx/tcflower.h new file mode 100644 index 000000000000..fb79f12fe18c --- /dev/null +++ b/drivers/net/dsa/mv88e6xxx/tcflower.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/* + * Copyright (c) 2026 Luminex Network Intelligence + */ +#ifndef _MV88E6XXX_TCFLOWER_H_ +#define _MV88E6XXX_TCFLOWER_H_ + +int mv88e6xxx_cls_flower_add(struct dsa_switch *ds, int port, + struct flow_cls_offload *cls, bool ingress); +int mv88e6xxx_cls_flower_del(struct dsa_switch *ds, int port, + struct flow_cls_offload *cls, bool ingress); +void mv88e6xxx_flower_teardown(struct mv88e6xxx_chip *chip); +#endif From 6a196e83a1a7e50be93482d1cd4305641f1a9fb1 Mon Sep 17 00:00:00 2001 From: Qingfang Deng Date: Thu, 12 Mar 2026 17:37:30 +0800 Subject: [PATCH 0581/1561] ppp: disconnect channel before nullifying pch->chan In ppp_unregister_channel(), pch->chan is set to NULL before calling ppp_disconnect_channel(), which removes the channel from ppp->channels list using list_del_rcu() + synchronize_net(). This creates an intermediate state where the channel is still connected (on the list) but already unregistered (pch->chan == NULL). Call ppp_disconnect_channel() before setting pch->chan to NULL. After the synchronize_net(), no new reader on the transmit path will hold a reference to the channel from the list. This eliminates the problematic state, and prepares for removing the pch->chan NULL checks from the transmit path in a subsequent patch. Signed-off-by: Qingfang Deng Link: https://patch.msgid.link/20260312093732.277254-1-dqfext@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ppp/ppp_generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 6344c5eb0f98..ad480b584e25 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -3032,12 +3032,12 @@ ppp_unregister_channel(struct ppp_channel *chan) * This ensures that we have returned from any calls into * the channel's start_xmit or ioctl routine before we proceed. */ + ppp_disconnect_channel(pch); down_write(&pch->chan_sem); spin_lock_bh(&pch->downl); WRITE_ONCE(pch->chan, NULL); spin_unlock_bh(&pch->downl); up_write(&pch->chan_sem); - ppp_disconnect_channel(pch); pn = ppp_pernet(pch->chan_net); spin_lock_bh(&pn->all_channels_lock); From febe8012458fd9057d3fb70f6b37ef67a07ff8a1 Mon Sep 17 00:00:00 2001 From: Qingfang Deng Date: Thu, 12 Mar 2026 17:37:31 +0800 Subject: [PATCH 0582/1561] ppp: remove pch->chan NULL checks from tx path Now that ppp_disconnect_channel() is called before pch->chan is set to NULL, a channel from ppp->channels list on the transmit path is guaranteed to have non-NULL pch->chan. Remove the pch->chan NULL checks from ppp_push(), ppp_mp_explode(), and ppp_fill_forward_path(), where a channel is obtained from the list. Remove the corresponding WRITE/READ_ONCE annotations as they no longer race. Signed-off-by: Qingfang Deng Link: https://patch.msgid.link/20260312093732.277254-2-dqfext@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ppp/ppp_generic.c | 66 +++++++++++------------------------ 1 file changed, 20 insertions(+), 46 deletions(-) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index ad480b584e25..a036ddfe327b 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -1602,10 +1602,7 @@ static int ppp_fill_forward_path(struct net_device_path_ctx *ctx, if (!pch) return -ENODEV; - chan = READ_ONCE(pch->chan); - if (!chan) - return -ENODEV; - + chan = pch->chan; if (!chan->ops->fill_forward_path) return -EOPNOTSUPP; @@ -1928,9 +1925,9 @@ ppp_push(struct ppp *ppp, struct sk_buff *skb) spin_lock(&pch->downl); chan = pch->chan; - if (unlikely(!chan || (!chan->direct_xmit && skb_linearize(skb)))) { - /* channel got unregistered, or it requires a linear - * skb but linearization failed + if (unlikely(!chan->direct_xmit && skb_linearize(skb))) { + /* channel requires a linear skb but linearization + * failed */ kfree_skb(skb); ret = 1; @@ -1991,28 +1988,23 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb) hdrlen = (ppp->flags & SC_MP_XSHORTSEQ)? MPHDRLEN_SSN: MPHDRLEN; i = 0; list_for_each_entry(pch, &ppp->channels, clist) { - if (pch->chan) { - pch->avail = 1; - navail++; - pch->speed = pch->chan->speed; - } else { - pch->avail = 0; - } - if (pch->avail) { - if (skb_queue_empty(&pch->file.xq) || - !pch->had_frag) { - if (pch->speed == 0) - nzero++; - else - totspeed += pch->speed; + pch->avail = 1; + navail++; + pch->speed = pch->chan->speed; - pch->avail = 2; - ++nfree; - ++totfree; - } - if (!pch->had_frag && i < ppp->nxchan) - ppp->nxchan = i; + if (skb_queue_empty(&pch->file.xq) || !pch->had_frag) { + if (pch->speed == 0) + nzero++; + else + totspeed += pch->speed; + + pch->avail = 2; + ++nfree; + ++totfree; } + if (!pch->had_frag && i < ppp->nxchan) + ppp->nxchan = i; + ++i; } /* @@ -2071,25 +2063,7 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb) pch->avail = 1; } - /* check the channel's mtu and whether it is still attached. */ spin_lock(&pch->downl); - if (pch->chan == NULL) { - /* can't use this channel, it's being deregistered */ - if (pch->speed == 0) - nzero--; - else - totspeed -= pch->speed; - - spin_unlock(&pch->downl); - pch->avail = 0; - totlen = len; - totfree--; - nfree--; - if (--navail == 0) - break; - continue; - } - /* *if the channel speed is not set divide *the packet evenly among the free channels; @@ -3035,7 +3009,7 @@ ppp_unregister_channel(struct ppp_channel *chan) ppp_disconnect_channel(pch); down_write(&pch->chan_sem); spin_lock_bh(&pch->downl); - WRITE_ONCE(pch->chan, NULL); + pch->chan = NULL; spin_unlock_bh(&pch->downl); up_write(&pch->chan_sem); From a8e136b496259e689942d295d06cc4e517418cc4 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Tue, 11 Nov 2025 22:34:51 +0100 Subject: [PATCH 0583/1561] selftests: ovpn: allow compiling ovpn-cli.c with mbedtls3 mbedtls 3 installs headers and calls the shared object differently than version 2, therefore we must now rely on pkgconfig to fill the right C/LDFLAGS. Moreover the mbedtls3 library expects any base64 file to have their content on one line. Since this change does no break older versions, let's change the sample key file format and make mbedtls3 happy. Cc: Shuah Khan Cc: linux-kselftest@vger.kernel.org Cc: horms@kernel.org Signed-off-by: Antonio Quartulli --- tools/testing/selftests/net/ovpn/Makefile | 18 ++++++++++++------ tools/testing/selftests/net/ovpn/data64.key | 6 +----- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/net/ovpn/Makefile b/tools/testing/selftests/net/ovpn/Makefile index dbe0388c8512..e59271a25d76 100644 --- a/tools/testing/selftests/net/ovpn/Makefile +++ b/tools/testing/selftests/net/ovpn/Makefile @@ -2,19 +2,25 @@ # Copyright (C) 2020-2025 OpenVPN, Inc. # CFLAGS = -pedantic -Wextra -Wall -Wl,--no-as-needed -g -O0 -ggdb $(KHDR_INCLUDES) +CFLAGS += $(shell pkg-config --cflags mbedcrypto-3 mbedtls-3 2>/dev/null) + VAR_CFLAGS = $(shell pkg-config --cflags libnl-3.0 libnl-genl-3.0 2>/dev/null) ifeq ($(VAR_CFLAGS),) VAR_CFLAGS = -I/usr/include/libnl3 endif CFLAGS += $(VAR_CFLAGS) - -LDLIBS = -lmbedtls -lmbedcrypto -VAR_LDLIBS = $(shell pkg-config --libs libnl-3.0 libnl-genl-3.0 2>/dev/null) -ifeq ($(VAR_LDLIBS),) -VAR_LDLIBS = -lnl-genl-3 -lnl-3 +MTLS_LDLIBS= $(shell pkg-config --libs mbedcrypto-3 mbedtls-3 2>/dev/null) +ifeq ($(MTLS_LDLIBS),) +MTLS_LDLIBS = -lmbedtls -lmbedcrypto endif -LDLIBS += $(VAR_LDLIBS) +LDLIBS += $(MTLS_LDLIBS) + +NL_LDLIBS = $(shell pkg-config --libs libnl-3.0 libnl-genl-3.0 2>/dev/null) +ifeq ($(NL_LDLIBS),) +NL_LDLIBS = -lnl-genl-3 -lnl-3 +endif +LDLIBS += $(NL_LDLIBS) TEST_FILES = common.sh diff --git a/tools/testing/selftests/net/ovpn/data64.key b/tools/testing/selftests/net/ovpn/data64.key index a99e88c4e290..d04febcdf5a2 100644 --- a/tools/testing/selftests/net/ovpn/data64.key +++ b/tools/testing/selftests/net/ovpn/data64.key @@ -1,5 +1 @@ -jRqMACN7d7/aFQNT8S7jkrBD8uwrgHbG5OQZP2eu4R1Y7tfpS2bf5RHv06Vi163CGoaIiTX99R3B -ia9ycAH8Wz1+9PWv51dnBLur9jbShlgZ2QHLtUc4a/gfT7zZwULXuuxdLnvR21DDeMBaTbkgbai9 -uvAa7ne1liIgGFzbv+Bas4HDVrygxIxuAnP5Qgc3648IJkZ0QEXPF+O9f0n5+QIvGCxkAUVx+5K6 -KIs+SoeWXnAopELmoGSjUpFtJbagXK82HfdqpuUxT2Tnuef0/14SzVE/vNleBNu2ZbyrSAaah8tE -BofkPJUBFY+YQcfZNM5Dgrw3i+Bpmpq/gpdg5w== +jRqMACN7d7/aFQNT8S7jkrBD8uwrgHbG5OQZP2eu4R1Y7tfpS2bf5RHv06Vi163CGoaIiTX99R3Bia9ycAH8Wz1+9PWv51dnBLur9jbShlgZ2QHLtUc4a/gfT7zZwULXuuxdLnvR21DDeMBaTbkgbai9uvAa7ne1liIgGFzbv+Bas4HDVrygxIxuAnP5Qgc3648IJkZ0QEXPF+O9f0n5+QIvGCxkAUVx+5K6KIs+SoeWXnAopELmoGSjUpFtJbagXK82HfdqpuUxT2Tnuef0/14SzVE/vNleBNu2ZbyrSAaah8tEBofkPJUBFY+YQcfZNM5Dgrw3i+Bpmpq/gpdg5w== From 7e7ca01d912637316c0159a0a15a5fc14cd12aff Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 14 Jul 2025 16:20:02 +0200 Subject: [PATCH 0584/1561] ovpn: use correct array size to parse nested attributes in ovpn_nl_key_swap_doit In ovpn_nl_key_swap_doit, the attributes array used to parse the OVPN_A_KEYCONF uses OVPN_A_PEER_MAX instead of OVPN_A_KEYCONF_MAX. Note that this does not cause any bug, since currently OVPN_A_KEYCONF_MAX < OVPN_A_PEER_MAX. The wrong constant was introduced by commit 203e2bf55990 ("ovpn: implement key add/get/del/swap via netlink") Signed-off-by: Sabrina Dubroca Signed-off-by: Antonio Quartulli --- drivers/net/ovpn/netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ovpn/netlink.c b/drivers/net/ovpn/netlink.c index c7f382437630..fed0e46b32a3 100644 --- a/drivers/net/ovpn/netlink.c +++ b/drivers/net/ovpn/netlink.c @@ -1061,8 +1061,8 @@ err: int ovpn_nl_key_swap_doit(struct sk_buff *skb, struct genl_info *info) { + struct nlattr *attrs[OVPN_A_KEYCONF_MAX + 1]; struct ovpn_priv *ovpn = info->user_ptr[0]; - struct nlattr *attrs[OVPN_A_PEER_MAX + 1]; struct ovpn_peer *peer; u32 peer_id; int ret; From 4a6480599ce1aef2235152e16215cc68962e9416 Mon Sep 17 00:00:00 2001 From: Qingfang Deng Date: Wed, 28 May 2025 16:57:08 +0800 Subject: [PATCH 0585/1561] ovpn: pktid: use bitops.h API Use bitops.h for replay window to simplify code. Signed-off-by: Qingfang Deng [antonio@openvpn.net: extended commit message] Signed-off-by: Antonio Quartulli Reviewed-by: Sabrina Dubroca --- drivers/net/ovpn/pktid.c | 11 ++++------- drivers/net/ovpn/pktid.h | 2 +- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/net/ovpn/pktid.c b/drivers/net/ovpn/pktid.c index 2f29049897e3..f1c243b84463 100644 --- a/drivers/net/ovpn/pktid.c +++ b/drivers/net/ovpn/pktid.c @@ -65,7 +65,7 @@ int ovpn_pktid_recv(struct ovpn_pktid_recv *pr, u32 pkt_id, u32 pkt_time) if (likely(pkt_id == pr->id + 1)) { /* well-formed ID sequence (incremented by 1) */ pr->base = REPLAY_INDEX(pr->base, -1); - pr->history[pr->base / 8] |= (1 << (pr->base % 8)); + __set_bit(pr->base, pr->history); if (pr->extent < REPLAY_WINDOW_SIZE) ++pr->extent; pr->id = pkt_id; @@ -77,14 +77,14 @@ int ovpn_pktid_recv(struct ovpn_pktid_recv *pr, u32 pkt_id, u32 pkt_time) unsigned int i; pr->base = REPLAY_INDEX(pr->base, -delta); - pr->history[pr->base / 8] |= (1 << (pr->base % 8)); + __set_bit(pr->base, pr->history); pr->extent += delta; if (pr->extent > REPLAY_WINDOW_SIZE) pr->extent = REPLAY_WINDOW_SIZE; for (i = 1; i < delta; ++i) { unsigned int newb = REPLAY_INDEX(pr->base, i); - pr->history[newb / 8] &= ~BIT(newb % 8); + __clear_bit(newb, pr->history); } } else { pr->base = 0; @@ -103,14 +103,11 @@ int ovpn_pktid_recv(struct ovpn_pktid_recv *pr, u32 pkt_id, u32 pkt_time) if (pkt_id > pr->id_floor) { const unsigned int ri = REPLAY_INDEX(pr->base, delta); - u8 *p = &pr->history[ri / 8]; - const u8 mask = (1 << (ri % 8)); - if (*p & mask) { + if (__test_and_set_bit(ri, pr->history)) { ret = -EINVAL; goto out; } - *p |= mask; } else { ret = -EINVAL; goto out; diff --git a/drivers/net/ovpn/pktid.h b/drivers/net/ovpn/pktid.h index 0262d026d15e..21845f353bc8 100644 --- a/drivers/net/ovpn/pktid.h +++ b/drivers/net/ovpn/pktid.h @@ -34,7 +34,7 @@ struct ovpn_pktid_xmit { */ struct ovpn_pktid_recv { /* "sliding window" bitmask of recent packet IDs received */ - u8 history[REPLAY_WINDOW_BYTES]; + DECLARE_BITMAP(history, REPLAY_WINDOW_SIZE); /* bit position of deque base in history */ unsigned int base; /* extent (in bits) of deque in history */ From c841b676da98638f5ed8d3f2f449ddd02d9921aa Mon Sep 17 00:00:00 2001 From: Ralf Lici Date: Fri, 14 Nov 2025 11:39:40 +0100 Subject: [PATCH 0586/1561] ovpn: notify userspace on client float event Send a netlink notification when a client updates its remote UDP endpoint. The notification includes the new IP address, port, and scope ID (for IPv6). Cc: linux-kselftest@vger.kernel.org Cc: horms@kernel.org Cc: shuah@kernel.org Cc: donald.hunter@gmail.com Signed-off-by: Ralf Lici Signed-off-by: Antonio Quartulli Reviewed-by: Sabrina Dubroca --- Documentation/netlink/specs/ovpn.yaml | 6 ++ drivers/net/ovpn/netlink.c | 82 +++++++++++++++++++++ drivers/net/ovpn/netlink.h | 2 + drivers/net/ovpn/peer.c | 2 + include/uapi/linux/ovpn.h | 1 + tools/testing/selftests/net/ovpn/ovpn-cli.c | 3 + 6 files changed, 96 insertions(+) diff --git a/Documentation/netlink/specs/ovpn.yaml b/Documentation/netlink/specs/ovpn.yaml index 1b91045cee2e..0d0c028bf96f 100644 --- a/Documentation/netlink/specs/ovpn.yaml +++ b/Documentation/netlink/specs/ovpn.yaml @@ -502,6 +502,12 @@ operations: - ifindex - keyconf + - + name: peer-float-ntf + doc: Notification about a peer floating (changing its remote UDP endpoint) + notify: peer-get + mcgrp: peers + mcast-groups: list: - diff --git a/drivers/net/ovpn/netlink.c b/drivers/net/ovpn/netlink.c index fed0e46b32a3..e10d7f9a28f5 100644 --- a/drivers/net/ovpn/netlink.c +++ b/drivers/net/ovpn/netlink.c @@ -1203,6 +1203,88 @@ err_free_msg: return ret; } +/** + * ovpn_nl_peer_float_notify - notify userspace about peer floating + * @peer: the floated peer + * @ss: sockaddr representing the new remote endpoint + * + * Return: 0 on success or a negative error code otherwise + */ +int ovpn_nl_peer_float_notify(struct ovpn_peer *peer, + const struct sockaddr_storage *ss) +{ + struct ovpn_socket *sock; + struct sockaddr_in6 *sa6; + struct sockaddr_in *sa; + struct sk_buff *msg; + struct nlattr *attr; + int ret = -EMSGSIZE; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, 0, 0, &ovpn_nl_family, 0, + OVPN_CMD_PEER_FLOAT_NTF); + if (!hdr) { + ret = -ENOBUFS; + goto err_free_msg; + } + + if (nla_put_u32(msg, OVPN_A_IFINDEX, peer->ovpn->dev->ifindex)) + goto err_cancel_msg; + + attr = nla_nest_start(msg, OVPN_A_PEER); + if (!attr) + goto err_cancel_msg; + + if (nla_put_u32(msg, OVPN_A_PEER_ID, peer->id)) + goto err_cancel_msg; + + if (ss->ss_family == AF_INET) { + sa = (struct sockaddr_in *)ss; + if (nla_put_in_addr(msg, OVPN_A_PEER_REMOTE_IPV4, + sa->sin_addr.s_addr) || + nla_put_net16(msg, OVPN_A_PEER_REMOTE_PORT, sa->sin_port)) + goto err_cancel_msg; + } else if (ss->ss_family == AF_INET6) { + sa6 = (struct sockaddr_in6 *)ss; + if (nla_put_in6_addr(msg, OVPN_A_PEER_REMOTE_IPV6, + &sa6->sin6_addr) || + nla_put_u32(msg, OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID, + sa6->sin6_scope_id) || + nla_put_net16(msg, OVPN_A_PEER_REMOTE_PORT, sa6->sin6_port)) + goto err_cancel_msg; + } else { + ret = -EAFNOSUPPORT; + goto err_cancel_msg; + } + + nla_nest_end(msg, attr); + genlmsg_end(msg, hdr); + + rcu_read_lock(); + sock = rcu_dereference(peer->sock); + if (!sock) { + ret = -EINVAL; + goto err_unlock; + } + genlmsg_multicast_netns(&ovpn_nl_family, sock_net(sock->sk), msg, + 0, OVPN_NLGRP_PEERS, GFP_ATOMIC); + rcu_read_unlock(); + + return 0; + +err_unlock: + rcu_read_unlock(); +err_cancel_msg: + genlmsg_cancel(msg, hdr); +err_free_msg: + nlmsg_free(msg); + return ret; +} + /** * ovpn_nl_key_swap_notify - notify userspace peer's key must be renewed * @peer: the peer whose key needs to be renewed diff --git a/drivers/net/ovpn/netlink.h b/drivers/net/ovpn/netlink.h index 8615dfc3c472..11ee7c681885 100644 --- a/drivers/net/ovpn/netlink.h +++ b/drivers/net/ovpn/netlink.h @@ -13,6 +13,8 @@ int ovpn_nl_register(void); void ovpn_nl_unregister(void); int ovpn_nl_peer_del_notify(struct ovpn_peer *peer); +int ovpn_nl_peer_float_notify(struct ovpn_peer *peer, + const struct sockaddr_storage *ss); int ovpn_nl_key_swap_notify(struct ovpn_peer *peer, u8 key_id); #endif /* _NET_OVPN_NETLINK_H_ */ diff --git a/drivers/net/ovpn/peer.c b/drivers/net/ovpn/peer.c index 3716a1d82801..4e145b4497e6 100644 --- a/drivers/net/ovpn/peer.c +++ b/drivers/net/ovpn/peer.c @@ -287,6 +287,8 @@ void ovpn_peer_endpoints_update(struct ovpn_peer *peer, struct sk_buff *skb) spin_unlock_bh(&peer->lock); + ovpn_nl_peer_float_notify(peer, &ss); + /* rehashing is required only in MP mode as P2P has one peer * only and thus there is no hashtable */ diff --git a/include/uapi/linux/ovpn.h b/include/uapi/linux/ovpn.h index 959b41def61f..0cce0d58b830 100644 --- a/include/uapi/linux/ovpn.h +++ b/include/uapi/linux/ovpn.h @@ -100,6 +100,7 @@ enum { OVPN_CMD_KEY_SWAP, OVPN_CMD_KEY_SWAP_NTF, OVPN_CMD_KEY_DEL, + OVPN_CMD_PEER_FLOAT_NTF, __OVPN_CMD_MAX, OVPN_CMD_MAX = (__OVPN_CMD_MAX - 1) diff --git a/tools/testing/selftests/net/ovpn/ovpn-cli.c b/tools/testing/selftests/net/ovpn/ovpn-cli.c index 0f3babf19fd0..7178abae1b2f 100644 --- a/tools/testing/selftests/net/ovpn/ovpn-cli.c +++ b/tools/testing/selftests/net/ovpn/ovpn-cli.c @@ -1516,6 +1516,9 @@ static int ovpn_handle_msg(struct nl_msg *msg, void *arg) case OVPN_CMD_PEER_DEL_NTF: fprintf(stdout, "received CMD_PEER_DEL_NTF\n"); break; + case OVPN_CMD_PEER_FLOAT_NTF: + fprintf(stdout, "received CMD_PEER_FLOAT_NTF\n"); + break; case OVPN_CMD_KEY_SWAP_NTF: fprintf(stdout, "received CMD_KEY_SWAP_NTF\n"); break; From 77de28cd7cf172e782319a144bf64e693794d78b Mon Sep 17 00:00:00 2001 From: Ralf Lici Date: Fri, 13 Jun 2025 17:55:39 +0200 Subject: [PATCH 0587/1561] selftests: ovpn: add notification parsing and matching To verify that netlink notifications are correctly emitted and contain the expected fields, this commit uses the tools/net/ynl/pyynl/cli.py script to create multicast listeners. These listeners record the captured notifications to a JSON file, which is later compared to the expected output. Cc: linux-kselftest@vger.kernel.org Cc: shuah@kernel.org Cc: horms@kernel.org Signed-off-by: Ralf Lici Signed-off-by: Antonio Quartulli --- tools/testing/selftests/net/ovpn/Makefile | 9 ++++- tools/testing/selftests/net/ovpn/common.sh | 34 +++++++++++++++++-- .../selftests/net/ovpn/json/peer0-float.json | 9 +++++ .../selftests/net/ovpn/json/peer0.json | 6 ++++ .../selftests/net/ovpn/json/peer1-float.json | 1 + .../selftests/net/ovpn/json/peer1.json | 1 + .../selftests/net/ovpn/json/peer2-float.json | 1 + .../selftests/net/ovpn/json/peer2.json | 1 + .../selftests/net/ovpn/json/peer3-float.json | 1 + .../selftests/net/ovpn/json/peer3.json | 1 + .../selftests/net/ovpn/json/peer4-float.json | 1 + .../selftests/net/ovpn/json/peer4.json | 1 + .../selftests/net/ovpn/json/peer5-float.json | 1 + .../selftests/net/ovpn/json/peer5.json | 1 + .../selftests/net/ovpn/json/peer6-float.json | 1 + .../selftests/net/ovpn/json/peer6.json | 1 + .../testing/selftests/net/ovpn/tcp_peers.txt | 1 + tools/testing/selftests/net/ovpn/test.sh | 8 +++++ 18 files changed, 76 insertions(+), 3 deletions(-) create mode 100644 tools/testing/selftests/net/ovpn/json/peer0-float.json create mode 100644 tools/testing/selftests/net/ovpn/json/peer0.json create mode 120000 tools/testing/selftests/net/ovpn/json/peer1-float.json create mode 100644 tools/testing/selftests/net/ovpn/json/peer1.json create mode 120000 tools/testing/selftests/net/ovpn/json/peer2-float.json create mode 100644 tools/testing/selftests/net/ovpn/json/peer2.json create mode 120000 tools/testing/selftests/net/ovpn/json/peer3-float.json create mode 100644 tools/testing/selftests/net/ovpn/json/peer3.json create mode 120000 tools/testing/selftests/net/ovpn/json/peer4-float.json create mode 100644 tools/testing/selftests/net/ovpn/json/peer4.json create mode 120000 tools/testing/selftests/net/ovpn/json/peer5-float.json create mode 100644 tools/testing/selftests/net/ovpn/json/peer5.json create mode 120000 tools/testing/selftests/net/ovpn/json/peer6-float.json create mode 100644 tools/testing/selftests/net/ovpn/json/peer6.json diff --git a/tools/testing/selftests/net/ovpn/Makefile b/tools/testing/selftests/net/ovpn/Makefile index e59271a25d76..88891d9f5c56 100644 --- a/tools/testing/selftests/net/ovpn/Makefile +++ b/tools/testing/selftests/net/ovpn/Makefile @@ -23,7 +23,14 @@ endif LDLIBS += $(NL_LDLIBS) -TEST_FILES = common.sh +TEST_FILES = \ + common.sh \ + data64.key \ + json \ + tcp_peers.txt \ + udp_peers.txt \ + ../../../../net/ynl/pyynl/cli.py \ +# end of TEST_FILES TEST_PROGS := \ test-chachapoly.sh \ diff --git a/tools/testing/selftests/net/ovpn/common.sh b/tools/testing/selftests/net/ovpn/common.sh index 88869c675d03..df0a541148fe 100644 --- a/tools/testing/selftests/net/ovpn/common.sh +++ b/tools/testing/selftests/net/ovpn/common.sh @@ -7,12 +7,18 @@ UDP_PEERS_FILE=${UDP_PEERS_FILE:-udp_peers.txt} TCP_PEERS_FILE=${TCP_PEERS_FILE:-tcp_peers.txt} OVPN_CLI=${OVPN_CLI:-./ovpn-cli} +YNL_CLI=${YNL_CLI:-../../../../net/ynl/pyynl/cli.py} ALG=${ALG:-aes} PROTO=${PROTO:-UDP} FLOAT=${FLOAT:-0} +JQ_FILTER='map(select(.msg.peer | has("remote-ipv6") | not)) | + map(del(.msg.ifindex)) | sort_by(.msg.peer.id)[]' LAN_IP="11.11.11.11" +declare -A tmp_jsons=() +declare -A listener_pids=() + create_ns() { ip netns add peer${1} } @@ -48,6 +54,14 @@ setup_ns() { ip -n peer${1} link set tun${1} up } +setup_listener() { + file=$(mktemp) + PYTHONUNBUFFERED=1 ip netns exec peer${p} ${YNL_CLI} --family ovpn \ + --subscribe peers --output-json --duration 40 > ${file} & + listener_pids[$1]=$! + tmp_jsons[$1]="${file}" +} + add_peer() { if [ "${PROTO}" == "UDP" ]; then if [ ${1} -eq 0 ]; then @@ -82,6 +96,24 @@ add_peer() { fi } +compare_ntfs() { + if [ ${#tmp_jsons[@]} -gt 0 ]; then + [ "$FLOAT" == 1 ] && suffix="-float" + expected="json/peer${1}${suffix}.json" + received="${tmp_jsons[$1]}" + + kill -TERM ${listener_pids[$1]} || true + wait ${listener_pids[$1]} || true + printf "Checking notifications for peer ${1}... " + if diff <(jq -s "${JQ_FILTER}" ${expected}) \ + <(jq -s "${JQ_FILTER}" ${received}); then + echo "OK" + fi + + rm -f ${received} || true + fi +} + cleanup() { # some ovpn-cli processes sleep in background so they need manual poking killall $(basename ${OVPN_CLI}) 2>/dev/null || true @@ -104,5 +136,3 @@ if [ "${PROTO}" == "UDP" ]; then else NUM_PEERS=${NUM_PEERS:-$(wc -l ${TCP_PEERS_FILE} | awk '{print $1}')} fi - - diff --git a/tools/testing/selftests/net/ovpn/json/peer0-float.json b/tools/testing/selftests/net/ovpn/json/peer0-float.json new file mode 100644 index 000000000000..682fa58ad4ea --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer0-float.json @@ -0,0 +1,9 @@ +{"name": "peer-float-ntf", "msg": {"ifindex": 0, "peer": {"id": 1, "remote-ipv4": "10.10.1.3", "remote-port": 1}}} +{"name": "peer-float-ntf", "msg": {"ifindex": 0, "peer": {"id": 2, "remote-ipv4": "10.10.2.3", "remote-port": 1}}} +{"name": "peer-float-ntf", "msg": {"ifindex": 0, "peer": {"id": 3, "remote-ipv4": "10.10.3.3", "remote-port": 1}}} +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "userspace", "id": 1}}} +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "userspace", "id": 2}}} +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 3}}} +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 4}}} +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 5}}} +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 6}}} diff --git a/tools/testing/selftests/net/ovpn/json/peer0.json b/tools/testing/selftests/net/ovpn/json/peer0.json new file mode 100644 index 000000000000..7c46a33d5ecd --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer0.json @@ -0,0 +1,6 @@ +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "userspace", "id": 1}}} +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "userspace", "id": 2}}} +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 3}}} +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 4}}} +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 5}}} +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 6}}} diff --git a/tools/testing/selftests/net/ovpn/json/peer1-float.json b/tools/testing/selftests/net/ovpn/json/peer1-float.json new file mode 120000 index 000000000000..d28c328d1452 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer1-float.json @@ -0,0 +1 @@ +peer1.json \ No newline at end of file diff --git a/tools/testing/selftests/net/ovpn/json/peer1.json b/tools/testing/selftests/net/ovpn/json/peer1.json new file mode 100644 index 000000000000..5da4ea9d51fb --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer1.json @@ -0,0 +1 @@ +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "userspace", "id": 1}}} diff --git a/tools/testing/selftests/net/ovpn/json/peer2-float.json b/tools/testing/selftests/net/ovpn/json/peer2-float.json new file mode 120000 index 000000000000..b9f09980aaa0 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer2-float.json @@ -0,0 +1 @@ +peer2.json \ No newline at end of file diff --git a/tools/testing/selftests/net/ovpn/json/peer2.json b/tools/testing/selftests/net/ovpn/json/peer2.json new file mode 100644 index 000000000000..8f6db4f8c2ac --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer2.json @@ -0,0 +1 @@ +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "userspace", "id": 2}}} diff --git a/tools/testing/selftests/net/ovpn/json/peer3-float.json b/tools/testing/selftests/net/ovpn/json/peer3-float.json new file mode 120000 index 000000000000..2700b55bcf2e --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer3-float.json @@ -0,0 +1 @@ +peer3.json \ No newline at end of file diff --git a/tools/testing/selftests/net/ovpn/json/peer3.json b/tools/testing/selftests/net/ovpn/json/peer3.json new file mode 100644 index 000000000000..bdabd6fa2e64 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer3.json @@ -0,0 +1 @@ +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 3}}} diff --git a/tools/testing/selftests/net/ovpn/json/peer4-float.json b/tools/testing/selftests/net/ovpn/json/peer4-float.json new file mode 120000 index 000000000000..460f6c14cd60 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer4-float.json @@ -0,0 +1 @@ +peer4.json \ No newline at end of file diff --git a/tools/testing/selftests/net/ovpn/json/peer4.json b/tools/testing/selftests/net/ovpn/json/peer4.json new file mode 100644 index 000000000000..c3734bb9251b --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer4.json @@ -0,0 +1 @@ +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 4}}} diff --git a/tools/testing/selftests/net/ovpn/json/peer5-float.json b/tools/testing/selftests/net/ovpn/json/peer5-float.json new file mode 120000 index 000000000000..0f725c50ce19 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer5-float.json @@ -0,0 +1 @@ +peer5.json \ No newline at end of file diff --git a/tools/testing/selftests/net/ovpn/json/peer5.json b/tools/testing/selftests/net/ovpn/json/peer5.json new file mode 100644 index 000000000000..46c4a348299d --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer5.json @@ -0,0 +1 @@ +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 5}}} diff --git a/tools/testing/selftests/net/ovpn/json/peer6-float.json b/tools/testing/selftests/net/ovpn/json/peer6-float.json new file mode 120000 index 000000000000..4d9ded3e0a84 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer6-float.json @@ -0,0 +1 @@ +peer6.json \ No newline at end of file diff --git a/tools/testing/selftests/net/ovpn/json/peer6.json b/tools/testing/selftests/net/ovpn/json/peer6.json new file mode 100644 index 000000000000..aa30f2cff625 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer6.json @@ -0,0 +1 @@ +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 6}}} diff --git a/tools/testing/selftests/net/ovpn/tcp_peers.txt b/tools/testing/selftests/net/ovpn/tcp_peers.txt index d753eebe8716..b8f3cb33eaa2 100644 --- a/tools/testing/selftests/net/ovpn/tcp_peers.txt +++ b/tools/testing/selftests/net/ovpn/tcp_peers.txt @@ -3,3 +3,4 @@ 3 5.5.5.4 4 5.5.5.5 5 5.5.5.6 +6 5.5.5.7 diff --git a/tools/testing/selftests/net/ovpn/test.sh b/tools/testing/selftests/net/ovpn/test.sh index e8acdc303307..c2904342ec57 100755 --- a/tools/testing/selftests/net/ovpn/test.sh +++ b/tools/testing/selftests/net/ovpn/test.sh @@ -17,6 +17,10 @@ for p in $(seq 0 ${NUM_PEERS}); do create_ns ${p} done +for p in $(seq 0 ${NUM_PEERS}); do + setup_listener ${p} +done + for p in $(seq 0 ${NUM_PEERS}); do setup_ns ${p} 5.5.5.$((${p} + 1))/24 ${MTU} done @@ -112,6 +116,10 @@ for p in $(seq 3 ${NUM_PEERS}); do done sleep 5 +for p in $(seq 0 ${NUM_PEERS}); do + compare_ntfs ${p} +done + cleanup modprobe -r ovpn || true From 2e570a51408839b2079f3cb7e3944bf9b1184ee0 Mon Sep 17 00:00:00 2001 From: Ralf Lici Date: Wed, 9 Jul 2025 17:21:25 +0200 Subject: [PATCH 0588/1561] ovpn: add support for asymmetric peer IDs In order to support the multipeer architecture, upon connection setup each side of a tunnel advertises a unique ID that the other side must include in packets sent to them. Therefore when transmitting a packet, a peer inserts the recipient's advertised ID for that specific tunnel into the peer ID field. When receiving a packet, a peer expects to find its own unique receive ID for that specific tunnel in the peer ID field. Add support for the TX peer ID and embed it into transmitting packets. If no TX peer ID is specified, fallback to using the same peer ID both for RX and TX in order to be compatible with the non-multipeer compliant peers. Cc: horms@kernel.org Cc: donald.hunter@gmail.com Signed-off-by: Ralf Lici Signed-off-by: Antonio Quartulli Reviewed-by: Sabrina Dubroca --- Documentation/netlink/specs/ovpn.yaml | 17 ++++++++++++++++- drivers/net/ovpn/crypto_aead.c | 2 +- drivers/net/ovpn/netlink-gen.c | 13 ++++++++++--- drivers/net/ovpn/netlink-gen.h | 6 +++--- drivers/net/ovpn/netlink.c | 14 ++++++++++++-- drivers/net/ovpn/peer.c | 4 ++++ drivers/net/ovpn/peer.h | 4 +++- include/uapi/linux/ovpn.h | 1 + 8 files changed, 50 insertions(+), 11 deletions(-) diff --git a/Documentation/netlink/specs/ovpn.yaml b/Documentation/netlink/specs/ovpn.yaml index 0d0c028bf96f..b0c782e59a32 100644 --- a/Documentation/netlink/specs/ovpn.yaml +++ b/Documentation/netlink/specs/ovpn.yaml @@ -43,7 +43,8 @@ attribute-sets: type: u32 doc: >- The unique ID of the peer in the device context. To be used to - identify peers during operations for a specific device + identify peers during operations for a specific device. + Also used to match packets received from this peer. checks: max: 0xFFFFFF - @@ -160,6 +161,16 @@ attribute-sets: name: link-tx-packets type: uint doc: Number of packets transmitted at the transport level + - + name: tx-id + type: u32 + doc: >- + The ID value used when transmitting packets to this peer. This + way outgoing packets can have a different ID than incoming ones. + Useful in multipeer-to-multipeer connections, where each peer + will advertise the tx-id to be used on the link. + checks: + max: 0xFFFFFF - name: peer-new-input subset-of: peer @@ -188,6 +199,8 @@ attribute-sets: name: keepalive-interval - name: keepalive-timeout + - + name: tx-id - name: peer-set-input subset-of: peer @@ -214,6 +227,8 @@ attribute-sets: name: keepalive-interval - name: keepalive-timeout + - + name: tx-id - name: peer-del-input subset-of: peer diff --git a/drivers/net/ovpn/crypto_aead.c b/drivers/net/ovpn/crypto_aead.c index 77be0942a269..59848c41b7b2 100644 --- a/drivers/net/ovpn/crypto_aead.c +++ b/drivers/net/ovpn/crypto_aead.c @@ -122,7 +122,7 @@ int ovpn_aead_encrypt(struct ovpn_peer *peer, struct ovpn_crypto_key_slot *ks, memcpy(skb->data, iv, OVPN_NONCE_WIRE_SIZE); /* add packet op as head of additional data */ - op = ovpn_opcode_compose(OVPN_DATA_V2, ks->key_id, peer->id); + op = ovpn_opcode_compose(OVPN_DATA_V2, ks->key_id, peer->tx_id); __skb_push(skb, OVPN_OPCODE_SIZE); BUILD_BUG_ON(sizeof(op) != OVPN_OPCODE_SIZE); *((__force __be32 *)skb->data) = htonl(op); diff --git a/drivers/net/ovpn/netlink-gen.c b/drivers/net/ovpn/netlink-gen.c index ecbe9dcf4f7d..2147cec7c2c5 100644 --- a/drivers/net/ovpn/netlink-gen.c +++ b/drivers/net/ovpn/netlink-gen.c @@ -16,6 +16,10 @@ static const struct netlink_range_validation ovpn_a_peer_id_range = { .max = 16777215ULL, }; +static const struct netlink_range_validation ovpn_a_peer_tx_id_range = { + .max = 16777215ULL, +}; + static const struct netlink_range_validation ovpn_a_keyconf_peer_id_range = { .max = 16777215ULL, }; @@ -51,7 +55,7 @@ const struct nla_policy ovpn_keydir_nl_policy[OVPN_A_KEYDIR_NONCE_TAIL + 1] = { [OVPN_A_KEYDIR_NONCE_TAIL] = NLA_POLICY_EXACT_LEN(OVPN_NONCE_TAIL_SIZE), }; -const struct nla_policy ovpn_peer_nl_policy[OVPN_A_PEER_LINK_TX_PACKETS + 1] = { +const struct nla_policy ovpn_peer_nl_policy[OVPN_A_PEER_TX_ID + 1] = { [OVPN_A_PEER_ID] = NLA_POLICY_FULL_RANGE(NLA_U32, &ovpn_a_peer_id_range), [OVPN_A_PEER_REMOTE_IPV4] = { .type = NLA_BE32, }, [OVPN_A_PEER_REMOTE_IPV6] = NLA_POLICY_EXACT_LEN(16), @@ -75,13 +79,14 @@ const struct nla_policy ovpn_peer_nl_policy[OVPN_A_PEER_LINK_TX_PACKETS + 1] = { [OVPN_A_PEER_LINK_TX_BYTES] = { .type = NLA_UINT, }, [OVPN_A_PEER_LINK_RX_PACKETS] = { .type = NLA_UINT, }, [OVPN_A_PEER_LINK_TX_PACKETS] = { .type = NLA_UINT, }, + [OVPN_A_PEER_TX_ID] = NLA_POLICY_FULL_RANGE(NLA_U32, &ovpn_a_peer_tx_id_range), }; const struct nla_policy ovpn_peer_del_input_nl_policy[OVPN_A_PEER_ID + 1] = { [OVPN_A_PEER_ID] = NLA_POLICY_FULL_RANGE(NLA_U32, &ovpn_a_peer_id_range), }; -const struct nla_policy ovpn_peer_new_input_nl_policy[OVPN_A_PEER_KEEPALIVE_TIMEOUT + 1] = { +const struct nla_policy ovpn_peer_new_input_nl_policy[OVPN_A_PEER_TX_ID + 1] = { [OVPN_A_PEER_ID] = NLA_POLICY_FULL_RANGE(NLA_U32, &ovpn_a_peer_id_range), [OVPN_A_PEER_REMOTE_IPV4] = { .type = NLA_BE32, }, [OVPN_A_PEER_REMOTE_IPV6] = NLA_POLICY_EXACT_LEN(16), @@ -94,9 +99,10 @@ const struct nla_policy ovpn_peer_new_input_nl_policy[OVPN_A_PEER_KEEPALIVE_TIME [OVPN_A_PEER_LOCAL_IPV6] = NLA_POLICY_EXACT_LEN(16), [OVPN_A_PEER_KEEPALIVE_INTERVAL] = { .type = NLA_U32, }, [OVPN_A_PEER_KEEPALIVE_TIMEOUT] = { .type = NLA_U32, }, + [OVPN_A_PEER_TX_ID] = NLA_POLICY_FULL_RANGE(NLA_U32, &ovpn_a_peer_tx_id_range), }; -const struct nla_policy ovpn_peer_set_input_nl_policy[OVPN_A_PEER_KEEPALIVE_TIMEOUT + 1] = { +const struct nla_policy ovpn_peer_set_input_nl_policy[OVPN_A_PEER_TX_ID + 1] = { [OVPN_A_PEER_ID] = NLA_POLICY_FULL_RANGE(NLA_U32, &ovpn_a_peer_id_range), [OVPN_A_PEER_REMOTE_IPV4] = { .type = NLA_BE32, }, [OVPN_A_PEER_REMOTE_IPV6] = NLA_POLICY_EXACT_LEN(16), @@ -108,6 +114,7 @@ const struct nla_policy ovpn_peer_set_input_nl_policy[OVPN_A_PEER_KEEPALIVE_TIME [OVPN_A_PEER_LOCAL_IPV6] = NLA_POLICY_EXACT_LEN(16), [OVPN_A_PEER_KEEPALIVE_INTERVAL] = { .type = NLA_U32, }, [OVPN_A_PEER_KEEPALIVE_TIMEOUT] = { .type = NLA_U32, }, + [OVPN_A_PEER_TX_ID] = NLA_POLICY_FULL_RANGE(NLA_U32, &ovpn_a_peer_tx_id_range), }; /* OVPN_CMD_PEER_NEW - do */ diff --git a/drivers/net/ovpn/netlink-gen.h b/drivers/net/ovpn/netlink-gen.h index b2301580770f..67cd85f86173 100644 --- a/drivers/net/ovpn/netlink-gen.h +++ b/drivers/net/ovpn/netlink-gen.h @@ -18,10 +18,10 @@ extern const struct nla_policy ovpn_keyconf_del_input_nl_policy[OVPN_A_KEYCONF_S extern const struct nla_policy ovpn_keyconf_get_nl_policy[OVPN_A_KEYCONF_CIPHER_ALG + 1]; extern const struct nla_policy ovpn_keyconf_swap_input_nl_policy[OVPN_A_KEYCONF_PEER_ID + 1]; extern const struct nla_policy ovpn_keydir_nl_policy[OVPN_A_KEYDIR_NONCE_TAIL + 1]; -extern const struct nla_policy ovpn_peer_nl_policy[OVPN_A_PEER_LINK_TX_PACKETS + 1]; +extern const struct nla_policy ovpn_peer_nl_policy[OVPN_A_PEER_TX_ID + 1]; extern const struct nla_policy ovpn_peer_del_input_nl_policy[OVPN_A_PEER_ID + 1]; -extern const struct nla_policy ovpn_peer_new_input_nl_policy[OVPN_A_PEER_KEEPALIVE_TIMEOUT + 1]; -extern const struct nla_policy ovpn_peer_set_input_nl_policy[OVPN_A_PEER_KEEPALIVE_TIMEOUT + 1]; +extern const struct nla_policy ovpn_peer_new_input_nl_policy[OVPN_A_PEER_TX_ID + 1]; +extern const struct nla_policy ovpn_peer_set_input_nl_policy[OVPN_A_PEER_TX_ID + 1]; int ovpn_nl_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); diff --git a/drivers/net/ovpn/netlink.c b/drivers/net/ovpn/netlink.c index e10d7f9a28f5..291e2e5bb450 100644 --- a/drivers/net/ovpn/netlink.c +++ b/drivers/net/ovpn/netlink.c @@ -305,6 +305,12 @@ static int ovpn_nl_peer_modify(struct ovpn_peer *peer, struct genl_info *info, dst_cache_reset(&peer->dst_cache); } + /* In a multipeer-to-multipeer setup we may have asymmetric peer IDs, + * that is peer->id might be different from peer->tx_id. + */ + if (attrs[OVPN_A_PEER_TX_ID]) + peer->tx_id = nla_get_u32(attrs[OVPN_A_PEER_TX_ID]); + if (attrs[OVPN_A_PEER_VPN_IPV4]) { rehash = true; peer->vpn_addrs.ipv4.s_addr = @@ -326,8 +332,8 @@ static int ovpn_nl_peer_modify(struct ovpn_peer *peer, struct genl_info *info, } netdev_dbg(peer->ovpn->dev, - "modify peer id=%u endpoint=%pIScp VPN-IPv4=%pI4 VPN-IPv6=%pI6c\n", - peer->id, &ss, + "modify peer id=%u tx_id=%u endpoint=%pIScp VPN-IPv4=%pI4 VPN-IPv6=%pI6c\n", + peer->id, peer->tx_id, &ss, &peer->vpn_addrs.ipv4.s_addr, &peer->vpn_addrs.ipv6); spin_unlock_bh(&peer->lock); @@ -373,6 +379,7 @@ int ovpn_nl_peer_new_doit(struct sk_buff *skb, struct genl_info *info) } peer_id = nla_get_u32(attrs[OVPN_A_PEER_ID]); + peer = ovpn_peer_new(ovpn, peer_id); if (IS_ERR(peer)) { NL_SET_ERR_MSG_FMT_MOD(info->extack, @@ -572,6 +579,9 @@ static int ovpn_nl_send_peer(struct sk_buff *skb, const struct genl_info *info, if (nla_put_u32(skb, OVPN_A_PEER_ID, peer->id)) goto err; + if (nla_put_u32(skb, OVPN_A_PEER_TX_ID, peer->tx_id)) + goto err; + if (peer->vpn_addrs.ipv4.s_addr != htonl(INADDR_ANY)) if (nla_put_in_addr(skb, OVPN_A_PEER_VPN_IPV4, peer->vpn_addrs.ipv4.s_addr)) diff --git a/drivers/net/ovpn/peer.c b/drivers/net/ovpn/peer.c index 4e145b4497e6..26b55d813f0e 100644 --- a/drivers/net/ovpn/peer.c +++ b/drivers/net/ovpn/peer.c @@ -99,7 +99,11 @@ struct ovpn_peer *ovpn_peer_new(struct ovpn_priv *ovpn, u32 id) if (!peer) return ERR_PTR(-ENOMEM); + /* in the default case TX and RX IDs are the same. + * the user may set a different TX ID via netlink + */ peer->id = id; + peer->tx_id = id; peer->ovpn = ovpn; peer->vpn_addrs.ipv4.s_addr = htonl(INADDR_ANY); diff --git a/drivers/net/ovpn/peer.h b/drivers/net/ovpn/peer.h index a1423f2b09e0..328401570cba 100644 --- a/drivers/net/ovpn/peer.h +++ b/drivers/net/ovpn/peer.h @@ -21,7 +21,8 @@ * struct ovpn_peer - the main remote peer object * @ovpn: main openvpn instance this peer belongs to * @dev_tracker: reference tracker for associated dev - * @id: unique identifier + * @id: unique identifier, used to match incoming packets + * @tx_id: identifier to be used in TX packets * @vpn_addrs: IP addresses assigned over the tunnel * @vpn_addrs.ipv4: IPv4 assigned to peer on the tunnel * @vpn_addrs.ipv6: IPv6 assigned to peer on the tunnel @@ -64,6 +65,7 @@ struct ovpn_peer { struct ovpn_priv *ovpn; netdevice_tracker dev_tracker; u32 id; + u32 tx_id; struct { struct in_addr ipv4; struct in6_addr ipv6; diff --git a/include/uapi/linux/ovpn.h b/include/uapi/linux/ovpn.h index 0cce0d58b830..06690090a1a9 100644 --- a/include/uapi/linux/ovpn.h +++ b/include/uapi/linux/ovpn.h @@ -55,6 +55,7 @@ enum { OVPN_A_PEER_LINK_TX_BYTES, OVPN_A_PEER_LINK_RX_PACKETS, OVPN_A_PEER_LINK_TX_PACKETS, + OVPN_A_PEER_TX_ID, __OVPN_A_PEER_MAX, OVPN_A_PEER_MAX = (__OVPN_A_PEER_MAX - 1) From 367f4b163a8cff20d0cb06eb265a1bf1c6652bd9 Mon Sep 17 00:00:00 2001 From: Ralf Lici Date: Wed, 23 Jul 2025 14:35:34 +0200 Subject: [PATCH 0589/1561] selftests: ovpn: check asymmetric peer-id Extend the base test to verify that the correct peer-id is set in data packet headers. This is done by capturing ping packets with tcpdump during the initial exchange and matching the first portion of the header against the expected sequence for every connection. Cc: Shuah Khan Cc: linux-kselftest@vger.kernel.org Cc: horms@kernel.org Signed-off-by: Ralf Lici Signed-off-by: Antonio Quartulli --- tools/testing/selftests/net/ovpn/Makefile | 3 + tools/testing/selftests/net/ovpn/common.sh | 69 ++++++++-- .../net/ovpn/json/peer0-symm-float.json | 1 + .../selftests/net/ovpn/json/peer0-symm.json | 1 + .../net/ovpn/json/peer1-symm-float.json | 1 + .../selftests/net/ovpn/json/peer1-symm.json | 1 + .../selftests/net/ovpn/json/peer1.json | 2 +- .../net/ovpn/json/peer2-symm-float.json | 1 + .../selftests/net/ovpn/json/peer2-symm.json | 1 + .../selftests/net/ovpn/json/peer2.json | 2 +- .../net/ovpn/json/peer3-symm-float.json | 1 + .../selftests/net/ovpn/json/peer3-symm.json | 1 + .../selftests/net/ovpn/json/peer3.json | 2 +- .../net/ovpn/json/peer4-symm-float.json | 1 + .../selftests/net/ovpn/json/peer4-symm.json | 1 + .../selftests/net/ovpn/json/peer4.json | 2 +- .../net/ovpn/json/peer5-symm-float.json | 1 + .../selftests/net/ovpn/json/peer5-symm.json | 1 + .../selftests/net/ovpn/json/peer5.json | 2 +- .../net/ovpn/json/peer6-symm-float.json | 1 + .../selftests/net/ovpn/json/peer6-symm.json | 1 + .../selftests/net/ovpn/json/peer6.json | 2 +- tools/testing/selftests/net/ovpn/ovpn-cli.c | 128 +++++++++++++----- .../testing/selftests/net/ovpn/tcp_peers.txt | 12 +- .../selftests/net/ovpn/test-close-socket.sh | 2 +- .../net/ovpn/test-symmetric-id-float.sh | 11 ++ .../net/ovpn/test-symmetric-id-tcp.sh | 11 ++ .../selftests/net/ovpn/test-symmetric-id.sh | 10 ++ tools/testing/selftests/net/ovpn/test.sh | 68 ++++++++-- .../testing/selftests/net/ovpn/udp_peers.txt | 12 +- 30 files changed, 271 insertions(+), 81 deletions(-) create mode 120000 tools/testing/selftests/net/ovpn/json/peer0-symm-float.json create mode 120000 tools/testing/selftests/net/ovpn/json/peer0-symm.json create mode 120000 tools/testing/selftests/net/ovpn/json/peer1-symm-float.json create mode 100644 tools/testing/selftests/net/ovpn/json/peer1-symm.json create mode 120000 tools/testing/selftests/net/ovpn/json/peer2-symm-float.json create mode 100644 tools/testing/selftests/net/ovpn/json/peer2-symm.json create mode 120000 tools/testing/selftests/net/ovpn/json/peer3-symm-float.json create mode 100644 tools/testing/selftests/net/ovpn/json/peer3-symm.json create mode 120000 tools/testing/selftests/net/ovpn/json/peer4-symm-float.json create mode 100644 tools/testing/selftests/net/ovpn/json/peer4-symm.json create mode 120000 tools/testing/selftests/net/ovpn/json/peer5-symm-float.json create mode 100644 tools/testing/selftests/net/ovpn/json/peer5-symm.json create mode 120000 tools/testing/selftests/net/ovpn/json/peer6-symm-float.json create mode 100644 tools/testing/selftests/net/ovpn/json/peer6-symm.json create mode 100755 tools/testing/selftests/net/ovpn/test-symmetric-id-float.sh create mode 100755 tools/testing/selftests/net/ovpn/test-symmetric-id-tcp.sh create mode 100755 tools/testing/selftests/net/ovpn/test-symmetric-id.sh diff --git a/tools/testing/selftests/net/ovpn/Makefile b/tools/testing/selftests/net/ovpn/Makefile index 88891d9f5c56..ce9f79c4f892 100644 --- a/tools/testing/selftests/net/ovpn/Makefile +++ b/tools/testing/selftests/net/ovpn/Makefile @@ -38,6 +38,9 @@ TEST_PROGS := \ test-close-socket.sh \ test-float.sh \ test-large-mtu.sh \ + test-symmetric-id-float.sh \ + test-symmetric-id-tcp.sh \ + test-symmetric-id.sh \ test-tcp.sh \ test.sh \ # end of TEST_PROGS diff --git a/tools/testing/selftests/net/ovpn/common.sh b/tools/testing/selftests/net/ovpn/common.sh index df0a541148fe..4c08f756e63a 100644 --- a/tools/testing/selftests/net/ovpn/common.sh +++ b/tools/testing/selftests/net/ovpn/common.sh @@ -11,6 +11,9 @@ YNL_CLI=${YNL_CLI:-../../../../net/ynl/pyynl/cli.py} ALG=${ALG:-aes} PROTO=${PROTO:-UDP} FLOAT=${FLOAT:-0} +SYMMETRIC_ID=${SYMMETRIC_ID:-0} + +export ID_OFFSET=$(( 9 * (SYMMETRIC_ID == 0) )) JQ_FILTER='map(select(.msg.peer | has("remote-ipv6") | not)) | map(del(.msg.ifindex)) | sort_by(.msg.peer.id)[]' @@ -54,6 +57,25 @@ setup_ns() { ip -n peer${1} link set tun${1} up } +build_capture_filter() { + # match the first four bytes of the openvpn data payload + if [ "${PROTO}" == "UDP" ]; then + # For UDP, libpcap transport indexing only works for IPv4, so + # use an explicit IPv4 or IPv6 expression based on the peer + # address. The IPv6 branch assumes there are no extension + # headers in the outer packet. + if [[ "${2}" == *:* ]]; then + printf "ip6 and ip6[6] = 17 and ip6[48:4] = %s" "${1}" + else + printf "ip and udp[8:4] = %s" "${1}" + fi + else + # openvpn over TCP prepends a 2-byte packet length ahead of the + # DATA_V2 opcode, so skip it before matching the payload header + printf "ip and tcp[(((tcp[12] & 0xf0) >> 2) + 2):4] = %s" "${1}" + fi +} + setup_listener() { file=$(mktemp) PYTHONUNBUFFERED=1 ip netns exec peer${p} ${YNL_CLI} --family ovpn \ @@ -63,26 +85,39 @@ setup_listener() { } add_peer() { + labels=("ASYMM" "SYMM") + M_ID=${labels[SYMMETRIC_ID]} + if [ "${PROTO}" == "UDP" ]; then if [ ${1} -eq 0 ]; then - ip netns exec peer0 ${OVPN_CLI} new_multi_peer tun0 1 ${UDP_PEERS_FILE} + ip netns exec peer0 ${OVPN_CLI} new_multi_peer tun0 1 \ + ${M_ID} ${UDP_PEERS_FILE} for p in $(seq 1 ${NUM_PEERS}); do ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 1 0 ${ALG} 0 \ data64.key done else - RADDR=$(awk "NR == ${1} {print \$2}" ${UDP_PEERS_FILE}) - RPORT=$(awk "NR == ${1} {print \$3}" ${UDP_PEERS_FILE}) - LPORT=$(awk "NR == ${1} {print \$5}" ${UDP_PEERS_FILE}) - ip netns exec peer${1} ${OVPN_CLI} new_peer tun${1} ${1} ${LPORT} \ - ${RADDR} ${RPORT} - ip netns exec peer${1} ${OVPN_CLI} new_key tun${1} ${1} 1 0 ${ALG} 1 \ - data64.key + if [ "${SYMMETRIC_ID}" -eq 1 ]; then + PEER_ID=${1} + TX_ID="none" + else + PEER_ID=$(awk "NR == ${1} {print \$2}" \ + ${UDP_PEERS_FILE}) + TX_ID=${1} + fi + RADDR=$(awk "NR == ${1} {print \$3}" ${UDP_PEERS_FILE}) + RPORT=$(awk "NR == ${1} {print \$4}" ${UDP_PEERS_FILE}) + LPORT=$(awk "NR == ${1} {print \$6}" ${UDP_PEERS_FILE}) + ip netns exec peer${1} ${OVPN_CLI} new_peer tun${1} \ + ${PEER_ID} ${TX_ID} ${LPORT} ${RADDR} ${RPORT} + ip netns exec peer${1} ${OVPN_CLI} new_key tun${1} \ + ${PEER_ID} 1 0 ${ALG} 1 data64.key fi else if [ ${1} -eq 0 ]; then - (ip netns exec peer0 ${OVPN_CLI} listen tun0 1 ${TCP_PEERS_FILE} && { + (ip netns exec peer0 ${OVPN_CLI} listen tun0 1 ${M_ID} \ + ${TCP_PEERS_FILE} && { for p in $(seq 1 ${NUM_PEERS}); do ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 1 0 \ ${ALG} 0 data64.key @@ -90,15 +125,25 @@ add_peer() { }) & sleep 5 else - ip netns exec peer${1} ${OVPN_CLI} connect tun${1} ${1} 10.10.${1}.1 1 \ - data64.key + if [ "${SYMMETRIC_ID}" -eq 1 ]; then + PEER_ID=${1} + TX_ID="none" + else + PEER_ID=$(awk "NR == ${1} {print \$2}" \ + ${TCP_PEERS_FILE}) + TX_ID=${1} + fi + ip netns exec peer${1} ${OVPN_CLI} connect tun${1} \ + ${PEER_ID} ${TX_ID} 10.10.${1}.1 1 data64.key fi fi } compare_ntfs() { if [ ${#tmp_jsons[@]} -gt 0 ]; then - [ "$FLOAT" == 1 ] && suffix="-float" + suffix="" + [ "${SYMMETRIC_ID}" -eq 1 ] && suffix="${suffix}-symm" + [ "$FLOAT" == 1 ] && suffix="${suffix}-float" expected="json/peer${1}${suffix}.json" received="${tmp_jsons[$1]}" diff --git a/tools/testing/selftests/net/ovpn/json/peer0-symm-float.json b/tools/testing/selftests/net/ovpn/json/peer0-symm-float.json new file mode 120000 index 000000000000..e31a5bd59863 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer0-symm-float.json @@ -0,0 +1 @@ +peer0-float.json \ No newline at end of file diff --git a/tools/testing/selftests/net/ovpn/json/peer0-symm.json b/tools/testing/selftests/net/ovpn/json/peer0-symm.json new file mode 120000 index 000000000000..57a163048eed --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer0-symm.json @@ -0,0 +1 @@ +peer0.json \ No newline at end of file diff --git a/tools/testing/selftests/net/ovpn/json/peer1-symm-float.json b/tools/testing/selftests/net/ovpn/json/peer1-symm-float.json new file mode 120000 index 000000000000..b3615dcc523d --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer1-symm-float.json @@ -0,0 +1 @@ +peer1-symm.json \ No newline at end of file diff --git a/tools/testing/selftests/net/ovpn/json/peer1-symm.json b/tools/testing/selftests/net/ovpn/json/peer1-symm.json new file mode 100644 index 000000000000..5da4ea9d51fb --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer1-symm.json @@ -0,0 +1 @@ +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "userspace", "id": 1}}} diff --git a/tools/testing/selftests/net/ovpn/json/peer1.json b/tools/testing/selftests/net/ovpn/json/peer1.json index 5da4ea9d51fb..1009d26dc14a 100644 --- a/tools/testing/selftests/net/ovpn/json/peer1.json +++ b/tools/testing/selftests/net/ovpn/json/peer1.json @@ -1 +1 @@ -{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "userspace", "id": 1}}} +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "userspace", "id": 10}}} diff --git a/tools/testing/selftests/net/ovpn/json/peer2-symm-float.json b/tools/testing/selftests/net/ovpn/json/peer2-symm-float.json new file mode 120000 index 000000000000..28a895cb5170 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer2-symm-float.json @@ -0,0 +1 @@ +peer2-symm.json \ No newline at end of file diff --git a/tools/testing/selftests/net/ovpn/json/peer2-symm.json b/tools/testing/selftests/net/ovpn/json/peer2-symm.json new file mode 100644 index 000000000000..8f6db4f8c2ac --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer2-symm.json @@ -0,0 +1 @@ +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "userspace", "id": 2}}} diff --git a/tools/testing/selftests/net/ovpn/json/peer2.json b/tools/testing/selftests/net/ovpn/json/peer2.json index 8f6db4f8c2ac..44e9fad2b622 100644 --- a/tools/testing/selftests/net/ovpn/json/peer2.json +++ b/tools/testing/selftests/net/ovpn/json/peer2.json @@ -1 +1 @@ -{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "userspace", "id": 2}}} +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "userspace", "id": 11}}} diff --git a/tools/testing/selftests/net/ovpn/json/peer3-symm-float.json b/tools/testing/selftests/net/ovpn/json/peer3-symm-float.json new file mode 120000 index 000000000000..ee8b9719c2fd --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer3-symm-float.json @@ -0,0 +1 @@ +peer3-symm.json \ No newline at end of file diff --git a/tools/testing/selftests/net/ovpn/json/peer3-symm.json b/tools/testing/selftests/net/ovpn/json/peer3-symm.json new file mode 100644 index 000000000000..bdabd6fa2e64 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer3-symm.json @@ -0,0 +1 @@ +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 3}}} diff --git a/tools/testing/selftests/net/ovpn/json/peer3.json b/tools/testing/selftests/net/ovpn/json/peer3.json index bdabd6fa2e64..d4be8ba130ae 100644 --- a/tools/testing/selftests/net/ovpn/json/peer3.json +++ b/tools/testing/selftests/net/ovpn/json/peer3.json @@ -1 +1 @@ -{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 3}}} +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 12}}} diff --git a/tools/testing/selftests/net/ovpn/json/peer4-symm-float.json b/tools/testing/selftests/net/ovpn/json/peer4-symm-float.json new file mode 120000 index 000000000000..7d34ff7305da --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer4-symm-float.json @@ -0,0 +1 @@ +peer4-symm.json \ No newline at end of file diff --git a/tools/testing/selftests/net/ovpn/json/peer4-symm.json b/tools/testing/selftests/net/ovpn/json/peer4-symm.json new file mode 100644 index 000000000000..c3734bb9251b --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer4-symm.json @@ -0,0 +1 @@ +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 4}}} diff --git a/tools/testing/selftests/net/ovpn/json/peer4.json b/tools/testing/selftests/net/ovpn/json/peer4.json index c3734bb9251b..67d27e2d48ac 100644 --- a/tools/testing/selftests/net/ovpn/json/peer4.json +++ b/tools/testing/selftests/net/ovpn/json/peer4.json @@ -1 +1 @@ -{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 4}}} +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 13}}} diff --git a/tools/testing/selftests/net/ovpn/json/peer5-symm-float.json b/tools/testing/selftests/net/ovpn/json/peer5-symm-float.json new file mode 120000 index 000000000000..afc0f5f9f13b --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer5-symm-float.json @@ -0,0 +1 @@ +peer5-symm.json \ No newline at end of file diff --git a/tools/testing/selftests/net/ovpn/json/peer5-symm.json b/tools/testing/selftests/net/ovpn/json/peer5-symm.json new file mode 100644 index 000000000000..46c4a348299d --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer5-symm.json @@ -0,0 +1 @@ +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 5}}} diff --git a/tools/testing/selftests/net/ovpn/json/peer5.json b/tools/testing/selftests/net/ovpn/json/peer5.json index 46c4a348299d..ecd9bd0b2f37 100644 --- a/tools/testing/selftests/net/ovpn/json/peer5.json +++ b/tools/testing/selftests/net/ovpn/json/peer5.json @@ -1 +1 @@ -{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 5}}} +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 14}}} diff --git a/tools/testing/selftests/net/ovpn/json/peer6-symm-float.json b/tools/testing/selftests/net/ovpn/json/peer6-symm-float.json new file mode 120000 index 000000000000..e39203204d8c --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer6-symm-float.json @@ -0,0 +1 @@ +peer6-symm.json \ No newline at end of file diff --git a/tools/testing/selftests/net/ovpn/json/peer6-symm.json b/tools/testing/selftests/net/ovpn/json/peer6-symm.json new file mode 100644 index 000000000000..aa30f2cff625 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/json/peer6-symm.json @@ -0,0 +1 @@ +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 6}}} diff --git a/tools/testing/selftests/net/ovpn/json/peer6.json b/tools/testing/selftests/net/ovpn/json/peer6.json index aa30f2cff625..7fded29c5804 100644 --- a/tools/testing/selftests/net/ovpn/json/peer6.json +++ b/tools/testing/selftests/net/ovpn/json/peer6.json @@ -1 +1 @@ -{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 6}}} +{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 15}}} diff --git a/tools/testing/selftests/net/ovpn/ovpn-cli.c b/tools/testing/selftests/net/ovpn/ovpn-cli.c index 7178abae1b2f..085446471397 100644 --- a/tools/testing/selftests/net/ovpn/ovpn-cli.c +++ b/tools/testing/selftests/net/ovpn/ovpn-cli.c @@ -103,7 +103,7 @@ struct ovpn_ctx { sa_family_t sa_family; - unsigned long peer_id; + unsigned long peer_id, tx_id; unsigned long lport; union { @@ -133,6 +133,8 @@ struct ovpn_ctx { enum ovpn_key_slot key_slot; int key_id; + bool asymm_id; + const char *peers_file; }; @@ -649,6 +651,8 @@ static int ovpn_new_peer(struct ovpn_ctx *ovpn, bool is_tcp) attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER); NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id); + if (ovpn->asymm_id) + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_TX_ID, ovpn->tx_id); NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_SOCKET, ovpn->socket); if (!is_tcp) { @@ -767,6 +771,10 @@ static int ovpn_handle_peer(struct nl_msg *msg, void (*arg)__always_unused) fprintf(stderr, "* Peer %u\n", nla_get_u32(pattrs[OVPN_A_PEER_ID])); + if (pattrs[OVPN_A_PEER_TX_ID]) + fprintf(stderr, "\tTX peer ID %u\n", + nla_get_u32(pattrs[OVPN_A_PEER_TX_ID])); + if (pattrs[OVPN_A_PEER_SOCKET_NETNSID]) fprintf(stderr, "\tsocket NetNS ID: %d\n", nla_get_s32(pattrs[OVPN_A_PEER_SOCKET_NETNSID])); @@ -1657,41 +1665,57 @@ static void usage(const char *cmd) fprintf(stderr, "\tiface: ovpn interface name\n"); fprintf(stderr, - "* listen [ipv6]: listen for incoming peer TCP connections\n"); + "* listen [ipv6]: listen for incoming peer TCP connections\n"); fprintf(stderr, "\tiface: ovpn interface name\n"); fprintf(stderr, "\tlport: TCP port to listen to\n"); + fprintf(stderr, "\tid_type:\n"); + fprintf(stderr, + "\t\t- SYMM for ignoring the TX peer ID from the peers_file\n"); + fprintf(stderr, + "\t\t- ASYMM for using the TX peer ID from the peers_file\n"); fprintf(stderr, "\tpeers_file: file containing one peer per line: Line format:\n"); - fprintf(stderr, "\t\t \n"); + fprintf(stderr, "\t\t \n"); fprintf(stderr, "\tipv6: whether the socket should listen to the IPv6 wildcard address\n"); fprintf(stderr, - "* connect [key_file]: start connecting peer of TCP-based VPN session\n"); + "* connect [key_file]: start connecting peer of TCP-based VPN session\n"); fprintf(stderr, "\tiface: ovpn interface name\n"); - fprintf(stderr, "\tpeer_id: peer ID of the connecting peer\n"); + fprintf(stderr, + "\tpeer_id: peer ID found in data packets received from this peer\n"); + fprintf(stderr, + "\ttx_id: peer ID to be used when sending to this peer, 'none' for symmetric peer ID\n"); fprintf(stderr, "\traddr: peer IP address to connect to\n"); fprintf(stderr, "\trport: peer TCP port to connect to\n"); fprintf(stderr, "\tkey_file: file containing the symmetric key for encryption\n"); fprintf(stderr, - "* new_peer [vpnaddr]: add new peer\n"); + "* new_peer [vpnaddr]: add new peer\n"); fprintf(stderr, "\tiface: ovpn interface name\n"); - fprintf(stderr, "\tlport: local UDP port to bind to\n"); fprintf(stderr, - "\tpeer_id: peer ID to be used in data packets to/from this peer\n"); + "\tpeer_id: peer ID found in data packets received from this peer\n"); + fprintf(stderr, + "\ttx_id: peer ID to be used when sending to this peer, 'none' for symmetric peer ID\n"); + fprintf(stderr, "\tlport: local UDP port to bind to\n"); fprintf(stderr, "\traddr: peer IP address\n"); fprintf(stderr, "\trport: peer UDP port\n"); fprintf(stderr, "\tvpnaddr: peer VPN IP\n"); fprintf(stderr, - "* new_multi_peer : add multiple peers as listed in the file\n"); + "* new_multi_peer : add multiple peers as listed in the file\n"); fprintf(stderr, "\tiface: ovpn interface name\n"); fprintf(stderr, "\tlport: local UDP port to bind to\n"); + fprintf(stderr, "\tid_type:\n"); + fprintf(stderr, + "\t\t- SYMM for ignoring the TX peer ID from the peers_file\n"); + fprintf(stderr, + "\t\t- ASYMM for using the TX peer ID from the peers_file\n"); fprintf(stderr, "\tpeers_file: text file containing one peer per line. Line format:\n"); - fprintf(stderr, "\t\t \n"); + fprintf(stderr, + "\t\t \n"); fprintf(stderr, "* set_peer : set peer attributes\n"); @@ -1804,15 +1828,23 @@ out: } static int ovpn_parse_new_peer(struct ovpn_ctx *ovpn, const char *peer_id, - const char *raddr, const char *rport, - const char *vpnip) + const char *tx_id, const char *raddr, + const char *rport, const char *vpnip) { ovpn->peer_id = strtoul(peer_id, NULL, 10); if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) { - fprintf(stderr, "peer ID value out of range\n"); + fprintf(stderr, "rx peer ID value out of range\n"); return -1; } + if (ovpn->asymm_id) { + ovpn->tx_id = strtoul(tx_id, NULL, 10); + if (errno == ERANGE || ovpn->tx_id > PEER_ID_UNDEF) { + fprintf(stderr, "tx peer ID value out of range\n"); + return -1; + } + } + return ovpn_parse_remote(ovpn, raddr, rport, vpnip); } @@ -1939,8 +1971,8 @@ static void ovpn_waitbg(void) static int ovpn_run_cmd(struct ovpn_ctx *ovpn) { - char peer_id[10], vpnip[INET6_ADDRSTRLEN], laddr[128], lport[10]; - char raddr[128], rport[10]; + char peer_id[10], tx_id[10], vpnip[INET6_ADDRSTRLEN], laddr[128]; + char lport[10], raddr[128], rport[10]; int n, ret; FILE *fp; @@ -1967,7 +1999,8 @@ static int ovpn_run_cmd(struct ovpn_ctx *ovpn) int num_peers = 0; - while ((n = fscanf(fp, "%s %s\n", peer_id, vpnip)) == 2) { + while ((n = fscanf(fp, "%s %s %s\n", peer_id, tx_id, + vpnip)) == 3) { struct ovpn_ctx peer_ctx = { 0 }; if (num_peers == MAX_PEERS) { @@ -1977,6 +2010,7 @@ static int ovpn_run_cmd(struct ovpn_ctx *ovpn) peer_ctx.ifindex = ovpn->ifindex; peer_ctx.sa_family = ovpn->sa_family; + peer_ctx.asymm_id = ovpn->asymm_id; peer_ctx.socket = ovpn_accept(ovpn); if (peer_ctx.socket < 0) { @@ -1987,8 +2021,8 @@ static int ovpn_run_cmd(struct ovpn_ctx *ovpn) /* store peer sockets to test TCP I/O */ ovpn->cli_sockets[num_peers] = peer_ctx.socket; - ret = ovpn_parse_new_peer(&peer_ctx, peer_id, NULL, - NULL, vpnip); + ret = ovpn_parse_new_peer(&peer_ctx, peer_id, tx_id, + NULL, NULL, vpnip); if (ret < 0) { fprintf(stderr, "error while parsing line\n"); return -1; @@ -2056,16 +2090,17 @@ static int ovpn_run_cmd(struct ovpn_ctx *ovpn) return -1; } - while ((n = fscanf(fp, "%s %s %s %s %s %s\n", peer_id, laddr, - lport, raddr, rport, vpnip)) == 6) { + while ((n = fscanf(fp, "%s %s %s %s %s %s %s\n", peer_id, tx_id, + laddr, lport, raddr, rport, vpnip)) == 7) { struct ovpn_ctx peer_ctx = { 0 }; peer_ctx.ifindex = ovpn->ifindex; peer_ctx.socket = ovpn->socket; peer_ctx.sa_family = AF_UNSPEC; + peer_ctx.asymm_id = ovpn->asymm_id; - ret = ovpn_parse_new_peer(&peer_ctx, peer_id, raddr, - rport, vpnip); + ret = ovpn_parse_new_peer(&peer_ctx, peer_id, tx_id, + raddr, rport, vpnip); if (ret < 0) { fprintf(stderr, "error while parsing line\n"); return -1; @@ -2161,7 +2196,7 @@ static int ovpn_parse_cmd_args(struct ovpn_ctx *ovpn, int argc, char *argv[]) case CMD_DEL_IFACE: break; case CMD_LISTEN: - if (argc < 5) + if (argc < 6) return -EINVAL; ovpn->lport = strtoul(argv[3], NULL, 10); @@ -2170,55 +2205,67 @@ static int ovpn_parse_cmd_args(struct ovpn_ctx *ovpn, int argc, char *argv[]) return -1; } - ovpn->peers_file = argv[4]; + if (strcmp(argv[4], "SYMM") == 0) { + ovpn->asymm_id = false; + } else if (strcmp(argv[4], "ASYMM") == 0) { + ovpn->asymm_id = true; + } else { + fprintf(stderr, "Cannot parse id type: %s\n", argv[4]); + return -1; + } + + ovpn->peers_file = argv[5]; ovpn->sa_family = AF_INET; - if (argc > 5 && !strcmp(argv[5], "ipv6")) + if (argc > 6 && !strcmp(argv[6], "ipv6")) ovpn->sa_family = AF_INET6; break; case CMD_CONNECT: - if (argc < 6) + if (argc < 7) return -EINVAL; ovpn->sa_family = AF_INET; + ovpn->asymm_id = strcmp(argv[4], "none"); ret = ovpn_parse_new_peer(ovpn, argv[3], argv[4], argv[5], - NULL); + argv[6], NULL); if (ret < 0) { fprintf(stderr, "Cannot parse remote peer data\n"); return -1; } - if (argc > 6) { + if (argc > 7) { ovpn->key_slot = OVPN_KEY_SLOT_PRIMARY; ovpn->key_id = 0; ovpn->cipher = OVPN_CIPHER_ALG_AES_GCM; ovpn->key_dir = KEY_DIR_OUT; - ret = ovpn_parse_key(argv[6], ovpn); + ret = ovpn_parse_key(argv[7], ovpn); if (ret) return -1; } break; case CMD_NEW_PEER: - if (argc < 7) + if (argc < 8) return -EINVAL; - ovpn->lport = strtoul(argv[4], NULL, 10); + ovpn->asymm_id = strcmp(argv[4], "none"); + + ovpn->lport = strtoul(argv[5], NULL, 10); if (errno == ERANGE || ovpn->lport > 65535) { fprintf(stderr, "lport value out of range\n"); return -1; } - const char *vpnip = (argc > 7) ? argv[7] : NULL; + const char *vpnip = (argc > 8) ? argv[8] : NULL; - ret = ovpn_parse_new_peer(ovpn, argv[3], argv[5], argv[6], - vpnip); + ret = ovpn_parse_new_peer(ovpn, argv[3], argv[4], argv[6], + argv[7], vpnip); if (ret < 0) return -1; break; case CMD_NEW_MULTI_PEER: - if (argc < 5) + if (argc < 6) return -EINVAL; ovpn->lport = strtoul(argv[3], NULL, 10); @@ -2227,7 +2274,16 @@ static int ovpn_parse_cmd_args(struct ovpn_ctx *ovpn, int argc, char *argv[]) return -1; } - ovpn->peers_file = argv[4]; + if (!strcmp(argv[4], "SYMM")) { + ovpn->asymm_id = false; + } else if (!strcmp(argv[4], "ASYMM")) { + ovpn->asymm_id = true; + } else { + fprintf(stderr, "Cannot parse id type: %s\n", argv[4]); + return -1; + } + + ovpn->peers_file = argv[5]; break; case CMD_SET_PEER: if (argc < 6) diff --git a/tools/testing/selftests/net/ovpn/tcp_peers.txt b/tools/testing/selftests/net/ovpn/tcp_peers.txt index b8f3cb33eaa2..3cb67b560705 100644 --- a/tools/testing/selftests/net/ovpn/tcp_peers.txt +++ b/tools/testing/selftests/net/ovpn/tcp_peers.txt @@ -1,6 +1,6 @@ -1 5.5.5.2 -2 5.5.5.3 -3 5.5.5.4 -4 5.5.5.5 -5 5.5.5.6 -6 5.5.5.7 +1 10 5.5.5.2 +2 11 5.5.5.3 +3 12 5.5.5.4 +4 13 5.5.5.5 +5 14 5.5.5.6 +6 15 5.5.5.7 diff --git a/tools/testing/selftests/net/ovpn/test-close-socket.sh b/tools/testing/selftests/net/ovpn/test-close-socket.sh index 5e48a8b67928..0d09df14fe8e 100755 --- a/tools/testing/selftests/net/ovpn/test-close-socket.sh +++ b/tools/testing/selftests/net/ovpn/test-close-socket.sh @@ -27,7 +27,7 @@ done for p in $(seq 1 ${NUM_PEERS}); do ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 60 120 - ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 60 120 + ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} $((${p}+9)) 60 120 done sleep 1 diff --git a/tools/testing/selftests/net/ovpn/test-symmetric-id-float.sh b/tools/testing/selftests/net/ovpn/test-symmetric-id-float.sh new file mode 100755 index 000000000000..b3711a81b463 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test-symmetric-id-float.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2025 OpenVPN, Inc. +# +# Author: Ralf Lici +# Antonio Quartulli + +SYMMETRIC_ID="1" +FLOAT="1" + +source test.sh diff --git a/tools/testing/selftests/net/ovpn/test-symmetric-id-tcp.sh b/tools/testing/selftests/net/ovpn/test-symmetric-id-tcp.sh new file mode 100755 index 000000000000..188cafb67b2f --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test-symmetric-id-tcp.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2025 OpenVPN, Inc. +# +# Author: Ralf Lici +# Antonio Quartulli + +PROTO="TCP" +SYMMETRIC_ID=1 + +source test.sh diff --git a/tools/testing/selftests/net/ovpn/test-symmetric-id.sh b/tools/testing/selftests/net/ovpn/test-symmetric-id.sh new file mode 100755 index 000000000000..35b119c72e4f --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test-symmetric-id.sh @@ -0,0 +1,10 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2025 OpenVPN, Inc. +# +# Author: Ralf Lici +# Antonio Quartulli + +SYMMETRIC_ID="1" + +source test.sh diff --git a/tools/testing/selftests/net/ovpn/test.sh b/tools/testing/selftests/net/ovpn/test.sh index c2904342ec57..b60e94a4094e 100755 --- a/tools/testing/selftests/net/ovpn/test.sh +++ b/tools/testing/selftests/net/ovpn/test.sh @@ -31,14 +31,45 @@ done for p in $(seq 1 ${NUM_PEERS}); do ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 60 120 - ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 60 120 + ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} \ + $((${p}+ID_OFFSET)) 60 120 done sleep 1 +TCPDUMP_TIMEOUT="1.5s" for p in $(seq 1 ${NUM_PEERS}); do + # The first part of the data packet header consists of: + # - TCP only: 2 bytes for the packet length + # - 5 bits for opcode ("9" for DATA_V2) + # - 3 bits for key-id ("0" at this point) + # - 12 bytes for peer-id: + # - with asymmetric ID: "${p}" one way and "${p} + 9" the other way + # - with symmetric ID: "${p}" both ways + HEADER1=$(printf "0x4800000%x" ${p}) + HEADER2=$(printf "0x4800000%x" $((${p} + ID_OFFSET))) + RADDR="" + if [ "${PROTO}" == "UDP" ]; then + RADDR=$(awk "NR == ${p} {print \$3}" ${UDP_PEERS_FILE}) + fi + + timeout ${TCPDUMP_TIMEOUT} ip netns exec peer${p} \ + tcpdump --immediate-mode -p -ni veth${p} -c 1 \ + "$(build_capture_filter "${HEADER1}" "${RADDR}")" \ + >/dev/null 2>&1 & + TCPDUMP_PID1=$! + timeout ${TCPDUMP_TIMEOUT} ip netns exec peer${p} \ + tcpdump --immediate-mode -p -ni veth${p} -c 1 \ + "$(build_capture_filter "${HEADER2}" "${RADDR}")" \ + >/dev/null 2>&1 & + TCPDUMP_PID2=$! + + sleep 0.3 ip netns exec peer0 ping -qfc 500 -w 3 5.5.5.$((${p} + 1)) ip netns exec peer0 ping -qfc 500 -s 3000 -w 3 5.5.5.$((${p} + 1)) + + wait ${TCPDUMP_PID1} + wait ${TCPDUMP_PID2} done # ping LAN behind client 1 @@ -61,9 +92,12 @@ ip netns exec peer1 iperf3 -Z -t 3 -c 5.5.5.1 echo "Adding secondary key and then swap:" for p in $(seq 1 ${NUM_PEERS}); do - ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 2 1 ${ALG} 0 data64.key - ip netns exec peer${p} ${OVPN_CLI} new_key tun${p} ${p} 2 1 ${ALG} 1 data64.key - ip netns exec peer${p} ${OVPN_CLI} swap_keys tun${p} ${p} + ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 2 1 ${ALG} 0 \ + data64.key + ip netns exec peer${p} ${OVPN_CLI} new_key tun${p} \ + $((${p} + ID_OFFSET)) 2 1 ${ALG} 1 data64.key + ip netns exec peer${p} ${OVPN_CLI} swap_keys tun${p} \ + $((${p} + ID_OFFSET)) done sleep 1 @@ -75,17 +109,19 @@ ip netns exec peer1 ${OVPN_CLI} get_peer tun1 echo "Querying peer 1:" ip netns exec peer0 ${OVPN_CLI} get_peer tun0 1 -echo "Querying non-existent peer 10:" -ip netns exec peer0 ${OVPN_CLI} get_peer tun0 10 || true +echo "Querying non-existent peer 20:" +ip netns exec peer0 ${OVPN_CLI} get_peer tun0 20 || true echo "Deleting peer 1:" ip netns exec peer0 ${OVPN_CLI} del_peer tun0 1 -ip netns exec peer1 ${OVPN_CLI} del_peer tun1 1 +ip netns exec peer1 ${OVPN_CLI} del_peer tun1 $((1 + ID_OFFSET)) echo "Querying keys:" for p in $(seq 2 ${NUM_PEERS}); do - ip netns exec peer${p} ${OVPN_CLI} get_key tun${p} ${p} 1 - ip netns exec peer${p} ${OVPN_CLI} get_key tun${p} ${p} 2 + ip netns exec peer${p} ${OVPN_CLI} get_key tun${p} \ + $((${p} + ID_OFFSET)) 1 + ip netns exec peer${p} ${OVPN_CLI} get_key tun${p} \ + $((${p} + ID_OFFSET)) 2 done echo "Deleting peer while sending traffic:" @@ -94,25 +130,29 @@ sleep 2 ip netns exec peer0 ${OVPN_CLI} del_peer tun0 2 # following command fails in TCP mode # (both ends get conn reset when one peer disconnects) -ip netns exec peer2 ${OVPN_CLI} del_peer tun2 2 || true +ip netns exec peer2 ${OVPN_CLI} del_peer tun2 $((2 + ID_OFFSET)) || true echo "Deleting keys:" for p in $(seq 3 ${NUM_PEERS}); do - ip netns exec peer${p} ${OVPN_CLI} del_key tun${p} ${p} 1 - ip netns exec peer${p} ${OVPN_CLI} del_key tun${p} ${p} 2 + ip netns exec peer${p} ${OVPN_CLI} del_key tun${p} \ + $((${p} + ID_OFFSET)) 1 + ip netns exec peer${p} ${OVPN_CLI} del_key tun${p} \ + $((${p} + ID_OFFSET)) 2 done echo "Setting timeout to 3s MP:" for p in $(seq 3 ${NUM_PEERS}); do ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 3 3 || true - ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 0 0 + ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} \ + $((${p} + ID_OFFSET)) 0 0 done # wait for peers to timeout sleep 5 echo "Setting timeout to 3s P2P:" for p in $(seq 3 ${NUM_PEERS}); do - ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 3 3 + ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} \ + $((${p} + ID_OFFSET)) 3 3 done sleep 5 diff --git a/tools/testing/selftests/net/ovpn/udp_peers.txt b/tools/testing/selftests/net/ovpn/udp_peers.txt index e9773ddf875c..93de6465353c 100644 --- a/tools/testing/selftests/net/ovpn/udp_peers.txt +++ b/tools/testing/selftests/net/ovpn/udp_peers.txt @@ -1,6 +1,6 @@ -1 10.10.1.1 1 10.10.1.2 1 5.5.5.2 -2 10.10.2.1 1 10.10.2.2 1 5.5.5.3 -3 10.10.3.1 1 10.10.3.2 1 5.5.5.4 -4 fd00:0:0:4::1 1 fd00:0:0:4::2 1 5.5.5.5 -5 fd00:0:0:5::1 1 fd00:0:0:5::2 1 5.5.5.6 -6 fd00:0:0:6::1 1 fd00:0:0:6::2 1 5.5.5.7 +1 10 10.10.1.1 1 10.10.1.2 1 5.5.5.2 +2 11 10.10.2.1 1 10.10.2.2 1 5.5.5.3 +3 12 10.10.3.1 1 10.10.3.2 1 5.5.5.4 +4 13 fd00:0:0:4::1 1 fd00:0:0:4::2 1 5.5.5.5 +5 14 fd00:0:0:5::1 1 fd00:0:0:5::2 1 5.5.5.6 +6 15 fd00:0:0:6::1 1 fd00:0:0:6::2 1 5.5.5.7 From 7b80d8a33500bd8ae5081c053f0447b502581d79 Mon Sep 17 00:00:00 2001 From: Ralf Lici Date: Wed, 19 Nov 2025 11:56:52 +0100 Subject: [PATCH 0590/1561] selftests: ovpn: add test for the FW mark feature Add a selftest to verify that the FW mark socket option is correctly supported and its value propagated by ovpn. The test adds and removes nftables DROP rules based on the mark value, and checks that the rule counter aligns with the number of lost ping packets. Cc: Shuah Khan Cc: linux-kselftest@vger.kernel.org Cc: horms@kernel.org Signed-off-by: Ralf Lici Signed-off-by: Antonio Quartulli --- tools/testing/selftests/net/ovpn/Makefile | 1 + tools/testing/selftests/net/ovpn/ovpn-cli.c | 23 ++++- tools/testing/selftests/net/ovpn/test-mark.sh | 96 +++++++++++++++++++ 3 files changed, 119 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/net/ovpn/test-mark.sh diff --git a/tools/testing/selftests/net/ovpn/Makefile b/tools/testing/selftests/net/ovpn/Makefile index ce9f79c4f892..169f0464ac3a 100644 --- a/tools/testing/selftests/net/ovpn/Makefile +++ b/tools/testing/selftests/net/ovpn/Makefile @@ -38,6 +38,7 @@ TEST_PROGS := \ test-close-socket.sh \ test-float.sh \ test-large-mtu.sh \ + test-mark.sh \ test-symmetric-id-float.sh \ test-symmetric-id-tcp.sh \ test-symmetric-id.sh \ diff --git a/tools/testing/selftests/net/ovpn/ovpn-cli.c b/tools/testing/selftests/net/ovpn/ovpn-cli.c index 085446471397..d40953375c86 100644 --- a/tools/testing/selftests/net/ovpn/ovpn-cli.c +++ b/tools/testing/selftests/net/ovpn/ovpn-cli.c @@ -6,6 +6,7 @@ * Author: Antonio Quartulli */ +#include #include #include #include @@ -133,6 +134,7 @@ struct ovpn_ctx { enum ovpn_key_slot key_slot; int key_id; + uint32_t mark; bool asymm_id; const char *peers_file; @@ -523,6 +525,15 @@ static int ovpn_socket(struct ovpn_ctx *ctx, sa_family_t family, int proto) return ret; } + if (ctx->mark != 0) { + ret = setsockopt(s, SOL_SOCKET, SO_MARK, (void *)&ctx->mark, + sizeof(ctx->mark)); + if (ret < 0) { + perror("setsockopt for SO_MARK"); + return ret; + } + } + if (family == AF_INET6) { opt = 0; if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, &opt, @@ -1704,7 +1715,7 @@ static void usage(const char *cmd) fprintf(stderr, "\tvpnaddr: peer VPN IP\n"); fprintf(stderr, - "* new_multi_peer : add multiple peers as listed in the file\n"); + "* new_multi_peer [mark]: add multiple peers as listed in the file\n"); fprintf(stderr, "\tiface: ovpn interface name\n"); fprintf(stderr, "\tlport: local UDP port to bind to\n"); fprintf(stderr, "\tid_type:\n"); @@ -1716,6 +1727,7 @@ static void usage(const char *cmd) "\tpeers_file: text file containing one peer per line. Line format:\n"); fprintf(stderr, "\t\t \n"); + fprintf(stderr, "\tmark: socket FW mark value\n"); fprintf(stderr, "* set_peer : set peer attributes\n"); @@ -2284,6 +2296,15 @@ static int ovpn_parse_cmd_args(struct ovpn_ctx *ovpn, int argc, char *argv[]) } ovpn->peers_file = argv[5]; + + ovpn->mark = 0; + if (argc > 6) { + ovpn->mark = strtoul(argv[6], NULL, 10); + if (errno == ERANGE || ovpn->mark > UINT32_MAX) { + fprintf(stderr, "mark value out of range\n"); + return -1; + } + } break; case CMD_SET_PEER: if (argc < 6) diff --git a/tools/testing/selftests/net/ovpn/test-mark.sh b/tools/testing/selftests/net/ovpn/test-mark.sh new file mode 100755 index 000000000000..8534428ed3eb --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test-mark.sh @@ -0,0 +1,96 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2020-2025 OpenVPN, Inc. +# +# Author: Ralf Lici +# Antonio Quartulli + +#set -x +set -e + +MARK=1056 + +source ./common.sh + +cleanup + +modprobe -q ovpn || true + +for p in $(seq 0 "${NUM_PEERS}"); do + create_ns "${p}" +done + +for p in $(seq 0 3); do + setup_ns "${p}" 5.5.5.$((p + 1))/24 +done + +# add peer0 with mark +ip netns exec peer0 "${OVPN_CLI}" new_multi_peer tun0 1 ASYMM \ + "${UDP_PEERS_FILE}" \ + ${MARK} +for p in $(seq 1 3); do + ip netns exec peer0 "${OVPN_CLI}" new_key tun0 "${p}" 1 0 "${ALG}" 0 \ + data64.key +done + +for p in $(seq 1 3); do + add_peer "${p}" +done + +for p in $(seq 1 3); do + ip netns exec peer0 "${OVPN_CLI}" set_peer tun0 "${p}" 60 120 + ip netns exec peer"${p}" "${OVPN_CLI}" set_peer tun"${p}" \ + $((p + 9)) 60 120 +done + +sleep 1 + +for p in $(seq 1 3); do + ip netns exec peer0 ping -qfc 500 -w 3 5.5.5.$((p + 1)) +done + +echo "Adding an nftables drop rule based on mark value ${MARK}" +ip netns exec peer0 nft flush ruleset +ip netns exec peer0 nft 'add table inet filter' +ip netns exec peer0 nft 'add chain inet filter output { + type filter hook output priority 0; + policy accept; +}' +ip netns exec peer0 nft add rule inet filter output \ + meta mark == ${MARK} \ + counter drop + +DROP_COUNTER=$(ip netns exec peer0 nft list chain inet filter output \ + | sed -n 's/.*packets \([0-9]*\).*/\1/p') +sleep 1 + +# ping should fail +for p in $(seq 1 3); do + PING_OUTPUT=$(ip netns exec peer0 ping \ + -qfc 500 -w 1 5.5.5.$((p + 1)) 2>&1) && exit 1 + echo "${PING_OUTPUT}" + LOST_PACKETS=$(echo "$PING_OUTPUT" \ + | awk '/packets transmitted/ { print $1 }') + # increment the drop counter by the amount of lost packets + DROP_COUNTER=$((DROP_COUNTER + LOST_PACKETS)) +done + +# check if the final nft counter matches our counter +TOTAL_COUNT=$(ip netns exec peer0 nft list chain inet filter output \ + | sed -n 's/.*packets \([0-9]*\).*/\1/p') +if [ "${DROP_COUNTER}" -ne "${TOTAL_COUNT}" ]; then + echo "Expected ${TOTAL_COUNT} drops, got ${DROP_COUNTER}" + exit 1 +fi + +echo "Removing the drop rule" +ip netns exec peer0 nft flush ruleset +sleep 1 + +for p in $(seq 1 3); do + ip netns exec peer0 ping -qfc 500 -w 3 5.5.5.$((p + 1)) +done + +cleanup + +modprobe -r ovpn || true From d3244af9c4c2bbce57465130c9cd509182207c2d Mon Sep 17 00:00:00 2001 From: Ralf Lici Date: Fri, 14 Nov 2025 11:40:29 +0100 Subject: [PATCH 0591/1561] ovpn: consolidate crypto allocations in one chunk Currently ovpn uses three separate dynamically allocated structures to set up cryptographic operations for both encryption and decryption. This adds overhead to performance-critical paths and contribute to memory fragmentation. This commit consolidates those allocations into a single temporary blob, similar to what esp_alloc_tmp() does. The resulting performance gain is +7.7% and +4.3% for UDP when using AES and ChaChaPoly respectively, and +4.3% for TCP. Signed-off-by: Ralf Lici Signed-off-by: Antonio Quartulli Reviewed-by: Sabrina Dubroca --- drivers/net/ovpn/crypto_aead.c | 160 +++++++++++++++++++++++++-------- drivers/net/ovpn/io.c | 8 +- drivers/net/ovpn/skb.h | 13 ++- 3 files changed, 135 insertions(+), 46 deletions(-) diff --git a/drivers/net/ovpn/crypto_aead.c b/drivers/net/ovpn/crypto_aead.c index 59848c41b7b2..8f07c418622b 100644 --- a/drivers/net/ovpn/crypto_aead.c +++ b/drivers/net/ovpn/crypto_aead.c @@ -36,6 +36,104 @@ static int ovpn_aead_encap_overhead(const struct ovpn_crypto_key_slot *ks) crypto_aead_authsize(ks->encrypt); /* Auth Tag */ } +/** + * ovpn_aead_crypto_tmp_size - compute the size of a temporary object containing + * an AEAD request structure with extra space for SG + * and IV. + * @tfm: the AEAD cipher handle + * @nfrags: the number of fragments in the skb + * + * This function calculates the size of a contiguous memory block that includes + * the initialization vector (IV), the AEAD request, and an array of scatterlist + * entries. For alignment considerations, the IV is placed first, followed by + * the request, and then the scatterlist. + * Additional alignment is applied according to the requirements of the + * underlying structures. + * + * Return: the size of the temporary memory that needs to be allocated + */ +static unsigned int ovpn_aead_crypto_tmp_size(struct crypto_aead *tfm, + const unsigned int nfrags) +{ + unsigned int len = OVPN_NONCE_SIZE; + + DEBUG_NET_WARN_ON_ONCE(crypto_aead_ivsize(tfm) != OVPN_NONCE_SIZE); + + /* min size for a buffer of ivsize, aligned to alignmask */ + len += crypto_aead_alignmask(tfm) & ~(crypto_tfm_ctx_alignment() - 1); + /* round up to the next multiple of the crypto ctx alignment */ + len = ALIGN(len, crypto_tfm_ctx_alignment()); + + /* reserve space for the AEAD request */ + len += sizeof(struct aead_request) + crypto_aead_reqsize(tfm); + /* round up to the next multiple of the scatterlist alignment */ + len = ALIGN(len, __alignof__(struct scatterlist)); + + /* add enough space for nfrags + 2 scatterlist entries */ + len += array_size(sizeof(struct scatterlist), nfrags + 2); + return len; +} + +/** + * ovpn_aead_crypto_tmp_iv - retrieve the pointer to the IV within a temporary + * buffer allocated using ovpn_aead_crypto_tmp_size + * @aead: the AEAD cipher handle + * @tmp: a pointer to the beginning of the temporary buffer + * + * This function retrieves a pointer to the initialization vector (IV) in the + * temporary buffer. If the AEAD cipher specifies an IV size, the pointer is + * adjusted using the AEAD's alignment mask to ensure proper alignment. + * + * Returns: a pointer to the IV within the temporary buffer + */ +static u8 *ovpn_aead_crypto_tmp_iv(struct crypto_aead *aead, void *tmp) +{ + return likely(crypto_aead_ivsize(aead)) ? + PTR_ALIGN((u8 *)tmp, crypto_aead_alignmask(aead) + 1) : + tmp; +} + +/** + * ovpn_aead_crypto_tmp_req - retrieve the pointer to the AEAD request structure + * within a temporary buffer allocated using + * ovpn_aead_crypto_tmp_size + * @aead: the AEAD cipher handle + * @iv: a pointer to the initialization vector in the temporary buffer + * + * This function computes the location of the AEAD request structure that + * immediately follows the IV in the temporary buffer and it ensures the request + * is aligned to the crypto transform context alignment. + * + * Returns: a pointer to the AEAD request structure + */ +static struct aead_request *ovpn_aead_crypto_tmp_req(struct crypto_aead *aead, + const u8 *iv) +{ + return (void *)PTR_ALIGN(iv + crypto_aead_ivsize(aead), + crypto_tfm_ctx_alignment()); +} + +/** + * ovpn_aead_crypto_req_sg - locate the scatterlist following the AEAD request + * within a temporary buffer allocated using + * ovpn_aead_crypto_tmp_size + * @aead: the AEAD cipher handle + * @req: a pointer to the AEAD request structure in the temporary buffer + * + * This function computes the starting address of the scatterlist that is + * allocated immediately after the AEAD request structure. It aligns the pointer + * based on the alignment requirements of the scatterlist structure. + * + * Returns: a pointer to the scatterlist + */ +static struct scatterlist *ovpn_aead_crypto_req_sg(struct crypto_aead *aead, + struct aead_request *req) +{ + return (void *)ALIGN((unsigned long)(req + 1) + + crypto_aead_reqsize(aead), + __alignof__(struct scatterlist)); +} + int ovpn_aead_encrypt(struct ovpn_peer *peer, struct ovpn_crypto_key_slot *ks, struct sk_buff *skb) { @@ -45,6 +143,7 @@ int ovpn_aead_encrypt(struct ovpn_peer *peer, struct ovpn_crypto_key_slot *ks, struct scatterlist *sg; int nfrags, ret; u32 pktid, op; + void *tmp; u8 *iv; ovpn_skb_cb(skb)->peer = peer; @@ -71,13 +170,17 @@ int ovpn_aead_encrypt(struct ovpn_peer *peer, struct ovpn_crypto_key_slot *ks, if (unlikely(nfrags + 2 > (MAX_SKB_FRAGS + 2))) return -ENOSPC; - /* sg may be required by async crypto */ - ovpn_skb_cb(skb)->sg = kmalloc(sizeof(*ovpn_skb_cb(skb)->sg) * - (nfrags + 2), GFP_ATOMIC); - if (unlikely(!ovpn_skb_cb(skb)->sg)) + /* allocate temporary memory for iv, sg and req */ + tmp = kmalloc(ovpn_aead_crypto_tmp_size(ks->encrypt, nfrags), + GFP_ATOMIC); + if (unlikely(!tmp)) return -ENOMEM; - sg = ovpn_skb_cb(skb)->sg; + ovpn_skb_cb(skb)->crypto_tmp = tmp; + + iv = ovpn_aead_crypto_tmp_iv(ks->encrypt, tmp); + req = ovpn_aead_crypto_tmp_req(ks->encrypt, iv); + sg = ovpn_aead_crypto_req_sg(ks->encrypt, req); /* sg table: * 0: op, wire nonce (AD, len=OVPN_OP_SIZE_V2+OVPN_NONCE_WIRE_SIZE), @@ -105,13 +208,6 @@ int ovpn_aead_encrypt(struct ovpn_peer *peer, struct ovpn_crypto_key_slot *ks, if (unlikely(ret < 0)) return ret; - /* iv may be required by async crypto */ - ovpn_skb_cb(skb)->iv = kmalloc(OVPN_NONCE_SIZE, GFP_ATOMIC); - if (unlikely(!ovpn_skb_cb(skb)->iv)) - return -ENOMEM; - - iv = ovpn_skb_cb(skb)->iv; - /* concat 4 bytes packet id and 8 bytes nonce tail into 12 bytes * nonce */ @@ -130,12 +226,6 @@ int ovpn_aead_encrypt(struct ovpn_peer *peer, struct ovpn_crypto_key_slot *ks, /* AEAD Additional data */ sg_set_buf(sg, skb->data, OVPN_AAD_SIZE); - req = aead_request_alloc(ks->encrypt, GFP_ATOMIC); - if (unlikely(!req)) - return -ENOMEM; - - ovpn_skb_cb(skb)->req = req; - /* setup async crypto operation */ aead_request_set_tfm(req, ks->encrypt); aead_request_set_callback(req, 0, ovpn_encrypt_post, skb); @@ -156,6 +246,7 @@ int ovpn_aead_decrypt(struct ovpn_peer *peer, struct ovpn_crypto_key_slot *ks, struct aead_request *req; struct sk_buff *trailer; struct scatterlist *sg; + void *tmp; u8 *iv; payload_offset = OVPN_AAD_SIZE + tag_size; @@ -184,13 +275,17 @@ int ovpn_aead_decrypt(struct ovpn_peer *peer, struct ovpn_crypto_key_slot *ks, if (unlikely(nfrags + 2 > (MAX_SKB_FRAGS + 2))) return -ENOSPC; - /* sg may be required by async crypto */ - ovpn_skb_cb(skb)->sg = kmalloc(sizeof(*ovpn_skb_cb(skb)->sg) * - (nfrags + 2), GFP_ATOMIC); - if (unlikely(!ovpn_skb_cb(skb)->sg)) + /* allocate temporary memory for iv, sg and req */ + tmp = kmalloc(ovpn_aead_crypto_tmp_size(ks->decrypt, nfrags), + GFP_ATOMIC); + if (unlikely(!tmp)) return -ENOMEM; - sg = ovpn_skb_cb(skb)->sg; + ovpn_skb_cb(skb)->crypto_tmp = tmp; + + iv = ovpn_aead_crypto_tmp_iv(ks->decrypt, tmp); + req = ovpn_aead_crypto_tmp_req(ks->decrypt, iv); + sg = ovpn_aead_crypto_req_sg(ks->decrypt, req); /* sg table: * 0: op, wire nonce (AD, len=OVPN_OPCODE_SIZE+OVPN_NONCE_WIRE_SIZE), @@ -213,24 +308,11 @@ int ovpn_aead_decrypt(struct ovpn_peer *peer, struct ovpn_crypto_key_slot *ks, /* append auth_tag onto scatterlist */ sg_set_buf(sg + ret + 1, skb->data + OVPN_AAD_SIZE, tag_size); - /* iv may be required by async crypto */ - ovpn_skb_cb(skb)->iv = kmalloc(OVPN_NONCE_SIZE, GFP_ATOMIC); - if (unlikely(!ovpn_skb_cb(skb)->iv)) - return -ENOMEM; - - iv = ovpn_skb_cb(skb)->iv; - /* copy nonce into IV buffer */ memcpy(iv, skb->data + OVPN_OPCODE_SIZE, OVPN_NONCE_WIRE_SIZE); memcpy(iv + OVPN_NONCE_WIRE_SIZE, ks->nonce_tail_recv, OVPN_NONCE_TAIL_SIZE); - req = aead_request_alloc(ks->decrypt, GFP_ATOMIC); - if (unlikely(!req)) - return -ENOMEM; - - ovpn_skb_cb(skb)->req = req; - /* setup async crypto operation */ aead_request_set_tfm(req, ks->decrypt); aead_request_set_callback(req, 0, ovpn_decrypt_post, skb); @@ -273,7 +355,11 @@ static struct crypto_aead *ovpn_aead_init(const char *title, goto error; } - /* basic AEAD assumption */ + /* basic AEAD assumption + * all current algorithms use OVPN_NONCE_SIZE. + * ovpn_aead_crypto_tmp_size and ovpn_aead_encrypt/decrypt + * expect this. + */ if (crypto_aead_ivsize(aead) != OVPN_NONCE_SIZE) { pr_err("%s IV size must be %d\n", title, OVPN_NONCE_SIZE); ret = -EINVAL; diff --git a/drivers/net/ovpn/io.c b/drivers/net/ovpn/io.c index 955c9a37e1f8..db43a1f8a07a 100644 --- a/drivers/net/ovpn/io.c +++ b/drivers/net/ovpn/io.c @@ -119,9 +119,7 @@ void ovpn_decrypt_post(void *data, int ret) peer = ovpn_skb_cb(skb)->peer; /* crypto is done, cleanup skb CB and its members */ - kfree(ovpn_skb_cb(skb)->iv); - kfree(ovpn_skb_cb(skb)->sg); - aead_request_free(ovpn_skb_cb(skb)->req); + kfree(ovpn_skb_cb(skb)->crypto_tmp); if (unlikely(ret < 0)) goto drop; @@ -248,9 +246,7 @@ void ovpn_encrypt_post(void *data, int ret) peer = ovpn_skb_cb(skb)->peer; /* crypto is done, cleanup skb CB and its members */ - kfree(ovpn_skb_cb(skb)->iv); - kfree(ovpn_skb_cb(skb)->sg); - aead_request_free(ovpn_skb_cb(skb)->req); + kfree(ovpn_skb_cb(skb)->crypto_tmp); if (unlikely(ret == -ERANGE)) { /* we ran out of IVs and we must kill the key as it can't be diff --git a/drivers/net/ovpn/skb.h b/drivers/net/ovpn/skb.h index 64430880f1da..4fb7ea025426 100644 --- a/drivers/net/ovpn/skb.h +++ b/drivers/net/ovpn/skb.h @@ -18,12 +18,19 @@ #include #include +/** + * struct ovpn_cb - ovpn skb control block + * @peer: the peer this skb was received from/sent to + * @ks: the crypto key slot used to encrypt/decrypt this skb + * @crypto_tmp: pointer to temporary memory used for crypto operations + * containing the IV, the scatter gather list and the aead request + * @payload_offset: offset in the skb where the payload starts + * @nosignal: whether this skb should be sent with the MSG_NOSIGNAL flag (TCP) + */ struct ovpn_cb { struct ovpn_peer *peer; struct ovpn_crypto_key_slot *ks; - struct aead_request *req; - struct scatterlist *sg; - u8 *iv; + void *crypto_tmp; unsigned int payload_offset; bool nosignal; }; From fdd973148a117f3244c9a0a6abf343da57f76ea7 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Fri, 13 Mar 2026 13:48:27 +0100 Subject: [PATCH 0592/1561] selftests: net: add ipv6 RA route to ECMP merge test As commit bbf4a17ad9ff ("ipv6: Fix ECMP sibling count mismatch when clearing RTF_ADDRCONF") pointed out, RA routes are not elegible for ECMP merging. Add a test scenario mixing RA and static routes with gateway to check that they are not getting merged. Signed-off-by: Fernando Fernandez Mancera Link: https://patch.msgid.link/20260313124827.3945-1-fmancera@suse.de Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/config | 2 ++ tools/testing/selftests/net/fib_tests.sh | 17 +++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index cd49b7dfe216..2a390cae41bf 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -43,6 +43,8 @@ CONFIG_IPV6_ILA=m CONFIG_IPV6_IOAM6_LWTUNNEL=y CONFIG_IPV6_MROUTE=y CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_ROUTER_PREF=y CONFIG_IPV6_RPL_LWTUNNEL=y CONFIG_IPV6_SEG6_LWTUNNEL=y CONFIG_IPV6_SIT=y diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 439461080c60..05dadc3f55b1 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -1534,6 +1534,23 @@ fib6_ra_to_static() log_test $ret 0 "ipv6 promote RA route to static" + # Prepare for RA route with gateway + $NS_EXEC sysctl -wq net.ipv6.conf.veth1.accept_ra_rt_info_max_plen=64 + + # Add initial route to cause ECMP merging + $IP -6 route add 2001:12::/64 via fe80::dead:beef dev veth1 + + $NS_EXEC ra6 -i veth2 -d 2001:10::1 -R 2001:12::/64#1#120 + + # Routes are not merged as RA routes are not elegible for ECMP + check_rt_num 2 "$($IP -6 route list | grep -c "2001:12::/64 via")" + + $IP -6 route append 2001:12::/64 via fe80::dead:feeb dev veth1 + + check_rt_num 2 "$($IP -6 route list | grep -c "nexthop via")" + + log_test "$ret" 0 "ipv6 RA route with nexthop do not merge into ECMP with static" + set +e cleanup &> /dev/null From 63e9d434ddc8b2f7b8b6a6d31611736981edc9ca Mon Sep 17 00:00:00 2001 From: Charles Perry Date: Fri, 13 Mar 2026 07:06:08 -0700 Subject: [PATCH 0593/1561] dt-bindings: net: cdns,macb: add a compatible for Microchip pic64hpsc Add "microchip,pic64hpsc-gem" for "PIC64-HPSC" and "microchip,pic64hx-gem" for "PIC64HX", compatible with the former. The generic compatible "cdns,gem" works but offers limited features. Keep it as a fallback. The GEM IPs within pic64hpsc have their MDIO controllers unconnected from any physical pin. Add a check to prevent adding PHYs under the GEM node. Signed-off-by: Charles Perry Acked-by: Conor Dooley Link: https://patch.msgid.link/20260313140610.3681752-2-charles.perry@microchip.com Signed-off-by: Paolo Abeni --- .../devicetree/bindings/net/cdns,macb.yaml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/Documentation/devicetree/bindings/net/cdns,macb.yaml b/Documentation/devicetree/bindings/net/cdns,macb.yaml index cb14c35ba996..feb168385837 100644 --- a/Documentation/devicetree/bindings/net/cdns,macb.yaml +++ b/Documentation/devicetree/bindings/net/cdns,macb.yaml @@ -70,6 +70,14 @@ properties: - microchip,sama7d65-gem # Microchip SAMA7D65 gigabit ethernet interface - const: microchip,sama7g5-gem # Microchip SAMA7G5 gigabit ethernet interface + - items: + - const: microchip,pic64hpsc-gem # Microchip PIC64-HPSC + - const: cdns,gem + - items: + - const: microchip,pic64hx-gem # Microchip PIC64HX + - const: microchip,pic64hpsc-gem # Microchip PIC64-HPSC + - const: cdns,gem + reg: minItems: 1 items: @@ -196,6 +204,17 @@ allOf: required: - phys + - if: + properties: + compatible: + contains: + const: microchip,pic64hpsc-gem + then: + patternProperties: + "^ethernet-phy@[0-9a-f]$": false + properties: + mdio: false + unevaluatedProperties: false examples: From 363a99af19a7f5c7ddd7571e39f3ae6663397bb6 Mon Sep 17 00:00:00 2001 From: Charles Perry Date: Fri, 13 Mar 2026 07:06:09 -0700 Subject: [PATCH 0594/1561] net: macb: add safeguards for jumbo frame larger than 10240 The RX buffers for GEM can have a maximum size of 16320 bytes (0xff in the RXBS field of the DMACFG register means 255*64 = 16320 bytes). The GEM IP has configurable maximum jumbo frame length that can go up to 16383. The actual value for this limit can be found in the "jumbo_max_length" field (bits 0..13) of the DCFG2 register. Currently, the macb driver doesn't use the DCFG2 register when determining the max MTU, instead an hardcoded value (jumbo_max_len in struct macb_config) is used for each platform. Right now the maximum value for jumbo_max_len is 10240 (0x2800). GEM uses one buffer per packet which means that one buffer must allow room for the max MTU plus L2 encapsulation and alignment. This is a limitation of the driver. This commit adds a limit to max_mtu and rx_buffer_size so that the RXBS field can never overflow when a large MTU is used. With this commit, it is now possible to add new platforms with a jumbo_max_len of 16383 so that the hardware properties of each IP can be properly captured in struct macb_config. Signed-off-by: Charles Perry Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260313140610.3681752-3-charles.perry@microchip.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/cadence/macb_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 63105a5f46a7..ca6f9e2a4eb4 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -50,6 +50,7 @@ struct sifive_fu540_macb_mgmt { #define MACB_RX_BUFFER_SIZE 128 #define RX_BUFFER_MULTIPLE 64 /* bytes */ +#define RX_BUFFER_MAX (0xFF * RX_BUFFER_MULTIPLE) /* 16320 bytes */ #define DEFAULT_RX_RING_SIZE 512 /* must be power of 2 */ #define MIN_RX_RING_SIZE 64 @@ -2601,7 +2602,7 @@ static void macb_init_rx_buffer_size(struct macb *bp, size_t size) if (!macb_is_gem(bp)) { bp->rx_buffer_size = MACB_RX_BUFFER_SIZE; } else { - bp->rx_buffer_size = size; + bp->rx_buffer_size = MIN(size, RX_BUFFER_MAX); if (bp->rx_buffer_size % RX_BUFFER_MULTIPLE) { netdev_dbg(bp->dev, @@ -5791,7 +5792,8 @@ static int macb_probe(struct platform_device *pdev) /* MTU range: 68 - 1518 or 10240 */ dev->min_mtu = GEM_MTU_MIN_SIZE; if ((bp->caps & MACB_CAPS_JUMBO) && bp->jumbo_max_len) - dev->max_mtu = bp->jumbo_max_len - ETH_HLEN - ETH_FCS_LEN; + dev->max_mtu = MIN(bp->jumbo_max_len, RX_BUFFER_MAX) - + ETH_HLEN - ETH_FCS_LEN; else dev->max_mtu = 1536 - ETH_HLEN - ETH_FCS_LEN; From f45797fe02df3459bb9cef6886fe0cfa3ed273bd Mon Sep 17 00:00:00 2001 From: Charles Perry Date: Fri, 13 Mar 2026 07:06:10 -0700 Subject: [PATCH 0595/1561] net: macb: add support for Microchip pic64hpsc ethernet endpoint pic64hpsc doesn't have the USRIO register so MACB_CAPS_USRIO_DISABLED is used. pic64hpsc does support PTP and has the timestamping unit so MACB_CAPS_GEM_HAS_PTP is used. jumbo_max_len is set to 16383 (0x3FFF) as reported by the DCFG2 register bits 0..13. The JML register also has a default value of 0x3FFF. dma_burst_length is set to 16 because that's what most other platforms use and it worked for me so far. There is one other mode where bursts of up to 256 are allowed but this might impact negatively other masters on the NOC. The register default value is 4 (bursts up to 4). Signed-off-by: Charles Perry Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260313140610.3681752-4-charles.perry@microchip.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/cadence/macb_main.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index ca6f9e2a4eb4..abd328428c94 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -5623,6 +5623,14 @@ static const struct macb_config raspberrypi_rp1_config = { .jumbo_max_len = 10240, }; +static const struct macb_config pic64hpsc_config = { + .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_JUMBO | + MACB_CAPS_GEM_HAS_PTP | MACB_CAPS_USRIO_DISABLED, + .dma_burst_length = 16, + .init = init_reset_optional, + .jumbo_max_len = 16383, +}; + static const struct of_device_id macb_dt_ids[] = { { .compatible = "cdns,at91sam9260-macb", .data = &at91sam9260_config }, { .compatible = "cdns,macb" }, @@ -5641,6 +5649,7 @@ static const struct of_device_id macb_dt_ids[] = { { .compatible = "cdns,zynq-gem", .data = &zynq_config }, /* deprecated */ { .compatible = "sifive,fu540-c000-gem", .data = &fu540_c000_config }, { .compatible = "microchip,mpfs-macb", .data = &mpfs_config }, + { .compatible = "microchip,pic64hpsc-gem", .data = &pic64hpsc_config}, { .compatible = "microchip,sama7g5-gem", .data = &sama7g5_gem_config }, { .compatible = "microchip,sama7g5-emac", .data = &sama7g5_emac_config }, { .compatible = "mobileye,eyeq5-gem", .data = &eyeq5_config }, From 8737d7194d6d5947c3d7d8813895b44a25b84477 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 13 Mar 2026 17:28:36 +0100 Subject: [PATCH 0596/1561] net: airoha: select QDMA block according LAN/WAN configuration Before this patch even GDM ports were assigned to QDMA0 while odd GDM ports were using QDMA1, so, based on the DTS configuration, both QDMA0 and QDMA1 can theoretically receive traffic destinated to the host cpu from LAN or WAN GDM ports. Airoha folks reported the hw design assumes the LAN traffic destinated to the host cpu is be forwarded to QDMA0 while traffic received on WAN GDM port is managed by QDMA1. For this reason, select QDMA block according to the GDM port LAN or WAN configuration: - QDMA0 is used for GDM LAN devices - QDMA1 is used for GDM WAN device Assuming a device with three GDM ports, a typical configuration could be: - MT7530 DSA switch -> GDM1 (eth0) -> QDMA0 (LAN traffic) - External PHY -> GDM2 (eth1) -> QDMA1 (WAN traffic) - External PHY -> GDM3 (eth2) -> QDMA0 (LAN traffic) We can then bridge eth0 DSA port (lanX) with eth2 since they all tx/rx LAN traffic. Please note this patch introduces a change not visible to the user since airoha_eth driver currently supports just the internal phy available via the MT7530 DSA switch and there are no WAN interfaces officially supported since PCS/external phy is not merged mainline yet (it will be posted with following patches). Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260313-airoha-qdma-lan-wan-mode-v2-1-7d577db6e40c@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/airoha/airoha_eth.c | 18 ++++++++---------- drivers/net/ethernet/airoha/airoha_eth.h | 1 + 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 3e9ec5c178f8..b4563b5d53bb 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -1754,11 +1754,13 @@ static int airhoha_set_gdm2_loopback(struct airoha_gdm_port *port) static int airoha_dev_init(struct net_device *dev) { struct airoha_gdm_port *port = netdev_priv(dev); - struct airoha_qdma *qdma = port->qdma; - struct airoha_eth *eth = qdma->eth; + struct airoha_eth *eth = port->eth; u32 fe_cpu_port; u8 ppe_id; + /* QDMA0 is used for lan ports while QDMA1 is used for WAN ports */ + port->qdma = ð->qdma[!airoha_is_lan_gdm_port(port)]; + port->dev->irq = port->qdma->irq_banks[0].irq; airoha_set_macaddr(port, dev->dev_addr); switch (port->id) { @@ -1782,7 +1784,7 @@ static int airoha_dev_init(struct net_device *dev) } fallthrough; default: { - u8 qdma_id = qdma - ð->qdma[0]; + u8 qdma_id = port->qdma - ð->qdma[0]; /* For PPE1 select cpu port according to the running QDMA. */ fe_cpu_port = qdma_id ? FE_PSE_PORT_CDM2 : FE_PSE_PORT_CDM1; @@ -2866,11 +2868,10 @@ bool airoha_is_valid_gdm_port(struct airoha_eth *eth, } static int airoha_alloc_gdm_port(struct airoha_eth *eth, - struct device_node *np, int index) + struct device_node *np) { const __be32 *id_ptr = of_get_property(np, "reg", NULL); struct airoha_gdm_port *port; - struct airoha_qdma *qdma; struct net_device *dev; int err, p; u32 id; @@ -2901,7 +2902,6 @@ static int airoha_alloc_gdm_port(struct airoha_eth *eth, return -ENOMEM; } - qdma = ð->qdma[index % AIROHA_MAX_NUM_QDMA]; dev->netdev_ops = &airoha_netdev_ops; dev->ethtool_ops = &airoha_ethtool_ops; dev->max_mtu = AIROHA_MAX_MTU; @@ -2913,7 +2913,6 @@ static int airoha_alloc_gdm_port(struct airoha_eth *eth, dev->features |= dev->hw_features; dev->vlan_features = dev->hw_features; dev->dev.of_node = np; - dev->irq = qdma->irq_banks[0].irq; SET_NETDEV_DEV(dev, eth->dev); /* reserve hw queues for HTB offloading */ @@ -2934,7 +2933,7 @@ static int airoha_alloc_gdm_port(struct airoha_eth *eth, port = netdev_priv(dev); u64_stats_init(&port->stats.syncp); spin_lock_init(&port->stats.lock); - port->qdma = qdma; + port->eth = eth; port->dev = dev; port->id = id; eth->ports[p] = port; @@ -3034,7 +3033,6 @@ static int airoha_probe(struct platform_device *pdev) for (i = 0; i < ARRAY_SIZE(eth->qdma); i++) airoha_qdma_start_napi(ð->qdma[i]); - i = 0; for_each_child_of_node(pdev->dev.of_node, np) { if (!of_device_is_compatible(np, "airoha,eth-mac")) continue; @@ -3042,7 +3040,7 @@ static int airoha_probe(struct platform_device *pdev) if (!of_device_is_available(np)) continue; - err = airoha_alloc_gdm_port(eth, np, i++); + err = airoha_alloc_gdm_port(eth, np); if (err) { of_node_put(np); goto error_napi_stop; diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h index 59ed7569f7c3..8cfa94ec084a 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.h +++ b/drivers/net/ethernet/airoha/airoha_eth.h @@ -533,6 +533,7 @@ struct airoha_qdma { struct airoha_gdm_port { struct airoha_qdma *qdma; + struct airoha_eth *eth; struct net_device *dev; int id; From 05c1fc56d37b2531b14afac9e76443ed98276923 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Sun, 15 Mar 2026 15:19:44 +0000 Subject: [PATCH 0597/1561] net: phy: mxl-gpy: add PHY-level statistics via ethtool Report PCS receive error counts for all supported GPY115x, GPY2xx and MxL862xx PHYs. Accumulate the vendor-specific PHY_ERRCNT read-clear counter (SEL=RXERR) in .update_stats() and expose it as both IEEE 802.3 SymbolErrorDuringCarrier and generic rx_errors via .get_phy_stats(). Signed-off-by: Daniel Golle Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/0029a2fb29bfdcc26abff828d2e18400067b5c58.1773587924.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/phy/mxl-gpy.c | 66 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 64 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/mxl-gpy.c b/drivers/net/phy/mxl-gpy.c index 5f99766fb64c..0da2d4e9d854 100644 --- a/drivers/net/phy/mxl-gpy.c +++ b/drivers/net/phy/mxl-gpy.c @@ -38,12 +38,17 @@ #define PHY_CTL1_MDICD BIT(3) #define PHY_CTL1_MDIAB BIT(2) #define PHY_CTL1_AMDIX BIT(0) +#define PHY_ERRCNT 0x15 /* Error counter */ #define PHY_MIISTAT 0x18 /* MII state */ #define PHY_IMASK 0x19 /* interrupt mask */ #define PHY_ISTAT 0x1A /* interrupt status */ #define PHY_LED 0x1B /* LEDs */ #define PHY_FWV 0x1E /* firmware version */ +#define PHY_ERRCNT_SEL GENMASK(11, 8) +#define PHY_ERRCNT_COUNT GENMASK(7, 0) +#define PHY_ERRCNT_SEL_RXERR 0 + #define PHY_MIISTAT_SPD_MASK GENMASK(2, 0) #define PHY_MIISTAT_DPX BIT(3) #define PHY_MIISTAT_LS BIT(10) @@ -134,6 +139,7 @@ struct gpy_priv { u8 fw_major; u8 fw_minor; u32 wolopts; + u64 rx_errors; /* It takes 3 seconds to fully switch out of loopback mode before * it can safely re-enter loopback mode. Record the time when @@ -331,8 +337,9 @@ out: static int gpy_config_init(struct phy_device *phydev) { - /* Nothing to configure. Configuration Requirement Placeholder */ - return 0; + /* Count MDI RX errors (SymbolErrorDuringCarrier) */ + return phy_write(phydev, PHY_ERRCNT, + FIELD_PREP(PHY_ERRCNT_SEL, PHY_ERRCNT_SEL_RXERR)); } static int gpy21x_config_init(struct phy_device *phydev) @@ -1067,6 +1074,31 @@ static int gpy_config_inband(struct phy_device *phydev, unsigned int modes) VSPEC1_SGMII_ANEN_ANRS); } +static int gpy_update_stats(struct phy_device *phydev) +{ + struct gpy_priv *priv = phydev->priv; + int ret; + + /* PHY_ERRCNT: 8-bit read-clear counter, SEL set to RXERR */ + ret = phy_read(phydev, PHY_ERRCNT); + if (ret < 0) + return ret; + + priv->rx_errors += FIELD_GET(PHY_ERRCNT_COUNT, ret); + + return 0; +} + +static void gpy_get_phy_stats(struct phy_device *phydev, + struct ethtool_eth_phy_stats *eth_stats, + struct ethtool_phy_stats *stats) +{ + struct gpy_priv *priv = phydev->priv; + + eth_stats->SymbolErrorDuringCarrier = priv->rx_errors; + stats->rx_errors = priv->rx_errors; +} + static struct phy_driver gpy_drivers[] = { { PHY_ID_MATCH_MODEL(PHY_ID_GPY2xx), @@ -1091,6 +1123,8 @@ static struct phy_driver gpy_drivers[] = { .led_hw_control_get = gpy_led_hw_control_get, .led_hw_control_set = gpy_led_hw_control_set, .led_polarity_set = gpy_led_polarity_set, + .update_stats = gpy_update_stats, + .get_phy_stats = gpy_get_phy_stats, }, { .phy_id = PHY_ID_GPY115B, @@ -1116,6 +1150,8 @@ static struct phy_driver gpy_drivers[] = { .led_hw_control_get = gpy_led_hw_control_get, .led_hw_control_set = gpy_led_hw_control_set, .led_polarity_set = gpy_led_polarity_set, + .update_stats = gpy_update_stats, + .get_phy_stats = gpy_get_phy_stats, }, { PHY_ID_MATCH_MODEL(PHY_ID_GPY115C), @@ -1140,6 +1176,8 @@ static struct phy_driver gpy_drivers[] = { .led_hw_control_get = gpy_led_hw_control_get, .led_hw_control_set = gpy_led_hw_control_set, .led_polarity_set = gpy_led_polarity_set, + .update_stats = gpy_update_stats, + .get_phy_stats = gpy_get_phy_stats, }, { .phy_id = PHY_ID_GPY211B, @@ -1165,6 +1203,8 @@ static struct phy_driver gpy_drivers[] = { .led_hw_control_get = gpy_led_hw_control_get, .led_hw_control_set = gpy_led_hw_control_set, .led_polarity_set = gpy_led_polarity_set, + .update_stats = gpy_update_stats, + .get_phy_stats = gpy_get_phy_stats, }, { PHY_ID_MATCH_MODEL(PHY_ID_GPY211C), @@ -1189,6 +1229,8 @@ static struct phy_driver gpy_drivers[] = { .led_hw_control_get = gpy_led_hw_control_get, .led_hw_control_set = gpy_led_hw_control_set, .led_polarity_set = gpy_led_polarity_set, + .update_stats = gpy_update_stats, + .get_phy_stats = gpy_get_phy_stats, }, { .phy_id = PHY_ID_GPY212B, @@ -1214,6 +1256,8 @@ static struct phy_driver gpy_drivers[] = { .led_hw_control_get = gpy_led_hw_control_get, .led_hw_control_set = gpy_led_hw_control_set, .led_polarity_set = gpy_led_polarity_set, + .update_stats = gpy_update_stats, + .get_phy_stats = gpy_get_phy_stats, }, { PHY_ID_MATCH_MODEL(PHY_ID_GPY212C), @@ -1238,6 +1282,8 @@ static struct phy_driver gpy_drivers[] = { .led_hw_control_get = gpy_led_hw_control_get, .led_hw_control_set = gpy_led_hw_control_set, .led_polarity_set = gpy_led_polarity_set, + .update_stats = gpy_update_stats, + .get_phy_stats = gpy_get_phy_stats, }, { .phy_id = PHY_ID_GPY215B, @@ -1263,6 +1309,8 @@ static struct phy_driver gpy_drivers[] = { .led_hw_control_get = gpy_led_hw_control_get, .led_hw_control_set = gpy_led_hw_control_set, .led_polarity_set = gpy_led_polarity_set, + .update_stats = gpy_update_stats, + .get_phy_stats = gpy_get_phy_stats, }, { PHY_ID_MATCH_MODEL(PHY_ID_GPY215C), @@ -1287,6 +1335,8 @@ static struct phy_driver gpy_drivers[] = { .led_hw_control_get = gpy_led_hw_control_get, .led_hw_control_set = gpy_led_hw_control_set, .led_polarity_set = gpy_led_polarity_set, + .update_stats = gpy_update_stats, + .get_phy_stats = gpy_get_phy_stats, }, { PHY_ID_MATCH_MODEL(PHY_ID_GPY241B), @@ -1306,6 +1356,8 @@ static struct phy_driver gpy_drivers[] = { .set_wol = gpy_set_wol, .get_wol = gpy_get_wol, .set_loopback = gpy_loopback, + .update_stats = gpy_update_stats, + .get_phy_stats = gpy_get_phy_stats, }, { PHY_ID_MATCH_MODEL(PHY_ID_GPY241BM), @@ -1325,6 +1377,8 @@ static struct phy_driver gpy_drivers[] = { .set_wol = gpy_set_wol, .get_wol = gpy_get_wol, .set_loopback = gpy_loopback, + .update_stats = gpy_update_stats, + .get_phy_stats = gpy_get_phy_stats, }, { PHY_ID_MATCH_MODEL(PHY_ID_GPY245B), @@ -1344,6 +1398,8 @@ static struct phy_driver gpy_drivers[] = { .set_wol = gpy_set_wol, .get_wol = gpy_get_wol, .set_loopback = gpy_loopback, + .update_stats = gpy_update_stats, + .get_phy_stats = gpy_get_phy_stats, }, { PHY_ID_MATCH_MODEL(PHY_ID_MXL86211C), @@ -1368,6 +1424,8 @@ static struct phy_driver gpy_drivers[] = { .led_hw_control_get = gpy_led_hw_control_get, .led_hw_control_set = gpy_led_hw_control_set, .led_polarity_set = gpy_led_polarity_set, + .update_stats = gpy_update_stats, + .get_phy_stats = gpy_get_phy_stats, }, { PHY_ID_MATCH_MODEL(PHY_ID_MXL86252), @@ -1385,6 +1443,8 @@ static struct phy_driver gpy_drivers[] = { .set_wol = gpy_set_wol, .get_wol = gpy_get_wol, .set_loopback = gpy_loopback, + .update_stats = gpy_update_stats, + .get_phy_stats = gpy_get_phy_stats, .led_brightness_set = gpy_led_brightness_set, .led_hw_is_supported = gpy_led_hw_is_supported, .led_hw_control_get = gpy_led_hw_control_get, @@ -1407,6 +1467,8 @@ static struct phy_driver gpy_drivers[] = { .set_wol = gpy_set_wol, .get_wol = gpy_get_wol, .set_loopback = gpy_loopback, + .update_stats = gpy_update_stats, + .get_phy_stats = gpy_get_phy_stats, .led_brightness_set = gpy_led_brightness_set, .led_hw_is_supported = gpy_led_hw_is_supported, .led_hw_control_get = gpy_led_hw_control_get, From e3f741f587a9826a82304bb5da021e0fe783795d Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 16 Mar 2026 13:31:25 +0000 Subject: [PATCH 0598/1561] fou: Remove IPPROTO_UDPLITE check in gue_err() and gue6_err(). UDP-Lite has been removed, and its error handler is no longer found in either inet_protos[IPPROTO_UDPLITE] or inet6_protos[IPPROTO_UDPLITE]. The recursion fixed by the protocol check in gue_err() and gue6_err() no longer occurs with UDP-Lite. Let's remove the checks. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Joe Damato Link: https://patch.msgid.link/20260316133127.2646421-1-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/fou_core.c | 3 +-- net/ipv6/fou6.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/net/ipv4/fou_core.c b/net/ipv4/fou_core.c index 3baaa4df7e42..5bae3cf7fe76 100644 --- a/net/ipv4/fou_core.c +++ b/net/ipv4/fou_core.c @@ -1150,8 +1150,7 @@ static int gue_err(struct sk_buff *skb, u32 info) * recursion. Besides, this kind of encapsulation can't even be * configured currently. Discard this. */ - if (guehdr->proto_ctype == IPPROTO_UDP || - guehdr->proto_ctype == IPPROTO_UDPLITE) + if (guehdr->proto_ctype == IPPROTO_UDP) return -EOPNOTSUPP; skb_set_transport_header(skb, -(int)sizeof(struct icmphdr)); diff --git a/net/ipv6/fou6.c b/net/ipv6/fou6.c index 430518ae26fa..157765259e2f 100644 --- a/net/ipv6/fou6.c +++ b/net/ipv6/fou6.c @@ -141,8 +141,7 @@ static int gue6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, * recursion. Besides, this kind of encapsulation can't even be * configured currently. Discard this. */ - if (guehdr->proto_ctype == IPPROTO_UDP || - guehdr->proto_ctype == IPPROTO_UDPLITE) + if (guehdr->proto_ctype == IPPROTO_UDP) return -EOPNOTSUPP; skb_set_transport_header(skb, -(int)sizeof(struct icmp6hdr)); From bb8539e0e60916ef3ed4a92eb2f3cfd8e34061ef Mon Sep 17 00:00:00 2001 From: Qingfang Deng Date: Mon, 16 Mar 2026 17:28:23 +0800 Subject: [PATCH 0599/1561] ppp: require callers of ppp_dev_name() to hold RCU ppp_dev_name() holds the RCU read lock internally to protect pch->ppp. However, as it returns netdev->name to the caller, the caller should also hold either RCU or RTNL lock to prevent the netdev from being freed. The only two references of the function is in the L2TP driver, both of which already hold RCU. So remove the internal RCU lock and document that callers must hold RCU. Signed-off-by: Qingfang Deng Reviewed-by: Breno Leitao Link: https://patch.msgid.link/20260316092824.479149-1-dqfext@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ppp/ppp_generic.c | 3 +-- include/linux/ppp_channel.h | 4 +++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index a036ddfe327b..cb29a6968c63 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -2969,6 +2969,7 @@ int ppp_unit_number(struct ppp_channel *chan) /* * Return the PPP device interface name of a channel. + * Caller must hold RCU read lock. */ char *ppp_dev_name(struct ppp_channel *chan) { @@ -2977,11 +2978,9 @@ char *ppp_dev_name(struct ppp_channel *chan) struct ppp *ppp; if (pch) { - rcu_read_lock(); ppp = rcu_dereference(pch->ppp); if (ppp && ppp->dev) name = ppp->dev->name; - rcu_read_unlock(); } return name; } diff --git a/include/linux/ppp_channel.h b/include/linux/ppp_channel.h index ca8ad03eeef0..2f63e9a6cc88 100644 --- a/include/linux/ppp_channel.h +++ b/include/linux/ppp_channel.h @@ -72,7 +72,9 @@ extern int ppp_channel_index(struct ppp_channel *); /* Get the unit number associated with a channel, or -1 if none */ extern int ppp_unit_number(struct ppp_channel *); -/* Get the device name associated with a channel, or NULL if none */ +/* Get the device name associated with a channel, or NULL if none. + * Caller must hold RCU read lock. + */ extern char *ppp_dev_name(struct ppp_channel *); /* From f327f5a8115e80d954598cf2d5c461873042b7f6 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Sun, 15 Mar 2026 18:42:19 +0100 Subject: [PATCH 0600/1561] dpll: zl3073x: use struct_group to partition states Organize the zl3073x_out, zl3073x_ref, and zl3073x_synth structures using struct_group() to partition fields into semantic groups: * cfg: mutable configuration written to HW via state_set * inv: invariant fields set once during state_fetch * stat: read-only status This enables group-level operations in place of field-by-field copies: * state_set validates invariants haven't changed (WARN_ON + -EINVAL) * state_set short-circuits when cfg is unchanged * state_set copy entire groups in a single assignment instead of enumerating each field Add kernel doc for zl3073x_out_state_set and zl3073x_ref_state_set documenting the new invariant validation and short-circuit semantics. Remove forward declaration of zl3073x_synth_state_set(). Signed-off-by: Ivan Vecera Link: https://patch.msgid.link/20260315174224.399074-2-ivecera@redhat.com Signed-off-by: Jakub Kicinski --- drivers/dpll/zl3073x/out.c | 27 +++++++++++++++++++------ drivers/dpll/zl3073x/out.h | 21 ++++++++++++-------- drivers/dpll/zl3073x/ref.c | 38 ++++++++++++++++++++++-------------- drivers/dpll/zl3073x/ref.h | 31 +++++++++++++++++------------ drivers/dpll/zl3073x/synth.h | 16 +++++++-------- 5 files changed, 84 insertions(+), 49 deletions(-) diff --git a/drivers/dpll/zl3073x/out.c b/drivers/dpll/zl3073x/out.c index 86829a0c1c02..eb5628aebcee 100644 --- a/drivers/dpll/zl3073x/out.c +++ b/drivers/dpll/zl3073x/out.c @@ -106,12 +106,32 @@ const struct zl3073x_out *zl3073x_out_state_get(struct zl3073x_dev *zldev, return &zldev->out[index]; } +/** + * zl3073x_out_state_set - commit output state changes to hardware + * @zldev: pointer to zl3073x_dev structure + * @index: output index to set state for + * @out: desired output state + * + * Validates that invariant fields have not been modified, skips the HW + * write if the mutable configuration is unchanged, and otherwise writes + * only the changed cfg fields to hardware via the mailbox interface. + * + * Return: 0 on success, -EINVAL if invariants changed, <0 on HW error + */ int zl3073x_out_state_set(struct zl3073x_dev *zldev, u8 index, const struct zl3073x_out *out) { struct zl3073x_out *dout = &zldev->out[index]; int rc; + /* Reject attempts to change invariant fields (set at fetch only) */ + if (WARN_ON(memcmp(&dout->inv, &out->inv, sizeof(out->inv)))) + return -EINVAL; + + /* Skip HW write if configuration hasn't changed */ + if (!memcmp(&dout->cfg, &out->cfg, sizeof(out->cfg))) + return 0; + guard(mutex)(&zldev->multiop_lock); /* Read output configuration into mailbox */ @@ -146,12 +166,7 @@ int zl3073x_out_state_set(struct zl3073x_dev *zldev, u8 index, return rc; /* After successful commit store new state */ - dout->div = out->div; - dout->width = out->width; - dout->esync_n_period = out->esync_n_period; - dout->esync_n_width = out->esync_n_width; - dout->mode = out->mode; - dout->phase_comp = out->phase_comp; + dout->cfg = out->cfg; return 0; } diff --git a/drivers/dpll/zl3073x/out.h b/drivers/dpll/zl3073x/out.h index 318f9bb8da3a..edf40432bba5 100644 --- a/drivers/dpll/zl3073x/out.h +++ b/drivers/dpll/zl3073x/out.h @@ -4,6 +4,7 @@ #define _ZL3073X_OUT_H #include +#include #include #include "regs.h" @@ -17,17 +18,21 @@ struct zl3073x_dev; * @esync_n_period: embedded sync or n-pin period (for n-div formats) * @esync_n_width: embedded sync or n-pin pulse width * @phase_comp: phase compensation - * @ctrl: output control * @mode: output mode + * @ctrl: output control */ struct zl3073x_out { - u32 div; - u32 width; - u32 esync_n_period; - u32 esync_n_width; - s32 phase_comp; - u8 ctrl; - u8 mode; + struct_group(cfg, /* Config */ + u32 div; + u32 width; + u32 esync_n_period; + u32 esync_n_width; + s32 phase_comp; + u8 mode; + ); + struct_group(inv, /* Invariants */ + u8 ctrl; + ); }; int zl3073x_out_state_fetch(struct zl3073x_dev *zldev, u8 index); diff --git a/drivers/dpll/zl3073x/ref.c b/drivers/dpll/zl3073x/ref.c index 6b65e6103999..8b4c4807bcc4 100644 --- a/drivers/dpll/zl3073x/ref.c +++ b/drivers/dpll/zl3073x/ref.c @@ -73,14 +73,8 @@ int zl3073x_ref_state_fetch(struct zl3073x_dev *zldev, u8 index) struct zl3073x_ref *p_ref = ref - 1; /* P-pin counterpart*/ /* Copy the shared items from the P-pin */ - ref->config = p_ref->config; - ref->esync_n_div = p_ref->esync_n_div; - ref->freq_base = p_ref->freq_base; - ref->freq_mult = p_ref->freq_mult; - ref->freq_ratio_m = p_ref->freq_ratio_m; - ref->freq_ratio_n = p_ref->freq_ratio_n; - ref->phase_comp = p_ref->phase_comp; - ref->sync_ctrl = p_ref->sync_ctrl; + ref->cfg = p_ref->cfg; + ref->inv = p_ref->inv; return 0; /* Finish - no non-shared items for now */ } @@ -154,12 +148,32 @@ zl3073x_ref_state_get(struct zl3073x_dev *zldev, u8 index) return &zldev->ref[index]; } +/** + * zl3073x_ref_state_set - commit input reference state changes to hardware + * @zldev: pointer to zl3073x_dev structure + * @index: input reference index to set state for + * @ref: desired reference state + * + * Validates that invariant fields have not been modified, skips the HW + * write if the mutable configuration is unchanged, and otherwise writes + * only the changed cfg fields to hardware via the mailbox interface. + * + * Return: 0 on success, -EINVAL if invariants changed, <0 on HW error + */ int zl3073x_ref_state_set(struct zl3073x_dev *zldev, u8 index, const struct zl3073x_ref *ref) { struct zl3073x_ref *dref = &zldev->ref[index]; int rc; + /* Reject attempts to change invariant fields (set at init only) */ + if (WARN_ON(memcmp(&dref->inv, &ref->inv, sizeof(ref->inv)))) + return -EINVAL; + + /* Skip HW write if configuration hasn't changed */ + if (!memcmp(&dref->cfg, &ref->cfg, sizeof(ref->cfg))) + return 0; + guard(mutex)(&zldev->multiop_lock); /* Read reference configuration into mailbox */ @@ -207,13 +221,7 @@ int zl3073x_ref_state_set(struct zl3073x_dev *zldev, u8 index, return rc; /* After successful commit store new state */ - dref->freq_base = ref->freq_base; - dref->freq_mult = ref->freq_mult; - dref->freq_ratio_m = ref->freq_ratio_m; - dref->freq_ratio_n = ref->freq_ratio_n; - dref->esync_n_div = ref->esync_n_div; - dref->sync_ctrl = ref->sync_ctrl; - dref->phase_comp = ref->phase_comp; + dref->cfg = ref->cfg; return 0; } diff --git a/drivers/dpll/zl3073x/ref.h b/drivers/dpll/zl3073x/ref.h index 0d8618f5ce8d..ab3a816c2910 100644 --- a/drivers/dpll/zl3073x/ref.h +++ b/drivers/dpll/zl3073x/ref.h @@ -5,6 +5,7 @@ #include #include +#include #include #include "regs.h" @@ -13,28 +14,34 @@ struct zl3073x_dev; /** * struct zl3073x_ref - input reference state - * @ffo: current fractional frequency offset * @phase_comp: phase compensation * @esync_n_div: divisor for embedded sync or n-divided signal formats * @freq_base: frequency base * @freq_mult: frequnecy multiplier * @freq_ratio_m: FEC mode multiplier * @freq_ratio_n: FEC mode divisor - * @config: reference config * @sync_ctrl: reference sync control + * @config: reference config + * @ffo: current fractional frequency offset * @mon_status: reference monitor status */ struct zl3073x_ref { - s64 ffo; - u64 phase_comp; - u32 esync_n_div; - u16 freq_base; - u16 freq_mult; - u16 freq_ratio_m; - u16 freq_ratio_n; - u8 config; - u8 sync_ctrl; - u8 mon_status; + struct_group(cfg, /* Configuration */ + u64 phase_comp; + u32 esync_n_div; + u16 freq_base; + u16 freq_mult; + u16 freq_ratio_m; + u16 freq_ratio_n; + u8 sync_ctrl; + ); + struct_group(inv, /* Invariants */ + u8 config; + ); + struct_group(stat, /* Status */ + s64 ffo; + u8 mon_status; + ); }; int zl3073x_ref_state_fetch(struct zl3073x_dev *zldev, u8 index); diff --git a/drivers/dpll/zl3073x/synth.h b/drivers/dpll/zl3073x/synth.h index 6c55eb8a888c..89e13ea2e6d9 100644 --- a/drivers/dpll/zl3073x/synth.h +++ b/drivers/dpll/zl3073x/synth.h @@ -5,6 +5,7 @@ #include #include +#include #include #include "regs.h" @@ -20,11 +21,13 @@ struct zl3073x_dev; * @ctrl: synth control */ struct zl3073x_synth { - u32 freq_mult; - u16 freq_base; - u16 freq_m; - u16 freq_n; - u8 ctrl; + struct_group(inv, /* Invariants */ + u32 freq_mult; + u16 freq_base; + u16 freq_m; + u16 freq_n; + u8 ctrl; + ); }; int zl3073x_synth_state_fetch(struct zl3073x_dev *zldev, u8 synth_id); @@ -32,9 +35,6 @@ int zl3073x_synth_state_fetch(struct zl3073x_dev *zldev, u8 synth_id); const struct zl3073x_synth *zl3073x_synth_state_get(struct zl3073x_dev *zldev, u8 synth_id); -int zl3073x_synth_state_set(struct zl3073x_dev *zldev, u8 synth_id, - const struct zl3073x_synth *synth); - /** * zl3073x_synth_dpll_get - get DPLL ID the synth is driven by * @synth: pointer to synth state From 05ea2ab3b10075e8fcfcd5e283e486df7055de5e Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Sun, 15 Mar 2026 18:42:20 +0100 Subject: [PATCH 0601/1561] dpll: zl3073x: add zl3073x_ref_state_update helper Extract the per-reference monitor status HW read into a dedicated zl3073x_ref_state_update() helper in the ref module. Rename zl3073x_dev_ref_status_update() to zl3073x_dev_ref_states_update() and use the new helper in it. Call it from zl3073x_ref_state_fetch() as well so that mon_status is initialized at device startup. This keeps direct register access and struct field writes behind the ref module's interface, consistent with the state management pattern used for other ref operations. Signed-off-by: Ivan Vecera Link: https://patch.msgid.link/20260315174224.399074-3-ivecera@redhat.com Signed-off-by: Jakub Kicinski --- drivers/dpll/zl3073x/core.c | 9 ++++----- drivers/dpll/zl3073x/ref.c | 20 ++++++++++++++++++++ drivers/dpll/zl3073x/ref.h | 2 ++ 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/drivers/dpll/zl3073x/core.c b/drivers/dpll/zl3073x/core.c index 10e036ccf08f..07626082aae3 100644 --- a/drivers/dpll/zl3073x/core.c +++ b/drivers/dpll/zl3073x/core.c @@ -543,13 +543,12 @@ zl3073x_dev_state_fetch(struct zl3073x_dev *zldev) } static void -zl3073x_dev_ref_status_update(struct zl3073x_dev *zldev) +zl3073x_dev_ref_states_update(struct zl3073x_dev *zldev) { int i, rc; for (i = 0; i < ZL3073X_NUM_REFS; i++) { - rc = zl3073x_read_u8(zldev, ZL_REG_REF_MON_STATUS(i), - &zldev->ref[i].mon_status); + rc = zl3073x_ref_state_update(zldev, i); if (rc) dev_warn(zldev->dev, "Failed to get REF%u status: %pe\n", i, @@ -679,8 +678,8 @@ zl3073x_dev_periodic_work(struct kthread_work *work) struct zl3073x_dpll *zldpll; int rc; - /* Update input references status */ - zl3073x_dev_ref_status_update(zldev); + /* Update input references' states */ + zl3073x_dev_ref_states_update(zldev); /* Update DPLL-to-connected-ref phase offsets registers */ rc = zl3073x_ref_phase_offsets_update(zldev, -1); diff --git a/drivers/dpll/zl3073x/ref.c b/drivers/dpll/zl3073x/ref.c index 8b4c4807bcc4..825ac30bcd6f 100644 --- a/drivers/dpll/zl3073x/ref.c +++ b/drivers/dpll/zl3073x/ref.c @@ -51,6 +51,21 @@ zl3073x_ref_freq_factorize(u32 freq, u16 *base, u16 *mult) return -EINVAL; } +/** + * zl3073x_ref_state_update - update input reference status from HW + * @zldev: pointer to zl3073x_dev structure + * @index: input reference index + * + * Return: 0 on success, <0 on error + */ +int zl3073x_ref_state_update(struct zl3073x_dev *zldev, u8 index) +{ + struct zl3073x_ref *ref = &zldev->ref[index]; + + return zl3073x_read_u8(zldev, ZL_REG_REF_MON_STATUS(index), + &ref->mon_status); +} + /** * zl3073x_ref_state_fetch - fetch input reference state from hardware * @zldev: pointer to zl3073x_dev structure @@ -79,6 +94,11 @@ int zl3073x_ref_state_fetch(struct zl3073x_dev *zldev, u8 index) return 0; /* Finish - no non-shared items for now */ } + /* Read reference status */ + rc = zl3073x_ref_state_update(zldev, index); + if (rc) + return rc; + guard(mutex)(&zldev->multiop_lock); /* Read reference configuration */ diff --git a/drivers/dpll/zl3073x/ref.h b/drivers/dpll/zl3073x/ref.h index ab3a816c2910..06d8d4d97ea2 100644 --- a/drivers/dpll/zl3073x/ref.h +++ b/drivers/dpll/zl3073x/ref.h @@ -52,6 +52,8 @@ const struct zl3073x_ref *zl3073x_ref_state_get(struct zl3073x_dev *zldev, int zl3073x_ref_state_set(struct zl3073x_dev *zldev, u8 index, const struct zl3073x_ref *ref); +int zl3073x_ref_state_update(struct zl3073x_dev *zldev, u8 index); + int zl3073x_ref_freq_factorize(u32 freq, u16 *base, u16 *mult); /** From 3032e95987fa0da656ce3a5eb454674e7cc60a12 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Sun, 15 Mar 2026 18:42:21 +0100 Subject: [PATCH 0602/1561] dpll: zl3073x: introduce zl3073x_chan for DPLL channel state Extract DPLL channel state management into a dedicated zl3073x_chan module, following the pattern already established by zl3073x_ref, zl3073x_out and zl3073x_synth. The new struct zl3073x_chan caches the raw mode_refsel register value in a cfg group with inline getters and setters to extract and update the bitfields. Three standard state management functions are provided: - zl3073x_chan_state_fetch: read the mode_refsel register from HW - zl3073x_chan_state_get: return cached channel state - zl3073x_chan_state_set: write changed state to HW, skip if unchanged The channel state array chan[ZL3073X_MAX_CHANNELS] is added to struct zl3073x_dev. Channel state is fetched as part of zl3073x_dev_state_fetch, using the chip-specific channel count. The refsel_mode and forced_ref fields are removed from struct zl3073x_dpll and all direct register accesses in dpll.c are replaced with the new chan state operations. Signed-off-by: Ivan Vecera Link: https://patch.msgid.link/20260315174224.399074-4-ivecera@redhat.com Signed-off-by: Jakub Kicinski --- drivers/dpll/zl3073x/Makefile | 4 +- drivers/dpll/zl3073x/chan.c | 79 ++++++++++++++++++++++++++ drivers/dpll/zl3073x/chan.h | 74 ++++++++++++++++++++++++ drivers/dpll/zl3073x/core.c | 10 ++++ drivers/dpll/zl3073x/core.h | 3 + drivers/dpll/zl3073x/dpll.c | 104 ++++++++++++++++------------------ drivers/dpll/zl3073x/dpll.h | 4 -- 7 files changed, 217 insertions(+), 61 deletions(-) create mode 100644 drivers/dpll/zl3073x/chan.c create mode 100644 drivers/dpll/zl3073x/chan.h diff --git a/drivers/dpll/zl3073x/Makefile b/drivers/dpll/zl3073x/Makefile index bd324c7fe710..906ec3fbcc20 100644 --- a/drivers/dpll/zl3073x/Makefile +++ b/drivers/dpll/zl3073x/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_ZL3073X) += zl3073x.o -zl3073x-objs := core.o devlink.o dpll.o flash.o fw.o \ - out.o prop.o ref.o synth.o +zl3073x-objs := chan.o core.o devlink.o dpll.o \ + flash.o fw.o out.o prop.o ref.o synth.o obj-$(CONFIG_ZL3073X_I2C) += zl3073x_i2c.o zl3073x_i2c-objs := i2c.o diff --git a/drivers/dpll/zl3073x/chan.c b/drivers/dpll/zl3073x/chan.c new file mode 100644 index 000000000000..8019b8ce7351 --- /dev/null +++ b/drivers/dpll/zl3073x/chan.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include +#include + +#include "chan.h" +#include "core.h" + +/** + * zl3073x_chan_state_fetch - fetch DPLL channel state from hardware + * @zldev: pointer to zl3073x_dev structure + * @index: DPLL channel index to fetch state for + * + * Reads the mode_refsel register for the given DPLL channel and stores + * the raw value for later use. + * + * Return: 0 on success, <0 on error + */ +int zl3073x_chan_state_fetch(struct zl3073x_dev *zldev, u8 index) +{ + struct zl3073x_chan *chan = &zldev->chan[index]; + int rc; + + rc = zl3073x_read_u8(zldev, ZL_REG_DPLL_MODE_REFSEL(index), + &chan->mode_refsel); + if (rc) + return rc; + + dev_dbg(zldev->dev, "DPLL%u mode: %u, ref: %u\n", index, + zl3073x_chan_mode_get(chan), zl3073x_chan_ref_get(chan)); + + return 0; +} + +/** + * zl3073x_chan_state_get - get current DPLL channel state + * @zldev: pointer to zl3073x_dev structure + * @index: DPLL channel index to get state for + * + * Return: pointer to given DPLL channel state + */ +const struct zl3073x_chan *zl3073x_chan_state_get(struct zl3073x_dev *zldev, + u8 index) +{ + return &zldev->chan[index]; +} + +/** + * zl3073x_chan_state_set - commit DPLL channel state changes to hardware + * @zldev: pointer to zl3073x_dev structure + * @index: DPLL channel index to set state for + * @chan: desired channel state + * + * Skips the HW write if the configuration is unchanged, and otherwise + * writes the mode_refsel register to hardware. + * + * Return: 0 on success, <0 on HW error + */ +int zl3073x_chan_state_set(struct zl3073x_dev *zldev, u8 index, + const struct zl3073x_chan *chan) +{ + struct zl3073x_chan *dchan = &zldev->chan[index]; + int rc; + + /* Skip HW write if configuration hasn't changed */ + if (!memcmp(&dchan->cfg, &chan->cfg, sizeof(chan->cfg))) + return 0; + + rc = zl3073x_write_u8(zldev, ZL_REG_DPLL_MODE_REFSEL(index), + chan->mode_refsel); + if (rc) + return rc; + + /* After successful write store new state */ + dchan->cfg = chan->cfg; + + return 0; +} diff --git a/drivers/dpll/zl3073x/chan.h b/drivers/dpll/zl3073x/chan.h new file mode 100644 index 000000000000..3e6ffaef0c74 --- /dev/null +++ b/drivers/dpll/zl3073x/chan.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ZL3073X_CHAN_H +#define _ZL3073X_CHAN_H + +#include +#include +#include + +#include "regs.h" + +struct zl3073x_dev; + +/** + * struct zl3073x_chan - DPLL channel state + * @mode_refsel: mode and reference selection register value + */ +struct zl3073x_chan { + struct_group(cfg, + u8 mode_refsel; + ); +}; + +int zl3073x_chan_state_fetch(struct zl3073x_dev *zldev, u8 index); +const struct zl3073x_chan *zl3073x_chan_state_get(struct zl3073x_dev *zldev, + u8 index); +int zl3073x_chan_state_set(struct zl3073x_dev *zldev, u8 index, + const struct zl3073x_chan *chan); + +/** + * zl3073x_chan_mode_get - get DPLL channel operating mode + * @chan: pointer to channel state + * + * Return: reference selection mode of the given DPLL channel + */ +static inline u8 zl3073x_chan_mode_get(const struct zl3073x_chan *chan) +{ + return FIELD_GET(ZL_DPLL_MODE_REFSEL_MODE, chan->mode_refsel); +} + +/** + * zl3073x_chan_ref_get - get manually selected reference + * @chan: pointer to channel state + * + * Return: reference selected in forced reference lock mode + */ +static inline u8 zl3073x_chan_ref_get(const struct zl3073x_chan *chan) +{ + return FIELD_GET(ZL_DPLL_MODE_REFSEL_REF, chan->mode_refsel); +} + +/** + * zl3073x_chan_mode_set - set DPLL channel operating mode + * @chan: pointer to channel state + * @mode: mode to set + */ +static inline void zl3073x_chan_mode_set(struct zl3073x_chan *chan, u8 mode) +{ + chan->mode_refsel &= ~ZL_DPLL_MODE_REFSEL_MODE; + chan->mode_refsel |= FIELD_PREP(ZL_DPLL_MODE_REFSEL_MODE, mode); +} + +/** + * zl3073x_chan_ref_set - set manually selected reference + * @chan: pointer to channel state + * @ref: reference to set + */ +static inline void zl3073x_chan_ref_set(struct zl3073x_chan *chan, u8 ref) +{ + chan->mode_refsel &= ~ZL_DPLL_MODE_REFSEL_REF; + chan->mode_refsel |= FIELD_PREP(ZL_DPLL_MODE_REFSEL_REF, ref); +} + +#endif /* _ZL3073X_CHAN_H */ diff --git a/drivers/dpll/zl3073x/core.c b/drivers/dpll/zl3073x/core.c index 07626082aae3..b03e59fa0834 100644 --- a/drivers/dpll/zl3073x/core.c +++ b/drivers/dpll/zl3073x/core.c @@ -539,6 +539,16 @@ zl3073x_dev_state_fetch(struct zl3073x_dev *zldev) } } + for (i = 0; i < zldev->info->num_channels; i++) { + rc = zl3073x_chan_state_fetch(zldev, i); + if (rc) { + dev_err(zldev->dev, + "Failed to fetch channel state: %pe\n", + ERR_PTR(rc)); + return rc; + } + } + return rc; } diff --git a/drivers/dpll/zl3073x/core.h b/drivers/dpll/zl3073x/core.h index b6f22ee1c0bd..2cfb9dd74aa5 100644 --- a/drivers/dpll/zl3073x/core.h +++ b/drivers/dpll/zl3073x/core.h @@ -9,6 +9,7 @@ #include #include +#include "chan.h" #include "out.h" #include "ref.h" #include "regs.h" @@ -61,6 +62,7 @@ struct zl3073x_chip_info { * @ref: array of input references' invariants * @out: array of outs' invariants * @synth: array of synths' invariants + * @chan: array of DPLL channels' state * @dplls: list of DPLLs * @kworker: thread for periodic work * @work: periodic work @@ -77,6 +79,7 @@ struct zl3073x_dev { struct zl3073x_ref ref[ZL3073X_NUM_REFS]; struct zl3073x_out out[ZL3073X_NUM_OUTS]; struct zl3073x_synth synth[ZL3073X_NUM_SYNTHS]; + struct zl3073x_chan chan[ZL3073X_MAX_CHANNELS]; /* DPLL channels */ struct list_head dplls; diff --git a/drivers/dpll/zl3073x/dpll.c b/drivers/dpll/zl3073x/dpll.c index c201c974a7f9..f56f073e57df 100644 --- a/drivers/dpll/zl3073x/dpll.c +++ b/drivers/dpll/zl3073x/dpll.c @@ -259,10 +259,13 @@ static int zl3073x_dpll_selected_ref_get(struct zl3073x_dpll *zldpll, u8 *ref) { struct zl3073x_dev *zldev = zldpll->dev; + const struct zl3073x_chan *chan; u8 state, value; int rc; - switch (zldpll->refsel_mode) { + chan = zl3073x_chan_state_get(zldev, zldpll->id); + + switch (zl3073x_chan_mode_get(chan)) { case ZL_DPLL_MODE_REFSEL_MODE_AUTO: /* For automatic mode read refsel_status register */ rc = zl3073x_read_u8(zldev, @@ -282,7 +285,7 @@ zl3073x_dpll_selected_ref_get(struct zl3073x_dpll *zldpll, u8 *ref) break; case ZL_DPLL_MODE_REFSEL_MODE_REFLOCK: /* For manual mode return stored value */ - *ref = zldpll->forced_ref; + *ref = zl3073x_chan_ref_get(chan); break; default: /* For other modes like NCO, freerun... there is no input ref */ @@ -307,10 +310,11 @@ static int zl3073x_dpll_selected_ref_set(struct zl3073x_dpll *zldpll, u8 ref) { struct zl3073x_dev *zldev = zldpll->dev; - u8 mode, mode_refsel; - int rc; + struct zl3073x_chan chan; + u8 mode; - mode = zldpll->refsel_mode; + chan = *zl3073x_chan_state_get(zldev, zldpll->id); + mode = zl3073x_chan_mode_get(&chan); switch (mode) { case ZL_DPLL_MODE_REFSEL_MODE_REFLOCK: @@ -328,8 +332,8 @@ zl3073x_dpll_selected_ref_set(struct zl3073x_dpll *zldpll, u8 ref) break; } /* Keep selected reference */ - ref = zldpll->forced_ref; - } else if (ref == zldpll->forced_ref) { + ref = zl3073x_chan_ref_get(&chan); + } else if (ref == zl3073x_chan_ref_get(&chan)) { /* No register update - same mode and same ref */ return 0; } @@ -351,21 +355,10 @@ zl3073x_dpll_selected_ref_set(struct zl3073x_dpll *zldpll, u8 ref) return -EOPNOTSUPP; } - /* Build mode_refsel value */ - mode_refsel = FIELD_PREP(ZL_DPLL_MODE_REFSEL_MODE, mode) | - FIELD_PREP(ZL_DPLL_MODE_REFSEL_REF, ref); + zl3073x_chan_mode_set(&chan, mode); + zl3073x_chan_ref_set(&chan, ref); - /* Update dpll_mode_refsel register */ - rc = zl3073x_write_u8(zldev, ZL_REG_DPLL_MODE_REFSEL(zldpll->id), - mode_refsel); - if (rc) - return rc; - - /* Store new mode and forced reference */ - zldpll->refsel_mode = mode; - zldpll->forced_ref = ref; - - return rc; + return zl3073x_chan_state_set(zldev, zldpll->id, &chan); } /** @@ -624,9 +617,11 @@ zl3073x_dpll_ref_state_get(struct zl3073x_dpll_pin *pin, { struct zl3073x_dpll *zldpll = pin->dpll; struct zl3073x_dev *zldev = zldpll->dev; + const struct zl3073x_chan *chan; u8 ref, ref_conn; int rc; + chan = zl3073x_chan_state_get(zldev, zldpll->id); ref = zl3073x_input_pin_ref_get(pin->id); /* Get currently connected reference */ @@ -643,7 +638,7 @@ zl3073x_dpll_ref_state_get(struct zl3073x_dpll_pin *pin, * selectable and its monitor does not report any error then report * pin as selectable. */ - if (zldpll->refsel_mode == ZL_DPLL_MODE_REFSEL_MODE_AUTO && + if (zl3073x_chan_mode_get(chan) == ZL_DPLL_MODE_REFSEL_MODE_AUTO && zl3073x_dev_ref_is_status_ok(zldev, ref) && pin->selectable) { *state = DPLL_PIN_STATE_SELECTABLE; return 0; @@ -678,10 +673,13 @@ zl3073x_dpll_input_pin_state_on_dpll_set(const struct dpll_pin *dpll_pin, { struct zl3073x_dpll *zldpll = dpll_priv; struct zl3073x_dpll_pin *pin = pin_priv; + const struct zl3073x_chan *chan; u8 new_ref; int rc; - switch (zldpll->refsel_mode) { + chan = zl3073x_chan_state_get(zldpll->dev, zldpll->id); + + switch (zl3073x_chan_mode_get(chan)) { case ZL_DPLL_MODE_REFSEL_MODE_REFLOCK: case ZL_DPLL_MODE_REFSEL_MODE_FREERUN: case ZL_DPLL_MODE_REFSEL_MODE_HOLDOVER: @@ -1092,10 +1090,13 @@ zl3073x_dpll_lock_status_get(const struct dpll_device *dpll, void *dpll_priv, { struct zl3073x_dpll *zldpll = dpll_priv; struct zl3073x_dev *zldev = zldpll->dev; + const struct zl3073x_chan *chan; u8 mon_status, state; int rc; - switch (zldpll->refsel_mode) { + chan = zl3073x_chan_state_get(zldev, zldpll->id); + + switch (zl3073x_chan_mode_get(chan)) { case ZL_DPLL_MODE_REFSEL_MODE_FREERUN: case ZL_DPLL_MODE_REFSEL_MODE_NCO: /* In FREERUN and NCO modes the DPLL is always unlocked */ @@ -1140,13 +1141,16 @@ zl3073x_dpll_supported_modes_get(const struct dpll_device *dpll, struct netlink_ext_ack *extack) { struct zl3073x_dpll *zldpll = dpll_priv; + const struct zl3073x_chan *chan; + + chan = zl3073x_chan_state_get(zldpll->dev, zldpll->id); /* We support switching between automatic and manual mode, except in * a case where the DPLL channel is configured to run in NCO mode. * In this case, report only the manual mode to which the NCO is mapped * as the only supported one. */ - if (zldpll->refsel_mode != ZL_DPLL_MODE_REFSEL_MODE_NCO) + if (zl3073x_chan_mode_get(chan) != ZL_DPLL_MODE_REFSEL_MODE_NCO) __set_bit(DPLL_MODE_AUTOMATIC, modes); __set_bit(DPLL_MODE_MANUAL, modes); @@ -1159,8 +1163,11 @@ zl3073x_dpll_mode_get(const struct dpll_device *dpll, void *dpll_priv, enum dpll_mode *mode, struct netlink_ext_ack *extack) { struct zl3073x_dpll *zldpll = dpll_priv; + const struct zl3073x_chan *chan; - switch (zldpll->refsel_mode) { + chan = zl3073x_chan_state_get(zldpll->dev, zldpll->id); + + switch (zl3073x_chan_mode_get(chan)) { case ZL_DPLL_MODE_REFSEL_MODE_FREERUN: case ZL_DPLL_MODE_REFSEL_MODE_HOLDOVER: case ZL_DPLL_MODE_REFSEL_MODE_NCO: @@ -1239,7 +1246,8 @@ zl3073x_dpll_mode_set(const struct dpll_device *dpll, void *dpll_priv, enum dpll_mode mode, struct netlink_ext_ack *extack) { struct zl3073x_dpll *zldpll = dpll_priv; - u8 hw_mode, mode_refsel, ref; + struct zl3073x_chan chan; + u8 hw_mode, ref; int rc; rc = zl3073x_dpll_selected_ref_get(zldpll, &ref); @@ -1287,26 +1295,18 @@ zl3073x_dpll_mode_set(const struct dpll_device *dpll, void *dpll_priv, hw_mode = ZL_DPLL_MODE_REFSEL_MODE_AUTO; } - /* Build mode_refsel value */ - mode_refsel = FIELD_PREP(ZL_DPLL_MODE_REFSEL_MODE, hw_mode); - + chan = *zl3073x_chan_state_get(zldpll->dev, zldpll->id); + zl3073x_chan_mode_set(&chan, hw_mode); if (ZL3073X_DPLL_REF_IS_VALID(ref)) - mode_refsel |= FIELD_PREP(ZL_DPLL_MODE_REFSEL_REF, ref); + zl3073x_chan_ref_set(&chan, ref); - /* Update dpll_mode_refsel register */ - rc = zl3073x_write_u8(zldpll->dev, ZL_REG_DPLL_MODE_REFSEL(zldpll->id), - mode_refsel); + rc = zl3073x_chan_state_set(zldpll->dev, zldpll->id, &chan); if (rc) { NL_SET_ERR_MSG_MOD(extack, "failed to set reference selection mode"); return rc; } - zldpll->refsel_mode = hw_mode; - - if (ZL3073X_DPLL_REF_IS_VALID(ref)) - zldpll->forced_ref = ref; - return 0; } @@ -1559,15 +1559,18 @@ zl3073x_dpll_pin_is_registrable(struct zl3073x_dpll *zldpll, enum dpll_pin_direction dir, u8 index) { struct zl3073x_dev *zldev = zldpll->dev; + const struct zl3073x_chan *chan; bool is_diff, is_enabled; const char *name; + chan = zl3073x_chan_state_get(zldev, zldpll->id); + if (dir == DPLL_PIN_DIRECTION_INPUT) { u8 ref_id = zl3073x_input_pin_ref_get(index); const struct zl3073x_ref *ref; /* Skip the pin if the DPLL is running in NCO mode */ - if (zldpll->refsel_mode == ZL_DPLL_MODE_REFSEL_MODE_NCO) + if (zl3073x_chan_mode_get(chan) == ZL_DPLL_MODE_REFSEL_MODE_NCO) return false; name = "REF"; @@ -1675,21 +1678,8 @@ static int zl3073x_dpll_device_register(struct zl3073x_dpll *zldpll) { struct zl3073x_dev *zldev = zldpll->dev; - u8 dpll_mode_refsel; int rc; - /* Read DPLL mode and forcibly selected reference */ - rc = zl3073x_read_u8(zldev, ZL_REG_DPLL_MODE_REFSEL(zldpll->id), - &dpll_mode_refsel); - if (rc) - return rc; - - /* Extract mode and selected input reference */ - zldpll->refsel_mode = FIELD_GET(ZL_DPLL_MODE_REFSEL_MODE, - dpll_mode_refsel); - zldpll->forced_ref = FIELD_GET(ZL_DPLL_MODE_REFSEL_REF, - dpll_mode_refsel); - zldpll->ops = zl3073x_dpll_device_ops; if (zldev->info->flags & ZL3073X_FLAG_DIE_TEMP) zldpll->ops.temp_get = zl3073x_dpll_temp_get; @@ -1844,8 +1834,10 @@ zl3073x_dpll_changes_check(struct zl3073x_dpll *zldpll) struct zl3073x_dev *zldev = zldpll->dev; enum dpll_lock_status lock_status; struct device *dev = zldev->dev; + const struct zl3073x_chan *chan; struct zl3073x_dpll_pin *pin; int rc; + u8 mode; zldpll->check_count++; @@ -1867,8 +1859,10 @@ zl3073x_dpll_changes_check(struct zl3073x_dpll *zldpll) /* Input pin monitoring does make sense only in automatic * or forced reference modes. */ - if (zldpll->refsel_mode != ZL_DPLL_MODE_REFSEL_MODE_AUTO && - zldpll->refsel_mode != ZL_DPLL_MODE_REFSEL_MODE_REFLOCK) + chan = zl3073x_chan_state_get(zldev, zldpll->id); + mode = zl3073x_chan_mode_get(chan); + if (mode != ZL_DPLL_MODE_REFSEL_MODE_AUTO && + mode != ZL_DPLL_MODE_REFSEL_MODE_REFLOCK) return; /* Update phase offset latch registers for this DPLL if the phase diff --git a/drivers/dpll/zl3073x/dpll.h b/drivers/dpll/zl3073x/dpll.h index 278a24f357c9..115ee4f67e7a 100644 --- a/drivers/dpll/zl3073x/dpll.h +++ b/drivers/dpll/zl3073x/dpll.h @@ -13,8 +13,6 @@ * @list: this DPLL list entry * @dev: pointer to multi-function parent device * @id: DPLL index - * @refsel_mode: reference selection mode - * @forced_ref: selected reference in forced reference lock mode * @check_count: periodic check counter * @phase_monitor: is phase offset monitor enabled * @ops: DPLL device operations for this instance @@ -28,8 +26,6 @@ struct zl3073x_dpll { struct list_head list; struct zl3073x_dev *dev; u8 id; - u8 refsel_mode; - u8 forced_ref; u8 check_count; bool phase_monitor; struct dpll_device_ops ops; From 41bab554d7e9840e17e4c8c7e93647161c6596bf Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Sun, 15 Mar 2026 18:42:22 +0100 Subject: [PATCH 0603/1561] dpll: zl3073x: add DPLL channel status fields to zl3073x_chan Add mon_status and refsel_status fields to struct zl3073x_chan in a stat group to cache the 'dpll_mon_status' and 'dpll_refsel_status' registers. Add zl3073x_chan_lock_state_get(), zl3073x_chan_is_ho_ready(), zl3073x_chan_refsel_state_get() and zl3073x_chan_refsel_ref_get() inline helpers for reading cached state, and zl3073x_chan_state_update() for refreshing both registers from hardware. Call it from zl3073x_chan_state_fetch() as well so that channel status is initialized at device startup. Call zl3073x_dev_chan_states_update() from the periodic work to keep the cached state up to date and convert zl3073x_dpll_lock_status_get() and zl3073x_dpll_selected_ref_get() to use the cached state via the new helpers instead of direct register reads. Signed-off-by: Ivan Vecera Link: https://patch.msgid.link/20260315174224.399074-5-ivecera@redhat.com Signed-off-by: Jakub Kicinski --- drivers/dpll/zl3073x/chan.c | 32 +++++++++++++++++++++++ drivers/dpll/zl3073x/chan.h | 52 +++++++++++++++++++++++++++++++++++++ drivers/dpll/zl3073x/core.c | 17 ++++++++++++ drivers/dpll/zl3073x/dpll.c | 41 +++++++---------------------- 4 files changed, 111 insertions(+), 31 deletions(-) diff --git a/drivers/dpll/zl3073x/chan.c b/drivers/dpll/zl3073x/chan.c index 8019b8ce7351..71fb60a9859b 100644 --- a/drivers/dpll/zl3073x/chan.c +++ b/drivers/dpll/zl3073x/chan.c @@ -7,6 +7,27 @@ #include "chan.h" #include "core.h" +/** + * zl3073x_chan_state_update - update DPLL channel status from HW + * @zldev: pointer to zl3073x_dev structure + * @index: DPLL channel index + * + * Return: 0 on success, <0 on error + */ +int zl3073x_chan_state_update(struct zl3073x_dev *zldev, u8 index) +{ + struct zl3073x_chan *chan = &zldev->chan[index]; + int rc; + + rc = zl3073x_read_u8(zldev, ZL_REG_DPLL_MON_STATUS(index), + &chan->mon_status); + if (rc) + return rc; + + return zl3073x_read_u8(zldev, ZL_REG_DPLL_REFSEL_STATUS(index), + &chan->refsel_status); +} + /** * zl3073x_chan_state_fetch - fetch DPLL channel state from hardware * @zldev: pointer to zl3073x_dev structure @@ -30,6 +51,17 @@ int zl3073x_chan_state_fetch(struct zl3073x_dev *zldev, u8 index) dev_dbg(zldev->dev, "DPLL%u mode: %u, ref: %u\n", index, zl3073x_chan_mode_get(chan), zl3073x_chan_ref_get(chan)); + rc = zl3073x_chan_state_update(zldev, index); + if (rc) + return rc; + + dev_dbg(zldev->dev, + "DPLL%u lock_state: %u, ho: %u, sel_state: %u, sel_ref: %u\n", + index, zl3073x_chan_lock_state_get(chan), + zl3073x_chan_is_ho_ready(chan) ? 1 : 0, + zl3073x_chan_refsel_state_get(chan), + zl3073x_chan_refsel_ref_get(chan)); + return 0; } diff --git a/drivers/dpll/zl3073x/chan.h b/drivers/dpll/zl3073x/chan.h index 3e6ffaef0c74..f73a07610855 100644 --- a/drivers/dpll/zl3073x/chan.h +++ b/drivers/dpll/zl3073x/chan.h @@ -14,11 +14,17 @@ struct zl3073x_dev; /** * struct zl3073x_chan - DPLL channel state * @mode_refsel: mode and reference selection register value + * @mon_status: monitor status register value + * @refsel_status: reference selection status register value */ struct zl3073x_chan { struct_group(cfg, u8 mode_refsel; ); + struct_group(stat, + u8 mon_status; + u8 refsel_status; + ); }; int zl3073x_chan_state_fetch(struct zl3073x_dev *zldev, u8 index); @@ -27,6 +33,8 @@ const struct zl3073x_chan *zl3073x_chan_state_get(struct zl3073x_dev *zldev, int zl3073x_chan_state_set(struct zl3073x_dev *zldev, u8 index, const struct zl3073x_chan *chan); +int zl3073x_chan_state_update(struct zl3073x_dev *zldev, u8 index); + /** * zl3073x_chan_mode_get - get DPLL channel operating mode * @chan: pointer to channel state @@ -71,4 +79,48 @@ static inline void zl3073x_chan_ref_set(struct zl3073x_chan *chan, u8 ref) chan->mode_refsel |= FIELD_PREP(ZL_DPLL_MODE_REFSEL_REF, ref); } +/** + * zl3073x_chan_lock_state_get - get DPLL channel lock state + * @chan: pointer to channel state + * + * Return: lock state of the given DPLL channel + */ +static inline u8 zl3073x_chan_lock_state_get(const struct zl3073x_chan *chan) +{ + return FIELD_GET(ZL_DPLL_MON_STATUS_STATE, chan->mon_status); +} + +/** + * zl3073x_chan_is_ho_ready - check if holdover is ready + * @chan: pointer to channel state + * + * Return: true if holdover is ready, false otherwise + */ +static inline bool zl3073x_chan_is_ho_ready(const struct zl3073x_chan *chan) +{ + return !!FIELD_GET(ZL_DPLL_MON_STATUS_HO_READY, chan->mon_status); +} + +/** + * zl3073x_chan_refsel_state_get - get reference selection state + * @chan: pointer to channel state + * + * Return: reference selection state of the given DPLL channel + */ +static inline u8 zl3073x_chan_refsel_state_get(const struct zl3073x_chan *chan) +{ + return FIELD_GET(ZL_DPLL_REFSEL_STATUS_STATE, chan->refsel_status); +} + +/** + * zl3073x_chan_refsel_ref_get - get currently selected reference in auto mode + * @chan: pointer to channel state + * + * Return: reference selected by the DPLL in automatic mode + */ +static inline u8 zl3073x_chan_refsel_ref_get(const struct zl3073x_chan *chan) +{ + return FIELD_GET(ZL_DPLL_REFSEL_STATUS_REFSEL, chan->refsel_status); +} + #endif /* _ZL3073X_CHAN_H */ diff --git a/drivers/dpll/zl3073x/core.c b/drivers/dpll/zl3073x/core.c index b03e59fa0834..6363002d48d4 100644 --- a/drivers/dpll/zl3073x/core.c +++ b/drivers/dpll/zl3073x/core.c @@ -566,6 +566,20 @@ zl3073x_dev_ref_states_update(struct zl3073x_dev *zldev) } } +static void +zl3073x_dev_chan_states_update(struct zl3073x_dev *zldev) +{ + int i, rc; + + for (i = 0; i < zldev->info->num_channels; i++) { + rc = zl3073x_chan_state_update(zldev, i); + if (rc) + dev_warn(zldev->dev, + "Failed to get DPLL%u state: %pe\n", i, + ERR_PTR(rc)); + } +} + /** * zl3073x_ref_phase_offsets_update - update reference phase offsets * @zldev: pointer to zl3073x_dev structure @@ -691,6 +705,9 @@ zl3073x_dev_periodic_work(struct kthread_work *work) /* Update input references' states */ zl3073x_dev_ref_states_update(zldev); + /* Update DPLL channels' states */ + zl3073x_dev_chan_states_update(zldev); + /* Update DPLL-to-connected-ref phase offsets registers */ rc = zl3073x_ref_phase_offsets_update(zldev, -1); if (rc) diff --git a/drivers/dpll/zl3073x/dpll.c b/drivers/dpll/zl3073x/dpll.c index f56f073e57df..49e3f9f13084 100644 --- a/drivers/dpll/zl3073x/dpll.c +++ b/drivers/dpll/zl3073x/dpll.c @@ -258,28 +258,16 @@ zl3073x_dpll_input_pin_frequency_set(const struct dpll_pin *dpll_pin, static int zl3073x_dpll_selected_ref_get(struct zl3073x_dpll *zldpll, u8 *ref) { - struct zl3073x_dev *zldev = zldpll->dev; const struct zl3073x_chan *chan; - u8 state, value; - int rc; - chan = zl3073x_chan_state_get(zldev, zldpll->id); + chan = zl3073x_chan_state_get(zldpll->dev, zldpll->id); switch (zl3073x_chan_mode_get(chan)) { case ZL_DPLL_MODE_REFSEL_MODE_AUTO: - /* For automatic mode read refsel_status register */ - rc = zl3073x_read_u8(zldev, - ZL_REG_DPLL_REFSEL_STATUS(zldpll->id), - &value); - if (rc) - return rc; - - /* Extract reference state */ - state = FIELD_GET(ZL_DPLL_REFSEL_STATUS_STATE, value); - /* Return the reference only if the DPLL is locked to it */ - if (state == ZL_DPLL_REFSEL_STATUS_STATE_LOCK) - *ref = FIELD_GET(ZL_DPLL_REFSEL_STATUS_REFSEL, value); + if (zl3073x_chan_refsel_state_get(chan) == + ZL_DPLL_REFSEL_STATUS_STATE_LOCK) + *ref = zl3073x_chan_refsel_ref_get(chan); else *ref = ZL3073X_DPLL_REF_NONE; break; @@ -1089,12 +1077,9 @@ zl3073x_dpll_lock_status_get(const struct dpll_device *dpll, void *dpll_priv, struct netlink_ext_ack *extack) { struct zl3073x_dpll *zldpll = dpll_priv; - struct zl3073x_dev *zldev = zldpll->dev; const struct zl3073x_chan *chan; - u8 mon_status, state; - int rc; - chan = zl3073x_chan_state_get(zldev, zldpll->id); + chan = zl3073x_chan_state_get(zldpll->dev, zldpll->id); switch (zl3073x_chan_mode_get(chan)) { case ZL_DPLL_MODE_REFSEL_MODE_FREERUN: @@ -1107,16 +1092,9 @@ zl3073x_dpll_lock_status_get(const struct dpll_device *dpll, void *dpll_priv, break; } - /* Read DPLL monitor status */ - rc = zl3073x_read_u8(zldev, ZL_REG_DPLL_MON_STATUS(zldpll->id), - &mon_status); - if (rc) - return rc; - state = FIELD_GET(ZL_DPLL_MON_STATUS_STATE, mon_status); - - switch (state) { + switch (zl3073x_chan_lock_state_get(chan)) { case ZL_DPLL_MON_STATUS_STATE_LOCK: - if (FIELD_GET(ZL_DPLL_MON_STATUS_HO_READY, mon_status)) + if (zl3073x_chan_is_ho_ready(chan)) *status = DPLL_LOCK_STATUS_LOCKED_HO_ACQ; else *status = DPLL_LOCK_STATUS_LOCKED; @@ -1126,8 +1104,9 @@ zl3073x_dpll_lock_status_get(const struct dpll_device *dpll, void *dpll_priv, *status = DPLL_LOCK_STATUS_HOLDOVER; break; default: - dev_warn(zldev->dev, "Unknown DPLL monitor status: 0x%02x\n", - mon_status); + dev_warn(zldpll->dev->dev, + "Unknown DPLL monitor status: 0x%02x\n", + chan->mon_status); *status = DPLL_LOCK_STATUS_UNLOCKED; break; } From f6b075bc3ad545d1c91e181c3f8ea6193d01602f Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Sun, 15 Mar 2026 18:42:23 +0100 Subject: [PATCH 0604/1561] dpll: zl3073x: add reference priority to zl3073x_chan Cache the ZL_REG_DPLL_REF_PRIO registers in the zl3073x_chan cfg group. These mailbox-based registers store per-reference priority values (4 bits each, P/N packed) used for automatic reference selection. Add ref_prio[] array to struct zl3073x_chan and provide inline helpers zl3073x_chan_ref_prio_get(), zl3073x_chan_ref_prio_set(), and zl3073x_chan_ref_is_selectable() for nibble-level access and priority queries. Extend state_fetch and state_set with DPLL mailbox operations to read and write the priority registers. Replace the ad-hoc zl3073x_dpll_ref_prio_get/set functions in dpll.c with the cached state pattern, removing direct mailbox access from the DPLL layer. This also simplifies pin registration since reading priority from cached state cannot fail. Remove the pin->selectable flag from struct zl3073x_dpll_pin and derive the selectable state from the cached ref priority via zl3073x_chan_ref_is_selectable(), eliminating a redundant cache. Inline zl3073x_dpll_selected_ref_set() into zl3073x_dpll_input_pin_state_on_dpll_set(), unifying all manual and automatic mode paths to commit changes through a single zl3073x_chan_state_set() call at the end of the function. Move hardware limit constants from core.h to regs.h so that chan.h can reference ZL3073X_NUM_REFS for the ref_prio array size. Signed-off-by: Ivan Vecera Link: https://patch.msgid.link/20260315174224.399074-6-ivecera@redhat.com Signed-off-by: Jakub Kicinski --- drivers/dpll/zl3073x/chan.c | 68 ++++++++- drivers/dpll/zl3073x/chan.h | 53 +++++++ drivers/dpll/zl3073x/core.h | 11 -- drivers/dpll/zl3073x/dpll.c | 297 +++++++++--------------------------- drivers/dpll/zl3073x/regs.h | 12 ++ 5 files changed, 201 insertions(+), 240 deletions(-) diff --git a/drivers/dpll/zl3073x/chan.c b/drivers/dpll/zl3073x/chan.c index 71fb60a9859b..2f48ca239149 100644 --- a/drivers/dpll/zl3073x/chan.c +++ b/drivers/dpll/zl3073x/chan.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only +#include #include #include #include @@ -33,15 +34,15 @@ int zl3073x_chan_state_update(struct zl3073x_dev *zldev, u8 index) * @zldev: pointer to zl3073x_dev structure * @index: DPLL channel index to fetch state for * - * Reads the mode_refsel register for the given DPLL channel and stores - * the raw value for later use. + * Reads the mode_refsel register and reference priority registers for + * the given DPLL channel and stores the raw values for later use. * * Return: 0 on success, <0 on error */ int zl3073x_chan_state_fetch(struct zl3073x_dev *zldev, u8 index) { struct zl3073x_chan *chan = &zldev->chan[index]; - int rc; + int rc, i; rc = zl3073x_read_u8(zldev, ZL_REG_DPLL_MODE_REFSEL(index), &chan->mode_refsel); @@ -62,6 +63,22 @@ int zl3073x_chan_state_fetch(struct zl3073x_dev *zldev, u8 index) zl3073x_chan_refsel_state_get(chan), zl3073x_chan_refsel_ref_get(chan)); + guard(mutex)(&zldev->multiop_lock); + + /* Read DPLL configuration from mailbox */ + rc = zl3073x_mb_op(zldev, ZL_REG_DPLL_MB_SEM, ZL_DPLL_MB_SEM_RD, + ZL_REG_DPLL_MB_MASK, BIT(index)); + if (rc) + return rc; + + /* Read reference priority registers */ + for (i = 0; i < ARRAY_SIZE(chan->ref_prio); i++) { + rc = zl3073x_read_u8(zldev, ZL_REG_DPLL_REF_PRIO(i), + &chan->ref_prio[i]); + if (rc) + return rc; + } + return 0; } @@ -85,7 +102,9 @@ const struct zl3073x_chan *zl3073x_chan_state_get(struct zl3073x_dev *zldev, * @chan: desired channel state * * Skips the HW write if the configuration is unchanged, and otherwise - * writes the mode_refsel register to hardware. + * writes only the changed registers to hardware. The mode_refsel register + * is written directly, while the reference priority registers are written + * via the DPLL mailbox interface. * * Return: 0 on success, <0 on HW error */ @@ -93,14 +112,49 @@ int zl3073x_chan_state_set(struct zl3073x_dev *zldev, u8 index, const struct zl3073x_chan *chan) { struct zl3073x_chan *dchan = &zldev->chan[index]; - int rc; + int rc, i; /* Skip HW write if configuration hasn't changed */ if (!memcmp(&dchan->cfg, &chan->cfg, sizeof(chan->cfg))) return 0; - rc = zl3073x_write_u8(zldev, ZL_REG_DPLL_MODE_REFSEL(index), - chan->mode_refsel); + /* Direct register write for mode_refsel */ + if (dchan->mode_refsel != chan->mode_refsel) { + rc = zl3073x_write_u8(zldev, ZL_REG_DPLL_MODE_REFSEL(index), + chan->mode_refsel); + if (rc) + return rc; + dchan->mode_refsel = chan->mode_refsel; + } + + /* Mailbox write for ref_prio if changed */ + if (!memcmp(dchan->ref_prio, chan->ref_prio, sizeof(chan->ref_prio))) { + dchan->cfg = chan->cfg; + return 0; + } + + guard(mutex)(&zldev->multiop_lock); + + /* Read DPLL configuration into mailbox */ + rc = zl3073x_mb_op(zldev, ZL_REG_DPLL_MB_SEM, ZL_DPLL_MB_SEM_RD, + ZL_REG_DPLL_MB_MASK, BIT(index)); + if (rc) + return rc; + + /* Update changed ref_prio registers */ + for (i = 0; i < ARRAY_SIZE(chan->ref_prio); i++) { + if (dchan->ref_prio[i] != chan->ref_prio[i]) { + rc = zl3073x_write_u8(zldev, + ZL_REG_DPLL_REF_PRIO(i), + chan->ref_prio[i]); + if (rc) + return rc; + } + } + + /* Commit DPLL configuration */ + rc = zl3073x_mb_op(zldev, ZL_REG_DPLL_MB_SEM, ZL_DPLL_MB_SEM_WR, + ZL_REG_DPLL_MB_MASK, BIT(index)); if (rc) return rc; diff --git a/drivers/dpll/zl3073x/chan.h b/drivers/dpll/zl3073x/chan.h index f73a07610855..e0f02d343208 100644 --- a/drivers/dpll/zl3073x/chan.h +++ b/drivers/dpll/zl3073x/chan.h @@ -14,12 +14,14 @@ struct zl3073x_dev; /** * struct zl3073x_chan - DPLL channel state * @mode_refsel: mode and reference selection register value + * @ref_prio: reference priority registers (4 bits per ref, P/N packed) * @mon_status: monitor status register value * @refsel_status: reference selection status register value */ struct zl3073x_chan { struct_group(cfg, u8 mode_refsel; + u8 ref_prio[ZL3073X_NUM_REFS / 2]; ); struct_group(stat, u8 mon_status; @@ -79,6 +81,57 @@ static inline void zl3073x_chan_ref_set(struct zl3073x_chan *chan, u8 ref) chan->mode_refsel |= FIELD_PREP(ZL_DPLL_MODE_REFSEL_REF, ref); } +/** + * zl3073x_chan_ref_prio_get - get reference priority + * @chan: pointer to channel state + * @ref: input reference index + * + * Return: priority of the given reference <0, 15> + */ +static inline u8 +zl3073x_chan_ref_prio_get(const struct zl3073x_chan *chan, u8 ref) +{ + u8 val = chan->ref_prio[ref / 2]; + + if (!(ref & 1)) + return FIELD_GET(ZL_DPLL_REF_PRIO_REF_P, val); + else + return FIELD_GET(ZL_DPLL_REF_PRIO_REF_N, val); +} + +/** + * zl3073x_chan_ref_prio_set - set reference priority + * @chan: pointer to channel state + * @ref: input reference index + * @prio: priority to set + */ +static inline void +zl3073x_chan_ref_prio_set(struct zl3073x_chan *chan, u8 ref, u8 prio) +{ + u8 *val = &chan->ref_prio[ref / 2]; + + if (!(ref & 1)) { + *val &= ~ZL_DPLL_REF_PRIO_REF_P; + *val |= FIELD_PREP(ZL_DPLL_REF_PRIO_REF_P, prio); + } else { + *val &= ~ZL_DPLL_REF_PRIO_REF_N; + *val |= FIELD_PREP(ZL_DPLL_REF_PRIO_REF_N, prio); + } +} + +/** + * zl3073x_chan_ref_is_selectable - check if reference is selectable + * @chan: pointer to channel state + * @ref: input reference index + * + * Return: true if the reference priority is not NONE, false otherwise + */ +static inline bool +zl3073x_chan_ref_is_selectable(const struct zl3073x_chan *chan, u8 ref) +{ + return zl3073x_chan_ref_prio_get(chan, ref) != ZL_DPLL_REF_PRIO_NONE; +} + /** * zl3073x_chan_lock_state_get - get DPLL channel lock state * @chan: pointer to channel state diff --git a/drivers/dpll/zl3073x/core.h b/drivers/dpll/zl3073x/core.h index 2cfb9dd74aa5..99440620407d 100644 --- a/drivers/dpll/zl3073x/core.h +++ b/drivers/dpll/zl3073x/core.h @@ -19,17 +19,6 @@ struct device; struct regmap; struct zl3073x_dpll; -/* - * Hardware limits for ZL3073x chip family - */ -#define ZL3073X_MAX_CHANNELS 5 -#define ZL3073X_NUM_REFS 10 -#define ZL3073X_NUM_OUTS 10 -#define ZL3073X_NUM_SYNTHS 5 -#define ZL3073X_NUM_INPUT_PINS ZL3073X_NUM_REFS -#define ZL3073X_NUM_OUTPUT_PINS (ZL3073X_NUM_OUTS * 2) -#define ZL3073X_NUM_PINS (ZL3073X_NUM_INPUT_PINS + \ - ZL3073X_NUM_OUTPUT_PINS) enum zl3073x_flags { ZL3073X_FLAG_REF_PHASE_COMP_32_BIT, diff --git a/drivers/dpll/zl3073x/dpll.c b/drivers/dpll/zl3073x/dpll.c index 49e3f9f13084..7fb851976402 100644 --- a/drivers/dpll/zl3073x/dpll.c +++ b/drivers/dpll/zl3073x/dpll.c @@ -34,7 +34,6 @@ * @dir: pin direction * @id: pin id * @prio: pin priority <0, 14> - * @selectable: pin is selectable in automatic mode * @esync_control: embedded sync is controllable * @phase_gran: phase adjustment granularity * @pin_state: last saved pin state @@ -50,7 +49,6 @@ struct zl3073x_dpll_pin { enum dpll_pin_direction dir; u8 id; u8 prio; - bool selectable; bool esync_control; s32 phase_gran; enum dpll_pin_state pin_state; @@ -284,71 +282,6 @@ zl3073x_dpll_selected_ref_get(struct zl3073x_dpll *zldpll, u8 *ref) return 0; } -/** - * zl3073x_dpll_selected_ref_set - select reference in manual mode - * @zldpll: pointer to zl3073x_dpll - * @ref: input reference to be selected - * - * Selects the given reference for the DPLL channel it should be - * locked to. - * - * Return: 0 on success, <0 on error - */ -static int -zl3073x_dpll_selected_ref_set(struct zl3073x_dpll *zldpll, u8 ref) -{ - struct zl3073x_dev *zldev = zldpll->dev; - struct zl3073x_chan chan; - u8 mode; - - chan = *zl3073x_chan_state_get(zldev, zldpll->id); - mode = zl3073x_chan_mode_get(&chan); - - switch (mode) { - case ZL_DPLL_MODE_REFSEL_MODE_REFLOCK: - /* Manual mode with ref selected */ - if (ref == ZL3073X_DPLL_REF_NONE) { - switch (zldpll->lock_status) { - case DPLL_LOCK_STATUS_LOCKED_HO_ACQ: - case DPLL_LOCK_STATUS_HOLDOVER: - /* Switch to forced holdover */ - mode = ZL_DPLL_MODE_REFSEL_MODE_HOLDOVER; - break; - default: - /* Switch to freerun */ - mode = ZL_DPLL_MODE_REFSEL_MODE_FREERUN; - break; - } - /* Keep selected reference */ - ref = zl3073x_chan_ref_get(&chan); - } else if (ref == zl3073x_chan_ref_get(&chan)) { - /* No register update - same mode and same ref */ - return 0; - } - break; - case ZL_DPLL_MODE_REFSEL_MODE_FREERUN: - case ZL_DPLL_MODE_REFSEL_MODE_HOLDOVER: - /* Manual mode without no ref */ - if (ref == ZL3073X_DPLL_REF_NONE) - /* No register update - keep current mode */ - return 0; - - /* Switch to reflock mode and update ref selection */ - mode = ZL_DPLL_MODE_REFSEL_MODE_REFLOCK; - break; - default: - /* For other modes like automatic or NCO ref cannot be selected - * manually - */ - return -EOPNOTSUPP; - } - - zl3073x_chan_mode_set(&chan, mode); - zl3073x_chan_ref_set(&chan, ref); - - return zl3073x_chan_state_set(zldev, zldpll->id, &chan); -} - /** * zl3073x_dpll_connected_ref_get - get currently connected reference * @zldpll: pointer to zl3073x_dpll @@ -497,98 +430,6 @@ zl3073x_dpll_input_pin_phase_adjust_set(const struct dpll_pin *dpll_pin, return zl3073x_ref_state_set(zldev, ref_id, &ref); } -/** - * zl3073x_dpll_ref_prio_get - get priority for given input pin - * @pin: pointer to pin - * @prio: place to store priority - * - * Reads current priority for the given input pin and stores the value - * to @prio. - * - * Return: 0 on success, <0 on error - */ -static int -zl3073x_dpll_ref_prio_get(struct zl3073x_dpll_pin *pin, u8 *prio) -{ - struct zl3073x_dpll *zldpll = pin->dpll; - struct zl3073x_dev *zldev = zldpll->dev; - u8 ref, ref_prio; - int rc; - - guard(mutex)(&zldev->multiop_lock); - - /* Read DPLL configuration */ - rc = zl3073x_mb_op(zldev, ZL_REG_DPLL_MB_SEM, ZL_DPLL_MB_SEM_RD, - ZL_REG_DPLL_MB_MASK, BIT(zldpll->id)); - if (rc) - return rc; - - /* Read reference priority - one value for P&N pins (4 bits/pin) */ - ref = zl3073x_input_pin_ref_get(pin->id); - rc = zl3073x_read_u8(zldev, ZL_REG_DPLL_REF_PRIO(ref / 2), - &ref_prio); - if (rc) - return rc; - - /* Select nibble according pin type */ - if (zl3073x_dpll_is_p_pin(pin)) - *prio = FIELD_GET(ZL_DPLL_REF_PRIO_REF_P, ref_prio); - else - *prio = FIELD_GET(ZL_DPLL_REF_PRIO_REF_N, ref_prio); - - return rc; -} - -/** - * zl3073x_dpll_ref_prio_set - set priority for given input pin - * @pin: pointer to pin - * @prio: place to store priority - * - * Sets priority for the given input pin. - * - * Return: 0 on success, <0 on error - */ -static int -zl3073x_dpll_ref_prio_set(struct zl3073x_dpll_pin *pin, u8 prio) -{ - struct zl3073x_dpll *zldpll = pin->dpll; - struct zl3073x_dev *zldev = zldpll->dev; - u8 ref, ref_prio; - int rc; - - guard(mutex)(&zldev->multiop_lock); - - /* Read DPLL configuration into mailbox */ - rc = zl3073x_mb_op(zldev, ZL_REG_DPLL_MB_SEM, ZL_DPLL_MB_SEM_RD, - ZL_REG_DPLL_MB_MASK, BIT(zldpll->id)); - if (rc) - return rc; - - /* Read reference priority - one value shared between P&N pins */ - ref = zl3073x_input_pin_ref_get(pin->id); - rc = zl3073x_read_u8(zldev, ZL_REG_DPLL_REF_PRIO(ref / 2), &ref_prio); - if (rc) - return rc; - - /* Update nibble according pin type */ - if (zl3073x_dpll_is_p_pin(pin)) { - ref_prio &= ~ZL_DPLL_REF_PRIO_REF_P; - ref_prio |= FIELD_PREP(ZL_DPLL_REF_PRIO_REF_P, prio); - } else { - ref_prio &= ~ZL_DPLL_REF_PRIO_REF_N; - ref_prio |= FIELD_PREP(ZL_DPLL_REF_PRIO_REF_N, prio); - } - - /* Update reference priority */ - rc = zl3073x_write_u8(zldev, ZL_REG_DPLL_REF_PRIO(ref / 2), ref_prio); - if (rc) - return rc; - - /* Commit configuration */ - return zl3073x_mb_op(zldev, ZL_REG_DPLL_MB_SEM, ZL_DPLL_MB_SEM_WR, - ZL_REG_DPLL_MB_MASK, BIT(zldpll->id)); -} - /** * zl3073x_dpll_ref_state_get - get status for given input pin * @pin: pointer to pin @@ -627,7 +468,8 @@ zl3073x_dpll_ref_state_get(struct zl3073x_dpll_pin *pin, * pin as selectable. */ if (zl3073x_chan_mode_get(chan) == ZL_DPLL_MODE_REFSEL_MODE_AUTO && - zl3073x_dev_ref_is_status_ok(zldev, ref) && pin->selectable) { + zl3073x_dev_ref_is_status_ok(zldev, ref) && + zl3073x_chan_ref_is_selectable(chan, ref)) { *state = DPLL_PIN_STATE_SELECTABLE; return 0; } @@ -661,71 +503,81 @@ zl3073x_dpll_input_pin_state_on_dpll_set(const struct dpll_pin *dpll_pin, { struct zl3073x_dpll *zldpll = dpll_priv; struct zl3073x_dpll_pin *pin = pin_priv; - const struct zl3073x_chan *chan; - u8 new_ref; + struct zl3073x_chan chan; + u8 mode, ref; int rc; - chan = zl3073x_chan_state_get(zldpll->dev, zldpll->id); + chan = *zl3073x_chan_state_get(zldpll->dev, zldpll->id); + ref = zl3073x_input_pin_ref_get(pin->id); + mode = zl3073x_chan_mode_get(&chan); - switch (zl3073x_chan_mode_get(chan)) { + switch (mode) { case ZL_DPLL_MODE_REFSEL_MODE_REFLOCK: + if (state == DPLL_PIN_STATE_CONNECTED) { + /* Choose the pin as new selected reference */ + zl3073x_chan_ref_set(&chan, ref); + } else if (state == DPLL_PIN_STATE_DISCONNECTED) { + /* Choose new mode based on lock status */ + switch (zldpll->lock_status) { + case DPLL_LOCK_STATUS_LOCKED_HO_ACQ: + case DPLL_LOCK_STATUS_HOLDOVER: + mode = ZL_DPLL_MODE_REFSEL_MODE_HOLDOVER; + break; + default: + mode = ZL_DPLL_MODE_REFSEL_MODE_FREERUN; + break; + } + zl3073x_chan_mode_set(&chan, mode); + } else { + goto invalid_state; + } + break; case ZL_DPLL_MODE_REFSEL_MODE_FREERUN: case ZL_DPLL_MODE_REFSEL_MODE_HOLDOVER: if (state == DPLL_PIN_STATE_CONNECTED) { /* Choose the pin as new selected reference */ - new_ref = zl3073x_input_pin_ref_get(pin->id); - } else if (state == DPLL_PIN_STATE_DISCONNECTED) { - /* No reference */ - new_ref = ZL3073X_DPLL_REF_NONE; - } else { - NL_SET_ERR_MSG_MOD(extack, - "Invalid pin state for manual mode"); - return -EINVAL; + zl3073x_chan_ref_set(&chan, ref); + /* Switch to reflock mode */ + zl3073x_chan_mode_set(&chan, + ZL_DPLL_MODE_REFSEL_MODE_REFLOCK); + } else if (state != DPLL_PIN_STATE_DISCONNECTED) { + goto invalid_state; } - - rc = zl3073x_dpll_selected_ref_set(zldpll, new_ref); break; - case ZL_DPLL_MODE_REFSEL_MODE_AUTO: if (state == DPLL_PIN_STATE_SELECTABLE) { - if (pin->selectable) + if (zl3073x_chan_ref_is_selectable(&chan, ref)) return 0; /* Pin is already selectable */ /* Restore pin priority in HW */ - rc = zl3073x_dpll_ref_prio_set(pin, pin->prio); - if (rc) - return rc; - - /* Mark pin as selectable */ - pin->selectable = true; + zl3073x_chan_ref_prio_set(&chan, ref, pin->prio); } else if (state == DPLL_PIN_STATE_DISCONNECTED) { - if (!pin->selectable) + if (!zl3073x_chan_ref_is_selectable(&chan, ref)) return 0; /* Pin is already disconnected */ /* Set pin priority to none in HW */ - rc = zl3073x_dpll_ref_prio_set(pin, - ZL_DPLL_REF_PRIO_NONE); - if (rc) - return rc; - - /* Mark pin as non-selectable */ - pin->selectable = false; + zl3073x_chan_ref_prio_set(&chan, ref, + ZL_DPLL_REF_PRIO_NONE); } else { - NL_SET_ERR_MSG(extack, - "Invalid pin state for automatic mode"); - return -EINVAL; + goto invalid_state; } break; - default: /* In other modes we cannot change input reference */ NL_SET_ERR_MSG(extack, "Pin state cannot be changed in current mode"); - rc = -EOPNOTSUPP; - break; + return -EOPNOTSUPP; } - return rc; + /* Commit DPLL channel changes */ + rc = zl3073x_chan_state_set(zldpll->dev, zldpll->id, &chan); + if (rc) + return rc; + + return 0; +invalid_state: + NL_SET_ERR_MSG_MOD(extack, "Invalid pin state for this device mode"); + return -EINVAL; } static int @@ -745,15 +597,21 @@ zl3073x_dpll_input_pin_prio_set(const struct dpll_pin *dpll_pin, void *pin_priv, const struct dpll_device *dpll, void *dpll_priv, u32 prio, struct netlink_ext_ack *extack) { + struct zl3073x_dpll *zldpll = dpll_priv; struct zl3073x_dpll_pin *pin = pin_priv; + struct zl3073x_chan chan; + u8 ref; int rc; if (prio > ZL_DPLL_REF_PRIO_MAX) return -EINVAL; /* If the pin is selectable then update HW registers */ - if (pin->selectable) { - rc = zl3073x_dpll_ref_prio_set(pin, prio); + chan = *zl3073x_chan_state_get(zldpll->dev, zldpll->id); + ref = zl3073x_input_pin_ref_get(pin->id); + if (zl3073x_chan_ref_is_selectable(&chan, ref)) { + zl3073x_chan_ref_prio_set(&chan, ref, prio); + rc = zl3073x_chan_state_set(zldpll->dev, zldpll->id, &chan); if (rc) return rc; } @@ -1235,6 +1093,8 @@ zl3073x_dpll_mode_set(const struct dpll_device *dpll, void *dpll_priv, return rc; } + chan = *zl3073x_chan_state_get(zldpll->dev, zldpll->id); + if (mode == DPLL_MODE_MANUAL) { /* We are switching from automatic to manual mode: * - if we have a valid reference selected during auto mode then @@ -1256,25 +1116,21 @@ zl3073x_dpll_mode_set(const struct dpll_device *dpll, void *dpll_priv, * it is selectable after switch to automatic mode * - switch to automatic mode */ - struct zl3073x_dpll_pin *pin; + if (ZL3073X_DPLL_REF_IS_VALID(ref) && + !zl3073x_chan_ref_is_selectable(&chan, ref)) { + struct zl3073x_dpll_pin *pin; - pin = zl3073x_dpll_pin_get_by_ref(zldpll, ref); - if (pin && !pin->selectable) { - /* Restore pin priority in HW */ - rc = zl3073x_dpll_ref_prio_set(pin, pin->prio); - if (rc) { - NL_SET_ERR_MSG_MOD(extack, - "failed to restore pin priority"); - return rc; + pin = zl3073x_dpll_pin_get_by_ref(zldpll, ref); + if (pin) { + /* Restore pin priority in HW */ + zl3073x_chan_ref_prio_set(&chan, ref, + pin->prio); } - - pin->selectable = true; } hw_mode = ZL_DPLL_MODE_REFSEL_MODE_AUTO; } - chan = *zl3073x_chan_state_get(zldpll->dev, zldpll->id); zl3073x_chan_mode_set(&chan, hw_mode); if (ZL3073X_DPLL_REF_IS_VALID(ref)) zl3073x_chan_ref_set(&chan, ref); @@ -1426,18 +1282,16 @@ zl3073x_dpll_pin_register(struct zl3073x_dpll_pin *pin, u32 index) pin->phase_gran = props->dpll_props.phase_gran; if (zl3073x_dpll_is_input_pin(pin)) { - rc = zl3073x_dpll_ref_prio_get(pin, &pin->prio); - if (rc) - goto err_prio_get; + const struct zl3073x_chan *chan; + u8 ref; - if (pin->prio == ZL_DPLL_REF_PRIO_NONE) { - /* Clamp prio to max value & mark pin non-selectable */ + chan = zl3073x_chan_state_get(zldpll->dev, zldpll->id); + ref = zl3073x_input_pin_ref_get(pin->id); + pin->prio = zl3073x_chan_ref_prio_get(chan, ref); + + if (pin->prio == ZL_DPLL_REF_PRIO_NONE) + /* Clamp prio to max value */ pin->prio = ZL_DPLL_REF_PRIO_MAX; - pin->selectable = false; - } else { - /* Mark pin as selectable */ - pin->selectable = true; - } } /* Create or get existing DPLL pin */ @@ -1466,7 +1320,6 @@ zl3073x_dpll_pin_register(struct zl3073x_dpll_pin *pin, u32 index) err_register: dpll_pin_put(pin->dpll_pin, &pin->tracker); -err_prio_get: pin->dpll_pin = NULL; err_pin_get: zl3073x_pin_props_put(props); diff --git a/drivers/dpll/zl3073x/regs.h b/drivers/dpll/zl3073x/regs.h index 19c598daa784..5ae50cb761a9 100644 --- a/drivers/dpll/zl3073x/regs.h +++ b/drivers/dpll/zl3073x/regs.h @@ -6,6 +6,18 @@ #include #include +/* + * Hardware limits for ZL3073x chip family + */ +#define ZL3073X_MAX_CHANNELS 5 +#define ZL3073X_NUM_REFS 10 +#define ZL3073X_NUM_OUTS 10 +#define ZL3073X_NUM_SYNTHS 5 +#define ZL3073X_NUM_INPUT_PINS ZL3073X_NUM_REFS +#define ZL3073X_NUM_OUTPUT_PINS (ZL3073X_NUM_OUTS * 2) +#define ZL3073X_NUM_PINS (ZL3073X_NUM_INPUT_PINS + \ + ZL3073X_NUM_OUTPUT_PINS) + /* * Register address structure: * =========================== From acee049a6af2a7b4baabd28f16fb53628ea6e7a5 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Sun, 15 Mar 2026 18:42:24 +0100 Subject: [PATCH 0605/1561] dpll: zl3073x: drop selected and simplify connected ref getter The HW reports the currently selected reference in the dpll_refsel_status register regardless of the DPLL mode. Use this to delete zl3073x_dpll_selected_ref_get() and have callers read the register directly via the cached channel state. Simplify zl3073x_dpll_connected_ref_get() to check refsel_state for LOCK directly and return the reference index, changing the return type from int to u8. The redundant ref_is_status_ok check is removed since the DPLL cannot be in LOCK state with a failed reference. In zl3073x_dpll_mode_set(), replace the selected_ref_get() call with zl3073x_chan_refsel_ref_get() to read the currently selected reference directly from the cached channel state. Signed-off-by: Ivan Vecera Link: https://patch.msgid.link/20260315174224.399074-7-ivecera@redhat.com Signed-off-by: Jakub Kicinski --- drivers/dpll/zl3073x/dpll.c | 94 +++++++------------------------------ 1 file changed, 18 insertions(+), 76 deletions(-) diff --git a/drivers/dpll/zl3073x/dpll.c b/drivers/dpll/zl3073x/dpll.c index 7fb851976402..a29f606318f6 100644 --- a/drivers/dpll/zl3073x/dpll.c +++ b/drivers/dpll/zl3073x/dpll.c @@ -243,72 +243,27 @@ zl3073x_dpll_input_pin_frequency_set(const struct dpll_pin *dpll_pin, return zl3073x_ref_state_set(zldev, ref_id, &ref); } -/** - * zl3073x_dpll_selected_ref_get - get currently selected reference - * @zldpll: pointer to zl3073x_dpll - * @ref: place to store selected reference - * - * Check for currently selected reference the DPLL should be locked to - * and stores its index to given @ref. - * - * Return: 0 on success, <0 on error - */ -static int -zl3073x_dpll_selected_ref_get(struct zl3073x_dpll *zldpll, u8 *ref) -{ - const struct zl3073x_chan *chan; - - chan = zl3073x_chan_state_get(zldpll->dev, zldpll->id); - - switch (zl3073x_chan_mode_get(chan)) { - case ZL_DPLL_MODE_REFSEL_MODE_AUTO: - /* Return the reference only if the DPLL is locked to it */ - if (zl3073x_chan_refsel_state_get(chan) == - ZL_DPLL_REFSEL_STATUS_STATE_LOCK) - *ref = zl3073x_chan_refsel_ref_get(chan); - else - *ref = ZL3073X_DPLL_REF_NONE; - break; - case ZL_DPLL_MODE_REFSEL_MODE_REFLOCK: - /* For manual mode return stored value */ - *ref = zl3073x_chan_ref_get(chan); - break; - default: - /* For other modes like NCO, freerun... there is no input ref */ - *ref = ZL3073X_DPLL_REF_NONE; - break; - } - - return 0; -} - /** * zl3073x_dpll_connected_ref_get - get currently connected reference * @zldpll: pointer to zl3073x_dpll - * @ref: place to store selected reference * - * Looks for currently connected the DPLL is locked to and stores its index - * to given @ref. + * Looks for currently connected reference the DPLL is locked to. * - * Return: 0 on success, <0 on error + * Return: reference index if locked, ZL3073X_DPLL_REF_NONE otherwise */ -static int -zl3073x_dpll_connected_ref_get(struct zl3073x_dpll *zldpll, u8 *ref) +static u8 +zl3073x_dpll_connected_ref_get(struct zl3073x_dpll *zldpll) { - struct zl3073x_dev *zldev = zldpll->dev; - int rc; + const struct zl3073x_chan *chan = zl3073x_chan_state_get(zldpll->dev, + zldpll->id); + u8 state; - /* Get currently selected input reference */ - rc = zl3073x_dpll_selected_ref_get(zldpll, ref); - if (rc) - return rc; + /* A reference is connected only when the DPLL is locked to it */ + state = zl3073x_chan_refsel_state_get(chan); + if (state == ZL_DPLL_REFSEL_STATUS_STATE_LOCK) + return zl3073x_chan_refsel_ref_get(chan); - /* If the monitor indicates an error nothing is connected */ - if (ZL3073X_DPLL_REF_IS_VALID(*ref) && - !zl3073x_dev_ref_is_status_ok(zldev, *ref)) - *ref = ZL3073X_DPLL_REF_NONE; - - return 0; + return ZL3073X_DPLL_REF_NONE; } static int @@ -324,12 +279,9 @@ zl3073x_dpll_input_pin_phase_offset_get(const struct dpll_pin *dpll_pin, const struct zl3073x_ref *ref; u8 conn_id, ref_id; s64 ref_phase; - int rc; /* Get currently connected reference */ - rc = zl3073x_dpll_connected_ref_get(zldpll, &conn_id); - if (rc) - return rc; + conn_id = zl3073x_dpll_connected_ref_get(zldpll); /* Report phase offset only for currently connected pin if the phase * monitor feature is disabled and only if the input pin signal is @@ -367,7 +319,7 @@ zl3073x_dpll_input_pin_phase_offset_get(const struct dpll_pin *dpll_pin, *phase_offset = ref_phase * DPLL_PHASE_OFFSET_DIVIDER; - return rc; + return 0; } static int @@ -447,18 +399,13 @@ zl3073x_dpll_ref_state_get(struct zl3073x_dpll_pin *pin, struct zl3073x_dpll *zldpll = pin->dpll; struct zl3073x_dev *zldev = zldpll->dev; const struct zl3073x_chan *chan; - u8 ref, ref_conn; - int rc; + u8 ref; chan = zl3073x_chan_state_get(zldev, zldpll->id); ref = zl3073x_input_pin_ref_get(pin->id); - /* Get currently connected reference */ - rc = zl3073x_dpll_connected_ref_get(zldpll, &ref_conn); - if (rc) - return rc; - - if (ref == ref_conn) { + /* Check if the pin reference is connected */ + if (ref == zl3073x_dpll_connected_ref_get(zldpll)) { *state = DPLL_PIN_STATE_CONNECTED; return 0; } @@ -1087,13 +1034,8 @@ zl3073x_dpll_mode_set(const struct dpll_device *dpll, void *dpll_priv, u8 hw_mode, ref; int rc; - rc = zl3073x_dpll_selected_ref_get(zldpll, &ref); - if (rc) { - NL_SET_ERR_MSG_MOD(extack, "failed to get selected reference"); - return rc; - } - chan = *zl3073x_chan_state_get(zldpll->dev, zldpll->id); + ref = zl3073x_chan_refsel_ref_get(&chan); if (mode == DPLL_MODE_MANUAL) { /* We are switching from automatic to manual mode: From 6829f90906aaed1e4781742b96822031d5c2bd85 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 14 Mar 2026 12:43:33 +0100 Subject: [PATCH 0606/1561] ptp: vmw: Convert to a platform driver In all cases in which a struct acpi_driver is used for binding a driver to an ACPI device object, a corresponding platform device is created by the ACPI core and that device is regarded as a proper representation of underlying hardware. Accordingly, a struct platform_driver should be used by driver code to bind to that device. There are multiple reasons why drivers should not bind directly to ACPI device objects [1]. Overall, it is better to bind drivers to platform devices than to their ACPI companions, so convert the PTP VMware ACPI driver to a platform one. While this is not expected to alter functionality, it changes sysfs layout and so it will be visible to user space. Link: https://lore.kernel.org/all/2396510.ElGaqSPkdT@rafael.j.wysocki/ [1] Signed-off-by: Rafael J. Wysocki Reviewed-by: Simon Horman Link: https://patch.msgid.link/12883468.O9o76ZdvQC@rafael.j.wysocki Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_vmw.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/ptp/ptp_vmw.c b/drivers/ptp/ptp_vmw.c index 20ab05c4daa8..8510121d79d1 100644 --- a/drivers/ptp/ptp_vmw.c +++ b/drivers/ptp/ptp_vmw.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -83,7 +84,7 @@ static struct ptp_clock_info ptp_vmw_clock_info = { * ACPI driver ops for VMware "precision clock" virtual device. */ -static int ptp_vmw_acpi_add(struct acpi_device *device) +static int ptp_vmw_acpi_probe(struct platform_device *pdev) { ptp_vmw_clock = ptp_clock_register(&ptp_vmw_clock_info, NULL); if (IS_ERR(ptp_vmw_clock)) { @@ -91,11 +92,11 @@ static int ptp_vmw_acpi_add(struct acpi_device *device) return PTR_ERR(ptp_vmw_clock); } - ptp_vmw_acpi_device = device; + ptp_vmw_acpi_device = ACPI_COMPANION(&pdev->dev); return 0; } -static void ptp_vmw_acpi_remove(struct acpi_device *device) +static void ptp_vmw_acpi_remove(struct platform_device *pdev) { ptp_clock_unregister(ptp_vmw_clock); } @@ -107,12 +108,12 @@ static const struct acpi_device_id ptp_vmw_acpi_device_ids[] = { MODULE_DEVICE_TABLE(acpi, ptp_vmw_acpi_device_ids); -static struct acpi_driver ptp_vmw_acpi_driver = { - .name = "ptp_vmw", - .ids = ptp_vmw_acpi_device_ids, - .ops = { - .add = ptp_vmw_acpi_add, - .remove = ptp_vmw_acpi_remove +static struct platform_driver ptp_vmw_acpi_driver = { + .probe = ptp_vmw_acpi_probe, + .remove = ptp_vmw_acpi_remove, + .driver = { + .name = "ptp_vmw_acpi", + .acpi_match_table = ptp_vmw_acpi_device_ids, }, }; @@ -120,12 +121,12 @@ static int __init ptp_vmw_init(void) { if (x86_hyper_type != X86_HYPER_VMWARE) return -1; - return acpi_bus_register_driver(&ptp_vmw_acpi_driver); + return platform_driver_register(&ptp_vmw_acpi_driver); } static void __exit ptp_vmw_exit(void) { - acpi_bus_unregister_driver(&ptp_vmw_acpi_driver); + platform_driver_unregister(&ptp_vmw_acpi_driver); } module_init(ptp_vmw_init); From bb30400a566c7a6a9355873344ec63e2c6310e2c Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Mon, 16 Mar 2026 09:00:37 +0800 Subject: [PATCH 0607/1561] dt-bindings: net: Add support for Spacemit K3 dwmac The GMAC IP on Spacemit K3 is almost a standard Synopsys DesignWare MAC (version 5.40a) with some extra clock. Add necessary compatible string for this device. Signed-off-by: Inochi Amaoto Reviewed-by: Rob Herring (Arm) Link: https://patch.msgid.link/20260316010041.164360-2-inochiama@gmail.com Signed-off-by: Jakub Kicinski --- .../devicetree/bindings/net/snps,dwmac.yaml | 2 + .../bindings/net/spacemit,k3-dwmac.yaml | 102 ++++++++++++++++++ 2 files changed, 104 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/spacemit,k3-dwmac.yaml diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml index 38bc34dc4f09..98ebb6276bc6 100644 --- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml +++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml @@ -109,6 +109,7 @@ properties: - snps,dwmac-5.10a - snps,dwmac-5.20 - snps,dwmac-5.30a + - snps,dwmac-5.40a - snps,dwxgmac - snps,dwxgmac-2.10 - sophgo,sg2042-dwmac @@ -656,6 +657,7 @@ allOf: - snps,dwmac-5.10a - snps,dwmac-5.20 - snps,dwmac-5.30a + - snps,dwmac-5.40a - snps,dwxgmac - snps,dwxgmac-2.10 - st,spear600-gmac diff --git a/Documentation/devicetree/bindings/net/spacemit,k3-dwmac.yaml b/Documentation/devicetree/bindings/net/spacemit,k3-dwmac.yaml new file mode 100644 index 000000000000..678eccf044f9 --- /dev/null +++ b/Documentation/devicetree/bindings/net/spacemit,k3-dwmac.yaml @@ -0,0 +1,102 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/spacemit,k3-dwmac.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Spacemit K3 DWMAC glue layer + +maintainers: + - Inochi Amaoto + +select: + properties: + compatible: + contains: + const: spacemit,k3-dwmac + required: + - compatible + +properties: + compatible: + items: + - const: spacemit,k3-dwmac + - const: snps,dwmac-5.40a + + reg: + maxItems: 1 + + clocks: + items: + - description: GMAC application clock + - description: PTP clock + - description: TX clock + + clock-names: + items: + - const: stmmaceth + - const: ptp_ref + - const: tx + + interrupts: + minItems: 1 + items: + - description: MAC interrupt + - description: MAC wake interrupt + + interrupt-names: + minItems: 1 + items: + - const: macirq + - const: eth_wake_irq + + resets: + maxItems: 1 + + reset-names: + const: stmmaceth + + spacemit,apmu: + $ref: /schemas/types.yaml#/definitions/phandle-array + items: + - items: + - description: phandle to the syscon node which control the glue register + - description: offset of the control register + - description: offset of the dline register + description: + A phandle to syscon with offset to control registers for this MAC + +required: + - compatible + - reg + - clocks + - clock-names + - interrupts + - interrupt-names + - resets + - reset-names + - spacemit,apmu + +allOf: + - $ref: snps,dwmac.yaml# + +unevaluatedProperties: false + +examples: + - | + #include + + ethernet@cac80000 { + compatible = "spacemit,k3-dwmac", "snps,dwmac-5.40a"; + reg = <0xcac80000 0x2000>; + clocks = <&syscon_apmu 66>, <&syscon_apmu 68>, + <&syscon_apmu 69>; + clock-names = "stmmaceth", "ptp_ref", "tx"; + interrupts = <131 IRQ_TYPE_LEVEL_HIGH>, <276 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "macirq", "eth_wake_irq"; + phy-mode = "rgmii-id"; + phy-handle = <&phy0>; + resets = <&syscon_apmu 67>; + reset-names = "stmmaceth"; + spacemit,apmu = <&syscon_apmu 0x384 0x38c>; + }; From d35aa97ea908a17809358a981bef6cd752f2e8a0 Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Mon, 16 Mar 2026 09:00:38 +0800 Subject: [PATCH 0608/1561] net: stmmac: platform: Add snps,dwmac-5.40a IP compatible string Add compatible string for 5.40a version that can avoid to define some platform data in the glue layer. Signed-off-by: Inochi Amaoto Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20260316010041.164360-3-inochiama@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 59aac0afc609..545b8a3425eb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -403,6 +403,7 @@ static const char * const stmmac_gmac4_compats[] = { "snps,dwmac-5.10a", "snps,dwmac-5.20", "snps,dwmac-5.30a", + "snps,dwmac-5.40a", NULL }; From 30f0ba420ed3fb9a16d55523ae3c1b43a6f00e22 Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Mon, 16 Mar 2026 09:00:39 +0800 Subject: [PATCH 0609/1561] net: stmmac: Add glue layer for Spacemit K3 SoC The ethernet controller on Spacemit K3 SoC is Synopsys DesignWare MAC (version 5.40a), with the following special points: 1. The rate of the tx clock line is auto changed when the mac speed rate is changed, and no need for changing the input tx clock. 2. This controller require a extra syscon device to configure the interface type, enable wake up interrupt and delay configuration if needed. Add Spacemit dwmac driver support on the Spacemit K3 SoC. Signed-off-by: Inochi Amaoto Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20260316010041.164360-4-inochiama@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/Kconfig | 12 + drivers/net/ethernet/stmicro/stmmac/Makefile | 1 + .../ethernet/stmicro/stmmac/dwmac-spacemit.c | 227 ++++++++++++++++++ 3 files changed, 240 insertions(+) create mode 100644 drivers/net/ethernet/stmicro/stmmac/dwmac-spacemit.c diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 07088d03dbab..d3a6ab7383fc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -216,6 +216,18 @@ config DWMAC_SOPHGO for the stmmac device driver. This driver is used for the ethernet controllers on various Sophgo SoCs. +config DWMAC_SPACEMIT + tristate "Spacemit dwmac support" + depends on OF && (ARCH_SPACEMIT || COMPILE_TEST) + select MFD_SYSCON + default m if ARCH_SPACEMIT + help + Support for ethernet controllers on Spacemit RISC-V SoCs + + This selects the Spacemit platform specific glue layer support + for the stmmac device driver. This driver is used for the + Spacemit K3 ethernet controllers. + config DWMAC_STARFIVE tristate "StarFive dwmac support" depends on OF && (ARCH_STARFIVE || COMPILE_TEST) diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile index c9263987ef8d..945c5354eced 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Makefile +++ b/drivers/net/ethernet/stmicro/stmmac/Makefile @@ -27,6 +27,7 @@ obj-$(CONFIG_DWMAC_RZN1) += dwmac-rzn1.o obj-$(CONFIG_DWMAC_S32) += dwmac-s32.o obj-$(CONFIG_DWMAC_SOCFPGA) += dwmac-altr-socfpga.o obj-$(CONFIG_DWMAC_SOPHGO) += dwmac-sophgo.o +obj-$(CONFIG_DWMAC_SPACEMIT) += dwmac-spacemit.o obj-$(CONFIG_DWMAC_STARFIVE) += dwmac-starfive.o obj-$(CONFIG_DWMAC_STI) += dwmac-sti.o obj-$(CONFIG_DWMAC_STM32) += dwmac-stm32.o diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-spacemit.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-spacemit.c new file mode 100644 index 000000000000..223754cc5c79 --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-spacemit.c @@ -0,0 +1,227 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Spacemit DWMAC platform driver + * + * Copyright (C) 2026 Inochi Amaoto + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "stmmac_platform.h" + +/* ctrl register bits */ +#define CTRL_PHY_INTF_RGMII BIT(3) +#define CTRL_PHY_INTF_MII BIT(4) +#define CTRL_WAKE_IRQ_EN BIT(9) +#define CTRL_PHY_IRQ_EN BIT(12) + +/* dline register bits */ +#define RGMII_RX_DLINE_EN BIT(0) +#define RGMII_RX_DLINE_STEP GENMASK(5, 4) +#define RGMII_RX_DLINE_CODE GENMASK(15, 8) +#define RGMII_TX_DLINE_EN BIT(16) +#define RGMII_TX_DLINE_STEP GENMASK(21, 20) +#define RGMII_TX_DLINE_CODE GENMASK(31, 24) + +#define MAX_DLINE_DELAY_CODE 0xff +#define MAX_WORKED_DELAY 2800 +/* Note: the delay step value is at 0.1ps */ +#define K3_DELAY_STEP 367 + +struct spacmit_dwmac { + struct regmap *apmu; + unsigned int ctrl_offset; + unsigned int dline_offset; +}; + +static int spacemit_dwmac_set_delay(struct spacmit_dwmac *dwmac, + unsigned int tx_code, unsigned int rx_code) +{ + unsigned int mask, val; + + mask = RGMII_TX_DLINE_STEP | RGMII_TX_DLINE_CODE | RGMII_TX_DLINE_EN | + RGMII_RX_DLINE_STEP | RGMII_RX_DLINE_CODE | RGMII_RX_DLINE_EN; + + /* + * Since the delay step provided by config 0 is small enough, and + * it can cover the range of the valid delay, so there is no needed + * to use other step config. + */ + val = FIELD_PREP(RGMII_TX_DLINE_STEP, 0) | + FIELD_PREP(RGMII_TX_DLINE_CODE, tx_code) | RGMII_TX_DLINE_EN | + FIELD_PREP(RGMII_RX_DLINE_STEP, 0) | + FIELD_PREP(RGMII_RX_DLINE_CODE, rx_code) | RGMII_RX_DLINE_EN; + + return regmap_update_bits(dwmac->apmu, dwmac->dline_offset, + mask, val); +} + +static int spacemit_dwmac_detected_delay_value(unsigned int delay) +{ + if (delay == 0) + return 0; + + if (delay > MAX_WORKED_DELAY) + return -EINVAL; + + /* + * Note K3 require a specific factor for calculate + * the delay, in this scenario it is 0.9. So the + * formula is code * step / 10 * 0.9 + */ + return DIV_ROUND_CLOSEST(delay * 10 * 10, K3_DELAY_STEP * 9); +} + +static int spacemit_dwmac_fix_delay(struct spacmit_dwmac *dwmac, + unsigned int tx_delay, + unsigned int rx_delay) +{ + int rx_code; + int tx_code; + + rx_code = spacemit_dwmac_detected_delay_value(rx_delay); + if (rx_code < 0) + return rx_code; + + tx_code = spacemit_dwmac_detected_delay_value(tx_delay); + if (tx_code < 0) + return tx_code; + + return spacemit_dwmac_set_delay(dwmac, tx_code, rx_code); +} + +static int spacemit_dwmac_update_irq_config(struct spacmit_dwmac *dwmac, + struct stmmac_resources *stmmac_res) +{ + unsigned int val = stmmac_res->wol_irq >= 0 ? CTRL_WAKE_IRQ_EN : 0; + unsigned int mask = CTRL_WAKE_IRQ_EN; + + return regmap_update_bits(dwmac->apmu, dwmac->ctrl_offset, + mask, val); +} + +static void spacemit_get_interfaces(struct stmmac_priv *priv, void *bsp_priv, + unsigned long *interfaces) +{ + __set_bit(PHY_INTERFACE_MODE_MII, interfaces); + __set_bit(PHY_INTERFACE_MODE_RMII, interfaces); + phy_interface_set_rgmii(interfaces); +} + +static int spacemit_set_phy_intf_sel(void *bsp_priv, u8 phy_intf_sel) +{ + unsigned int mask = CTRL_PHY_INTF_MII | CTRL_PHY_INTF_RGMII; + struct spacmit_dwmac *dwmac = bsp_priv; + unsigned int val = 0; + + switch (phy_intf_sel) { + case PHY_INTF_SEL_GMII_MII: + val = CTRL_PHY_INTF_MII; + break; + + case PHY_INTF_SEL_RMII: + break; + + case PHY_INTF_SEL_RGMII: + val = CTRL_PHY_INTF_RGMII; + break; + + default: + return -EINVAL; + } + + return regmap_update_bits(dwmac->apmu, dwmac->ctrl_offset, + mask, val); +} + +static int spacemit_dwmac_probe(struct platform_device *pdev) +{ + struct plat_stmmacenet_data *plat_dat; + struct stmmac_resources stmmac_res; + struct device *dev = &pdev->dev; + struct spacmit_dwmac *dwmac; + unsigned int offset[2]; + struct regmap *apmu; + struct clk *clk_tx; + u32 rx_delay = 0; + u32 tx_delay = 0; + int ret; + + ret = stmmac_get_platform_resources(pdev, &stmmac_res); + if (ret) + return dev_err_probe(dev, ret, + "failed to get platform resources\n"); + + dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL); + if (!dwmac) + return -ENOMEM; + + plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac); + if (IS_ERR(plat_dat)) + return dev_err_probe(dev, PTR_ERR(plat_dat), + "failed to parse DT parameters\n"); + + clk_tx = devm_clk_get_enabled(&pdev->dev, "tx"); + if (IS_ERR(clk_tx)) + return dev_err_probe(&pdev->dev, PTR_ERR(clk_tx), + "failed to get tx clock\n"); + + apmu = syscon_regmap_lookup_by_phandle_args(pdev->dev.of_node, + "spacemit,apmu", 2, + offset); + if (IS_ERR(apmu)) + return dev_err_probe(dev, PTR_ERR(apmu), + "Failed to get apmu regmap\n"); + + dwmac->apmu = apmu; + dwmac->ctrl_offset = offset[0]; + dwmac->dline_offset = offset[1]; + + ret = spacemit_dwmac_update_irq_config(dwmac, &stmmac_res); + if (ret) + return dev_err_probe(dev, ret, "Failed to configure irq config\n"); + + of_property_read_u32(pdev->dev.of_node, "tx-internal-delay-ps", + &tx_delay); + of_property_read_u32(pdev->dev.of_node, "rx-internal-delay-ps", + &rx_delay); + + plat_dat->get_interfaces = spacemit_get_interfaces; + plat_dat->set_phy_intf_sel = spacemit_set_phy_intf_sel; + plat_dat->bsp_priv = dwmac; + + ret = spacemit_dwmac_fix_delay(dwmac, tx_delay, rx_delay); + if (ret) + return dev_err_probe(dev, ret, "Failed to configure delay\n"); + + return stmmac_dvr_probe(dev, plat_dat, &stmmac_res); +} + +static const struct of_device_id spacemit_dwmac_match[] = { + { .compatible = "spacemit,k3-dwmac" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, spacemit_dwmac_match); + +static struct platform_driver spacemit_dwmac_driver = { + .probe = spacemit_dwmac_probe, + .remove = stmmac_pltfr_remove, + .driver = { + .name = "spacemit-dwmac", + .pm = &stmmac_pltfr_pm_ops, + .of_match_table = spacemit_dwmac_match, + }, +}; +module_platform_driver(spacemit_dwmac_driver); + +MODULE_AUTHOR("Inochi Amaoto "); +MODULE_DESCRIPTION("Spacemit DWMAC platform driver"); +MODULE_LICENSE("GPL"); From 25e7553a502b92b08b67d40d1ac71e6cded3d5d1 Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Mon, 16 Mar 2026 09:00:40 +0800 Subject: [PATCH 0610/1561] MAINTAINERS: add entry for SpacemiT DWMAC glue layer Add a MAINTAINERS entry for the SpacemiT DWMAC glue layer driver and its DT binding. Signed-off-by: Inochi Amaoto Link: https://patch.msgid.link/20260316010041.164360-5-inochiama@gmail.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index ff6f17458f19..5d477fd592db 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -24827,6 +24827,12 @@ W: https://linuxtv.org Q: http://patchwork.linuxtv.org/project/linux-media/list/ F: drivers/media/dvb-frontends/sp2* +SPACEMIT DWMAC GLUE LAYER +M: Inochi Amaoto +S: Maintained +F: Documentation/devicetree/bindings/net/spacemit,k3-dwmac.yaml +F: drivers/net/ethernet/stmicro/stmmac/dwmac-spacemit.c + SPACEMIT K1 I2C DRIVER M: Troy Mitchell S: Maintained From 12b4b16c0c7ed6a4b98cfbec7c4ee69e94973334 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sat, 14 Mar 2026 09:42:24 +0000 Subject: [PATCH 0611/1561] net: stmmac: rearrange stmmac_tx_info members to pack better Rearrange the struct stmmac_tx_info members to pack better, essentially by sorting by type size: xsk_meta embeds only a pointer - 32 or 64 bit buf dma address, 32 or 64 bit len normally 32 bit buf_type dependent on arch map_as_page normally 8 bit last_segment normally 8 bit is_jumbo normally 8 bit Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w1LVo-0000000DGRl-44lt@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 7a66edba8f66..a2dc167adb6c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -47,13 +47,13 @@ enum stmmac_txbuf_type { }; struct stmmac_tx_info { + struct xsk_tx_metadata_compl xsk_meta; dma_addr_t buf; - bool map_as_page; unsigned len; + enum stmmac_txbuf_type buf_type; + bool map_as_page; bool last_segment; bool is_jumbo; - enum stmmac_txbuf_type buf_type; - struct xsk_tx_metadata_compl xsk_meta; }; #define STMMAC_TBS_AVAIL BIT(0) From a4b1590ee0f09b20cc8551b0c81f38e4739e93f2 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sat, 14 Mar 2026 09:42:30 +0000 Subject: [PATCH 0612/1561] net: stmmac: helpers for filling tx_q->tx_skbuff_dma Add helpers to fill in the transmit queue metadata to ensure that all entries are initialised when preparing to transmit. This avoids clean up code running into surprises. For example, stmmac_clean_desc3() (which calls clean_desc3() in chain_mode.c or ring_mode.c) looks at the .last_segment, and in the latter case, .is_jumbo members. AI believes that there is a missing .buf_type assignment in stmmac_tso_xmit(), but this is a mis-analysis. AI believes that stmmac_tso_allocator() which would increment tx_q->cur_tx will be called within the loop when nfrags is zero, but it's failing to realise that none of the code within the for() loop will be executed. In any case, at the point where the loop exits, tx_q->tx_skbuff_dma[tx_q->cur_tx].buf_type has been correctly set via the call to stmmac_set_tx_skb_dma_entry() - either the one before the loop, or the one at the end of the loop block. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w1LVu-0000000DGRr-0Ni3@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 96 +++++++++++-------- 1 file changed, 54 insertions(+), 42 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 124d7a00f9f0..daa6496c1a8e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1925,6 +1925,34 @@ err_init_rx_buffers: return ret; } +static void stmmac_set_tx_dma_entry(struct stmmac_tx_queue *tx_q, + unsigned int entry, + enum stmmac_txbuf_type type, + dma_addr_t addr, size_t len, + bool map_as_page) +{ + tx_q->tx_skbuff_dma[entry].buf = addr; + tx_q->tx_skbuff_dma[entry].len = len; + tx_q->tx_skbuff_dma[entry].buf_type = type; + tx_q->tx_skbuff_dma[entry].map_as_page = map_as_page; + tx_q->tx_skbuff_dma[entry].last_segment = false; + tx_q->tx_skbuff_dma[entry].is_jumbo = false; +} + +static void stmmac_set_tx_skb_dma_entry(struct stmmac_tx_queue *tx_q, + unsigned int entry, dma_addr_t addr, + size_t len, bool map_as_page) +{ + stmmac_set_tx_dma_entry(tx_q, entry, STMMAC_TXBUF_T_SKB, addr, len, + map_as_page); +} + +static void stmmac_set_tx_dma_last_segment(struct stmmac_tx_queue *tx_q, + unsigned int entry) +{ + tx_q->tx_skbuff_dma[entry].last_segment = true; +} + /** * __init_dma_tx_desc_rings - init the TX descriptor ring (per queue) * @priv: driver private structure @@ -1970,11 +1998,8 @@ static int __init_dma_tx_desc_rings(struct stmmac_priv *priv, p = tx_q->dma_tx + i; stmmac_clear_desc(priv, p); + stmmac_set_tx_skb_dma_entry(tx_q, i, 0, 0, false); - tx_q->tx_skbuff_dma[i].buf = 0; - tx_q->tx_skbuff_dma[i].map_as_page = false; - tx_q->tx_skbuff_dma[i].len = 0; - tx_q->tx_skbuff_dma[i].last_segment = false; tx_q->tx_skbuff[i] = NULL; } @@ -2695,19 +2720,15 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget) meta = xsk_buff_get_metadata(pool, xdp_desc.addr); xsk_buff_raw_dma_sync_for_device(pool, dma_addr, xdp_desc.len); - tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_XSK_TX; - /* To return XDP buffer to XSK pool, we simple call * xsk_tx_completed(), so we don't need to fill up * 'buf' and 'xdpf'. */ - tx_q->tx_skbuff_dma[entry].buf = 0; - tx_q->xdpf[entry] = NULL; + stmmac_set_tx_dma_entry(tx_q, entry, STMMAC_TXBUF_T_XSK_TX, + 0, xdp_desc.len, false); + stmmac_set_tx_dma_last_segment(tx_q, entry); - tx_q->tx_skbuff_dma[entry].map_as_page = false; - tx_q->tx_skbuff_dma[entry].len = xdp_desc.len; - tx_q->tx_skbuff_dma[entry].last_segment = true; - tx_q->tx_skbuff_dma[entry].is_jumbo = false; + tx_q->xdpf[entry] = NULL; stmmac_set_desc_addr(priv, tx_desc, dma_addr); @@ -2882,6 +2903,9 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue, tx_q->tx_skbuff_dma[entry].map_as_page = false; } + /* This looks at tx_q->tx_skbuff_dma[tx_q->dirty_tx].is_jumbo + * and tx_q->tx_skbuff_dma[tx_q->dirty_tx].last_segment + */ stmmac_clean_desc3(priv, tx_q, p); tx_q->tx_skbuff_dma[entry].last_segment = false; @@ -4495,10 +4519,8 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) * this DMA buffer right after the DMA engine completely finishes the * full buffer transmission. */ - tx_q->tx_skbuff_dma[tx_q->cur_tx].buf = des; - tx_q->tx_skbuff_dma[tx_q->cur_tx].len = skb_headlen(skb); - tx_q->tx_skbuff_dma[tx_q->cur_tx].map_as_page = false; - tx_q->tx_skbuff_dma[tx_q->cur_tx].buf_type = STMMAC_TXBUF_T_SKB; + stmmac_set_tx_skb_dma_entry(tx_q, tx_q->cur_tx, des, skb_headlen(skb), + false); /* Prepare fragments */ for (i = 0; i < nfrags; i++) { @@ -4513,17 +4535,14 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) stmmac_tso_allocator(priv, des, skb_frag_size(frag), (i == nfrags - 1), queue); - tx_q->tx_skbuff_dma[tx_q->cur_tx].buf = des; - tx_q->tx_skbuff_dma[tx_q->cur_tx].len = skb_frag_size(frag); - tx_q->tx_skbuff_dma[tx_q->cur_tx].map_as_page = true; - tx_q->tx_skbuff_dma[tx_q->cur_tx].buf_type = STMMAC_TXBUF_T_SKB; + stmmac_set_tx_skb_dma_entry(tx_q, tx_q->cur_tx, des, + skb_frag_size(frag), true); } - tx_q->tx_skbuff_dma[tx_q->cur_tx].last_segment = true; + stmmac_set_tx_dma_last_segment(tx_q, tx_q->cur_tx); /* Only the last descriptor gets to point to the skb. */ tx_q->tx_skbuff[tx_q->cur_tx] = skb; - tx_q->tx_skbuff_dma[tx_q->cur_tx].buf_type = STMMAC_TXBUF_T_SKB; /* Manage tx mitigation */ tx_packets = CIRC_CNT(tx_q->cur_tx + 1, first_tx, @@ -4782,23 +4801,18 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) if (dma_mapping_error(priv->device, des)) goto dma_map_err; /* should reuse desc w/o issues */ - tx_q->tx_skbuff_dma[entry].buf = des; - + stmmac_set_tx_skb_dma_entry(tx_q, entry, des, len, true); stmmac_set_desc_addr(priv, desc, des); - tx_q->tx_skbuff_dma[entry].map_as_page = true; - tx_q->tx_skbuff_dma[entry].len = len; - tx_q->tx_skbuff_dma[entry].last_segment = last_segment; - tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_SKB; - /* Prepare the descriptor and set the own bit too */ stmmac_prepare_tx_desc(priv, desc, 0, len, csum_insertion, priv->mode, 1, last_segment, skb->len); } + stmmac_set_tx_dma_last_segment(tx_q, entry); + /* Only the last descriptor gets to point to the skb. */ tx_q->tx_skbuff[entry] = skb; - tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_SKB; /* According to the coalesce parameter the IC bit for the latest * segment is reset and the timer re-started to clean the tx status. @@ -4877,14 +4891,13 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) if (dma_mapping_error(priv->device, des)) goto dma_map_err; - tx_q->tx_skbuff_dma[first_entry].buf = des; - tx_q->tx_skbuff_dma[first_entry].buf_type = STMMAC_TXBUF_T_SKB; - tx_q->tx_skbuff_dma[first_entry].map_as_page = false; + stmmac_set_tx_skb_dma_entry(tx_q, first_entry, des, nopaged_len, + false); stmmac_set_desc_addr(priv, first, des); - tx_q->tx_skbuff_dma[first_entry].len = nopaged_len; - tx_q->tx_skbuff_dma[first_entry].last_segment = last_segment; + if (last_segment) + stmmac_set_tx_dma_last_segment(tx_q, first_entry); if (unlikely((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && priv->hwts_tx_en)) { @@ -5086,6 +5099,7 @@ static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue, struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[queue]; bool csum = !priv->plat->tx_queues_cfg[queue].coe_unsupported; unsigned int entry = tx_q->cur_tx; + enum stmmac_txbuf_type buf_type; struct dma_desc *tx_desc; dma_addr_t dma_addr; bool set_ic; @@ -5113,7 +5127,7 @@ static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue, if (dma_mapping_error(priv->device, dma_addr)) return STMMAC_XDP_CONSUMED; - tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_XDP_NDO; + buf_type = STMMAC_TXBUF_T_XDP_NDO; } else { struct page *page = virt_to_page(xdpf->data); @@ -5122,14 +5136,12 @@ static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue, dma_sync_single_for_device(priv->device, dma_addr, xdpf->len, DMA_BIDIRECTIONAL); - tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_XDP_TX; + buf_type = STMMAC_TXBUF_T_XDP_TX; } - tx_q->tx_skbuff_dma[entry].buf = dma_addr; - tx_q->tx_skbuff_dma[entry].map_as_page = false; - tx_q->tx_skbuff_dma[entry].len = xdpf->len; - tx_q->tx_skbuff_dma[entry].last_segment = true; - tx_q->tx_skbuff_dma[entry].is_jumbo = false; + stmmac_set_tx_dma_entry(tx_q, entry, buf_type, dma_addr, xdpf->len, + false); + stmmac_set_tx_dma_last_segment(tx_q, entry); tx_q->xdpf[entry] = xdpf; From ee19df4d3bb7a207db813aaa0b2f26a1f1841a69 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sat, 14 Mar 2026 09:42:35 +0000 Subject: [PATCH 0613/1561] net: stmmac: clean up stmmac_clear_rx_descriptors() The two paths calling stmmac_init_rx_desc() are identical apart from the way the pointer to the descriptor is fetched. Split this out. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w1LVz-0000000DGRx-0v9B@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index daa6496c1a8e..0fed9808d679 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1514,20 +1514,20 @@ static void stmmac_clear_rx_descriptors(struct stmmac_priv *priv, u32 queue) { struct stmmac_rx_queue *rx_q = &dma_conf->rx_queue[queue]; + struct dma_desc *desc; int i; /* Clear the RX descriptors */ - for (i = 0; i < dma_conf->dma_rx_size; i++) + for (i = 0; i < dma_conf->dma_rx_size; i++) { if (priv->extend_desc) - stmmac_init_rx_desc(priv, &rx_q->dma_erx[i].basic, - priv->use_riwt, priv->mode, - (i == dma_conf->dma_rx_size - 1), - dma_conf->dma_buf_sz); + desc = &rx_q->dma_erx[i].basic; else - stmmac_init_rx_desc(priv, &rx_q->dma_rx[i], - priv->use_riwt, priv->mode, - (i == dma_conf->dma_rx_size - 1), - dma_conf->dma_buf_sz); + desc = &rx_q->dma_rx[i]; + + stmmac_init_rx_desc(priv, desc, priv->use_riwt, priv->mode, + (i == dma_conf->dma_rx_size - 1), + dma_conf->dma_buf_sz); + } } /** From ee1b6a94ba1246e505e2411080f23afb29e03425 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sat, 14 Mar 2026 09:42:40 +0000 Subject: [PATCH 0614/1561] net: stmmac: add helper to get hardware receive descriptor Provide a helper to get a hardware receive descriptor that takes account of whether extended format is being used, but returning the base struct dma_desc pointer. This avoids multiple instances where this is open coded. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w1LW4-0000000DGS3-1J49@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 76 +++++++------------ 1 file changed, 26 insertions(+), 50 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 0fed9808d679..f447893a7ad1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -362,6 +362,16 @@ static inline u32 stmmac_tx_avail(struct stmmac_priv *priv, u32 queue) priv->dma_conf.dma_tx_size); } +static struct dma_desc *stmmac_get_rx_desc(struct stmmac_priv *priv, + struct stmmac_rx_queue *rx_q, + unsigned int index) +{ + if (priv->extend_desc) + return &rx_q->dma_erx[index].basic; + else + return &rx_q->dma_rx[index]; +} + /** * stmmac_rx_dirty - Get RX queue dirty * @priv: driver private structure @@ -1421,13 +1431,11 @@ static void stmmac_display_rx_rings(struct stmmac_priv *priv, pr_info("\tRX Queue %u rings\n", queue); - if (priv->extend_desc) { - head_rx = (void *)rx_q->dma_erx; + head_rx = stmmac_get_rx_desc(priv, rx_q, 0); + if (priv->extend_desc) desc_size = sizeof(struct dma_extended_desc); - } else { - head_rx = (void *)rx_q->dma_rx; + else desc_size = sizeof(struct dma_desc); - } /* Display RX ring */ stmmac_display_ring(priv, head_rx, dma_conf->dma_rx_size, true, @@ -1519,10 +1527,7 @@ static void stmmac_clear_rx_descriptors(struct stmmac_priv *priv, /* Clear the RX descriptors */ for (i = 0; i < dma_conf->dma_rx_size; i++) { - if (priv->extend_desc) - desc = &rx_q->dma_erx[i].basic; - else - desc = &rx_q->dma_rx[i]; + desc = stmmac_get_rx_desc(priv, rx_q, i); stmmac_init_rx_desc(priv, desc, priv->use_riwt, priv->mode, (i == dma_conf->dma_rx_size - 1), @@ -1731,10 +1736,7 @@ static int stmmac_alloc_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p; int ret; - if (priv->extend_desc) - p = &((rx_q->dma_erx + i)->basic); - else - p = rx_q->dma_rx + i; + p = stmmac_get_rx_desc(priv, rx_q, i); ret = stmmac_init_rx_buffers(priv, dma_conf, p, i, flags, queue); @@ -1789,10 +1791,7 @@ static int stmmac_alloc_rx_buffers_zc(struct stmmac_priv *priv, dma_addr_t dma_addr; struct dma_desc *p; - if (priv->extend_desc) - p = (struct dma_desc *)(rx_q->dma_erx + i); - else - p = rx_q->dma_rx + i; + p = stmmac_get_rx_desc(priv, rx_q, i); buf = &rx_q->buf_pool[i]; @@ -4978,10 +4977,7 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue) struct dma_desc *p; bool use_rx_wd; - if (priv->extend_desc) - p = (struct dma_desc *)(rx_q->dma_erx + entry); - else - p = rx_q->dma_rx + entry; + p = stmmac_get_rx_desc(priv, rx_q, entry); if (!buf->page) { buf->page = page_pool_alloc_pages(rx_q->page_pool, gfp); @@ -5379,10 +5375,7 @@ static bool stmmac_rx_refill_zc(struct stmmac_priv *priv, u32 queue, u32 budget) } } - if (priv->extend_desc) - rx_desc = (struct dma_desc *)(rx_q->dma_erx + entry); - else - rx_desc = rx_q->dma_rx + entry; + rx_desc = stmmac_get_rx_desc(priv, rx_q, entry); dma_addr = xsk_buff_xdp_get_dma(buf->xdp); stmmac_set_desc_addr(priv, rx_desc, dma_addr); @@ -5440,14 +5433,12 @@ static int stmmac_rx_zc(struct stmmac_priv *priv, int limit, u32 queue) int status = 0; if (netif_msg_rx_status(priv)) { - void *rx_head; + void *rx_head = stmmac_get_rx_desc(priv, rx_q, 0); netdev_dbg(priv->dev, "%s: descriptor ring:\n", __func__); if (priv->extend_desc) { - rx_head = (void *)rx_q->dma_erx; desc_size = sizeof(struct dma_extended_desc); } else { - rx_head = (void *)rx_q->dma_rx; desc_size = sizeof(struct dma_desc); } @@ -5485,10 +5476,7 @@ read_again: dirty = 0; } - if (priv->extend_desc) - p = (struct dma_desc *)(rx_q->dma_erx + entry); - else - p = rx_q->dma_rx + entry; + p = stmmac_get_rx_desc(priv, rx_q, entry); /* read the status of the incoming frame */ status = stmmac_rx_status(priv, &priv->xstats, p); @@ -5501,10 +5489,7 @@ read_again: priv->dma_conf.dma_rx_size); next_entry = rx_q->cur_rx; - if (priv->extend_desc) - np = (struct dma_desc *)(rx_q->dma_erx + next_entry); - else - np = rx_q->dma_rx + next_entry; + np = stmmac_get_rx_desc(priv, rx_q, next_entry); prefetch(np); @@ -5640,16 +5625,13 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) limit = min(priv->dma_conf.dma_rx_size - 1, (unsigned int)limit); if (netif_msg_rx_status(priv)) { - void *rx_head; + void *rx_head = stmmac_get_rx_desc(priv, rx_q, 0); netdev_dbg(priv->dev, "%s: descriptor ring:\n", __func__); - if (priv->extend_desc) { - rx_head = (void *)rx_q->dma_erx; + if (priv->extend_desc) desc_size = sizeof(struct dma_extended_desc); - } else { - rx_head = (void *)rx_q->dma_rx; + else desc_size = sizeof(struct dma_desc); - } stmmac_display_ring(priv, rx_head, priv->dma_conf.dma_rx_size, true, rx_q->dma_rx_phy, desc_size); @@ -5682,10 +5664,7 @@ read_again: entry = next_entry; buf = &rx_q->buf_pool[entry]; - if (priv->extend_desc) - p = (struct dma_desc *)(rx_q->dma_erx + entry); - else - p = rx_q->dma_rx + entry; + p = stmmac_get_rx_desc(priv, rx_q, entry); /* read the status of the incoming frame */ status = stmmac_rx_status(priv, &priv->xstats, p); @@ -5697,10 +5676,7 @@ read_again: priv->dma_conf.dma_rx_size); next_entry = rx_q->cur_rx; - if (priv->extend_desc) - np = (struct dma_desc *)(rx_q->dma_erx + next_entry); - else - np = rx_q->dma_rx + next_entry; + np = stmmac_get_rx_desc(priv, rx_q, next_entry); prefetch(np); From 4f6280c450ba699e3b700f1cbc4ff8dbc70f3c96 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sat, 14 Mar 2026 09:42:45 +0000 Subject: [PATCH 0615/1561] net: stmmac: add helper to get size of a receive descriptor Add and use a helper to get the size of the hardware receive descriptor. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w1LW9-0000000DGS9-1mzX@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index f447893a7ad1..24b10db83a35 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -362,6 +362,14 @@ static inline u32 stmmac_tx_avail(struct stmmac_priv *priv, u32 queue) priv->dma_conf.dma_tx_size); } +static size_t stmmac_get_rx_desc_size(struct stmmac_priv *priv) +{ + if (priv->extend_desc) + return sizeof(struct dma_extended_desc); + else + return sizeof(struct dma_desc); +} + static struct dma_desc *stmmac_get_rx_desc(struct stmmac_priv *priv, struct stmmac_rx_queue *rx_q, unsigned int index) @@ -1432,10 +1440,7 @@ static void stmmac_display_rx_rings(struct stmmac_priv *priv, pr_info("\tRX Queue %u rings\n", queue); head_rx = stmmac_get_rx_desc(priv, rx_q, 0); - if (priv->extend_desc) - desc_size = sizeof(struct dma_extended_desc); - else - desc_size = sizeof(struct dma_desc); + desc_size = stmmac_get_rx_desc_size(priv); /* Display RX ring */ stmmac_display_ring(priv, head_rx, dma_conf->dma_rx_size, true, @@ -5436,11 +5441,7 @@ static int stmmac_rx_zc(struct stmmac_priv *priv, int limit, u32 queue) void *rx_head = stmmac_get_rx_desc(priv, rx_q, 0); netdev_dbg(priv->dev, "%s: descriptor ring:\n", __func__); - if (priv->extend_desc) { - desc_size = sizeof(struct dma_extended_desc); - } else { - desc_size = sizeof(struct dma_desc); - } + desc_size = stmmac_get_rx_desc_size(priv); stmmac_display_ring(priv, rx_head, priv->dma_conf.dma_rx_size, true, rx_q->dma_rx_phy, desc_size); @@ -5628,10 +5629,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) void *rx_head = stmmac_get_rx_desc(priv, rx_q, 0); netdev_dbg(priv->dev, "%s: descriptor ring:\n", __func__); - if (priv->extend_desc) - desc_size = sizeof(struct dma_extended_desc); - else - desc_size = sizeof(struct dma_desc); + desc_size = stmmac_get_rx_desc_size(priv); stmmac_display_ring(priv, rx_head, priv->dma_conf.dma_rx_size, true, rx_q->dma_rx_phy, desc_size); From e9d0cafa9d60738ab396aa4b68d7e40c0855b4db Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sat, 14 Mar 2026 09:42:50 +0000 Subject: [PATCH 0616/1561] net: stmmac: add helper to set receive tail pointer Setting the queue receive tail pointer follows a common pattern: calculate the DMA address, and then call stmmac_set_rx_tail_ptr(). The only difference between all the call sites is the index used. Factor this out into a static function, and add a comment about why it only uses the normal descriptor size. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w1LWE-0000000DGSF-2Hbs@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 24b10db83a35..d4129e9a0fb6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -380,6 +380,18 @@ static struct dma_desc *stmmac_get_rx_desc(struct stmmac_priv *priv, return &rx_q->dma_rx[index]; } +static void stmmac_set_queue_rx_tail_ptr(struct stmmac_priv *priv, + struct stmmac_rx_queue *rx_q, + unsigned int chan, unsigned int index) +{ + /* This only needs to deal with normal descriptors as enhanced + * descriptiors are only supported with dwmac1000 (rx_tail_addr = rx_q->dma_rx_phy + index * sizeof(struct dma_desc); + stmmac_set_rx_tail_ptr(priv, priv->ioaddr, rx_q->rx_tail_addr, chan); +} + /** * stmmac_rx_dirty - Get RX queue dirty * @priv: driver private structure @@ -3302,11 +3314,8 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv) stmmac_init_rx_chan(priv, priv->ioaddr, priv->plat->dma_cfg, rx_q->dma_rx_phy, chan); - rx_q->rx_tail_addr = rx_q->dma_rx_phy + - (rx_q->buf_alloc_num * - sizeof(struct dma_desc)); - stmmac_set_rx_tail_ptr(priv, priv->ioaddr, - rx_q->rx_tail_addr, chan); + stmmac_set_queue_rx_tail_ptr(priv, rx_q, chan, + rx_q->buf_alloc_num); } /* DMA TX Channel Configuration */ @@ -5023,9 +5032,7 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue) entry = STMMAC_GET_ENTRY(entry, priv->dma_conf.dma_rx_size); } rx_q->dirty_rx = entry; - rx_q->rx_tail_addr = rx_q->dma_rx_phy + - (rx_q->dirty_rx * sizeof(struct dma_desc)); - stmmac_set_rx_tail_ptr(priv, priv->ioaddr, rx_q->rx_tail_addr, queue); + stmmac_set_queue_rx_tail_ptr(priv, rx_q, queue, rx_q->dirty_rx); /* Wake up Rx DMA from the suspend state if required */ stmmac_enable_dma_reception(priv, priv->ioaddr, queue); } @@ -5405,9 +5412,7 @@ static bool stmmac_rx_refill_zc(struct stmmac_priv *priv, u32 queue, u32 budget) if (rx_desc) { rx_q->dirty_rx = entry; - rx_q->rx_tail_addr = rx_q->dma_rx_phy + - (rx_q->dirty_rx * sizeof(struct dma_desc)); - stmmac_set_rx_tail_ptr(priv, priv->ioaddr, rx_q->rx_tail_addr, queue); + stmmac_set_queue_rx_tail_ptr(priv, rx_q, queue, rx_q->dirty_rx); } return ret; @@ -6975,10 +6980,8 @@ void stmmac_enable_rx_queue(struct stmmac_priv *priv, u32 queue) stmmac_init_rx_chan(priv, priv->ioaddr, priv->plat->dma_cfg, rx_q->dma_rx_phy, rx_q->queue_index); - rx_q->rx_tail_addr = rx_q->dma_rx_phy + (rx_q->buf_alloc_num * - sizeof(struct dma_desc)); - stmmac_set_rx_tail_ptr(priv, priv->ioaddr, - rx_q->rx_tail_addr, rx_q->queue_index); + stmmac_set_queue_rx_tail_ptr(priv, rx_q, rx_q->queue_index, + rx_q->buf_alloc_num); if (rx_q->xsk_pool && rx_q->buf_alloc_num) { buf_size = xsk_pool_get_rx_frame_size(rx_q->xsk_pool); @@ -7129,11 +7132,8 @@ int stmmac_xdp_open(struct net_device *dev) stmmac_init_rx_chan(priv, priv->ioaddr, priv->plat->dma_cfg, rx_q->dma_rx_phy, chan); - rx_q->rx_tail_addr = rx_q->dma_rx_phy + - (rx_q->buf_alloc_num * - sizeof(struct dma_desc)); - stmmac_set_rx_tail_ptr(priv, priv->ioaddr, - rx_q->rx_tail_addr, chan); + stmmac_set_queue_rx_tail_ptr(priv, rx_q, chan, + rx_q->buf_alloc_num); if (rx_q->xsk_pool && rx_q->buf_alloc_num) { buf_size = xsk_pool_get_rx_frame_size(rx_q->xsk_pool); From bea37bda7f75c552e0ddddec2d04c3c7c6222208 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sat, 14 Mar 2026 09:42:55 +0000 Subject: [PATCH 0617/1561] net: stmmac: remove rx_tail_addr There is only one place where rx_q->rx_tail_addr is used - the new stmmac_set_queue_rx_tail_ptr(). Make this a local variable and remove it from struct stmmac_rx_queue. This commit does not change the semantics - the hardware relies upon the descriptor ring not crossing a 4GiB boundary as the high address bits are programmed into a separate register via stmmac_init_rx_chan(). Hence, truncating the DMA address to 32-bit is fine as the register it will be programmed into is 32-bit, and the high bits are handled elsewhere. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w1LWJ-0000000DGSL-2jWd@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 1 - drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 5 +++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index a2dc167adb6c..2f24d5c584e6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -131,7 +131,6 @@ struct stmmac_rx_queue { unsigned int buf_alloc_num; unsigned int napi_skb_frag_size; dma_addr_t dma_rx_phy; - u32 rx_tail_addr; unsigned int state_saved; struct { struct sk_buff *skb; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index d4129e9a0fb6..dafd3a3e9844 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -388,8 +388,9 @@ static void stmmac_set_queue_rx_tail_ptr(struct stmmac_priv *priv, * descriptiors are only supported with dwmac1000 (rx_tail_addr = rx_q->dma_rx_phy + index * sizeof(struct dma_desc); - stmmac_set_rx_tail_ptr(priv, priv->ioaddr, rx_q->rx_tail_addr, chan); + u32 rx_tail_addr = rx_q->dma_rx_phy + index * sizeof(struct dma_desc); + + stmmac_set_rx_tail_ptr(priv, priv->ioaddr, rx_tail_addr, chan); } /** From 0b3d090314372055e0c5544be1f781e988859198 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sat, 14 Mar 2026 09:43:00 +0000 Subject: [PATCH 0618/1561] net: stmmac: use consistent tests for receive buffer size Two out of the three sites that set the receive buffer size (via stmmac_set_dma_bfsize()) check for rx_q->xsk_pool && rx_q->buf_alloc_num. One uses just rx_q->xsk_pool. Discussing with Yoong Siang Song, the conclusion is that stmmac_dma_operation_mode() is missing the rx_q->buf_alloc_num check. Add this check. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w1LWO-0000000DGSR-3CaB@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index dafd3a3e9844..b37bac337d6a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2612,7 +2612,7 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) stmmac_dma_rx_mode(priv, priv->ioaddr, rxmode, chan, rxfifosz, qmode); - if (rx_q->xsk_pool) { + if (rx_q->xsk_pool && rx_q->buf_alloc_num) { buf_size = xsk_pool_get_rx_frame_size(rx_q->xsk_pool); stmmac_set_dma_bfsize(priv, priv->ioaddr, buf_size, From a32734d25d870e4b6d3a9de87dd6e82b27c6d914 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sat, 14 Mar 2026 09:43:05 +0000 Subject: [PATCH 0619/1561] net: stmmac: add helper to set receive buffer size There are three sites that configure the hardware for the receive buffer size using the same logic to determine the buffer size. Add a helper so there is only one copy of this code. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w1LWT-0000000DGSX-3fxc@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 54 ++++++++----------- 1 file changed, 21 insertions(+), 33 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index b37bac337d6a..8a149fdc45d0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -393,6 +393,24 @@ static void stmmac_set_queue_rx_tail_ptr(struct stmmac_priv *priv, stmmac_set_rx_tail_ptr(priv, priv->ioaddr, rx_tail_addr, chan); } +static void stmmac_set_queue_rx_buf_size(struct stmmac_priv *priv, + struct stmmac_rx_queue *rx_q, + unsigned int chan) +{ + u32 buf_size; + + if (rx_q->xsk_pool && rx_q->buf_alloc_num) { + buf_size = xsk_pool_get_rx_frame_size(rx_q->xsk_pool); + stmmac_set_dma_bfsize(priv, priv->ioaddr, + buf_size, + rx_q->queue_index); + } else { + stmmac_set_dma_bfsize(priv, priv->ioaddr, + priv->dma_conf.dma_buf_sz, + rx_q->queue_index); + } +} + /** * stmmac_rx_dirty - Get RX queue dirty * @priv: driver private structure @@ -2605,23 +2623,13 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) /* configure all channels */ for (chan = 0; chan < rx_channels_count; chan++) { struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[chan]; - u32 buf_size; qmode = priv->plat->rx_queues_cfg[chan].mode_to_use; stmmac_dma_rx_mode(priv, priv->ioaddr, rxmode, chan, rxfifosz, qmode); - if (rx_q->xsk_pool && rx_q->buf_alloc_num) { - buf_size = xsk_pool_get_rx_frame_size(rx_q->xsk_pool); - stmmac_set_dma_bfsize(priv, priv->ioaddr, - buf_size, - chan); - } else { - stmmac_set_dma_bfsize(priv, priv->ioaddr, - priv->dma_conf.dma_buf_sz, - chan); - } + stmmac_set_queue_rx_buf_size(priv, rx_q, chan); } for (chan = 0; chan < tx_channels_count; chan++) { @@ -6959,7 +6967,6 @@ void stmmac_enable_rx_queue(struct stmmac_priv *priv, u32 queue) struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[queue]; struct stmmac_channel *ch = &priv->channel[queue]; unsigned long flags; - u32 buf_size; int ret; ret = __alloc_dma_rx_desc_resources(priv, &priv->dma_conf, queue); @@ -6984,16 +6991,7 @@ void stmmac_enable_rx_queue(struct stmmac_priv *priv, u32 queue) stmmac_set_queue_rx_tail_ptr(priv, rx_q, rx_q->queue_index, rx_q->buf_alloc_num); - if (rx_q->xsk_pool && rx_q->buf_alloc_num) { - buf_size = xsk_pool_get_rx_frame_size(rx_q->xsk_pool); - stmmac_set_dma_bfsize(priv, priv->ioaddr, - buf_size, - rx_q->queue_index); - } else { - stmmac_set_dma_bfsize(priv, priv->ioaddr, - priv->dma_conf.dma_buf_sz, - rx_q->queue_index); - } + stmmac_set_queue_rx_buf_size(priv, rx_q, rx_q->queue_index); stmmac_start_rx_dma(priv, queue); @@ -7096,7 +7094,6 @@ int stmmac_xdp_open(struct net_device *dev) u8 dma_csr_ch = max(rx_cnt, tx_cnt); struct stmmac_rx_queue *rx_q; struct stmmac_tx_queue *tx_q; - u32 buf_size; bool sph_en; u8 chan; int ret; @@ -7136,16 +7133,7 @@ int stmmac_xdp_open(struct net_device *dev) stmmac_set_queue_rx_tail_ptr(priv, rx_q, chan, rx_q->buf_alloc_num); - if (rx_q->xsk_pool && rx_q->buf_alloc_num) { - buf_size = xsk_pool_get_rx_frame_size(rx_q->xsk_pool); - stmmac_set_dma_bfsize(priv, priv->ioaddr, - buf_size, - rx_q->queue_index); - } else { - stmmac_set_dma_bfsize(priv, priv->ioaddr, - priv->dma_conf.dma_buf_sz, - rx_q->queue_index); - } + stmmac_set_queue_rx_buf_size(priv, rx_q, chan); stmmac_enable_sph(priv, priv->ioaddr, sph_en, chan); } From 337191f37748ee9cf986c8154d32209241495d8b Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sat, 14 Mar 2026 09:43:10 +0000 Subject: [PATCH 0620/1561] net: stmmac: simplify stmmac_set_queue_rx_buf_size() Clean up the new stmmac_set_queue_rx_buf_size() to simplify the code. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w1LWY-0000000DGSd-49H6@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 8a149fdc45d0..dacca3189a63 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -399,16 +399,12 @@ static void stmmac_set_queue_rx_buf_size(struct stmmac_priv *priv, { u32 buf_size; - if (rx_q->xsk_pool && rx_q->buf_alloc_num) { + if (rx_q->xsk_pool && rx_q->buf_alloc_num) buf_size = xsk_pool_get_rx_frame_size(rx_q->xsk_pool); - stmmac_set_dma_bfsize(priv, priv->ioaddr, - buf_size, - rx_q->queue_index); - } else { - stmmac_set_dma_bfsize(priv, priv->ioaddr, - priv->dma_conf.dma_buf_sz, - rx_q->queue_index); - } + else + buf_size = priv->dma_conf.dma_buf_sz; + + stmmac_set_dma_bfsize(priv, priv->ioaddr, buf_size, rx_q->queue_index); } /** From c493261de1c4c295c7dab8e59e58069a14d47e42 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sat, 14 Mar 2026 09:43:16 +0000 Subject: [PATCH 0621/1561] net: stmmac: add helper to get hardware transmit descriptor Provide a helper to get the hardware transmit descriptor that takes account of whether extended format and TBS are being used, returning the base struct dma_desc pointer. This avoids multiple instances where these tests are open coded. We need to update dwmac4_display_ring() to cope the passed head pointer always pointing at the struct dma_desc by using dma_desc_to_edesc() to convert it to struct dma_edesc. This is the only stmmac_display_ring() implementation that this affects. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w1LWe-0000000DGSj-0KtE@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/descs.h | 2 + .../ethernet/stmicro/stmmac/dwmac4_descs.c | 2 +- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 87 ++++++------------- 3 files changed, 28 insertions(+), 63 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/descs.h b/drivers/net/ethernet/stmicro/stmmac/descs.h index e62e2ebcf273..846007cc245f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/descs.h +++ b/drivers/net/ethernet/stmicro/stmmac/descs.h @@ -173,6 +173,8 @@ struct dma_edesc { struct dma_desc basic; }; +#define dma_desc_to_edesc(x) container_of(x, struct dma_edesc, basic) + /* Transmit checksum insertion control */ #define TX_CIC_FULL 3 /* Include IP header and pseudoheader */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index 1bbf02504dad..d5c003f3fbbc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -427,7 +427,7 @@ static void dwmac4_display_ring(void *head, unsigned int size, bool rx, extp++; } } else if (desc_size == sizeof(struct dma_edesc)) { - struct dma_edesc *ep = (struct dma_edesc *)head; + struct dma_edesc *ep = dma_desc_to_edesc(head); for (i = 0; i < size; i++) { dma_addr = dma_rx_phy + i * sizeof(*ep); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index dacca3189a63..419ac4cac94e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -362,6 +362,18 @@ static inline u32 stmmac_tx_avail(struct stmmac_priv *priv, u32 queue) priv->dma_conf.dma_tx_size); } +static struct dma_desc *stmmac_get_tx_desc(struct stmmac_priv *priv, + struct stmmac_tx_queue *tx_q, + unsigned int index) +{ + if (priv->extend_desc) + return &tx_q->dma_etx[index].basic; + else if (tx_q->tbs & STMMAC_TBS_AVAIL) + return &tx_q->dma_entx[index].basic; + else + return &tx_q->dma_tx[index]; +} + static size_t stmmac_get_rx_desc_size(struct stmmac_priv *priv) { if (priv->extend_desc) @@ -1489,16 +1501,14 @@ static void stmmac_display_tx_rings(struct stmmac_priv *priv, pr_info("\tTX Queue %d rings\n", queue); - if (priv->extend_desc) { - head_tx = (void *)tx_q->dma_etx; + if (priv->extend_desc) desc_size = sizeof(struct dma_extended_desc); - } else if (tx_q->tbs & STMMAC_TBS_AVAIL) { - head_tx = (void *)tx_q->dma_entx; + else if (tx_q->tbs & STMMAC_TBS_AVAIL) desc_size = sizeof(struct dma_edesc); - } else { - head_tx = (void *)tx_q->dma_tx; + else desc_size = sizeof(struct dma_desc); - } + + head_tx = stmmac_get_tx_desc(priv, tx_q, 0); stmmac_display_ring(priv, head_tx, dma_conf->dma_tx_size, false, tx_q->dma_tx_phy, desc_size); @@ -1587,13 +1597,7 @@ static void stmmac_clear_tx_descriptors(struct stmmac_priv *priv, int last = (i == (dma_conf->dma_tx_size - 1)); struct dma_desc *p; - if (priv->extend_desc) - p = &tx_q->dma_etx[i].basic; - else if (tx_q->tbs & STMMAC_TBS_AVAIL) - p = &tx_q->dma_entx[i].basic; - else - p = &tx_q->dma_tx[i]; - + p = stmmac_get_tx_desc(priv, tx_q, i); stmmac_init_tx_desc(priv, p, priv->mode, last); } } @@ -2021,13 +2025,7 @@ static int __init_dma_tx_desc_rings(struct stmmac_priv *priv, for (i = 0; i < dma_conf->dma_tx_size; i++) { struct dma_desc *p; - if (priv->extend_desc) - p = &((tx_q->dma_etx + i)->basic); - else if (tx_q->tbs & STMMAC_TBS_AVAIL) - p = &((tx_q->dma_entx + i)->basic); - else - p = tx_q->dma_tx + i; - + p = stmmac_get_tx_desc(priv, tx_q, i); stmmac_clear_desc(priv, p); stmmac_set_tx_skb_dma_entry(tx_q, i, 0, 0, false); @@ -2730,13 +2728,7 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget) continue; } - if (likely(priv->extend_desc)) - tx_desc = (struct dma_desc *)(tx_q->dma_etx + entry); - else if (tx_q->tbs & STMMAC_TBS_AVAIL) - tx_desc = &tx_q->dma_entx[entry].basic; - else - tx_desc = tx_q->dma_tx + entry; - + tx_desc = stmmac_get_tx_desc(priv, tx_q, entry); dma_addr = xsk_buff_raw_get_dma(pool, xdp_desc.addr); meta = xsk_buff_get_metadata(pool, xdp_desc.addr); xsk_buff_raw_dma_sync_for_device(pool, dma_addr, xdp_desc.len); @@ -2863,13 +2855,7 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue, skb = NULL; } - if (priv->extend_desc) - p = (struct dma_desc *)(tx_q->dma_etx + entry); - else if (tx_q->tbs & STMMAC_TBS_AVAIL) - p = &tx_q->dma_entx[entry].basic; - else - p = tx_q->dma_tx + entry; - + p = stmmac_get_tx_desc(priv, tx_q, entry); status = stmmac_tx_status(priv, &priv->xstats, p, priv->ioaddr); /* Check if the descriptor is owned by the DMA */ if (unlikely(status & tx_dma_own)) @@ -4776,13 +4762,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) csum_insertion = !csum_insertion; } - if (likely(priv->extend_desc)) - desc = (struct dma_desc *)(tx_q->dma_etx + entry); - else if (tx_q->tbs & STMMAC_TBS_AVAIL) - desc = &tx_q->dma_entx[entry].basic; - else - desc = tx_q->dma_tx + entry; - + desc = stmmac_get_tx_desc(priv, tx_q, entry); first = desc; if (has_vlan) @@ -4807,12 +4787,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) entry = STMMAC_GET_ENTRY(entry, priv->dma_conf.dma_tx_size); WARN_ON(tx_q->tx_skbuff[entry]); - if (likely(priv->extend_desc)) - desc = (struct dma_desc *)(tx_q->dma_etx + entry); - else if (tx_q->tbs & STMMAC_TBS_AVAIL) - desc = &tx_q->dma_entx[entry].basic; - else - desc = tx_q->dma_tx + entry; + desc = stmmac_get_tx_desc(priv, tx_q, entry); des = skb_frag_dma_map(priv->device, frag, 0, len, DMA_TO_DEVICE); @@ -4853,13 +4828,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) set_ic = false; if (set_ic) { - if (likely(priv->extend_desc)) - desc = &tx_q->dma_etx[entry].basic; - else if (tx_q->tbs & STMMAC_TBS_AVAIL) - desc = &tx_q->dma_entx[entry].basic; - else - desc = &tx_q->dma_tx[entry]; - + desc = stmmac_get_tx_desc(priv, tx_q, entry); tx_q->tx_count_frames = 0; stmmac_set_tx_ic(priv, desc); } @@ -5127,13 +5096,7 @@ static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue, return STMMAC_XDP_CONSUMED; } - if (likely(priv->extend_desc)) - tx_desc = (struct dma_desc *)(tx_q->dma_etx + entry); - else if (tx_q->tbs & STMMAC_TBS_AVAIL) - tx_desc = &tx_q->dma_entx[entry].basic; - else - tx_desc = tx_q->dma_tx + entry; - + tx_desc = stmmac_get_tx_desc(priv, tx_q, entry); if (dma_map) { dma_addr = dma_map_single(priv->device, xdpf->data, xdpf->len, DMA_TO_DEVICE); From 2c3525ad09e937e58ad6c78b9257af7d41bf3ab0 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sat, 14 Mar 2026 09:43:21 +0000 Subject: [PATCH 0622/1561] net: stmmac: add helper to get size of a transmit descriptor Add and use a helper to get the size of the hardware transmit descriptor. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w1LWj-0000000DGSp-0lhO@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 40 +++++++------------ 1 file changed, 15 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 419ac4cac94e..ff4f844476bf 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -362,6 +362,17 @@ static inline u32 stmmac_tx_avail(struct stmmac_priv *priv, u32 queue) priv->dma_conf.dma_tx_size); } +static size_t stmmac_get_tx_desc_size(struct stmmac_priv *priv, + struct stmmac_tx_queue *tx_q) +{ + if (priv->extend_desc) + return sizeof(struct dma_extended_desc); + else if (tx_q->tbs & STMMAC_TBS_AVAIL) + return sizeof(struct dma_edesc); + else + return sizeof(struct dma_desc); +} + static struct dma_desc *stmmac_get_tx_desc(struct stmmac_priv *priv, struct stmmac_tx_queue *tx_q, unsigned int index) @@ -1501,14 +1512,8 @@ static void stmmac_display_tx_rings(struct stmmac_priv *priv, pr_info("\tTX Queue %d rings\n", queue); - if (priv->extend_desc) - desc_size = sizeof(struct dma_extended_desc); - else if (tx_q->tbs & STMMAC_TBS_AVAIL) - desc_size = sizeof(struct dma_edesc); - else - desc_size = sizeof(struct dma_desc); - head_tx = stmmac_get_tx_desc(priv, tx_q, 0); + desc_size = stmmac_get_tx_desc_size(priv, tx_q); stmmac_display_ring(priv, head_tx, dma_conf->dma_tx_size, false, tx_q->dma_tx_phy, desc_size); @@ -2186,17 +2191,14 @@ static void __free_dma_tx_desc_resources(struct stmmac_priv *priv, dma_free_tx_skbufs(priv, dma_conf, queue); if (priv->extend_desc) { - size = sizeof(struct dma_extended_desc); addr = tx_q->dma_etx; } else if (tx_q->tbs & STMMAC_TBS_AVAIL) { - size = sizeof(struct dma_edesc); addr = tx_q->dma_entx; } else { - size = sizeof(struct dma_desc); addr = tx_q->dma_tx; } - size *= dma_conf->dma_tx_size; + size = stmmac_get_tx_desc_size(priv, tx_q) * dma_conf->dma_tx_size; dma_free_coherent(priv->device, size, addr, tx_q->dma_tx_phy); @@ -2358,14 +2360,7 @@ static int __alloc_dma_tx_desc_resources(struct stmmac_priv *priv, if (!tx_q->tx_skbuff) return -ENOMEM; - if (priv->extend_desc) - size = sizeof(struct dma_extended_desc); - else if (tx_q->tbs & STMMAC_TBS_AVAIL) - size = sizeof(struct dma_edesc); - else - size = sizeof(struct dma_desc); - - size *= dma_conf->dma_tx_size; + size = stmmac_get_tx_desc_size(priv, tx_q) * dma_conf->dma_tx_size; addr = dma_alloc_coherent(priv->device, size, &tx_q->dma_tx_phy, GFP_KERNEL); @@ -4357,12 +4352,7 @@ static void stmmac_flush_tx_descriptors(struct stmmac_priv *priv, int queue) struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[queue]; int desc_size; - if (likely(priv->extend_desc)) - desc_size = sizeof(struct dma_extended_desc); - else if (tx_q->tbs & STMMAC_TBS_AVAIL) - desc_size = sizeof(struct dma_edesc); - else - desc_size = sizeof(struct dma_desc); + desc_size = stmmac_get_tx_desc_size(priv, tx_q); /* The own bit must be the latest setting done when prepare the * descriptor and then barrier is needed to make sure that From b2fd52d90b8a1daeb6d44583bada7baad82b3c98 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sat, 14 Mar 2026 09:43:26 +0000 Subject: [PATCH 0623/1561] net: stmmac: add helper to set transmit tail pointer Setting the queue transmit tail pointer follows a common pattern: calculate the DMA address, and then call stmmac_set_tx_tail_ptr(). The only difference between all the call sites is the index used. Factor this out into a static function. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w1LWo-0000000DGSw-1MCe@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 30 ++++++++++--------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index ff4f844476bf..dc7b19063564 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -385,6 +385,18 @@ static struct dma_desc *stmmac_get_tx_desc(struct stmmac_priv *priv, return &tx_q->dma_tx[index]; } +static void stmmac_set_queue_tx_tail_ptr(struct stmmac_priv *priv, + struct stmmac_tx_queue *tx_q, + unsigned int chan, unsigned int index) +{ + int desc_size; + + desc_size = stmmac_get_tx_desc_size(priv, tx_q); + + tx_q->tx_tail_addr = tx_q->dma_tx_phy + index * desc_size; + stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, chan); +} + static size_t stmmac_get_rx_desc_size(struct stmmac_priv *priv) { if (priv->extend_desc) @@ -3311,9 +3323,7 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv) stmmac_init_tx_chan(priv, priv->ioaddr, priv->plat->dma_cfg, tx_q->dma_tx_phy, chan); - tx_q->tx_tail_addr = tx_q->dma_tx_phy; - stmmac_set_tx_tail_ptr(priv, priv->ioaddr, - tx_q->tx_tail_addr, chan); + stmmac_set_queue_tx_tail_ptr(priv, tx_q, chan, 0); } return ret; @@ -4350,9 +4360,6 @@ static void stmmac_tso_allocator(struct stmmac_priv *priv, dma_addr_t des, static void stmmac_flush_tx_descriptors(struct stmmac_priv *priv, int queue) { struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[queue]; - int desc_size; - - desc_size = stmmac_get_tx_desc_size(priv, tx_q); /* The own bit must be the latest setting done when prepare the * descriptor and then barrier is needed to make sure that @@ -4360,8 +4367,7 @@ static void stmmac_flush_tx_descriptors(struct stmmac_priv *priv, int queue) */ wmb(); - tx_q->tx_tail_addr = tx_q->dma_tx_phy + (tx_q->cur_tx * desc_size); - stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue); + stmmac_set_queue_tx_tail_ptr(priv, tx_q, queue, tx_q->cur_tx); } /** @@ -6991,9 +6997,7 @@ void stmmac_enable_tx_queue(struct stmmac_priv *priv, u32 queue) if (tx_q->tbs & STMMAC_TBS_AVAIL) stmmac_enable_tbs(priv, priv->ioaddr, 1, tx_q->queue_index); - tx_q->tx_tail_addr = tx_q->dma_tx_phy; - stmmac_set_tx_tail_ptr(priv, priv->ioaddr, - tx_q->tx_tail_addr, tx_q->queue_index); + stmmac_set_queue_tx_tail_ptr(priv, tx_q, tx_q->queue_index, 0); stmmac_start_tx_dma(priv, queue); @@ -7094,9 +7098,7 @@ int stmmac_xdp_open(struct net_device *dev) stmmac_init_tx_chan(priv, priv->ioaddr, priv->plat->dma_cfg, tx_q->dma_tx_phy, chan); - tx_q->tx_tail_addr = tx_q->dma_tx_phy; - stmmac_set_tx_tail_ptr(priv, priv->ioaddr, - tx_q->tx_tail_addr, chan); + stmmac_set_queue_tx_tail_ptr(priv, tx_q, chan, 0); hrtimer_setup(&tx_q->txtimer, stmmac_tx_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); } From 0da7809235d9cc34de71c9c5bf5028cc3034f806 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sat, 14 Mar 2026 09:43:31 +0000 Subject: [PATCH 0624/1561] net: stmmac: remove tx_tail_addr There is only one place where tx_q->tx_tail_addr is used - the new stmmac_set_queue_tx_tail_ptr(). Make this a local variable and remove it from struct stmmac_tx_queue. This commit does not change the semantics - the hardware relies upon the descriptor ring not crossing a 4GiB boundary as the high address bits are programmed into a separate register via stmmac_init_tx_chan(). Hence, truncating the DMA address to 32-bit is fine as the register it will be programmed into is 32-bit, and the high bits are handled elsewhere. Also change the type of desc_size to size_t, as this variable is initialised from sizeof(). Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w1LWt-0000000DGT2-1oeO@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 1 - drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 7 ++++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 2f24d5c584e6..b9d849a3f06e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -79,7 +79,6 @@ struct stmmac_tx_queue { unsigned int cur_tx; unsigned int dirty_tx; dma_addr_t dma_tx_phy; - dma_addr_t tx_tail_addr; u32 mss; }; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index dc7b19063564..f096d52a1672 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -389,12 +389,13 @@ static void stmmac_set_queue_tx_tail_ptr(struct stmmac_priv *priv, struct stmmac_tx_queue *tx_q, unsigned int chan, unsigned int index) { - int desc_size; + size_t desc_size; + u32 tx_tail_addr; desc_size = stmmac_get_tx_desc_size(priv, tx_q); - tx_q->tx_tail_addr = tx_q->dma_tx_phy + index * desc_size; - stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, chan); + tx_tail_addr = tx_q->dma_tx_phy + index * desc_size; + stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_tail_addr, chan); } static size_t stmmac_get_rx_desc_size(struct stmmac_priv *priv) From 8cde3c87039b2a468d7e39ed6927403ce42fe55e Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sat, 14 Mar 2026 09:43:36 +0000 Subject: [PATCH 0625/1561] net: stmmac: use queue rather than ->queue_index We use a lot of ->queue_index where we already have the queue / channel index (which are actually the same index), which has been used to get the queue struct. Since queue and queue_index are identical in priv->(tx|rx)_queue[queue]->queue_index there is no point using the queue_index where we already have queue. Use queue rather than queue_index. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w1LWy-0000000DGT8-2Hoc@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 25 ++++++++----------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index f096d52a1672..0a933aac3f03 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -440,7 +440,7 @@ static void stmmac_set_queue_rx_buf_size(struct stmmac_priv *priv, else buf_size = priv->dma_conf.dma_buf_sz; - stmmac_set_dma_bfsize(priv, priv->ioaddr, buf_size, rx_q->queue_index); + stmmac_set_dma_bfsize(priv, priv->ioaddr, buf_size, chan); } /** @@ -1902,7 +1902,7 @@ static int __init_dma_rx_desc_rings(struct stmmac_priv *priv, NULL)); netdev_info(priv->dev, "Register MEM_TYPE_XSK_BUFF_POOL RxQ-%d\n", - rx_q->queue_index); + queue); xsk_pool_set_rxq_info(rx_q->xsk_pool, &rx_q->xdp_rxq); } else { WARN_ON(xdp_rxq_info_reg_mem_model(&rx_q->xdp_rxq, @@ -1910,7 +1910,7 @@ static int __init_dma_rx_desc_rings(struct stmmac_priv *priv, rx_q->page_pool)); netdev_info(priv->dev, "Register MEM_TYPE_PAGE_POOL RxQ-%d\n", - rx_q->queue_index); + queue); } if (rx_q->xsk_pool) { @@ -2310,9 +2310,7 @@ static int __alloc_dma_rx_desc_resources(struct stmmac_priv *priv, else napi_id = ch->rx_napi.napi_id; - ret = xdp_rxq_info_reg(&rx_q->xdp_rxq, priv->dev, - rx_q->queue_index, - napi_id); + ret = xdp_rxq_info_reg(&rx_q->xdp_rxq, priv->dev, queue, napi_id); if (ret) { netdev_err(priv->dev, "Failed to register xdp rxq info\n"); return -EINVAL; @@ -3340,7 +3338,7 @@ static void stmmac_tx_timer_arm(struct stmmac_priv *priv, u32 queue) if (!tx_coal_timer) return; - ch = &priv->channel[tx_q->queue_index]; + ch = &priv->channel[queue]; napi = tx_q->xsk_pool ? &ch->rxtx_napi : &ch->tx_napi; /* Arm timer only if napi is not already scheduled. @@ -6942,12 +6940,11 @@ void stmmac_enable_rx_queue(struct stmmac_priv *priv, u32 queue) stmmac_clear_rx_descriptors(priv, &priv->dma_conf, queue); stmmac_init_rx_chan(priv, priv->ioaddr, priv->plat->dma_cfg, - rx_q->dma_rx_phy, rx_q->queue_index); + rx_q->dma_rx_phy, queue); - stmmac_set_queue_rx_tail_ptr(priv, rx_q, rx_q->queue_index, - rx_q->buf_alloc_num); + stmmac_set_queue_rx_tail_ptr(priv, rx_q, queue, rx_q->buf_alloc_num); - stmmac_set_queue_rx_buf_size(priv, rx_q, rx_q->queue_index); + stmmac_set_queue_rx_buf_size(priv, rx_q, queue); stmmac_start_rx_dma(priv, queue); @@ -6993,12 +6990,12 @@ void stmmac_enable_tx_queue(struct stmmac_priv *priv, u32 queue) stmmac_clear_tx_descriptors(priv, &priv->dma_conf, queue); stmmac_init_tx_chan(priv, priv->ioaddr, priv->plat->dma_cfg, - tx_q->dma_tx_phy, tx_q->queue_index); + tx_q->dma_tx_phy, queue); if (tx_q->tbs & STMMAC_TBS_AVAIL) - stmmac_enable_tbs(priv, priv->ioaddr, 1, tx_q->queue_index); + stmmac_enable_tbs(priv, priv->ioaddr, 1, queue); - stmmac_set_queue_tx_tail_ptr(priv, tx_q, tx_q->queue_index, 0); + stmmac_set_queue_tx_tail_ptr(priv, tx_q, queue, 0); stmmac_start_tx_dma(priv, queue); From 17a55ddb19567642aa404f5525be3cf4425c6c1a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 13 Mar 2026 16:20:47 -0700 Subject: [PATCH 0626/1561] tools: ynl: rework policy access to support recursion Donald points out that the current naive implementation using dicts breaks if policy is recursive (child nest uses policy idx already used by its parent). Lean more into the NlPolicy class. This lets us "render" the policy on demand, when user accesses it. If someone wants to do an infinite walk that's on them :) Show policy info as attributes of the class and use dict format to descend into sub-policies for extra neatness. Reviewed-by: Donald Hunter Link: https://patch.msgid.link/20260313232047.2068518-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/pyynl/cli.py | 4 +- tools/net/ynl/pyynl/lib/ynl.py | 152 ++++++++++++++++------- tools/testing/selftests/net/nl_nlctrl.py | 26 ++-- 3 files changed, 120 insertions(+), 62 deletions(-) diff --git a/tools/net/ynl/pyynl/cli.py b/tools/net/ynl/pyynl/cli.py index fc9e84754e4b..8275a806cf73 100755 --- a/tools/net/ynl/pyynl/cli.py +++ b/tools/net/ynl/pyynl/cli.py @@ -313,11 +313,11 @@ def main(): if args.policy: if args.do: pol = ynl.get_policy(args.do, 'do') - output(pol.attrs if pol else None) + output(pol.to_dict() if pol else None) args.do = None if args.dump: pol = ynl.get_policy(args.dump, 'dump') - output(pol.attrs if pol else None) + output(pol.to_dict() if pol else None) args.dump = None if args.ntf: diff --git a/tools/net/ynl/pyynl/lib/ynl.py b/tools/net/ynl/pyynl/lib/ynl.py index 0eedeee465d8..9c078599cea0 100644 --- a/tools/net/ynl/pyynl/lib/ynl.py +++ b/tools/net/ynl/pyynl/lib/ynl.py @@ -143,32 +143,117 @@ class ConfigError(Exception): pass -# pylint: disable=too-few-public-methods class NlPolicy: """Kernel policy for one mode (do or dump) of one operation. - Returned by YnlFamily.get_policy(). Contains a dict of attributes - the kernel accepts, with their validation constraints. + Returned by YnlFamily.get_policy(). Attributes of the policy + are accessible as attributes of the object. Nested policies + can be accessed indexing the object like a dictionary:: - Attributes: - attrs: dict mapping attribute names to policy dicts, e.g. - page-pool-stats-get do policy:: + pol = ynl.get_policy('page-pool-stats-get', 'do') + pol['info'].type # 'nested' + pol['info']['id'].type # 'uint' + pol['info']['id'].min_value # 1 - { - 'info': {'type': 'nested', 'policy': { - 'id': {'type': 'uint', 'min-value': 1, - 'max-value': 4294967295}, - 'ifindex': {'type': 'u32', 'min-value': 1, - 'max-value': 2147483647}, - }}, - } + Each policy entry always has a 'type' attribute (e.g. u32, string, + nested). Optional attributes depending on the 'type': min-value, + max-value, min-length, max-length, mask. - Each policy dict always contains 'type' (e.g. u32, string, - nested). Optional keys: min-value, max-value, min-length, - max-length, mask, policy. + Policies can form infinite nesting loops. These loops are trimmed + when policy is converted to a dict with pol.to_dict(). """ - def __init__(self, attrs): - self.attrs = attrs + def __init__(self, ynl, policy_idx, policy_table, attr_set, props=None): + self._policy_idx = policy_idx + self._policy_table = policy_table + self._ynl = ynl + self._props = props or {} + self._entries = {} + self._cache = {} + if policy_idx is not None and policy_idx in policy_table: + for attr_id, decoded in policy_table[policy_idx].items(): + if attr_set and attr_id in attr_set.attrs_by_val: + spec = attr_set.attrs_by_val[attr_id] + name = spec['name'] + else: + spec = None + name = f'attr-{attr_id}' + self._entries[name] = (spec, decoded) + + def __getitem__(self, name): + """Descend into a nested policy by attribute name.""" + if name not in self._cache: + spec, decoded = self._entries[name] + props = dict(decoded) + child_idx = None + child_set = None + if 'policy-idx' in props: + child_idx = props.pop('policy-idx') + if spec and 'nested-attributes' in spec.yaml: + child_set = self._ynl.attr_sets[spec.yaml['nested-attributes']] + self._cache[name] = NlPolicy(self._ynl, child_idx, + self._policy_table, + child_set, props) + return self._cache[name] + + def __getattr__(self, name): + """Access this policy entry's own properties (type, min-value, etc.). + + Underscores in the name are converted to dashes, so that + pol.min_value looks up "min-value". + """ + key = name.replace('_', '-') + try: + # Hack for level-0 which we still want to have .type but we don't + # want type to pointlessly show up in the dict / JSON form. + if not self._props and name == "type": + return "nested" + return self._props[key] + except KeyError: + raise AttributeError(name) + + def get(self, name, default=None): + """Look up a child policy entry by attribute name, with a default.""" + try: + return self[name] + except KeyError: + return default + + def __contains__(self, name): + return name in self._entries + + def __len__(self): + return len(self._entries) + + def __iter__(self): + return iter(self._entries) + + def keys(self): + """Return attribute names accepted by this policy.""" + return self._entries.keys() + + def to_dict(self, seen=None): + """Convert to a plain dict, suitable for JSON serialization. + + Nested NlPolicy objects are expanded recursively. Cyclic + references are trimmed (resolved to just {"type": "nested"}). + """ + if seen is None: + seen = set() + result = dict(self._props) + if self._policy_idx is not None: + if self._policy_idx not in seen: + seen = seen | {self._policy_idx} + children = {} + for name in self: + children[name] = self[name].to_dict(seen) + if self._props: + result['policy'] = children + else: + result = children + return result + + def __repr__(self): + return repr(self.to_dict()) class NlAttr: @@ -1308,28 +1393,6 @@ class YnlFamily(SpecFamily): def do_multi(self, ops): return self._ops(ops) - def _resolve_policy(self, policy_idx, policy_table, attr_set): - attrs = {} - if policy_idx not in policy_table: - return attrs - for attr_id, decoded in policy_table[policy_idx].items(): - if attr_set and attr_id in attr_set.attrs_by_val: - spec = attr_set.attrs_by_val[attr_id] - name = spec['name'] - else: - spec = None - name = f'attr-{attr_id}' - if 'policy-idx' in decoded: - sub_set = None - if spec and 'nested-attributes' in spec.yaml: - sub_set = self.attr_sets[spec.yaml['nested-attributes']] - nested = self._resolve_policy(decoded['policy-idx'], - policy_table, sub_set) - del decoded['policy-idx'] - decoded['policy'] = nested - attrs[name] = decoded - return attrs - def get_policy(self, op_name, mode): """Query running kernel for the Netlink policy of an operation. @@ -1341,8 +1404,8 @@ class YnlFamily(SpecFamily): mode: 'do' or 'dump' Returns: - NlPolicy with an attrs dict mapping attribute names to - their policy properties (type, min/max, nested, etc.), + NlPolicy acting as a read-only dict mapping attribute names + to their policy properties (type, min/max, nested, etc.), or None if the operation has no policy for the given mode. Empty policy usually implies that the operation rejects all attributes. @@ -1353,5 +1416,4 @@ class YnlFamily(SpecFamily): if mode not in op_policy: return None policy_idx = op_policy[mode] - attrs = self._resolve_policy(policy_idx, policy_table, op.attr_set) - return NlPolicy(attrs) + return NlPolicy(self, policy_idx, policy_table, op.attr_set) diff --git a/tools/testing/selftests/net/nl_nlctrl.py b/tools/testing/selftests/net/nl_nlctrl.py index d19e206c2c02..fe1f66dc9435 100755 --- a/tools/testing/selftests/net/nl_nlctrl.py +++ b/tools/testing/selftests/net/nl_nlctrl.py @@ -68,39 +68,35 @@ def getpolicy_dump(_ctrl) -> None: # dev-get: do has a real policy with ifindex, dump has no policy # (only the reject-all policy with maxattr=0) pol = ndev.get_policy('dev-get', 'do') - ksft_in('ifindex', pol.attrs, - comment="dev-get do policy should have ifindex") - ksft_eq(pol.attrs.get('ifindex', {}).get('type'), 'u32') + ksft_in('ifindex', pol, comment="dev-get do policy should have ifindex") + ksft_eq(pol['ifindex'].type, 'u32') pol_dump = ndev.get_policy('dev-get', 'dump') - ksft_eq(len(pol_dump.attrs), 0, - comment="dev-get should not accept any attrs") + ksft_eq(len(pol_dump), 0, comment="dev-get should not accept any attrs") # napi-get: both do and dump have real policies pol_do = ndev.get_policy('napi-get', 'do') - ksft_ge(len(pol_do.attrs), 1) + ksft_ge(len(pol_do), 1) pol_dump = ndev.get_policy('napi-get', 'dump') - ksft_ge(len(pol_dump.attrs), 1) + ksft_ge(len(pol_dump), 1) # -- ethtool (full ops) -- et = EthtoolFamily() # strset-get (has both do and dump, full ops share policy) pol_do = et.get_policy('strset-get', 'do') - ksft_ge(len(pol_do.attrs), 1, comment="strset-get should have a do policy") + ksft_ge(len(pol_do), 1, comment="strset-get should have a do policy") pol_dump = et.get_policy('strset-get', 'dump') - ksft_ge(len(pol_dump.attrs), 1, - comment="strset-get should have a dump policy") + ksft_ge(len(pol_dump), 1, comment="strset-get should have a dump policy") # Same policy means same attribute names - ksft_eq(set(pol_do.attrs.keys()), set(pol_dump.attrs.keys())) + ksft_eq(set(pol_do.keys()), set(pol_dump.keys())) # linkinfo-set is do-only (SET command), no dump pol_do = et.get_policy('linkinfo-set', 'do') - ksft_ge(len(pol_do.attrs), 1, - comment="linkinfo-set should have a do policy") + ksft_ge(len(pol_do), 1, comment="linkinfo-set should have a do policy") pol_dump = et.get_policy('linkinfo-set', 'dump') ksft_eq(pol_dump, None, @@ -113,9 +109,9 @@ def getpolicy_by_op(_ctrl) -> None: # dev-get do policy should have named attributes from the spec pol = ndev.get_policy('dev-get', 'do') - ksft_ge(len(pol.attrs), 1) + ksft_ge(len(pol), 1) # All attr names should be resolved (no 'attr-N' fallbacks) - for name in pol.attrs: + for name in pol: ksft_true(not name.startswith('attr-'), comment=f"unresolved attr name: {name}") From 46906242fe6103c81af59c541be2ab2902d7a335 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 17 Mar 2026 09:02:45 +0100 Subject: [PATCH 0627/1561] dt-bindings: net: micrel: Sort lists Sort lists of PHY models and compatible values alphabetically. Signed-off-by: Geert Uytterhoeven Reviewed-by: Stefan Eichenberger Acked-by: Conor Dooley Link: https://patch.msgid.link/3877a8bca7e4c13119387870d10b0758274fa6a0.1773734298.git.geert+renesas@glider.be Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/net/micrel.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/micrel.yaml b/Documentation/devicetree/bindings/net/micrel.yaml index ecc00169ef80..5d25f0d0a508 100644 --- a/Documentation/devicetree/bindings/net/micrel.yaml +++ b/Documentation/devicetree/bindings/net/micrel.yaml @@ -51,9 +51,9 @@ properties: bits that are currently supported: KSZ8001: register 0x1e, bits 15..14 - KSZ8041: register 0x1e, bits 15..14 KSZ8021: register 0x1f, bits 5..4 KSZ8031: register 0x1f, bits 5..4 + KSZ8041: register 0x1e, bits 15..14 KSZ8051: register 0x1f, bits 5..4 KSZ8081: register 0x1f, bits 5..4 KSZ8091: register 0x1f, bits 5..4 @@ -80,9 +80,9 @@ allOf: contains: enum: - ethernet-phy-id0022.1510 + - ethernet-phy-id0022.1550 - ethernet-phy-id0022.1555 - ethernet-phy-id0022.1556 - - ethernet-phy-id0022.1550 - ethernet-phy-id0022.1560 - ethernet-phy-id0022.161a then: From 22214fb2fad04d23c40598ec9be21ea638d75841 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 17 Mar 2026 09:02:46 +0100 Subject: [PATCH 0628/1561] dt-bindings: net: micrel: KSZ8041RNLI supports LED mode Micrel KSZ8041RNLI supports LED mode, just like KSZ8041. This fixes (a.o.) the following "make dtbs_check" warning: arch/arm/boot/dts/renesas/r8a7791-koelsch.dtb: ethernet-phy@1 (ethernet-phy-id0022.1537): False schema does not allow 1 from schema $id: http://devicetree.org/schemas/net/micrel.yaml Signed-off-by: Geert Uytterhoeven Reviewed-by: Stefan Eichenberger Acked-by: Conor Dooley Link: https://patch.msgid.link/efad6c7e024b3a9aa2882db65909ee5bbbcbdc45.1773734298.git.geert+renesas@glider.be Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/net/micrel.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/net/micrel.yaml b/Documentation/devicetree/bindings/net/micrel.yaml index 5d25f0d0a508..6fa568057b92 100644 --- a/Documentation/devicetree/bindings/net/micrel.yaml +++ b/Documentation/devicetree/bindings/net/micrel.yaml @@ -54,6 +54,7 @@ properties: KSZ8021: register 0x1f, bits 5..4 KSZ8031: register 0x1f, bits 5..4 KSZ8041: register 0x1e, bits 15..14 + KSZ8041RNLI: register 0x1e, bits 15..14 KSZ8051: register 0x1f, bits 5..4 KSZ8081: register 0x1f, bits 5..4 KSZ8091: register 0x1f, bits 5..4 @@ -80,6 +81,7 @@ allOf: contains: enum: - ethernet-phy-id0022.1510 + - ethernet-phy-id0022.1537 - ethernet-phy-id0022.1550 - ethernet-phy-id0022.1555 - ethernet-phy-id0022.1556 From d347b28c492e746c0db9439265e55a50a912e034 Mon Sep 17 00:00:00 2001 From: Nimrod Oren Date: Tue, 17 Mar 2026 12:49:34 +0200 Subject: [PATCH 0629/1561] net/mlx5e: Add hds-thresh query support via ethtool Add support for reporting HDS (Header-Data Split) threshold via ethtool. When applicable, mlx5 hardware splits packets of all sizes with no configurable threshold, so report both hds-thresh and hds-thresh-max as 0 (i.e. always split regardless of size). Signed-off-by: Nimrod Oren Reviewed-by: Carolina Jubran Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Reviewed-by: Joe Damato Link: https://patch.msgid.link/20260317104934.16124-1-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 4a8dc85d5924..bb61e2179078 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -371,6 +371,9 @@ void mlx5e_ethtool_get_ringparam(struct mlx5e_priv *priv, param->tx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE; param->rx_pending = 1 << priv->channels.params.log_rq_mtu_frames; param->tx_pending = 1 << priv->channels.params.log_sq_size; + + kernel_param->hds_thresh = 0; + kernel_param->hds_thresh_max = 0; } static void mlx5e_get_ringparam(struct net_device *dev, @@ -2735,7 +2738,8 @@ const struct ethtool_ops mlx5e_ethtool_ops = { ETHTOOL_COALESCE_USE_ADAPTIVE | ETHTOOL_COALESCE_USE_CQE, .supported_input_xfrm = RXH_XFRM_SYM_OR_XOR, - .supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT, + .supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT | + ETHTOOL_RING_USE_HDS_THRS, .get_drvinfo = mlx5e_get_drvinfo, .get_link = ethtool_op_get_link, .get_link_ext_state = mlx5e_get_link_ext_state, From e611a97032f0fa484d14656acf4a330448bf21a3 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 15 Mar 2026 17:48:43 +0100 Subject: [PATCH 0630/1561] regmap: mdio: make it depend on PHYLIB MDIO-based regmap is the last user of config symbol MDIO_BUS. MDIO access needs a MII bus, which requires PHYLIB for the provider part. Therefore make REGMAP_MDIO depend on PHYLIB, what allows to remove config symbol MDIO_BUS in a follow-up patch. Note: After c5a219395b4e ("regmap: Move selecting for REGMAP_MDIO and REGMAP_IRQ") switching to "depends on" should be fine, w/o risk of a circular dependency. Signed-off-by: Heiner Kallweit Acked-by: Mark Brown Link: https://patch.msgid.link/a21a3b3e-272e-4c61-986e-48a2cb3421d9@gmail.com Signed-off-by: Jakub Kicinski --- drivers/base/regmap/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/regmap/Kconfig b/drivers/base/regmap/Kconfig index 06d7eb2aac14..3c76a5bf67b7 100644 --- a/drivers/base/regmap/Kconfig +++ b/drivers/base/regmap/Kconfig @@ -56,7 +56,7 @@ config REGMAP_W1 config REGMAP_MDIO tristate - select MDIO_BUS + depends on PHYLIB config REGMAP_MMIO tristate From 91283bd5b008d5cd2b6a24f246ffb3e706b8c981 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 15 Mar 2026 17:50:00 +0100 Subject: [PATCH 0631/1561] net: phy: remove Kconfig symbol MDIO_BUS After usage of config symbol MDIO_BUS has been removed from REGMAP_MIO as last user, the symbol can be removed. Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/9cdf83e9-470d-45da-8efe-ace0decf0204@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/Kconfig | 6 ------ drivers/net/phy/Makefile | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 7b73332a13d9..b5ee338b620d 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -3,11 +3,6 @@ # PHY Layer Configuration # -config MDIO_BUS - tristate "MDIO bus consumer layer" - help - MDIO bus consumer layer - config PHYLINK tristate select PHYLIB @@ -19,7 +14,6 @@ config PHYLINK menuconfig PHYLIB tristate "PHY Device support and infrastructure" - select MDIO_BUS help Ethernet controllers are usually attached to PHY devices. This option provides infrastructure for diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index 8d262b4e2be2..05e4878af27a 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -15,7 +15,7 @@ libphy-$(CONFIG_SWPHY) += swphy.o libphy-$(CONFIG_LED_TRIGGER_PHY) += phy_led_triggers.o libphy-$(CONFIG_OPEN_ALLIANCE_HELPERS) += open_alliance_helpers.o -obj-$(CONFIG_MDIO_BUS) += mdio_bus.o +obj-$(CONFIG_PHYLIB) += mdio_bus.o obj-$(CONFIG_PHYLINK) += phylink.o obj-$(CONFIG_PHYLIB) += libphy.o obj-$(CONFIG_PHYLIB) += mdio_devres.o From 9c6b4009da5991db6bf02a47a578885a04a5e3f6 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 16 Mar 2026 11:04:03 +0100 Subject: [PATCH 0632/1561] net: mdio-gpio: remove linux/mdio-gpio.h The three defines from the linux/mdio-gpio.h header are only used in the mdio-gpio module. There's no reason to have them in a public header. Move them into the driver and remove mdio-gpio.h. Signed-off-by: Bartosz Golaszewski Link: https://patch.msgid.link/20260316-gpio-mdio-hdr-cleanup-v1-1-2df696f74728@oss.qualcomm.com Signed-off-by: Jakub Kicinski --- drivers/net/mdio/mdio-gpio.c | 5 ++++- include/linux/mdio-gpio.h | 9 --------- 2 files changed, 4 insertions(+), 10 deletions(-) delete mode 100644 include/linux/mdio-gpio.h diff --git a/drivers/net/mdio/mdio-gpio.c b/drivers/net/mdio/mdio-gpio.c index 1cfd538b5105..c99310889896 100644 --- a/drivers/net/mdio/mdio-gpio.c +++ b/drivers/net/mdio/mdio-gpio.c @@ -20,13 +20,16 @@ #include #include #include -#include #include #include #include #include #include +#define MDIO_GPIO_MDC 0 +#define MDIO_GPIO_MDIO 1 +#define MDIO_GPIO_MDO 2 + struct mdio_gpio_info { struct mdiobb_ctrl ctrl; struct gpio_desc *mdc, *mdio, *mdo; diff --git a/include/linux/mdio-gpio.h b/include/linux/mdio-gpio.h deleted file mode 100644 index cea443a672cb..000000000000 --- a/include/linux/mdio-gpio.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LINUX_MDIO_GPIO_H -#define __LINUX_MDIO_GPIO_H - -#define MDIO_GPIO_MDC 0 -#define MDIO_GPIO_MDIO 1 -#define MDIO_GPIO_MDO 2 - -#endif From 356d4fbcf3defaff0f98d2b6b54f3b26f0ff189d Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 16 Mar 2026 11:04:04 +0100 Subject: [PATCH 0633/1561] net: mdio-gpio: remove linux/platform_data/mdio-gpio.h Nobody defines struct mdio_gpio_platform_data. Remove platform data support from mdio-gpio and drop the header. Signed-off-by: Bartosz Golaszewski Link: https://patch.msgid.link/20260316-gpio-mdio-hdr-cleanup-v1-2-2df696f74728@oss.qualcomm.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 - drivers/net/mdio/mdio-gpio.c | 7 ------- include/linux/platform_data/mdio-gpio.h | 14 -------------- 3 files changed, 22 deletions(-) delete mode 100644 include/linux/platform_data/mdio-gpio.h diff --git a/MAINTAINERS b/MAINTAINERS index 5d477fd592db..7d65f9435950 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9547,7 +9547,6 @@ F: include/linux/phy_fixed.h F: include/linux/phy_link_topology.h F: include/linux/phylib_stubs.h F: include/linux/platform_data/mdio-bcm-unimac.h -F: include/linux/platform_data/mdio-gpio.h F: include/net/phy/ F: include/trace/events/mdio.h F: include/uapi/linux/mdio.h diff --git a/drivers/net/mdio/mdio-gpio.c b/drivers/net/mdio/mdio-gpio.c index c99310889896..958d1c6608ab 100644 --- a/drivers/net/mdio/mdio-gpio.c +++ b/drivers/net/mdio/mdio-gpio.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include @@ -113,7 +112,6 @@ static struct mii_bus *mdio_gpio_bus_init(struct device *dev, struct mdio_gpio_info *bitbang, int bus_id) { - struct mdio_gpio_platform_data *pdata = dev_get_platdata(dev); struct mii_bus *new_bus; bitbang->ctrl.ops = &mdio_gpio_ops; @@ -130,11 +128,6 @@ static struct mii_bus *mdio_gpio_bus_init(struct device *dev, else strscpy(new_bus->id, "gpio", sizeof(new_bus->id)); - if (pdata) { - new_bus->phy_mask = pdata->phy_mask; - new_bus->phy_ignore_ta_mask = pdata->phy_ignore_ta_mask; - } - if (device_is_compatible(dev, "microchip,mdio-smi0")) { bitbang->ctrl.op_c22_read = 0; bitbang->ctrl.op_c22_write = 0; diff --git a/include/linux/platform_data/mdio-gpio.h b/include/linux/platform_data/mdio-gpio.h deleted file mode 100644 index 13874fa6e767..000000000000 --- a/include/linux/platform_data/mdio-gpio.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * MDIO-GPIO bus platform data structure - */ - -#ifndef __LINUX_MDIO_GPIO_PDATA_H -#define __LINUX_MDIO_GPIO_PDATA_H - -struct mdio_gpio_platform_data { - u32 phy_mask; - u32 phy_ignore_ta_mask; -}; - -#endif /* __LINUX_MDIO_GPIO_PDATA_H */ From f87ca3b905e2226b31510de1f53b5df6dd0f80aa Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Mon, 16 Mar 2026 15:36:06 +0200 Subject: [PATCH 0634/1561] net/mlx5: Move crosststamp setup into helper function Move the crosststamp registration logic into a dedicated helper, mlx5_init_crosststamp(). This prepares the code for a follow-up patch around PTM handling. Signed-off-by: Carolina Jubran Reviewed-by: Shahar Shitrit Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260316133607.8738-2-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../ethernet/mellanox/mlx5/core/lib/clock.c | 28 +++++++++++++------ 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index bd4e042077af..3322814819ea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -1301,6 +1301,24 @@ static void mlx5_init_timer_max_freq_adjustment(struct mlx5_core_dev *mdev) min(S32_MAX, 1 << log_max_freq_adjustment); } +static void mlx5_init_crosststamp(struct mlx5_core_dev *mdev, + bool expose_cycles, struct mlx5_clock *clock) +{ +#if defined(CONFIG_X86) + if (!boot_cpu_has(X86_FEATURE_ART)) + return; + + if (!MLX5_CAP_MCAM_REG3(mdev, mtptm) || + !MLX5_CAP_MCAM_REG3(mdev, mtctr)) + return; + + clock->ptp_info.getcrosststamp = mlx5_ptp_getcrosststamp; + if (expose_cycles) + clock->ptp_info.getcrosscycles = mlx5_ptp_getcrosscycles; + +#endif /* CONFIG_X86 */ +} + static void mlx5_init_timer_clock(struct mlx5_core_dev *mdev) { struct mlx5_clock *clock = mdev->clock; @@ -1315,15 +1333,7 @@ static void mlx5_init_timer_clock(struct mlx5_core_dev *mdev) expose_cycles = !MLX5_CAP_GEN(mdev, disciplined_fr_counter) || !mlx5_real_time_mode(mdev); -#ifdef CONFIG_X86 - if (MLX5_CAP_MCAM_REG3(mdev, mtptm) && - MLX5_CAP_MCAM_REG3(mdev, mtctr) && boot_cpu_has(X86_FEATURE_ART)) { - clock->ptp_info.getcrosststamp = mlx5_ptp_getcrosststamp; - if (expose_cycles) - clock->ptp_info.getcrosscycles = - mlx5_ptp_getcrosscycles; - } -#endif /* CONFIG_X86 */ + mlx5_init_crosststamp(mdev, expose_cycles, clock); if (expose_cycles) clock->ptp_info.getcyclesx64 = mlx5_ptp_getcyclesx; From 96aca5efec8a737057f31cd61586a4bb62af9829 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Mon, 16 Mar 2026 15:36:07 +0200 Subject: [PATCH 0635/1561] net/mlx5: Support cross-timestamping on ARM architectures Extend cross-timestamp support for ARM systems that implement the ARM architected timer. Signed-off-by: Carolina Jubran Reviewed-by: Shahar Shitrit Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260316133607.8738-3-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/lib/clock.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index 3322814819ea..d785f1b4f2e1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -38,10 +38,10 @@ #include "lib/eq.h" #include "en.h" #include "clock.h" -#ifdef CONFIG_X86 +#if defined(CONFIG_X86) || defined(CONFIG_ARM_ARCH_TIMER) #include #include -#endif /* CONFIG_X86 */ +#endif /* CONFIG_X86 || CONFIG_ARM_ARCH_TIMER */ #define MLX5_RT_CLOCK_IDENTITY_SIZE MLX5_FLD_SZ_BYTES(mrtcq_reg, rt_clock_identity) @@ -229,7 +229,7 @@ static int mlx5_set_mtutc(struct mlx5_core_dev *dev, u32 *mtutc, u32 size) MLX5_REG_MTUTC, 0, 1); } -#ifdef CONFIG_X86 +#if defined(CONFIG_X86) || defined(CONFIG_ARM_ARCH_TIMER) static bool mlx5_is_ptm_source_time_available(struct mlx5_core_dev *dev) { u32 out[MLX5_ST_SZ_DW(mtptm_reg)] = {0}; @@ -275,7 +275,8 @@ static int mlx5_mtctr_read(struct mlx5_core_dev *mdev, host = MLX5_GET64(mtctr_reg, out, first_clock_timestamp); *sys_counterval = (struct system_counterval_t) { .cycles = host, - .cs_id = CSID_X86_ART, + .cs_id = IS_ENABLED(CONFIG_X86) ? CSID_X86_ART : + CSID_ARM_ARCH_COUNTER, .use_nsecs = true, }; *device = MLX5_GET64(mtctr_reg, out, second_clock_timestamp); @@ -373,7 +374,7 @@ unlock: mlx5_clock_unlock(clock); return err; } -#endif /* CONFIG_X86 */ +#endif /* CONFIG_X86 || CONFIG_ARM_ARCH_TIMER */ static u64 mlx5_read_time(struct mlx5_core_dev *dev, struct ptp_system_timestamp *sts, @@ -1307,7 +1308,8 @@ static void mlx5_init_crosststamp(struct mlx5_core_dev *mdev, #if defined(CONFIG_X86) if (!boot_cpu_has(X86_FEATURE_ART)) return; - +#endif /* CONFIG_X86 */ +#if defined(CONFIG_X86) || defined(CONFIG_ARM_ARCH_TIMER) if (!MLX5_CAP_MCAM_REG3(mdev, mtptm) || !MLX5_CAP_MCAM_REG3(mdev, mtctr)) return; @@ -1316,7 +1318,7 @@ static void mlx5_init_crosststamp(struct mlx5_core_dev *mdev, if (expose_cycles) clock->ptp_info.getcrosscycles = mlx5_ptp_getcrosscycles; -#endif /* CONFIG_X86 */ +#endif /* CONFIG_X86 || CONFIG_ARM_ARCH_TIMER */ } static void mlx5_init_timer_clock(struct mlx5_core_dev *mdev) From d5516452a362aab2c136ab815967c4417c92d228 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 16 Mar 2026 15:14:53 -0700 Subject: [PATCH 0636/1561] qed: Reimplement qed_mcast_bin_from_mac() using library functions The calculation done by qed_calc_crc32c() is the standard least-significant-bit-first CRC-32C except it uses most-significant-bit-first order for the actual CRC variable. That is equivalent to bit-reflecting the input and output CRC. Replace it with equivalent calls to the corresponding library functions. Tested with a simple userspace program which tested that the old and new implementations of qed_mcast_bin_from_mac() produce the same outputs. Signed-off-by: Eric Biggers Link: https://patch.msgid.link/20260316221453.66078-1-ebiggers@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qed/qed_l2.c | 53 +++++------------------- 1 file changed, 11 insertions(+), 42 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 0f6905e33df6..2d873a596261 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -39,7 +40,6 @@ #include "qed_sriov.h" #define QED_MAX_SGES_NUM 16 -#define CRC32_POLY 0x1edc6f41 struct qed_l2_info { u32 queues; @@ -1412,50 +1412,19 @@ int qed_sp_eth_filter_ucast(struct qed_hwfn *p_hwfn, return 0; } -/******************************************************************************* - * Description: - * Calculates crc 32 on a buffer - * Note: crc32_length MUST be aligned to 8 - * Return: - ******************************************************************************/ -static u32 qed_calc_crc32c(u8 *crc32_packet, - u32 crc32_length, u32 crc32_seed, u8 complement) -{ - u32 byte = 0, bit = 0, crc32_result = crc32_seed; - u8 msb = 0, current_byte = 0; - - if ((!crc32_packet) || - (crc32_length == 0) || - ((crc32_length % 8) != 0)) - return crc32_result; - for (byte = 0; byte < crc32_length; byte++) { - current_byte = crc32_packet[byte]; - for (bit = 0; bit < 8; bit++) { - msb = (u8)(crc32_result >> 31); - crc32_result = crc32_result << 1; - if (msb != (0x1 & (current_byte >> bit))) { - crc32_result = crc32_result ^ CRC32_POLY; - crc32_result |= 1; /*crc32_result[0] = 1;*/ - } - } - } - return crc32_result; -} - -static u32 qed_crc32c_le(u32 seed, u8 *mac, u32 len) -{ - u32 packet_buf[2] = { 0 }; - - memcpy((u8 *)(&packet_buf[0]), &mac[0], 6); - return qed_calc_crc32c((u8 *)packet_buf, 8, seed, 0); -} - u8 qed_mcast_bin_from_mac(u8 *mac) { - u32 crc = qed_crc32c_le(ETH_MULTICAST_BIN_FROM_MAC_SEED, - mac, ETH_ALEN); + u8 padded_mac[8] = {}; + u32 crc; - return crc & 0xff; + memcpy(padded_mac, mac, ETH_ALEN); + /* + * This uses the standard CRC-32C, but with the input and output CRCs + * bit-reflected and the bit-reflected output CRC truncated to 8 bits. + */ + crc = crc32c(bitrev32(ETH_MULTICAST_BIN_FROM_MAC_SEED), padded_mac, + sizeof(padded_mac)); + return bitrev8(crc >> 24); } static int From 3883c2b5091016da2b23ba365f296310f1042321 Mon Sep 17 00:00:00 2001 From: Bobby Eshleman Date: Mon, 16 Mar 2026 17:56:15 -0700 Subject: [PATCH 0637/1561] selftests/vsock: auto-detect kernel for guest VMs When running vmtest.sh inside a nested VM the running kernel may not be installed on the filesystem at the standard /boot/ or /usr/lib/modules/ paths. Previously, this would cause vng to fail with "does not exist" since it could not find the kernel image. Instead, this patch uses --dry-run to detect if the kernel is available. If not, then we fall back to the kernel in the kernel source tree. If that fails, then we die. This way runners, like NIPA, can use vng --run arch/x86/boot/bzImage to setup an outer VM, and vmtest.sh will still do the right thing setting up the inner VM. Due to job control issues in vng, a workaround is used to prevent 'make kselftest TARGETS=vsock' from hanging until test timeout. A PR has been placed upstream to solve the issue in vng: https://github.com/arighi/virtme-ng/pull/453 Signed-off-by: Bobby Eshleman Link: https://patch.msgid.link/20260316-vsock-vmtest-autodetect-kernel-v2-1-5eec7b4831f8@meta.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/vsock/vmtest.sh | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tools/testing/selftests/vsock/vmtest.sh b/tools/testing/selftests/vsock/vmtest.sh index 86e338886b33..9eacb753238a 100755 --- a/tools/testing/selftests/vsock/vmtest.sh +++ b/tools/testing/selftests/vsock/vmtest.sh @@ -415,6 +415,19 @@ terminate_pids() { done } +vng_dry_run() { + # WORKAROUND: use setsid to work around a virtme-ng bug where vng hangs + # when called from a background process group (e.g., under make + # kselftest). vng save/restores terminal settings using tcsetattr(), + # which is not allowed for background process groups because the + # controlling terminal is owned by the foreground process group. vng is + # stopped with SIGTTOU and hangs until kselftest's timer expires. + # setsid works around this by launching vng in a new session that has + # no controlling terminal, so tcsetattr() succeeds. + + setsid -w vng --run "$@" --dry-run &>/dev/null +} + vm_start() { local pidfile=$1 local ns=$2 @@ -441,6 +454,12 @@ vm_start() { if [[ "${BUILD}" -eq 1 ]]; then kernel_opt="${KERNEL_CHECKOUT}" + elif vng_dry_run; then + kernel_opt="" + elif vng_dry_run "${KERNEL_CHECKOUT}"; then + kernel_opt="${KERNEL_CHECKOUT}" + else + die "No suitable kernel found" fi if [[ "${ns}" != "init_ns" ]]; then From 04cd075557e8f68e709d69ede0a8daedee3813f9 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 17 Mar 2026 12:45:48 +0200 Subject: [PATCH 0638/1561] net/mlx5e: Remove unused field in mlx5e_flow_steering struct Not used in mlx5e, clean it up. Signed-off-by: Saeed Mahameed Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Reviewed-by: Joe Damato Link: https://patch.msgid.link/20260317104548.15697-1-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 9352e2183312..55255fe6e415 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -47,7 +47,6 @@ struct mlx5e_flow_steering { bool state_destroy; bool vlan_strip_disable; struct mlx5_core_dev *mdev; - struct net_device *netdev; struct mlx5_flow_namespace *ns; struct mlx5_flow_namespace *egress_ns; #ifdef CONFIG_MLX5_EN_RXNFC From dc3d720e12f602059490c1ab2bfee84a7465998f Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Tue, 17 Mar 2026 12:18:05 -0700 Subject: [PATCH 0639/1561] net: ethtool: add ethtool COALESCE_RX_CQE_FRAMES/NSECS Add two parameters for drivers supporting Rx CQE coalescing / descriptor writeback. ETHTOOL_A_COALESCE_RX_CQE_FRAMES: Maximum number of frames that can be coalesced into a CQE or writeback. ETHTOOL_A_COALESCE_RX_CQE_NSECS: Max time in nanoseconds after the first packet arrival in a coalesced CQE or writeback to be sent. Signed-off-by: Haiyang Zhang Link: https://patch.msgid.link/20260317191826.1346111-2-haiyangz@linux.microsoft.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/ethtool.yaml | 8 ++++++++ Documentation/networking/ethtool-netlink.rst | 11 +++++++++++ include/linux/ethtool.h | 6 +++++- include/uapi/linux/ethtool_netlink_generated.h | 2 ++ net/ethtool/coalesce.c | 14 +++++++++++++- 5 files changed, 39 insertions(+), 2 deletions(-) diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index a05b5425b76a..5dd4d1b5d94b 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -857,6 +857,12 @@ attribute-sets: name: tx-profile type: nest nested-attributes: profile + - + name: rx-cqe-frames + type: u32 + - + name: rx-cqe-nsecs + type: u32 - name: pause-stat @@ -2253,6 +2259,8 @@ operations: - tx-aggr-time-usecs - rx-profile - tx-profile + - rx-cqe-frames + - rx-cqe-nsecs dump: *coalesce-get-op - name: coalesce-set diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 32179168eb73..e92abf45faf5 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -1076,6 +1076,8 @@ Kernel response contents: ``ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS`` u32 time (us), aggr, Tx ``ETHTOOL_A_COALESCE_RX_PROFILE`` nested profile of DIM, Rx ``ETHTOOL_A_COALESCE_TX_PROFILE`` nested profile of DIM, Tx + ``ETHTOOL_A_COALESCE_RX_CQE_FRAMES`` u32 max packets, Rx CQE + ``ETHTOOL_A_COALESCE_RX_CQE_NSECS`` u32 delay (ns), Rx CQE =========================================== ====== ======================= Attributes are only included in reply if their value is not zero or the @@ -1109,6 +1111,13 @@ well with frequent small-sized URBs transmissions. to DIM parameters, see `Generic Network Dynamic Interrupt Moderation (Net DIM) `_. +Rx CQE coalescing allows multiple received packets to be coalesced into a +single Completion Queue Entry (CQE) or descriptor writeback. +``ETHTOOL_A_COALESCE_RX_CQE_FRAMES`` describes the maximum number of +frames that can be coalesced into a CQE or writeback. +``ETHTOOL_A_COALESCE_RX_CQE_NSECS`` describes max time in nanoseconds after +the first packet arrival in a coalesced CQE or writeback to be sent. + COALESCE_SET ============ @@ -1147,6 +1156,8 @@ Request contents: ``ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS`` u32 time (us), aggr, Tx ``ETHTOOL_A_COALESCE_RX_PROFILE`` nested profile of DIM, Rx ``ETHTOOL_A_COALESCE_TX_PROFILE`` nested profile of DIM, Tx + ``ETHTOOL_A_COALESCE_RX_CQE_FRAMES`` u32 max packets, Rx CQE + ``ETHTOOL_A_COALESCE_RX_CQE_NSECS`` u32 delay (ns), Rx CQE =========================================== ====== ======================= Request is rejected if it attributes declared as unsupported by driver (i.e. diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 83c375840835..656d465bcd06 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -332,6 +332,8 @@ struct kernel_ethtool_coalesce { u32 tx_aggr_max_bytes; u32 tx_aggr_max_frames; u32 tx_aggr_time_usecs; + u32 rx_cqe_frames; + u32 rx_cqe_nsecs; }; /** @@ -380,7 +382,9 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, #define ETHTOOL_COALESCE_TX_AGGR_TIME_USECS BIT(26) #define ETHTOOL_COALESCE_RX_PROFILE BIT(27) #define ETHTOOL_COALESCE_TX_PROFILE BIT(28) -#define ETHTOOL_COALESCE_ALL_PARAMS GENMASK(28, 0) +#define ETHTOOL_COALESCE_RX_CQE_FRAMES BIT(29) +#define ETHTOOL_COALESCE_RX_CQE_NSECS BIT(30) +#define ETHTOOL_COALESCE_ALL_PARAMS GENMASK(30, 0) #define ETHTOOL_COALESCE_USECS \ (ETHTOOL_COALESCE_RX_USECS | ETHTOOL_COALESCE_TX_USECS) diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index 114b83017297..8134baf7860f 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -371,6 +371,8 @@ enum { ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS, ETHTOOL_A_COALESCE_RX_PROFILE, ETHTOOL_A_COALESCE_TX_PROFILE, + ETHTOOL_A_COALESCE_RX_CQE_FRAMES, + ETHTOOL_A_COALESCE_RX_CQE_NSECS, __ETHTOOL_A_COALESCE_CNT, ETHTOOL_A_COALESCE_MAX = (__ETHTOOL_A_COALESCE_CNT - 1) diff --git a/net/ethtool/coalesce.c b/net/ethtool/coalesce.c index 3e18ca1ccc5e..349bb02c517a 100644 --- a/net/ethtool/coalesce.c +++ b/net/ethtool/coalesce.c @@ -118,6 +118,8 @@ static int coalesce_reply_size(const struct ethnl_req_info *req_base, nla_total_size(sizeof(u32)) + /* _TX_AGGR_MAX_BYTES */ nla_total_size(sizeof(u32)) + /* _TX_AGGR_MAX_FRAMES */ nla_total_size(sizeof(u32)) + /* _TX_AGGR_TIME_USECS */ + nla_total_size(sizeof(u32)) + /* _RX_CQE_FRAMES */ + nla_total_size(sizeof(u32)) + /* _RX_CQE_NSECS */ total_modersz * 2; /* _{R,T}X_PROFILE */ } @@ -269,7 +271,11 @@ static int coalesce_fill_reply(struct sk_buff *skb, coalesce_put_u32(skb, ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES, kcoal->tx_aggr_max_frames, supported) || coalesce_put_u32(skb, ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS, - kcoal->tx_aggr_time_usecs, supported)) + kcoal->tx_aggr_time_usecs, supported) || + coalesce_put_u32(skb, ETHTOOL_A_COALESCE_RX_CQE_FRAMES, + kcoal->rx_cqe_frames, supported) || + coalesce_put_u32(skb, ETHTOOL_A_COALESCE_RX_CQE_NSECS, + kcoal->rx_cqe_nsecs, supported)) return -EMSGSIZE; if (!req_base->dev || !req_base->dev->irq_moder) @@ -338,6 +344,8 @@ const struct nla_policy ethnl_coalesce_set_policy[] = { [ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES] = { .type = NLA_U32 }, [ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES] = { .type = NLA_U32 }, [ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS] = { .type = NLA_U32 }, + [ETHTOOL_A_COALESCE_RX_CQE_FRAMES] = { .type = NLA_U32 }, + [ETHTOOL_A_COALESCE_RX_CQE_NSECS] = { .type = NLA_U32 }, [ETHTOOL_A_COALESCE_RX_PROFILE] = NLA_POLICY_NESTED(coalesce_profile_policy), [ETHTOOL_A_COALESCE_TX_PROFILE] = @@ -570,6 +578,10 @@ __ethnl_set_coalesce(struct ethnl_req_info *req_info, struct genl_info *info, tb[ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES], &mod); ethnl_update_u32(&kernel_coalesce.tx_aggr_time_usecs, tb[ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS], &mod); + ethnl_update_u32(&kernel_coalesce.rx_cqe_frames, + tb[ETHTOOL_A_COALESCE_RX_CQE_FRAMES], &mod); + ethnl_update_u32(&kernel_coalesce.rx_cqe_nsecs, + tb[ETHTOOL_A_COALESCE_RX_CQE_NSECS], &mod); if (dev->irq_moder && dev->irq_moder->profile_flags & DIM_PROFILE_RX) { ret = ethnl_update_profile(dev, &dev->irq_moder->rx_profile, From c2fe3ff3d66d6f53ec5857c277fae5b3ff9881c1 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Tue, 17 Mar 2026 12:18:06 -0700 Subject: [PATCH 0640/1561] net: mana: Add support for RX CQE Coalescing Our NIC can have up to 4 RX packets on 1 CQE. To support this feature, check and process the type CQE_RX_COALESCED_4. The default setting is disabled, to avoid possible regression on latency. And, add ethtool handler to switch this feature. To turn it on, run: ethtool -C rx-cqe-frames 4 To turn it off: ethtool -C rx-cqe-frames 1 The rx-cqe-nsec is the time out value in nanoseconds after the first packet arrival in a coalesced CQE to be sent. It's read-only for this NIC. Reviewed-by: Long Li Signed-off-by: Haiyang Zhang Link: https://patch.msgid.link/20260317191826.1346111-3-haiyangz@linux.microsoft.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microsoft/mana/mana_en.c | 82 ++++++++++++------- .../ethernet/microsoft/mana/mana_ethtool.c | 60 +++++++++++++- include/net/mana/mana.h | 8 +- 3 files changed, 117 insertions(+), 33 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index ea71de39f996..fa30046dcd3d 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -1365,6 +1365,7 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc, sizeof(resp)); req->hdr.req.msg_version = GDMA_MESSAGE_V2; + req->hdr.resp.msg_version = GDMA_MESSAGE_V2; req->vport = apc->port_handle; req->num_indir_entries = apc->indir_table_sz; @@ -1376,7 +1377,9 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc, req->update_hashkey = update_key; req->update_indir_tab = update_tab; req->default_rxobj = apc->default_rxobj; - req->cqe_coalescing_enable = 0; + + if (rx != TRI_STATE_FALSE) + req->cqe_coalescing_enable = apc->cqe_coalescing_enable; if (update_key) memcpy(&req->hashkey, apc->hashkey, MANA_HASH_KEY_SIZE); @@ -1405,8 +1408,13 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc, netdev_err(ndev, "vPort RX configuration failed: 0x%x\n", resp.hdr.status); err = -EPROTO; + goto out; } + if (resp.hdr.response.msg_version >= GDMA_MESSAGE_V2) + apc->cqe_coalescing_timeout_ns = + resp.cqe_coalescing_timeout_ns; + netdev_info(ndev, "Configured steering vPort %llu entries %u\n", apc->port_handle, apc->indir_table_sz); out: @@ -1915,11 +1923,12 @@ static struct sk_buff *mana_build_skb(struct mana_rxq *rxq, void *buf_va, } static void mana_rx_skb(void *buf_va, bool from_pool, - struct mana_rxcomp_oob *cqe, struct mana_rxq *rxq) + struct mana_rxcomp_oob *cqe, struct mana_rxq *rxq, + int i) { struct mana_stats_rx *rx_stats = &rxq->stats; struct net_device *ndev = rxq->ndev; - uint pkt_len = cqe->ppi[0].pkt_len; + uint pkt_len = cqe->ppi[i].pkt_len; u16 rxq_idx = rxq->rxq_idx; struct napi_struct *napi; struct xdp_buff xdp = {}; @@ -1963,7 +1972,7 @@ static void mana_rx_skb(void *buf_va, bool from_pool, } if (cqe->rx_hashtype != 0 && (ndev->features & NETIF_F_RXHASH)) { - hash_value = cqe->ppi[0].pkt_hash; + hash_value = cqe->ppi[i].pkt_hash; if (cqe->rx_hashtype & MANA_HASH_L4) skb_set_hash(skb, hash_value, PKT_HASH_TYPE_L4); @@ -2098,9 +2107,11 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq, struct mana_recv_buf_oob *rxbuf_oob; struct mana_port_context *apc; struct device *dev = gc->dev; + bool coalesced = false; void *old_buf = NULL; u32 curr, pktlen; bool old_fp; + int i; apc = netdev_priv(ndev); @@ -2112,12 +2123,15 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq, ++ndev->stats.rx_dropped; rxbuf_oob = &rxq->rx_oobs[rxq->buf_index]; netdev_warn_once(ndev, "Dropped a truncated packet\n"); - goto drop; + + mana_move_wq_tail(rxq->gdma_rq, + rxbuf_oob->wqe_inf.wqe_size_in_bu); + mana_post_pkt_rxq(rxq); + return; case CQE_RX_COALESCED_4: - netdev_err(ndev, "RX coalescing is unsupported\n"); - apc->eth_stats.rx_coalesced_err++; - return; + coalesced = true; + break; case CQE_RX_OBJECT_FENCE: complete(&rxq->fence_event); @@ -2130,30 +2144,37 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq, return; } - pktlen = oob->ppi[0].pkt_len; + for (i = 0; i < MANA_RXCOMP_OOB_NUM_PPI; i++) { + old_buf = NULL; + pktlen = oob->ppi[i].pkt_len; + if (pktlen == 0) { + if (i == 0) + netdev_err_once( + ndev, + "RX pkt len=0, rq=%u, cq=%u, rxobj=0x%llx\n", + rxq->gdma_id, cq->gdma_id, rxq->rxobj); + break; + } - if (pktlen == 0) { - /* data packets should never have packetlength of zero */ - netdev_err(ndev, "RX pkt len=0, rq=%u, cq=%u, rxobj=0x%llx\n", - rxq->gdma_id, cq->gdma_id, rxq->rxobj); - return; + curr = rxq->buf_index; + rxbuf_oob = &rxq->rx_oobs[curr]; + WARN_ON_ONCE(rxbuf_oob->wqe_inf.wqe_size_in_bu != 1); + + mana_refill_rx_oob(dev, rxq, rxbuf_oob, &old_buf, &old_fp); + + /* Unsuccessful refill will have old_buf == NULL. + * In this case, mana_rx_skb() will drop the packet. + */ + mana_rx_skb(old_buf, old_fp, oob, rxq, i); + + mana_move_wq_tail(rxq->gdma_rq, + rxbuf_oob->wqe_inf.wqe_size_in_bu); + + mana_post_pkt_rxq(rxq); + + if (!coalesced) + break; } - - curr = rxq->buf_index; - rxbuf_oob = &rxq->rx_oobs[curr]; - WARN_ON_ONCE(rxbuf_oob->wqe_inf.wqe_size_in_bu != 1); - - mana_refill_rx_oob(dev, rxq, rxbuf_oob, &old_buf, &old_fp); - - /* Unsuccessful refill will have old_buf == NULL. - * In this case, mana_rx_skb() will drop the packet. - */ - mana_rx_skb(old_buf, old_fp, oob, rxq); - -drop: - mana_move_wq_tail(rxq->gdma_rq, rxbuf_oob->wqe_inf.wqe_size_in_bu); - - mana_post_pkt_rxq(rxq); } static void mana_poll_rx_cq(struct mana_cq *cq) @@ -3332,6 +3353,7 @@ static int mana_probe_port(struct mana_context *ac, int port_idx, apc->port_handle = INVALID_MANA_HANDLE; apc->pf_filter_handle = INVALID_MANA_HANDLE; apc->port_idx = port_idx; + apc->cqe_coalescing_enable = 0; mutex_init(&apc->vport_mutex); apc->vport_use_count = 0; diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c index f2d220b371b5..4b234b16e57a 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c +++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c @@ -20,8 +20,6 @@ static const struct mana_stats_desc mana_eth_stats[] = { tx_cqe_unknown_type)}, {"tx_linear_pkt_cnt", offsetof(struct mana_ethtool_stats, tx_linear_pkt_cnt)}, - {"rx_coalesced_err", offsetof(struct mana_ethtool_stats, - rx_coalesced_err)}, {"rx_cqe_unknown_type", offsetof(struct mana_ethtool_stats, rx_cqe_unknown_type)}, }; @@ -390,6 +388,61 @@ static void mana_get_channels(struct net_device *ndev, channel->combined_count = apc->num_queues; } +#define MANA_RX_CQE_NSEC_DEF 2048 +static int mana_get_coalesce(struct net_device *ndev, + struct ethtool_coalesce *ec, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) +{ + struct mana_port_context *apc = netdev_priv(ndev); + + kernel_coal->rx_cqe_frames = + apc->cqe_coalescing_enable ? MANA_RXCOMP_OOB_NUM_PPI : 1; + + kernel_coal->rx_cqe_nsecs = apc->cqe_coalescing_timeout_ns; + + /* Return the default timeout value for old FW not providing + * this value. + */ + if (apc->port_is_up && apc->cqe_coalescing_enable && + !kernel_coal->rx_cqe_nsecs) + kernel_coal->rx_cqe_nsecs = MANA_RX_CQE_NSEC_DEF; + + return 0; +} + +static int mana_set_coalesce(struct net_device *ndev, + struct ethtool_coalesce *ec, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) +{ + struct mana_port_context *apc = netdev_priv(ndev); + u8 saved_cqe_coalescing_enable; + int err; + + if (kernel_coal->rx_cqe_frames != 1 && + kernel_coal->rx_cqe_frames != MANA_RXCOMP_OOB_NUM_PPI) { + NL_SET_ERR_MSG_FMT(extack, + "rx-frames must be 1 or %u, got %u", + MANA_RXCOMP_OOB_NUM_PPI, + kernel_coal->rx_cqe_frames); + return -EINVAL; + } + + saved_cqe_coalescing_enable = apc->cqe_coalescing_enable; + apc->cqe_coalescing_enable = + kernel_coal->rx_cqe_frames == MANA_RXCOMP_OOB_NUM_PPI; + + if (!apc->port_is_up) + return 0; + + err = mana_config_rss(apc, TRI_STATE_TRUE, false, false); + if (err) + apc->cqe_coalescing_enable = saved_cqe_coalescing_enable; + + return err; +} + static int mana_set_channels(struct net_device *ndev, struct ethtool_channels *channels) { @@ -510,6 +563,7 @@ static int mana_get_link_ksettings(struct net_device *ndev, } const struct ethtool_ops mana_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_RX_CQE_FRAMES, .get_ethtool_stats = mana_get_ethtool_stats, .get_sset_count = mana_get_sset_count, .get_strings = mana_get_strings, @@ -520,6 +574,8 @@ const struct ethtool_ops mana_ethtool_ops = { .set_rxfh = mana_set_rxfh, .get_channels = mana_get_channels, .set_channels = mana_set_channels, + .get_coalesce = mana_get_coalesce, + .set_coalesce = mana_set_coalesce, .get_ringparam = mana_get_ringparam, .set_ringparam = mana_set_ringparam, .get_link_ksettings = mana_get_link_ksettings, diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index a078af283bdd..a7f89e7ddc56 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -378,7 +378,6 @@ struct mana_ethtool_stats { u64 tx_cqe_err; u64 tx_cqe_unknown_type; u64 tx_linear_pkt_cnt; - u64 rx_coalesced_err; u64 rx_cqe_unknown_type; }; @@ -557,6 +556,9 @@ struct mana_port_context { bool port_is_up; bool port_st_save; /* Saved port state */ + u8 cqe_coalescing_enable; + u32 cqe_coalescing_timeout_ns; + struct mana_ethtool_stats eth_stats; struct mana_ethtool_phy_stats phy_stats; @@ -902,6 +904,10 @@ struct mana_cfg_rx_steer_req_v2 { struct mana_cfg_rx_steer_resp { struct gdma_resp_hdr hdr; + + /* V2 */ + u32 cqe_coalescing_timeout_ns; + u32 reserved1; }; /* HW DATA */ /* Register HW vPort */ From d01440e10a82cae2c4a28c76e46e6a8b94b27a84 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Tue, 17 Mar 2026 12:18:07 -0700 Subject: [PATCH 0641/1561] net: mana: Add ethtool counters for RX CQEs in coalesced type For RX CQEs with type CQE_RX_COALESCED_4, to measure the coalescing efficiency, add counters to count how many contains 2, 3, 4 packets respectively. Also, add a counter for the error case of first packet with length == 0. Reviewed-by: Long Li Signed-off-by: Haiyang Zhang Link: https://patch.msgid.link/20260317191826.1346111-4-haiyangz@linux.microsoft.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microsoft/mana/mana_en.c | 24 +++++++++++++------ .../ethernet/microsoft/mana/mana_ethtool.c | 15 ++++++++++-- include/net/mana/mana.h | 9 ++++--- 3 files changed, 36 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index fa30046dcd3d..49c65cc1697c 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -2147,14 +2147,8 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq, for (i = 0; i < MANA_RXCOMP_OOB_NUM_PPI; i++) { old_buf = NULL; pktlen = oob->ppi[i].pkt_len; - if (pktlen == 0) { - if (i == 0) - netdev_err_once( - ndev, - "RX pkt len=0, rq=%u, cq=%u, rxobj=0x%llx\n", - rxq->gdma_id, cq->gdma_id, rxq->rxobj); + if (pktlen == 0) break; - } curr = rxq->buf_index; rxbuf_oob = &rxq->rx_oobs[curr]; @@ -2175,6 +2169,22 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq, if (!coalesced) break; } + + /* Collect coalesced CQE count based on packets processed. + * Coalesced CQEs have at least 2 packets, so index is i - 2. + */ + if (i > 1) { + u64_stats_update_begin(&rxq->stats.syncp); + rxq->stats.coalesced_cqe[i - 2]++; + u64_stats_update_end(&rxq->stats.syncp); + } else if (!i && !pktlen) { + u64_stats_update_begin(&rxq->stats.syncp); + rxq->stats.pkt_len0_err++; + u64_stats_update_end(&rxq->stats.syncp); + netdev_err_once(ndev, + "RX pkt len=0, rq=%u, cq=%u, rxobj=0x%llx\n", + rxq->gdma_id, cq->gdma_id, rxq->rxobj); + } } static void mana_poll_rx_cq(struct mana_cq *cq) diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c index 4b234b16e57a..6a4b42fe0944 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c +++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c @@ -149,7 +149,7 @@ static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data) { struct mana_port_context *apc = netdev_priv(ndev); unsigned int num_queues = apc->num_queues; - int i; + int i, j; if (stringset != ETH_SS_STATS) return; @@ -168,6 +168,9 @@ static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data) ethtool_sprintf(&data, "rx_%d_xdp_drop", i); ethtool_sprintf(&data, "rx_%d_xdp_tx", i); ethtool_sprintf(&data, "rx_%d_xdp_redirect", i); + ethtool_sprintf(&data, "rx_%d_pkt_len0_err", i); + for (j = 0; j < MANA_RXCOMP_OOB_NUM_PPI - 1; j++) + ethtool_sprintf(&data, "rx_%d_coalesced_cqe_%d", i, j + 2); } for (i = 0; i < num_queues; i++) { @@ -201,6 +204,8 @@ static void mana_get_ethtool_stats(struct net_device *ndev, u64 xdp_xmit; u64 xdp_drop; u64 xdp_tx; + u64 pkt_len0_err; + u64 coalesced_cqe[MANA_RXCOMP_OOB_NUM_PPI - 1]; u64 tso_packets; u64 tso_bytes; u64 tso_inner_packets; @@ -209,7 +214,7 @@ static void mana_get_ethtool_stats(struct net_device *ndev, u64 short_pkt_fmt; u64 csum_partial; u64 mana_map_err; - int q, i = 0; + int q, i = 0, j; if (!apc->port_is_up) return; @@ -239,6 +244,9 @@ static void mana_get_ethtool_stats(struct net_device *ndev, xdp_drop = rx_stats->xdp_drop; xdp_tx = rx_stats->xdp_tx; xdp_redirect = rx_stats->xdp_redirect; + pkt_len0_err = rx_stats->pkt_len0_err; + for (j = 0; j < MANA_RXCOMP_OOB_NUM_PPI - 1; j++) + coalesced_cqe[j] = rx_stats->coalesced_cqe[j]; } while (u64_stats_fetch_retry(&rx_stats->syncp, start)); data[i++] = packets; @@ -246,6 +254,9 @@ static void mana_get_ethtool_stats(struct net_device *ndev, data[i++] = xdp_drop; data[i++] = xdp_tx; data[i++] = xdp_redirect; + data[i++] = pkt_len0_err; + for (j = 0; j < MANA_RXCOMP_OOB_NUM_PPI - 1; j++) + data[i++] = coalesced_cqe[j]; } for (q = 0; q < num_queues; q++) { diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index a7f89e7ddc56..3336688fed5e 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -61,8 +61,11 @@ enum TRI_STATE { #define MAX_PORTS_IN_MANA_DEV 256 +/* Maximum number of packets per coalesced CQE */ +#define MANA_RXCOMP_OOB_NUM_PPI 4 + /* Update this count whenever the respective structures are changed */ -#define MANA_STATS_RX_COUNT 5 +#define MANA_STATS_RX_COUNT (6 + MANA_RXCOMP_OOB_NUM_PPI - 1) #define MANA_STATS_TX_COUNT 11 #define MANA_RX_FRAG_ALIGNMENT 64 @@ -73,6 +76,8 @@ struct mana_stats_rx { u64 xdp_drop; u64 xdp_tx; u64 xdp_redirect; + u64 pkt_len0_err; + u64 coalesced_cqe[MANA_RXCOMP_OOB_NUM_PPI - 1]; struct u64_stats_sync syncp; }; @@ -227,8 +232,6 @@ struct mana_rxcomp_perpkt_info { u32 pkt_hash; }; /* HW DATA */ -#define MANA_RXCOMP_OOB_NUM_PPI 4 - /* Receive completion OOB */ struct mana_rxcomp_oob { struct mana_cqe_header cqe_hdr; From eef6d4449e8a540fde792968a26d8aa514af8089 Mon Sep 17 00:00:00 2001 From: Shin-Yi Lin Date: Thu, 12 Mar 2026 13:57:24 +0800 Subject: [PATCH 0642/1561] wifi: rtw89: usb: Rx aggregation for RTL8832CU/RTL8851BU USB RX Aggregation is a performance optimization technique used in USB network devices to increase throughput. Instead of sending every received network packet to the host computer individually, the device hardware groups multiple smaller packets into a single, large USB Bulk Transfer. * toAP/toNB use iperf3 respectively. With Cisco BE6000 - iperf3 tcp 10 pair (to another NB) [6G 160Mhz]: RTL8832CU-USB3.0 before after TX 941 941 RX 847 919 RTL8832CU-USB2.0 before after TX 293 286 RX 342 356 [5G 80Mhz]: RTL8832CU-USB3.0 before after TX 864 877 RX 864 902 RTL8832CU-USB2.0 before after TX 279 271 RX 327 349 RTL8851BU before after TX 115 114 RX 295 306 Signed-off-by: Shin-Yi Lin Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260312055724.12177-1-pkshih@realtek.com --- .../net/wireless/realtek/rtw89/rtw8851bu.c | 1 + .../net/wireless/realtek/rtw89/rtw8852au.c | 1 + .../net/wireless/realtek/rtw89/rtw8852bu.c | 1 + .../net/wireless/realtek/rtw89/rtw8852cu.c | 1 + drivers/net/wireless/realtek/rtw89/usb.c | 84 ++++++++++++++++--- drivers/net/wireless/realtek/rtw89/usb.h | 12 +++ 6 files changed, 87 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/rtw8851bu.c b/drivers/net/wireless/realtek/rtw89/rtw8851bu.c index 959d62aefdd8..6a8d31544314 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8851bu.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8851bu.c @@ -15,6 +15,7 @@ static const struct rtw89_usb_info rtw8851b_usb_info = { .usb3_mac_npi_config_intf_0 = R_AX_USB3_MAC_NPI_CONFIG_INTF_0, .usb_endpoint_0 = R_AX_USB_ENDPOINT_0, .usb_endpoint_2 = R_AX_USB_ENDPOINT_2, + .rx_agg_alignment = 8, .bulkout_id = { [RTW89_DMA_ACH0] = 3, [RTW89_DMA_ACH1] = 4, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852au.c b/drivers/net/wireless/realtek/rtw89/rtw8852au.c index ccdbcc178c2a..4cced4619b7d 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852au.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852au.c @@ -15,6 +15,7 @@ static const struct rtw89_usb_info rtw8852a_usb_info = { .usb3_mac_npi_config_intf_0 = R_AX_USB3_MAC_NPI_CONFIG_INTF_0, .usb_endpoint_0 = R_AX_USB_ENDPOINT_0, .usb_endpoint_2 = R_AX_USB_ENDPOINT_2, + .rx_agg_alignment = 8, .bulkout_id = { [RTW89_DMA_ACH0] = 3, [RTW89_DMA_ACH2] = 5, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852bu.c b/drivers/net/wireless/realtek/rtw89/rtw8852bu.c index 84cd3ec971f9..37111fed276f 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852bu.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852bu.c @@ -15,6 +15,7 @@ static const struct rtw89_usb_info rtw8852b_usb_info = { .usb3_mac_npi_config_intf_0 = R_AX_USB3_MAC_NPI_CONFIG_INTF_0, .usb_endpoint_0 = R_AX_USB_ENDPOINT_0, .usb_endpoint_2 = R_AX_USB_ENDPOINT_2, + .rx_agg_alignment = 8, .bulkout_id = { [RTW89_DMA_ACH0] = 3, [RTW89_DMA_ACH1] = 4, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852cu.c b/drivers/net/wireless/realtek/rtw89/rtw8852cu.c index 3b9825c92a0d..0c5aebaed873 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852cu.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852cu.c @@ -15,6 +15,7 @@ static const struct rtw89_usb_info rtw8852c_usb_info = { .usb3_mac_npi_config_intf_0 = R_AX_USB3_MAC_NPI_CONFIG_INTF_0_V1, .usb_endpoint_0 = R_AX_USB_ENDPOINT_0_V1, .usb_endpoint_2 = R_AX_USB_ENDPOINT_2_V1, + .rx_agg_alignment = 8, .bulkout_id = { [RTW89_DMA_ACH0] = 3, [RTW89_DMA_ACH2] = 5, diff --git a/drivers/net/wireless/realtek/rtw89/usb.c b/drivers/net/wireless/realtek/rtw89/usb.c index 64f0c0ec5ed4..581b8c05f930 100644 --- a/drivers/net/wireless/realtek/rtw89/usb.c +++ b/drivers/net/wireless/realtek/rtw89/usb.c @@ -408,11 +408,14 @@ static int rtw89_usb_ops_tx_write(struct rtw89_dev *rtwdev, static void rtw89_usb_rx_handler(struct work_struct *work) { struct rtw89_usb *rtwusb = container_of(work, struct rtw89_usb, rx_work); + const struct rtw89_usb_info *info = rtwusb->info; struct rtw89_dev *rtwdev = rtwusb->rtwdev; struct rtw89_rx_desc_info desc_info; + s32 aligned_offset, remaining; struct sk_buff *rx_skb; struct sk_buff *skb; u32 pkt_offset; + u8 *pkt_ptr; int limit; for (limit = 0; limit < 200; limit++) { @@ -425,23 +428,38 @@ static void rtw89_usb_rx_handler(struct work_struct *work) goto free_or_reuse; } - memset(&desc_info, 0, sizeof(desc_info)); - rtw89_chip_query_rxdesc(rtwdev, &desc_info, rx_skb->data, 0); + pkt_ptr = rx_skb->data; + remaining = rx_skb->len; - skb = rtw89_alloc_skb_for_rx(rtwdev, desc_info.pkt_size); - if (!skb) { - rtw89_debug(rtwdev, RTW89_DBG_HCI, - "failed to allocate RX skb of size %u\n", - desc_info.pkt_size); - goto free_or_reuse; - } + do { + memset(&desc_info, 0, sizeof(desc_info)); + rtw89_chip_query_rxdesc(rtwdev, &desc_info, pkt_ptr, 0); - pkt_offset = desc_info.offset + desc_info.rxd_len; + pkt_offset = desc_info.offset + desc_info.rxd_len; + if (remaining < (pkt_offset + desc_info.pkt_size)) { + rtw89_debug(rtwdev, RTW89_DBG_HCI, + "Failed to get remaining RX pkt %u > %u\n", + pkt_offset + desc_info.pkt_size, remaining); + goto free_or_reuse; + } - skb_put_data(skb, rx_skb->data + pkt_offset, - desc_info.pkt_size); + skb = rtw89_alloc_skb_for_rx(rtwdev, desc_info.pkt_size); + if (!skb) { + rtw89_debug(rtwdev, RTW89_DBG_HCI, + "failed to allocate RX skb of size %u\n", + desc_info.pkt_size); + goto free_or_reuse; + } - rtw89_core_rx(rtwdev, &desc_info, skb); + skb_put_data(skb, pkt_ptr + pkt_offset, desc_info.pkt_size); + rtw89_core_rx(rtwdev, &desc_info, skb); + + /* next frame */ + pkt_offset += desc_info.pkt_size; + aligned_offset = ALIGN(pkt_offset, info->rx_agg_alignment); + pkt_ptr += aligned_offset; + remaining -= aligned_offset; + } while (remaining > 0); free_or_reuse: if (skb_queue_len(&rtwusb->rx_free_queue) >= RTW89_USB_RX_SKB_NUM) @@ -745,6 +763,44 @@ static int rtw89_usb_ops_mac_pre_deinit(struct rtw89_dev *rtwdev) return 0; /* Nothing to do. */ } +static void rtw89_usb_rx_agg_cfg_v1(struct rtw89_dev *rtwdev) +{ + const u32 rxagg_0 = FIELD_PREP_CONST(B_AX_RXAGG_0_EN, 1) | + FIELD_PREP_CONST(B_AX_RXAGG_0_NUM_TH, 0) | + FIELD_PREP_CONST(B_AX_RXAGG_0_TIME_32US_TH, 32) | + FIELD_PREP_CONST(B_AX_RXAGG_0_BUF_SZ_4K, 5); + + rtw89_write32(rtwdev, R_AX_RXAGG_0, rxagg_0); +} + +static void rtw89_usb_rx_agg_cfg_v2(struct rtw89_dev *rtwdev) +{ + const u32 rxagg_0 = FIELD_PREP_CONST(B_AX_RXAGG_0_EN, 1) | + FIELD_PREP_CONST(B_AX_RXAGG_0_NUM_TH, 255) | + FIELD_PREP_CONST(B_AX_RXAGG_0_TIME_32US_TH, 32) | + FIELD_PREP_CONST(B_AX_RXAGG_0_BUF_SZ_1K, 20); + + rtw89_write32(rtwdev, R_AX_RXAGG_0_V1, rxagg_0); + rtw89_write32(rtwdev, R_AX_RXAGG_1_V1, 0x1F); +} + +static void rtw89_usb_rx_agg_cfg(struct rtw89_dev *rtwdev) +{ + switch (rtwdev->chip->chip_id) { + case RTL8851B: + case RTL8852A: + case RTL8852B: + rtw89_usb_rx_agg_cfg_v1(rtwdev); + break; + case RTL8852C: + rtw89_usb_rx_agg_cfg_v2(rtwdev); + break; + default: + rtw89_warn(rtwdev, "%s: USB RX agg not support\n", __func__); + return; + } +} + static int rtw89_usb_ops_mac_post_init(struct rtw89_dev *rtwdev) { struct rtw89_usb *rtwusb = rtw89_usb_priv(rtwdev); @@ -773,6 +829,8 @@ static int rtw89_usb_ops_mac_post_init(struct rtw89_dev *rtwdev) rtw89_write8(rtwdev, info->usb_endpoint_2 + 1, NUMP); } + rtw89_usb_rx_agg_cfg(rtwdev); + return 0; } diff --git a/drivers/net/wireless/realtek/rtw89/usb.h b/drivers/net/wireless/realtek/rtw89/usb.h index 203ec8e993e9..3d17e514e346 100644 --- a/drivers/net/wireless/realtek/rtw89/usb.h +++ b/drivers/net/wireless/realtek/rtw89/usb.h @@ -20,6 +20,17 @@ #define RTW89_MAX_ENDPOINT_NUM 9 #define RTW89_MAX_BULKOUT_NUM 7 +#define R_AX_RXAGG_0_V1 0x6000 +#define B_AX_RXAGG_0_EN BIT(31) +#define B_AX_RXAGG_0_NUM_TH GENMASK(23, 16) +#define B_AX_RXAGG_0_TIME_32US_TH GENMASK(15, 8) +#define B_AX_RXAGG_0_BUF_SZ_1K GENMASK(7, 0) + +#define R_AX_RXAGG_1_V1 0x6004 + +#define R_AX_RXAGG_0 0x8900 +#define B_AX_RXAGG_0_BUF_SZ_4K GENMASK(7, 0) + struct rtw89_usb_info { u32 usb_host_request_2; u32 usb_wlan0_1; @@ -27,6 +38,7 @@ struct rtw89_usb_info { u32 usb3_mac_npi_config_intf_0; u32 usb_endpoint_0; u32 usb_endpoint_2; + u8 rx_agg_alignment; u8 bulkout_id[RTW89_DMA_CH_NUM]; }; From eb092b188fcf96ef2c770ff086ebfc2a15b061d3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 18 Mar 2026 18:06:22 +0100 Subject: [PATCH 0643/1561] wifi: mac80211: fix STA link removal during link removal ieee80211_sta_free_link() only frees the link and doesn't unhash it, so it can't be used here. Instead this needs to use ieee80211_sta_remove_link(), which unhashes it. An argument against it was that it also calls the driver and that already happened, but calls to the driver removing a link that's already removed are suppressed, so that's not actually an issue. Use it to fix the hashtable. Reported-and-tested-by: Jouni Malinen Fixes: 84674b03d8bf ("wifi: mac80211: Remove deleted sta links in ieee80211_ml_reconf_work()") Acked-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260318180622.9240067117e9.I45fb2b7f04d75e48d2f3e9c6650ef9f54a314f5b@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f279bdb03aca..0cd8d07bf668 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -7119,7 +7119,7 @@ static void ieee80211_ml_reconf_work(struct wiphy *wiphy, for_each_set_bit(link_id, &removed_links, IEEE80211_MLD_MAX_NUM_LINKS) - ieee80211_sta_free_link(sta, link_id); + ieee80211_sta_remove_link(sta, link_id); } new_dormant_links = sdata->vif.dormant_links & ~sdata->u.mgd.removed_links; From 777d8ba5aada960c666f810d5d820ab55ebb64c3 Mon Sep 17 00:00:00 2001 From: Ville Nummela Date: Wed, 18 Mar 2026 10:19:12 +0200 Subject: [PATCH 0644/1561] wifi: rsi_91x_usb: do not pause rfkill polling when stopping mac80211 Removing rsi_91x USB adapter could cause rtnetlink to lock up. When rsi_mac80211_stop is called, wiphy_lock is locked. Call to wiphy_rfkill_stop_polling would wait until the work queue has finished, but because the work queue waits for wiphy_lock, that would never happen. Moving the call to rsi_disconnect avoids the lock up. Signed-off-by: Ville Nummela Link: https://patch.msgid.link/20260318081912.87744-1-ville.nummela@kempower.com Signed-off-by: Johannes Berg --- drivers/net/wireless/rsi/rsi_91x_mac80211.c | 17 ++++++++++++++++- drivers/net/wireless/rsi/rsi_91x_usb.c | 2 ++ drivers/net/wireless/rsi/rsi_common.h | 1 + 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index c7ae8031436a..3faf2235728b 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -325,6 +325,22 @@ void rsi_mac80211_detach(struct rsi_hw *adapter) } EXPORT_SYMBOL_GPL(rsi_mac80211_detach); +/** + * rsi_mac80211_rfkill_exit() - This function is used to stop rfkill polling + * when the device is removed. + * @adapter: Pointer to the adapter structure. + * + * Return: None. + */ +void rsi_mac80211_rfkill_exit(struct rsi_hw *adapter) +{ + struct ieee80211_hw *hw = adapter->hw; + + if (hw) + wiphy_rfkill_stop_polling(hw->wiphy); +} +EXPORT_SYMBOL_GPL(rsi_mac80211_rfkill_exit); + /** * rsi_indicate_tx_status() - This function indicates the transmit status. * @adapter: Pointer to the adapter structure. @@ -422,7 +438,6 @@ static void rsi_mac80211_stop(struct ieee80211_hw *hw, bool suspend) rsi_dbg(ERR_ZONE, "===> Interface DOWN <===\n"); mutex_lock(&common->mutex); common->iface_down = true; - wiphy_rfkill_stop_polling(hw->wiphy); /* Block all rx frames */ rsi_send_rx_filter_frame(common, 0xffff); diff --git a/drivers/net/wireless/rsi/rsi_91x_usb.c b/drivers/net/wireless/rsi/rsi_91x_usb.c index d83204701e27..8765cac6f875 100644 --- a/drivers/net/wireless/rsi/rsi_91x_usb.c +++ b/drivers/net/wireless/rsi/rsi_91x_usb.c @@ -877,6 +877,8 @@ static void rsi_disconnect(struct usb_interface *pfunction) if (!adapter) return; + rsi_mac80211_rfkill_exit(adapter); + rsi_mac80211_detach(adapter); if (IS_ENABLED(CONFIG_RSI_COEX) && adapter->priv->coex_mode > 1 && diff --git a/drivers/net/wireless/rsi/rsi_common.h b/drivers/net/wireless/rsi/rsi_common.h index 7aa5124575cf..591602beeec6 100644 --- a/drivers/net/wireless/rsi/rsi_common.h +++ b/drivers/net/wireless/rsi/rsi_common.h @@ -79,6 +79,7 @@ static inline int rsi_kill_thread(struct rsi_thread *handle) } void rsi_mac80211_detach(struct rsi_hw *hw); +void rsi_mac80211_rfkill_exit(struct rsi_hw *hw); u16 rsi_get_connected_channel(struct ieee80211_vif *vif); struct rsi_hw *rsi_91x_init(u16 oper_mode); void rsi_91x_deinit(struct rsi_hw *adapter); From f10ebd136dfef1d8fac0ac29587e7e898c980e3e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 16 Mar 2026 12:30:50 +0100 Subject: [PATCH 0645/1561] wifi: nl80211: use int for band coming from netlink This was pointed out before, but there are issues with just removing the <0 check since enum representation isn't fixed, nla_type() returns int but really can only return small non-negative values, etc. Now newer versions of sparse are also starting to warn on it. Just use int for the band var. Link: https://patch.msgid.link/20260316123050.8c2d9f3426a0.I86acfa785982993fbffd148cc59049991bd6158f@changeid Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d2ef13ab1a20..e15cd26f3a79 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5843,7 +5843,7 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info, */ BUILD_BUG_ON(NL80211_MAX_SUPP_HT_RATES > IEEE80211_HT_MCS_MASK_LEN * 8); nla_for_each_nested(tx_rates, attrs[attr], rem) { - enum nl80211_band band = nla_type(tx_rates); + int band = nla_type(tx_rates); int err; if (band < 0 || band >= NUM_NL80211_BANDS) @@ -10705,7 +10705,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SUPP_RATES], tmp) { - enum nl80211_band band = nla_type(attr); + int band = nla_type(attr); if (band < 0 || band >= NUM_NL80211_BANDS) { err = -EINVAL; From a57f35fc19add4dfe33703af575a2c19c2cef9c7 Mon Sep 17 00:00:00 2001 From: Heitor Alves de Siqueira Date: Fri, 13 Mar 2026 18:27:57 -0300 Subject: [PATCH 0646/1561] wifi: libertas: use USB anchors for tracking in-flight URBs The libertas driver currently handles URB lifecycles manually, which makes it non-trivial to check if specific URBs are pending or not. Add anchors for TX/RX URBs, and use those to track in-flight requests. Signed-off-by: Heitor Alves de Siqueira Link: https://patch.msgid.link/20260313-libertas-usb-anchors-v1-1-915afbe988d7@igalia.com Signed-off-by: Johannes Berg --- .../net/wireless/marvell/libertas/if_usb.c | 27 ++++++++++++------- .../net/wireless/marvell/libertas/if_usb.h | 3 +++ 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/marvell/libertas/if_usb.c b/drivers/net/wireless/marvell/libertas/if_usb.c index 05fcf9cc28fa..71fa38ef2c96 100644 --- a/drivers/net/wireless/marvell/libertas/if_usb.c +++ b/drivers/net/wireless/marvell/libertas/if_usb.c @@ -114,8 +114,8 @@ static void if_usb_write_bulk_callback(struct urb *urb) static void if_usb_free(struct if_usb_card *cardp) { /* Unlink tx & rx urb */ - usb_kill_urb(cardp->tx_urb); - usb_kill_urb(cardp->rx_urb); + usb_kill_anchored_urbs(&cardp->tx_submitted); + usb_kill_anchored_urbs(&cardp->rx_submitted); usb_free_urb(cardp->tx_urb); cardp->tx_urb = NULL; @@ -221,6 +221,9 @@ static int if_usb_probe(struct usb_interface *intf, udev->descriptor.bDeviceSubClass, udev->descriptor.bDeviceProtocol); + init_usb_anchor(&cardp->rx_submitted); + init_usb_anchor(&cardp->tx_submitted); + for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { endpoint = &iface_desc->endpoint[i].desc; if (usb_endpoint_is_bulk_in(endpoint)) { @@ -423,7 +426,7 @@ static int usb_tx_block(struct if_usb_card *cardp, uint8_t *payload, uint16_t nb goto tx_ret; } - usb_kill_urb(cardp->tx_urb); + usb_kill_anchored_urbs(&cardp->tx_submitted); usb_fill_bulk_urb(cardp->tx_urb, cardp->udev, usb_sndbulkpipe(cardp->udev, @@ -432,8 +435,10 @@ static int usb_tx_block(struct if_usb_card *cardp, uint8_t *payload, uint16_t nb cardp->tx_urb->transfer_flags |= URB_ZERO_PACKET; + usb_anchor_urb(cardp->tx_urb, &cardp->tx_submitted); if ((ret = usb_submit_urb(cardp->tx_urb, GFP_ATOMIC))) { lbs_deb_usbd(&cardp->udev->dev, "usb_submit_urb failed: %d\n", ret); + usb_unanchor_urb(cardp->tx_urb); } else { lbs_deb_usb2(&cardp->udev->dev, "usb_submit_urb success\n"); ret = 0; @@ -464,8 +469,10 @@ static int __if_usb_submit_rx_urb(struct if_usb_card *cardp, cardp); lbs_deb_usb2(&cardp->udev->dev, "Pointer for rx_urb %p\n", cardp->rx_urb); + usb_anchor_urb(cardp->rx_urb, &cardp->rx_submitted); if ((ret = usb_submit_urb(cardp->rx_urb, GFP_ATOMIC))) { lbs_deb_usbd(&cardp->udev->dev, "Submit Rx URB failed: %d\n", ret); + usb_unanchor_urb(cardp->rx_urb); kfree_skb(skb); cardp->rx_skb = NULL; ret = -1; @@ -835,8 +842,8 @@ static void if_usb_prog_firmware(struct lbs_private *priv, int ret, } /* Cancel any pending usb business */ - usb_kill_urb(cardp->rx_urb); - usb_kill_urb(cardp->tx_urb); + usb_kill_anchored_urbs(&cardp->rx_submitted); + usb_kill_anchored_urbs(&cardp->tx_submitted); cardp->fwlastblksent = 0; cardp->fwdnldover = 0; @@ -866,8 +873,8 @@ restart: if (cardp->bootcmdresp == BOOT_CMD_RESP_NOT_SUPPORTED) { /* Return to normal operation */ ret = -EOPNOTSUPP; - usb_kill_urb(cardp->rx_urb); - usb_kill_urb(cardp->tx_urb); + usb_kill_anchored_urbs(&cardp->rx_submitted); + usb_kill_anchored_urbs(&cardp->tx_submitted); if (if_usb_submit_rx_urb(cardp) < 0) ret = -EIO; goto done; @@ -897,7 +904,7 @@ restart: wait_event_interruptible(cardp->fw_wq, cardp->surprise_removed || cardp->fwdnldover); timer_delete_sync(&cardp->fw_timeout); - usb_kill_urb(cardp->rx_urb); + usb_kill_anchored_urbs(&cardp->rx_submitted); if (!cardp->fwdnldover) { pr_info("failed to load fw, resetting device!\n"); @@ -957,8 +964,8 @@ static int if_usb_suspend(struct usb_interface *intf, pm_message_t message) goto out; /* Unlink tx & rx urb */ - usb_kill_urb(cardp->tx_urb); - usb_kill_urb(cardp->rx_urb); + usb_kill_anchored_urbs(&cardp->tx_submitted); + usb_kill_anchored_urbs(&cardp->rx_submitted); out: return ret; diff --git a/drivers/net/wireless/marvell/libertas/if_usb.h b/drivers/net/wireless/marvell/libertas/if_usb.h index 7d0daeb33c3f..a0cd36197c2b 100644 --- a/drivers/net/wireless/marvell/libertas/if_usb.h +++ b/drivers/net/wireless/marvell/libertas/if_usb.h @@ -48,6 +48,9 @@ struct if_usb_card { struct urb *rx_urb, *tx_urb; struct lbs_private *priv; + struct usb_anchor rx_submitted; + struct usb_anchor tx_submitted; + struct sk_buff *rx_skb; uint8_t ep_in; From 7c5c2b661bdb78c1472b8833265c9ed1ee880039 Mon Sep 17 00:00:00 2001 From: Heitor Alves de Siqueira Date: Fri, 13 Mar 2026 18:27:58 -0300 Subject: [PATCH 0647/1561] wifi: libertas: don't kill URBs in interrupt context Serialization for the TX path was enforced by calling usb_kill_urb()/usb_kill_anchored_urbs(), to prevent transmission before a previous URB was completed. usb_tx_block() can be called from interrupt context (e.g. in the HCD giveback path), so we can't always use it to kill in-flight URBs. Prevent sleeping during interrupt context by checking the tx_submitted anchor for existing URBs. We now return -EBUSY, to indicate there's a pending request. Reported-by: syzbot+74afbb6355826ffc2239@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=74afbb6355826ffc2239 Fixes: d66676e6ca96 ("wifi: libertas: fix WARNING in usb_tx_block") Signed-off-by: Heitor Alves de Siqueira Link: https://patch.msgid.link/20260313-libertas-usb-anchors-v1-2-915afbe988d7@igalia.com Signed-off-by: Johannes Berg --- drivers/net/wireless/marvell/libertas/if_usb.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/libertas/if_usb.c b/drivers/net/wireless/marvell/libertas/if_usb.c index 71fa38ef2c96..176f2106bab6 100644 --- a/drivers/net/wireless/marvell/libertas/if_usb.c +++ b/drivers/net/wireless/marvell/libertas/if_usb.c @@ -426,7 +426,12 @@ static int usb_tx_block(struct if_usb_card *cardp, uint8_t *payload, uint16_t nb goto tx_ret; } - usb_kill_anchored_urbs(&cardp->tx_submitted); + /* check if there are pending URBs */ + if (!usb_anchor_empty(&cardp->tx_submitted)) { + lbs_deb_usbd(&cardp->udev->dev, "%s failed: pending URB\n", __func__); + ret = -EBUSY; + goto tx_ret; + } usb_fill_bulk_urb(cardp->tx_urb, cardp->udev, usb_sndbulkpipe(cardp->udev, From dee55bc7cb8ad70b8c8598df60f378b7aed2e41b Mon Sep 17 00:00:00 2001 From: Roi L Date: Sat, 14 Mar 2026 18:08:49 +0200 Subject: [PATCH 0648/1561] qtnfmac: use alloc_netdev macro for single queue devices alloc_netdev is a macro for single queue devices, so there's no need to call alloc_netdev_mqs with a single tx/rx queue. Signed-off-by: Roi L Link: https://patch.msgid.link/SN6PR05MB58064E57FE979CE7B2BF7EF3DD42A@SN6PR05MB5806.namprd05.prod.outlook.com Signed-off-by: Johannes Berg --- drivers/net/wireless/quantenna/qtnfmac/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/core.c b/drivers/net/wireless/quantenna/qtnfmac/core.c index 0c106709ae29..0b5b60815c7f 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/core.c +++ b/drivers/net/wireless/quantenna/qtnfmac/core.c @@ -452,8 +452,8 @@ int qtnf_core_net_attach(struct qtnf_wmac *mac, struct qtnf_vif *vif, void *qdev_vif; int ret; - dev = alloc_netdev_mqs(sizeof(struct qtnf_vif *), name, - name_assign_type, ether_setup, 1, 1); + dev = alloc_netdev(sizeof(struct qtnf_vif *), name, + name_assign_type, ether_setup); if (!dev) return -ENOMEM; From 96a584db75bb21781562dc79f11932ce38a1205f Mon Sep 17 00:00:00 2001 From: Simon Baatz Date: Mon, 16 Mar 2026 19:51:10 +0100 Subject: [PATCH 0649/1561] selftests/net: packetdrill: improve tcp_rcv_neg_window.pkt The test depends on accepting a packet that is larger than the advertised window and that does not trigger an immediate ACK. Previously, the test might still pass even if kernel behavior changed unexpectedly. Add assertions verifying that the large packet was accepted and no ACK was sent. Suggested-by: Eric Dumazet Signed-off-by: Simon Baatz Link: https://patch.msgid.link/20260316-improve_tcp_neg_usable_wnd_test-v1-1-f16d5e365107@gmail.com Signed-off-by: Paolo Abeni --- .../net/packetdrill/tcp_rcv_neg_window.pkt | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_neg_window.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_neg_window.pkt index 15a9b4938f16..b9ab264b2a11 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_rcv_neg_window.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_neg_window.pkt @@ -1,4 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 +// Test maximum advertised window limit when rcv_nxt advances past +// rcv_mwnd_seq. The "usable window" must be properly clamped to zero +// rather than becoming negative. --mss=1000 @@ -17,10 +20,15 @@ +0 accept(3, ..., ...) = 4 -// A too big packet is accepted if the receive queue is empty +// A too big packet is accepted if the receive queue is empty. It +// does not trigger an immediate ACK. +0 < P. 1:20001(20000) ack 1 win 257 + +0 %{ assert tcpi_bytes_received == 20000, tcpi_bytes_received; }% + // Send a RST immediately so that there is no rcv_wup/rcv_mwnd_seq update yet +0 < R. 20001:20001(0) ack 1 win 257 - +.1 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }% - +// Verify that the RST was accepted. Indirectly this also verifies that no +// immediate ACK was sent for the data packet above. + +0 < . 20001:20001(0) ack 1 win 257 + +0 > R 1:1(0) From 9f4960b94f1a044f76da98a765d6cbd294c22c92 Mon Sep 17 00:00:00 2001 From: Qingfang Deng Date: Tue, 17 Mar 2026 13:41:40 +0800 Subject: [PATCH 0650/1561] l2tp: ppp: use max L2TP header size for PPP channel hdrlen chan.hdrlen is read once at channel registration by ppp_register_net_channel(), and used to set the PPP net device's hard_header_len. It was set to PPPOL2TP_L2TP_HDR_SIZE_NOSEQ (6), which is 4 bytes too small if sequence numbers are later enabled via setsockopt(PPPOL2TP_SO_SENDSEQ), causing unnecessary skb reallocations on the TX path. The setsockopt handler attempted to change netdev's hard_header_len by updating chan.hdrlen, but the PPP layer never re-reads it after the registration, so the update had no effect. To avoid the unnecessary reallocations, set chan.hdrlen to PPPOL2TP_L2TP_HDR_SIZE_SEQ (10) unconditionally at registration and remove the ineffective update in the setsockopt callback. Signed-off-by: Qingfang Deng Link: https://patch.msgid.link/20260317054141.524879-1-dqfext@gmail.com Signed-off-by: Paolo Abeni --- net/l2tp/l2tp_ppp.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index ae4543d5597b..99d6582f41de 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -787,11 +787,12 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr_unsized *userva goto out_no_ppp; } - /* The only header we need to worry about is the L2TP - * header. This size is different depending on whether - * sequence numbers are enabled for the data channel. + /* Reserve enough headroom for the L2TP header with sequence numbers, + * which is the largest possible. This is used by the PPP layer to set + * the net device's hard_header_len at registration, which must be + * sufficient regardless of whether sequence numbers are enabled later. */ - po->chan.hdrlen = PPPOL2TP_L2TP_HDR_SIZE_NOSEQ; + po->chan.hdrlen = PPPOL2TP_L2TP_HDR_SIZE_SEQ; po->chan.private = sk; po->chan.ops = &pppol2tp_chan_ops; @@ -1176,12 +1177,6 @@ static int pppol2tp_session_setsockopt(struct sock *sk, break; } session->send_seq = !!val; - { - struct pppox_sock *po = pppox_sk(sk); - - po->chan.hdrlen = val ? PPPOL2TP_L2TP_HDR_SIZE_SEQ : - PPPOL2TP_L2TP_HDR_SIZE_NOSEQ; - } l2tp_session_set_header_len(session, session->tunnel->version, session->tunnel->encap); break; From 96450df197bda7de927b51372c13f1002d0e76e3 Mon Sep 17 00:00:00 2001 From: Eric Woudstra Date: Tue, 17 Mar 2026 12:03:47 +0100 Subject: [PATCH 0651/1561] bridge: No DEV_PATH_BR_VLAN_UNTAG_HW for dsa foreign In network setup as below: fastpath bypass .----------------------------------------. / \ | IP - forwarding | | / \ v | / wan ... | / | | | | | brlan.1 | | | +-------------------------------+ | | vlan 1 | | | | | | brlan (vlan-filtering) | | | +---------------+ | | | DSA-SWITCH | | | vlan 1 | | | | to | | | | untagged 1 vlan 1 | | +---------------+---------------+ . / \ ----->wlan1 lan0 . . . ^ ^ vlan 1 tagged packets untagged packets br_vlan_fill_forward_path_mode() sets DEV_PATH_BR_VLAN_UNTAG_HW when filling in from brlan.1 towards wlan1. But it should be set to DEV_PATH_BR_VLAN_UNTAG in this case. Using BR_VLFLAG_ADDED_BY_SWITCHDEV is not correct. The dsa switchdev adds it as a foreign port. The same problem for all foreignly added dsa vlans on the bridge. First add the vlan, trying only native devices. If this fails, we know this may be a vlan from a foreign device. Use BR_VLFLAG_TAGGING_BY_SWITCHDEV to make sure DEV_PATH_BR_VLAN_UNTAG_HW is set only when there if no foreign device involved. Acked-by: Nikolay Aleksandrov Signed-off-by: Eric Woudstra Link: https://patch.msgid.link/20260317110347.363875-1-ericwouds@gmail.com Signed-off-by: Paolo Abeni --- include/net/switchdev.h | 1 + net/bridge/br_private.h | 10 ++++++++++ net/bridge/br_switchdev.c | 15 +++++++++++++++ net/bridge/br_vlan.c | 7 ++++++- net/switchdev/switchdev.c | 2 +- 5 files changed, 33 insertions(+), 2 deletions(-) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 8346b0d29542..ee500706496b 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -15,6 +15,7 @@ #define SWITCHDEV_F_NO_RECURSE BIT(0) #define SWITCHDEV_F_SKIP_EOPNOTSUPP BIT(1) #define SWITCHDEV_F_DEFER BIT(2) +#define SWITCHDEV_F_NO_FOREIGN BIT(3) enum switchdev_attr_id { SWITCHDEV_ATTR_ID_UNDEFINED, diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 9b55d38ea9ed..6dbca845e625 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -182,6 +182,7 @@ enum { BR_VLFLAG_MCAST_ENABLED = BIT(2), BR_VLFLAG_GLOBAL_MCAST_ENABLED = BIT(3), BR_VLFLAG_NEIGH_SUPPRESS_ENABLED = BIT(4), + BR_VLFLAG_TAGGING_BY_SWITCHDEV = BIT(5), }; /** @@ -2234,6 +2235,8 @@ void br_switchdev_mdb_notify(struct net_device *dev, int type); int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags, bool changed, struct netlink_ext_ack *extack); +int br_switchdev_port_vlan_no_foreign_add(struct net_device *dev, u16 vid, u16 flags, + bool changed, struct netlink_ext_ack *extack); int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid); void br_switchdev_init(struct net_bridge *br); @@ -2317,6 +2320,13 @@ static inline int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, return -EOPNOTSUPP; } +static inline int br_switchdev_port_vlan_no_foreign_add(struct net_device *dev, u16 vid, + u16 flags, bool changed, + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} + static inline int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid) { return -EOPNOTSUPP; diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 4fac002922d2..18b558a931ad 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -190,6 +190,21 @@ int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags, return switchdev_port_obj_add(dev, &v.obj, extack); } +int br_switchdev_port_vlan_no_foreign_add(struct net_device *dev, u16 vid, u16 flags, + bool changed, struct netlink_ext_ack *extack) +{ + struct switchdev_obj_port_vlan v = { + .obj.orig_dev = dev, + .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, + .obj.flags = SWITCHDEV_F_NO_FOREIGN, + .flags = flags, + .vid = vid, + .changed = changed, + }; + + return switchdev_port_obj_add(dev, &v.obj, extack); +} + int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid) { struct switchdev_obj_port_vlan v = { diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 326933b455b3..84a180927eb7 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -109,6 +109,11 @@ static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br, /* Try switchdev op first. In case it is not supported, fallback to * 8021q add. */ + err = br_switchdev_port_vlan_no_foreign_add(dev, v->vid, flags, false, extack); + if (err != -EOPNOTSUPP) { + v->priv_flags |= BR_VLFLAG_ADDED_BY_SWITCHDEV | BR_VLFLAG_TAGGING_BY_SWITCHDEV; + return err; + } err = br_switchdev_port_vlan_add(dev, v->vid, flags, false, extack); if (err == -EOPNOTSUPP) return vlan_vid_add(dev, br->vlan_proto, v->vid); @@ -1491,7 +1496,7 @@ int br_vlan_fill_forward_path_mode(struct net_bridge *br, if (path->bridge.vlan_mode == DEV_PATH_BR_VLAN_TAG) path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP; - else if (v->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV) + else if (v->priv_flags & BR_VLFLAG_TAGGING_BY_SWITCHDEV) path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG_HW; else path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG; diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index b55df183e6d5..474df25e96a0 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -760,7 +760,7 @@ static int __switchdev_handle_port_obj_add(struct net_device *dev, /* Event is neither on a bridge nor a LAG. Check whether it is on an * interface that is in a bridge with us. */ - if (!foreign_dev_check_cb) + if (!foreign_dev_check_cb || port_obj_info->obj->flags & SWITCHDEV_F_NO_FOREIGN) return err; br = netdev_master_upper_dev_get(dev); From 8888bf4fb98057a93f3ac7392aa3913e1b318c86 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 17 Mar 2026 20:38:14 -0700 Subject: [PATCH 0652/1561] selftests: net: move gro to lib for HW vs SW reuse The gro.c packet sender is used for SW testing but bulk of incoming new tests will be HW-specific. So it's better to put them under drivers/net/hw/, to avoid tip-toeing around netdevsim. Move gro.c to lib so we can reuse it. Reviewed-by: Petr Machata Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260318033819.1469350-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/.gitignore | 1 - tools/testing/selftests/drivers/net/Makefile | 1 - tools/testing/selftests/drivers/net/gro.py | 2 +- tools/testing/selftests/net/lib/.gitignore | 1 + tools/testing/selftests/net/lib/Makefile | 1 + tools/testing/selftests/{drivers/net => net/lib}/gro.c | 2 +- 6 files changed, 4 insertions(+), 4 deletions(-) rename tools/testing/selftests/{drivers/net => net/lib}/gro.c (99%) diff --git a/tools/testing/selftests/drivers/net/.gitignore b/tools/testing/selftests/drivers/net/.gitignore index 3633c7a3ed65..585ecb4d5dc4 100644 --- a/tools/testing/selftests/drivers/net/.gitignore +++ b/tools/testing/selftests/drivers/net/.gitignore @@ -1,4 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only -gro napi_id_helper psp_responder diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index 8154d6d429d3..7c7fa75b80c2 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -6,7 +6,6 @@ TEST_INCLUDES := $(wildcard lib/py/*.py) \ ../../net/lib.sh \ TEST_GEN_FILES := \ - gro \ napi_id_helper \ # end of TEST_GEN_FILES diff --git a/tools/testing/selftests/drivers/net/gro.py b/tools/testing/selftests/drivers/net/gro.py index cbc1b19dbc91..2da53686354f 100755 --- a/tools/testing/selftests/drivers/net/gro.py +++ b/tools/testing/selftests/drivers/net/gro.py @@ -117,7 +117,7 @@ def _setup(cfg, mode, test_name): """ Setup hardware loopback mode for GRO testing. """ if not hasattr(cfg, "bin_remote"): - cfg.bin_local = cfg.test_dir / "gro" + cfg.bin_local = cfg.net_lib_dir / "gro" cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) if not hasattr(cfg, "feat"): diff --git a/tools/testing/selftests/net/lib/.gitignore b/tools/testing/selftests/net/lib/.gitignore index bbc97d6bf556..6cd2b762af5d 100644 --- a/tools/testing/selftests/net/lib/.gitignore +++ b/tools/testing/selftests/net/lib/.gitignore @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only csum +gro xdp_helper diff --git a/tools/testing/selftests/net/lib/Makefile b/tools/testing/selftests/net/lib/Makefile index 5339f56329e1..ff83603397d0 100644 --- a/tools/testing/selftests/net/lib/Makefile +++ b/tools/testing/selftests/net/lib/Makefile @@ -14,6 +14,7 @@ TEST_FILES := \ TEST_GEN_FILES := \ $(patsubst %.c,%.o,$(wildcard *.bpf.c)) \ csum \ + gro \ xdp_helper \ # end of TEST_GEN_FILES diff --git a/tools/testing/selftests/drivers/net/gro.c b/tools/testing/selftests/net/lib/gro.c similarity index 99% rename from tools/testing/selftests/drivers/net/gro.c rename to tools/testing/selftests/net/lib/gro.c index 3c0745b68bfa..02e29509fbea 100644 --- a/tools/testing/selftests/drivers/net/gro.c +++ b/tools/testing/selftests/net/lib/gro.c @@ -77,7 +77,7 @@ #include #include "kselftest.h" -#include "../../net/lib/ksft.h" +#include "ksft.h" #define DPORT 8000 #define SPORT 1500 From 9b29afa1166088ca4e8223857508f2a19d88b58b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 17 Mar 2026 20:38:15 -0700 Subject: [PATCH 0653/1561] selftests: drv-net: give HW stats sync time extra 25% of margin There are transient failures for devices which update stats periodically, especially if it's the FW DMA'ing the stats rather than host periodic work querying the FW. Wait 25% longer than strictly necessary. For devices which don't report stats-block-usecs we retain 25 msec as the default wait time (0.025sec == 20,000usec * 1.25). Reviewed-by: Petr Machata Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260318033819.1469350-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/lib/py/env.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py index ccff345fe1c1..6a71c7e7f136 100644 --- a/tools/testing/selftests/drivers/net/lib/py/env.py +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -288,8 +288,8 @@ class NetDrvEpEnv(NetDrvEnvBase): if "Operation not supported" not in e.cmd.stderr: raise - self._stats_settle_time = 0.025 + \ - data.get('stats-block-usecs', 0) / 1000 / 1000 + self._stats_settle_time = \ + 1.25 * data.get('stats-block-usecs', 20000) / 1000 / 1000 time.sleep(self._stats_settle_time) From f26d43acf12f914e4562b6cf17f93af3d4a594a0 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 17 Mar 2026 20:38:16 -0700 Subject: [PATCH 0654/1561] selftests: drv-net: gro: use SO_TXTIME to schedule packets together Longer packet sequence tests are quite flaky when the test is run over a real network. Try to avoid at least the jitter on the sender side by scheduling all the packets to be sent at once using SO_TXTIME. Use hardcoded tx time of 5msec in the future. In my test increasing this time past 2msec makes no difference so 5msec is plenty of margin. Since we now expect more output buffering make sure to raise SNDBUF. Note that this is an opportunistic reliability improvement which will only work if the qdisc can schedule Tx time for us (fq). Fiddling with qdisc config was deemed too complex, so it's not part of the patch. Reviewed-by: Willem de Bruijn Reviewed-by: Petr Machata Link: https://patch.msgid.link/20260318033819.1469350-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/lib/gro.c | 57 +++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/lib/gro.c b/tools/testing/selftests/net/lib/gro.c index 02e29509fbea..5b3f8fca08e6 100644 --- a/tools/testing/selftests/net/lib/gro.c +++ b/tools/testing/selftests/net/lib/gro.c @@ -63,6 +63,7 @@ #include #include #include +#include #include #include #include @@ -74,6 +75,7 @@ #include #include #include +#include #include #include "kselftest.h" @@ -123,6 +125,9 @@ static int tcp_offset = -1; static int total_hdr_len = -1; static int ethhdr_proto = -1; static bool ipip; +static uint64_t txtime_ns; + +#define TXTIME_DELAY_MS 5 static void vlog(const char *fmt, ...) { @@ -330,13 +335,37 @@ static void fill_transportlayer(void *buf, int seq_offset, int ack_offset, static void write_packet(int fd, char *buf, int len, struct sockaddr_ll *daddr) { + char control[CMSG_SPACE(sizeof(uint64_t))]; + struct msghdr msg = {}; + struct iovec iov = {}; + struct cmsghdr *cm; int ret = -1; - ret = sendto(fd, buf, len, 0, (struct sockaddr *)daddr, sizeof(*daddr)); + iov.iov_base = buf; + iov.iov_len = len; + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_name = daddr; + msg.msg_namelen = sizeof(*daddr); + + if (txtime_ns) { + memset(control, 0, sizeof(control)); + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + cm = CMSG_FIRSTHDR(&msg); + cm->cmsg_level = SOL_SOCKET; + cm->cmsg_type = SCM_TXTIME; + cm->cmsg_len = CMSG_LEN(sizeof(uint64_t)); + memcpy(CMSG_DATA(cm), &txtime_ns, sizeof(txtime_ns)); + } + + ret = sendmsg(fd, &msg, 0); if (ret == -1) - error(1, errno, "sendto failure"); + error(1, errno, "sendmsg failure"); if (ret != len) - error(1, errno, "sendto wrong length"); + error(1, 0, "sendmsg wrong length: %d vs %d", ret, len); } static void create_packet(void *buf, int seq_offset, int ack_offset, @@ -1058,6 +1087,7 @@ static void check_recv_pkts(int fd, int *correct_payload, static void gro_sender(void) { + int bufsize = 4 * 1024 * 1024; /* 4 MB */ const int fin_delay_us = 100 * 1000; static char fin_pkt[MAX_HDR_LEN]; struct sockaddr_ll daddr = {}; @@ -1067,6 +1097,27 @@ static void gro_sender(void) if (txfd < 0) error(1, errno, "socket creation"); + if (setsockopt(txfd, SOL_SOCKET, SO_SNDBUF, &bufsize, sizeof(bufsize))) + error(1, errno, "cannot set sndbuf size, setsockopt failed"); + + /* Enable SO_TXTIME unless test case generates more than one flow + * SO_TXTIME could result in qdisc layer sorting the packets at sender. + */ + if (true) { + struct sock_txtime so_txtime = { .clockid = CLOCK_MONOTONIC, }; + struct timespec ts; + + if (setsockopt(txfd, SOL_SOCKET, SO_TXTIME, + &so_txtime, sizeof(so_txtime))) + error(1, errno, "setsockopt SO_TXTIME"); + + if (clock_gettime(CLOCK_MONOTONIC, &ts)) + error(1, errno, "clock_gettime"); + + txtime_ns = ts.tv_sec * 1000000000ULL + ts.tv_nsec; + txtime_ns += TXTIME_DELAY_MS * 1000000ULL; + } + memset(&daddr, 0, sizeof(daddr)); daddr.sll_ifindex = if_nametoindex(ifname); if (daddr.sll_ifindex == 0) From ba5d4128fca8d141cced21f7ed10d14582cd5c1c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 17 Mar 2026 20:38:17 -0700 Subject: [PATCH 0655/1561] selftests: drv-net: gro: test GRO stats Test accuracy of GRO stats. We want to cover two potentially tricky cases: - single segment GRO - packets which were eligible but didn't get GRO'd The first case is trivial, teach gro.c to send one packet, and check GRO stats didn't move. Second case requires gro.c to send a lot of flows expecting the NIC to run out of GRO flow capacity. To avoid system traffic noise we steer the packets to a dedicated queue and operate on qstat. Link: https://patch.msgid.link/20260318033819.1469350-5-kuba@kernel.org Signed-off-by: Jakub Kicinski --- .../testing/selftests/drivers/net/hw/Makefile | 1 + .../selftests/drivers/net/hw/gro_hw.py | 271 ++++++++++++++++++ tools/testing/selftests/net/lib/gro.c | 163 ++++++++++- 3 files changed, 433 insertions(+), 2 deletions(-) create mode 100755 tools/testing/selftests/drivers/net/hw/gro_hw.py diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile index 91df028abfc0..3c97dac9baaa 100644 --- a/tools/testing/selftests/drivers/net/hw/Makefile +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -26,6 +26,7 @@ TEST_PROGS = \ ethtool_extended_state.sh \ ethtool_mm.sh \ ethtool_rmon.sh \ + gro_hw.py \ hw_stats_l3.sh \ hw_stats_l3_gre.sh \ iou-zcrx.py \ diff --git a/tools/testing/selftests/drivers/net/hw/gro_hw.py b/tools/testing/selftests/drivers/net/hw/gro_hw.py new file mode 100755 index 000000000000..3bca19e8f339 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/gro_hw.py @@ -0,0 +1,271 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +HW GRO tests focusing on device machinery like stats, rather than protocol +processing. +""" + +import glob +import re + +from lib.py import ksft_run, ksft_exit, ksft_pr +from lib.py import ksft_eq, ksft_ge +from lib.py import NetDrvEpEnv, NetdevFamily +from lib.py import KsftSkipEx +from lib.py import bkg, cmd, defer, ethtool, ip + + +# gro.c uses hardcoded DPORT=8000 +GRO_DPORT = 8000 + + +def _get_queue_stats(cfg, queue_id): + """Get stats for a specific Rx queue.""" + cfg.wait_hw_stats_settle() + data = cfg.netnl.qstats_get({"ifindex": cfg.ifindex, "scope": ["queue"]}, + dump=True) + for q in data: + if q.get('queue-type') == 'rx' and q.get('queue-id') == queue_id: + return q + return {} + + +def _resolve_dmac(cfg, ipver): + """Find the destination MAC address for sending packets.""" + attr = "dmac" + ipver + if hasattr(cfg, attr): + return getattr(cfg, attr) + + route = ip(f"-{ipver} route get {cfg.addr_v[ipver]}", + json=True, host=cfg.remote)[0] + gw = route.get("gateway") + if not gw: + setattr(cfg, attr, cfg.dev['address']) + return getattr(cfg, attr) + + cmd(f"ping -c1 -W0 -I{cfg.remote_ifname} {gw}", host=cfg.remote) + neigh = ip(f"neigh get {gw} dev {cfg.remote_ifname}", + json=True, host=cfg.remote)[0] + setattr(cfg, attr, neigh['lladdr']) + return getattr(cfg, attr) + + +def _setup_isolated_queue(cfg): + """Set up an isolated queue for testing using ntuple filter. + + Remove queue 1 from the default RSS context and steer test traffic to it. + """ + test_queue = 1 + + qcnt = len(glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*")) + if qcnt < 2: + raise KsftSkipEx(f"Need at least 2 queues, have {qcnt}") + + # Remove queue 1 from default RSS context by setting its weight to 0 + weights = ["1"] * qcnt + weights[test_queue] = "0" + ethtool(f"-X {cfg.ifname} weight " + " ".join(weights)) + defer(ethtool, f"-X {cfg.ifname} default") + + # Set up ntuple filter to steer our test traffic to the isolated queue + flow = f"flow-type tcp{cfg.addr_ipver} " + flow += f"dst-ip {cfg.addr} dst-port {GRO_DPORT} action {test_queue}" + output = ethtool(f"-N {cfg.ifname} {flow}").stdout + ntuple_id = int(output.split()[-1]) + defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}") + + return test_queue + + +def _run_gro_test(cfg, test_name, num_flows=None, ignore_fail=False): + """Run gro binary with given test and return output.""" + if not hasattr(cfg, "bin_remote"): + cfg.bin_local = cfg.net_lib_dir / "gro" + cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) + + ipver = cfg.addr_ipver + protocol = f"--ipv{ipver}" + dmac = _resolve_dmac(cfg, ipver) + + base_args = [ + protocol, + f"--dmac {dmac}", + f"--smac {cfg.remote_dev['address']}", + f"--daddr {cfg.addr}", + f"--saddr {cfg.remote_addr_v[ipver]}", + f"--test {test_name}", + ] + if num_flows: + base_args.append(f"--num-flows {num_flows}") + + args = " ".join(base_args) + + rx_cmd = f"{cfg.bin_local} {args} --rx --iface {cfg.ifname}" + tx_cmd = f"{cfg.bin_remote} {args} --iface {cfg.remote_ifname}" + + with bkg(rx_cmd, ksft_ready=True, exit_wait=True, fail=False) as rx_proc: + cmd(tx_cmd, host=cfg.remote) + + if not ignore_fail: + ksft_eq(rx_proc.ret, 0) + if rx_proc.ret != 0: + ksft_pr(rx_proc) + + return rx_proc.stdout + + +def _require_hw_gro_stats(cfg, queue_id): + """Check if device reports HW GRO stats for the queue.""" + stats = _get_queue_stats(cfg, queue_id) + required = ['rx-packets', 'rx-hw-gro-packets', 'rx-hw-gro-wire-packets'] + for stat in required: + if stat not in stats: + raise KsftSkipEx(f"Driver does not report '{stat}' via qstats") + + +def _set_ethtool_feat(cfg, current, feats): + """Set ethtool features with defer to restore original state.""" + s2n = {True: "on", False: "off"} + + new = ["-K", cfg.ifname] + old = ["-K", cfg.ifname] + no_change = True + for name, state in feats.items(): + new += [name, s2n[state]] + old += [name, s2n[current[name]["active"]]] + + if current[name]["active"] != state: + no_change = False + if current[name]["fixed"]: + raise KsftSkipEx(f"Device does not support {name}") + if no_change: + return + + eth_cmd = ethtool(" ".join(new)) + defer(ethtool, " ".join(old)) + + # If ethtool printed something kernel must have modified some features + if eth_cmd.stdout: + ksft_pr(eth_cmd) + + +def _setup_hw_gro(cfg): + """Enable HW GRO on the device, disabling SW GRO.""" + feat = ethtool(f"-k {cfg.ifname}", json=True)[0] + + # Try to disable SW GRO and enable HW GRO + _set_ethtool_feat(cfg, feat, + {"generic-receive-offload": False, + "rx-gro-hw": True, + "large-receive-offload": False}) + + # Some NICs treat HW GRO as a GRO sub-feature so disabling GRO + # will also clear HW GRO. Use a hack of installing XDP generic + # to skip SW GRO, even when enabled. + feat = ethtool(f"-k {cfg.ifname}", json=True)[0] + if not feat["rx-gro-hw"]["active"]: + ksft_pr("Driver clears HW GRO when SW GRO is cleared, using generic XDP workaround") + prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" + ip(f"link set dev {cfg.ifname} xdpgeneric obj {prog} sec xdp") + defer(ip, f"link set dev {cfg.ifname} xdpgeneric off") + + # Attaching XDP may change features, fetch the latest state + feat = ethtool(f"-k {cfg.ifname}", json=True)[0] + + _set_ethtool_feat(cfg, feat, + {"generic-receive-offload": True, + "rx-gro-hw": True, + "large-receive-offload": False}) + + +def _check_gro_stats(cfg, test_queue, stats_before, + expect_rx, expect_gro, expect_wire): + """Validate GRO stats against expected values.""" + stats_after = _get_queue_stats(cfg, test_queue) + + rx_delta = (stats_after.get('rx-packets', 0) - + stats_before.get('rx-packets', 0)) + gro_delta = (stats_after.get('rx-hw-gro-packets', 0) - + stats_before.get('rx-hw-gro-packets', 0)) + wire_delta = (stats_after.get('rx-hw-gro-wire-packets', 0) - + stats_before.get('rx-hw-gro-wire-packets', 0)) + + ksft_eq(rx_delta, expect_rx, comment="rx-packets") + ksft_eq(gro_delta, expect_gro, comment="rx-hw-gro-packets") + ksft_eq(wire_delta, expect_wire, comment="rx-hw-gro-wire-packets") + + +def test_gro_stats_single(cfg): + """ + Test that a single packet doesn't affect GRO stats. + + Send a single packet that cannot be coalesced (nothing to coalesce with). + GRO stats should not increase since no coalescing occurred. + rx-packets should increase by 2 (1 data + 1 FIN). + """ + _setup_hw_gro(cfg) + + test_queue = _setup_isolated_queue(cfg) + _require_hw_gro_stats(cfg, test_queue) + + stats_before = _get_queue_stats(cfg, test_queue) + + _run_gro_test(cfg, "single") + + # 1 data + 1 FIN = 2 rx-packets, no coalescing + _check_gro_stats(cfg, test_queue, stats_before, + expect_rx=2, expect_gro=0, expect_wire=0) + + +def test_gro_stats_full(cfg): + """ + Test GRO stats when overwhelming HW GRO capacity. + + Send 500 flows to exceed HW GRO flow capacity on a single queue. + This should result in some packets not being coalesced. + Validate that qstats match what gro.c observed. + """ + _setup_hw_gro(cfg) + + test_queue = _setup_isolated_queue(cfg) + _require_hw_gro_stats(cfg, test_queue) + + num_flows = 500 + stats_before = _get_queue_stats(cfg, test_queue) + + # Run capacity test - will likely fail because not all packets coalesce + output = _run_gro_test(cfg, "capacity", num_flows=num_flows, + ignore_fail=True) + + # Parse gro.c output: "STATS: received=X wire=Y coalesced=Z" + match = re.search(r'STATS: received=(\d+) wire=(\d+) coalesced=(\d+)', + output) + if not match: + raise KsftSkipEx(f"Could not parse gro.c output: {output}") + + rx_frames = int(match.group(2)) + gro_coalesced = int(match.group(3)) + + ksft_ge(gro_coalesced, 1, + comment="At least some packets should coalesce") + + # received + 1 FIN, coalesced super-packets, coalesced * 2 wire packets + _check_gro_stats(cfg, test_queue, stats_before, + expect_rx=rx_frames + 1, + expect_gro=gro_coalesced, + expect_wire=gro_coalesced * 2) + + +def main() -> None: + """ Ksft boiler plate main """ + + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + cfg.netnl = NetdevFamily() + ksft_run([test_gro_stats_single, + test_gro_stats_full], args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/net/lib/gro.c b/tools/testing/selftests/net/lib/gro.c index 5b3f8fca08e6..41794b9f6f8a 100644 --- a/tools/testing/selftests/net/lib/gro.c +++ b/tools/testing/selftests/net/lib/gro.c @@ -43,6 +43,10 @@ * - large_max: exceeding max size * - large_rem: remainder handling * + * single, capacity: + * Boring cases used to test coalescing machinery itself and stats + * more than protocol behavior. + * * MSS is defined as 4096 - header because if it is too small * (i.e. 1500 MTU - header), it will result in many packets, * increasing the "large" test case's flakiness. This is because @@ -126,6 +130,9 @@ static int total_hdr_len = -1; static int ethhdr_proto = -1; static bool ipip; static uint64_t txtime_ns; +static int num_flows = 4; + +#define CAPACITY_PAYLOAD_LEN 200 #define TXTIME_DELAY_MS 5 @@ -389,6 +396,45 @@ static void create_packet(void *buf, int seq_offset, int ack_offset, fill_datalinklayer(buf); } +static void create_capacity_packet(void *buf, int flow_id, int pkt_idx, int psh) +{ + int seq_offset = pkt_idx * CAPACITY_PAYLOAD_LEN; + struct tcphdr *tcph; + + create_packet(buf, seq_offset, 0, CAPACITY_PAYLOAD_LEN, 0); + + /* Customize for this flow id */ + memset(buf + total_hdr_len, 'a' + flow_id, CAPACITY_PAYLOAD_LEN); + + tcph = buf + tcp_offset; + tcph->source = htons(SPORT + flow_id); + tcph->psh = psh; + tcph->check = 0; + tcph->check = tcp_checksum(tcph, CAPACITY_PAYLOAD_LEN); +} + +/* Send a capacity test, 2 packets per flow, all first packets then all second: + * A1 B1 C1 D1 ... A2 B2 C2 D2 ... + */ +static void send_capacity(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN + CAPACITY_PAYLOAD_LEN]; + int pkt_size = total_hdr_len + CAPACITY_PAYLOAD_LEN; + int i; + + /* Send first packet of each flow (no PSH) */ + for (i = 0; i < num_flows; i++) { + create_capacity_packet(buf, i, 0, 0); + write_packet(fd, buf, pkt_size, daddr); + } + + /* Send second packet of each flow (with PSH to flush) */ + for (i = 0; i < num_flows; i++) { + create_capacity_packet(buf, i, 1, 1); + write_packet(fd, buf, pkt_size, daddr); + } +} + #ifndef TH_CWR #define TH_CWR 0x80 #endif @@ -1085,6 +1131,93 @@ static void check_recv_pkts(int fd, int *correct_payload, printf("Test succeeded\n\n"); } +static void check_capacity_pkts(int fd) +{ + static char buffer[IP_MAXPACKET + ETH_HLEN + 1]; + struct iphdr *iph = (struct iphdr *)(buffer + ETH_HLEN); + struct ipv6hdr *ip6h = (struct ipv6hdr *)(buffer + ETH_HLEN); + const char *fail_reason = NULL; + int flow_order[num_flows * 2]; + int coalesced[num_flows]; + struct tcphdr *tcph; + int ip_ext_len = 0; + int total_data = 0; + int pkt_size = -1; + int data_len = 0; + int num_pkt = 0; + int num_coal = 0; + int flow_id; + int sport; + + memset(coalesced, 0, sizeof(coalesced)); + memset(flow_order, -1, sizeof(flow_order)); + + while (total_data < num_flows * CAPACITY_PAYLOAD_LEN * 2) { + ip_ext_len = 0; + pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0); + if (pkt_size < 0) + recv_error(fd, errno); + + if (iph->version == 4) + ip_ext_len = (iph->ihl - 5) * 4; + else if (ip6h->version == 6 && ip6h->nexthdr != IPPROTO_TCP) + ip_ext_len = MIN_EXTHDR_SIZE; + + tcph = (struct tcphdr *)(buffer + tcp_offset + ip_ext_len); + + /* FIN packet terminates reception */ + if (tcph->fin) + break; + + sport = ntohs(tcph->source); + flow_id = sport - SPORT; + + if (flow_id < 0 || flow_id >= num_flows) { + vlog("Invalid flow_id %d from sport %d\n", + flow_id, sport); + fail_reason = fail_reason ?: "invalid packet"; + continue; + } + + /* Calculate payload length */ + if (pkt_size == ETH_ZLEN && iph->version == 4) { + data_len = ntohs(iph->tot_len) + - sizeof(struct tcphdr) - sizeof(struct iphdr); + } else { + data_len = pkt_size - total_hdr_len - ip_ext_len; + } + + flow_order[num_pkt] = flow_id; + coalesced[flow_id] = data_len; + + if (data_len == CAPACITY_PAYLOAD_LEN * 2) { + num_coal++; + } else { + vlog("Pkt %d: flow %d, sport %d, len %d (expected %d)\n", + num_pkt, flow_id, sport, data_len, + CAPACITY_PAYLOAD_LEN * 2); + fail_reason = fail_reason ?: "not coalesced"; + } + + num_pkt++; + total_data += data_len; + } + + if (!fail_reason) { + vlog("All %d flows coalesced correctly\n", num_flows); + printf("Test succeeded\n\n"); + } else { + printf("FAILED\n"); + } + + /* Always print stats for external validation */ + printf("STATS: received=%d wire=%d coalesced=%d\n", + num_pkt, num_pkt + num_coal, num_coal); + + if (fail_reason) + error(1, 0, "capacity test failed %s", fail_reason); +} + static void gro_sender(void) { int bufsize = 4 * 1024 * 1024; /* 4 MB */ @@ -1103,7 +1236,7 @@ static void gro_sender(void) /* Enable SO_TXTIME unless test case generates more than one flow * SO_TXTIME could result in qdisc layer sorting the packets at sender. */ - if (true) { + if (strcmp(testname, "single") && strcmp(testname, "capacity")) { struct sock_txtime so_txtime = { .clockid = CLOCK_MONOTONIC, }; struct timespec ts; @@ -1250,6 +1383,19 @@ static void gro_sender(void) send_large(txfd, &daddr, remainder + 1); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + /* machinery sub-tests */ + } else if (strcmp(testname, "single") == 0) { + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(txfd, buf, total_hdr_len + PAYLOAD_LEN, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "capacity") == 0) { + send_capacity(txfd, &daddr); + usleep(fin_delay_us); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else { error(1, 0, "Unknown testcase: %s", testname); } @@ -1445,6 +1591,15 @@ static void gro_receiver(void) correct_payload[2] = remainder + 1; printf("last segment sent individually: "); check_recv_pkts(rxfd, correct_payload, 3); + + /* machinery sub-tests */ + } else if (strcmp(testname, "single") == 0) { + printf("single data packet: "); + correct_payload[0] = PAYLOAD_LEN; + check_recv_pkts(rxfd, correct_payload, 1); + } else if (strcmp(testname, "capacity") == 0) { + check_capacity_pkts(rxfd); + } else { error(1, 0, "Test case error: unknown testname %s", testname); } @@ -1462,6 +1617,7 @@ static void parse_args(int argc, char **argv) { "ipv4", no_argument, NULL, '4' }, { "ipv6", no_argument, NULL, '6' }, { "ipip", no_argument, NULL, 'e' }, + { "num-flows", required_argument, NULL, 'n' }, { "rx", no_argument, NULL, 'r' }, { "saddr", required_argument, NULL, 's' }, { "smac", required_argument, NULL, 'S' }, @@ -1471,7 +1627,7 @@ static void parse_args(int argc, char **argv) }; int c; - while ((c = getopt_long(argc, argv, "46d:D:ei:rs:S:t:v", opts, NULL)) != -1) { + while ((c = getopt_long(argc, argv, "46d:D:ei:n:rs:S:t:v", opts, NULL)) != -1) { switch (c) { case '4': proto = PF_INET; @@ -1495,6 +1651,9 @@ static void parse_args(int argc, char **argv) case 'i': ifname = optarg; break; + case 'n': + num_flows = atoi(optarg); + break; case 'r': tx_socket = false; break; From ff1cb3ad2abce4d03eaf3e8d5f38a7a5dfd36e79 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 17 Mar 2026 20:38:18 -0700 Subject: [PATCH 0656/1561] selftests: drv-net: gro: add test for packet ordering Add a test to check if the NIC reorders packets if the hit GRO. Link: https://patch.msgid.link/20260318033819.1469350-6-kuba@kernel.org Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/hw/gro_hw.py | 29 ++++++++++++-- tools/testing/selftests/net/lib/gro.c | 38 +++++++++++++++++-- 2 files changed, 61 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/drivers/net/hw/gro_hw.py b/tools/testing/selftests/drivers/net/hw/gro_hw.py index 3bca19e8f339..10e08b22ee0e 100755 --- a/tools/testing/selftests/drivers/net/hw/gro_hw.py +++ b/tools/testing/selftests/drivers/net/hw/gro_hw.py @@ -10,7 +10,7 @@ import glob import re from lib.py import ksft_run, ksft_exit, ksft_pr -from lib.py import ksft_eq, ksft_ge +from lib.py import ksft_eq, ksft_ge, ksft_variants from lib.py import NetDrvEpEnv, NetdevFamily from lib.py import KsftSkipEx from lib.py import bkg, cmd, defer, ethtool, ip @@ -78,7 +78,8 @@ def _setup_isolated_queue(cfg): return test_queue -def _run_gro_test(cfg, test_name, num_flows=None, ignore_fail=False): +def _run_gro_test(cfg, test_name, num_flows=None, ignore_fail=False, + order_check=False): """Run gro binary with given test and return output.""" if not hasattr(cfg, "bin_remote"): cfg.bin_local = cfg.net_lib_dir / "gro" @@ -98,6 +99,8 @@ def _run_gro_test(cfg, test_name, num_flows=None, ignore_fail=False): ] if num_flows: base_args.append(f"--num-flows {num_flows}") + if order_check: + base_args.append("--order-check") args = " ".join(base_args) @@ -257,13 +260,33 @@ def test_gro_stats_full(cfg): expect_wire=gro_coalesced * 2) +@ksft_variants([4, 32, 512]) +def test_gro_order(cfg, num_flows): + """ + Test that HW GRO preserves packet ordering between flows. + + Packets may get delayed until the aggregate is released, + but reordering between aggregates and packet terminating + the aggregate and normal packets should not happen. + + Note that this test is stricter than truly required. + Reordering packets between flows should not cause issues. + This test will also fail if traffic is run over an ECMP fabric. + """ + _setup_hw_gro(cfg) + _setup_isolated_queue(cfg) + + _run_gro_test(cfg, "capacity", num_flows=num_flows, order_check=True) + + def main() -> None: """ Ksft boiler plate main """ with NetDrvEpEnv(__file__, nsim_test=False) as cfg: cfg.netnl = NetdevFamily() ksft_run([test_gro_stats_single, - test_gro_stats_full], args=(cfg,)) + test_gro_stats_full, + test_gro_order], args=(cfg,)) ksft_exit() diff --git a/tools/testing/selftests/net/lib/gro.c b/tools/testing/selftests/net/lib/gro.c index 41794b9f6f8a..3e611ae25f61 100644 --- a/tools/testing/selftests/net/lib/gro.c +++ b/tools/testing/selftests/net/lib/gro.c @@ -131,6 +131,7 @@ static int ethhdr_proto = -1; static bool ipip; static uint64_t txtime_ns; static int num_flows = 4; +static bool order_check; #define CAPACITY_PAYLOAD_LEN 200 @@ -1136,6 +1137,7 @@ static void check_capacity_pkts(int fd) static char buffer[IP_MAXPACKET + ETH_HLEN + 1]; struct iphdr *iph = (struct iphdr *)(buffer + ETH_HLEN); struct ipv6hdr *ip6h = (struct ipv6hdr *)(buffer + ETH_HLEN); + int num_pkt = 0, num_coal = 0, pkt_idx; const char *fail_reason = NULL; int flow_order[num_flows * 2]; int coalesced[num_flows]; @@ -1144,8 +1146,6 @@ static void check_capacity_pkts(int fd) int total_data = 0; int pkt_size = -1; int data_len = 0; - int num_pkt = 0; - int num_coal = 0; int flow_id; int sport; @@ -1203,6 +1203,34 @@ static void check_capacity_pkts(int fd) total_data += data_len; } + /* Check flow ordering. We expect to see all non-coalesced first segs + * then interleaved coalesced and non-coalesced second frames. + */ + pkt_idx = 0; + for (flow_id = 0; order_check && flow_id < num_flows; flow_id++) { + bool coaled = coalesced[flow_id] > CAPACITY_PAYLOAD_LEN; + + if (coaled) + continue; + + if (flow_order[pkt_idx] != flow_id) { + vlog("Flow order mismatch (non-coalesced) at position %d: expected flow %d, got flow %d\n", + pkt_idx, flow_id, flow_order[pkt_idx]); + fail_reason = fail_reason ?: "bad packet order (1)"; + } + pkt_idx++; + } + for (flow_id = 0; order_check && flow_id < num_flows; flow_id++) { + bool coaled = coalesced[flow_id] > CAPACITY_PAYLOAD_LEN; + + if (flow_order[pkt_idx] != flow_id) { + vlog("Flow order mismatch at position %d: expected flow %d, got flow %d, coalesced: %d\n", + pkt_idx, flow_id, flow_order[pkt_idx], coaled); + fail_reason = fail_reason ?: "bad packet order (2)"; + } + pkt_idx++; + } + if (!fail_reason) { vlog("All %d flows coalesced correctly\n", num_flows); printf("Test succeeded\n\n"); @@ -1622,12 +1650,13 @@ static void parse_args(int argc, char **argv) { "saddr", required_argument, NULL, 's' }, { "smac", required_argument, NULL, 'S' }, { "test", required_argument, NULL, 't' }, + { "order-check", no_argument, NULL, 'o' }, { "verbose", no_argument, NULL, 'v' }, { 0, 0, 0, 0 } }; int c; - while ((c = getopt_long(argc, argv, "46d:D:ei:n:rs:S:t:v", opts, NULL)) != -1) { + while ((c = getopt_long(argc, argv, "46d:D:ei:n:rs:S:t:ov", opts, NULL)) != -1) { switch (c) { case '4': proto = PF_INET; @@ -1666,6 +1695,9 @@ static void parse_args(int argc, char **argv) case 't': testname = optarg; break; + case 'o': + order_check = true; + break; case 'v': verbose = true; break; From 7dae8ffb0987f802bf4fabad987e69913ffd82e8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 17 Mar 2026 20:38:19 -0700 Subject: [PATCH 0657/1561] selftests: drv-net: gro: add a test for GRO depth Reuse the long sequence test to max out the GRO contexts. Repeat for a single queue, 8 queues, and default number of queues but flow steering to just one. The SW GRO's capacity should be around 64 per queue (8 buckets, up to 8 skbs in a chain). Link: https://patch.msgid.link/20260318033819.1469350-7-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/gro.py | 201 +++++++++++++++++++-- 1 file changed, 181 insertions(+), 20 deletions(-) diff --git a/tools/testing/selftests/drivers/net/gro.py b/tools/testing/selftests/drivers/net/gro.py index 2da53686354f..70709bf670c7 100755 --- a/tools/testing/selftests/drivers/net/gro.py +++ b/tools/testing/selftests/drivers/net/gro.py @@ -35,11 +35,18 @@ Test cases: - large_rem: Large packet remainder handling """ +import glob import os +import re from lib.py import ksft_run, ksft_exit, ksft_pr from lib.py import NetDrvEpEnv, KsftXfailEx +from lib.py import NetdevFamily, EthtoolFamily from lib.py import bkg, cmd, defer, ethtool, ip -from lib.py import ksft_variants +from lib.py import ksft_variants, KsftNamedVariant + + +# gro.c uses hardcoded DPORT=8000 +GRO_DPORT = 8000 def _resolve_dmac(cfg, ipver): @@ -113,6 +120,98 @@ def _set_ethtool_feat(dev, current, feats, host=None): ksft_pr(eth_cmd) +def _get_queue_stats(cfg, queue_id): + """Get stats for a specific Rx queue.""" + cfg.wait_hw_stats_settle() + data = cfg.netnl.qstats_get({"ifindex": cfg.ifindex, "scope": ["queue"]}, + dump=True) + for q in data: + if q.get('queue-type') == 'rx' and q.get('queue-id') == queue_id: + return q + return {} + + +def _setup_isolated_queue(cfg): + """Set up an isolated queue for testing using ntuple filter. + + Remove queue 1 from the default RSS context and steer test traffic to it. + """ + test_queue = 1 + + qcnt = len(glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*")) + if qcnt < 2: + raise KsftXfailEx(f"Need at least 2 queues, have {qcnt}") + + # Remove queue 1 from default RSS context by setting its weight to 0 + weights = ["1"] * qcnt + weights[test_queue] = "0" + ethtool(f"-X {cfg.ifname} weight " + " ".join(weights)) + defer(ethtool, f"-X {cfg.ifname} default") + + # Set up ntuple filter to steer our test traffic to the isolated queue + flow = f"flow-type tcp{cfg.addr_ipver} " + flow += f"dst-ip {cfg.addr} dst-port {GRO_DPORT} action {test_queue}" + output = ethtool(f"-N {cfg.ifname} {flow}").stdout + ntuple_id = int(output.split()[-1]) + defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}") + + return test_queue + + +def _setup_queue_count(cfg, num_queues): + """Configure the NIC to use a specific number of queues.""" + channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + ch_max = channels.get('combined-max', 0) + qcnt = channels['combined-count'] + + if ch_max < num_queues: + raise KsftXfailEx(f"Need at least {num_queues} queues, max={ch_max}") + + defer(ethtool, f"-L {cfg.ifname} combined {qcnt}") + ethtool(f"-L {cfg.ifname} combined {num_queues}") + + +def _run_gro_bin(cfg, test_name, protocol=None, num_flows=None, + order_check=False, verbose=False, fail=False): + """Run gro binary with given test and return the process result.""" + if not hasattr(cfg, "bin_remote"): + cfg.bin_local = cfg.net_lib_dir / "gro" + cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) + + if protocol is None: + ipver = cfg.addr_ipver + protocol = f"ipv{ipver}" + else: + ipver = "6" if protocol[-1] == "6" else "4" + + dmac = _resolve_dmac(cfg, ipver) + + base_args = [ + f"--{protocol}", + f"--dmac {dmac}", + f"--smac {cfg.remote_dev['address']}", + f"--daddr {cfg.addr_v[ipver]}", + f"--saddr {cfg.remote_addr_v[ipver]}", + f"--test {test_name}", + ] + if num_flows: + base_args.append(f"--num-flows {num_flows}") + if order_check: + base_args.append("--order-check") + if verbose: + base_args.append("--verbose") + + args = " ".join(base_args) + + rx_cmd = f"{cfg.bin_local} {args} --rx --iface {cfg.ifname}" + tx_cmd = f"{cfg.bin_remote} {args} --iface {cfg.remote_ifname}" + + with bkg(rx_cmd, ksft_ready=True, exit_wait=True, fail=fail) as rx_proc: + cmd(tx_cmd, host=cfg.remote) + + return rx_proc + + def _setup(cfg, mode, test_name): """ Setup hardware loopback mode for GRO testing. """ @@ -233,30 +332,14 @@ def test(cfg, mode, protocol, test_name): _setup(cfg, mode, test_name) - base_cmd_args = [ - f"--{protocol}", - f"--dmac {_resolve_dmac(cfg, ipver)}", - f"--smac {cfg.remote_dev['address']}", - f"--daddr {cfg.addr_v[ipver]}", - f"--saddr {cfg.remote_addr_v[ipver]}", - f"--test {test_name}", - "--verbose" - ] - base_args = " ".join(base_cmd_args) - # Each test is run 6 times to deflake, because given the receive timing, # not all packets that should coalesce will be considered in the same flow # on every try. max_retries = 6 for attempt in range(max_retries): - rx_cmd = f"{cfg.bin_local} {base_args} --rx --iface {cfg.ifname}" - tx_cmd = f"{cfg.bin_remote} {base_args} --iface {cfg.remote_ifname}" - fail_now = attempt >= max_retries - 1 - - with bkg(rx_cmd, ksft_ready=True, exit_wait=True, - fail=fail_now) as rx_proc: - cmd(tx_cmd, host=cfg.remote) + rx_proc = _run_gro_bin(cfg, test_name, protocol=protocol, + verbose=True, fail=fail_now) if rx_proc.ret == 0: return @@ -270,11 +353,89 @@ def test(cfg, mode, protocol, test_name): ksft_pr(f"Attempt {attempt + 1}/{max_retries} failed, retrying...") +def _capacity_variants(): + """Generate variants for capacity test: mode x queue setup.""" + setups = [ + ("isolated", _setup_isolated_queue), + ("1q", lambda cfg: _setup_queue_count(cfg, 1)), + ("8q", lambda cfg: _setup_queue_count(cfg, 8)), + ] + for mode in ["sw", "hw", "lro"]: + for name, func in setups: + yield KsftNamedVariant(f"{mode}_{name}", mode, func) + + +@ksft_variants(_capacity_variants()) +def test_gro_capacity(cfg, mode, setup_func): + """ + Probe GRO capacity. + + Start with 8 flows and increase by 2x on each successful run. + Retry up to 3 times on failure. + + Variants combine mode (sw, hw, lro) with queue setup: + - isolated: Use a single queue isolated from RSS + - 1q: Configure NIC to use 1 queue + - 8q: Configure NIC to use 8 queues + """ + max_retries = 3 + + _setup(cfg, mode, "capacity") + queue_id = setup_func(cfg) + + num_flows = 8 + while True: + success = False + for attempt in range(max_retries): + if queue_id is not None: + stats_before = _get_queue_stats(cfg, queue_id) + + rx_proc = _run_gro_bin(cfg, "capacity", num_flows=num_flows) + output = rx_proc.stdout + + if queue_id is not None: + stats_after = _get_queue_stats(cfg, queue_id) + qstat_pkts = (stats_after.get('rx-packets', 0) - + stats_before.get('rx-packets', 0)) + gro_pkts = (stats_after.get('rx-hw-gro-packets', 0) - + stats_before.get('rx-hw-gro-packets', 0)) + qstat_str = f" qstat={qstat_pkts} hw-gro={gro_pkts}" + else: + qstat_str = "" + + # Parse and print STATS line + match = re.search( + r'STATS: received=(\d+) wire=(\d+) coalesced=(\d+)', output) + if match: + received = int(match.group(1)) + wire = int(match.group(2)) + coalesced = int(match.group(3)) + status = "PASS" if received == num_flows else "MISS" + ksft_pr(f"flows={num_flows} attempt={attempt + 1} " + f"received={received} wire={wire} " + f"coalesced={coalesced}{qstat_str} [{status}]") + if received == num_flows: + success = True + break + else: + ksft_pr(rx_proc) + ksft_pr(f"flows={num_flows} attempt={attempt + 1}" + f"{qstat_str} [FAIL - can't parse stats]") + + if not success: + ksft_pr(f"Stopped at {num_flows} flows") + break + + num_flows *= 2 + + def main() -> None: """ Ksft boiler plate main """ with NetDrvEpEnv(__file__) as cfg: - ksft_run(cases=[test], args=(cfg,)) + cfg.ethnl = EthtoolFamily() + cfg.netnl = NetdevFamily() + ksft_run(cases=[test, test_gro_capacity], args=(cfg,)) ksft_exit() From b29580d58be6d4d24d66c89d9bea96db342cff00 Mon Sep 17 00:00:00 2001 From: Oskar Ray-Frayssinet Date: Wed, 18 Mar 2026 22:11:53 +0100 Subject: [PATCH 0658/1561] net: ntb_netdev: add SPDX tag and remove boilerplate license text Add SPDX-License-Identifier tag to reflect the dual GPL-2.0-only/BSD-3-Clause license, remove the redundant boilerplate license text and contact information, keeping only the driver description. Signed-off-by: Oskar Ray-Frayssinet Acked-by: Dave Jiang Link: https://patch.msgid.link/20260318211153.9460-1-rayfraytech@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ntb_netdev.c | 47 +--------------------------------------- 1 file changed, 1 insertion(+), 46 deletions(-) diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c index 6397d898c991..c3a6ba96fc8a 100644 --- a/drivers/net/ntb_netdev.c +++ b/drivers/net/ntb_netdev.c @@ -1,51 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause /* - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2012 Intel Corporation. All rights reserved. - * Copyright (C) 2015 EMC Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * BSD LICENSE - * - * Copyright(c) 2012 Intel Corporation. All rights reserved. - * Copyright (C) 2015 EMC Corporation. All Rights Reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copy - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * * PCIe NTB Network Linux driver - * - * Contact Information: - * Jon Mason */ #include #include From 1939749ce92dd47f55f1318522f4b8f6b46e13aa Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 18 Mar 2026 18:26:33 +0000 Subject: [PATCH 0659/1561] net: stmmac: rename "mode" to "descriptor_mode" priv->mode doesn't describe what it refers to, it is whether we operate the DMA descriptors as a ring or chain. It is also difficult to grep for as there are several "mode" struct members. Add "descriptor_" prefix. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w2vbF-0000000DbWQ-4674@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/hwif.c | 4 ++-- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 5 +++- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 24 ++++++++++--------- 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c index 71dac8c1a3ca..511b0fd5e834 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.c +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c @@ -57,11 +57,11 @@ static void stmmac_dwmac_mode_quirk(struct stmmac_priv *priv) if (priv->chain_mode) { dev_info(priv->device, "Chain mode enabled\n"); - priv->mode = STMMAC_CHAIN_MODE; + priv->descriptor_mode = STMMAC_CHAIN_MODE; mac->mode = &chain_mode_ops; } else { dev_info(priv->device, "Ring mode enabled\n"); - priv->mode = STMMAC_RING_MODE; + priv->descriptor_mode = STMMAC_RING_MODE; mac->mode = &ring_mode_ops; } } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index b9d849a3f06e..919a93a52390 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -323,7 +323,10 @@ struct stmmac_priv { bool extend_desc; /* chain_mode: requested descriptor mode */ bool chain_mode; - unsigned int mode; + /* descriptor_mode: actual descriptor mode, + * see STMMAC_CHAIN_MODE or STMMAC_RING_MODE + */ + u8 descriptor_mode; struct kernel_hwtstamp_config tstamp_config; struct ptp_clock *ptp_clock; struct ptp_clock_info ptp_clock_ops; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 0a933aac3f03..567499e74917 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1589,7 +1589,8 @@ static void stmmac_clear_rx_descriptors(struct stmmac_priv *priv, for (i = 0; i < dma_conf->dma_rx_size; i++) { desc = stmmac_get_rx_desc(priv, rx_q, i); - stmmac_init_rx_desc(priv, desc, priv->use_riwt, priv->mode, + stmmac_init_rx_desc(priv, desc, priv->use_riwt, + priv->descriptor_mode, (i == dma_conf->dma_rx_size - 1), dma_conf->dma_buf_sz); } @@ -1616,7 +1617,7 @@ static void stmmac_clear_tx_descriptors(struct stmmac_priv *priv, struct dma_desc *p; p = stmmac_get_tx_desc(priv, tx_q, i); - stmmac_init_tx_desc(priv, p, priv->mode, last); + stmmac_init_tx_desc(priv, p, priv->descriptor_mode, last); } } @@ -1925,7 +1926,7 @@ static int __init_dma_rx_desc_rings(struct stmmac_priv *priv, } /* Setup the chained descriptor addresses */ - if (priv->mode == STMMAC_CHAIN_MODE) { + if (priv->descriptor_mode == STMMAC_CHAIN_MODE) { if (priv->extend_desc) stmmac_mode_init(priv, rx_q->dma_erx, rx_q->dma_rx_phy, @@ -2027,7 +2028,7 @@ static int __init_dma_tx_desc_rings(struct stmmac_priv *priv, (u32)tx_q->dma_tx_phy); /* Setup the chained descriptor addresses */ - if (priv->mode == STMMAC_CHAIN_MODE) { + if (priv->descriptor_mode == STMMAC_CHAIN_MODE) { if (priv->extend_desc) stmmac_mode_init(priv, tx_q->dma_etx, tx_q->dma_tx_phy, @@ -2774,7 +2775,7 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget) } stmmac_prepare_tx_desc(priv, tx_desc, 1, xdp_desc.len, - csum, priv->mode, true, true, + csum, priv->descriptor_mode, true, true, xdp_desc.len); stmmac_enable_dma_transmission(priv, priv->ioaddr, queue); @@ -2948,7 +2949,7 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue, } } - stmmac_release_tx_desc(priv, p, priv->mode); + stmmac_release_tx_desc(priv, p, priv->descriptor_mode); entry = STMMAC_GET_ENTRY(entry, priv->dma_conf.dma_tx_size); } @@ -4794,7 +4795,8 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) /* Prepare the descriptor and set the own bit too */ stmmac_prepare_tx_desc(priv, desc, 0, len, csum_insertion, - priv->mode, 1, last_segment, skb->len); + priv->descriptor_mode, 1, last_segment, + skb->len); } stmmac_set_tx_dma_last_segment(tx_q, entry); @@ -4890,8 +4892,8 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) /* Prepare the first descriptor setting the OWN bit too */ stmmac_prepare_tx_desc(priv, first, 1, nopaged_len, - csum_insertion, priv->mode, 0, last_segment, - skb->len); + csum_insertion, priv->descriptor_mode, + 0, last_segment, skb->len); } if (tx_q->tbs & STMMAC_TBS_EN) { @@ -5119,7 +5121,7 @@ static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue, stmmac_set_desc_addr(priv, tx_desc, dma_addr); stmmac_prepare_tx_desc(priv, tx_desc, 1, xdpf->len, - csum, priv->mode, true, true, + csum, priv->descriptor_mode, true, true, xdpf->len); tx_q->tx_count_frames++; @@ -7432,7 +7434,7 @@ static int stmmac_hw_init(struct stmmac_priv *priv) * is not expected to change this. */ priv->plat->dma_cfg->atds = priv->extend_desc && - priv->mode == STMMAC_RING_MODE; + priv->descriptor_mode == STMMAC_RING_MODE; /* Rx Watchdog is available in the COREs newer than the 3.40. * In some case, for example on bugged HW this feature From 33be7846e4bebb44fb0c09ee92e99cdd111558fd Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 18 Mar 2026 18:26:39 +0000 Subject: [PATCH 0660/1561] net: stmmac: more mode -> descriptor_mode renames Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w2vbL-0000000DbWW-0PON@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../ethernet/stmicro/stmmac/dwmac4_descs.c | 12 ++++++----- .../ethernet/stmicro/stmmac/dwxgmac2_descs.c | 12 ++++++----- .../net/ethernet/stmicro/stmmac/enh_desc.c | 20 ++++++++++-------- drivers/net/ethernet/stmicro/stmmac/hwif.h | 12 +++++------ .../net/ethernet/stmicro/stmmac/norm_desc.c | 21 ++++++++++--------- 5 files changed, 42 insertions(+), 35 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index d5c003f3fbbc..2994df41ec2c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -289,12 +289,13 @@ exit: } static void dwmac4_rd_init_rx_desc(struct dma_desc *p, int disable_rx_ic, - int mode, int end, int bfsize) + u8 descriptor_mode, int end, int bfsize) { dwmac4_set_rx_owner(p, disable_rx_ic); } -static void dwmac4_rd_init_tx_desc(struct dma_desc *p, int mode, int end) +static void dwmac4_rd_init_tx_desc(struct dma_desc *p, u8 descriptor_mode, + int end) { p->des0 = 0; p->des1 = 0; @@ -303,8 +304,9 @@ static void dwmac4_rd_init_tx_desc(struct dma_desc *p, int mode, int end) } static void dwmac4_rd_prepare_tx_desc(struct dma_desc *p, int is_fs, int len, - bool csum_flag, int mode, bool tx_own, - bool ls, unsigned int tot_pkt_len) + bool csum_flag, u8 descriptor_mode, + bool tx_own, bool ls, + unsigned int tot_pkt_len) { u32 tdes3 = le32_to_cpu(p->des3); @@ -381,7 +383,7 @@ static void dwmac4_rd_prepare_tso_tx_desc(struct dma_desc *p, int is_fs, p->des3 = cpu_to_le32(tdes3); } -static void dwmac4_release_tx_desc(struct dma_desc *p, int mode) +static void dwmac4_release_tx_desc(struct dma_desc *p, u8 descriptor_mode) { p->des0 = 0; p->des1 = 0; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c index 1009ef436a1e..b5f200a87484 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c @@ -130,12 +130,13 @@ static int dwxgmac2_get_rx_timestamp_status(void *desc, void *next_desc, } static void dwxgmac2_init_rx_desc(struct dma_desc *p, int disable_rx_ic, - int mode, int end, int bfsize) + u8 descriptor_mode, int end, int bfsize) { dwxgmac2_set_rx_owner(p, disable_rx_ic); } -static void dwxgmac2_init_tx_desc(struct dma_desc *p, int mode, int end) +static void dwxgmac2_init_tx_desc(struct dma_desc *p, u8 descriptor_mode, + int end) { p->des0 = 0; p->des1 = 0; @@ -144,8 +145,9 @@ static void dwxgmac2_init_tx_desc(struct dma_desc *p, int mode, int end) } static void dwxgmac2_prepare_tx_desc(struct dma_desc *p, int is_fs, int len, - bool csum_flag, int mode, bool tx_own, - bool ls, unsigned int tot_pkt_len) + bool csum_flag, u8 descriptor_mode, + bool tx_own, bool ls, + unsigned int tot_pkt_len) { u32 tdes3 = le32_to_cpu(p->des3); @@ -219,7 +221,7 @@ static void dwxgmac2_prepare_tso_tx_desc(struct dma_desc *p, int is_fs, p->des3 = cpu_to_le32(tdes3); } -static void dwxgmac2_release_tx_desc(struct dma_desc *p, int mode) +static void dwxgmac2_release_tx_desc(struct dma_desc *p, u8 descriptor_mode) { p->des0 = 0; p->des1 = 0; diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c index ead468f4b645..051253601225 100644 --- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c @@ -245,7 +245,7 @@ static int enh_desc_get_rx_status(struct stmmac_extra_stats *x, } static void enh_desc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, - int mode, int end, int bfsize) + u8 descriptor_mode, int end, int bfsize) { int bfsize1; @@ -254,7 +254,7 @@ static void enh_desc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, bfsize1 = min(bfsize, BUF_SIZE_8KiB); p->des1 |= cpu_to_le32(bfsize1 & ERDES1_BUFFER1_SIZE_MASK); - if (mode == STMMAC_CHAIN_MODE) + if (descriptor_mode == STMMAC_CHAIN_MODE) ehn_desc_rx_set_on_chain(p); else ehn_desc_rx_set_on_ring(p, end, bfsize); @@ -263,10 +263,11 @@ static void enh_desc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, p->des1 |= cpu_to_le32(ERDES1_DISABLE_IC); } -static void enh_desc_init_tx_desc(struct dma_desc *p, int mode, int end) +static void enh_desc_init_tx_desc(struct dma_desc *p, u8 descriptor_mode, + int end) { p->des0 &= cpu_to_le32(~ETDES0_OWN); - if (mode == STMMAC_CHAIN_MODE) + if (descriptor_mode == STMMAC_CHAIN_MODE) enh_desc_end_tx_desc_on_chain(p); else enh_desc_end_tx_desc_on_ring(p, end); @@ -282,24 +283,25 @@ static void enh_desc_set_rx_owner(struct dma_desc *p, int disable_rx_ic) p->des0 |= cpu_to_le32(RDES0_OWN); } -static void enh_desc_release_tx_desc(struct dma_desc *p, int mode) +static void enh_desc_release_tx_desc(struct dma_desc *p, u8 descriptor_mode) { int ter = (le32_to_cpu(p->des0) & ETDES0_END_RING) >> 21; memset(p, 0, offsetof(struct dma_desc, des2)); - if (mode == STMMAC_CHAIN_MODE) + if (descriptor_mode == STMMAC_CHAIN_MODE) enh_desc_end_tx_desc_on_chain(p); else enh_desc_end_tx_desc_on_ring(p, ter); } static void enh_desc_prepare_tx_desc(struct dma_desc *p, int is_fs, int len, - bool csum_flag, int mode, bool tx_own, - bool ls, unsigned int tot_pkt_len) + bool csum_flag, u8 descriptor_mode, + bool tx_own, bool ls, + unsigned int tot_pkt_len) { u32 tdes0 = le32_to_cpu(p->des0); - if (mode == STMMAC_CHAIN_MODE) + if (descriptor_mode == STMMAC_CHAIN_MODE) enh_set_tx_desc_len_on_chain(p, len); else enh_set_tx_desc_len_on_ring(p, len); diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index 010b4d32484a..e6317b94fff7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -38,21 +38,21 @@ struct dma_edesc; /* Descriptors helpers */ struct stmmac_desc_ops { /* DMA RX descriptor ring initialization */ - void (*init_rx_desc)(struct dma_desc *p, int disable_rx_ic, int mode, - int end, int bfsize); + void (*init_rx_desc)(struct dma_desc *p, int disable_rx_ic, + u8 descriptor_mode, int end, int bfsize); /* DMA TX descriptor ring initialization */ - void (*init_tx_desc)(struct dma_desc *p, int mode, int end); + void (*init_tx_desc)(struct dma_desc *p, u8 descriptor_mode, int end); /* Invoked by the xmit function to prepare the tx descriptor */ void (*prepare_tx_desc)(struct dma_desc *p, int is_fs, int len, - bool csum_flag, int mode, bool tx_own, bool ls, - unsigned int tot_pkt_len); + bool csum_flag, u8 descriptor_mode, bool tx_own, + bool ls, unsigned int tot_pkt_len); void (*prepare_tso_tx_desc)(struct dma_desc *p, int is_fs, int len1, int len2, bool tx_own, bool ls, unsigned int tcphdrlen, unsigned int tcppayloadlen); /* Set/get the owner of the descriptor */ void (*set_tx_owner)(struct dma_desc *p); /* Clean the tx descriptor as soon as the tx irq is received */ - void (*release_tx_desc)(struct dma_desc *p, int mode); + void (*release_tx_desc)(struct dma_desc *p, u8 descriptor_mode); /* Clear interrupt on tx frame completion. When this bit is * set an interrupt happens as soon as the frame is transmitted */ void (*set_tx_ic)(struct dma_desc *p); diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c index 7c3a818c33c1..c4b613564f87 100644 --- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c @@ -108,8 +108,8 @@ static int ndesc_get_rx_status(struct stmmac_extra_stats *x, return ret; } -static void ndesc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode, - int end, int bfsize) +static void ndesc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, + u8 descriptor_mode, int end, int bfsize) { int bfsize1; @@ -118,7 +118,7 @@ static void ndesc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode, bfsize1 = min(bfsize, BUF_SIZE_2KiB - 1); p->des1 |= cpu_to_le32(bfsize1 & RDES1_BUFFER1_SIZE_MASK); - if (mode == STMMAC_CHAIN_MODE) + if (descriptor_mode == STMMAC_CHAIN_MODE) ndesc_rx_set_on_chain(p, end); else ndesc_rx_set_on_ring(p, end, bfsize); @@ -127,10 +127,10 @@ static void ndesc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode, p->des1 |= cpu_to_le32(RDES1_DISABLE_IC); } -static void ndesc_init_tx_desc(struct dma_desc *p, int mode, int end) +static void ndesc_init_tx_desc(struct dma_desc *p, u8 descriptor_mode, int end) { p->des0 &= cpu_to_le32(~TDES0_OWN); - if (mode == STMMAC_CHAIN_MODE) + if (descriptor_mode == STMMAC_CHAIN_MODE) ndesc_tx_set_on_chain(p); else ndesc_end_tx_desc_on_ring(p, end); @@ -146,20 +146,21 @@ static void ndesc_set_rx_owner(struct dma_desc *p, int disable_rx_ic) p->des0 |= cpu_to_le32(RDES0_OWN); } -static void ndesc_release_tx_desc(struct dma_desc *p, int mode) +static void ndesc_release_tx_desc(struct dma_desc *p, u8 descriptor_mode) { int ter = (le32_to_cpu(p->des1) & TDES1_END_RING) >> 25; memset(p, 0, offsetof(struct dma_desc, des2)); - if (mode == STMMAC_CHAIN_MODE) + if (descriptor_mode == STMMAC_CHAIN_MODE) ndesc_tx_set_on_chain(p); else ndesc_end_tx_desc_on_ring(p, ter); } static void ndesc_prepare_tx_desc(struct dma_desc *p, int is_fs, int len, - bool csum_flag, int mode, bool tx_own, - bool ls, unsigned int tot_pkt_len) + bool csum_flag, u8 descriptor_mode, + bool tx_own, bool ls, + unsigned int tot_pkt_len) { u32 tdes1 = le32_to_cpu(p->des1); @@ -176,7 +177,7 @@ static void ndesc_prepare_tx_desc(struct dma_desc *p, int is_fs, int len, p->des1 = cpu_to_le32(tdes1); - if (mode == STMMAC_CHAIN_MODE) + if (descriptor_mode == STMMAC_CHAIN_MODE) norm_set_tx_desc_len_on_chain(p, len); else norm_set_tx_desc_len_on_ring(p, len); From e73b19baa3b1c8eb909d0990e32c8d596b52be53 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 18 Mar 2026 18:26:44 +0000 Subject: [PATCH 0661/1561] net: stmmac: simplify DMA descriptor allocation/init/freeing Rather than having separate branches to handle the different types of descriptors, use the helper functions to calculate the total size of the DMA descriptors. Use this to allocate or free the descriptor array, and use a local variable to hold the address of the descriptor array, so we only need one dma_alloc_coherent() or dma_free_coherent() call in these paths. Also do the same for the receive ring initialisation. The transmit ring can't be converted as there is a case where stmmac_mode_init() is not called. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w2vbQ-0000000DbWc-0ty5@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 57 +++++++++---------- 1 file changed, 27 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 567499e74917..8a84fdd4577d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1885,6 +1885,7 @@ static int __init_dma_rx_desc_rings(struct stmmac_priv *priv, u32 queue, gfp_t flags) { struct stmmac_rx_queue *rx_q = &dma_conf->rx_queue[queue]; + void *des; int ret; netif_dbg(priv, probe, priv->dev, @@ -1928,13 +1929,12 @@ static int __init_dma_rx_desc_rings(struct stmmac_priv *priv, /* Setup the chained descriptor addresses */ if (priv->descriptor_mode == STMMAC_CHAIN_MODE) { if (priv->extend_desc) - stmmac_mode_init(priv, rx_q->dma_erx, - rx_q->dma_rx_phy, - dma_conf->dma_rx_size, 1); + des = rx_q->dma_erx; else - stmmac_mode_init(priv, rx_q->dma_rx, - rx_q->dma_rx_phy, - dma_conf->dma_rx_size, 0); + des = rx_q->dma_rx; + + stmmac_mode_init(priv, des, rx_q->dma_rx_phy, + dma_conf->dma_rx_size, priv->extend_desc); } return 0; @@ -2148,6 +2148,8 @@ static void __free_dma_rx_desc_resources(struct stmmac_priv *priv, u32 queue) { struct stmmac_rx_queue *rx_q = &dma_conf->rx_queue[queue]; + size_t size; + void *addr; /* Release the DMA RX socket buffers */ if (rx_q->xsk_pool) @@ -2159,14 +2161,14 @@ static void __free_dma_rx_desc_resources(struct stmmac_priv *priv, rx_q->xsk_pool = NULL; /* Free DMA regions of consistent memory previously allocated */ - if (!priv->extend_desc) - dma_free_coherent(priv->device, dma_conf->dma_rx_size * - sizeof(struct dma_desc), - rx_q->dma_rx, rx_q->dma_rx_phy); + if (priv->extend_desc) + addr = rx_q->dma_erx; else - dma_free_coherent(priv->device, dma_conf->dma_rx_size * - sizeof(struct dma_extended_desc), - rx_q->dma_erx, rx_q->dma_rx_phy); + addr = rx_q->dma_rx; + + size = stmmac_get_rx_desc_size(priv) * dma_conf->dma_rx_size; + + dma_free_coherent(priv->device, size, addr, rx_q->dma_rx_phy); if (xdp_rxq_info_is_reg(&rx_q->xdp_rxq)) xdp_rxq_info_unreg(&rx_q->xdp_rxq); @@ -2251,6 +2253,8 @@ static int __alloc_dma_rx_desc_resources(struct stmmac_priv *priv, struct page_pool_params pp_params = { 0 }; unsigned int dma_buf_sz_pad, num_pages; unsigned int napi_id; + size_t size; + void *addr; int ret; dma_buf_sz_pad = stmmac_rx_offset(priv) + dma_conf->dma_buf_sz + @@ -2286,24 +2290,17 @@ static int __alloc_dma_rx_desc_resources(struct stmmac_priv *priv, if (!rx_q->buf_pool) return -ENOMEM; - if (priv->extend_desc) { - rx_q->dma_erx = dma_alloc_coherent(priv->device, - dma_conf->dma_rx_size * - sizeof(struct dma_extended_desc), - &rx_q->dma_rx_phy, - GFP_KERNEL); - if (!rx_q->dma_erx) - return -ENOMEM; + size = stmmac_get_rx_desc_size(priv) * dma_conf->dma_rx_size; - } else { - rx_q->dma_rx = dma_alloc_coherent(priv->device, - dma_conf->dma_rx_size * - sizeof(struct dma_desc), - &rx_q->dma_rx_phy, - GFP_KERNEL); - if (!rx_q->dma_rx) - return -ENOMEM; - } + addr = dma_alloc_coherent(priv->device, size, &rx_q->dma_rx_phy, + GFP_KERNEL); + if (!addr) + return -ENOMEM; + + if (priv->extend_desc) + rx_q->dma_erx = addr; + else + rx_q->dma_rx = addr; if (stmmac_xdp_is_enabled(priv) && test_bit(queue, priv->af_xdp_zc_qps)) From b4df951549ddd44ebee4ef3cd7491ed57343fd25 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 18 Mar 2026 18:26:49 +0000 Subject: [PATCH 0662/1561] net: stmmac: use more descriptive names in stmmac_xmit() Use "frag_size" rather than "len", correcting its type to be unsigned int. Rename "des" to "dma_addr" since that's what it is. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w2vbV-0000000DbWi-1O80@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 45 ++++++++++--------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 8a84fdd4577d..3c0206278081 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4684,12 +4684,12 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) unsigned int first_entry, tx_packets; int gso = skb_shinfo(skb)->gso_type; struct stmmac_txq_stats *txq_stats; + struct dma_desc *desc, *first_desc; struct dma_edesc *tbs_desc = NULL; - struct dma_desc *desc, *first; struct stmmac_tx_queue *tx_q; int i, csum_insertion = 0; int entry, first_tx; - dma_addr_t des; + dma_addr_t dma_addr; u32 sdu_len; tx_q = &priv->dma_conf.tx_queue[queue]; @@ -4756,10 +4756,10 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) } desc = stmmac_get_tx_desc(priv, tx_q, entry); - first = desc; + first_desc = desc; if (has_vlan) - stmmac_set_desc_vlan(priv, first, STMMAC_VLAN_INSERT); + stmmac_set_desc_vlan(priv, first_desc, STMMAC_VLAN_INSERT); enh_desc = priv->plat->enh_desc; /* To program the descriptors according to the size of the frame */ @@ -4774,7 +4774,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) for (i = 0; i < nfrags; i++) { const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - int len = skb_frag_size(frag); + unsigned int frag_size = skb_frag_size(frag); bool last_segment = (i == (nfrags - 1)); entry = STMMAC_GET_ENTRY(entry, priv->dma_conf.dma_tx_size); @@ -4782,16 +4782,17 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) desc = stmmac_get_tx_desc(priv, tx_q, entry); - des = skb_frag_dma_map(priv->device, frag, 0, len, - DMA_TO_DEVICE); - if (dma_mapping_error(priv->device, des)) + dma_addr = skb_frag_dma_map(priv->device, frag, 0, frag_size, + DMA_TO_DEVICE); + if (dma_mapping_error(priv->device, dma_addr)) goto dma_map_err; /* should reuse desc w/o issues */ - stmmac_set_tx_skb_dma_entry(tx_q, entry, des, len, true); - stmmac_set_desc_addr(priv, desc, des); + stmmac_set_tx_skb_dma_entry(tx_q, entry, dma_addr, frag_size, + true); + stmmac_set_desc_addr(priv, desc, dma_addr); /* Prepare the descriptor and set the own bit too */ - stmmac_prepare_tx_desc(priv, desc, 0, len, csum_insertion, + stmmac_prepare_tx_desc(priv, desc, 0, frag_size, csum_insertion, priv->descriptor_mode, 1, last_segment, skb->len); } @@ -4839,7 +4840,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) netdev_dbg(priv->dev, "%s: curr=%d dirty=%d f=%d, e=%d, first=%p, nfrags=%d", __func__, tx_q->cur_tx, tx_q->dirty_tx, first_entry, - entry, first, nfrags); + entry, first_desc, nfrags); netdev_dbg(priv->dev, ">>> frame to be transmitted: "); print_pkt(skb->data, skb->len); @@ -4858,7 +4859,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) u64_stats_update_end(&txq_stats->q_syncp); if (priv->sarc_type) - stmmac_set_desc_sarc(priv, first, priv->sarc_type); + stmmac_set_desc_sarc(priv, first_desc, priv->sarc_type); /* Ready to fill the first descriptor and set the OWN bit w/o any * problems because all the descriptors are actually ready to be @@ -4867,15 +4868,15 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) if (likely(!is_jumbo)) { bool last_segment = (nfrags == 0); - des = dma_map_single(priv->device, skb->data, - nopaged_len, DMA_TO_DEVICE); - if (dma_mapping_error(priv->device, des)) + dma_addr = dma_map_single(priv->device, skb->data, + nopaged_len, DMA_TO_DEVICE); + if (dma_mapping_error(priv->device, dma_addr)) goto dma_map_err; - stmmac_set_tx_skb_dma_entry(tx_q, first_entry, des, nopaged_len, - false); + stmmac_set_tx_skb_dma_entry(tx_q, first_entry, dma_addr, + nopaged_len, false); - stmmac_set_desc_addr(priv, first, des); + stmmac_set_desc_addr(priv, first_desc, dma_addr); if (last_segment) stmmac_set_tx_dma_last_segment(tx_q, first_entry); @@ -4884,11 +4885,11 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) priv->hwts_tx_en)) { /* declare that device is doing timestamping */ skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; - stmmac_enable_tx_timestamp(priv, first); + stmmac_enable_tx_timestamp(priv, first_desc); } /* Prepare the first descriptor setting the OWN bit too */ - stmmac_prepare_tx_desc(priv, first, 1, nopaged_len, + stmmac_prepare_tx_desc(priv, first_desc, 1, nopaged_len, csum_insertion, priv->descriptor_mode, 0, last_segment, skb->len); } @@ -4900,7 +4901,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) stmmac_set_desc_tbs(priv, tbs_desc, ts.tv_sec, ts.tv_nsec); } - stmmac_set_tx_owner(priv, first); + stmmac_set_tx_owner(priv, first_desc); netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len); From 6b4286e0550814cdc4b897f881ec1fa8b0313227 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 18 Mar 2026 18:26:54 +0000 Subject: [PATCH 0663/1561] net: stmmac: rename STMMAC_GET_ENTRY() -> STMMAC_NEXT_ENTRY() STMMAC_GET_ENTRY() doesn't describe what this macro is doing - it is incrementing the provided index for the circular array of descriptors. Replace "GET" with "NEXT" as this better describes the action here. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w2vba-0000000DbWo-1oL5@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/chain_mode.c | 2 +- drivers/net/ethernet/stmicro/stmmac/common.h | 2 +- .../net/ethernet/stmicro/stmmac/ring_mode.c | 2 +- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 26 +++++++++---------- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/chain_mode.c b/drivers/net/ethernet/stmicro/stmmac/chain_mode.c index 120a009c9992..dc7df4208c53 100644 --- a/drivers/net/ethernet/stmicro/stmmac/chain_mode.c +++ b/drivers/net/ethernet/stmicro/stmmac/chain_mode.c @@ -46,7 +46,7 @@ static int jumbo_frm(struct stmmac_tx_queue *tx_q, struct sk_buff *skb, while (len != 0) { tx_q->tx_skbuff[entry] = NULL; - entry = STMMAC_GET_ENTRY(entry, priv->dma_conf.dma_tx_size); + entry = STMMAC_NEXT_ENTRY(entry, priv->dma_conf.dma_tx_size); desc = tx_q->dma_tx + entry; if (len > bmax) { diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index f1628de8ed18..8166389c853f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -63,7 +63,7 @@ static inline bool dwmac_is_xmac(enum dwmac_core_type core_type) #define DMA_MIN_RX_SIZE 64 #define DMA_MAX_RX_SIZE 1024 #define DMA_DEFAULT_RX_SIZE 512 -#define STMMAC_GET_ENTRY(x, size) ((x + 1) & (size - 1)) +#define STMMAC_NEXT_ENTRY(x, size) ((x + 1) & (size - 1)) #undef FRAME_FILTER_DEBUG /* #define FRAME_FILTER_DEBUG */ diff --git a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c index 382d94a3b972..78fc6aa5bbe9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c +++ b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c @@ -51,7 +51,7 @@ static int jumbo_frm(struct stmmac_tx_queue *tx_q, struct sk_buff *skb, stmmac_prepare_tx_desc(priv, desc, 1, bmax, csum, STMMAC_RING_MODE, 0, false, skb->len); tx_q->tx_skbuff[entry] = NULL; - entry = STMMAC_GET_ENTRY(entry, priv->dma_conf.dma_tx_size); + entry = STMMAC_NEXT_ENTRY(entry, priv->dma_conf.dma_tx_size); if (priv->extend_desc) desc = (struct dma_desc *)(tx_q->dma_etx + entry); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 3c0206278081..5062537f79e9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2780,7 +2780,7 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget) xsk_tx_metadata_to_compl(meta, &tx_q->tx_skbuff_dma[entry].xsk_meta); - tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, priv->dma_conf.dma_tx_size); + tx_q->cur_tx = STMMAC_NEXT_ENTRY(tx_q->cur_tx, priv->dma_conf.dma_tx_size); entry = tx_q->cur_tx; } u64_stats_update_begin(&txq_stats->napi_syncp); @@ -2948,7 +2948,7 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue, stmmac_release_tx_desc(priv, p, priv->descriptor_mode); - entry = STMMAC_GET_ENTRY(entry, priv->dma_conf.dma_tx_size); + entry = STMMAC_NEXT_ENTRY(entry, priv->dma_conf.dma_tx_size); } tx_q->dirty_tx = entry; @@ -4303,7 +4303,7 @@ static bool stmmac_vlan_insert(struct stmmac_priv *priv, struct sk_buff *skb, return false; stmmac_set_tx_owner(priv, p); - tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, priv->dma_conf.dma_tx_size); + tx_q->cur_tx = STMMAC_NEXT_ENTRY(tx_q->cur_tx, priv->dma_conf.dma_tx_size); return true; } @@ -4331,7 +4331,7 @@ static void stmmac_tso_allocator(struct stmmac_priv *priv, dma_addr_t des, while (tmp_len > 0) { dma_addr_t curr_addr; - tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, + tx_q->cur_tx = STMMAC_NEXT_ENTRY(tx_q->cur_tx, priv->dma_conf.dma_tx_size); WARN_ON(tx_q->tx_skbuff[tx_q->cur_tx]); @@ -4473,7 +4473,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) stmmac_set_mss(priv, mss_desc, mss); tx_q->mss = mss; - tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, + tx_q->cur_tx = STMMAC_NEXT_ENTRY(tx_q->cur_tx, priv->dma_conf.dma_tx_size); WARN_ON(tx_q->tx_skbuff[tx_q->cur_tx]); } @@ -4573,7 +4573,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) * ndo_start_xmit will fill this descriptor the next time it's * called and stmmac_tx_clean may clean up to this descriptor. */ - tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, priv->dma_conf.dma_tx_size); + tx_q->cur_tx = STMMAC_NEXT_ENTRY(tx_q->cur_tx, priv->dma_conf.dma_tx_size); if (unlikely(stmmac_tx_avail(priv, queue) <= (MAX_SKB_FRAGS + 1))) { netif_dbg(priv, hw, priv->dev, "%s: stop transmitted packets\n", @@ -4777,7 +4777,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) unsigned int frag_size = skb_frag_size(frag); bool last_segment = (i == (nfrags - 1)); - entry = STMMAC_GET_ENTRY(entry, priv->dma_conf.dma_tx_size); + entry = STMMAC_NEXT_ENTRY(entry, priv->dma_conf.dma_tx_size); WARN_ON(tx_q->tx_skbuff[entry]); desc = stmmac_get_tx_desc(priv, tx_q, entry); @@ -4833,7 +4833,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) * ndo_start_xmit will fill this descriptor the next time it's * called and stmmac_tx_clean may clean up to this descriptor. */ - entry = STMMAC_GET_ENTRY(entry, priv->dma_conf.dma_tx_size); + entry = STMMAC_NEXT_ENTRY(entry, priv->dma_conf.dma_tx_size); tx_q->cur_tx = entry; if (netif_msg_pktdata(priv)) { @@ -4998,7 +4998,7 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue) dma_wmb(); stmmac_set_rx_owner(priv, p, use_rx_wd); - entry = STMMAC_GET_ENTRY(entry, priv->dma_conf.dma_rx_size); + entry = STMMAC_NEXT_ENTRY(entry, priv->dma_conf.dma_rx_size); } rx_q->dirty_rx = entry; stmmac_set_queue_rx_tail_ptr(priv, rx_q, queue, rx_q->dirty_rx); @@ -5139,7 +5139,7 @@ static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue, stmmac_enable_dma_transmission(priv, priv->ioaddr, queue); - entry = STMMAC_GET_ENTRY(entry, priv->dma_conf.dma_tx_size); + entry = STMMAC_NEXT_ENTRY(entry, priv->dma_conf.dma_tx_size); tx_q->cur_tx = entry; return STMMAC_XDP_TX; @@ -5370,7 +5370,7 @@ static bool stmmac_rx_refill_zc(struct stmmac_priv *priv, u32 queue, u32 budget) dma_wmb(); stmmac_set_rx_owner(priv, rx_desc, use_rx_wd); - entry = STMMAC_GET_ENTRY(entry, priv->dma_conf.dma_rx_size); + entry = STMMAC_NEXT_ENTRY(entry, priv->dma_conf.dma_rx_size); } if (rx_desc) { @@ -5454,7 +5454,7 @@ read_again: break; /* Prefetch the next RX descriptor */ - rx_q->cur_rx = STMMAC_GET_ENTRY(rx_q->cur_rx, + rx_q->cur_rx = STMMAC_NEXT_ENTRY(rx_q->cur_rx, priv->dma_conf.dma_rx_size); next_entry = rx_q->cur_rx; @@ -5638,7 +5638,7 @@ read_again: if (unlikely(status & dma_own)) break; - rx_q->cur_rx = STMMAC_GET_ENTRY(rx_q->cur_rx, + rx_q->cur_rx = STMMAC_NEXT_ENTRY(rx_q->cur_rx, priv->dma_conf.dma_rx_size); next_entry = rx_q->cur_rx; From ca7e99335aea7c5977683624ba319157a4603f96 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 18 Mar 2026 14:43:55 +0100 Subject: [PATCH 0664/1561] net: fjes: Drop fjes_acpi_driver and rework initialization The ACPI driver interface used by the Fujitsu Extended Socket (fjes) Network Device driver is redundant because its only role is to create a platform device the fjes platform driver can bind to, which can be done already at the module initialization time. Namely, acpi_find_extended_socket_device() looks for the requisite ACPI device object anyway and it may as well check its resources, and the platform device can be created when the ACPI object in question has been found (and it can be freed when the module is unloaded). Moreover, as a rule, it is better to avoid binding drivers directly to ACPI device objects [1]. Accordingly, drop fjes_acpi_driver, adjust the module initialization and exit code as per the above and set the fwnode for the fjes platform device to point to the corresponding ACPI device object as its ACPI companion. While this is not expected to alter functionality, it changes sysfs layout and so it will be visible to user space. Link: https://lore.kernel.org/all/2396510.ElGaqSPkdT@rafael.j.wysocki/ [1] Signed-off-by: Rafael J. Wysocki Reviewed-by: Simon Horman Link: https://patch.msgid.link/12857407.O9o76ZdvQC@rafael.j.wysocki Signed-off-by: Jakub Kicinski --- drivers/net/fjes/fjes_main.c | 120 +++++++++++++++-------------------- 1 file changed, 50 insertions(+), 70 deletions(-) diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c index b63965d9a1ba..1f0f38980549 100644 --- a/drivers/net/fjes/fjes_main.c +++ b/drivers/net/fjes/fjes_main.c @@ -111,56 +111,6 @@ fjes_get_acpi_resource(struct acpi_resource *acpi_res, void *data) return AE_OK; } -static struct resource fjes_resource[] = { - DEFINE_RES_MEM(0, 1), - DEFINE_RES_IRQ(0) -}; - -static int fjes_acpi_add(struct acpi_device *device) -{ - struct platform_device *plat_dev; - acpi_status status; - - if (!is_extended_socket_device(device)) - return -ENODEV; - - if (acpi_check_extended_socket_status(device)) - return -ENODEV; - - status = acpi_walk_resources(device->handle, METHOD_NAME__CRS, - fjes_get_acpi_resource, fjes_resource); - if (ACPI_FAILURE(status)) - return -ENODEV; - - /* create platform_device */ - plat_dev = platform_device_register_simple(DRV_NAME, 0, fjes_resource, - ARRAY_SIZE(fjes_resource)); - if (IS_ERR(plat_dev)) - return PTR_ERR(plat_dev); - - device->driver_data = plat_dev; - - return 0; -} - -static void fjes_acpi_remove(struct acpi_device *device) -{ - struct platform_device *plat_dev; - - plat_dev = (struct platform_device *)acpi_driver_data(device); - platform_device_unregister(plat_dev); -} - -static struct acpi_driver fjes_acpi_driver = { - .name = DRV_NAME, - .class = DRV_NAME, - .ids = fjes_acpi_ids, - .ops = { - .add = fjes_acpi_add, - .remove = fjes_acpi_remove, - }, -}; - static int fjes_setup_resources(struct fjes_adapter *adapter) { struct net_device *netdev = adapter->netdev; @@ -1470,43 +1420,81 @@ static struct platform_driver fjes_driver = { .remove = fjes_remove, }; +struct fjes_acpi_walk_context { + struct acpi_device *adev; + struct resource resources[2]; +}; + static acpi_status acpi_find_extended_socket_device(acpi_handle obj_handle, u32 level, void *context, void **return_value) { + struct fjes_acpi_walk_context *fjes_context = context; struct acpi_device *device; - bool *found = context; + acpi_status status; - device = acpi_fetch_acpi_dev(obj_handle); + device = acpi_get_acpi_dev(obj_handle); if (!device) return AE_OK; if (strcmp(acpi_device_hid(device), ACPI_MOTHERBOARD_RESOURCE_HID)) - return AE_OK; + goto skip; if (!is_extended_socket_device(device)) - return AE_OK; + goto skip; if (acpi_check_extended_socket_status(device)) - return AE_OK; + goto skip; + + status = acpi_walk_resources(obj_handle, METHOD_NAME__CRS, + fjes_get_acpi_resource, fjes_context->resources); + if (ACPI_FAILURE(status)) + goto skip; + + fjes_context->adev = device; - *found = true; return AE_CTRL_TERMINATE; + +skip: + acpi_dev_put(device); + return AE_OK; } +static struct platform_device *fjes_plat_dev; + /* fjes_init_module - Driver Registration Routine */ static int __init fjes_init_module(void) { - bool found = false; + struct fjes_acpi_walk_context fjes_context = { + .adev = NULL, + .resources = { + DEFINE_RES_MEM(0, 1), + DEFINE_RES_IRQ(0) + } + }; + struct platform_device_info pdevinfo; int result; acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, - acpi_find_extended_socket_device, NULL, &found, + acpi_find_extended_socket_device, NULL, &fjes_context, NULL); - - if (!found) + if (!fjes_context.adev) return -ENODEV; + memset(&pdevinfo, 0, sizeof(pdevinfo)); + + pdevinfo.name = DRV_NAME; + pdevinfo.res = fjes_context.resources; + pdevinfo.num_res = ARRAY_SIZE(fjes_context.resources); + pdevinfo.fwnode = acpi_fwnode_handle(fjes_context.adev); + + fjes_plat_dev = platform_device_register_full(&pdevinfo); + + acpi_dev_put(fjes_context.adev); + + if (IS_ERR(fjes_plat_dev)) + return PTR_ERR(fjes_plat_dev); + pr_info("%s - version %s - %s\n", fjes_driver_string, fjes_driver_version, fjes_copyright); @@ -1515,19 +1503,11 @@ static int __init fjes_init_module(void) result = platform_driver_register(&fjes_driver); if (result < 0) { fjes_dbg_exit(); + platform_device_unregister(fjes_plat_dev); return result; } - result = acpi_bus_register_driver(&fjes_acpi_driver); - if (result < 0) - goto fail_acpi_driver; - return 0; - -fail_acpi_driver: - platform_driver_unregister(&fjes_driver); - fjes_dbg_exit(); - return result; } module_init(fjes_init_module); @@ -1535,9 +1515,9 @@ module_init(fjes_init_module); /* fjes_exit_module - Driver Exit Cleanup Routine */ static void __exit fjes_exit_module(void) { - acpi_bus_unregister_driver(&fjes_acpi_driver); platform_driver_unregister(&fjes_driver); fjes_dbg_exit(); + platform_device_unregister(fjes_plat_dev); } module_exit(fjes_exit_module); From 6b5f49176a08e01deb7b1435658152237bb44173 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Wed, 18 Mar 2026 03:07:52 +0000 Subject: [PATCH 0665/1561] net: dsa: mxl862xx: don't read out-of-bounds The write loop in mxl862xx_api_wrap() computes the word count as (size + 1) / 2, rounding up for odd-sized structs. On the last iteration of an odd-sized buffer it reads a full __le16 from data[i], accessing one byte past the end of the caller's struct. KASAN catches this as a stack-out-of-bounds read during probe (e.g. from mxl862xx_bridge_config_fwd() because of the odd length of sizeof(struct mxl862xx_bridge_config) == 49). The read-back loop already handles this case, it writes only a single byte when (i * 2 + 1) == size. The write loop lacked the same guard. In practice the over-read is harmless: the extra stack byte is sent to the firmware which ignores trailing data beyond the command's declared payload size. Apply the same odd-size last-byte handling to the write path: when the final word contains only one valid byte, send *(u8 *)&data[i] instead of le16_to_cpu(data[i]). This is endian-safe because data is __le16-encoded and the low byte is always at the lowest address regardless of host byte order. Signed-off-by: Daniel Golle Reviewed-by: Simon Horman Link: https://patch.msgid.link/83356ad9c9a4470dd49b6b3d661c2a8dd85cc6a1.1773803190.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mxl862xx/mxl862xx-host.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/mxl862xx/mxl862xx-host.c b/drivers/net/dsa/mxl862xx/mxl862xx-host.c index 8c55497a0ce8..4eefd2a759a7 100644 --- a/drivers/net/dsa/mxl862xx/mxl862xx-host.c +++ b/drivers/net/dsa/mxl862xx/mxl862xx-host.c @@ -175,8 +175,14 @@ int mxl862xx_api_wrap(struct mxl862xx_priv *priv, u16 cmd, void *_data, goto out; } - ret = mxl862xx_reg_write(priv, MXL862XX_MMD_REG_DATA_FIRST + off, - le16_to_cpu(data[i])); + if ((i * 2 + 1) == size) + ret = mxl862xx_reg_write(priv, + MXL862XX_MMD_REG_DATA_FIRST + off, + *(u8 *)&data[i]); + else + ret = mxl862xx_reg_write(priv, + MXL862XX_MMD_REG_DATA_FIRST + off, + le16_to_cpu(data[i])); if (ret < 0) goto out; } From f44218cd5e6a3d98b137344b808ee154117ce765 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 17 Mar 2026 17:40:47 +0100 Subject: [PATCH 0666/1561] net: airoha: Reset PPE cpu port configuration in airoha_ppe_hw_init() Before this patch, the PPE cpu port configuration used for a specific GDM device was set just running ndo_init() callback during the device initialization. The selected PPE cpu port configuration depends on the QDMA block assigned to the GDM port. The QDMA block is selected according to the GDM port LAN/WAN configuration as specified in the commit '8737d7194d6d ("net: airoha: select QDMA block according LAN/WAN configuration"). However, the user selected PPE cpu port configuration can be different with respect to the one hardcoded in the NPU firmware binary. The hardcoded NPU PPE cpu port configuration is loaded initializing the PPE engine running the NPU ops ppe_init() callback in airoha_ppe_offload_setup routine (this is executed at runtime by the netfilter flowtable infrastructure during flow offloading). Reset the PPE cpu port configuration in airoha_ppe_hw_init routine in order to apply the user requested setup according to the device DTS. Please note this patch is fixing an issue not visible to the user (so we do not need to backport it) since airoha_eth driver currently supports just the internal phy available via the MT7530 DSA switch and there are no WAN interfaces officially supported since PCS/external phy is not merged mainline yet (it will be posted with following patches). Signed-off-by: Lorenzo Bianconi Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260317-airoha-fix-ppe-def-cpu-v1-1-338533d8e234@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 28 +++++------------------ drivers/net/ethernet/airoha/airoha_eth.h | 2 ++ drivers/net/ethernet/airoha/airoha_ppe.c | 23 ++++++++++++++++++- drivers/net/ethernet/airoha/airoha_regs.h | 7 +++--- 4 files changed, 33 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index db9d9531711e..961325da833b 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -1755,8 +1755,7 @@ static int airoha_dev_init(struct net_device *dev) { struct airoha_gdm_port *port = netdev_priv(dev); struct airoha_eth *eth = port->eth; - u32 fe_cpu_port; - u8 ppe_id; + int i; /* QDMA0 is used for lan ports while QDMA1 is used for WAN ports */ port->qdma = ð->qdma[!airoha_is_lan_gdm_port(port)]; @@ -1774,28 +1773,13 @@ static int airoha_dev_init(struct net_device *dev) if (err) return err; } - fallthrough; - case AIROHA_GDM2_IDX: - if (airoha_ppe_is_enabled(eth, 1)) { - /* For PPE2 always use secondary cpu port. */ - fe_cpu_port = FE_PSE_PORT_CDM2; - ppe_id = 1; - break; - } - fallthrough; - default: { - u8 qdma_id = port->qdma - ð->qdma[0]; - - /* For PPE1 select cpu port according to the running QDMA. */ - fe_cpu_port = qdma_id ? FE_PSE_PORT_CDM2 : FE_PSE_PORT_CDM1; - ppe_id = 0; + break; + default: break; } - } - airoha_fe_rmw(eth, REG_PPE_DFT_CPORT0(ppe_id), - DFT_CPORT_MASK(port->id), - __field_prep(DFT_CPORT_MASK(port->id), fe_cpu_port)); + for (i = 0; i < eth->soc->num_ppe; i++) + airoha_ppe_set_cpu_port(port, i); return 0; } @@ -1898,7 +1882,7 @@ static u32 airoha_get_dsa_tag(struct sk_buff *skb, struct net_device *dev) #endif } -static int airoha_get_fe_port(struct airoha_gdm_port *port) +int airoha_get_fe_port(struct airoha_gdm_port *port) { struct airoha_qdma *qdma = port->qdma; struct airoha_eth *eth = qdma->eth; diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h index 8cfa94ec084a..7df4dbcd8861 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.h +++ b/drivers/net/ethernet/airoha/airoha_eth.h @@ -646,9 +646,11 @@ static inline bool airoha_is_7583(struct airoha_eth *eth) return eth->soc->version == 0x7583; } +int airoha_get_fe_port(struct airoha_gdm_port *port); bool airoha_is_valid_gdm_port(struct airoha_eth *eth, struct airoha_gdm_port *port); +void airoha_ppe_set_cpu_port(struct airoha_gdm_port *port, u8 ppe_id); bool airoha_ppe_is_enabled(struct airoha_eth *eth, int index); void airoha_ppe_check_skb(struct airoha_ppe_dev *dev, struct sk_buff *skb, u16 hash, bool rx_wlan); diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c index ec5ce41dad80..7666b1d2f4f6 100644 --- a/drivers/net/ethernet/airoha/airoha_ppe.c +++ b/drivers/net/ethernet/airoha/airoha_ppe.c @@ -85,6 +85,20 @@ static u32 airoha_ppe_get_timestamp(struct airoha_ppe *ppe) return FIELD_GET(AIROHA_FOE_IB1_BIND_TIMESTAMP, timestamp); } +void airoha_ppe_set_cpu_port(struct airoha_gdm_port *port, u8 ppe_id) +{ + struct airoha_qdma *qdma = port->qdma; + u8 fport = airoha_get_fe_port(port); + struct airoha_eth *eth = qdma->eth; + u8 qdma_id = qdma - ð->qdma[0]; + u32 fe_cpu_port; + + fe_cpu_port = qdma_id ? FE_PSE_PORT_CDM2 : FE_PSE_PORT_CDM1; + airoha_fe_rmw(eth, REG_PPE_DFT_CPORT(ppe_id, fport), + DFT_CPORT_MASK(fport), + __field_prep(DFT_CPORT_MASK(fport), fe_cpu_port)); +} + static void airoha_ppe_hw_init(struct airoha_ppe *ppe) { u32 sram_ppe_num_data_entries = PPE_SRAM_NUM_ENTRIES, sram_num_entries; @@ -147,7 +161,9 @@ static void airoha_ppe_hw_init(struct airoha_ppe *ppe) airoha_fe_wr(eth, REG_PPE_HASH_SEED(i), PPE_HASH_SEED); - for (p = 0; p < ARRAY_SIZE(eth->ports); p++) + for (p = 0; p < ARRAY_SIZE(eth->ports); p++) { + struct airoha_gdm_port *port = eth->ports[p]; + airoha_fe_rmw(eth, REG_PPE_MTU(i, p), FP0_EGRESS_MTU_MASK | FP1_EGRESS_MTU_MASK, @@ -155,6 +171,11 @@ static void airoha_ppe_hw_init(struct airoha_ppe *ppe) AIROHA_MAX_MTU) | FIELD_PREP(FP1_EGRESS_MTU_MASK, AIROHA_MAX_MTU)); + if (!port) + continue; + + airoha_ppe_set_cpu_port(port, i); + } } } diff --git a/drivers/net/ethernet/airoha/airoha_regs.h b/drivers/net/ethernet/airoha/airoha_regs.h index 29878b954c77..436f3c8779c1 100644 --- a/drivers/net/ethernet/airoha/airoha_regs.h +++ b/drivers/net/ethernet/airoha/airoha_regs.h @@ -312,10 +312,9 @@ #define REG_PPE_HASH_SEED(_n) (((_n) ? PPE2_BASE : PPE1_BASE) + 0x244) #define PPE_HASH_SEED 0x12345678 -#define REG_PPE_DFT_CPORT0(_n) (((_n) ? PPE2_BASE : PPE1_BASE) + 0x248) -#define DFT_CPORT_MASK(_n) GENMASK(3 + ((_n) << 2), ((_n) << 2)) - -#define REG_PPE_DFT_CPORT1(_n) (((_n) ? PPE2_BASE : PPE1_BASE) + 0x24c) +#define REG_PPE_DFT_CPORT_BASE(_n) (((_n) ? PPE2_BASE : PPE1_BASE) + 0x248) +#define REG_PPE_DFT_CPORT(_m, _n) (REG_PPE_DFT_CPORT_BASE(_m) + (((_n) / 8) << 2)) +#define DFT_CPORT_MASK(_n) GENMASK(3 + (((_n) % 8) << 2), (((_n) % 8) << 2)) #define REG_PPE_TB_HASH_CFG(_n) (((_n) ? PPE2_BASE : PPE1_BASE) + 0x250) #define PPE_DRAM_HASH1_MODE_MASK GENMASK(31, 28) From eb2415854f3ba7d95c4f30d259f6f598ab604616 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 18 Mar 2026 08:27:44 +0000 Subject: [PATCH 0667/1561] net: phylink: add debug for phy_config_inband() Add debug for the phy_config_inband() call so we can see which inband modes are being configured at the PHY. Signed-off-by: Russell King (Oracle) Reviewed-by: Maxime Chevallier Link: https://patch.msgid.link/E1w2mFk-0000000DXW2-2PR9@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/phylink.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index aba1b35c7cd7..f80cd13c8e32 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1341,6 +1341,13 @@ static void phylink_major_config(struct phylink *pl, bool restart, } if (pl->phydev && pl->phy_ib_mode) { + phylink_dbg(pl, "configuring PHY for inband%s%s%s\n", + pl->phy_ib_mode & LINK_INBAND_DISABLE ? + " disable" : "", + pl->phy_ib_mode & LINK_INBAND_ENABLE ? + " enable" : "", + pl->phy_ib_mode & LINK_INBAND_BYPASS ? + " bypass" : ""); err = phy_config_inband(pl->phydev, pl->phy_ib_mode); if (err < 0) { phylink_err(pl, "phy_config_inband: %pe\n", From 79cfb2d1f25a2de43d846d91143c8b66e99688e5 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 18 Mar 2026 05:22:48 -0700 Subject: [PATCH 0668/1561] bonding: remove bonding_priv.h bonding_priv.h only defined DRV_NAME and DRV_DESCRIPTION, but caused unnecessary recompilation: it included to define bond_version, which is used solely in bond_procfs.c. With CONFIG_LOCALVERSION_AUTO=y, utsrelease.h is regenerated on every git commit, so any git operation triggered recompilation of bond_main.c which also included bonding_priv.h. Remove the header entirely, as suggested by Jakub, given the macros on this file can be integrated into the C files directly. Signed-off-by: Breno Leitao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260318-bond_uts-v2-1-033fe0d4e903@debian.org Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_main.c | 6 ++---- drivers/net/bonding/bond_procfs.c | 11 ++++++----- drivers/net/bonding/bonding_priv.h | 22 ---------------------- 3 files changed, 8 insertions(+), 31 deletions(-) delete mode 100644 drivers/net/bonding/bonding_priv.h diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 7fb10e93883d..3e3f7dab791e 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -93,8 +93,6 @@ #include #include -#include "bonding_priv.h" - /*---------------------------- Module parameters ----------------------------*/ /* monitor all links that often (in milliseconds). <=0 disables monitoring */ @@ -5876,7 +5874,7 @@ static int bond_ethtool_get_link_ksettings(struct net_device *bond_dev, static void bond_ethtool_get_drvinfo(struct net_device *bond_dev, struct ethtool_drvinfo *drvinfo) { - strscpy(drvinfo->driver, DRV_NAME, sizeof(drvinfo->driver)); + strscpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver)); snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), "%d", BOND_ABI_VERSION); } @@ -6656,6 +6654,6 @@ static void __exit bonding_exit(void) module_init(bonding_init); module_exit(bonding_exit); MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION(DRV_DESCRIPTION); +MODULE_DESCRIPTION("Ethernet Channel Bonding Driver"); MODULE_AUTHOR("Thomas Davis, tadavis@lbl.gov and many others"); MODULE_IMPORT_NS("NETDEV_INTERNAL"); diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c index 7edf72ec816a..e34f80305191 100644 --- a/drivers/net/bonding/bond_procfs.c +++ b/drivers/net/bonding/bond_procfs.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include #include @@ -6,7 +7,7 @@ #include #include -#include "bonding_priv.h" +#define bond_version "Ethernet Channel Bonding Driver: v" UTS_RELEASE "\n" static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) @@ -290,7 +291,7 @@ void bond_create_proc_entry(struct bonding *bond) bn->proc_dir, &bond_info_seq_ops, bond); if (bond->proc_entry == NULL) netdev_warn(bond_dev, "Cannot create /proc/net/%s/%s\n", - DRV_NAME, bond_dev->name); + KBUILD_MODNAME, bond_dev->name); else memcpy(bond->proc_file_name, bond_dev->name, IFNAMSIZ); } @@ -314,10 +315,10 @@ void bond_remove_proc_entry(struct bonding *bond) void __net_init bond_create_proc_dir(struct bond_net *bn) { if (!bn->proc_dir) { - bn->proc_dir = proc_mkdir(DRV_NAME, bn->net->proc_net); + bn->proc_dir = proc_mkdir(KBUILD_MODNAME, bn->net->proc_net); if (!bn->proc_dir) pr_warn("Warning: Cannot create /proc/net/%s\n", - DRV_NAME); + KBUILD_MODNAME); } } @@ -326,7 +327,7 @@ void __net_init bond_create_proc_dir(struct bond_net *bn) void __net_exit bond_destroy_proc_dir(struct bond_net *bn) { if (bn->proc_dir) { - remove_proc_entry(DRV_NAME, bn->net->proc_net); + remove_proc_entry(KBUILD_MODNAME, bn->net->proc_net); bn->proc_dir = NULL; } } diff --git a/drivers/net/bonding/bonding_priv.h b/drivers/net/bonding/bonding_priv.h deleted file mode 100644 index fef6288c6944..000000000000 --- a/drivers/net/bonding/bonding_priv.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-1.0+ */ -/* - * Bond several ethernet interfaces into a Cisco, running 'Etherchannel'. - * - * Portions are (c) Copyright 1995 Simon "Guru Aleph-Null" Janes - * NCM: Network and Communications Management, Inc. - * - * BUT, I'm the one who modified it for ethernet, so: - * (c) Copyright 1999, Thomas Davis, tadavis@lbl.gov - * - */ - -#ifndef _BONDING_PRIV_H -#define _BONDING_PRIV_H -#include - -#define DRV_NAME "bonding" -#define DRV_DESCRIPTION "Ethernet Channel Bonding Driver" - -#define bond_version DRV_DESCRIPTION ": v" UTS_RELEASE "\n" - -#endif From 865926e26e05be52ff311883619b9198298698df Mon Sep 17 00:00:00 2001 From: Bobby Eshleman Date: Tue, 17 Mar 2026 15:09:35 -0700 Subject: [PATCH 0669/1561] selftests/vsock: fix vmtest.sh for read-only nested VM runners When running vmtest.sh inside a nested VM, there occurs a problem with stacking two sets of virtiofs/overlay layers (the first set from the outer VM and the second set from the inner VM). The virtme init scripts (sshd, udhcpd, etc...) fail to execute basic programs (e.g., /bin/cat) and load library dependencies (e.g., libpam) due to ESTALE. This only occurs when both layers (outer and inner) use virtiofs. Work around this by using 9p in the inner VM via --force-9p. Additionally, when the outer VM is read-only, the inner VM's attempt at populating SSH keys to the root filesystem fails: virtme-ng-init: mkdir: cannot create directory '/root/.cache': Read-only file system Work around this by creating a temporary home directory with generated SSH keys and passing it through to the guest as /root via --rwdir. Disable strict host key checking in vm_ssh() since the VM will be seen as a new host each run. The --rw arg had to be removed to prevent a vng complaint about overlay (in combination with the other parameters). The guest doesn't really need write access anyway, so this was probably overly permissive to begin with. Signed-off-by: Bobby Eshleman Link: https://patch.msgid.link/20260317-vsock-vmtest-nested-fixes-v2-1-0b3f53b80a0f@meta.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/vsock/vmtest.sh | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/vsock/vmtest.sh b/tools/testing/selftests/vsock/vmtest.sh index 9eacb753238a..c8d5cb66bf5f 100755 --- a/tools/testing/selftests/vsock/vmtest.sh +++ b/tools/testing/selftests/vsock/vmtest.sh @@ -42,6 +42,8 @@ readonly KERNEL_CMDLINE="\ virtme.ssh virtme_ssh_channel=tcp virtme_ssh_user=$USER \ " readonly LOG=$(mktemp /tmp/vsock_vmtest_XXXX.log) +readonly TEST_HOME="$(mktemp -d /tmp/vmtest_home_XXXX)" +readonly SSH_KEY_PATH="${TEST_HOME}"/.ssh/id_ed25519 # Namespace tests must use the ns_ prefix. This is checked in check_netns() and # is used to determine if a test needs namespace setup before test execution. @@ -257,7 +259,12 @@ vm_ssh() { shift - ${ns_exec} ssh -q -o UserKnownHostsFile=/dev/null -p "${SSH_HOST_PORT}" localhost "$@" + ${ns_exec} ssh -q \ + -i "${SSH_KEY_PATH}" \ + -o UserKnownHostsFile=/dev/null \ + -o StrictHostKeyChecking=no \ + -p "${SSH_HOST_PORT}" \ + localhost "$@" return $? } @@ -265,6 +272,7 @@ vm_ssh() { cleanup() { terminate_pidfiles "${!PIDFILES[@]}" del_namespaces + rm -rf "${TEST_HOME}" } check_args() { @@ -382,6 +390,11 @@ handle_build() { popd &>/dev/null } +setup_home() { + mkdir -p "$(dirname "${SSH_KEY_PATH}")" + ssh-keygen -t ed25519 -f "${SSH_KEY_PATH}" -N "" -q +} + create_pidfile() { local pidfile @@ -470,11 +483,14 @@ vm_start() { --run \ ${kernel_opt} \ ${verbose_opt} \ + --rwdir=/root="${TEST_HOME}" \ + --force-9p \ + --cwd /root \ --qemu-opts="${qemu_opts}" \ --qemu="${qemu}" \ --user root \ --append "${KERNEL_CMDLINE}" \ - --rw &> ${logfile} & + &> ${logfile} & timeout "${WAIT_QEMU}" \ bash -c 'while [[ ! -s '"${pidfile}"' ]]; do sleep 1; done; exit 0' @@ -1551,6 +1567,7 @@ check_deps check_vng check_socat handle_build +setup_home echo "1..${#ARGS[@]}" From 0c3893d37892f332b595906710bf53bbd9c7c572 Mon Sep 17 00:00:00 2001 From: Bobby Eshleman Date: Tue, 17 Mar 2026 15:09:36 -0700 Subject: [PATCH 0670/1561] selftests/vsock: fix vsock_test path shadowing in nested VMs The /root mount introduced for nested VM support shadows any host paths under /root. This breaks systems where the outer VM runs as root and the vsock_test binary path is something like: /root/linux/tools/testing/selftests/vsock/vsock_test Fix this by copying vsock_test into the temporary home directory that gets mounted as /root in the guest, and using a relative path to invoke it. Signed-off-by: Bobby Eshleman Link: https://patch.msgid.link/20260317-vsock-vmtest-nested-fixes-v2-2-0b3f53b80a0f@meta.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/vsock/vmtest.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/vsock/vmtest.sh b/tools/testing/selftests/vsock/vmtest.sh index c8d5cb66bf5f..d97913a6bdc7 100755 --- a/tools/testing/selftests/vsock/vmtest.sh +++ b/tools/testing/selftests/vsock/vmtest.sh @@ -393,6 +393,7 @@ handle_build() { setup_home() { mkdir -p "$(dirname "${SSH_KEY_PATH}")" ssh-keygen -t ed25519 -f "${SSH_KEY_PATH}" -N "" -q + cp "${VSOCK_TEST}" "${TEST_HOME}"/vsock_test } create_pidfile() { @@ -620,7 +621,7 @@ vm_vsock_test() { # log output and use pipefail to respect vsock_test errors set -o pipefail if [[ "${host}" != server ]]; then - vm_ssh "${ns}" -- "${VSOCK_TEST}" \ + vm_ssh "${ns}" -- ./vsock_test \ --mode=client \ --control-host="${host}" \ --peer-cid="${cid}" \ @@ -628,7 +629,7 @@ vm_vsock_test() { 2>&1 | log_guest rc=$? else - vm_ssh "${ns}" -- "${VSOCK_TEST}" \ + vm_ssh "${ns}" -- ./vsock_test \ --mode=server \ --peer-cid="${cid}" \ --control-port="${port}" \ From e783e40fb689381caca31e03d28c39e10c82e722 Mon Sep 17 00:00:00 2001 From: Thangaraj Samynathan Date: Wed, 18 Mar 2026 12:02:28 +0530 Subject: [PATCH 0671/1561] net: lan743x: fix SGMII detection on PCI1xxxx B0+ during warm reset A warm reset on boards using an EEPROM-only strap configuration (where no MAC address is set in the image) can cause the driver to incorrectly revert to RGMII mode. This occurs because the ENET_CONFIG_LOAD_STARTED bit may not persist or behave as expected. Update pci11x1x_strap_get_status() to use revision-specific validation: - For PCI11x1x A0: Continue using the legacy check (config load started or reset protection) to validate the SGMII strap. - For PCI11x1x B0 and later: Use the newly available STRAP_READ_USE_SGMII_EN_ bit in the upper strap register to validate the lower SGMII_EN bit. This ensures the SGMII interface is correctly identified even after a warm reboot. Signed-off-by: Thangaraj Samynathan Link: https://patch.msgid.link/20260318063228.17110-1-thangaraj.s@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan743x_main.c | 15 +++++++++++---- drivers/net/ethernet/microchip/lan743x_main.h | 1 + 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index a3845edf0e48..ff48012c8046 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -28,6 +28,12 @@ #define RFE_RD_FIFO_TH_3_DWORDS 0x3 +static bool pci11x1x_is_a0(struct lan743x_adapter *adapter) +{ + u32 dev_rev = adapter->csr.id_rev & ID_REV_CHIP_REV_MASK_; + return dev_rev == ID_REV_CHIP_REV_PCI11X1X_A0_; +} + static void pci11x1x_strap_get_status(struct lan743x_adapter *adapter) { u32 chip_rev; @@ -47,10 +53,11 @@ static void pci11x1x_strap_get_status(struct lan743x_adapter *adapter) cfg_load = lan743x_csr_read(adapter, ETH_SYS_CONFIG_LOAD_STARTED_REG); lan743x_hs_syslock_release(adapter); hw_cfg = lan743x_csr_read(adapter, HW_CFG); - - if (cfg_load & GEN_SYS_LOAD_STARTED_REG_ETH_ || - hw_cfg & HW_CFG_RST_PROTECT_) { - strap = lan743x_csr_read(adapter, STRAP_READ); + strap = lan743x_csr_read(adapter, STRAP_READ); + if ((pci11x1x_is_a0(adapter) && + (cfg_load & GEN_SYS_LOAD_STARTED_REG_ETH_ || + hw_cfg & HW_CFG_RST_PROTECT_)) || + (strap & STRAP_READ_USE_SGMII_EN_)) { if (strap & STRAP_READ_SGMII_EN_) adapter->is_sgmii_en = true; else diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h index 02a28b709163..160d94a7cee6 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.h +++ b/drivers/net/ethernet/microchip/lan743x_main.h @@ -27,6 +27,7 @@ #define ID_REV_CHIP_REV_MASK_ (0x0000FFFF) #define ID_REV_CHIP_REV_A0_ (0x00000000) #define ID_REV_CHIP_REV_B0_ (0x00000010) +#define ID_REV_CHIP_REV_PCI11X1X_A0_ (0x000000A0) #define ID_REV_CHIP_REV_PCI11X1X_B0_ (0x000000B0) #define FPGA_REV (0x04) From 82a5852595f5afb36aebddcc3ebc2654ee4d3879 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Thu, 19 Mar 2026 19:05:54 +0100 Subject: [PATCH 0672/1561] net: ethtool: re-order local includes Most local #include in the ethtool command handling is out of order, with either : #include "netlink.h" #include "common.h" or even : #include "netlink.h" #include "common.h" #include "bitset.h" One of the reasons is because bitset.h s lacking definitions for nlattr, netlink_ext_ack, ETH_GSTRING_LEN, and types such as u32, bool, etc. Make bitset.h standalone by including for ETH_GSTRING_LEN, and for nlattr, netlink_ext_ack and the rest. While at it, take a pass on ethnl sources to re-order the local includes : - put them after the global includes - add a newline between global and local includes - alpha-sort the local includes One notable exception is the cmis.h include, that needs definitions from module_fw.h. Keep them in this order for now. Signed-off-by: Maxime Chevallier Link: https://patch.msgid.link/20260319180555.1531386-1-maxime.chevallier@bootlin.com Signed-off-by: Jakub Kicinski --- net/ethtool/bitset.c | 3 ++- net/ethtool/bitset.h | 3 +++ net/ethtool/cabletest.c | 3 ++- net/ethtool/channels.c | 2 +- net/ethtool/coalesce.c | 3 ++- net/ethtool/common.c | 2 +- net/ethtool/debug.c | 4 ++-- net/ethtool/eee.c | 4 ++-- net/ethtool/eeprom.c | 3 ++- net/ethtool/features.c | 4 ++-- net/ethtool/fec.c | 4 ++-- net/ethtool/ioctl.c | 1 + net/ethtool/linkinfo.c | 2 +- net/ethtool/linkmodes.c | 4 ++-- net/ethtool/linkstate.c | 5 +++-- net/ethtool/module.c | 4 ++-- net/ethtool/mse.c | 2 +- net/ethtool/netlink.c | 3 ++- net/ethtool/pause.c | 2 +- net/ethtool/phc_vclocks.c | 2 +- net/ethtool/phy.c | 6 +++--- net/ethtool/plca.c | 2 +- net/ethtool/privflags.c | 4 ++-- net/ethtool/pse-pd.c | 7 ++++--- net/ethtool/rings.c | 2 +- net/ethtool/rss.c | 2 +- net/ethtool/stats.c | 4 ++-- net/ethtool/strset.c | 3 ++- net/ethtool/tsconfig.c | 6 +++--- net/ethtool/tsinfo.c | 4 ++-- net/ethtool/wol.c | 4 ++-- 31 files changed, 58 insertions(+), 46 deletions(-) diff --git a/net/ethtool/bitset.c b/net/ethtool/bitset.c index f0883357d12e..8bb98d3ea3db 100644 --- a/net/ethtool/bitset.c +++ b/net/ethtool/bitset.c @@ -2,8 +2,9 @@ #include #include -#include "netlink.h" + #include "bitset.h" +#include "netlink.h" /* Some bitmaps are internally represented as an array of unsigned long, some * as an array of u32 (some even as single u32 for now). To avoid the need of diff --git a/net/ethtool/bitset.h b/net/ethtool/bitset.h index c2c2e0051d00..07bc547d47a8 100644 --- a/net/ethtool/bitset.h +++ b/net/ethtool/bitset.h @@ -3,6 +3,9 @@ #ifndef _NET_ETHTOOL_BITSET_H #define _NET_ETHTOOL_BITSET_H +#include +#include + #define ETHNL_MAX_BITSET_SIZE S16_MAX typedef const char (*const ethnl_string_array_t)[ETH_GSTRING_LEN]; diff --git a/net/ethtool/cabletest.c b/net/ethtool/cabletest.c index 0364b8fb577b..8d375dac2a40 100644 --- a/net/ethtool/cabletest.c +++ b/net/ethtool/cabletest.c @@ -3,8 +3,9 @@ #include #include #include -#include "netlink.h" + #include "common.h" +#include "netlink.h" /* 802.3 standard allows 100 meters for BaseT cables. However longer * cables might work, depending on the quality of the cables and the diff --git a/net/ethtool/channels.c b/net/ethtool/channels.c index ca4f80282448..45232cf1c144 100644 --- a/net/ethtool/channels.c +++ b/net/ethtool/channels.c @@ -2,8 +2,8 @@ #include -#include "netlink.h" #include "common.h" +#include "netlink.h" struct channels_req_info { struct ethnl_req_info base; diff --git a/net/ethtool/coalesce.c b/net/ethtool/coalesce.c index 349bb02c517a..1e2c5c7048a8 100644 --- a/net/ethtool/coalesce.c +++ b/net/ethtool/coalesce.c @@ -1,8 +1,9 @@ // SPDX-License-Identifier: GPL-2.0-only #include -#include "netlink.h" + #include "common.h" +#include "netlink.h" struct coalesce_req_info { struct ethnl_req_info base; diff --git a/net/ethtool/common.c b/net/ethtool/common.c index e252cf20c22f..6a4a3797a812 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -8,8 +8,8 @@ #include #include -#include "netlink.h" #include "common.h" +#include "netlink.h" #include "../core/dev.h" diff --git a/net/ethtool/debug.c b/net/ethtool/debug.c index 0b2dea56d461..6043916b440e 100644 --- a/net/ethtool/debug.c +++ b/net/ethtool/debug.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0-only -#include "netlink.h" -#include "common.h" #include "bitset.h" +#include "common.h" +#include "netlink.h" struct debug_req_info { struct ethnl_req_info base; diff --git a/net/ethtool/eee.c b/net/ethtool/eee.c index bf398973eb8a..50d6fcd3661b 100644 --- a/net/ethtool/eee.c +++ b/net/ethtool/eee.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0-only -#include "netlink.h" -#include "common.h" #include "bitset.h" +#include "common.h" +#include "netlink.h" struct eee_req_info { struct ethnl_req_info base; diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c index 3b8209e930fd..caf1e41e676b 100644 --- a/net/ethtool/eeprom.c +++ b/net/ethtool/eeprom.c @@ -2,8 +2,9 @@ #include #include -#include "netlink.h" + #include "common.h" +#include "netlink.h" struct eeprom_req_info { struct ethnl_req_info base; diff --git a/net/ethtool/features.c b/net/ethtool/features.c index f2217983be2b..d9455b30aec9 100644 --- a/net/ethtool/features.c +++ b/net/ethtool/features.c @@ -2,9 +2,9 @@ #include -#include "netlink.h" -#include "common.h" #include "bitset.h" +#include "common.h" +#include "netlink.h" struct features_req_info { struct ethnl_req_info base; diff --git a/net/ethtool/fec.c b/net/ethtool/fec.c index 4669e74cbcaa..e2d539271060 100644 --- a/net/ethtool/fec.c +++ b/net/ethtool/fec.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0-only -#include "netlink.h" -#include "common.h" #include "bitset.h" +#include "common.h" +#include "netlink.h" struct fec_req_info { struct ethnl_req_info base; diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index ff4b4780d6af..11dfbf076b6d 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -33,6 +33,7 @@ #include #include #include + #include "common.h" /* State held across locks and calls for commands which have devlink fallback */ diff --git a/net/ethtool/linkinfo.c b/net/ethtool/linkinfo.c index 30b8ce275159..244ff92e2ff9 100644 --- a/net/ethtool/linkinfo.c +++ b/net/ethtool/linkinfo.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only -#include "netlink.h" #include "common.h" +#include "netlink.h" struct linkinfo_req_info { struct ethnl_req_info base; diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c index 259cd9ef1f2a..30d703531652 100644 --- a/net/ethtool/linkmodes.c +++ b/net/ethtool/linkmodes.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0-only -#include "netlink.h" -#include "common.h" #include "bitset.h" +#include "common.h" +#include "netlink.h" /* LINKMODES_GET */ diff --git a/net/ethtool/linkstate.c b/net/ethtool/linkstate.c index 05a5f72c99fa..8a5985fd7712 100644 --- a/net/ethtool/linkstate.c +++ b/net/ethtool/linkstate.c @@ -1,10 +1,11 @@ // SPDX-License-Identifier: GPL-2.0-only -#include "netlink.h" -#include "common.h" #include #include +#include "common.h" +#include "netlink.h" + struct linkstate_req_info { struct ethnl_req_info base; }; diff --git a/net/ethtool/module.c b/net/ethtool/module.c index 0a761bf4771e..cad2eb25b5a4 100644 --- a/net/ethtool/module.c +++ b/net/ethtool/module.c @@ -6,10 +6,10 @@ #include #include -#include "netlink.h" -#include "common.h" #include "bitset.h" +#include "common.h" #include "module_fw.h" +#include "netlink.h" struct module_req_info { struct ethnl_req_info base; diff --git a/net/ethtool/mse.c b/net/ethtool/mse.c index 8cb3fc5e7be4..e91b74430f76 100644 --- a/net/ethtool/mse.c +++ b/net/ethtool/mse.c @@ -4,8 +4,8 @@ #include #include -#include "netlink.h" #include "common.h" +#include "netlink.h" /* Channels A-D only; WORST and LINK are exclusive alternatives */ #define PHY_MSE_CHANNEL_COUNT 4 diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 6e5f0f4f815a..14cb06ec5171 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -6,8 +6,9 @@ #include #include #include -#include "netlink.h" + #include "module_fw.h" +#include "netlink.h" static struct genl_family ethtool_genl_family; diff --git a/net/ethtool/pause.c b/net/ethtool/pause.c index 5d28f642764c..29f812e935b6 100644 --- a/net/ethtool/pause.c +++ b/net/ethtool/pause.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only -#include "netlink.h" #include "common.h" +#include "netlink.h" struct pause_req_info { struct ethnl_req_info base; diff --git a/net/ethtool/phc_vclocks.c b/net/ethtool/phc_vclocks.c index cadaabed60bd..15146e38ab27 100644 --- a/net/ethtool/phc_vclocks.c +++ b/net/ethtool/phc_vclocks.c @@ -2,8 +2,8 @@ /* * Copyright 2021 NXP */ -#include "netlink.h" #include "common.h" +#include "netlink.h" struct phc_vclocks_req_info { struct ethnl_req_info base; diff --git a/net/ethtool/phy.c b/net/ethtool/phy.c index 68372bef4b2f..d4e6887055ab 100644 --- a/net/ethtool/phy.c +++ b/net/ethtool/phy.c @@ -3,14 +3,14 @@ * Copyright 2023 Bootlin * */ -#include "common.h" -#include "netlink.h" - #include #include #include #include +#include "common.h" +#include "netlink.h" + struct phy_req_info { struct ethnl_req_info base; }; diff --git a/net/ethtool/plca.c b/net/ethtool/plca.c index e1f7820a6158..91f0c4233298 100644 --- a/net/ethtool/plca.c +++ b/net/ethtool/plca.c @@ -3,8 +3,8 @@ #include #include -#include "netlink.h" #include "common.h" +#include "netlink.h" struct plca_req_info { struct ethnl_req_info base; diff --git a/net/ethtool/privflags.c b/net/ethtool/privflags.c index 297be6a13ab9..46a4d2a43ba8 100644 --- a/net/ethtool/privflags.c +++ b/net/ethtool/privflags.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0-only -#include "netlink.h" -#include "common.h" #include "bitset.h" +#include "common.h" +#include "netlink.h" struct privflags_req_info { struct ethnl_req_info base; diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c index 24def9c9dd54..2eb9bdc2dcb9 100644 --- a/net/ethtool/pse-pd.c +++ b/net/ethtool/pse-pd.c @@ -6,14 +6,15 @@ // Copyright (c) 2022 Pengutronix, Oleksij Rempel // -#include "common.h" -#include "linux/pse-pd/pse.h" -#include "netlink.h" #include #include #include #include +#include "common.h" +#include "linux/pse-pd/pse.h" +#include "netlink.h" + struct pse_req_info { struct ethnl_req_info base; }; diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c index aeedd5ec6b8c..0fd5dcc3729f 100644 --- a/net/ethtool/rings.c +++ b/net/ethtool/rings.c @@ -2,8 +2,8 @@ #include -#include "netlink.h" #include "common.h" +#include "netlink.h" struct rings_req_info { struct ethnl_req_info base; diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c index da5934cceb07..0f4e5cd2ac71 100644 --- a/net/ethtool/rss.c +++ b/net/ethtool/rss.c @@ -2,8 +2,8 @@ #include -#include "netlink.h" #include "common.h" +#include "netlink.h" struct rss_req_info { struct ethnl_req_info base; diff --git a/net/ethtool/stats.c b/net/ethtool/stats.c index 3ca8eb2a3b31..38136f19b2ec 100644 --- a/net/ethtool/stats.c +++ b/net/ethtool/stats.c @@ -3,9 +3,9 @@ #include #include -#include "netlink.h" -#include "common.h" #include "bitset.h" +#include "common.h" +#include "netlink.h" struct stats_req_info { struct ethnl_req_info base; diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c index f6a67109beda..aa1e50c98f34 100644 --- a/net/ethtool/strset.c +++ b/net/ethtool/strset.c @@ -2,8 +2,9 @@ #include #include -#include "netlink.h" + #include "common.h" +#include "netlink.h" struct strset_info { bool per_dev; diff --git a/net/ethtool/tsconfig.c b/net/ethtool/tsconfig.c index e49e612a68c2..e4f518e49d4c 100644 --- a/net/ethtool/tsconfig.c +++ b/net/ethtool/tsconfig.c @@ -3,11 +3,11 @@ #include #include -#include "netlink.h" -#include "common.h" #include "bitset.h" -#include "../core/dev.h" +#include "common.h" +#include "netlink.h" #include "ts.h" +#include "../core/dev.h" struct tsconfig_req_info { struct ethnl_req_info base; diff --git a/net/ethtool/tsinfo.c b/net/ethtool/tsinfo.c index c0145c752d2f..9aad62695dfb 100644 --- a/net/ethtool/tsinfo.c +++ b/net/ethtool/tsinfo.c @@ -6,9 +6,9 @@ #include #include -#include "netlink.h" -#include "common.h" #include "bitset.h" +#include "common.h" +#include "netlink.h" #include "ts.h" struct tsinfo_req_info { diff --git a/net/ethtool/wol.c b/net/ethtool/wol.c index a39d8000d808..60f9a1d8535e 100644 --- a/net/ethtool/wol.c +++ b/net/ethtool/wol.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0-only -#include "netlink.h" -#include "common.h" #include "bitset.h" +#include "common.h" +#include "netlink.h" struct wol_req_info { struct ethnl_req_info base; From c1887257a81bf62f48178d3b9d31e23520d67b2c Mon Sep 17 00:00:00 2001 From: Damien Dejean Date: Wed, 18 Mar 2026 22:54:58 +0100 Subject: [PATCH 0673/1561] dt-bindings: net: ethernet-phy: add property enet-phy-pair-order Add property enet-phy-pair-order to the device tree bindings to define the pair order of the PHY. To simplify PCB design some manufacturers allow to wire the pairs in a reverse order, and change the order in software. The property can be set to 0 to force the normal pair order (ABCD), or 1 to force the reverse pair order (DCBA). Signed-off-by: Damien Dejean Reviewed-by: Rob Herring (Arm) Link: https://patch.msgid.link/20260318215502.106528-2-dam.dejean@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/net/ethernet-phy.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/devicetree/bindings/net/ethernet-phy.yaml b/Documentation/devicetree/bindings/net/ethernet-phy.yaml index 58634fee9fc4..4a27547f7d7a 100644 --- a/Documentation/devicetree/bindings/net/ethernet-phy.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-phy.yaml @@ -126,6 +126,12 @@ properties: e.g. wrong bootstrap configuration caused by issues in PCB layout design. + enet-phy-pair-order: + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [0, 1] + description: + For normal (0) or reverse (1) order of the pairs (ABCD -> DCBA). + eee-broken-100tx: $ref: /schemas/types.yaml#/definitions/flag description: From 330296ea9e158758aa65631f5ec64aa74806b7e2 Mon Sep 17 00:00:00 2001 From: Damien Dejean Date: Wed, 18 Mar 2026 22:54:59 +0100 Subject: [PATCH 0674/1561] net: phy: realtek: add RTL8224 pair order support The RTL8224 has a register to configure a pair swap (from ABCD order to DCBA) providing PCB designers more flexbility when wiring the chip. The swap parameter has to be set correctly for each of the 4 ports before the chip can detect a link. After a reset, this register is (unfortunately) left in a random state, thus it has to be initialized. On most of the devices the bootloader does it once for all and we can rely on the value set, on some other it is not and the kernel has to do it. The MDI pair swap can be set in the device tree using the property enet-phy-pair-order. The property is set to 0 to keep the default order (ABCD), or 1 to reverse the pairs (DCBA). Signed-off-by: Damien Dejean Link: https://patch.msgid.link/20260318215502.106528-3-dam.dejean@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/realtek/Kconfig | 1 + drivers/net/phy/realtek/realtek_main.c | 64 ++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/drivers/net/phy/realtek/Kconfig b/drivers/net/phy/realtek/Kconfig index b05c2a1e9024..a741b34d193e 100644 --- a/drivers/net/phy/realtek/Kconfig +++ b/drivers/net/phy/realtek/Kconfig @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config REALTEK_PHY tristate "Realtek PHYs" + select PHY_PACKAGE help Currently supports RTL821x/RTL822x and fast ethernet PHYs diff --git a/drivers/net/phy/realtek/realtek_main.c b/drivers/net/phy/realtek/realtek_main.c index 530b4e26d16e..63134e300c33 100644 --- a/drivers/net/phy/realtek/realtek_main.c +++ b/drivers/net/phy/realtek/realtek_main.c @@ -171,6 +171,8 @@ #define RTL8224_SRAM_RTCT_LEN(pair) (0x8028 + (pair) * 4) +#define RTL8224_VND1_MDI_PAIR_SWAP 0xa90 + #define RTL8366RB_POWER_SAVE 0x15 #define RTL8366RB_POWER_SAVE_ON BIT(12) @@ -1820,6 +1822,66 @@ static int rtl8224_cable_test_get_status(struct phy_device *phydev, bool *finish return rtl8224_cable_test_report(phydev, finished); } +static int rtl8224_package_modify_mmd(struct phy_device *phydev, int devad, + u32 regnum, u16 mask, u16 set) +{ + int val, ret; + + phy_lock_mdio_bus(phydev); + + val = __phy_package_read_mmd(phydev, 0, devad, regnum); + if (val < 0) { + ret = val; + goto exit; + } + + val &= ~mask; + val |= set; + + ret = __phy_package_write_mmd(phydev, 0, devad, regnum, val); + +exit: + phy_unlock_mdio_bus(phydev); + return ret; +} + +static int rtl8224_mdi_config_order(struct phy_device *phydev) +{ + struct device_node *np = phydev->mdio.dev.of_node; + u8 port_offset = phydev->mdio.addr & 3; + u32 order = 0; + int ret; + + ret = of_property_read_u32(np, "enet-phy-pair-order", &order); + + /* Do nothing in case the property is not present */ + if (ret == -EINVAL || ret == -ENOSYS) + return 0; + + if (ret) + return ret; + + if (order & ~1) + return -EINVAL; + + return rtl8224_package_modify_mmd(phydev, MDIO_MMD_VEND1, + RTL8224_VND1_MDI_PAIR_SWAP, + BIT(port_offset), + order ? BIT(port_offset) : 0); +} + +static int rtl8224_config_init(struct phy_device *phydev) +{ + return rtl8224_mdi_config_order(phydev); +} + +static int rtl8224_probe(struct phy_device *phydev) +{ + /* Chip exposes 4 ports, join all of them in the same package */ + return devm_phy_package_join(&phydev->mdio.dev, phydev, + phydev->mdio.addr & ~3, 0); +} + static bool rtlgen_supports_2_5gbps(struct phy_device *phydev) { int val; @@ -2395,6 +2457,8 @@ static struct phy_driver realtek_drvs[] = { PHY_ID_MATCH_EXACT(0x001ccad0), .name = "RTL8224 2.5Gbps PHY", .flags = PHY_POLL_CABLE_TEST, + .probe = rtl8224_probe, + .config_init = rtl8224_config_init, .get_features = rtl822x_c45_get_features, .config_aneg = rtl822x_c45_config_aneg, .read_status = rtl822x_c45_read_status, From 58ffb5910f32e5b387d4af31ee21851c40eb31b5 Mon Sep 17 00:00:00 2001 From: Damien Dejean Date: Wed, 18 Mar 2026 22:55:00 +0100 Subject: [PATCH 0675/1561] dt-bindings: net: ethernet-phy: add property enet-phy-pair-polarity Add the property enet-phy-pair-polarity to describe the polarity of the PHY pairs. To ease PCB designs some manufacturers allow to wire the pairs with a reverse polarity and provide a way to configure it. The property 'enet-phy-pair-polarity' sets the polarity of each pair. Bit 0 to 3 configure the polarity or pairs A to D, if set to 1 the polarity is reversed for this pair. Signed-off-by: Damien Dejean Reviewed-by: Rob Herring (Arm) Link: https://patch.msgid.link/20260318215502.106528-4-dam.dejean@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/net/ethernet-phy.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Documentation/devicetree/bindings/net/ethernet-phy.yaml b/Documentation/devicetree/bindings/net/ethernet-phy.yaml index 4a27547f7d7a..21a1a63506f0 100644 --- a/Documentation/devicetree/bindings/net/ethernet-phy.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-phy.yaml @@ -132,6 +132,14 @@ properties: description: For normal (0) or reverse (1) order of the pairs (ABCD -> DCBA). + enet-phy-pair-polarity: + $ref: /schemas/types.yaml#/definitions/uint32 + maximum: 0xf + description: + A bitmap to describe pair polarity swap. Bit 0 to swap polarity of pair A, + bit 1 to swap polarity of pair B, bit 2 to swap polarity of pair C and bit + 3 to swap polarity of pair D. + eee-broken-100tx: $ref: /schemas/types.yaml#/definitions/flag description: From beed9c0e9b53c98bc66d28d46fbe38c347e9aa74 Mon Sep 17 00:00:00 2001 From: Damien Dejean Date: Wed, 18 Mar 2026 22:55:01 +0100 Subject: [PATCH 0676/1561] net: phy: realtek: add RTL8224 polarity support The RTL8224 has a register to configure the polarity of every pair of each port. It provides device designers more flexbility when wiring the chip. Unfortunately, the register is left in an unknown state after a reset. Thus on devices where the bootloader don't initialize it, the driver has to do it to detect and use a link. The MDI polarity swap can be set in the device tree using the property enet-phy-pair-polarity. The u32 value is a bitfield where bit[0..3] control the polarity of pairs A..D. Signed-off-by: Damien Dejean Link: https://patch.msgid.link/20260318215502.106528-5-dam.dejean@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/realtek/realtek_main.c | 34 +++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/realtek/realtek_main.c b/drivers/net/phy/realtek/realtek_main.c index 63134e300c33..023e47ad605b 100644 --- a/drivers/net/phy/realtek/realtek_main.c +++ b/drivers/net/phy/realtek/realtek_main.c @@ -172,6 +172,7 @@ #define RTL8224_SRAM_RTCT_LEN(pair) (0x8028 + (pair) * 4) #define RTL8224_VND1_MDI_PAIR_SWAP 0xa90 +#define RTL8224_VND1_MDI_POLARITY_SWAP 0xa94 #define RTL8366RB_POWER_SAVE 0x15 #define RTL8366RB_POWER_SAVE_ON BIT(12) @@ -1870,9 +1871,40 @@ static int rtl8224_mdi_config_order(struct phy_device *phydev) order ? BIT(port_offset) : 0); } +static int rtl8224_mdi_config_polarity(struct phy_device *phydev) +{ + struct device_node *np = phydev->mdio.dev.of_node; + u8 offset = (phydev->mdio.addr & 3) * 4; + u32 polarity = 0; + int ret; + + ret = of_property_read_u32(np, "enet-phy-pair-polarity", &polarity); + + /* Do nothing if the property is not present */ + if (ret == -EINVAL || ret == -ENOSYS) + return 0; + + if (ret) + return ret; + + if (polarity & ~0xf) + return -EINVAL; + + return rtl8224_package_modify_mmd(phydev, MDIO_MMD_VEND1, + RTL8224_VND1_MDI_POLARITY_SWAP, + 0xf << offset, + polarity << offset); +} + static int rtl8224_config_init(struct phy_device *phydev) { - return rtl8224_mdi_config_order(phydev); + int ret; + + ret = rtl8224_mdi_config_order(phydev); + if (ret) + return ret; + + return rtl8224_mdi_config_polarity(phydev); } static int rtl8224_probe(struct phy_device *phydev) From 2e69e55897dc1898175f42989dd575ca8c467363 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Thu, 19 Mar 2026 19:17:04 +0100 Subject: [PATCH 0677/1561] net: dsa: microchip: Don't embed struct phy_device to maintain the port state The KSZ9477 maintains the SGMII port's state for speed, duplex and link status to be able to fixup the accesses to its internal older version of the Designware XPCS. However, it does so by embedding a full instance of struct phy_device, only to use the 'speed', 'link' and 'duplex' fields. This is also only used for the SGMII port, it's otherwise unused for all other regular ports. Replace that with simple int/bool values. Signed-off-by: Maxime Chevallier Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260319181705.1576679-1-maxime.chevallier@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz9477.c | 17 ++++++++--------- drivers/net/dsa/microchip/ksz_common.c | 2 +- drivers/net/dsa/microchip/ksz_common.h | 4 +++- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index 5facffbb9c9a..5416016b33e0 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -224,14 +224,13 @@ static int ksz9477_pcs_read(struct mii_bus *bus, int phy, int mmd, int reg) else duplex = DUPLEX_HALF; - if (!p->phydev.link || - p->phydev.speed != speed || - p->phydev.duplex != duplex) { + if (!p->link || p->speed != speed || + p->duplex != duplex) { u16 ctrl; - p->phydev.link = 1; - p->phydev.speed = speed; - p->phydev.duplex = duplex; + p->link = true; + p->speed = speed; + p->duplex = duplex; port_sgmii_r(dev, port, mmd, MII_BMCR, &ctrl); ctrl &= BMCR_ANENABLE; @@ -241,10 +240,10 @@ static int ksz9477_pcs_read(struct mii_bus *bus, int phy, int mmd, int reg) ctrl); } } else { - p->phydev.link = 0; + p->link = false; } } else if (reg == MII_BMSR) { - p->phydev.link = !!(val & BMSR_LSTATUS); + p->link = !!(val & BMSR_LSTATUS); } } @@ -557,7 +556,7 @@ int ksz9477_r_phy(struct ksz_device *dev, u16 addr, u16 reg, u16 *data) val = 0x0700; break; case MII_STAT1000: - if (p->phydev.speed == SPEED_1000) + if (p->speed == SPEED_1000) val = 0x3800; else val = 0; diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index c517478cc476..144373e13bea 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -3983,7 +3983,7 @@ static void ksz9477_phylink_mac_link_up(struct phylink_config *config, if (dev->info->internal_phy[port]) return; - p->phydev.speed = speed; + p->speed = speed; ksz_port_set_xmii_speed(dev, port, speed); diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h index 929aff4c55de..18f13ee9c7b6 100644 --- a/drivers/net/dsa/microchip/ksz_common.h +++ b/drivers/net/dsa/microchip/ksz_common.h @@ -129,7 +129,9 @@ struct ksz_port { bool learning; bool isolated; int stp_state; - struct phy_device phydev; + int speed; + int duplex; + bool link; u32 fiber:1; /* port is fiber */ u32 force:1; From 609e79253ace7f6eba5fdfe6ba8bd3bfd7b9e79b Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 18 Mar 2026 17:54:56 -0700 Subject: [PATCH 0678/1561] net/mlx5e: Allow set_rx_mode on uplink representor set_rx_mode handler was skipped on uplink representor, since uplink relies on FDB to forward all traffic to it by default, which works perfectly on a single PF per physical port configuration, as explicit mac request isn't required, but In case of multi-host and DPU environments, uplink can only use own mac address, as set_rx_mode wasn't honored in uplink rep. Since MPFs (Multi PF switch) requires PFs to request explicit mac forwarding, this patch enables set_rx_mode on uplink representor to allow PF mac programming into MPFs table in switchdev mode, allowing use-cases such as arbitrary mac address forwarding via linux bridge. Signed-off-by: Saeed Mahameed Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260319005456.82745-1-saeed@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 10 ++++++++++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 ++ 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 55255fe6e415..fdfe9d1cfe21 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -822,8 +822,14 @@ static void mlx5e_destroy_promisc_table(struct mlx5e_flow_steering *fs) void mlx5e_fs_set_rx_mode_work(struct mlx5e_flow_steering *fs, struct net_device *netdev) { + struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_l2_table *ea = &fs->l2; + if (mlx5e_is_uplink_rep(priv)) { + mlx5e_handle_netdev_addr(fs, netdev); + goto update_vport_context; + } + bool rx_mode_enable = fs->state_destroy; bool promisc_enabled = rx_mode_enable && (netdev->flags & IFF_PROMISC); bool allmulti_enabled = rx_mode_enable && (netdev->flags & IFF_ALLMULTI); @@ -863,6 +869,7 @@ void mlx5e_fs_set_rx_mode_work(struct mlx5e_flow_steering *fs, ea->allmulti_enabled = allmulti_enabled; ea->broadcast_enabled = broadcast_enabled; +update_vport_context: mlx5e_vport_context_update(fs, netdev); } @@ -983,6 +990,9 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_flow_steering *fs, u8 *mc_dmac; u8 *mv_dmac; + if (!ft) + return -EINVAL; + spec = kvzalloc_obj(*spec); if (!spec) return -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index f7009da94f0b..3eebdf402129 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4102,9 +4102,6 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) static void mlx5e_nic_set_rx_mode(struct mlx5e_priv *priv) { - if (mlx5e_is_uplink_rep(priv)) - return; /* no rx mode for uplink rep */ - queue_work(priv->wq, &priv->set_rx_mode_work); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 1db4ecb2356f..8992f0f7a870 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1369,6 +1369,8 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) netdev_unlock(priv->netdev); rtnl_unlock(); + /* clean-up uplink's mpfs mac table */ + queue_work(priv->wq, &priv->set_rx_mode_work); mlx5e_rep_bridge_cleanup(priv); mlx5e_dcbnl_delete_app(priv); mlx5_notifier_unregister(mdev, &priv->events_nb); From 53edd8309570517137d58e3644976843b817179c Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Wed, 18 Mar 2026 23:08:10 -0700 Subject: [PATCH 0679/1561] net: netdevsim: correct typo in new_device_store error message Fix the format hint by replacing "unit" with "uint" in the pr_err() string. Signed-off-by: Alok Tiwari Reviewed-by: Joe Damato Link: https://patch.msgid.link/20260319060812.495488-1-alok.a.tiwari@oracle.com Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c index f85b1c261dfb..41483e371f05 100644 --- a/drivers/net/netdevsim/bus.c +++ b/drivers/net/netdevsim/bus.c @@ -177,7 +177,7 @@ new_device_store(const struct bus_type *bus, const char *buf, size_t count) } break; default: - pr_err("Format for adding new device is \"id port_count num_queues\" (uint uint unit).\n"); + pr_err("Format for adding new device is \"id port_count num_queues\" (uint uint uint).\n"); return -EINVAL; } From 544921efd4e4b1f135c87335dd59114a302c574d Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 19 Mar 2026 19:40:54 +0100 Subject: [PATCH 0680/1561] netdevsim: move TC offload code to a dedicated file This commit has no functional change. Reviewed-by: Jamal Hadi Salim Signed-off-by: Davide Caratti Link: https://patch.msgid.link/b7881fd53f8a5d8eff4eae8121576c3cd60c2ed7.1773945414.git.dcaratti@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/Makefile | 2 +- drivers/net/netdevsim/netdev.c | 51 --------------------------- drivers/net/netdevsim/netdevsim.h | 3 ++ drivers/net/netdevsim/tc.c | 57 +++++++++++++++++++++++++++++++ 4 files changed, 61 insertions(+), 52 deletions(-) create mode 100644 drivers/net/netdevsim/tc.c diff --git a/drivers/net/netdevsim/Makefile b/drivers/net/netdevsim/Makefile index 14a553e000ec..87718204fb4d 100644 --- a/drivers/net/netdevsim/Makefile +++ b/drivers/net/netdevsim/Makefile @@ -3,7 +3,7 @@ obj-$(CONFIG_NETDEVSIM) += netdevsim.o netdevsim-objs := \ - netdev.o dev.o ethtool.o fib.o bus.o health.o hwstats.o udp_tunnels.o + netdev.o dev.o ethtool.o fib.o bus.o health.o hwstats.o udp_tunnels.o tc.o ifeq ($(CONFIG_BPF_SYSCALL),y) netdevsim-objs += \ diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 3645ebde049a..c71b8d116f18 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -202,12 +202,6 @@ static int nsim_change_mtu(struct net_device *dev, int new_mtu) return 0; } -static int -nsim_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) -{ - return nsim_bpf_setup_tc_block_cb(type, type_data, cb_priv); -} - static int nsim_set_vf_mac(struct net_device *dev, int vf, u8 *mac) { struct netdevsim *ns = netdev_priv(dev); @@ -338,51 +332,6 @@ static int nsim_set_vf_link_state(struct net_device *dev, int vf, int state) return 0; } -static void nsim_taprio_stats(struct tc_taprio_qopt_stats *stats) -{ - stats->window_drops = 0; - stats->tx_overruns = 0; -} - -static int nsim_setup_tc_taprio(struct net_device *dev, - struct tc_taprio_qopt_offload *offload) -{ - int err = 0; - - switch (offload->cmd) { - case TAPRIO_CMD_REPLACE: - case TAPRIO_CMD_DESTROY: - break; - case TAPRIO_CMD_STATS: - nsim_taprio_stats(&offload->stats); - break; - default: - err = -EOPNOTSUPP; - } - - return err; -} - -static LIST_HEAD(nsim_block_cb_list); - -static int -nsim_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) -{ - struct netdevsim *ns = netdev_priv(dev); - - switch (type) { - case TC_SETUP_QDISC_TAPRIO: - return nsim_setup_tc_taprio(dev, type_data); - case TC_SETUP_BLOCK: - return flow_block_cb_setup_simple(type_data, - &nsim_block_cb_list, - nsim_setup_tc_block_cb, - ns, ns, true); - default: - return -EOPNOTSUPP; - } -} - static int nsim_set_features(struct net_device *dev, netdev_features_t features) { diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index f767fc8a7505..c904e14f6b3f 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -455,6 +455,9 @@ static inline void nsim_psp_handle_ext(struct sk_buff *skb, struct skb_ext *psp_ext) {} #endif +int nsim_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data); + struct nsim_bus_dev { struct device dev; struct list_head list; diff --git a/drivers/net/netdevsim/tc.c b/drivers/net/netdevsim/tc.c new file mode 100644 index 000000000000..8a1960f5f99e --- /dev/null +++ b/drivers/net/netdevsim/tc.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include + +#include "netdevsim.h" + +static int +nsim_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) +{ + return nsim_bpf_setup_tc_block_cb(type, type_data, cb_priv); +} + +static void nsim_taprio_stats(struct tc_taprio_qopt_stats *stats) +{ + stats->window_drops = 0; + stats->tx_overruns = 0; +} + +static int nsim_setup_tc_taprio(struct net_device *dev, + struct tc_taprio_qopt_offload *offload) +{ + int err = 0; + + switch (offload->cmd) { + case TAPRIO_CMD_REPLACE: + case TAPRIO_CMD_DESTROY: + break; + case TAPRIO_CMD_STATS: + nsim_taprio_stats(&offload->stats); + break; + default: + err = -EOPNOTSUPP; + } + + return err; +} + +static LIST_HEAD(nsim_block_cb_list); + +int +nsim_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) +{ + struct netdevsim *ns = netdev_priv(dev); + + switch (type) { + case TC_SETUP_QDISC_TAPRIO: + return nsim_setup_tc_taprio(dev, type_data); + case TC_SETUP_BLOCK: + return flow_block_cb_setup_simple(type_data, + &nsim_block_cb_list, + nsim_setup_tc_block_cb, + ns, ns, true); + default: + return -EOPNOTSUPP; + } +} From abdf5133bfa12c45d402f7b73d39bca772f3644a Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 19 Mar 2026 19:40:55 +0100 Subject: [PATCH 0681/1561] netdevsim: support tc-ets offload Extend netdevsim to accept ndo_setup_tc(TC_SETUP_QDISC_ETS) calls, so that it's possible to run tdc on ETS offload code path. Reviewed-by: Jamal Hadi Salim Signed-off-by: Davide Caratti Link: https://patch.msgid.link/d04086cd0204d4aaf6524e972198faa1a4e5d657.1773945414.git.dcaratti@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/tc.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/net/netdevsim/tc.c b/drivers/net/netdevsim/tc.c index 8a1960f5f99e..8f013a5895a2 100644 --- a/drivers/net/netdevsim/tc.c +++ b/drivers/net/netdevsim/tc.c @@ -2,6 +2,7 @@ #include #include +#include #include "netdevsim.h" @@ -36,6 +37,25 @@ static int nsim_setup_tc_taprio(struct net_device *dev, return err; } +static int nsim_setup_tc_ets(struct net_device *dev, + struct tc_ets_qopt_offload *offload) +{ + int err = 0; + + switch (offload->command) { + case TC_ETS_REPLACE: + case TC_ETS_DESTROY: + break; + case TC_ETS_STATS: + _bstats_update(offload->stats.bstats, 0, 0); + break; + default: + err = -EOPNOTSUPP; + } + + return err; +} + static LIST_HEAD(nsim_block_cb_list); int @@ -46,6 +66,8 @@ nsim_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) switch (type) { case TC_SETUP_QDISC_TAPRIO: return nsim_setup_tc_taprio(dev, type_data); + case TC_SETUP_QDISC_ETS: + return nsim_setup_tc_ets(dev, type_data); case TC_SETUP_BLOCK: return flow_block_cb_setup_simple(type_data, &nsim_block_cb_list, From 5754a1c9f9b6e298791c4bb34263f37dfe93ee35 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 19 Mar 2026 19:40:56 +0100 Subject: [PATCH 0682/1561] tc-testing: add a test case for ETS offload While reviewing the fix for unintentional u32 overflows in ets offload code, Jamal said: [...] > otherwise a tdc test should cover it fine (when you get to the > netdevsim change perhaps) Extend tdc to allow setting hw-tc-offload via ethtool, and add a test case to reproduce the division by zero fixed in [1]. [1] https://lore.kernel.org/all/CAM0EoMm17wsYZmdFLshH3_-GrZtzd=i0xnoO2yiVB=-N4761mw@mail.gmail.com/ Suggested-by: Jamal Hadi Salim Reviewed-by: Jamal Hadi Salim Co-developed-by: Victor Nogueira Signed-off-by: Victor Nogueira Signed-off-by: Davide Caratti Link: https://patch.msgid.link/39129c374cbd00147b8c5afc04db59db62b50acc.1773945414.git.dcaratti@redhat.com Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/qdiscs/ets.json | 23 +++++++++++++++++++ tools/testing/selftests/tc-testing/tdc.py | 3 +++ .../selftests/tc-testing/tdc_config.py | 1 + 3 files changed, 27 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ets.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ets.json index a5d94cdec605..ee09e6d6fdf3 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ets.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ets.json @@ -984,5 +984,28 @@ "matchCount": "1", "teardown": [ ] + }, + { + "id": "41f5", + "name": "ETS offload where the sum of quanta wraps u32", + "category": [ + "qdisc", + "ets" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 4\" > /sys/bus/netdevsim/new_device", + "$ETHTOOL -K $ETH hw-tc-offload on" + ], + "cmdUnderTest": "$TC qdisc add dev $ETH root ets quanta 4294967294 1 1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc ets .*bands 3 quanta 4294967294 1 1", + "matchCount": "1", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index 4f255cec0c22..81b4ac3f050c 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -755,6 +755,9 @@ def check_default_settings(args, remaining, pm): NAMES['DEV2'] = args.device if 'TIMEOUT' not in NAMES: NAMES['TIMEOUT'] = None + if 'ETHTOOL' in NAMES and not os.path.isfile(NAMES['ETHTOOL']): + print(f"The specified ethtool path {NAMES['ETHTOOL']} does not exist.") + exit(1) if not os.path.isfile(NAMES['TC']): print("The specified tc path " + NAMES['TC'] + " does not exist.") exit(1) diff --git a/tools/testing/selftests/tc-testing/tdc_config.py b/tools/testing/selftests/tc-testing/tdc_config.py index ccb0f06ef9e3..9488b03cbc2c 100644 --- a/tools/testing/selftests/tc-testing/tdc_config.py +++ b/tools/testing/selftests/tc-testing/tdc_config.py @@ -17,6 +17,7 @@ NAMES = { 'DEV1': 'v0p1', 'DEV2': '', 'DUMMY': 'dummy1', + 'ETHTOOL': '/usr/sbin/ethtool', 'ETH': 'eth0', 'BATCH_FILE': './batch.txt', 'BATCH_DIR': 'tmp', From c0368933dd3d4a8210a07a0c95c471421fbf7523 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 19 Mar 2026 14:22:10 +0200 Subject: [PATCH 0683/1561] mlx5: Remove redundant iseg base iseg_base and base_addr both point to BAR0, making iseg_base redundant. Remove iseg_base and rely on base_addr instead, reducing the size of struct mlx5_core_dev. Signed-off-by: Parav Pandit Reviewed-by: Shay Drori Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260319122211.27384-2-tariqt@nvidia.com Reviewed-by: Joe Damato Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/main.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/main.c | 3 +-- drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c | 3 +-- include/linux/mlx5/driver.h | 1 - 4 files changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 9fb0629978bd..5b8987ddaa8e 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2740,7 +2740,7 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm if (PAGE_SIZE > 4096) return -EOPNOTSUPP; - pfn = (dev->mdev->iseg_base + + pfn = (dev->mdev->bar_addr + offsetof(struct mlx5_init_seg, internal_timer_h)) >> PAGE_SHIFT; return rdma_user_mmap_io(&context->ibucontext, vma, pfn, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index b0bc4a7d4a93..661b211eeb95 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -950,8 +950,7 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev, pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP128)) mlx5_core_dbg(dev, "Enabling pci atomics failed\n"); - dev->iseg_base = dev->bar_addr; - dev->iseg = ioremap(dev->iseg_base, sizeof(*dev->iseg)); + dev->iseg = ioremap(dev->bar_addr, sizeof(*dev->iseg)); if (!dev->iseg) { err = -ENOMEM; mlx5_core_err(dev, "Failed mapping initialization segment, aborting\n"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c index c45540fe7d9d..4391ef0bab5d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c @@ -37,7 +37,6 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia mdev->device = &adev->dev; mdev->pdev = sf_dev->parent_mdev->pdev; mdev->bar_addr = sf_dev->bar_base_addr; - mdev->iseg_base = sf_dev->bar_base_addr; mdev->coredev_type = MLX5_COREDEV_SF; mdev->priv.parent_mdev = sf_dev->parent_mdev; mdev->priv.adev_idx = adev->id; @@ -53,7 +52,7 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia goto mdev_err; } - mdev->iseg = ioremap(mdev->iseg_base, sizeof(*mdev->iseg)); + mdev->iseg = ioremap(mdev->bar_addr, sizeof(*mdev->iseg)); if (!mdev->iseg) { mlx5_core_warn(mdev, "remap error\n"); err = -ENOMEM; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 04dcd09f7517..b8b5af78284d 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -755,7 +755,6 @@ struct mlx5_core_dev { } caps; struct mlx5_timeouts *timeouts; u64 sys_image_guid; - phys_addr_t iseg_base; struct mlx5_init_seg __iomem *iseg; phys_addr_t bar_addr; enum mlx5_device_state state; From 26469110c750c8179560637dd813e5d65b8148d2 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Thu, 19 Mar 2026 14:22:11 +0200 Subject: [PATCH 0684/1561] net/mlx5: Add vhca_id_type bit to alias context Add vhca_id_type bit to alias context which allows indicating the vhca_id_type to be passed at vhca_id_to_be_accessed, which can be either HW or SW, note that SW_VHCA_ID must be used to allow alias to work properly after migration. Signed-off-by: Patrisious Haddad Reviewed-by: Leon Romanovsky Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260319122211.27384-3-tariqt@nvidia.com Reviewed-by: Joe Damato Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 8fa4fb3d36cf..2400b4c38c77 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1968,7 +1968,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_360[0x3]; u8 log_max_rq[0x5]; - u8 reserved_at_368[0x3]; + u8 ft_alias_sw_vhca_id[0x1]; + u8 reserved_at_369[0x2]; u8 log_max_sq[0x5]; u8 reserved_at_370[0x3]; u8 log_max_tir[0x5]; @@ -6957,7 +6958,9 @@ struct mlx5_ifc_create_match_definer_out_bits { struct mlx5_ifc_alias_context_bits { u8 vhca_id_to_be_accessed[0x10]; - u8 reserved_at_10[0xd]; + u8 reserved_at_10[0xb]; + u8 vhca_id_type[0x1]; + u8 reserved_at_1c[0x1]; u8 status[0x3]; u8 object_id_to_be_accessed[0x20]; u8 reserved_at_40[0x40]; From 20743b0b64d91927d1a9346341f3ecdc527c8776 Mon Sep 17 00:00:00 2001 From: Shayne Chen Date: Mon, 15 Dec 2025 14:37:22 +0800 Subject: [PATCH 0685/1561] wifi: mt76: mt7996: extend CSA and CCA support for MLO Use correct link_id to report CSA and CCA countdown events, and also modify mt7996_channel_switch_beacon() to set beacon with the correct link_id. Co-developed-by: Peter Chiu Signed-off-by: Peter Chiu Co-developed-by: StanleyYP Wang Signed-off-by: StanleyYP Wang Signed-off-by: Shayne Chen Link: https://patch.msgid.link/20251215063728.3013365-1-shayne.chen@mediatek.com Signed-off-by: Felix Fietkau --- .../net/wireless/mediatek/mt76/mt7996/main.c | 23 ++- .../net/wireless/mediatek/mt76/mt7996/mcu.c | 159 ++++++++++++------ .../net/wireless/mediatek/mt76/mt7996/mcu.h | 8 +- 3 files changed, 131 insertions(+), 59 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index f16135f0b7f9..4b73fefcee8e 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -927,9 +927,30 @@ mt7996_channel_switch_beacon(struct ieee80211_hw *hw, struct cfg80211_chan_def *chandef) { struct mt7996_dev *dev = mt7996_hw_dev(hw); + struct mt7996_phy *phy = mt7996_band_phy(dev, chandef->chan->band); + struct ieee80211_bss_conf *link_conf; + unsigned int link_id; mutex_lock(&dev->mt76.mutex); - mt7996_mcu_add_beacon(hw, vif, &vif->bss_conf, vif->bss_conf.enable_beacon); + + for_each_vif_active_link(vif, link_conf, link_id) { + struct mt7996_vif_link *link = + mt7996_vif_link(dev, vif, link_id); + + if (!link || link->phy != phy) + continue; + + /* Reset beacon when channel switch triggered during CAC to let + * FW correctly perform CSA countdown + */ + if (!cfg80211_reg_can_beacon(hw->wiphy, &phy->mt76->chandef, + vif->type)) + mt7996_mcu_add_beacon(hw, vif, link_conf, false); + + mt7996_mcu_add_beacon(hw, vif, link_conf, true); + break; + } + mutex_unlock(&dev->mt76.mutex); } diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c index b4422a4754cd..c059ddbf1e42 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c @@ -390,13 +390,117 @@ int mt7996_mcu_wa_cmd(struct mt7996_dev *dev, int cmd, u32 a1, u32 a2, u32 a3) sizeof(req), false); } +struct mt7996_mcu_countdown_data { + struct mt76_phy *mphy; + u8 omac_idx; +}; + static void mt7996_mcu_csa_finish(void *priv, u8 *mac, struct ieee80211_vif *vif) { - if (!vif->bss_conf.csa_active || vif->type == NL80211_IFTYPE_STATION) + struct mt7996_mcu_countdown_data *cdata = (void *)priv; + struct mt7996_vif *mvif = (struct mt7996_vif *)vif->drv_priv; + struct ieee80211_bss_conf *link_conf = NULL; + unsigned long valid_links = vif->valid_links ?: BIT(0); + unsigned int link_id; + + if (vif->type == NL80211_IFTYPE_STATION) return; - ieee80211_csa_finish(vif, 0); + for_each_set_bit(link_id, &valid_links, IEEE80211_MLD_MAX_NUM_LINKS) { + struct mt76_vif_link *mlink = + rcu_dereference(mvif->mt76.link[link_id]); + + if (mlink && mlink->band_idx == cdata->mphy->band_idx && + mlink->omac_idx == cdata->omac_idx) { + link_conf = rcu_dereference(vif->link_conf[link_id]); + break; + } + } + + if (!link_conf || !link_conf->csa_active) + return; + + ieee80211_csa_finish(vif, link_conf->link_id); +} + +static void +mt7996_mcu_cca_finish(void *priv, u8 *mac, struct ieee80211_vif *vif) +{ + struct mt7996_mcu_countdown_data *cdata = (void *)priv; + struct mt7996_vif *mvif = (struct mt7996_vif *)vif->drv_priv; + struct ieee80211_bss_conf *link_conf = NULL; + unsigned long valid_links = vif->valid_links ?: BIT(0); + unsigned int link_id; + + if (vif->type == NL80211_IFTYPE_STATION) + return; + + for_each_set_bit(link_id, &valid_links, IEEE80211_MLD_MAX_NUM_LINKS) { + struct mt76_vif_link *mlink = + rcu_dereference(mvif->mt76.link[link_id]); + + if (mlink && mlink->band_idx == cdata->mphy->band_idx && + mlink->omac_idx == cdata->omac_idx) { + link_conf = rcu_dereference(vif->link_conf[link_id]); + break; + } + } + + if (!link_conf || !link_conf->color_change_active) + return; + + ieee80211_color_change_finish(vif, link_conf->link_id); +} + +static void +mt7996_mcu_ie_countdown(struct mt7996_dev *dev, struct sk_buff *skb) +{ +#define UNI_EVENT_IE_COUNTDOWN_CSA 0 +#define UNI_EVENT_IE_COUNTDOWN_BCC 1 + struct header { + u8 band; + u8 rsv[3]; + }; + struct mt7996_mcu_rxd *rxd = (struct mt7996_mcu_rxd *)skb->data; + const char *data = (char *)&rxd[1], *tail; + struct header *hdr = (struct header *)data; + struct tlv *tlv = (struct tlv *)(data + 4); + struct mt7996_mcu_countdown_notify *event; + struct mt7996_mcu_countdown_data cdata; + + if (hdr->band >= ARRAY_SIZE(dev->mt76.phys)) + return; + + cdata.mphy = dev->mt76.phys[hdr->band]; + if (!cdata.mphy) + return; + + tail = skb->data + skb->len; + data += sizeof(*hdr); + while (data + sizeof(*tlv) < tail && le16_to_cpu(tlv->len)) { + event = (struct mt7996_mcu_countdown_notify *)tlv->data; + + cdata.omac_idx = event->omac_idx; + + switch (le16_to_cpu(tlv->tag)) { + case UNI_EVENT_IE_COUNTDOWN_CSA: + ieee80211_iterate_active_interfaces_atomic(mt76_hw(dev), + IEEE80211_IFACE_ITER_RESUME_ALL, + mt7996_mcu_csa_finish, &cdata); + break; + case UNI_EVENT_IE_COUNTDOWN_BCC: + ieee80211_iterate_active_interfaces_atomic(mt76_hw(dev), + IEEE80211_IFACE_ITER_RESUME_ALL, + mt7996_mcu_cca_finish, &cdata); + break; + default: + break; + } + + data += le16_to_cpu(tlv->len); + tlv = (struct tlv *)data; + } } static void @@ -476,57 +580,6 @@ out: wiphy_info(mt76_hw(dev)->wiphy, "%s: %.*s", type, len, data); } -static void -mt7996_mcu_cca_finish(void *priv, u8 *mac, struct ieee80211_vif *vif) -{ - if (!vif->bss_conf.color_change_active || vif->type == NL80211_IFTYPE_STATION) - return; - - ieee80211_color_change_finish(vif, 0); -} - -static void -mt7996_mcu_ie_countdown(struct mt7996_dev *dev, struct sk_buff *skb) -{ -#define UNI_EVENT_IE_COUNTDOWN_CSA 0 -#define UNI_EVENT_IE_COUNTDOWN_BCC 1 - struct header { - u8 band; - u8 rsv[3]; - }; - struct mt76_phy *mphy = &dev->mt76.phy; - struct mt7996_mcu_rxd *rxd = (struct mt7996_mcu_rxd *)skb->data; - const char *data = (char *)&rxd[1], *tail; - struct header *hdr = (struct header *)data; - struct tlv *tlv = (struct tlv *)(data + 4); - - if (hdr->band >= ARRAY_SIZE(dev->mt76.phys)) - return; - - if (hdr->band && dev->mt76.phys[hdr->band]) - mphy = dev->mt76.phys[hdr->band]; - - tail = skb->data + skb->len; - data += sizeof(struct header); - while (data + sizeof(struct tlv) < tail && le16_to_cpu(tlv->len)) { - switch (le16_to_cpu(tlv->tag)) { - case UNI_EVENT_IE_COUNTDOWN_CSA: - ieee80211_iterate_active_interfaces_atomic(mphy->hw, - IEEE80211_IFACE_ITER_RESUME_ALL, - mt7996_mcu_csa_finish, mphy->hw); - break; - case UNI_EVENT_IE_COUNTDOWN_BCC: - ieee80211_iterate_active_interfaces_atomic(mphy->hw, - IEEE80211_IFACE_ITER_RESUME_ALL, - mt7996_mcu_cca_finish, mphy->hw); - break; - } - - data += le16_to_cpu(tlv->len); - tlv = (struct tlv *)data; - } -} - static int mt7996_mcu_update_tx_gi(struct rate_info *rate, struct all_sta_trx_rate *mcu_rate) { diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h index e0b83ac9f5e2..fc8b09e76f01 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h @@ -52,12 +52,10 @@ struct mt7996_mcu_thermal_enable { u8 rsv[2]; } __packed; -struct mt7996_mcu_csa_notify { - struct mt7996_mcu_rxd rxd; - +struct mt7996_mcu_countdown_notify { u8 omac_idx; - u8 csa_count; - u8 band_idx; + u8 count; + u8 csa_failure_reason; /* 0: success, 1: beacon disabled */ u8 rsv; } __packed; From 45a09251d610f3b8a1fb02039146e42f1f4efe90 Mon Sep 17 00:00:00 2001 From: StanleyYP Wang Date: Mon, 15 Dec 2025 14:37:23 +0800 Subject: [PATCH 0686/1561] wifi: mt76: mt7996: fix the behavior of radar detection RDD_DET_MODE is a firmware command intended for testing and does not pause TX after radar detection, so remove it from the normal flow; instead, use the MAC_ENABLE_CTRL firmware command to resume TX after the radar-triggered channel switch completes. Fixes: 1529e335f93d ("wifi: mt76: mt7996: rework radar HWRDD idx") Co-developed-by: Shayne Chen Signed-off-by: Shayne Chen Signed-off-by: StanleyYP Wang Link: https://patch.msgid.link/20251215063728.3013365-2-shayne.chen@mediatek.com Signed-off-by: Felix Fietkau --- .../net/wireless/mediatek/mt76/mt7996/mac.c | 8 +-- .../net/wireless/mediatek/mt76/mt7996/main.c | 20 ++++++++ .../net/wireless/mediatek/mt76/mt7996/mcu.c | 49 ++++++++++++++++--- .../net/wireless/mediatek/mt76/mt7996/mcu.h | 1 + .../wireless/mediatek/mt76/mt7996/mt7996.h | 2 + 5 files changed, 68 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c index 84cbf36b493c..2765ac7285b4 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c @@ -2974,7 +2974,7 @@ static void mt7996_dfs_stop_radar_detector(struct mt7996_phy *phy) static int mt7996_dfs_start_rdd(struct mt7996_dev *dev, int rdd_idx) { - int err, region; + int region; switch (dev->mt76.region) { case NL80211_DFS_ETSI: @@ -2989,11 +2989,7 @@ static int mt7996_dfs_start_rdd(struct mt7996_dev *dev, int rdd_idx) break; } - err = mt7996_mcu_rdd_cmd(dev, RDD_START, rdd_idx, region); - if (err < 0) - return err; - - return mt7996_mcu_rdd_cmd(dev, RDD_DET_MODE, rdd_idx, 1); + return mt7996_mcu_rdd_cmd(dev, RDD_START, rdd_idx, region); } static int mt7996_dfs_start_radar_detector(struct mt7996_phy *phy) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index 4b73fefcee8e..fac50dbceae7 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -79,6 +79,7 @@ static void mt7996_stop_phy(struct mt7996_phy *phy) mutex_lock(&dev->mt76.mutex); + mt7996_mcu_rdd_resume_tx(phy); mt7996_mcu_set_radio_en(phy, false); clear_bit(MT76_STATE_RUNNING, &phy->mt76->state); @@ -954,6 +955,24 @@ mt7996_channel_switch_beacon(struct ieee80211_hw *hw, mutex_unlock(&dev->mt76.mutex); } +static int +mt7996_post_channel_switch(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + struct ieee80211_bss_conf *link_conf) +{ + struct cfg80211_chan_def *chandef = &link_conf->chanreq.oper; + struct mt7996_dev *dev = mt7996_hw_dev(hw); + struct mt7996_phy *phy = mt7996_band_phy(dev, chandef->chan->band); + int ret; + + mutex_lock(&dev->mt76.mutex); + + ret = mt7996_mcu_rdd_resume_tx(phy); + + mutex_unlock(&dev->mt76.mutex); + + return ret; +} + static int mt7996_mac_sta_init_link(struct mt7996_dev *dev, struct ieee80211_bss_conf *link_conf, @@ -2327,6 +2346,7 @@ const struct ieee80211_ops mt7996_ops = { .release_buffered_frames = mt76_release_buffered_frames, .get_txpower = mt7996_get_txpower, .channel_switch_beacon = mt7996_channel_switch_beacon, + .post_channel_switch = mt7996_post_channel_switch, .get_stats = mt7996_get_stats, .get_et_sset_count = mt7996_get_et_sset_count, .get_et_stats = mt7996_get_et_stats, diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c index c059ddbf1e42..6294704f7881 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c @@ -520,24 +520,32 @@ mt7996_mcu_rx_radar_detected(struct mt7996_dev *dev, struct sk_buff *skb) break; case MT_RDD_IDX_BACKGROUND: if (!dev->rdd2_phy) - return; + goto err; mphy = dev->rdd2_phy->mt76; break; default: - dev_err(dev->mt76.dev, "Unknown RDD idx %d\n", r->rdd_idx); - return; + goto err; } if (!mphy) - return; + goto err; - if (r->rdd_idx == MT_RDD_IDX_BACKGROUND) + if (r->rdd_idx == MT_RDD_IDX_BACKGROUND) { cfg80211_background_radar_event(mphy->hw->wiphy, &dev->rdd2_chandef, GFP_ATOMIC); - else + } else { + struct mt7996_phy *phy = mphy->priv; + + phy->rdd_tx_paused = true; ieee80211_radar_detected(mphy->hw, NULL); + } dev->hw_pattern++; + + return; + +err: + dev_err(dev->mt76.dev, "Invalid RDD idx %d\n", r->rdd_idx); } static void @@ -4612,6 +4620,35 @@ int mt7996_mcu_set_radio_en(struct mt7996_phy *phy, bool enable) &req, sizeof(req), true); } +int mt7996_mcu_rdd_resume_tx(struct mt7996_phy *phy) +{ + struct { + u8 band_idx; + u8 _rsv[3]; + + __le16 tag; + __le16 len; + u8 mac_enable; + u8 _rsv2[3]; + } __packed req = { + .band_idx = phy->mt76->band_idx, + .tag = cpu_to_le16(UNI_BAND_CONFIG_MAC_ENABLE_CTRL), + .len = cpu_to_le16(sizeof(req) - 4), + .mac_enable = 2, + }; + int ret; + + if (!phy->rdd_tx_paused) + return 0; + + ret = mt76_mcu_send_msg(&phy->dev->mt76, MCU_WM_UNI_CMD(BAND_CONFIG), + &req, sizeof(req), true); + if (!ret) + phy->rdd_tx_paused = false; + + return ret; +} + int mt7996_mcu_rdd_cmd(struct mt7996_dev *dev, int cmd, u8 rdd_idx, u8 val) { struct { diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h index fc8b09e76f01..5b3597ca79be 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h @@ -835,6 +835,7 @@ enum { enum { UNI_BAND_CONFIG_RADIO_ENABLE, UNI_BAND_CONFIG_RTS_THRESHOLD = 0x08, + UNI_BAND_CONFIG_MAC_ENABLE_CTRL = 0x0c, }; enum { diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h index 7a884311800e..d31864f973cc 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h @@ -377,6 +377,7 @@ struct mt7996_phy { bool has_aux_rx; bool counter_reset; + bool rdd_tx_paused; }; struct mt7996_dev { @@ -726,6 +727,7 @@ int mt7996_mcu_get_temperature(struct mt7996_phy *phy); int mt7996_mcu_set_thermal_throttling(struct mt7996_phy *phy, u8 state); int mt7996_mcu_set_thermal_protect(struct mt7996_phy *phy, bool enable); int mt7996_mcu_set_txpower_sku(struct mt7996_phy *phy); +int mt7996_mcu_rdd_resume_tx(struct mt7996_phy *phy); int mt7996_mcu_rdd_cmd(struct mt7996_dev *dev, int cmd, u8 rdd_idx, u8 val); int mt7996_mcu_rdd_background_enable(struct mt7996_phy *phy, struct cfg80211_chan_def *chandef); From 7247037a016ed4bc8a50507d74d0bae98409ae3f Mon Sep 17 00:00:00 2001 From: StanleyYP Wang Date: Mon, 15 Dec 2025 14:37:24 +0800 Subject: [PATCH 0687/1561] wifi: mt76: mt7996: set specific BSSINFO and STAREC commands after channel switch After channel switch, some tags of BSSINFO (rfch) and STAREC (bfer, rate_ctrl) commands should also be updated. Otherwise, a BSS might not be able to transmit with its peer using correct bandwidth. Co-developed-by: Shayne Chen Signed-off-by: Shayne Chen Signed-off-by: StanleyYP Wang Link: https://patch.msgid.link/20251215063728.3013365-3-shayne.chen@mediatek.com Signed-off-by: Felix Fietkau --- .../net/wireless/mediatek/mt76/mt7996/main.c | 14 ++++- .../net/wireless/mediatek/mt76/mt7996/mcu.c | 59 +++++++++++++++++++ .../wireless/mediatek/mt76/mt7996/mt7996.h | 3 + 3 files changed, 75 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index fac50dbceae7..393823368bed 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -962,12 +962,24 @@ mt7996_post_channel_switch(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct cfg80211_chan_def *chandef = &link_conf->chanreq.oper; struct mt7996_dev *dev = mt7996_hw_dev(hw); struct mt7996_phy *phy = mt7996_band_phy(dev, chandef->chan->band); - int ret; + struct mt7996_vif_link *link; + int ret = -EINVAL; mutex_lock(&dev->mt76.mutex); + link = mt7996_vif_conf_link(dev, vif, link_conf); + if (!link) + goto out; + + ret = mt7996_mcu_update_bss_rfch(phy, link); + if (ret) + goto out; + + ieee80211_iterate_stations_mtx(hw, mt7996_mcu_update_sta_rec_bw, link); + ret = mt7996_mcu_rdd_resume_tx(phy); +out: mutex_unlock(&dev->mt76.mutex); return ret; diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c index 6294704f7881..5a634b71ed2a 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c @@ -1231,6 +1231,22 @@ out: MCU_WMWA_UNI_CMD(BSS_INFO_UPDATE), true); } +int mt7996_mcu_update_bss_rfch(struct mt7996_phy *phy, struct mt7996_vif_link *link) +{ + struct mt7996_dev *dev = phy->dev; + struct sk_buff *skb; + + skb = __mt7996_mcu_alloc_bss_req(&dev->mt76, &link->mt76, + MT7996_BSS_UPDATE_MAX_SIZE); + if (IS_ERR(skb)) + return PTR_ERR(skb); + + mt7996_mcu_bss_rfch_tlv(skb, phy); + + return mt76_mcu_skb_send_msg(&dev->mt76, skb, + MCU_WMWA_UNI_CMD(BSS_INFO_UPDATE), true); +} + int mt7996_mcu_set_timing(struct mt7996_phy *phy, struct ieee80211_vif *vif, struct ieee80211_bss_conf *link_conf) { @@ -2590,6 +2606,49 @@ int mt7996_mcu_teardown_mld_sta(struct mt7996_dev *dev, MCU_WMWA_UNI_CMD(STA_REC_UPDATE), true); } +void mt7996_mcu_update_sta_rec_bw(void *data, struct ieee80211_sta *sta) +{ + struct mt7996_vif_link *link = (struct mt7996_vif_link *)data; + struct mt7996_sta *msta = (struct mt7996_sta *)sta->drv_priv; + struct mt7996_sta_link *msta_link; + struct mt7996_dev *dev; + struct ieee80211_bss_conf *link_conf; + struct ieee80211_link_sta *link_sta; + struct ieee80211_vif *vif; + struct sk_buff *skb; + int link_id; + + if (link->mt76.mvif != &msta->vif->mt76) + return; + + dev = link->phy->dev; + link_id = link->msta_link.wcid.link_id; + link_sta = link_sta_dereference_protected(sta, link_id); + if (!link_sta) + return; + + msta_link = mt76_dereference(msta->link[link_id], &dev->mt76); + if (!msta_link) + return; + + vif = container_of((void *)msta->vif, struct ieee80211_vif, drv_priv); + link_conf = link_conf_dereference_protected(vif, link_id); + if (!link_conf) + return; + + skb = __mt76_connac_mcu_alloc_sta_req(&dev->mt76, &link->mt76, + &msta_link->wcid, + MT7996_STA_UPDATE_MAX_SIZE); + if (IS_ERR(skb)) + return; + + mt7996_mcu_sta_bfer_tlv(dev, skb, link_conf, link_sta, link); + mt7996_mcu_sta_rate_ctrl_tlv(skb, dev, vif, link_conf, link_sta, link); + + mt76_mcu_skb_send_msg(&dev->mt76, skb, + MCU_WMWA_UNI_CMD(STA_REC_UPDATE), true); +} + static int mt7996_mcu_sta_key_tlv(struct mt76_dev *dev, struct mt76_wcid *wcid, struct sk_buff *skb, diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h index d31864f973cc..8f1043159f58 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h @@ -670,6 +670,8 @@ int mt7996_mcu_add_bss_info(struct mt7996_phy *phy, struct ieee80211_vif *vif, struct ieee80211_bss_conf *link_conf, struct mt76_vif_link *mlink, struct mt7996_sta_link *msta_link, int enable); +int mt7996_mcu_update_bss_rfch(struct mt7996_phy *phy, + struct mt7996_vif_link *link); int mt7996_mcu_add_sta(struct mt7996_dev *dev, struct ieee80211_bss_conf *link_conf, struct ieee80211_link_sta *link_sta, @@ -679,6 +681,7 @@ int mt7996_mcu_add_sta(struct mt7996_dev *dev, int mt7996_mcu_teardown_mld_sta(struct mt7996_dev *dev, struct mt7996_vif_link *link, struct mt7996_sta_link *msta_link); +void mt7996_mcu_update_sta_rec_bw(void *data, struct ieee80211_sta *sta); int mt7996_mcu_add_tx_ba(struct mt7996_dev *dev, struct ieee80211_ampdu_params *params, struct ieee80211_vif *vif, bool enable); From fdce55c038702ac8f330de0697e907713a1e976b Mon Sep 17 00:00:00 2001 From: StanleyYP Wang Date: Mon, 15 Dec 2025 14:37:25 +0800 Subject: [PATCH 0688/1561] wifi: mt76: mt7996: abort CCA when CSA is starting When CSA countdown is going to start, carry UNI_BSS_INFO_BCN_BCC tag to abort any CCA countdown. Signed-off-by: StanleyYP Wang Signed-off-by: Shayne Chen Link: https://patch.msgid.link/20251215063728.3013365-4-shayne.chen@mediatek.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7996/mcu.c | 10 ++++++++++ drivers/net/wireless/mediatek/mt76/mt7996/mcu.h | 11 ++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c index 5a634b71ed2a..165f87cc7275 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c @@ -2796,6 +2796,16 @@ mt7996_mcu_beacon_cntdwn(struct sk_buff *rskb, struct sk_buff *skb, info = (struct bss_bcn_cntdwn_tlv *)tlv; info->cnt = skb->data[offs->cntdwn_counter_offs[0]]; + + /* abort the CCA countdown when starting CSA countdown */ + if (csa) { + struct bss_bcn_cntdwn_tlv *cca_info; + + tlv = mt7996_mcu_add_uni_tlv(rskb, UNI_BSS_INFO_BCN_BCC, + sizeof(*cca_info)); + cca_info = (struct bss_bcn_cntdwn_tlv *)tlv; + cca_info->cca.abort = true; + } } static void diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h index 5b3597ca79be..1283beb0dc19 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h @@ -412,7 +412,16 @@ struct bss_bcn_cntdwn_tlv { __le16 tag; __le16 len; u8 cnt; - u8 rsv[3]; + union { + struct { + bool static_pp; + bool abort; + } csa; + struct { + bool abort; + } cca; + }; + u8 rsv; } __packed; struct bss_bcn_mbss_tlv { From 956d2e65da93f59fb50bc149f2009565bec26f56 Mon Sep 17 00:00:00 2001 From: StanleyYP Wang Date: Mon, 15 Dec 2025 14:37:26 +0800 Subject: [PATCH 0689/1561] wifi: mt76: mt7996: offload radar threshold initialization Since some radar specifications maintained by the driver are incorrect and are now also maintained by the firmware, offload the initialization procedure to the firmware. This fixes issues for radar detection rate testings. Signed-off-by: StanleyYP Wang Signed-off-by: Shayne Chen Link: https://patch.msgid.link/20251215063728.3013365-5-shayne.chen@mediatek.com Signed-off-by: Felix Fietkau --- .../net/wireless/mediatek/mt76/mt7996/mac.c | 77 ------------------- .../net/wireless/mediatek/mt76/mt7996/mac.h | 5 -- 2 files changed, 82 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c index 2765ac7285b4..77040980dea3 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c @@ -13,45 +13,6 @@ #define to_rssi(field, rcpi) ((FIELD_GET(field, rcpi) - 220) / 2) -static const struct mt7996_dfs_radar_spec etsi_radar_specs = { - .pulse_th = { 110, -10, -80, 40, 5200, 128, 5200 }, - .radar_pattern = { - [5] = { 1, 0, 6, 32, 28, 0, 990, 5010, 17, 1, 1 }, - [6] = { 1, 0, 9, 32, 28, 0, 615, 5010, 27, 1, 1 }, - [7] = { 1, 0, 15, 32, 28, 0, 240, 445, 27, 1, 1 }, - [8] = { 1, 0, 12, 32, 28, 0, 240, 510, 42, 1, 1 }, - [9] = { 1, 1, 0, 0, 0, 0, 2490, 3343, 14, 0, 0, 12, 32, 28, { }, 126 }, - [10] = { 1, 1, 0, 0, 0, 0, 2490, 3343, 14, 0, 0, 15, 32, 24, { }, 126 }, - [11] = { 1, 1, 0, 0, 0, 0, 823, 2510, 14, 0, 0, 18, 32, 28, { }, 54 }, - [12] = { 1, 1, 0, 0, 0, 0, 823, 2510, 14, 0, 0, 27, 32, 24, { }, 54 }, - }, -}; - -static const struct mt7996_dfs_radar_spec fcc_radar_specs = { - .pulse_th = { 110, -10, -80, 40, 5200, 128, 5200 }, - .radar_pattern = { - [0] = { 1, 0, 8, 32, 28, 0, 508, 3076, 13, 1, 1 }, - [1] = { 1, 0, 12, 32, 28, 0, 140, 240, 17, 1, 1 }, - [2] = { 1, 0, 8, 32, 28, 0, 190, 510, 22, 1, 1 }, - [3] = { 1, 0, 6, 32, 28, 0, 190, 510, 32, 1, 1 }, - [4] = { 1, 0, 9, 255, 28, 0, 323, 343, 13, 1, 32 }, - }, -}; - -static const struct mt7996_dfs_radar_spec jp_radar_specs = { - .pulse_th = { 110, -10, -80, 40, 5200, 128, 5200 }, - .radar_pattern = { - [0] = { 1, 0, 8, 32, 28, 0, 508, 3076, 13, 1, 1 }, - [1] = { 1, 0, 12, 32, 28, 0, 140, 240, 17, 1, 1 }, - [2] = { 1, 0, 8, 32, 28, 0, 190, 510, 22, 1, 1 }, - [3] = { 1, 0, 6, 32, 28, 0, 190, 510, 32, 1, 1 }, - [4] = { 1, 0, 9, 255, 28, 0, 323, 343, 13, 1, 32 }, - [13] = { 1, 0, 7, 32, 28, 0, 3836, 3856, 14, 1, 1 }, - [14] = { 1, 0, 6, 32, 28, 0, 615, 5010, 110, 1, 1 }, - [15] = { 1, 1, 0, 0, 0, 0, 15, 5010, 110, 0, 0, 12, 32, 28 }, - }, -}; - static struct mt76_wcid *mt7996_rx_get_wcid(struct mt7996_dev *dev, u16 idx, u8 band_idx) { @@ -3011,40 +2972,6 @@ static int mt7996_dfs_start_radar_detector(struct mt7996_phy *phy) return err; } -static int -mt7996_dfs_init_radar_specs(struct mt7996_phy *phy) -{ - const struct mt7996_dfs_radar_spec *radar_specs; - struct mt7996_dev *dev = phy->dev; - int err, i; - - switch (dev->mt76.region) { - case NL80211_DFS_FCC: - radar_specs = &fcc_radar_specs; - err = mt7996_mcu_set_fcc5_lpn(dev, 8); - if (err < 0) - return err; - break; - case NL80211_DFS_ETSI: - radar_specs = &etsi_radar_specs; - break; - case NL80211_DFS_JP: - radar_specs = &jp_radar_specs; - break; - default: - return -EINVAL; - } - - for (i = 0; i < ARRAY_SIZE(radar_specs->radar_pattern); i++) { - err = mt7996_mcu_set_radar_th(dev, i, - &radar_specs->radar_pattern[i]); - if (err < 0) - return err; - } - - return mt7996_mcu_set_pulse_th(dev, &radar_specs->pulse_th); -} - int mt7996_dfs_init_radar_detector(struct mt7996_phy *phy) { struct mt7996_dev *dev = phy->dev; @@ -3064,10 +2991,6 @@ int mt7996_dfs_init_radar_detector(struct mt7996_phy *phy) goto stop; if (prev_state <= MT_DFS_STATE_DISABLED) { - err = mt7996_dfs_init_radar_specs(phy); - if (err < 0) - return err; - err = mt7996_dfs_start_radar_detector(phy); if (err < 0) return err; diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.h b/drivers/net/wireless/mediatek/mt76/mt7996/mac.h index 4eca37b013fc..70ee30f32f88 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.h +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.h @@ -37,9 +37,4 @@ struct mt7996_dfs_pattern { u32 min_stgpr_diff; } __packed; -struct mt7996_dfs_radar_spec { - struct mt7996_dfs_pulse pulse_th; - struct mt7996_dfs_pattern radar_pattern[16]; -}; - #endif From 7f3ec778593f24584dbcf25995f2b651133e956d Mon Sep 17 00:00:00 2001 From: Shayne Chen Date: Mon, 15 Dec 2025 14:37:27 +0800 Subject: [PATCH 0690/1561] wifi: mt76: mt7996: add duplicated WTBL command This is a firmware mechanism to improve packet loss issues for mt7996 and mt7992 chipsets. Signed-off-by: Shayne Chen Link: https://patch.msgid.link/20251215063728.3013365-6-shayne.chen@mediatek.com Signed-off-by: Felix Fietkau --- .../net/wireless/mediatek/mt76/mt7996/init.c | 3 +++ .../net/wireless/mediatek/mt76/mt7996/mcu.c | 25 +++++++++++++++++-- .../net/wireless/mediatek/mt76/mt7996/mcu.h | 5 ++++ .../wireless/mediatek/mt76/mt7996/mt7996.h | 1 + 4 files changed, 32 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/init.c b/drivers/net/wireless/mediatek/mt76/mt7996/init.c index 00a8286bd136..7e8bd3b142e7 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/init.c @@ -756,6 +756,9 @@ static void mt7996_init_work(struct work_struct *work) mt7996_mcu_set_eeprom(dev); mt7996_mac_init(dev); mt7996_txbf_init(dev); + + if (!is_mt7990(&dev->mt76)) + mt7996_mcu_set_dup_wtbl(dev); } void mt7996_wfsys_reset(struct mt7996_dev *dev) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c index 165f87cc7275..aab83ad9c1b5 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c @@ -4033,7 +4033,6 @@ int mt7996_mcu_get_eeprom_free_block(struct mt7996_dev *dev, u8 *block_num) int mt7996_mcu_get_chip_config(struct mt7996_dev *dev, u32 *cap) { -#define NIC_CAP 3 #define UNI_EVENT_CHIP_CONFIG_EFUSE_VERSION 0x21 struct { u8 _rsv[4]; @@ -4041,7 +4040,7 @@ int mt7996_mcu_get_chip_config(struct mt7996_dev *dev, u32 *cap) __le16 tag; __le16 len; } __packed req = { - .tag = cpu_to_le16(NIC_CAP), + .tag = cpu_to_le16(UNI_CHIP_CONFIG_NIC_CAPA), .len = cpu_to_le16(sizeof(req) - 4), }; struct sk_buff *skb; @@ -5048,3 +5047,25 @@ int mt7996_mcu_cp_support(struct mt7996_dev *dev, u8 mode) return mt76_mcu_send_msg(&dev->mt76, MCU_WA_EXT_CMD(CP_SUPPORT), &cp_mode, sizeof(cp_mode), true); } + +int mt7996_mcu_set_dup_wtbl(struct mt7996_dev *dev) +{ +#define DUP_WTBL_NUM 80 + struct { + u8 _rsv[4]; + + __le16 tag; + __le16 len; + __le16 base; + __le16 num; + u8 _rsv2[4]; + } __packed req = { + .tag = cpu_to_le16(UNI_CHIP_CONFIG_DUP_WTBL), + .len = cpu_to_le16(sizeof(req) - 4), + .base = cpu_to_le16(MT7996_WTBL_STA - DUP_WTBL_NUM + 1), + .num = cpu_to_le16(DUP_WTBL_NUM), + }; + + return mt76_mcu_send_msg(&dev->mt76, MCU_WM_UNI_CMD(CHIP_CONFIG), &req, + sizeof(req), true); +} diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h index 1283beb0dc19..de14394bec22 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h @@ -798,6 +798,11 @@ enum { UNI_CHANNEL_RX_PATH, }; +enum { + UNI_CHIP_CONFIG_NIC_CAPA = 3, + UNI_CHIP_CONFIG_DUP_WTBL = 4, +}; + #define MT7996_BSS_UPDATE_MAX_SIZE (sizeof(struct bss_req_hdr) + \ sizeof(struct mt76_connac_bss_basic_tlv) + \ sizeof(struct bss_rlm_tlv) + \ diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h index 8f1043159f58..f850be874b1b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h @@ -748,6 +748,7 @@ void mt7996_mcu_exit(struct mt7996_dev *dev); int mt7996_mcu_get_all_sta_info(struct mt7996_phy *phy, u16 tag); int mt7996_mcu_wed_rro_reset_sessions(struct mt7996_dev *dev, u16 id); int mt7996_mcu_set_sniffer_mode(struct mt7996_phy *phy, bool enabled); +int mt7996_mcu_set_dup_wtbl(struct mt7996_dev *dev); static inline bool mt7996_has_hwrro(struct mt7996_dev *dev) { From 5ef0e8e2653b1ba325eb883ffb94073f19cb669a Mon Sep 17 00:00:00 2001 From: Shayne Chen Date: Mon, 15 Dec 2025 14:37:28 +0800 Subject: [PATCH 0691/1561] wifi: mt76: mt7996: fix iface combination for different chipsets MT7992 and MT7990 support up to 19 interfaces per band and 32 in total. Fixes: 8df63a4bbe3d ("wifi: mt76: mt7996: adjust interface num and wtbl size for mt7992") Signed-off-by: Shayne Chen Link: https://patch.msgid.link/20251215063728.3013365-7-shayne.chen@mediatek.com Signed-off-by: Felix Fietkau --- .../net/wireless/mediatek/mt76/mt7996/init.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/init.c b/drivers/net/wireless/mediatek/mt76/mt7996/init.c index 7e8bd3b142e7..2e439f0815d4 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/init.c @@ -34,6 +34,20 @@ static const struct ieee80211_iface_combination if_comb_global = { BIT(NL80211_CHAN_WIDTH_40) | BIT(NL80211_CHAN_WIDTH_80) | BIT(NL80211_CHAN_WIDTH_160), + .beacon_int_min_gcd = 100, +}; + +static const struct ieee80211_iface_combination if_comb_global_7992 = { + .limits = &if_limits_global, + .n_limits = 1, + .max_interfaces = 32, + .num_different_channels = MT7996_MAX_RADIOS - 1, + .radar_detect_widths = BIT(NL80211_CHAN_WIDTH_20_NOHT) | + BIT(NL80211_CHAN_WIDTH_20) | + BIT(NL80211_CHAN_WIDTH_40) | + BIT(NL80211_CHAN_WIDTH_80) | + BIT(NL80211_CHAN_WIDTH_160), + .beacon_int_min_gcd = 100, }; static const struct ieee80211_iface_limit if_limits[] = { @@ -485,7 +499,8 @@ mt7996_init_wiphy(struct ieee80211_hw *hw, struct mtk_wed_device *wed) hw->vif_data_size = sizeof(struct mt7996_vif); hw->chanctx_data_size = sizeof(struct mt76_chanctx); - wiphy->iface_combinations = &if_comb_global; + wiphy->iface_combinations = is_mt7996(&dev->mt76) ? &if_comb_global : + &if_comb_global_7992; wiphy->n_iface_combinations = 1; wiphy->radio = dev->radios; From bb8e38fcdbf7290d7f0cd572d2d8fdb2b641b492 Mon Sep 17 00:00:00 2001 From: Quan Zhou Date: Thu, 27 Nov 2025 15:49:11 +0800 Subject: [PATCH 0692/1561] wifi: mt76: mt7925: fix AMPDU state handling in mt7925_tx_check_aggr Previously, the AMPDU state bit for a given TID was set before attempting to start a BA session, which could result in the AMPDU state being marked active even if ieee80211_start_tx_ba_session() failed. This patch changes the logic to only set the AMPDU state bit after successfully starting a BA session, ensuring proper synchronization between AMPDU state and BA session status. This fixes potential issues with aggregation state tracking and improves compatibility with mac80211 BA session management. Fixes: 44eb173bdd4f ("wifi: mt76: mt7925: add link handling in mt7925_txwi_free") Cc: stable@vger.kernel.org Signed-off-by: Quan Zhou Reviewed-by: Sean Wang Link: https://patch.msgid.link/d5960fbced0beaf33c30203f7f8fb91d0899c87b.1764228973.git.quan.zhou@mediatek.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7925/mac.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mac.c b/drivers/net/wireless/mediatek/mt76/mt7925/mac.c index caaf71c31480..63e58c177d65 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mac.c @@ -882,8 +882,10 @@ static void mt7925_tx_check_aggr(struct ieee80211_sta *sta, struct sk_buff *skb, else mlink = &msta->deflink; - if (!test_and_set_bit(tid, &mlink->wcid.ampdu_state)) - ieee80211_start_tx_ba_session(sta, tid, 0); + if (!test_and_set_bit(tid, &mlink->wcid.ampdu_state)) { + if (ieee80211_start_tx_ba_session(sta, tid, 0)) + clear_bit(tid, &mlink->wcid.ampdu_state); + } } static bool From 524ef4b42b40bf1cf634663e746ace0af3fce45c Mon Sep 17 00:00:00 2001 From: David Bauer Date: Sat, 29 Nov 2025 03:39:02 +0100 Subject: [PATCH 0693/1561] wifi: mt76: mt76x02: wake queues after reconfig The shared reset procedure of MT7610 and MT7612 stop all queues before starting the reset sequence. They however never restart these like other supported mt76 chips do in the reconfig_complete call. This leads to TX not continuing after the reset. Restart queues in the reconfig_complete callback to restore functionality after the reset. Signed-off-by: David Bauer Link: https://patch.msgid.link/20251129023904.288484-1-mail@david-bauer.net Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c index dd71c1c95cc9..dc7c03d23123 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c @@ -534,6 +534,7 @@ void mt76x02_reconfig_complete(struct ieee80211_hw *hw, return; clear_bit(MT76_RESTART, &dev->mphy.state); + ieee80211_wake_queues(hw); } EXPORT_SYMBOL_GPL(mt76x02_reconfig_complete); From 7900da40e315cd1971405ef95e561b0176e0dac2 Mon Sep 17 00:00:00 2001 From: Leon Yen Date: Fri, 26 Sep 2025 13:34:47 +0800 Subject: [PATCH 0694/1561] wifi: mt76: mt7925: introduce CSA support in non-MLO mode Add CSA (Channel Switch Announcement) related implementation in collaboration with mac80211 to deal with dynamic channel switching. Signed-off-by: Leon Yen Signed-off-by: Ming Yen Hsieh Link: https://patch.msgid.link/20250926053447.4036650-1-mingyen.hsieh@mediatek.com Signed-off-by: Felix Fietkau --- .../net/wireless/mediatek/mt76/mt7925/main.c | 139 ++++++++++++++++++ .../wireless/mediatek/mt76/mt7925/mt7925.h | 1 + .../net/wireless/mediatek/mt76/mt792x_core.c | 5 +- 3 files changed, 142 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/main.c b/drivers/net/wireless/mediatek/mt76/mt7925/main.c index 2d358a96640c..9861c1fde1ae 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/main.c @@ -245,6 +245,7 @@ int mt7925_init_mlo_caps(struct mt792x_phy *phy) { struct wiphy *wiphy = phy->mt76->hw->wiphy; static const u8 ext_capa_sta[] = { + [0] = WLAN_EXT_CAPA1_EXT_CHANNEL_SWITCHING, [2] = WLAN_EXT_CAPA3_MULTI_BSSID_SUPPORT, [7] = WLAN_EXT_CAPA8_OPMODE_NOTIF, }; @@ -438,6 +439,9 @@ mt7925_add_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif) if (phy->chip_cap & MT792x_CHIP_CAP_RSSI_NOTIFY_EVT_EN) vif->driver_flags |= IEEE80211_VIF_SUPPORTS_CQM_RSSI; + INIT_WORK(&mvif->csa_work, mt7925_csa_work); + timer_setup(&mvif->csa_timer, mt792x_csa_timer, 0); + out: mt792x_mutex_release(dev); @@ -1749,6 +1753,10 @@ static int mt7925_add_chanctx(struct ieee80211_hw *hw, struct ieee80211_chanctx_conf *ctx) { + struct mt792x_dev *dev = mt792x_hw_dev(hw); + + dev->new_ctx = ctx; + return 0; } @@ -1756,6 +1764,11 @@ static void mt7925_remove_chanctx(struct ieee80211_hw *hw, struct ieee80211_chanctx_conf *ctx) { + struct mt792x_dev *dev = mt792x_hw_dev(hw); + + if (dev->new_ctx == ctx) + dev->new_ctx = NULL; + } static void @@ -2144,6 +2157,11 @@ static void mt7925_unassign_vif_chanctx(struct ieee80211_hw *hw, mctx->bss_conf = NULL; mconf->mt76.ctx = NULL; mutex_unlock(&dev->mt76.mutex); + + if (link_conf->csa_active) { + timer_delete_sync(&mvif->csa_timer); + cancel_work_sync(&mvif->csa_work); + } } static void mt7925_rfkill_poll(struct ieee80211_hw *hw) @@ -2158,6 +2176,121 @@ static void mt7925_rfkill_poll(struct ieee80211_hw *hw) wiphy_rfkill_set_hw_state(hw->wiphy, ret == 0); } +static int mt7925_switch_vif_chanctx(struct ieee80211_hw *hw, + struct ieee80211_vif_chanctx_switch *vifs, + int n_vifs, + enum ieee80211_chanctx_switch_mode mode) +{ + return mt7925_assign_vif_chanctx(hw, vifs->vif, vifs->link_conf, + vifs->new_ctx); +} + +void mt7925_csa_work(struct work_struct *work) +{ + struct mt792x_vif *mvif; + struct mt792x_dev *dev; + struct ieee80211_vif *vif; + struct ieee80211_bss_conf *link_conf; + struct mt792x_bss_conf *mconf; + u8 link_id, roc_rtype; + int ret = 0; + + mvif = (struct mt792x_vif *)container_of(work, struct mt792x_vif, + csa_work); + dev = mvif->phy->dev; + vif = container_of((void *)mvif, struct ieee80211_vif, drv_priv); + + if (ieee80211_vif_is_mld(vif)) + return; + + if (!dev->new_ctx) + return; + + link_id = 0; + mconf = &mvif->bss_conf; + link_conf = &vif->bss_conf; + roc_rtype = MT7925_ROC_REQ_JOIN; + + mt792x_mutex_acquire(dev); + ret = mt7925_set_roc(mvif->phy, mconf, dev->new_ctx->def.chan, + 4000, roc_rtype); + mt792x_mutex_release(dev); + if (!ret) { + mt792x_mutex_acquire(dev); + ret = mt7925_mcu_set_chctx(mvif->phy->mt76, &mconf->mt76, link_conf, + dev->new_ctx); + mt792x_mutex_release(dev); + + mt7925_abort_roc(mvif->phy, mconf); + } + + ieee80211_chswitch_done(vif, !ret, link_id); +} + +static int mt7925_pre_channel_switch(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_channel_switch *chsw) +{ + if (ieee80211_vif_is_mld(vif)) + return -EOPNOTSUPP; + + if (vif->type != NL80211_IFTYPE_STATION || !vif->cfg.assoc) + return -EOPNOTSUPP; + + if (!cfg80211_chandef_usable(hw->wiphy, &chsw->chandef, + IEEE80211_CHAN_DISABLED)) + return -EOPNOTSUPP; + + return 0; +} + +static void mt7925_channel_switch(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_channel_switch *chsw) +{ + struct mt792x_vif *mvif = (struct mt792x_vif *)vif->drv_priv; + u16 beacon_interval; + + if (ieee80211_vif_is_mld(vif)) + return; + + beacon_interval = vif->bss_conf.beacon_int; + + mvif->csa_timer.expires = TU_TO_EXP_TIME(beacon_interval * chsw->count); + add_timer(&mvif->csa_timer); +} + +static void mt7925_abort_channel_switch(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_bss_conf *link_conf) +{ + struct mt792x_vif *mvif = (struct mt792x_vif *)vif->drv_priv; + + timer_delete_sync(&mvif->csa_timer); + cancel_work_sync(&mvif->csa_work); +} + +static void mt7925_channel_switch_rx_beacon(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_channel_switch *chsw) +{ + struct mt792x_dev *dev = mt792x_hw_dev(hw); + struct mt792x_vif *mvif = (struct mt792x_vif *)vif->drv_priv; + u16 beacon_interval; + + if (ieee80211_vif_is_mld(vif)) + return; + + beacon_interval = vif->bss_conf.beacon_int; + + if (cfg80211_chandef_identical(&chsw->chandef, + &dev->new_ctx->def) && + chsw->count) { + mod_timer(&mvif->csa_timer, + TU_TO_EXP_TIME(beacon_interval * chsw->count)); + } +} + const struct ieee80211_ops mt7925_ops = { .tx = mt792x_tx, .start = mt7925_start, @@ -2221,6 +2354,12 @@ const struct ieee80211_ops mt7925_ops = { .change_vif_links = mt7925_change_vif_links, .change_sta_links = mt7925_change_sta_links, .rfkill_poll = mt7925_rfkill_poll, + + .switch_vif_chanctx = mt7925_switch_vif_chanctx, + .pre_channel_switch = mt7925_pre_channel_switch, + .channel_switch = mt7925_channel_switch, + .abort_channel_switch = mt7925_abort_channel_switch, + .channel_switch_rx_beacon = mt7925_channel_switch_rx_beacon, }; EXPORT_SYMBOL_GPL(mt7925_ops); diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mt7925.h b/drivers/net/wireless/mediatek/mt76/mt7925/mt7925.h index 6b9bf1b89032..5030d7714bcf 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mt7925.h +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mt7925.h @@ -298,6 +298,7 @@ int mt7925_mcu_uni_rx_ba(struct mt792x_dev *dev, void mt7925_mlo_pm_work(struct work_struct *work); void mt7925_scan_work(struct work_struct *work); void mt7925_roc_work(struct work_struct *work); +void mt7925_csa_work(struct work_struct *work); int mt7925_mcu_uni_bss_ps(struct mt792x_dev *dev, struct ieee80211_bss_conf *link_conf); void mt7925_coredump_work(struct work_struct *work); diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_core.c b/drivers/net/wireless/mediatek/mt76/mt792x_core.c index f2ed16feb6c1..f318a53e4deb 100644 --- a/drivers/net/wireless/mediatek/mt76/mt792x_core.c +++ b/drivers/net/wireless/mediatek/mt76/mt792x_core.c @@ -691,9 +691,8 @@ int mt792x_init_wiphy(struct ieee80211_hw *hw) ieee80211_hw_set(hw, SUPPORTS_MULTI_BSSID); ieee80211_hw_set(hw, SUPPORTS_ONLY_HE_MULTI_BSSID); - if (is_mt7921(&dev->mt76)) { - ieee80211_hw_set(hw, CHANCTX_STA_CSA); - } + ieee80211_hw_set(hw, CHANCTX_STA_CSA); + if (dev->pm.enable) ieee80211_hw_set(hw, CONNECTION_MONITOR); From 0cd776fdccec526ee1f45c81d00da8a316b6e892 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 28 Nov 2025 17:44:30 +0000 Subject: [PATCH 0695/1561] wifi: mt76: mt7996: Fix spelling mistake "retriving" -> "retrieving" There are a handful of spelling mistakes in various warning messages. Fix them. Signed-off-by: Colin Ian King Link: https://patch.msgid.link/20251128174430.318838-1-colin.i.king@gmail.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7996/npu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/npu.c b/drivers/net/wireless/mediatek/mt76/mt7996/npu.c index 29bb735da4cb..1422533e59c7 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/npu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/npu.c @@ -120,7 +120,7 @@ static int mt7996_npu_rxd_init(struct mt7996_dev *dev, struct airoha_npu *npu) &val, GFP_KERNEL); if (err) { dev_warn(dev->mt76.dev, - "failed retriving NPU wlan rx ring0 addr\n"); + "failed retrieving NPU wlan rx ring0 addr\n"); return err; } writel(val, &dev->mt76.q_rx[MT_RXQ_RRO_BAND0].regs->desc_base); @@ -129,7 +129,7 @@ static int mt7996_npu_rxd_init(struct mt7996_dev *dev, struct airoha_npu *npu) &val, GFP_KERNEL); if (err) { dev_warn(dev->mt76.dev, - "failed retriving NPU wlan rx ring1 addr\n"); + "failed retrieving NPU wlan rx ring1 addr\n"); return err; } writel(val, &dev->mt76.q_rx[MT_RXQ_RRO_BAND1].regs->desc_base); @@ -138,7 +138,7 @@ static int mt7996_npu_rxd_init(struct mt7996_dev *dev, struct airoha_npu *npu) &val, GFP_KERNEL); if (err) { dev_warn(dev->mt76.dev, - "failed retriving NPU wlan rxdmad_c ring addr\n"); + "failed retrieving NPU wlan rxdmad_c ring addr\n"); return err; } writel(val, &dev->mt76.q_rx[MT_RXQ_RRO_RXDMAD_C].regs->desc_base); @@ -159,7 +159,7 @@ static int mt7996_npu_txd_init(struct mt7996_dev *dev, struct airoha_npu *npu) &val, GFP_KERNEL); if (err) { dev_warn(dev->mt76.dev, - "failed retriving NPU wlan tx ring addr\n"); + "failed retrieving NPU wlan tx ring addr\n"); return err; } writel(val, &dev->mt76.phys[i]->q_tx[0]->regs->desc_base); From 654abcbe4528f74428b69292fad5c4224414fa1b Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 5 Dec 2025 11:24:36 +0100 Subject: [PATCH 0696/1561] wifi: mt76: mt7996: Set mtxq->wcid just for primary link Set WCID index in mt76_txq struct just for the primary link in mt7996_vif_link_add routine. Fixes: 69d54ce7491d0 ("wifi: mt76: mt7996: switch to single multi-radio wiphy") Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20251205-mt76-txq-wicd-fix-v2-1-f19ba48af7c1@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7996/main.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index 393823368bed..d646d1ef8f82 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -301,7 +301,6 @@ int mt7996_vif_link_add(struct mt76_phy *mphy, struct ieee80211_vif *vif, .cmd = SET_KEY, .link_id = link_conf->link_id, }; - struct mt76_txq *mtxq; int mld_idx, idx, ret; mlink->idx = __ffs64(~dev->mt76.vif_mask); @@ -344,11 +343,6 @@ int mt7996_vif_link_add(struct mt76_phy *mphy, struct ieee80211_vif *vif, mt7996_mac_wtbl_update(dev, idx, MT_WTBL_UPDATE_ADM_COUNT_CLEAR); - if (vif->txq) { - mtxq = (struct mt76_txq *)vif->txq->drv_priv; - mtxq->wcid = idx; - } - if (vif->type != NL80211_IFTYPE_AP && (!mlink->omac_idx || mlink->omac_idx > 3)) vif->offload_flags = 0; @@ -371,9 +365,13 @@ int mt7996_vif_link_add(struct mt76_phy *mphy, struct ieee80211_vif *vif, ieee80211_iter_keys(mphy->hw, vif, mt7996_key_iter, &it); - if (!mlink->wcid->offchannel && - mvif->mt76.deflink_id == IEEE80211_LINK_UNSPECIFIED) + if (vif->txq && !mlink->wcid->offchannel && + mvif->mt76.deflink_id == IEEE80211_LINK_UNSPECIFIED) { + struct mt76_txq *mtxq = (struct mt76_txq *)vif->txq->drv_priv; + mvif->mt76.deflink_id = link_conf->link_id; + mtxq->wcid = idx; + } return 0; } From 751a2679b15e3a0fa8fc9175862f0ec40643db68 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 5 Dec 2025 11:24:37 +0100 Subject: [PATCH 0697/1561] wifi: mt76: mt7996: Reset mtxq->idx if primary link is removed in mt7996_vif_link_remove() Reset WCID index in mt76_txq struct if primary link is removed in mt7996_vif_link_remove routine. Fixes: a3316d2fc669f ("wifi: mt76: mt7996: set vif default link_id adding/removing vif links") Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20251205-mt76-txq-wicd-fix-v2-2-f19ba48af7c1@kernel.org Signed-off-by: Felix Fietkau --- .../net/wireless/mediatek/mt76/mt7996/main.c | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index d646d1ef8f82..8a610e0e9bae 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -402,17 +402,28 @@ void mt7996_vif_link_remove(struct mt76_phy *mphy, struct ieee80211_vif *vif, rcu_assign_pointer(dev->mt76.wcid[idx], NULL); - if (!mlink->wcid->offchannel && + if (vif->txq && !mlink->wcid->offchannel && mvif->mt76.deflink_id == link_conf->link_id) { struct ieee80211_bss_conf *iter; + struct mt76_txq *mtxq; unsigned int link_id; mvif->mt76.deflink_id = IEEE80211_LINK_UNSPECIFIED; + mtxq = (struct mt76_txq *)vif->txq->drv_priv; + /* Primary link will be removed, look for a new one */ for_each_vif_active_link(vif, iter, link_id) { - if (link_id != IEEE80211_LINK_UNSPECIFIED) { - mvif->mt76.deflink_id = link_id; - break; - } + struct mt7996_vif_link *link; + + if (link_id == link_conf->link_id) + continue; + + link = mt7996_vif_link(dev, vif, link_id); + if (!link) + continue; + + mtxq->wcid = link->msta_link.wcid.idx; + mvif->mt76.deflink_id = link_id; + break; } } From 5ef44c200618430b004233cbfc1b0929a13d5ac8 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 5 Dec 2025 11:24:38 +0100 Subject: [PATCH 0698/1561] wifi: mt76: mt7996: Switch to the secondary link if the default one is removed Switch to the secondary link if available in mt7996_mac_sta_remove_links routine if the primary one is removed. Moreover reset secondary link index for single link scenario. Fixes: 85cd5534a3f2e ("wifi: mt76: mt7996: use correct link_id when filling TXD and TXP") Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20251205-mt76-txq-wicd-fix-v2-3-f19ba48af7c1@kernel.org Signed-off-by: Felix Fietkau --- .../net/wireless/mediatek/mt76/mt7996/mac.c | 12 ++--- .../net/wireless/mediatek/mt76/mt7996/main.c | 51 +++++++++++++------ 2 files changed, 41 insertions(+), 22 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c index 77040980dea3..cf7b0f290328 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c @@ -2365,14 +2365,12 @@ mt7996_mac_reset_sta_iter(void *data, struct ieee80211_sta *sta) continue; mt7996_mac_sta_deinit_link(dev, msta_link); - - if (msta->deflink_id == i) { - msta->deflink_id = IEEE80211_LINK_UNSPECIFIED; - continue; - } - - kfree_rcu(msta_link, rcu_head); + if (msta_link != &msta->deflink) + kfree_rcu(msta_link, rcu_head); } + + msta->deflink_id = IEEE80211_LINK_UNSPECIFIED; + msta->seclink_id = msta->deflink_id; } static void diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index 8a610e0e9bae..3585a9674adc 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -994,6 +994,22 @@ out: return ret; } +static void +mt7996_sta_init_txq_wcid(struct ieee80211_sta *sta, int idx) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(sta->txq); i++) { + struct mt76_txq *mtxq; + + if (!sta->txq[i]) + continue; + + mtxq = (struct mt76_txq *)sta->txq[i]->drv_priv; + mtxq->wcid = idx; + } +} + static int mt7996_mac_sta_init_link(struct mt7996_dev *dev, struct ieee80211_bss_conf *link_conf, @@ -1011,21 +1027,10 @@ mt7996_mac_sta_init_link(struct mt7996_dev *dev, return -ENOSPC; if (msta->deflink_id == IEEE80211_LINK_UNSPECIFIED) { - int i; - msta_link = &msta->deflink; msta->deflink_id = link_id; msta->seclink_id = msta->deflink_id; - - for (i = 0; i < ARRAY_SIZE(sta->txq); i++) { - struct mt76_txq *mtxq; - - if (!sta->txq[i]) - continue; - - mtxq = (struct mt76_txq *)sta->txq[i]->drv_priv; - mtxq->wcid = idx; - } + mt7996_sta_init_txq_wcid(sta, idx); } else { msta_link = kzalloc_obj(*msta_link); if (!msta_link) @@ -1108,12 +1113,28 @@ mt7996_mac_sta_remove_links(struct mt7996_dev *dev, struct ieee80211_vif *vif, mphy->num_sta--; if (msta->deflink_id == link_id) { msta->deflink_id = IEEE80211_LINK_UNSPECIFIED; - continue; + if (msta->seclink_id == link_id) { + /* no secondary link available */ + msta->seclink_id = msta->deflink_id; + } else { + struct mt7996_sta_link *msta_seclink; + + /* switch to the secondary link */ + msta_seclink = mt76_dereference( + msta->link[msta->seclink_id], + mdev); + if (msta_seclink) { + msta->deflink_id = msta->seclink_id; + mt7996_sta_init_txq_wcid(sta, + msta_seclink->wcid.idx); + } + } } else if (msta->seclink_id == link_id) { - msta->seclink_id = IEEE80211_LINK_UNSPECIFIED; + msta->seclink_id = msta->deflink_id; } - kfree_rcu(msta_link, rcu_head); + if (msta_link != &msta->deflink) + kfree_rcu(msta_link, rcu_head); } } From 88973240dc7c976dd320b36a9e6d925c9be083ae Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 5 Dec 2025 11:24:39 +0100 Subject: [PATCH 0699/1561] wifi: mt76: mt7996: Clear wcid pointer in mt7996_mac_sta_deinit_link() Clear WCID pointer removing the sta link in mt7996_mac_sta_deinit_link routine. Fixes: dd82a9e02c054 ("wifi: mt76: mt7996: Rely on mt7996_sta_link in sta_add/sta_remove callbacks") Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20251205-mt76-txq-wicd-fix-v2-4-f19ba48af7c1@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7996/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index 3585a9674adc..583724c31099 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -1076,6 +1076,7 @@ void mt7996_mac_sta_deinit_link(struct mt7996_dev *dev, list_del_init(&msta_link->rc_list); spin_unlock_bh(&dev->mt76.sta_poll_lock); + rcu_assign_pointer(dev->mt76.wcid[msta_link->wcid.idx], NULL); mt76_wcid_cleanup(&dev->mt76, &msta_link->wcid); mt76_wcid_mask_clear(dev->mt76.wcid_mask, msta_link->wcid.idx); } From c0747db7c10c2dfbdcff0e8e97021e3df1f1e362 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sun, 14 Dec 2025 10:55:30 +0100 Subject: [PATCH 0700/1561] wifi: mt76: mt7996: Reset ampdu_state state in case of failure in mt7996_tx_check_aggr() Reset the ampdu_state configured state if ieee80211_start_tx_ba_session routine fails in mt7996_tx_check_aggr() Fixes: 98686cd21624c ("wifi: mt76: mt7996: add driver for MediaTek Wi-Fi 7 (802.11be) devices") Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20251214-mt7996-aggr-check-fix-v1-1-33a8b62ec0fc@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7996/mac.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c index cf7b0f290328..0ca908b87a46 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c @@ -1231,8 +1231,9 @@ mt7996_tx_check_aggr(struct ieee80211_link_sta *link_sta, if (unlikely(fc != (IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_DATA))) return; - if (!test_and_set_bit(tid, &wcid->ampdu_state)) - ieee80211_start_tx_ba_session(link_sta->sta, tid, 0); + if (!test_and_set_bit(tid, &wcid->ampdu_state) && + ieee80211_start_tx_ba_session(link_sta->sta, tid, 0)) + clear_bit(tid, &wcid->ampdu_state); } static void From 53ffffeb9624ffab6d9a3b1da8635a23f1172b5e Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Mon, 15 Dec 2025 18:59:30 -0600 Subject: [PATCH 0701/1561] wifi: mt76: mt7921: Reset ampdu_state state in case of failure in mt76_connac2_tx_check_aggr() Reset ampdu_state if ieee80211_start_tx_ba_session() fails in mt76_connac2_tx_check_aggr(), otherwise the driver may incorrectly assume aggregation is active and skip future BA setup attempts. Fixes: 163f4d22c118 ("mt76: mt7921: add MAC support") Signed-off-by: Sean Wang Link: https://patch.msgid.link/20251216005930.9412-1-sean.wang@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c index f946ddc20a47..c46691248513 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c @@ -1153,8 +1153,10 @@ void mt76_connac2_tx_check_aggr(struct ieee80211_sta *sta, __le32 *txwi) return; wcid = (struct mt76_wcid *)sta->drv_priv; - if (!test_and_set_bit(tid, &wcid->ampdu_state)) - ieee80211_start_tx_ba_session(sta, tid, 0); + if (!test_and_set_bit(tid, &wcid->ampdu_state)) { + if (ieee80211_start_tx_ba_session(sta, tid, 0)) + clear_bit(tid, &wcid->ampdu_state); + } } EXPORT_SYMBOL_GPL(mt76_connac2_tx_check_aggr); From 1695f662329faa07c860c73453c097823852df28 Mon Sep 17 00:00:00 2001 From: Leon Yen Date: Thu, 11 Dec 2025 20:38:36 +0800 Subject: [PATCH 0702/1561] wifi: mt76: mt7925: Fix incorrect MLO mode in firmware control The selection of MLO mode should depend on the capabilities of the STA rather than those of the peer AP to avoid compatibility issues with certain APs, such as Xiaomi BE5000 WiFi7 router. Fixes: 69acd6d910b0c ("wifi: mt76: mt7925: add mt7925_change_vif_links") Signed-off-by: Leon Yen Link: https://patch.msgid.link/20251211123836.4169436-1-leon.yen@mediatek.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7925/main.c | 2 +- drivers/net/wireless/mediatek/mt76/mt7925/mcu.c | 9 ++++++--- drivers/net/wireless/mediatek/mt76/mt7925/mt7925.h | 4 ++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/main.c b/drivers/net/wireless/mediatek/mt76/mt7925/main.c index 9861c1fde1ae..1fea2e807f77 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/main.c @@ -545,7 +545,7 @@ static int mt7925_set_mlo_roc(struct mt792x_phy *phy, phy->roc_grant = false; - err = mt7925_mcu_set_mlo_roc(mconf, sel_links, 5, ++phy->roc_token_id); + err = mt7925_mcu_set_mlo_roc(phy, mconf, sel_links, 5, ++phy->roc_token_id); if (err < 0) { clear_bit(MT76_STATE_ROC, &phy->mt76->state); goto out; diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c index cf0fdea45cf7..8d9d2c1b83ac 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c @@ -1294,8 +1294,8 @@ int mt7925_mcu_add_key(struct mt76_dev *dev, struct ieee80211_vif *vif, return mt76_mcu_skb_send_msg(dev, skb, mcu_cmd, true); } -int mt7925_mcu_set_mlo_roc(struct mt792x_bss_conf *mconf, u16 sel_links, - int duration, u8 token_id) +int mt7925_mcu_set_mlo_roc(struct mt792x_phy *phy, struct mt792x_bss_conf *mconf, + u16 sel_links, int duration, u8 token_id) { struct mt792x_vif *mvif = mconf->vif; struct ieee80211_vif *vif = container_of((void *)mvif, @@ -1330,6 +1330,8 @@ int mt7925_mcu_set_mlo_roc(struct mt792x_bss_conf *mconf, u16 sel_links, .roc[1].len = cpu_to_le16(sizeof(struct roc_acquire_tlv)) }; + struct wiphy *wiphy = phy->mt76->hw->wiphy; + if (!mconf || hweight16(vif->valid_links) < 2 || hweight16(sel_links) != 2) return -EPERM; @@ -1352,7 +1354,8 @@ int mt7925_mcu_set_mlo_roc(struct mt792x_bss_conf *mconf, u16 sel_links, is_AG_band |= links[i].chan->band == NL80211_BAND_2GHZ; } - if (vif->cfg.eml_cap & IEEE80211_EML_CAP_EMLSR_SUPP) + if (!(wiphy->iftype_ext_capab[0].mld_capa_and_ops & + IEEE80211_MLD_CAP_OP_MAX_SIMUL_LINKS)) type = is_AG_band ? MT7925_ROC_REQ_MLSR_AG : MT7925_ROC_REQ_MLSR_AA; else diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mt7925.h b/drivers/net/wireless/mediatek/mt76/mt7925/mt7925.h index 5030d7714bcf..0f0eff748bb7 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mt7925.h +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mt7925.h @@ -350,8 +350,8 @@ int mt7925_set_tx_sar_pwr(struct ieee80211_hw *hw, int mt7925_mcu_regval(struct mt792x_dev *dev, u32 regidx, u32 *val, bool set); int mt7925_mcu_set_clc(struct mt792x_dev *dev, u8 *alpha2, enum environment_cap env_cap); -int mt7925_mcu_set_mlo_roc(struct mt792x_bss_conf *mconf, u16 sel_links, - int duration, u8 token_id); +int mt7925_mcu_set_mlo_roc(struct mt792x_phy *phy, struct mt792x_bss_conf *mconf, + u16 sel_links, int duration, u8 token_id); int mt7925_mcu_set_roc(struct mt792x_phy *phy, struct mt792x_bss_conf *mconf, struct ieee80211_channel *chan, int duration, enum mt7925_roc_req type, u8 token_id); From bb2f07819d063a58756186cac6465341956ac0a4 Mon Sep 17 00:00:00 2001 From: Leon Yen Date: Mon, 15 Dec 2025 20:22:31 +0800 Subject: [PATCH 0703/1561] wifi: mt76: mt792x: Fix a potential deadlock in high-load situations A deadlock may occur between two works, ps_work and mac_work, if their work functions run simultaneously as they attempt to cancel each other by calling cancel_delayed_work_sync(). mt792x_mac_work() -> ... -> cancel_delayed_work_sync(&pm->ps_work); mt792x_pm_power_save_work() -> cancel_delayed_work_sync(&mphy->mac_work); In high-load situations, they are queued but may not have chance to be executed until the CPUs are released. Once the CPUs are available, there is a high possibility that the ps_work function and mac_work function will be executed simultaneously, resulting in a possible deadlock. This patch replaces cancel_delayed_work_sync() with cancel_delayed_work() in ps_work to eliminate the deadlock and make the code easier to maintain. Signed-off-by: Leon Yen Tested-by: Chia-Lin Kao (AceLan) Link: https://patch.msgid.link/20251215122231.3180648-1-leon.yen@mediatek.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt792x_mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_mac.c b/drivers/net/wireless/mediatek/mt76/mt792x_mac.c index 71dec93094eb..888e5a505673 100644 --- a/drivers/net/wireless/mediatek/mt76/mt792x_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt792x_mac.c @@ -375,7 +375,7 @@ void mt792x_pm_power_save_work(struct work_struct *work) } if (!mt792x_mcu_fw_pmctrl(dev)) { - cancel_delayed_work_sync(&mphy->mac_work); + cancel_delayed_work(&mphy->mac_work); return; } out: From dcfbd5d3b82d3b5e94df3761c4d25086cab08c38 Mon Sep 17 00:00:00 2001 From: Christian Hewitt Date: Sat, 27 Dec 2025 11:22:19 +0000 Subject: [PATCH 0704/1561] wifi: mt7601u: check multiple firmware paths The linux-firmware repo moved mt7601u.bin from its root folder to the mediatek sub-folder some time ago, but the driver still tries to load firmware from the old location. Users might have firmware in either location so update the driver to check both. Signed-off-by: Christian Hewitt Link: https://patch.msgid.link/20251227112219.2768439-1-christianshewitt@gmail.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt7601u/mcu.c | 15 ++++++++++++++- drivers/net/wireless/mediatek/mt7601u/usb.h | 1 + 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt7601u/mcu.c b/drivers/net/wireless/mediatek/mt7601u/mcu.c index 1b5cc271a9e1..bad6ca821400 100644 --- a/drivers/net/wireless/mediatek/mt7601u/mcu.c +++ b/drivers/net/wireless/mediatek/mt7601u/mcu.c @@ -403,12 +403,18 @@ error: return ret; } +static const char * const mt7601u_fw_paths[] = { + "mediatek/" MT7601U_FIRMWARE, + MT7601U_FIRMWARE, +}; + static int mt7601u_load_firmware(struct mt7601u_dev *dev) { const struct firmware *fw; const struct mt76_fw_header *hdr; int len, ret; u32 val; + int i; mt7601u_wr(dev, MT_USB_DMA_CFG, (MT_USB_DMA_CFG_RX_BULK_EN | MT_USB_DMA_CFG_TX_BULK_EN)); @@ -416,7 +422,14 @@ static int mt7601u_load_firmware(struct mt7601u_dev *dev) if (firmware_running(dev)) return firmware_request_cache(dev->dev, MT7601U_FIRMWARE); - ret = request_firmware(&fw, MT7601U_FIRMWARE, dev->dev); + /* Try loading firmware from multiple locations */ + fw = NULL; + for (i = 0; i < MT7601U_FIRMWARE_PATHS; i++) { + ret = request_firmware(&fw, mt7601u_fw_paths[i], dev->dev); + if (ret == 0) + break; + } + if (ret) return ret; diff --git a/drivers/net/wireless/mediatek/mt7601u/usb.h b/drivers/net/wireless/mediatek/mt7601u/usb.h index 9fdf35970339..723025f84483 100644 --- a/drivers/net/wireless/mediatek/mt7601u/usb.h +++ b/drivers/net/wireless/mediatek/mt7601u/usb.h @@ -9,6 +9,7 @@ #include "mt7601u.h" #define MT7601U_FIRMWARE "mt7601u.bin" +#define MT7601U_FIRMWARE_PATHS ARRAY_SIZE(mt7601u_fw_paths) #define MT_VEND_REQ_MAX_RETRY 10 #define MT_VEND_REQ_TOUT_MS 300 From 1974a67d9b65c29a0a9426e32e8cd8c056de48b7 Mon Sep 17 00:00:00 2001 From: Ryder Lee Date: Wed, 21 Jan 2026 09:41:56 -0800 Subject: [PATCH 0705/1561] wifi: mt76: mt7615: fix use_cts_prot support Driver should not directly write WTBL to prevent overwritten issues. With this fix, when driver needs to adjust its behavior for compatibility, especially concerning older 11g/n devices, by enabling or disabling CTS protection frames, often for hidden SSIDs or to manage legacy clients. Fixes: e34235ccc5e3 ("wifi: mt76: mt7615: enable use_cts_prot support") Signed-off-by: Ryder Lee Link: https://patch.msgid.link/edb87088b0111b32fafc6c4179f54a5286dd37d8.1768879119.git.ryder.lee@mediatek.com Signed-off-by: Felix Fietkau --- .../net/wireless/mediatek/mt76/mt7615/mac.c | 15 ------ .../net/wireless/mediatek/mt76/mt7615/main.c | 7 +-- .../net/wireless/mediatek/mt76/mt7615/mcu.c | 47 +++++++++++++++++++ .../wireless/mediatek/mt76/mt7615/mt7615.h | 5 +- .../net/wireless/mediatek/mt76/mt7615/regs.h | 2 - 5 files changed, 53 insertions(+), 23 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c index 45992fdcec60..ce0051468501 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c @@ -1167,21 +1167,6 @@ void mt7615_mac_set_rates(struct mt7615_phy *phy, struct mt7615_sta *sta, } EXPORT_SYMBOL_GPL(mt7615_mac_set_rates); -void mt7615_mac_enable_rtscts(struct mt7615_dev *dev, - struct ieee80211_vif *vif, bool enable) -{ - struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv; - u32 addr; - - addr = mt7615_mac_wtbl_addr(dev, mvif->sta.wcid.idx) + 3 * 4; - - if (enable) - mt76_set(dev, addr, MT_WTBL_W3_RTS); - else - mt76_clear(dev, addr, MT_WTBL_W3_RTS); -} -EXPORT_SYMBOL_GPL(mt7615_mac_enable_rtscts); - static int mt7615_mac_wtbl_update_key(struct mt7615_dev *dev, struct mt76_wcid *wcid, struct ieee80211_key_conf *key, diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c index 727266892c3d..fc619acbb40d 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c @@ -583,9 +583,6 @@ static void mt7615_bss_info_changed(struct ieee80211_hw *hw, } } - if (changed & BSS_CHANGED_ERP_CTS_PROT) - mt7615_mac_enable_rtscts(dev, vif, info->use_cts_prot); - if (changed & BSS_CHANGED_BEACON_ENABLED && info->enable_beacon) { mt7615_mcu_add_bss_info(phy, vif, NULL, true); mt7615_mcu_sta_add(phy, vif, NULL, true); @@ -598,6 +595,10 @@ static void mt7615_bss_info_changed(struct ieee80211_hw *hw, BSS_CHANGED_BEACON_ENABLED)) mt7615_mcu_add_beacon(dev, hw, vif, info->enable_beacon); + if (changed & BSS_CHANGED_HT || changed & BSS_CHANGED_ERP_CTS_PROT) + mt7615_mcu_set_protection(phy, vif, info->ht_operation_mode, + info->use_cts_prot); + if (changed & BSS_CHANGED_PS) mt76_connac_mcu_set_vif_ps(&dev->mt76, vif); diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c index fc0054f8bd60..ff57ede87f71 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c @@ -2564,3 +2564,50 @@ int mt7615_mcu_set_roc(struct mt7615_phy *phy, struct ieee80211_vif *vif, return mt76_mcu_send_msg(&dev->mt76, MCU_CE_CMD(SET_ROC), &req, sizeof(req), false); } + +int mt7615_mcu_set_protection(struct mt7615_phy *phy, struct ieee80211_vif *vif, + u8 ht_mode, bool use_cts_prot) +{ + struct mt7615_dev *dev = phy->dev; + struct { + u8 prot_idx; + u8 band; + u8 rsv[2]; + + bool long_nav; + bool prot_mm; + bool prot_gf; + bool prot_bw40; + bool prot_rifs; + bool prot_bw80; + bool prot_bw160; + u8 prot_erp_mask; + } __packed req = { + .prot_idx = 0x2, + .band = phy != &dev->phy, + }; + + switch (ht_mode & IEEE80211_HT_OP_MODE_PROTECTION) { + case IEEE80211_HT_OP_MODE_PROTECTION_NONMEMBER: + case IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED: + req.prot_mm = true; + req.prot_gf = true; + fallthrough; + case IEEE80211_HT_OP_MODE_PROTECTION_20MHZ: + req.prot_bw40 = true; + break; + } + + if (ht_mode & IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT) + req.prot_gf = true; + + if (use_cts_prot) { + struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv; + u8 i = mvif->mt76.omac_idx > HW_BSSID_MAX ? HW_BSSID_0 : mvif->mt76.omac_idx; + + req.prot_erp_mask = BIT(i); + } + + return mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD(PROTECT_CTRL), &req, + sizeof(req), true); +} diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h index c93fd245c90f..391928405f32 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h +++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h @@ -467,8 +467,6 @@ void mt7615_mac_reset_counters(struct mt7615_phy *phy); void mt7615_mac_cca_stats_reset(struct mt7615_phy *phy); void mt7615_mac_set_scs(struct mt7615_phy *phy, bool enable); void mt7615_mac_enable_nf(struct mt7615_dev *dev, bool ext_phy); -void mt7615_mac_enable_rtscts(struct mt7615_dev *dev, - struct ieee80211_vif *vif, bool enable); void mt7615_mac_sta_poll(struct mt7615_dev *dev); int mt7615_mac_write_txwi(struct mt7615_dev *dev, __le32 *txwi, struct sk_buff *skb, struct mt76_wcid *wcid, @@ -523,7 +521,8 @@ int mt7615_mcu_set_sku_en(struct mt7615_phy *phy, bool enable); int mt7615_mcu_apply_rx_dcoc(struct mt7615_phy *phy); int mt7615_mcu_apply_tx_dpd(struct mt7615_phy *phy); int mt7615_dfs_init_radar_detector(struct mt7615_phy *phy); - +int mt7615_mcu_set_protection(struct mt7615_phy *phy, struct ieee80211_vif *vif, + u8 ht_mode, bool use_cts_prot); int mt7615_mcu_set_roc(struct mt7615_phy *phy, struct ieee80211_vif *vif, struct ieee80211_channel *chan, int duration); diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/regs.h b/drivers/net/wireless/mediatek/mt76/mt7615/regs.h index eb3c24d51987..e4133e9181d0 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/regs.h +++ b/drivers/net/wireless/mediatek/mt76/mt7615/regs.h @@ -455,8 +455,6 @@ enum mt7615_reg_base { #define MT_WTBL_RIUCR3_RATE6 GENMASK(19, 8) #define MT_WTBL_RIUCR3_RATE7 GENMASK(31, 20) -#define MT_WTBL_W3_RTS BIT(22) - #define MT_WTBL_W5_CHANGE_BW_RATE GENMASK(7, 5) #define MT_WTBL_W5_SHORT_GI_20 BIT(8) #define MT_WTBL_W5_SHORT_GI_40 BIT(9) From 8b2c26562b95c6397e132d21f2bd3d73aaee0c0a Mon Sep 17 00:00:00 2001 From: Ryder Lee Date: Wed, 21 Jan 2026 09:41:57 -0800 Subject: [PATCH 0706/1561] wifi: mt76: mt7915: fix use_cts_prot support With this fix, when driver needs to adjust its behavior for compatibility, especially concerning older 11g/n devices, by enabling or disabling CTS protection frames, often for hidden SSIDs or to manage legacy clients. Fixes: 150b91419d3d ("wifi: mt76: mt7915: enable use_cts_prot support") Signed-off-by: Ryder Lee Link: https://patch.msgid.link/eb8db4d0bf1c89b7486e89facb788ae3e510dd8b.1768879119.git.ryder.lee@mediatek.com Signed-off-by: Felix Fietkau --- .../net/wireless/mediatek/mt76/mt7915/mac.c | 13 ---- .../net/wireless/mediatek/mt76/mt7915/main.c | 7 ++- .../net/wireless/mediatek/mt76/mt7915/mcu.c | 62 +++++++++++++++++++ .../net/wireless/mediatek/mt76/mt7915/mcu.h | 11 ++++ .../wireless/mediatek/mt76/mt7915/mt7915.h | 4 ++ 5 files changed, 81 insertions(+), 16 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c index cefe56c05731..cec2c4208255 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c @@ -232,19 +232,6 @@ static void mt7915_mac_sta_poll(struct mt7915_dev *dev) rcu_read_unlock(); } -void mt7915_mac_enable_rtscts(struct mt7915_dev *dev, - struct ieee80211_vif *vif, bool enable) -{ - struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv; - u32 addr; - - addr = mt7915_mac_wtbl_lmac_addr(dev, mvif->sta.wcid.idx, 5); - if (enable) - mt76_set(dev, addr, BIT(5)); - else - mt76_clear(dev, addr, BIT(5)); -} - static void mt7915_wed_check_ppe(struct mt7915_dev *dev, struct mt76_queue *q, struct mt7915_sta *msta, struct sk_buff *skb, diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c index 90d5e79fbf74..0892291616ea 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c @@ -68,7 +68,7 @@ int mt7915_run(struct ieee80211_hw *hw) if (ret) goto out; - ret = mt76_connac_mcu_set_rts_thresh(&dev->mt76, 0x92b, + ret = mt76_connac_mcu_set_rts_thresh(&dev->mt76, MT7915_RTS_LEN_THRES, phy->mt76->band_idx); if (ret) goto out; @@ -633,8 +633,9 @@ static void mt7915_bss_info_changed(struct ieee80211_hw *hw, if (set_sta == 1) mt7915_mcu_add_sta(dev, vif, NULL, CONN_STATE_PORT_SECURE, false); - if (changed & BSS_CHANGED_ERP_CTS_PROT) - mt7915_mac_enable_rtscts(dev, vif, info->use_cts_prot); + if (changed & BSS_CHANGED_HT || changed & BSS_CHANGED_ERP_CTS_PROT) + mt7915_mcu_set_protection(phy, vif, info->ht_operation_mode, + info->use_cts_prot); if (changed & BSS_CHANGED_ERP_SLOT) { int slottime = 9; diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c index 2d2f34aa465d..d6f54b1edfb1 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c @@ -3954,6 +3954,68 @@ out: return ret; } +int mt7915_mcu_set_protection(struct mt7915_phy *phy, struct ieee80211_vif *vif, + u8 ht_mode, bool use_cts_prot) +{ + struct mt7915_dev *dev = phy->dev; + int len = sizeof(struct sta_req_hdr) + sizeof(struct bss_info_prot); + struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv; + struct bss_info_prot *prot; + struct sk_buff *skb; + struct tlv *tlv; + enum { + PROT_NONMEMBER = BIT(1), + PROT_20MHZ = BIT(2), + PROT_NONHT_MIXED = BIT(3), + PROT_LEGACY_ERP = BIT(5), + PROT_NONGF_STA = BIT(7), + }; + u32 rts_threshold; + + skb = __mt76_connac_mcu_alloc_sta_req(&dev->mt76, &mvif->mt76, + NULL, len); + if (IS_ERR(skb)) + return PTR_ERR(skb); + + tlv = mt76_connac_mcu_add_tlv(skb, BSS_INFO_PROTECT_INFO, + sizeof(*prot)); + prot = (struct bss_info_prot *)tlv; + + switch (ht_mode & IEEE80211_HT_OP_MODE_PROTECTION) { + case IEEE80211_HT_OP_MODE_PROTECTION_NONMEMBER: + prot->prot_mode = cpu_to_le32(PROT_NONMEMBER); + break; + case IEEE80211_HT_OP_MODE_PROTECTION_20MHZ: + prot->prot_mode = cpu_to_le32(PROT_20MHZ); + break; + case IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED: + prot->prot_mode = cpu_to_le32(PROT_NONHT_MIXED); + break; + } + + if (ht_mode & IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT) + prot->prot_mode |= cpu_to_le32(PROT_NONGF_STA); + + if (use_cts_prot) + prot->prot_mode |= cpu_to_le32(PROT_LEGACY_ERP); + + /* reuse current RTS setting */ + rts_threshold = phy->mt76->hw->wiphy->rts_threshold; + if (rts_threshold == (u32)-1) + prot->rts_len_thres = cpu_to_le32(MT7915_RTS_LEN_THRES); + else + prot->rts_len_thres = cpu_to_le32(rts_threshold); + + prot->rts_pkt_thres = 0x2; + + prot->he_rts_thres = cpu_to_le16(vif->bss_conf.frame_time_rts_th); + if (!prot->he_rts_thres) + prot->he_rts_thres = cpu_to_le16(DEFAULT_HE_DURATION_RTS_THRES); + + return mt76_mcu_skb_send_msg(&dev->mt76, skb, + MCU_EXT_CMD(BSS_INFO_UPDATE), true); +} + int mt7915_mcu_update_bss_color(struct mt7915_dev *dev, struct ieee80211_vif *vif, struct cfg80211_he_bss_color *he_bss_color) { diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h index 3af11a075a2f..22f73a5ed425 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h @@ -399,6 +399,17 @@ struct bss_info_inband_discovery { __le16 prob_rsp_len; } __packed __aligned(4); +struct bss_info_prot { + __le16 tag; + __le16 len; + __le32 prot_type; + __le32 prot_mode; + __le32 rts_len_thres; + __le16 he_rts_thres; + u8 rts_pkt_thres; + u8 rsv[5]; +} __packed; + enum { BSS_INFO_BCN_CSA, BSS_INFO_BCN_BCC, diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h index b5c06201b707..bf1d915a3ca2 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h @@ -84,6 +84,8 @@ #define MT7915_CRIT_TEMP 110 #define MT7915_MAX_TEMP 120 +#define MT7915_RTS_LEN_THRES 0x92b + struct mt7915_vif; struct mt7915_sta; struct mt7915_dfs_pulse; @@ -473,6 +475,8 @@ int mt7915_mcu_add_inband_discov(struct mt7915_dev *dev, struct ieee80211_vif *v u32 changed); int mt7915_mcu_add_beacon(struct ieee80211_hw *hw, struct ieee80211_vif *vif, int enable, u32 changed); +int mt7915_mcu_set_protection(struct mt7915_phy *phy, struct ieee80211_vif *vif, + u8 ht_mode, bool use_cts_prot); int mt7915_mcu_add_obss_spr(struct mt7915_phy *phy, struct ieee80211_vif *vif, struct ieee80211_he_obss_pd *he_obss_pd); int mt7915_mcu_add_rate_ctrl(struct mt7915_dev *dev, struct ieee80211_vif *vif, From 079db35fae4dd7d6daedfb144d50b517d0da10e2 Mon Sep 17 00:00:00 2001 From: Ryder Lee Date: Wed, 21 Jan 2026 09:41:58 -0800 Subject: [PATCH 0707/1561] wifi: mt76: mt7996: add support for ERP CTS & HT protection This patch adds support for handling BSS_CHANGED_ERP_CTS_PROT and BSS_CHANGED_HT. With this change, when the Wi-Fi driver needs to adjust its behavior for compatibility or performance, especially concerning older 11g/n devices, by enabling or disabling CTS protection frames, often for hidden SSIDs or to manage legacy clients. It also introduces debugfs options to manually control protection mode, allowing users to select betweenno protection, RTS/CTS, and CTS-to-self. Reviewed-by: Money Wang Signed-off-by: Ryder Lee Link: https://patch.msgid.link/942ddb5777d5c201930d6609e9ba877a6ba6714a.1768879119.git.ryder.lee@mediatek.com Signed-off-by: Felix Fietkau --- .../wireless/mediatek/mt76/mt76_connac_mcu.h | 1 + .../net/wireless/mediatek/mt76/mt7996/main.c | 4 ++ .../net/wireless/mediatek/mt76/mt7996/mcu.c | 46 +++++++++++++++++++ .../net/wireless/mediatek/mt76/mt7996/mcu.h | 6 +++ .../wireless/mediatek/mt76/mt7996/mt7996.h | 2 + 5 files changed, 59 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h index 8d59cf43f0e2..f44977f9093d 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h +++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h @@ -1363,6 +1363,7 @@ enum { UNI_BSS_INFO_BASIC = 0, UNI_BSS_INFO_RA = 1, UNI_BSS_INFO_RLM = 2, + UNI_BSS_INFO_PROTECT_INFO = 3, UNI_BSS_INFO_BSS_COLOR = 4, UNI_BSS_INFO_HE_BASIC = 5, UNI_BSS_INFO_11V_MBSSID = 6, diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index 583724c31099..493c47c59d57 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -874,6 +874,10 @@ mt7996_link_info_changed(struct ieee80211_hw *hw, struct ieee80211_vif *vif, !!(changed & BSS_CHANGED_BSSID)); } + if (changed & BSS_CHANGED_HT || changed & BSS_CHANGED_ERP_CTS_PROT) + mt7996_mcu_set_protection(phy, link, info->ht_operation_mode, + info->use_cts_prot); + if (changed & BSS_CHANGED_ERP_SLOT) { int slottime = info->use_short_slot ? 9 : 20; diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c index aab83ad9c1b5..82eea809c47b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c @@ -1247,6 +1247,52 @@ int mt7996_mcu_update_bss_rfch(struct mt7996_phy *phy, struct mt7996_vif_link *l MCU_WMWA_UNI_CMD(BSS_INFO_UPDATE), true); } +int mt7996_mcu_set_protection(struct mt7996_phy *phy, struct mt7996_vif_link *link, + u8 ht_mode, bool use_cts_prot) +{ + struct mt7996_dev *dev = phy->dev; + struct bss_prot_tlv *prot; + struct sk_buff *skb; + struct tlv *tlv; + enum { + PROT_NONMEMBER = BIT(1), + PROT_20MHZ = BIT(2), + PROT_NONHT_MIXED = BIT(3), + PROT_LEGACY_ERP = BIT(5), + PROT_NONGF_STA = BIT(7), + }; + + skb = __mt7996_mcu_alloc_bss_req(&dev->mt76, &link->mt76, + MT7996_BSS_UPDATE_MAX_SIZE); + if (IS_ERR(skb)) + return PTR_ERR(skb); + + tlv = mt7996_mcu_add_uni_tlv(skb, UNI_BSS_INFO_PROTECT_INFO, + sizeof(*prot)); + prot = (struct bss_prot_tlv *)tlv; + + switch (ht_mode & IEEE80211_HT_OP_MODE_PROTECTION) { + case IEEE80211_HT_OP_MODE_PROTECTION_NONMEMBER: + prot->prot_mode = cpu_to_le32(PROT_NONMEMBER); + break; + case IEEE80211_HT_OP_MODE_PROTECTION_20MHZ: + prot->prot_mode = cpu_to_le32(PROT_20MHZ); + break; + case IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED: + prot->prot_mode = cpu_to_le32(PROT_NONHT_MIXED); + break; + } + + if (ht_mode & IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT) + prot->prot_mode |= cpu_to_le32(PROT_NONGF_STA); + + if (use_cts_prot) + prot->prot_mode |= cpu_to_le32(PROT_LEGACY_ERP); + + return mt76_mcu_skb_send_msg(&dev->mt76, skb, + MCU_WM_UNI_CMD(BSS_INFO_UPDATE), true); +} + int mt7996_mcu_set_timing(struct mt7996_phy *phy, struct ieee80211_vif *vif, struct ieee80211_bss_conf *link_conf) { diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h index de14394bec22..d9fb49f7b01b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h @@ -481,6 +481,12 @@ struct bss_mld_tlv { u8 __rsv[2]; } __packed; +struct bss_prot_tlv { + __le16 tag; + __le16 len; + __le32 prot_mode; +} __packed; + struct sta_rec_ht_uni { __le16 tag; __le16 len; diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h index f850be874b1b..f8b79b05169b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h @@ -723,6 +723,8 @@ int mt7996_mcu_set_radar_th(struct mt7996_dev *dev, int index, const struct mt7996_dfs_pattern *pattern); int mt7996_mcu_set_radio_en(struct mt7996_phy *phy, bool enable); int mt7996_mcu_set_rts_thresh(struct mt7996_phy *phy, u32 val); +int mt7996_mcu_set_protection(struct mt7996_phy *phy, struct mt7996_vif_link *link, + u8 ht_mode, bool use_cts_prot); int mt7996_mcu_set_timing(struct mt7996_phy *phy, struct ieee80211_vif *vif, struct ieee80211_bss_conf *link_conf); int mt7996_mcu_get_chan_mib_info(struct mt7996_phy *phy, bool chan_switch); From ccb186326bb6b7f20d77982f855568e7087ad0d7 Mon Sep 17 00:00:00 2001 From: Ming Yen Hsieh Date: Mon, 8 Sep 2025 15:25:26 +0800 Subject: [PATCH 0708/1561] wifi: mt76: mt7925: fix incorrect length field in txpower command Set `tx_power_tlv->len` to `msg_len` instead of `sizeof(*tx_power_tlv)` to ensure the correct message length is sent to firmware. Cc: stable@vger.kernel.org Fixes: c948b5da6bbe ("wifi: mt76: mt7925: add Mediatek Wi-Fi7 driver for mt7925 chips") Signed-off-by: Ming Yen Hsieh Link: https://patch.msgid.link/20250908072526.1833938-1-mingyen.hsieh@mediatek.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7925/mcu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c index 8d9d2c1b83ac..2daf5a29220f 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c @@ -3730,7 +3730,7 @@ mt7925_mcu_rate_txpower_band(struct mt76_phy *phy, memcpy(tx_power_tlv->alpha2, dev->alpha2, sizeof(dev->alpha2)); tx_power_tlv->n_chan = num_ch; tx_power_tlv->tag = cpu_to_le16(0x1); - tx_power_tlv->len = cpu_to_le16(sizeof(*tx_power_tlv)); + tx_power_tlv->len = cpu_to_le16(msg_len); switch (band) { case NL80211_BAND_2GHZ: From 34163942195410372fb138bea806c9b34e2f5257 Mon Sep 17 00:00:00 2001 From: Zac Bowling Date: Tue, 20 Jan 2026 12:10:32 -0800 Subject: [PATCH 0709/1561] wifi: mt76: fix list corruption in mt76_wcid_cleanup mt76_wcid_cleanup() was not removing wcid entries from sta_poll_list before mt76_reset_device() reinitializes the master list. This leaves stale pointers in wcid->poll_list, causing list corruption when mt76_wcid_add_poll() later checks list_empty() and tries to add the entry back. The fix adds proper cleanup of poll_list in mt76_wcid_cleanup(), matching how tx_list is already handled. This is similar to what mt7996_mac_sta_deinit_link() already does correctly. Fixes list corruption warnings like: list_add corruption. prev->next should be next (ffffffff...) Signed-off-by: Zac Bowling Link: https://patch.msgid.link/20260120201043.38225-3-zac@zacbowling.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mac80211.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c index 75772979f438..d0c522909e98 100644 --- a/drivers/net/wireless/mediatek/mt76/mac80211.c +++ b/drivers/net/wireless/mediatek/mt76/mac80211.c @@ -1716,6 +1716,16 @@ void mt76_wcid_cleanup(struct mt76_dev *dev, struct mt76_wcid *wcid) idr_destroy(&wcid->pktid); + /* Remove from sta_poll_list to prevent list corruption after reset. + * Without this, mt76_reset_device() reinitializes sta_poll_list but + * leaves wcid->poll_list with stale pointers, causing list corruption + * when mt76_wcid_add_poll() checks list_empty(). + */ + spin_lock_bh(&dev->sta_poll_lock); + if (!list_empty(&wcid->poll_list)) + list_del_init(&wcid->poll_list); + spin_unlock_bh(&dev->sta_poll_lock); + spin_lock_bh(&phy->tx_lock); if (!list_empty(&wcid->tx_list)) From 83ae3a18ba957257b4c406273d2da2caeea2b439 Mon Sep 17 00:00:00 2001 From: Ming Yen Hsieh Date: Thu, 4 Sep 2025 11:06:48 +0800 Subject: [PATCH 0710/1561] wifi: mt76: mt7925: prevent NULL pointer dereference in mt7925_tx_check_aggr() Move the NULL check for 'sta' before dereferencing it to prevent a possible crash. Fixes: 44eb173bdd4f ("wifi: mt76: mt7925: add link handling in mt7925_txwi_free") Signed-off-by: Ming Yen Hsieh Link: https://patch.msgid.link/20250904030649.655436-4-mingyen.hsieh@mediatek.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7925/mac.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mac.c b/drivers/net/wireless/mediatek/mt76/mt7925/mac.c index 63e58c177d65..33cd5e85a31d 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mac.c @@ -846,11 +846,14 @@ static void mt7925_tx_check_aggr(struct ieee80211_sta *sta, struct sk_buff *skb, bool is_8023; u16 fc, tid; + if (!sta) + return; + link_sta = rcu_dereference(sta->link[wcid->link_id]); if (!link_sta) return; - if (!sta || !(link_sta->ht_cap.ht_supported || link_sta->he_cap.has_he)) + if (!(link_sta->ht_cap.ht_supported || link_sta->he_cap.has_he)) return; tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK; From 962eb04e67552be406c906c83099c1d736aae3b6 Mon Sep 17 00:00:00 2001 From: Ming Yen Hsieh Date: Thu, 4 Sep 2025 11:06:47 +0800 Subject: [PATCH 0711/1561] wifi: mt76: mt7925: prevent NULL vif dereference in mt7925_mac_write_txwi Check for a NULL `vif` before accessing `ieee80211_vif_is_mld(vif)` to avoid a potential kernel panic in scenarios where `vif` might not be initialized. Fixes: ebb1406813c6 ("wifi: mt76: mt7925: add link handling to txwi") Signed-off-by: Ming Yen Hsieh Link: https://patch.msgid.link/20250904030649.655436-3-mingyen.hsieh@mediatek.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7925/mac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mac.c b/drivers/net/wireless/mediatek/mt76/mt7925/mac.c index 33cd5e85a31d..0bafa8e770a6 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mac.c @@ -804,8 +804,8 @@ mt7925_mac_write_txwi(struct mt76_dev *dev, __le32 *txwi, txwi[5] = cpu_to_le32(val); val = MT_TXD6_DAS | FIELD_PREP(MT_TXD6_MSDU_CNT, 1); - if (!ieee80211_vif_is_mld(vif) || - (q_idx >= MT_LMAC_ALTX0 && q_idx <= MT_LMAC_BCN0)) + if (vif && (!ieee80211_vif_is_mld(vif) || + (q_idx >= MT_LMAC_ALTX0 && q_idx <= MT_LMAC_BCN0))) val |= MT_TXD6_DIS_MAT; txwi[6] = cpu_to_le32(val); txwi[7] = 0; From d8db56142e531f060c938fa0b5175ed6c8cabb11 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Mon, 13 Oct 2025 02:08:24 -0700 Subject: [PATCH 0712/1561] wifi: mt76: mt7996: fix FCS error flag check in RX descriptor The mt7996 driver currently checks the MT_RXD3_NORMAL_FCS_ERR bit in rxd1 whereas other Connac3-based drivers(mt7925) correctly check this bit in rxd3. Since the MT_RXD3_NORMAL_FCS_ERR bit is defined in the fourth RX descriptor word (rxd3), update mt7996 to use the proper descriptor field. This change aligns mt7996 with mt7925 and the rest of the Connac3 family. Fixes: 98686cd21624 ("wifi: mt76: mt7996: add driver for MediaTek Wi-Fi 7 (802.11be) devices") Signed-off-by: Alok Tiwari Reviewed-by: AngeloGioacchino Del Regno Link: https://patch.msgid.link/20251013090826.753992-1-alok.a.tiwari@oracle.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7996/mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c index 0ca908b87a46..9c1715f4a3b8 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c @@ -488,7 +488,7 @@ mt7996_mac_fill_rx(struct mt7996_dev *dev, enum mt76_rxq_id q, !(csum_status & (BIT(0) | BIT(2) | BIT(3)))) skb->ip_summed = CHECKSUM_UNNECESSARY; - if (rxd1 & MT_RXD3_NORMAL_FCS_ERR) + if (rxd3 & MT_RXD3_NORMAL_FCS_ERR) status->flag |= RX_FLAG_FAILED_FCS_CRC; if (rxd1 & MT_RXD1_NORMAL_TKIP_MIC_ERR) From 4d0bf21e3e20619d51d06c0c36207aabab8b712c Mon Sep 17 00:00:00 2001 From: Rory Little Date: Wed, 3 Sep 2025 17:07:11 -0700 Subject: [PATCH 0713/1561] wifi: mt76: mt7921: Place upper limit on station AID Any station configured with an AID over 20 causes a firmware crash. This situation occurred in our testing using an AP interface on 7922 hardware, with a modified hostapd, sourced from Mediatek's OpenWRT feeds. In stock hostapd, station AIDs begin counting at 1, and this configuration is prevented with an upper limit on associated stations. However, the modified hostapd began allocation at 65, which caused the firmware to crash. This fix does not allow these AIDs to work, but will prevent the firmware crash. This crash was only seen on IFTYPE_AP interfaces, and the fix does not appear to have an effect on IFTYPE_STATION behavior. Fixes: 5c14a5f944b9 ("mt76: mt7921: introduce mt7921e support") Signed-off-by: Rory Little Link: https://patch.msgid.link/20250904000711.3033860-1-rory@candelatech.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7921/main.c | 6 ++++++ drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h | 2 ++ 2 files changed, 8 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c index 5fae9a6e273c..debecd3dff75 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c @@ -807,6 +807,9 @@ int mt7921_mac_sta_add(struct mt76_dev *mdev, struct ieee80211_vif *vif, struct mt792x_vif *mvif = (struct mt792x_vif *)vif->drv_priv; int ret, idx; + if (sta->aid > MT7921_MAX_AID) + return -ENOENT; + idx = mt76_wcid_alloc(dev->mt76.wcid_mask, MT792x_WTBL_STA - 1); if (idx < 0) return -ENOSPC; @@ -850,6 +853,9 @@ int mt7921_mac_sta_event(struct mt76_dev *mdev, struct ieee80211_vif *vif, struct mt792x_sta *msta = (struct mt792x_sta *)sta->drv_priv; struct mt792x_vif *mvif = (struct mt792x_vif *)vif->drv_priv; + if (sta->aid > MT7921_MAX_AID) + return -ENOENT; + if (ev != MT76_STA_EVENT_ASSOC) return 0; diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h index 83fc7f49ff84..ad92af98e314 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h +++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h @@ -7,6 +7,8 @@ #include "../mt792x.h" #include "regs.h" +#define MT7921_MAX_AID 20 + #define MT7921_TX_RING_SIZE 2048 #define MT7921_TX_MCU_RING_SIZE 256 #define MT7921_TX_FWDL_RING_SIZE 128 From 5373f8b19e568b5c217832b9bbef165bd2b2df14 Mon Sep 17 00:00:00 2001 From: Leon Yen Date: Thu, 9 Oct 2025 10:01:58 +0800 Subject: [PATCH 0714/1561] wifi: mt76: mt7921: fix a potential clc buffer length underflow The buf_len is used to limit the iterations for retrieving the country power setting and may underflow under certain conditions due to changes in the power table in CLC. This underflow leads to an almost infinite loop or an invalid power setting resulting in driver initialization failure. Cc: stable@vger.kernel.org Fixes: fa6ad88e023d ("wifi: mt76: mt7921: fix country count limitation for CLC") Signed-off-by: Leon Yen Signed-off-by: Ming Yen Hsieh Link: https://patch.msgid.link/20251009020158.1923429-1-mingyen.hsieh@mediatek.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7921/mcu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c index 833d0ab64230..8442dbd2ee23 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c @@ -1353,6 +1353,9 @@ int __mt7921_mcu_set_clc(struct mt792x_dev *dev, u8 *alpha2, u16 len = le16_to_cpu(rule->len); u16 offset = len + sizeof(*rule); + if (buf_len < offset) + break; + pos += offset; buf_len -= offset; if (rule->alpha2[0] != alpha2[0] || From 6b470f36616e3448d44b0ef4b1de2a3e3a31b5be Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 8 Dec 2025 19:54:08 +0100 Subject: [PATCH 0715/1561] wifi: mt76: Fix memory leak destroying device All MT76 rx queues have an associated page_pool even if the queue is not associated to a NAPI (e.g. WED RRO queues with WED enabled). Destroy the page_pool running mt76_dma_cleanup routine during module unload. Moreover returns pages to the page pool if WED is not enabled for WED RRO queues. Fixes: 950d0abb5cd94 ("wifi: mt76: mt7996: add wed rx support") Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20251208-mt76-fix-memory-leak-v1-1-cba813fc62b8@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/dma.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c index f240016ed9f0..893ac14285ca 100644 --- a/drivers/net/wireless/mediatek/mt76/dma.c +++ b/drivers/net/wireless/mediatek/mt76/dma.c @@ -874,7 +874,12 @@ mt76_dma_rx_cleanup(struct mt76_dev *dev, struct mt76_queue *q) if (!buf) break; - if (!mt76_queue_is_wed_rro(q)) + if (mtk_wed_device_active(&dev->mmio.wed) && + mt76_queue_is_wed_rro(q)) + continue; + + if (!mt76_queue_is_wed_rro_rxdmad_c(q) && + !mt76_queue_is_wed_rro_ind(q)) mt76_put_page_pool_buf(buf, false); } while (1); @@ -1168,10 +1173,6 @@ void mt76_dma_cleanup(struct mt76_dev *dev) mt76_for_each_q_rx(dev, i) { struct mt76_queue *q = &dev->q_rx[i]; - if (mtk_wed_device_active(&dev->mmio.wed) && - mt76_queue_is_wed_rro(q)) - continue; - netif_napi_del(&dev->napi[i]); mt76_dma_rx_cleanup(dev, q); From 7aed20bd9fe427b192cce80a164429584b298bbe Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 22 Jan 2026 11:39:45 +0100 Subject: [PATCH 0716/1561] wifi: mt76: mt7996: Fix NPU stop procedure Move mt7996_npu_hw_stop routine before disabling rx NAPIs in order to fix NPU stop procedure used during device L1 SER recovery. Add missing usleep_range in mt7996_npu_hw_stop(). Fixes: 377aa17d2aedc ("wifi: mt76: mt7996: Add NPU offload support to MT7996 driver") Tested-by: Kang Yang Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260122-mt76-npu-eagle-offload-v2-1-2374614c0de6@kernel.org Signed-off-by: Felix Fietkau --- .../net/wireless/mediatek/mt76/mt7996/mac.c | 3 +-- .../net/wireless/mediatek/mt76/mt7996/npu.c | 23 +++++++++++-------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c index 9c1715f4a3b8..00c6045622e3 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c @@ -2504,6 +2504,7 @@ void mt7996_mac_reset_work(struct work_struct *work) if (mtk_wed_device_active(&dev->mt76.mmio.wed)) mtk_wed_device_stop(&dev->mt76.mmio.wed); + mt7996_npu_hw_stop(dev); ieee80211_stop_queues(mt76_hw(dev)); set_bit(MT76_RESET, &dev->mphy.state); @@ -2530,8 +2531,6 @@ void mt7996_mac_reset_work(struct work_struct *work) mutex_lock(&dev->mt76.mutex); - mt7996_npu_hw_stop(dev); - mt76_wr(dev, MT_MCU_INT_EVENT, MT_MCU_INT_EVENT_DMA_STOPPED); if (mt7996_wait_reset_state(dev, MT_MCU_CMD_RESET_DONE)) { diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/npu.c b/drivers/net/wireless/mediatek/mt76/mt7996/npu.c index 1422533e59c7..9c3b241aae38 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/npu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/npu.c @@ -320,33 +320,38 @@ unlock: int mt7996_npu_hw_stop(struct mt7996_dev *dev) { struct airoha_npu *npu; - int i, err; + int i, err = 0; u32 info; + mutex_lock(&dev->mt76.mutex); + npu = rcu_dereference_protected(dev->mt76.mmio.npu, &dev->mt76.mutex); if (!npu) - return 0; + goto unlock; err = mt76_npu_send_msg(npu, 4, WLAN_FUNC_SET_WAIT_INODE_TXRX_REG_ADDR, 0, GFP_KERNEL); if (err) - return err; + goto unlock; for (i = 0; i < 10; i++) { err = mt76_npu_get_msg(npu, 3, WLAN_FUNC_GET_WAIT_NPU_INFO, &info, GFP_KERNEL); - if (err) - continue; + if (!err && !info) + break; - if (info) { - err = -ETIMEDOUT; - continue; - } + err = -ETIMEDOUT; + usleep_range(10000, 15000); } if (!err) err = mt76_npu_send_msg(npu, 6, WLAN_FUNC_SET_WAIT_INODE_TXRX_REG_ADDR, 0, GFP_KERNEL); + else + dev_err(dev->mt76.dev, "npu stop failed\n"); +unlock: + mutex_unlock(&dev->mt76.mutex); + return err; } From 25e3203a2192f2b0d697b2410126bad87e62d4f0 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 22 Jan 2026 11:39:46 +0100 Subject: [PATCH 0717/1561] wifi: mt76: npu: Add missing rx_token_size initialization Add missing rx_token_size initialization for NPU offloading. Fixes: 7fb554b1b623 ("wifi: mt76: Introduce the NPU generic layer") Tested-by: Kang Yang Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260122-mt76-npu-eagle-offload-v2-2-2374614c0de6@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/npu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/mediatek/mt76/npu.c b/drivers/net/wireless/mediatek/mt76/npu.c index ec36975f6dc9..9679237f7398 100644 --- a/drivers/net/wireless/mediatek/mt76/npu.c +++ b/drivers/net/wireless/mediatek/mt76/npu.c @@ -457,6 +457,7 @@ int mt76_npu_init(struct mt76_dev *dev, phys_addr_t phy_addr, int type) dev->mmio.npu_type = type; /* NPU offloading requires HW-RRO for RX packet reordering. */ dev->hwrro_mode = MT76_HWRRO_V3_1; + dev->rx_token_size = 32768; rcu_assign_pointer(dev->mmio.npu, npu); rcu_assign_pointer(dev->mmio.ppe_dev, ppe_dev); From f801fec3f0850ac00073bc322c0e4ea446d938ae Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 22 Jan 2026 11:39:47 +0100 Subject: [PATCH 0718/1561] wifi: mt76: always enable RRO queues for non-MT7992 chipset MT7990 NPU binary requires to initialize NPU desc_base after configuring ring_size. This is a preliminary patch to enable NPU offload for MT7996 (Eagle) chipset. Tested-by: Kang Yang Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260122-mt76-npu-eagle-offload-v2-3-2374614c0de6@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/dma.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c index 893ac14285ca..f5c6bb94ccbb 100644 --- a/drivers/net/wireless/mediatek/mt76/dma.c +++ b/drivers/net/wireless/mediatek/mt76/dma.c @@ -6,6 +6,7 @@ #include #include "mt76.h" #include "dma.h" +#include "mt76_connac.h" static struct mt76_txwi_cache * mt76_alloc_txwi(struct mt76_dev *dev) @@ -188,16 +189,18 @@ mt76_dma_queue_magic_cnt_init(struct mt76_dev *dev, struct mt76_queue *q) static void mt76_dma_sync_idx(struct mt76_dev *dev, struct mt76_queue *q) { - Q_WRITE(q, desc_base, q->desc_dma); - if ((q->flags & MT_QFLAG_WED_RRO_EN) && !mt76_npu_device_active(dev)) + if ((q->flags & MT_QFLAG_WED_RRO_EN) && + (!is_mt7992(dev) || !mt76_npu_device_active(dev))) Q_WRITE(q, ring_size, MT_DMA_RRO_EN | q->ndesc); else Q_WRITE(q, ring_size, q->ndesc); if (mt76_queue_is_npu_tx(q)) { - writel(q->desc_dma, &q->regs->desc_base); writel(q->ndesc, &q->regs->ring_size); + writel(q->desc_dma, &q->regs->desc_base); } + + Q_WRITE(q, desc_base, q->desc_dma); q->head = Q_READ(q, dma_idx); q->tail = q->head; } From b849930f2ce77185b833d93ab4317a33ffc584c5 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 22 Jan 2026 11:39:48 +0100 Subject: [PATCH 0719/1561] wifi: mt76: mt7996: Fix BAND2 tx queues initialization when NPU is enabled Fix BAND2 tx queues initialization for MT7990 chipset when NPU is enabled. This is a preliminary patch to enable NPU offload for MT7996 (Eagle) chipset. Tested-by: Kang Yang Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260122-mt76-npu-eagle-offload-v2-4-2374614c0de6@kernel.org Signed-off-by: Felix Fietkau --- .../net/wireless/mediatek/mt76/mt7996/init.c | 18 ++++++++++++------ .../net/wireless/mediatek/mt76/mt7996/mt7996.h | 1 + 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/init.c b/drivers/net/wireless/mediatek/mt76/mt7996/init.c index 2e439f0815d4..e678f06b4556 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/init.c @@ -683,8 +683,9 @@ static int mt7996_register_phy(struct mt7996_dev *dev, enum mt76_band_id band) return 0; if (dev->hif2 && - ((is_mt7996(&dev->mt76) && band == MT_BAND2) || - (is_mt7992(&dev->mt76) && band == MT_BAND1))) { + ((is_mt7992(&dev->mt76) && band == MT_BAND1) || + (is_mt7996(&dev->mt76) && band == MT_BAND2 && + !mt76_npu_device_active(&dev->mt76)))) { hif1_ofs = MT_WFDMA0_PCIE1(0) - MT_WFDMA0(0); wed = &dev->mt76.mmio.wed_hif2; } @@ -724,14 +725,19 @@ static int mt7996_register_phy(struct mt7996_dev *dev, enum mt76_band_id band) /* init wiphy according to mphy and phy */ mt7996_init_wiphy_band(mphy->hw, phy); - if (is_mt7996(&dev->mt76) && !dev->hif2 && band == MT_BAND1) { + if (is_mt7996(&dev->mt76) && + ((band == MT_BAND1 && !dev->hif2) || + (band == MT_BAND2 && mt76_npu_device_active(&dev->mt76)))) { int i; for (i = 0; i <= MT_TXQ_PSD; i++) - mphy->q_tx[i] = dev->mt76.phys[MT_BAND0]->q_tx[0]; + mphy->q_tx[i] = dev->mt76.phys[band - 1]->q_tx[0]; } else { - ret = mt7996_init_tx_queues(mphy->priv, MT_TXQ_ID(band), - MT7996_TX_RING_SIZE, + int size = is_mt7996(&dev->mt76) && + mt76_npu_device_active(&dev->mt76) + ? MT7996_NPU_TX_RING_SIZE / 2 : MT7996_TX_RING_SIZE; + + ret = mt7996_init_tx_queues(mphy->priv, MT_TXQ_ID(band), size, MT_TXQ_RING_BASE(band) + hif1_ofs, wed); if (ret) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h index f8b79b05169b..09808e79ec86 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h @@ -29,6 +29,7 @@ #define MT7996_RX_RING_SIZE 1536 #define MT7996_RX_MCU_RING_SIZE 512 #define MT7996_RX_MCU_RING_SIZE_WA 1024 +#define MT7996_NPU_TX_RING_SIZE 1024 /* scatter-gather of mcu event is not supported in connac3 */ #define MT7996_RX_MCU_BUF_SIZE (2048 + \ SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) From 00fa11ec4ab236a0e959093dc804285533846213 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 22 Jan 2026 11:39:49 +0100 Subject: [PATCH 0720/1561] wifi: mt76: mt7996: Fix wdma_idx for MT7996 device if NPU is enabled This is a preliminary patch to enable NPU offload for MT7996 (Eagle) chipset. Tested-by: Kang Yang Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260122-mt76-npu-eagle-offload-v2-5-2374614c0de6@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7996/main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index 493c47c59d57..c0fae7aec1ae 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -2291,6 +2291,10 @@ mt7996_net_fill_forward_path(struct ieee80211_hw *hw, path->mtk_wdma.wdma_idx = wed->wdma_idx; else #endif + if (is_mt7996(&dev->mt76) && mt76_npu_device_active(&dev->mt76) && + msta_link->wcid.phy_idx == MT_BAND2) + path->mtk_wdma.wdma_idx = 1; + else path->mtk_wdma.wdma_idx = link->mt76.band_idx; path->mtk_wdma.bss = link->mt76.idx; path->mtk_wdma.queue = 0; From a9ac8f837f12ce180da90e70277c36ecd04b01e2 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 22 Jan 2026 11:39:50 +0100 Subject: [PATCH 0721/1561] wifi: mt76: mt7996: Add mt7992_npu_txrx_offload_init routine Introduce mt7992_npu_txrx_offload_init utility routine. This is a preliminary patch to enable NPU offload for MT7996 (Eagle) chipset. Tested-by: Kang Yang Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260122-mt76-npu-eagle-offload-v2-6-2374614c0de6@kernel.org Signed-off-by: Felix Fietkau --- .../net/wireless/mediatek/mt76/mt7996/npu.c | 63 +++++++++++-------- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/npu.c b/drivers/net/wireless/mediatek/mt76/mt7996/npu.c index 9c3b241aae38..0f1aabd63748 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/npu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/npu.c @@ -8,34 +8,14 @@ #include "mt7996.h" -static int mt7996_npu_offload_init(struct mt7996_dev *dev, - struct airoha_npu *npu) +static int mt7992_npu_txrx_offload_init(struct mt7996_dev *dev, + struct airoha_npu *npu) { + u32 hif1_ofs = dev->hif2 ? MT_WFDMA0_PCIE1(0) - MT_WFDMA0(0) : 0; phys_addr_t phy_addr = dev->mt76.mmio.phy_addr; - u32 val, hif1_ofs = 0, dma_addr; + u32 dma_addr; int i, err; - err = mt76_npu_get_msg(npu, 0, WLAN_FUNC_GET_WAIT_NPU_VERSION, - &val, GFP_KERNEL); - if (err) { - dev_warn(dev->mt76.dev, "failed getting NPU fw version\n"); - return err; - } - - dev_info(dev->mt76.dev, "NPU version: %0d.%d\n", - (val >> 16) & 0xffff, val & 0xffff); - - err = mt76_npu_send_msg(npu, 0, WLAN_FUNC_SET_WAIT_PCIE_PORT_TYPE, - dev->mt76.mmio.npu_type, GFP_KERNEL); - if (err) { - dev_warn(dev->mt76.dev, - "failed setting NPU wlan PCIe port type\n"); - return err; - } - - if (dev->hif2) - hif1_ofs = MT_WFDMA0_PCIE1(0) - MT_WFDMA0(0); - for (i = MT_BAND0; i < MT_BAND2; i++) { dma_addr = phy_addr; if (i) @@ -56,7 +36,7 @@ static int mt7996_npu_offload_init(struct mt7996_dev *dev, MT7996_RX_RING_SIZE, GFP_KERNEL); if (err) { dev_warn(dev->mt76.dev, - "failed setting NPU wlan PCIe desc size\n"); + "failed setting NPU wlan rx desc size\n"); return err; } @@ -97,10 +77,41 @@ static int mt7996_npu_offload_init(struct mt7996_dev *dev, phy_addr + MT_RRO_ACK_SN_CTRL, GFP_KERNEL); if (err) { dev_warn(dev->mt76.dev, - "failed setting NPU wlan rro_ack_sn desc addr\n"); + "failed setting NPU wlan tx desc addr\n"); return err; } + return 0; +} + +static int mt7996_npu_offload_init(struct mt7996_dev *dev, + struct airoha_npu *npu) +{ + u32 val; + int err; + + err = mt76_npu_get_msg(npu, 0, WLAN_FUNC_GET_WAIT_NPU_VERSION, + &val, GFP_KERNEL); + if (err) { + dev_warn(dev->mt76.dev, "failed getting NPU fw version\n"); + return err; + } + + dev_info(dev->mt76.dev, "NPU version: %0d.%d\n", + (val >> 16) & 0xffff, val & 0xffff); + + err = mt76_npu_send_msg(npu, 0, WLAN_FUNC_SET_WAIT_PCIE_PORT_TYPE, + dev->mt76.mmio.npu_type, GFP_KERNEL); + if (err) { + dev_warn(dev->mt76.dev, + "failed setting NPU wlan PCIe port type\n"); + return err; + } + + err = mt7992_npu_txrx_offload_init(dev, npu); + if (err) + return err; + err = mt76_npu_send_msg(npu, 0, WLAN_FUNC_SET_WAIT_TOKEN_ID_SIZE, MT7996_HW_TOKEN_SIZE, GFP_KERNEL); if (err) From c93e2fbdc79b91e5c221446f970ab847db38309e Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 22 Jan 2026 11:39:51 +0100 Subject: [PATCH 0722/1561] wifi: mt76: mt7996: Rename mt7996_npu_rxd_init() in mt7992_npu_rxd_init() This is a preliminary patch to enable NPU offload for MT7996 (Eagle) chipset. Tested-by: Kang Yang Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260122-mt76-npu-eagle-offload-v2-7-2374614c0de6@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7996/npu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/npu.c b/drivers/net/wireless/mediatek/mt76/mt7996/npu.c index 0f1aabd63748..b2e7b5aba272 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/npu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/npu.c @@ -122,7 +122,7 @@ static int mt7996_npu_offload_init(struct mt7996_dev *dev, return 0; } -static int mt7996_npu_rxd_init(struct mt7996_dev *dev, struct airoha_npu *npu) +static int mt7992_npu_rxd_init(struct mt7996_dev *dev, struct airoha_npu *npu) { u32 val; int err; @@ -304,7 +304,7 @@ int mt7996_npu_hw_init(struct mt7996_dev *dev) if (err) goto unlock; - err = mt7996_npu_rxd_init(dev, npu); + err = mt7992_npu_rxd_init(dev, npu); if (err) goto unlock; From 880f4e3e5a4c465fba0390ed3c2afa1d7eece550 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 22 Jan 2026 11:39:52 +0100 Subject: [PATCH 0723/1561] wifi: mt76: mt7996: Add NPU support for MT7990 chipset Introduce support for MT7990 chipset in MT7996 npu configuration codebase. Tested-by: Kang Yang Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260122-mt76-npu-eagle-offload-v2-8-2374614c0de6@kernel.org Signed-off-by: Felix Fietkau --- .../wireless/mediatek/mt76/mt7996/mt7996.h | 1 + .../net/wireless/mediatek/mt76/mt7996/npu.c | 306 ++++++++++++++++-- 2 files changed, 276 insertions(+), 31 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h index 09808e79ec86..c9c506e1d6ed 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h @@ -30,6 +30,7 @@ #define MT7996_RX_MCU_RING_SIZE 512 #define MT7996_RX_MCU_RING_SIZE_WA 1024 #define MT7996_NPU_TX_RING_SIZE 1024 +#define MT7996_NPU_RX_RING_SIZE 1024 /* scatter-gather of mcu event is not supported in connac3 */ #define MT7996_RX_MCU_BUF_SIZE (2048 + \ SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/npu.c b/drivers/net/wireless/mediatek/mt76/mt7996/npu.c index b2e7b5aba272..c3307bbbb547 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/npu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/npu.c @@ -17,21 +17,6 @@ static int mt7992_npu_txrx_offload_init(struct mt7996_dev *dev, int i, err; for (i = MT_BAND0; i < MT_BAND2; i++) { - dma_addr = phy_addr; - if (i) - dma_addr += MT_RXQ_RING_BASE(MT_RXQ_RRO_BAND1) + 0x90 + - hif1_ofs; - else - dma_addr += MT_RXQ_RING_BASE(MT_RXQ_RRO_BAND0) + 0x80; - - err = mt76_npu_send_msg(npu, i, WLAN_FUNC_SET_WAIT_PCIE_ADDR, - dma_addr, GFP_KERNEL); - if (err) { - dev_warn(dev->mt76.dev, - "failed setting NPU wlan PCIe desc addr\n"); - return err; - } - err = mt76_npu_send_msg(npu, i, WLAN_FUNC_SET_WAIT_DESC, MT7996_RX_RING_SIZE, GFP_KERNEL); if (err) { @@ -84,6 +69,134 @@ static int mt7992_npu_txrx_offload_init(struct mt7996_dev *dev, return 0; } +static int mt7996_npu_txrx_offload_init(struct mt7996_dev *dev, + struct airoha_npu *npu) +{ + u32 hif1_ofs = dev->hif2 ? MT_WFDMA0_PCIE1(0) - MT_WFDMA0(0) : 0; + phys_addr_t phy_addr = dev->mt76.mmio.phy_addr; + u32 dma_addr; + int err; + + /* npu rx rro ring0 */ + err = mt76_npu_send_msg(npu, 0, WLAN_FUNC_SET_WAIT_DESC, + MT7996_RX_RING_SIZE, GFP_KERNEL); + if (err) { + dev_warn(dev->mt76.dev, + "failed setting NPU wlan rx desc size\n"); + return err; + } + + /* npu rx rro ring1 */ + err = mt76_npu_send_msg(npu, 2, WLAN_FUNC_SET_WAIT_DESC, + MT7996_NPU_RX_RING_SIZE, GFP_KERNEL); + if (err) { + dev_warn(dev->mt76.dev, + "failed setting NPU wlan rx desc size\n"); + return err; + } + + /* msdu pg 2GHz */ + dma_addr = phy_addr + MT_RXQ_RING_BASE(MT_RXQ_MSDU_PAGE_BAND0) + 0xa0; + err = mt76_npu_send_msg(npu, 5, WLAN_FUNC_SET_WAIT_PCIE_ADDR, + dma_addr, GFP_KERNEL); + if (err) { + dev_warn(dev->mt76.dev, + "failed setting NPU wlan PCIe desc addr\n"); + return err; + } + + err = mt76_npu_send_msg(npu, 5, WLAN_FUNC_SET_WAIT_DESC, + MT7996_NPU_RX_RING_SIZE / 4, GFP_KERNEL); + if (err) { + dev_warn(dev->mt76.dev, + "failed setting NPU wlan rx desc size\n"); + return err; + } + + /* msdu pg 5GHz */ + dma_addr = phy_addr + MT_RXQ_RING_BASE(MT_RXQ_MSDU_PAGE_BAND1) + 0xb0; + err = mt76_npu_send_msg(npu, 6, WLAN_FUNC_SET_WAIT_PCIE_ADDR, + dma_addr, GFP_KERNEL); + if (err) { + dev_warn(dev->mt76.dev, + "failed setting NPU wlan PCIe desc addr\n"); + return err; + } + + err = mt76_npu_send_msg(npu, 6, WLAN_FUNC_SET_WAIT_DESC, + MT7996_NPU_RX_RING_SIZE / 2, GFP_KERNEL); + if (err) { + dev_warn(dev->mt76.dev, + "failed setting NPU wlan rx desc size\n"); + return err; + } + + /* msdu pg 6GHz */ + dma_addr = phy_addr + MT_RXQ_RING_BASE(MT_RXQ_MSDU_PAGE_BAND2) + 0xc0; + err = mt76_npu_send_msg(npu, 7, WLAN_FUNC_SET_WAIT_PCIE_ADDR, + dma_addr, GFP_KERNEL); + if (err) { + dev_warn(dev->mt76.dev, + "failed setting NPU wlan PCIe desc addr\n"); + return err; + } + + err = mt76_npu_send_msg(npu, 7, WLAN_FUNC_SET_WAIT_DESC, + MT7996_NPU_RX_RING_SIZE, GFP_KERNEL); + if (err) { + dev_warn(dev->mt76.dev, + "failed setting NPU wlan rx desc size\n"); + return err; + } + + /* ind cmd ring */ + err = mt76_npu_send_msg(npu, 8, WLAN_FUNC_SET_WAIT_PCIE_ADDR, + phy_addr + MT_RXQ_RRO_IND_RING_BASE, + GFP_KERNEL); + if (err) { + dev_warn(dev->mt76.dev, + "failed setting NPU wlan PCIe desc addr\n"); + return err; + } + + err = mt76_npu_send_msg(npu, 8, WLAN_FUNC_SET_WAIT_DESC, + MT7996_RX_RING_SIZE, GFP_KERNEL); + if (err) { + dev_warn(dev->mt76.dev, + "failed setting NPU wlan rx desc size\n"); + return err; + } + + err = mt76_npu_send_msg(npu, 3, WLAN_FUNC_SET_WAIT_TX_RING_PCIE_ADDR, + phy_addr + MT_RRO_ACK_SN_CTRL, GFP_KERNEL); + if (err) { + dev_warn(dev->mt76.dev, + "failed setting NPU wlan tx desc addr\n"); + return err; + } + + /* npu tx */ + dma_addr = phy_addr + MT_TXQ_RING_BASE(1) + 0x120; + err = mt76_npu_send_msg(npu, 0, WLAN_FUNC_SET_WAIT_TX_RING_PCIE_ADDR, + dma_addr, GFP_KERNEL); + if (err) { + dev_warn(dev->mt76.dev, + "failed setting NPU wlan tx desc addr\n"); + return err; + } + + dma_addr = phy_addr + MT_TXQ_RING_BASE(0) + 0x150 + hif1_ofs; + err = mt76_npu_send_msg(npu, 2, WLAN_FUNC_SET_WAIT_TX_RING_PCIE_ADDR, + dma_addr, GFP_KERNEL); + if (err) { + dev_warn(dev->mt76.dev, + "failed setting NPU wlan tx desc addr\n"); + return err; + } + + return 0; +} + static int mt7996_npu_offload_init(struct mt7996_dev *dev, struct airoha_npu *npu) { @@ -108,7 +221,11 @@ static int mt7996_npu_offload_init(struct mt7996_dev *dev, return err; } - err = mt7992_npu_txrx_offload_init(dev, npu); + if (is_mt7996(&dev->mt76)) + err = mt7996_npu_txrx_offload_init(dev, npu); + else + err = mt7992_npu_txrx_offload_init(dev, npu); + if (err) return err; @@ -157,15 +274,84 @@ static int mt7992_npu_rxd_init(struct mt7996_dev *dev, struct airoha_npu *npu) return 0; } +static int mt7996_npu_rxd_init(struct mt7996_dev *dev, struct airoha_npu *npu) +{ + u32 val; + int err; + + err = mt76_npu_get_msg(npu, 0, WLAN_FUNC_GET_WAIT_RXDESC_BASE, + &val, GFP_KERNEL); + if (err) { + dev_warn(dev->mt76.dev, + "failed retriving NPU wlan rx ring0 addr\n"); + return err; + } + writel(val, &dev->mt76.q_rx[MT_RXQ_RRO_BAND0].regs->desc_base); + + err = mt76_npu_get_msg(npu, 2, WLAN_FUNC_GET_WAIT_RXDESC_BASE, + &val, GFP_KERNEL); + if (err) { + dev_warn(dev->mt76.dev, + "failed retriving NPU wlan rx ring2 addr\n"); + return err; + } + writel(val, &dev->mt76.q_rx[MT_RXQ_RRO_BAND2].regs->desc_base); + + /* msdu pg ring */ + err = mt76_npu_get_msg(npu, 10, WLAN_FUNC_GET_WAIT_RXDESC_BASE, + &val, GFP_KERNEL); + if (err) { + dev_warn(dev->mt76.dev, + "failed retriving NPU wlan msdu pg ring addr\n"); + return err; + } + writel(val, &dev->mt76.q_rx[MT_RXQ_MSDU_PAGE_BAND0].regs->desc_base); + + err = mt76_npu_get_msg(npu, 11, WLAN_FUNC_GET_WAIT_RXDESC_BASE, + &val, GFP_KERNEL); + if (err) { + dev_warn(dev->mt76.dev, + "failed retriving NPU wlan msdu pg ring addr\n"); + return err; + } + writel(val, &dev->mt76.q_rx[MT_RXQ_MSDU_PAGE_BAND1].regs->desc_base); + + err = mt76_npu_get_msg(npu, 12, WLAN_FUNC_GET_WAIT_RXDESC_BASE, + &val, GFP_KERNEL); + if (err) { + dev_warn(dev->mt76.dev, + "failed retriving NPU wlan msdu pg ring addr\n"); + return err; + } + writel(val, &dev->mt76.q_rx[MT_RXQ_MSDU_PAGE_BAND2].regs->desc_base); + + /* ind_cmd ring */ + err = mt76_npu_get_msg(npu, 8, WLAN_FUNC_GET_WAIT_RXDESC_BASE, + &val, GFP_KERNEL); + if (err) { + dev_warn(dev->mt76.dev, + "failed retriving NPU wlan ind_cmd ring addr\n"); + return err; + } + writel(val, &dev->mt76.q_rx[MT_RXQ_RRO_IND].regs->desc_base); + + return 0; +} + static int mt7996_npu_txd_init(struct mt7996_dev *dev, struct airoha_npu *npu) { - int i, err; + const enum mt76_band_id band_list[] = { + MT_BAND0, + is_mt7996(&dev->mt76) ? MT_BAND2 : MT_BAND1, + }; + int i; - for (i = MT_BAND0; i < MT_BAND2; i++) { + for (i = 0; i < ARRAY_SIZE(band_list); i++) { + int err, band = band_list[i], phy_id; dma_addr_t dma_addr; - u32 val; + u32 val, size; - err = mt76_npu_get_msg(npu, i + 5, + err = mt76_npu_get_msg(npu, band + 5, WLAN_FUNC_GET_WAIT_RXDESC_BASE, &val, GFP_KERNEL); if (err) { @@ -173,14 +359,20 @@ static int mt7996_npu_txd_init(struct mt7996_dev *dev, struct airoha_npu *npu) "failed retrieving NPU wlan tx ring addr\n"); return err; } - writel(val, &dev->mt76.phys[i]->q_tx[0]->regs->desc_base); - if (!dmam_alloc_coherent(dev->mt76.dma_dev, - 256 * MT7996_TX_RING_SIZE, + phy_id = is_mt7996(&dev->mt76) ? band == MT_BAND0 ? 1 : 0 + : band; + writel(val, &dev->mt76.phys[phy_id]->q_tx[0]->regs->desc_base); + + size = is_mt7996(&dev->mt76) ? band == MT_BAND2 + ? MT7996_NPU_TX_RING_SIZE + : MT7996_NPU_RX_RING_SIZE / 2 + : MT7996_TX_RING_SIZE; + if (!dmam_alloc_coherent(dev->mt76.dma_dev, 256 * size, &dma_addr, GFP_KERNEL)) return -ENOMEM; - err = mt76_npu_send_msg(npu, i, + err = mt76_npu_send_msg(npu, band, WLAN_FUNC_SET_WAIT_TX_BUF_SPACE_HW_BASE, dma_addr, GFP_KERNEL); if (err) { @@ -189,12 +381,11 @@ static int mt7996_npu_txd_init(struct mt7996_dev *dev, struct airoha_npu *npu) return err; } - if (!dmam_alloc_coherent(dev->mt76.dma_dev, - 256 * MT7996_TX_RING_SIZE, + if (!dmam_alloc_coherent(dev->mt76.dma_dev, 256 * size, &dma_addr, GFP_KERNEL)) return -ENOMEM; - err = mt76_npu_send_msg(npu, i + 5, + err = mt76_npu_send_msg(npu, band + 5, WLAN_FUNC_SET_WAIT_TX_BUF_SPACE_HW_BASE, dma_addr, GFP_KERNEL); if (err) { @@ -207,7 +398,7 @@ static int mt7996_npu_txd_init(struct mt7996_dev *dev, struct airoha_npu *npu) &dma_addr, GFP_KERNEL)) return -ENOMEM; - err = mt76_npu_send_msg(npu, i + 10, + err = mt76_npu_send_msg(npu, band + 10, WLAN_FUNC_SET_WAIT_TX_BUF_SPACE_HW_BASE, dma_addr, GFP_KERNEL); if (err) { @@ -223,8 +414,9 @@ static int mt7996_npu_txd_init(struct mt7996_dev *dev, struct airoha_npu *npu) static int mt7996_npu_rx_event_init(struct mt7996_dev *dev, struct airoha_npu *npu) { - struct mt76_queue *q = &dev->mt76.q_rx[MT_RXQ_MAIN_WA]; + int qid = is_mt7996(&dev->mt76) ? MT_RXQ_TXFREE_BAND0 : MT_RXQ_MAIN_WA; phys_addr_t phy_addr = dev->mt76.mmio.phy_addr; + struct mt76_queue *q = &dev->mt76.q_rx[qid]; int err; err = mt76_npu_send_msg(npu, 0, @@ -244,7 +436,8 @@ static int mt7996_npu_rx_event_init(struct mt7996_dev *dev, return err; } - phy_addr += MT_RXQ_RING_BASE(MT_RXQ_MAIN_WA) + 0x20; + phy_addr += MT_RXQ_RING_BASE(qid); + phy_addr += is_mt7996(&dev->mt76) ? 0x90 : 0x20; err = mt76_npu_send_msg(npu, 10, WLAN_FUNC_SET_WAIT_PCIE_ADDR, phy_addr, GFP_KERNEL); if (err) @@ -253,11 +446,54 @@ static int mt7996_npu_rx_event_init(struct mt7996_dev *dev, return err; } +static int mt7996_npu_set_pcie_addr(struct mt7996_dev *dev, + struct airoha_npu *npu) +{ + u32 hif1_ofs = dev->hif2 ? MT_WFDMA0_PCIE1(0) - MT_WFDMA0(0) : 0; + dma_addr_t dma_addr = dev->mt76.mmio.phy_addr; + int err; + + dma_addr += MT_RXQ_RING_BASE(MT_RXQ_RRO_BAND0) + 0x80; + err = mt76_npu_send_msg(npu, 0, WLAN_FUNC_SET_WAIT_PCIE_ADDR, + dma_addr, GFP_KERNEL); + if (err) { + dev_warn(dev->mt76.dev, + "failed setting NPU wlan PCIe desc addr\n"); + return err; + } + + dma_addr = dev->mt76.mmio.phy_addr + hif1_ofs; + if (is_mt7996(&dev->mt76)) { + dma_addr += MT_RXQ_RING_BASE(MT_RXQ_RRO_BAND2) + 0x60; + err = mt76_npu_send_msg(npu, 2, WLAN_FUNC_SET_WAIT_PCIE_ADDR, + dma_addr, GFP_KERNEL); + } else { + dma_addr += MT_RXQ_RING_BASE(MT_RXQ_RRO_BAND1) + 0x90; + err = mt76_npu_send_msg(npu, 1, WLAN_FUNC_SET_WAIT_PCIE_ADDR, + dma_addr, GFP_KERNEL); + } + + if (err) + dev_warn(dev->mt76.dev, + "failed setting NPU wlan PCIe desc addr\n"); + + return err; +} + static int mt7996_npu_tx_done_init(struct mt7996_dev *dev, struct airoha_npu *npu) { int err; + /* rro ring cpu idx */ + err = mt76_npu_send_msg(npu, 15, WLAN_FUNC_SET_WAIT_PCIE_ADDR, + 0, GFP_KERNEL); + if (err) { + dev_warn(dev->mt76.dev, + "failed setting NPU wlan PCIe desc addr\n"); + return err; + } + err = mt76_npu_send_msg(npu, 2, WLAN_FUNC_SET_WAIT_INODE_TXRX_REG_ADDR, 0, GFP_KERNEL); if (err) { @@ -304,7 +540,11 @@ int mt7996_npu_hw_init(struct mt7996_dev *dev) if (err) goto unlock; - err = mt7992_npu_rxd_init(dev, npu); + if (is_mt7996(&dev->mt76)) + err = mt7996_npu_rxd_init(dev, npu); + else + err = mt7992_npu_rxd_init(dev, npu); + if (err) goto unlock; @@ -316,6 +556,10 @@ int mt7996_npu_hw_init(struct mt7996_dev *dev) if (err) goto unlock; + err = mt7996_npu_set_pcie_addr(dev, npu); + if (err) + goto unlock; + err = mt7996_npu_tx_done_init(dev, npu); if (err) goto unlock; From aa6a0ded87d7dababcb2e9b23e8137131557b8fa Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 22 Jan 2026 11:39:53 +0100 Subject: [PATCH 0724/1561] wifi: mt76: mt7996: Integrate NPU in RRO session management Add NPU integration in RRO 3.0 session management. This is a preliminary patch to enable NPU offload for MT7996 (Eagle) chipset. Tested-by: Kang Yang Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260122-mt76-npu-eagle-offload-v2-9-2374614c0de6@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76.h | 10 +++++++ .../net/wireless/mediatek/mt76/mt7996/init.c | 16 +++++++++- drivers/net/wireless/mediatek/mt76/npu.c | 30 +++++++++++++++++++ 3 files changed, 55 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index d05e83ea1cac..eefc3f555f8a 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -1649,6 +1649,9 @@ void mt76_npu_txdesc_cleanup(struct mt76_queue *q, int index); int mt76_npu_net_setup_tc(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct net_device *dev, enum tc_setup_type type, void *type_data); +int mt76_npu_send_txrx_addr(struct mt76_dev *dev, int ifindex, + u32 direction, u32 i_count_addr, + u32 o_status_addr, u32 o_count_addr); #else static inline void mt76_npu_check_ppe(struct mt76_dev *dev, struct sk_buff *skb, u32 info) @@ -1707,6 +1710,13 @@ static inline int mt76_npu_net_setup_tc(struct ieee80211_hw *hw, { return -EOPNOTSUPP; } + +static inline int mt76_npu_send_txrx_addr(struct mt76_dev *dev, int ifindex, + u32 direction, u32 i_count_addr, + u32 o_status_addr, u32 o_count_addr) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_MT76_NPU */ static inline bool mt76_npu_device_active(struct mt76_dev *dev) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/init.c b/drivers/net/wireless/mediatek/mt76/mt7996/init.c index e678f06b4556..b0f0d3adbb04 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/init.c @@ -959,6 +959,12 @@ static int mt7996_wed_rro_init(struct mt7996_dev *dev) addr++; } + if (is_mt7996(&dev->mt76) && + mt76_npu_device_active(&dev->mt76)) + mt76_npu_send_txrx_addr(&dev->mt76, 0, i, + dev->wed_rro.addr_elem[i].phy_addr, + 0, 0); + #ifdef CONFIG_NET_MEDIATEK_SOC_WED if (mtk_wed_device_active(&dev->mt76.mmio.wed) && mtk_wed_get_rx_capa(&dev->mt76.mmio.wed)) { @@ -1019,6 +1025,10 @@ static int mt7996_wed_rro_init(struct mt7996_dev *dev) addr++; } + if (is_mt7996(&dev->mt76) && mt76_npu_device_active(&dev->mt76)) + mt76_npu_send_txrx_addr(&dev->mt76, 1, 0, + dev->wed_rro.session.phy_addr, 0, 0); + mt7996_rro_hw_init(dev); return mt7996_dma_rro_init(dev); @@ -1105,8 +1115,12 @@ static void mt7996_wed_rro_work(struct work_struct *work) list); list_del_init(&e->list); - if (mt76_npu_device_active(&dev->mt76)) + if (mt76_npu_device_active(&dev->mt76)) { + if (is_mt7996(&dev->mt76)) + mt76_npu_send_txrx_addr(&dev->mt76, 3, e->id, + 0, 0, 0); goto reset_session; + } for (i = 0; i < MT7996_RRO_WINDOW_MAX_LEN; i++) { void *ptr = dev->wed_rro.session.ptr; diff --git a/drivers/net/wireless/mediatek/mt76/npu.c b/drivers/net/wireless/mediatek/mt76/npu.c index 9679237f7398..bc8f2012be9d 100644 --- a/drivers/net/wireless/mediatek/mt76/npu.c +++ b/drivers/net/wireless/mediatek/mt76/npu.c @@ -390,6 +390,36 @@ int mt76_npu_net_setup_tc(struct ieee80211_hw *hw, struct ieee80211_vif *vif, } EXPORT_SYMBOL_GPL(mt76_npu_net_setup_tc); +int mt76_npu_send_txrx_addr(struct mt76_dev *dev, int ifindex, + u32 direction, u32 i_count_addr, + u32 o_status_addr, u32 o_count_addr) +{ + struct { + __le32 dir; + __le32 in_count_addr; + __le32 out_status_addr; + __le32 out_count_addr; + } info = { + .dir = cpu_to_le32(direction), + .in_count_addr = cpu_to_le32(i_count_addr), + .out_status_addr = cpu_to_le32(o_status_addr), + .out_count_addr = cpu_to_le32(o_count_addr), + }; + struct airoha_npu *npu; + int err = -ENODEV; + + rcu_read_lock(); + npu = rcu_dereference(dev->mmio.npu); + if (npu) + err = airoha_npu_wlan_send_msg(npu, ifindex, + WLAN_FUNC_SET_WAIT_INODE_TXRX_REG_ADDR, + &info, sizeof(info), GFP_ATOMIC); + rcu_read_unlock(); + + return err; +} +EXPORT_SYMBOL_GPL(mt76_npu_send_txrx_addr); + void mt76_npu_disable_irqs(struct mt76_dev *dev) { struct airoha_npu *npu; From 26c28522fa460435bd9a0dc4e05ae599f21ada6b Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 22 Jan 2026 11:39:54 +0100 Subject: [PATCH 0725/1561] wifi: mt76: mt7996: Integrate MT7990 init configuration for NPU Add NPU integration in MT7996 init codebase for MT7990 chipset. This is a preliminary patch to enable NPU offload for MT7996 (Eagle) chipset. Tested-by: Kang Yang Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260122-mt76-npu-eagle-offload-v2-10-2374614c0de6@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7996/init.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/init.c b/drivers/net/wireless/mediatek/mt76/mt7996/init.c index b0f0d3adbb04..b76bd324a927 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/init.c @@ -607,7 +607,7 @@ static void mt7996_mac_init_basic_rates(struct mt7996_dev *dev) void mt7996_mac_init(struct mt7996_dev *dev) { #define HIF_TXD_V2_1 0x21 - int i; + int i, rx_path_type; mt76_clear(dev, MT_MDP_DCR2, MT_MDP_DCR2_RX_TRANS_SHORT); @@ -621,11 +621,16 @@ void mt7996_mac_init(struct mt7996_dev *dev) } /* rro module init */ - if (dev->hif2) + if (dev->hif2) { + if (mt76_npu_device_active(&dev->mt76)) + rx_path_type = is_mt7996(&dev->mt76) ? 6 : 8; + else + rx_path_type = is_mt7996(&dev->mt76) ? 2 : 7; mt7996_mcu_set_rro(dev, UNI_RRO_SET_PLATFORM_TYPE, - is_mt7996(&dev->mt76) ? 2 : 7); - else + rx_path_type); + } else { mt7996_mcu_set_rro(dev, UNI_RRO_SET_PLATFORM_TYPE, 0); + } if (mt7996_has_hwrro(dev)) { u16 timeout; From cd7951f242a7e4114de8f41804d708f5b5079d53 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 22 Jan 2026 11:39:55 +0100 Subject: [PATCH 0726/1561] wifi: mt76: mt7996: Integrate MT7990 dma configuration for NPU Add NPU integration in MT7996 dma codebase for MT7990 chipset. This is a preliminary patch to enable NPU offload for MT7996 (Eagle) chipset. Tested-by: Kang Yang Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260122-mt76-npu-eagle-offload-v2-11-2374614c0de6@kernel.org Signed-off-by: Felix Fietkau --- .../net/wireless/mediatek/mt76/mt7996/dma.c | 132 ++++++++++++------ 1 file changed, 86 insertions(+), 46 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/dma.c b/drivers/net/wireless/mediatek/mt76/mt7996/dma.c index 274b273df1ee..07212d93bc62 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/dma.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/dma.c @@ -128,15 +128,27 @@ static void mt7996_dma_config(struct mt7996_dev *dev) /* data tx queue */ if (is_mt7996(&dev->mt76)) { - TXQ_CONFIG(0, WFDMA0, MT_INT_TX_DONE_BAND0, MT7996_TXQ_BAND0); if (dev->hif2) { - /* default bn1:ring19 bn2:ring21 */ - TXQ_CONFIG(1, WFDMA0, MT_INT_TX_DONE_BAND1, - MT7996_TXQ_BAND1); - TXQ_CONFIG(2, WFDMA0, MT_INT_TX_DONE_BAND2, - MT7996_TXQ_BAND2); + if (mt76_npu_device_active(&dev->mt76)) { + TXQ_CONFIG(0, WFDMA0, MT_INT_TX_DONE_BAND2, + MT7996_TXQ_BAND2); + TXQ_CONFIG(1, WFDMA0, MT_INT_TX_DONE_BAND0, + MT7996_TXQ_BAND0); + TXQ_CONFIG(2, WFDMA0, MT_INT_TX_DONE_BAND1, + MT7996_TXQ_BAND1); + } else { + /* default bn1:ring19 bn2:ring21 */ + TXQ_CONFIG(0, WFDMA0, MT_INT_TX_DONE_BAND0, + MT7996_TXQ_BAND0); + TXQ_CONFIG(1, WFDMA0, MT_INT_TX_DONE_BAND1, + MT7996_TXQ_BAND1); + TXQ_CONFIG(2, WFDMA0, MT_INT_TX_DONE_BAND2, + MT7996_TXQ_BAND2); + } } else { /* single pcie bn0/1:ring18 bn2:ring19 */ + TXQ_CONFIG(0, WFDMA0, MT_INT_TX_DONE_BAND0, + MT7996_TXQ_BAND0); TXQ_CONFIG(2, WFDMA0, MT_INT_TX_DONE_BAND1, MT7996_TXQ_BAND1); } @@ -350,6 +362,9 @@ void mt7996_dma_start(struct mt7996_dev *dev, bool reset, bool wed_reset) if (!mt7996_has_wa(dev) || mt76_npu_device_active(&dev->mt76)) irq_mask &= ~(MT_INT_RX(MT_RXQ_MAIN_WA) | MT_INT_RX(MT_RXQ_BAND1_WA)); + if (is_mt7996(&dev->mt76) && mt76_npu_device_active(&dev->mt76)) + irq_mask &= ~(MT_INT_RX(MT_RXQ_TXFREE_BAND0) | + MT_INT_RX(MT_RXQ_MSDU_PAGE_BAND2)); irq_mask = reset ? MT_INT_MCU_CMD : irq_mask; mt7996_irq_enable(dev, irq_mask); @@ -430,39 +445,48 @@ static void mt7996_dma_enable(struct mt7996_dev *dev, bool reset) MT_WFDMA_HOST_CONFIG_BAND1_PCIE1 | MT_WFDMA_HOST_CONFIG_BAND2_PCIE1); - if (is_mt7996(&dev->mt76)) - mt76_set(dev, MT_WFDMA_HOST_CONFIG, - MT_WFDMA_HOST_CONFIG_BAND2_PCIE1); - else + if (is_mt7996(&dev->mt76)) { + if (mt76_npu_device_active(&dev->mt76)) + mt76_set(dev, MT_WFDMA_HOST_CONFIG, + MT_WFDMA_HOST_CONFIG_BAND0_PCIE1); + else + mt76_set(dev, MT_WFDMA_HOST_CONFIG, + MT_WFDMA_HOST_CONFIG_BAND2_PCIE1); + } else { mt76_set(dev, MT_WFDMA_HOST_CONFIG, MT_WFDMA_HOST_CONFIG_BAND1_PCIE1); + } /* AXI read outstanding number */ mt76_rmw(dev, MT_WFDMA_AXI_R2A_CTRL, MT_WFDMA_AXI_R2A_CTRL_OUTSTAND_MASK, 0x14); - if (dev->hif2->speed < PCIE_SPEED_5_0GT || - (dev->hif2->speed == PCIE_SPEED_5_0GT && - dev->hif2->width < PCIE_LNK_X2)) { - mt76_rmw(dev, WF_WFDMA0_GLO_CFG_EXT0 + hif1_ofs, - WF_WFDMA0_GLO_CFG_EXT0_OUTSTAND_MASK, - FIELD_PREP(WF_WFDMA0_GLO_CFG_EXT0_OUTSTAND_MASK, - 0x1)); - mt76_rmw(dev, MT_WFDMA_AXI_R2A_CTRL2, - MT_WFDMA_AXI_R2A_CTRL2_OUTSTAND_MASK, - FIELD_PREP(MT_WFDMA_AXI_R2A_CTRL2_OUTSTAND_MASK, - 0x1)); - } else if (dev->hif2->speed < PCIE_SPEED_8_0GT || - (dev->hif2->speed == PCIE_SPEED_8_0GT && - dev->hif2->width < PCIE_LNK_X2)) { - mt76_rmw(dev, WF_WFDMA0_GLO_CFG_EXT0 + hif1_ofs, - WF_WFDMA0_GLO_CFG_EXT0_OUTSTAND_MASK, - FIELD_PREP(WF_WFDMA0_GLO_CFG_EXT0_OUTSTAND_MASK, - 0x2)); - mt76_rmw(dev, MT_WFDMA_AXI_R2A_CTRL2, - MT_WFDMA_AXI_R2A_CTRL2_OUTSTAND_MASK, - FIELD_PREP(MT_WFDMA_AXI_R2A_CTRL2_OUTSTAND_MASK, - 0x2)); + if (!is_mt7996(&dev->mt76) || + !mt76_npu_device_active(&dev->mt76)) { + if (dev->hif2->speed < PCIE_SPEED_5_0GT || + (dev->hif2->speed == PCIE_SPEED_5_0GT && + dev->hif2->width < PCIE_LNK_X2)) { + mt76_rmw(dev, + WF_WFDMA0_GLO_CFG_EXT0 + hif1_ofs, + WF_WFDMA0_GLO_CFG_EXT0_OUTSTAND_MASK, + FIELD_PREP(WF_WFDMA0_GLO_CFG_EXT0_OUTSTAND_MASK, + 0x1)); + mt76_rmw(dev, MT_WFDMA_AXI_R2A_CTRL2, + MT_WFDMA_AXI_R2A_CTRL2_OUTSTAND_MASK, + FIELD_PREP(MT_WFDMA_AXI_R2A_CTRL2_OUTSTAND_MASK, + 0x1)); + } else if (dev->hif2->speed < PCIE_SPEED_8_0GT || + (dev->hif2->speed == PCIE_SPEED_8_0GT && + dev->hif2->width < PCIE_LNK_X2)) { + mt76_rmw(dev, WF_WFDMA0_GLO_CFG_EXT0 + hif1_ofs, + WF_WFDMA0_GLO_CFG_EXT0_OUTSTAND_MASK, + FIELD_PREP(WF_WFDMA0_GLO_CFG_EXT0_OUTSTAND_MASK, + 0x2)); + mt76_rmw(dev, MT_WFDMA_AXI_R2A_CTRL2, + MT_WFDMA_AXI_R2A_CTRL2_OUTSTAND_MASK, + FIELD_PREP(MT_WFDMA_AXI_R2A_CTRL2_OUTSTAND_MASK, + 0x2)); + } } /* WFDMA rx threshold */ @@ -497,7 +521,7 @@ static void mt7996_dma_enable(struct mt7996_dev *dev, bool reset) int mt7996_dma_rro_init(struct mt7996_dev *dev) { struct mt76_dev *mdev = &dev->mt76; - u32 irq_mask; + u32 irq_mask, size; int ret; if (dev->mt76.hwrro_mode == MT76_HWRRO_V3_1) { @@ -545,10 +569,12 @@ int mt7996_dma_rro_init(struct mt7996_dev *dev) if (mtk_wed_device_active(&mdev->mmio.wed) && mtk_wed_get_rx_capa(&mdev->mmio.wed)) mdev->q_rx[MT_RXQ_MSDU_PAGE_BAND0].wed = &mdev->mmio.wed; + + size = is_mt7996(mdev) && mt76_npu_device_active(mdev) + ? MT7996_NPU_RX_RING_SIZE / 4 : MT7996_RX_RING_SIZE; ret = mt76_queue_alloc(dev, &mdev->q_rx[MT_RXQ_MSDU_PAGE_BAND0], MT_RXQ_ID(MT_RXQ_MSDU_PAGE_BAND0), - MT7996_RX_RING_SIZE, - MT7996_RX_MSDU_PAGE_SIZE, + size, MT7996_RX_MSDU_PAGE_SIZE, MT_RXQ_RING_BASE(MT_RXQ_MSDU_PAGE_BAND0)); if (ret) return ret; @@ -560,10 +586,12 @@ int mt7996_dma_rro_init(struct mt7996_dev *dev) if (mtk_wed_device_active(&mdev->mmio.wed) && mtk_wed_get_rx_capa(&mdev->mmio.wed)) mdev->q_rx[MT_RXQ_MSDU_PAGE_BAND1].wed = &mdev->mmio.wed; + + size = is_mt7996(mdev) && mt76_npu_device_active(mdev) + ? MT7996_NPU_RX_RING_SIZE / 2 : MT7996_RX_RING_SIZE; ret = mt76_queue_alloc(dev, &mdev->q_rx[MT_RXQ_MSDU_PAGE_BAND1], MT_RXQ_ID(MT_RXQ_MSDU_PAGE_BAND1), - MT7996_RX_RING_SIZE, - MT7996_RX_MSDU_PAGE_SIZE, + size, MT7996_RX_MSDU_PAGE_SIZE, MT_RXQ_RING_BASE(MT_RXQ_MSDU_PAGE_BAND1)); if (ret) return ret; @@ -576,10 +604,12 @@ int mt7996_dma_rro_init(struct mt7996_dev *dev) if (mtk_wed_device_active(&mdev->mmio.wed) && mtk_wed_get_rx_capa(&mdev->mmio.wed)) mdev->q_rx[MT_RXQ_MSDU_PAGE_BAND2].wed = &mdev->mmio.wed; + + size = is_mt7996(mdev) && mt76_npu_device_active(mdev) + ? MT7996_NPU_RX_RING_SIZE : MT7996_RX_RING_SIZE; ret = mt76_queue_alloc(dev, &mdev->q_rx[MT_RXQ_MSDU_PAGE_BAND2], MT_RXQ_ID(MT_RXQ_MSDU_PAGE_BAND2), - MT7996_RX_RING_SIZE, - MT7996_RX_MSDU_PAGE_SIZE, + size, MT7996_RX_MSDU_PAGE_SIZE, MT_RXQ_RING_BASE(MT_RXQ_MSDU_PAGE_BAND2)); if (ret) return ret; @@ -642,11 +672,16 @@ int mt7996_dma_init(struct mt7996_dev *dev) mt7996_dma_disable(dev, true); /* init tx queue */ - ret = mt7996_init_tx_queues(&dev->phy, - MT_TXQ_ID(dev->mphy.band_idx), - MT7996_TX_RING_SIZE, - MT_TXQ_RING_BASE(0), - wed); + if (is_mt7996(&dev->mt76) && mt76_npu_device_active(&dev->mt76)) + ret = mt7996_init_tx_queues(&dev->phy, MT_TXQ_ID(0), + MT7996_NPU_TX_RING_SIZE, + MT_TXQ_RING_BASE(0) + hif1_ofs, + NULL); + else + ret = mt7996_init_tx_queues(&dev->phy, + MT_TXQ_ID(dev->mphy.band_idx), + MT7996_TX_RING_SIZE, + MT_TXQ_RING_BASE(0), wed); if (ret) return ret; @@ -859,16 +894,21 @@ int mt7996_dma_init(struct mt7996_dev *dev) } if (mt7996_band_valid(dev, MT_BAND2)) { + u32 size; + /* rx rro data queue for band2 */ dev->mt76.q_rx[MT_RXQ_RRO_BAND2].flags = MT_WED_RRO_Q_DATA(1) | MT_QFLAG_WED_RRO_EN; if (mtk_wed_device_active(wed) && mtk_wed_get_rx_capa(wed)) dev->mt76.q_rx[MT_RXQ_RRO_BAND2].wed = wed; + + size = is_mt7996(&dev->mt76) && + mt76_npu_device_active(&dev->mt76) + ? MT7996_NPU_RX_RING_SIZE : MT7996_RX_RING_SIZE; ret = mt76_queue_alloc(dev, &dev->mt76.q_rx[MT_RXQ_RRO_BAND2], MT_RXQ_ID(MT_RXQ_RRO_BAND2), - MT7996_RX_RING_SIZE, - MT7996_RX_BUF_SIZE, + size, MT7996_RX_BUF_SIZE, MT_RXQ_RING_BASE(MT_RXQ_RRO_BAND2) + hif1_ofs); if (ret) return ret; From 93e2491470d34d2d45b240123da0267d6de68c71 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 22 Jan 2026 11:39:56 +0100 Subject: [PATCH 0727/1561] wifi: mt76: mt7996: Add __mt7996_npu_hw_init routine Introduce __mt7996_npu_hw_init utility routine in order to run it holding mt76 mutex and move NPU hw re-initialization before restarting the NAPIs during device reset. Tested-by: Kang Yang Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260122-mt76-npu-eagle-offload-v2-12-2374614c0de6@kernel.org Signed-off-by: Felix Fietkau --- .../net/wireless/mediatek/mt76/mt7996/mac.c | 4 +-- .../wireless/mediatek/mt76/mt7996/mt7996.h | 6 ++++ .../net/wireless/mediatek/mt76/mt7996/npu.c | 31 ++++++++++++------- 3 files changed, 27 insertions(+), 14 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c index 00c6045622e3..7bee97ff71df 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c @@ -2569,6 +2569,8 @@ void mt7996_mac_reset_work(struct work_struct *work) MT_INT_TX_RX_DONE_EXT); } + __mt7996_npu_hw_init(dev); + clear_bit(MT76_MCU_RESET, &dev->mphy.state); mt7996_for_each_phy(dev, phy) clear_bit(MT76_RESET, &phy->mt76->state); @@ -2598,8 +2600,6 @@ void mt7996_mac_reset_work(struct work_struct *work) mutex_unlock(&dev->mt76.mutex); - mt7996_npu_hw_init(dev); - mt7996_for_each_phy(dev, phy) ieee80211_queue_delayed_work(hw, &phy->mt76->mac_work, MT7996_WATCHDOG_TIME); diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h index c9c506e1d6ed..dfa338e8508b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h @@ -889,10 +889,16 @@ int mt7996_mtk_init_debugfs(struct mt7996_phy *phy, struct dentry *dir); int mt7996_dma_rro_init(struct mt7996_dev *dev); #ifdef CONFIG_MT7996_NPU +int __mt7996_npu_hw_init(struct mt7996_dev *dev); int mt7996_npu_hw_init(struct mt7996_dev *dev); int mt7996_npu_hw_stop(struct mt7996_dev *dev); int mt7996_npu_rx_queues_init(struct mt7996_dev *dev); #else +static inline int __mt7996_npu_hw_init(struct mt7996_dev *dev) +{ + return 0; +} + static inline int mt7996_npu_hw_init(struct mt7996_dev *dev) { return 0; diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/npu.c b/drivers/net/wireless/mediatek/mt76/mt7996/npu.c index c3307bbbb547..0085eddc88bc 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/npu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/npu.c @@ -525,20 +525,18 @@ int mt7996_npu_rx_queues_init(struct mt7996_dev *dev) &dev->mt76.q_rx[MT_RXQ_NPU1]); } -int mt7996_npu_hw_init(struct mt7996_dev *dev) +int __mt7996_npu_hw_init(struct mt7996_dev *dev) { struct airoha_npu *npu; - int i, err = 0; - - mutex_lock(&dev->mt76.mutex); + int i, err; npu = rcu_dereference_protected(dev->mt76.mmio.npu, &dev->mt76.mutex); if (!npu) - goto unlock; + return 0; err = mt7996_npu_offload_init(dev, npu); if (err) - goto unlock; + return err; if (is_mt7996(&dev->mt76)) err = mt7996_npu_rxd_init(dev, npu); @@ -546,27 +544,36 @@ int mt7996_npu_hw_init(struct mt7996_dev *dev) err = mt7992_npu_rxd_init(dev, npu); if (err) - goto unlock; + return err; err = mt7996_npu_txd_init(dev, npu); if (err) - goto unlock; + return err; err = mt7996_npu_rx_event_init(dev, npu); if (err) - goto unlock; + return err; err = mt7996_npu_set_pcie_addr(dev, npu); if (err) - goto unlock; + return err; err = mt7996_npu_tx_done_init(dev, npu); if (err) - goto unlock; + return err; for (i = MT_RXQ_NPU0; i <= MT_RXQ_NPU1; i++) airoha_npu_wlan_enable_irq(npu, i - MT_RXQ_NPU0); -unlock: + + return 0; +} + +int mt7996_npu_hw_init(struct mt7996_dev *dev) +{ + int err; + + mutex_lock(&dev->mt76.mutex); + err = __mt7996_npu_hw_init(dev); mutex_unlock(&dev->mt76.mutex); return err; From ae8ee98014bab9b6b1b782bb19cf47317ea0499a Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 22 Jan 2026 11:39:57 +0100 Subject: [PATCH 0728/1561] wifi: mt76: mt7996: Move RRO dma start in a dedicated routine This is a preliminary patch to properly enable NPU offloading for MT7996 chipset since NPU initialization must be completed before kicking rx queues. Tested-by: Kang Yang Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260122-mt76-npu-eagle-offload-v2-13-2374614c0de6@kernel.org Signed-off-by: Felix Fietkau --- .../net/wireless/mediatek/mt76/mt7996/dma.c | 73 ++++++++++--------- .../net/wireless/mediatek/mt76/mt7996/init.c | 2 + .../wireless/mediatek/mt76/mt7996/mt7996.h | 1 + 3 files changed, 43 insertions(+), 33 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/dma.c b/drivers/net/wireless/mediatek/mt76/mt7996/dma.c index 07212d93bc62..1a4f5f5b2a84 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/dma.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/dma.c @@ -521,7 +521,7 @@ static void mt7996_dma_enable(struct mt7996_dev *dev, bool reset) int mt7996_dma_rro_init(struct mt7996_dev *dev) { struct mt76_dev *mdev = &dev->mt76; - u32 irq_mask, size; + u32 size; int ret; if (dev->mt76.hwrro_mode == MT76_HWRRO_V3_1) { @@ -548,7 +548,8 @@ int mt7996_dma_rro_init(struct mt7996_dev *dev) mt76_queue_reset(dev, &mdev->q_rx[MT_RXQ_RRO_RXDMAD_C], true); } - goto start_hw_rro; + + return 0; } /* ind cmd */ @@ -615,43 +616,49 @@ int mt7996_dma_rro_init(struct mt7996_dev *dev) return ret; } -start_hw_rro: - if (mtk_wed_device_active(&mdev->mmio.wed)) { - irq_mask = mdev->mmio.irqmask | + return 0; +} + +void mt7996_dma_rro_start(struct mt7996_dev *dev) +{ + u32 irq_mask; + + if (mtk_wed_device_active(&dev->mt76.mmio.wed)) { + irq_mask = dev->mt76.mmio.irqmask | MT_INT_TX_DONE_BAND2; mt76_wr(dev, MT_INT_MASK_CSR, irq_mask); - mtk_wed_device_start_hw_rro(&mdev->mmio.wed, irq_mask, false); + mtk_wed_device_start_hw_rro(&dev->mt76.mmio.wed, irq_mask, + false); mt7996_irq_enable(dev, irq_mask); - } else { - if (is_mt7996(&dev->mt76)) { - mt76_queue_rx_init(dev, MT_RXQ_MSDU_PAGE_BAND1, - mt76_dma_rx_poll); - mt76_queue_rx_init(dev, MT_RXQ_MSDU_PAGE_BAND2, - mt76_dma_rx_poll); - mt76_queue_rx_init(dev, MT_RXQ_RRO_BAND2, - mt76_dma_rx_poll); - } else { - mt76_queue_rx_init(dev, MT_RXQ_RRO_BAND1, - mt76_dma_rx_poll); - } - - mt76_queue_rx_init(dev, MT_RXQ_RRO_BAND0, mt76_dma_rx_poll); - if (dev->mt76.hwrro_mode == MT76_HWRRO_V3_1) { - mt76_queue_rx_init(dev, MT_RXQ_RRO_RXDMAD_C, - mt76_dma_rx_poll); - } else { - mt76_queue_rx_init(dev, MT_RXQ_RRO_IND, - mt76_dma_rx_poll); - mt76_queue_rx_init(dev, MT_RXQ_MSDU_PAGE_BAND0, - mt76_dma_rx_poll); - } - - if (!mt76_npu_device_active(&dev->mt76)) - mt7996_irq_enable(dev, MT_INT_RRO_RX_DONE); + return; } - return 0; + if (is_mt7996(&dev->mt76)) { + mt76_queue_rx_init(dev, MT_RXQ_MSDU_PAGE_BAND1, + mt76_dma_rx_poll); + mt76_queue_rx_init(dev, MT_RXQ_MSDU_PAGE_BAND2, + mt76_dma_rx_poll); + mt76_queue_rx_init(dev, MT_RXQ_RRO_BAND2, + mt76_dma_rx_poll); + } else { + mt76_queue_rx_init(dev, MT_RXQ_RRO_BAND1, + mt76_dma_rx_poll); + } + + mt76_queue_rx_init(dev, MT_RXQ_RRO_BAND0, mt76_dma_rx_poll); + if (dev->mt76.hwrro_mode == MT76_HWRRO_V3_1) { + mt76_queue_rx_init(dev, MT_RXQ_RRO_RXDMAD_C, + mt76_dma_rx_poll); + } else { + mt76_queue_rx_init(dev, MT_RXQ_RRO_IND, + mt76_dma_rx_poll); + mt76_queue_rx_init(dev, MT_RXQ_MSDU_PAGE_BAND0, + mt76_dma_rx_poll); + } + + if (!mt76_npu_device_active(&dev->mt76)) + mt7996_irq_enable(dev, MT_INT_RRO_RX_DONE); } int mt7996_dma_init(struct mt7996_dev *dev) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/init.c b/drivers/net/wireless/mediatek/mt76/mt7996/init.c index b76bd324a927..8aa9807b5087 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/init.c @@ -1739,6 +1739,8 @@ int mt7996_register_device(struct mt7996_dev *dev) if (ret) return ret; + mt7996_dma_rro_start(dev); + ret = mt76_register_device(&dev->mt76, true, mt76_rates, ARRAY_SIZE(mt76_rates)); if (ret) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h index dfa338e8508b..5b2aa7b2fa4f 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h @@ -887,6 +887,7 @@ int mt7996_mtk_init_debugfs(struct mt7996_phy *phy, struct dentry *dir); #endif int mt7996_dma_rro_init(struct mt7996_dev *dev); +void mt7996_dma_rro_start(struct mt7996_dev *dev); #ifdef CONFIG_MT7996_NPU int __mt7996_npu_hw_init(struct mt7996_dev *dev); From 966c44ba73097a81fb47e9c6cac71e816e9f5084 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 22 Jan 2026 11:39:58 +0100 Subject: [PATCH 0729/1561] wifi: mt76: Do not reset idx for NPU tx queues during reset Do not run reset_q callaback with reset_idx set to true for NPU Tx queues. This is a preliminary patch to properly manage reset procedure when NPU offloading is enabled. Tested-by: Kang Yang Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260122-mt76-npu-eagle-offload-v2-14-2374614c0de6@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/dma.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/dma.h b/drivers/net/wireless/mediatek/mt76/dma.h index 4a63de6c5bf5..2a0226c83f3c 100644 --- a/drivers/net/wireless/mediatek/mt76/dma.h +++ b/drivers/net/wireless/mediatek/mt76/dma.h @@ -174,7 +174,9 @@ void mt76_dma_queue_reset(struct mt76_dev *dev, struct mt76_queue *q, static inline void mt76_dma_reset_tx_queue(struct mt76_dev *dev, struct mt76_queue *q) { - dev->queue_ops->reset_q(dev, q, true); + bool reset_idx = q && !mt76_queue_is_npu_tx(q); + + dev->queue_ops->reset_q(dev, q, reset_idx); if (mtk_wed_device_active(&dev->mmio.wed)) mt76_wed_dma_setup(dev, q, true); } From 850856c4777c80348507da1543e58006ff0063d2 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 22 Jan 2026 11:39:59 +0100 Subject: [PATCH 0730/1561] wifi: mt76: mt7996: Do not schedule RRO and TxFree queues during reset for NPU This is a preliminary patch to properly manage reset procedure when NPU offloading is enabled. Tested-by: Kang Yang Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260122-mt76-npu-eagle-offload-v2-15-2374614c0de6@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/dma.c | 11 +++++++++++ drivers/net/wireless/mediatek/mt76/mt76.h | 10 ++++++++++ drivers/net/wireless/mediatek/mt76/mt7996/dma.c | 5 +++++ drivers/net/wireless/mediatek/mt76/mt7996/mac.c | 14 ++++++++++++++ 4 files changed, 40 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c index f5c6bb94ccbb..2d133ace7c33 100644 --- a/drivers/net/wireless/mediatek/mt76/dma.c +++ b/drivers/net/wireless/mediatek/mt76/dma.c @@ -881,6 +881,10 @@ mt76_dma_rx_cleanup(struct mt76_dev *dev, struct mt76_queue *q) mt76_queue_is_wed_rro(q)) continue; + if (mt76_npu_device_active(dev) && + mt76_queue_is_wed_rro(q)) + continue; + if (!mt76_queue_is_wed_rro_rxdmad_c(q) && !mt76_queue_is_wed_rro_ind(q)) mt76_put_page_pool_buf(buf, false); @@ -923,6 +927,13 @@ mt76_dma_rx_reset(struct mt76_dev *dev, enum mt76_rxq_id qid) mt76_queue_is_wed_rro(q)) return; + if (mt76_npu_device_active(dev) && + mt76_queue_is_wed_rro(q)) + return; + + if (mt76_queue_is_npu_txfree(q)) + return; + mt76_dma_sync_idx(dev, q); if (mt76_queue_is_npu(q)) mt76_npu_fill_rx_queue(dev, q); diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index eefc3f555f8a..5e68efc367fc 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -55,6 +55,8 @@ FIELD_PREP(MT_QFLAG_WED_RING, _n)) #define MT_NPU_Q_TX(_n) __MT_NPU_Q(MT76_WED_Q_TX, _n) #define MT_NPU_Q_RX(_n) __MT_NPU_Q(MT76_WED_Q_RX, _n) +#define MT_NPU_Q_TXFREE(_n) (FIELD_PREP(MT_QFLAG_WED_TYPE, MT76_WED_Q_TXFREE) | \ + FIELD_PREP(MT_QFLAG_WED_RING, _n)) struct mt76_dev; struct mt76_phy; @@ -2003,6 +2005,14 @@ static inline bool mt76_queue_is_npu_rx(struct mt76_queue *q) FIELD_GET(MT_QFLAG_WED_TYPE, q->flags) == MT76_WED_Q_RX; } +static inline bool mt76_queue_is_npu_txfree(struct mt76_queue *q) +{ + if (q->flags & MT_QFLAG_WED) + return false; + + return FIELD_GET(MT_QFLAG_WED_TYPE, q->flags) == MT76_WED_Q_TXFREE; +} + struct mt76_txwi_cache * mt76_token_release(struct mt76_dev *dev, int token, bool *wake); int mt76_token_consume(struct mt76_dev *dev, struct mt76_txwi_cache **ptxwi); diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/dma.c b/drivers/net/wireless/mediatek/mt76/mt7996/dma.c index 1a4f5f5b2a84..8f5d297dafce 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/dma.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/dma.c @@ -756,6 +756,9 @@ int mt7996_dma_init(struct mt7996_dev *dev) (is_mt7992(&dev->mt76)))) { dev->mt76.q_rx[MT_RXQ_MAIN_WA].flags = MT_WED_Q_TXFREE; dev->mt76.q_rx[MT_RXQ_MAIN_WA].wed = wed; + } else if (is_mt7992(&dev->mt76) && + mt76_npu_device_active(&dev->mt76)) { + dev->mt76.q_rx[MT_RXQ_MAIN_WA].flags = MT_NPU_Q_TXFREE(0); } if (mt7996_has_wa(dev)) { @@ -888,6 +891,8 @@ int mt7996_dma_init(struct mt7996_dev *dev) /* tx free notify event from WA for band0 */ dev->mt76.q_rx[MT_RXQ_TXFREE_BAND0].flags = MT_WED_Q_TXFREE; dev->mt76.q_rx[MT_RXQ_TXFREE_BAND0].wed = wed; + } else if (mt76_npu_device_active(&dev->mt76)) { + dev->mt76.q_rx[MT_RXQ_TXFREE_BAND0].flags = MT_NPU_Q_TXFREE(0); } ret = mt76_queue_alloc(dev, diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c index 7bee97ff71df..c8cfc343d37b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c @@ -2525,6 +2525,13 @@ void mt7996_mac_reset_work(struct work_struct *work) mt76_queue_is_wed_rro(&dev->mt76.q_rx[i])) continue; + if (mt76_npu_device_active(&dev->mt76) && + mt76_queue_is_wed_rro(&dev->mt76.q_rx[i])) + continue; + + if (mt76_queue_is_npu_txfree(&dev->mt76.q_rx[i])) + continue; + napi_disable(&dev->mt76.napi[i]); } napi_disable(&dev->mt76.tx_napi); @@ -2580,6 +2587,13 @@ void mt7996_mac_reset_work(struct work_struct *work) mt76_queue_is_wed_rro(&dev->mt76.q_rx[i])) continue; + if (mt76_npu_device_active(&dev->mt76) && + mt76_queue_is_wed_rro(&dev->mt76.q_rx[i])) + continue; + + if (mt76_queue_is_npu_txfree(&dev->mt76.q_rx[i])) + continue; + napi_enable(&dev->mt76.napi[i]); local_bh_disable(); napi_schedule(&dev->mt76.napi[i]); From c2efd5fe154686e52fff7321c97b2d21c569c36e Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 22 Jan 2026 11:40:00 +0100 Subject: [PATCH 0731/1561] wifi: mt76: mt7996: Store DMA mapped buffer addresses in mt7996_npu_hw_init() In order to not always reallocate them during NPU reset, store the DMA mapped buffer addresses allocated by mt7996_npu_hw_init routine in mt7996 structure. Tested-by: Kang Yang Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260122-mt76-npu-eagle-offload-v2-16-2374614c0de6@kernel.org Signed-off-by: Felix Fietkau --- .../wireless/mediatek/mt76/mt7996/mt7996.h | 4 ++ .../net/wireless/mediatek/mt76/mt7996/npu.c | 58 +++++++++++-------- 2 files changed, 39 insertions(+), 23 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h index 5b2aa7b2fa4f..3ff730e36fa6 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h @@ -31,6 +31,8 @@ #define MT7996_RX_MCU_RING_SIZE_WA 1024 #define MT7996_NPU_TX_RING_SIZE 1024 #define MT7996_NPU_RX_RING_SIZE 1024 +#define MT7996_NPU_TXD_SIZE 3 + /* scatter-gather of mcu event is not supported in connac3 */ #define MT7996_RX_MCU_BUF_SIZE (2048 + \ SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) @@ -476,6 +478,8 @@ struct mt7996_dev { struct list_head page_map[MT7996_RRO_MSDU_PG_HASH_SIZE]; } wed_rro; + dma_addr_t npu_txd_addr[2 * MT7996_NPU_TXD_SIZE]; + bool ibf; u8 fw_debug_wm; u8 fw_debug_wa; diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/npu.c b/drivers/net/wireless/mediatek/mt76/mt7996/npu.c index 0085eddc88bc..b8006b8729a1 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/npu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/npu.c @@ -344,12 +344,14 @@ static int mt7996_npu_txd_init(struct mt7996_dev *dev, struct airoha_npu *npu) MT_BAND0, is_mt7996(&dev->mt76) ? MT_BAND2 : MT_BAND1, }; - int i; + int i, index = 0; + + BUILD_BUG_ON(ARRAY_SIZE(band_list) * 3 != + ARRAY_SIZE(dev->npu_txd_addr)); for (i = 0; i < ARRAY_SIZE(band_list); i++) { int err, band = band_list[i], phy_id; - dma_addr_t dma_addr; - u32 val, size; + u32 val; err = mt76_npu_get_msg(npu, band + 5, WLAN_FUNC_GET_WAIT_RXDESC_BASE, @@ -364,43 +366,29 @@ static int mt7996_npu_txd_init(struct mt7996_dev *dev, struct airoha_npu *npu) : band; writel(val, &dev->mt76.phys[phy_id]->q_tx[0]->regs->desc_base); - size = is_mt7996(&dev->mt76) ? band == MT_BAND2 - ? MT7996_NPU_TX_RING_SIZE - : MT7996_NPU_RX_RING_SIZE / 2 - : MT7996_TX_RING_SIZE; - if (!dmam_alloc_coherent(dev->mt76.dma_dev, 256 * size, - &dma_addr, GFP_KERNEL)) - return -ENOMEM; - err = mt76_npu_send_msg(npu, band, WLAN_FUNC_SET_WAIT_TX_BUF_SPACE_HW_BASE, - dma_addr, GFP_KERNEL); + dev->npu_txd_addr[index++], GFP_KERNEL); if (err) { dev_warn(dev->mt76.dev, "failed setting NPU wlan queue buf addr\n"); return err; } - if (!dmam_alloc_coherent(dev->mt76.dma_dev, 256 * size, - &dma_addr, GFP_KERNEL)) - return -ENOMEM; - err = mt76_npu_send_msg(npu, band + 5, WLAN_FUNC_SET_WAIT_TX_BUF_SPACE_HW_BASE, - dma_addr, GFP_KERNEL); + dev->npu_txd_addr[index++], + GFP_KERNEL); if (err) { dev_warn(dev->mt76.dev, "failed setting NPU wlan tx buf addr\n"); return err; } - if (!dmam_alloc_coherent(dev->mt76.dma_dev, 256 * 1024, - &dma_addr, GFP_KERNEL)) - return -ENOMEM; - err = mt76_npu_send_msg(npu, band + 10, WLAN_FUNC_SET_WAIT_TX_BUF_SPACE_HW_BASE, - dma_addr, GFP_KERNEL); + dev->npu_txd_addr[index++], + GFP_KERNEL); if (err) { dev_warn(dev->mt76.dev, "failed setting NPU wlan tx buf base\n"); @@ -570,7 +558,31 @@ int __mt7996_npu_hw_init(struct mt7996_dev *dev) int mt7996_npu_hw_init(struct mt7996_dev *dev) { - int err; + int i, err; + + BUILD_BUG_ON(ARRAY_SIZE(dev->npu_txd_addr) % 3); + + for (i = 0; i < ARRAY_SIZE(dev->npu_txd_addr); i += 3) { + int band = i && is_mt7996(&dev->mt76) ? MT_BAND2 : MT_BAND0; + u32 size = is_mt7996(&dev->mt76) ? band == MT_BAND2 + ? MT7996_NPU_TX_RING_SIZE + : MT7996_NPU_RX_RING_SIZE / 2 + : MT7996_TX_RING_SIZE; + + if (!dmam_alloc_coherent(dev->mt76.dma_dev, 256 * size, + &dev->npu_txd_addr[i], GFP_KERNEL)) + return -ENOMEM; + + if (!dmam_alloc_coherent(dev->mt76.dma_dev, 256 * size, + &dev->npu_txd_addr[i + 1], + GFP_KERNEL)) + return -ENOMEM; + + if (!dmam_alloc_coherent(dev->mt76.dma_dev, 256 * 1024, + &dev->npu_txd_addr[i + 2], + GFP_KERNEL)) + return -ENOMEM; + } mutex_lock(&dev->mt76.mutex); err = __mt7996_npu_hw_init(dev); From 53afca4329af885fe08703b93e71cb5589835f27 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 22 Jan 2026 11:40:01 +0100 Subject: [PATCH 0732/1561] wifi: mt76: Enable NPU support for MT7996 devices Enable NPU offloading for MT7990 chipset. Tested-by: Kang Yang Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260122-mt76-npu-eagle-offload-v2-17-2374614c0de6@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/npu.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/npu.c b/drivers/net/wireless/mediatek/mt76/npu.c index bc8f2012be9d..c4c7c0af6321 100644 --- a/drivers/net/wireless/mediatek/mt76/npu.c +++ b/drivers/net/wireless/mediatek/mt76/npu.c @@ -450,10 +450,6 @@ int mt76_npu_init(struct mt76_dev *dev, phys_addr_t phy_addr, int type) struct airoha_npu *npu; int err = 0; - /* NPU offloading is only supported by MT7992 */ - if (!is_mt7992(dev)) - return 0; - mutex_lock(&dev->mutex); npu = airoha_npu_get(dev->dev); @@ -486,7 +482,7 @@ int mt76_npu_init(struct mt76_dev *dev, phys_addr_t phy_addr, int type) dev->mmio.phy_addr = phy_addr; dev->mmio.npu_type = type; /* NPU offloading requires HW-RRO for RX packet reordering. */ - dev->hwrro_mode = MT76_HWRRO_V3_1; + dev->hwrro_mode = is_mt7996(dev) ? MT76_HWRRO_V3 : MT76_HWRRO_V3_1; dev->rx_token_size = 32768; rcu_assign_pointer(dev->mmio.npu, npu); From 59a1864509d084a4b34117e693951c06b846b00a Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Mon, 15 Dec 2025 20:20:17 -0600 Subject: [PATCH 0733/1561] wifi: mt76: mt7925: drop puncturing handling from BSS change path IEEE80211_CHANCTX_CHANGE_PUNCTURING is a channel context change flag and should not be checked in the BSS change handler, where the changed mask represents enum ieee80211_bss_change. Remove the puncturing handling from the BSS path and rely on mt7925_change_chanctx() to update puncturing configuration. Fixes: cadebdad959b ("wifi: mt76: mt7925: add EHT preamble puncturing") Signed-off-by: Sean Wang Link: https://patch.msgid.link/20251216022017.23870-1-sean.wang@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7925/main.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/main.c b/drivers/net/wireless/mediatek/mt76/mt7925/main.c index 1fea2e807f77..a0a4307652d2 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/main.c @@ -1907,10 +1907,8 @@ static void mt7925_link_info_changed(struct ieee80211_hw *hw, struct mt792x_phy *phy = mt792x_hw_phy(hw); struct mt792x_dev *dev = mt792x_hw_dev(hw); struct mt792x_bss_conf *mconf; - struct ieee80211_bss_conf *link_conf; mconf = mt792x_vif_to_link(mvif, info->link_id); - link_conf = mt792x_vif_to_bss_conf(vif, mconf->link_id); mt792x_mutex_acquire(dev); @@ -1952,10 +1950,6 @@ static void mt7925_link_info_changed(struct ieee80211_hw *hw, mvif->mlo_pm_state = MT792x_MLO_CHANGED_PS; } - if (changed & IEEE80211_CHANCTX_CHANGE_PUNCTURING) - mt7925_mcu_set_eht_pp(mvif->phy->mt76, &mconf->mt76, - link_conf, NULL); - if (changed & BSS_CHANGED_CQM) mt7925_mcu_set_rssimonitor(dev, vif); From 8c7e19612b01567f641d3ffe21e47fa21c331171 Mon Sep 17 00:00:00 2001 From: Michael Lo Date: Mon, 12 Jan 2026 19:40:07 +0800 Subject: [PATCH 0734/1561] wifi: mt76: mt7925: Skip scan process during suspend. We are experiencing command timeouts because an upper layer triggers an unexpected scan while the system/device is in suspend. The upper layer should not initiate scans until the NIC has fully resumed. We want to prevent scans during suspend and avoid timeouts without harming power management or user experience. Signed-off-by: Michael Lo Link: https://patch.msgid.link/20260112114007.2115873-1-leon.yen@mediatek.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7925/main.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/main.c b/drivers/net/wireless/mediatek/mt76/mt7925/main.c index a0a4307652d2..4220db3f7216 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/main.c @@ -1327,10 +1327,18 @@ void mt7925_mlo_pm_work(struct work_struct *work) void mt7925_scan_work(struct work_struct *work) { struct mt792x_phy *phy; + struct mt792x_dev *dev; + struct mt76_connac_pm *pm; phy = (struct mt792x_phy *)container_of(work, struct mt792x_phy, scan_work.work); + dev = phy->dev; + pm = &dev->pm; + + if (pm->suspended) + return; + while (true) { struct sk_buff *skb; struct tlv *tlv; From dd08ca3f092f4185ece69ce2a835c23198b1628a Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Mon, 15 Dec 2025 19:38:49 -0600 Subject: [PATCH 0735/1561] wifi: mt76: mt7925: fix potential deadlock in mt7925_roc_abort_sync roc_abort_sync() can deadlock with roc_work(). roc_work() holds dev->mt76.mutex, while cancel_work_sync() waits for roc_work() to finish. If the caller already owns the same mutex, both sides block and no progress is possible. This deadlock can occur during station removal when mt76_sta_state() -> mt76_sta_remove() -> mt7925_mac_sta_remove_link() -> mt7925_mac_link_sta_remove() -> mt7925_roc_abort_sync() invokes cancel_work_sync() while roc_work() is still running and holding dev->mt76.mutex. This avoids the mutex deadlock and preserves exactly-once work ownership. Fixes: 45064d19fd3a ("wifi: mt76: mt7925: fix a potential association failure upon resuming") Co-developed-by: Quan Zhou Signed-off-by: Quan Zhou Signed-off-by: Sean Wang Link: https://patch.msgid.link/20251216013849.17976-1-sean.wang@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7925/main.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/main.c b/drivers/net/wireless/mediatek/mt76/mt7925/main.c index 4220db3f7216..afcc0fa4aa35 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/main.c @@ -461,12 +461,16 @@ void mt7925_roc_abort_sync(struct mt792x_dev *dev) { struct mt792x_phy *phy = &dev->phy; + if (!test_and_clear_bit(MT76_STATE_ROC, &phy->mt76->state)) + return; + timer_delete_sync(&phy->roc_timer); - cancel_work_sync(&phy->roc_work); - if (test_and_clear_bit(MT76_STATE_ROC, &phy->mt76->state)) - ieee80211_iterate_interfaces(mt76_hw(dev), - IEEE80211_IFACE_ITER_RESUME_ALL, - mt7925_roc_iter, (void *)phy); + + cancel_work(&phy->roc_work); + + ieee80211_iterate_interfaces(mt76_hw(dev), + IEEE80211_IFACE_ITER_RESUME_ALL, + mt7925_roc_iter, (void *)phy); } EXPORT_SYMBOL_GPL(mt7925_roc_abort_sync); From c41075ce8cf05ed8c0e7b7efef000dce548ffc42 Mon Sep 17 00:00:00 2001 From: Zilin Guan Date: Fri, 16 Jan 2026 14:49:19 +0000 Subject: [PATCH 0736/1561] wifi: mt76: Fix memory leak after mt76_connac_mcu_alloc_sta_req() mt76_connac_mcu_alloc_sta_req() allocates an skb which is expected to be freed eventually by mt76_mcu_skb_send_msg(). However, currently if an intermediate function fails before sending, the allocated skb is leaked. Specifically, mt76_connac_mcu_sta_wed_update() and mt76_connac_mcu_sta_key_tlv() may fail, leading to an immediate memory leak in the error path. Fix this by explicitly freeing the skb in these error paths. Commit 7c0f63fe37a5 ("wifi: mt76: mt7996: fix memory leak on mt7996_mcu_sta_key_tlv error") made a similar change. Compile tested only. Issue found using a prototype static analysis tool and code review. Fixes: d1369e515efe ("wifi: mt76: connac: introduce mt76_connac_mcu_sta_wed_update utility routine") Fixes: 6683d988089c ("mt76: connac: move mt76_connac_mcu_add_key in connac module") Fixes: 4f831d18d12d ("wifi: mt76: mt7915: enable WED RX support") Fixes: c948b5da6bbe ("wifi: mt76: mt7925: add Mediatek Wi-Fi7 driver for mt7925 chips") Signed-off-by: Zilin Guan Link: https://patch.msgid.link/20260116144919.1482558-1-zilin@seu.edu.cn Signed-off-by: Felix Fietkau --- .../net/wireless/mediatek/mt76/mt76_connac_mcu.c | 16 ++++++++++++---- drivers/net/wireless/mediatek/mt76/mt7915/mcu.c | 4 +++- drivers/net/wireless/mediatek/mt76/mt7925/mcu.c | 4 +++- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c index 0457712286d5..3f583e2a1dc1 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c @@ -1295,8 +1295,10 @@ int mt76_connac_mcu_sta_ba(struct mt76_dev *dev, struct mt76_vif_link *mvif, wtbl_hdr); ret = mt76_connac_mcu_sta_wed_update(dev, skb); - if (ret) + if (ret) { + dev_kfree_skb(skb); return ret; + } ret = mt76_mcu_skb_send_msg(dev, skb, cmd, true); if (ret) @@ -1309,8 +1311,10 @@ int mt76_connac_mcu_sta_ba(struct mt76_dev *dev, struct mt76_vif_link *mvif, mt76_connac_mcu_sta_ba_tlv(skb, params, enable, tx); ret = mt76_connac_mcu_sta_wed_update(dev, skb); - if (ret) + if (ret) { + dev_kfree_skb(skb); return ret; + } return mt76_mcu_skb_send_msg(dev, skb, cmd, true); } @@ -2764,12 +2768,16 @@ int mt76_connac_mcu_add_key(struct mt76_dev *dev, struct ieee80211_vif *vif, return PTR_ERR(skb); ret = mt76_connac_mcu_sta_key_tlv(sta_key_conf, skb, key, cmd); - if (ret) + if (ret) { + dev_kfree_skb(skb); return ret; + } ret = mt76_connac_mcu_sta_wed_update(dev, skb); - if (ret) + if (ret) { + dev_kfree_skb(skb); return ret; + } return mt76_mcu_skb_send_msg(dev, skb, mcu_cmd, true); } diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c index d6f54b1edfb1..318c38149463 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c @@ -1765,8 +1765,10 @@ int mt7915_mcu_add_sta(struct mt7915_dev *dev, struct ieee80211_vif *vif, } out: ret = mt76_connac_mcu_sta_wed_update(&dev->mt76, skb); - if (ret) + if (ret) { + dev_kfree_skb(skb); return ret; + } return mt76_mcu_skb_send_msg(&dev->mt76, skb, MCU_EXT_CMD(STA_REC_UPDATE), true); diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c index 2daf5a29220f..1379bf6a26b5 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c @@ -1288,8 +1288,10 @@ int mt7925_mcu_add_key(struct mt76_dev *dev, struct ieee80211_vif *vif, return PTR_ERR(skb); ret = mt7925_mcu_sta_key_tlv(wcid, sta_key_conf, skb, key, cmd, msta); - if (ret) + if (ret) { + dev_kfree_skb(skb); return ret; + } return mt76_mcu_skb_send_msg(dev, skb, mcu_cmd, true); } From aae89dc4a1608da9060bada757f650ac94b7f184 Mon Sep 17 00:00:00 2001 From: Leon Yen Date: Wed, 21 Jan 2026 00:31:52 +0800 Subject: [PATCH 0737/1561] wifi: mt76: mt7925: fix tx power setting failure after chip reset After the chip reset, the procedure to set the tx power will not be successful because the previous region setting is still remains. Clear the region setting during MAC initialization and allow it to be reset to finalize the TX power setting. Fixes: 3bc62aa4484d ("wifi: mt76: mt7925: add auto regdomain switch support") Signed-off-by: Leon Yen Link: https://patch.msgid.link/20260120163152.3694116-1-leon.yen@mediatek.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7925/init.c | 2 ++ drivers/net/wireless/mediatek/mt76/mt7925/regd.c | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/init.c b/drivers/net/wireless/mediatek/mt76/mt7925/init.c index 3ce5d6fcc69d..c0c5cb9aff75 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/init.c @@ -91,6 +91,8 @@ int mt7925_mac_init(struct mt792x_dev *dev) mt7925_mac_init_basic_rates(dev); + memzero_explicit(&dev->mt76.alpha2, sizeof(dev->mt76.alpha2)); + return 0; } EXPORT_SYMBOL_GPL(mt7925_mac_init); diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/regd.c b/drivers/net/wireless/mediatek/mt76/mt7925/regd.c index 292087e882d1..16f56ee879d4 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/regd.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/regd.c @@ -232,7 +232,8 @@ int mt7925_regd_change(struct mt792x_phy *phy, char *alpha2) dev->regd_user) return -EINVAL; - if (mdev->alpha2[0] != '0' && mdev->alpha2[1] != '0') + if ((mdev->alpha2[0] && mdev->alpha2[0] != '0') && + (mdev->alpha2[1] && mdev->alpha2[1] != '0')) return 0; /* do not need to update the same country twice */ From fdfa39f9f4fbae532b162da913a67b2410caf38f Mon Sep 17 00:00:00 2001 From: Quan Zhou Date: Fri, 23 Jan 2026 10:16:25 +0800 Subject: [PATCH 0738/1561] wifi: mt76: mt7921: fix ROC abort flow interruption in mt7921_roc_work The mt7921_set_roc API may be executed concurrently with mt7921_roc_work, specifically between the following code paths: - The check and clear of MT76_STATE_ROC in mt7921_roc_work: if (!test_and_clear_bit(MT76_STATE_ROC, &phy->mt76->state)) return; - The execution of ieee80211_iterate_active_interfaces. This race condition can interrupt the ROC abort flow, resulting in the ROC process failing to abort as expected. To address this defect, the modification of MT76_STATE_ROC is now protected by mt792x_mutex_acquire(phy->dev). This ensures that changes to the ROC state are properly synchronized, preventing race conditions and ensuring the ROC abort flow is not interrupted. Fixes: 034ae28b56f1 ("wifi: mt76: mt7921: introduce remain_on_channel support") Cc: stable@vger.kernel.org Signed-off-by: Quan Zhou Reviewed-by: Sean Wang Link: https://patch.msgid.link/2568ece8b557e5dda79391414c834ef3233049b6.1769133724.git.quan.zhou@mediatek.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7921/main.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c index debecd3dff75..f42e40f9663d 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c @@ -387,10 +387,11 @@ void mt7921_roc_work(struct work_struct *work) phy = (struct mt792x_phy *)container_of(work, struct mt792x_phy, roc_work); - if (!test_and_clear_bit(MT76_STATE_ROC, &phy->mt76->state)) - return; - mt792x_mutex_acquire(phy->dev); + if (!test_and_clear_bit(MT76_STATE_ROC, &phy->mt76->state)) { + mt792x_mutex_release(phy->dev); + return; + } ieee80211_iterate_active_interfaces(phy->mt76->hw, IEEE80211_IFACE_ITER_RESUME_ALL, mt7921_roc_iter, phy); From d5059e52fd8bc624ec4255c9fa01a266513d126b Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Mon, 26 Jan 2026 12:00:13 -0600 Subject: [PATCH 0739/1561] wifi: mt76: mt7921: fix potential deadlock in mt7921_roc_abort_sync roc_abort_sync() can deadlock with roc_work(). roc_work() holds dev->mt76.mutex, while cancel_work_sync() waits for roc_work() to finish. If the caller already owns the same mutex, both sides block and no progress is possible. This deadlock can occur during station removal when mt76_sta_state() -> mt76_sta_remove() -> mt7921_mac_sta_remove() -> mt7921_roc_abort_sync() invokes cancel_work_sync() while roc_work() is still running and holding dev->mt76.mutex. This avoids the mutex deadlock and preserves exactly-once work ownership. Fixes: 352d966126e6 ("wifi: mt76: mt7921: fix a potential association failure upon resuming") Co-developed-by: Quan Zhou Signed-off-by: Quan Zhou Signed-off-by: Sean Wang Link: https://patch.msgid.link/20260126180013.8167-1-sean.wang@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7921/main.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c index f42e40f9663d..42b9514e04e7 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c @@ -371,12 +371,15 @@ void mt7921_roc_abort_sync(struct mt792x_dev *dev) { struct mt792x_phy *phy = &dev->phy; + if (!test_and_clear_bit(MT76_STATE_ROC, &phy->mt76->state)) + return; + timer_delete_sync(&phy->roc_timer); - cancel_work_sync(&phy->roc_work); - if (test_and_clear_bit(MT76_STATE_ROC, &phy->mt76->state)) - ieee80211_iterate_interfaces(mt76_hw(dev), - IEEE80211_IFACE_ITER_RESUME_ALL, - mt7921_roc_iter, (void *)phy); + cancel_work(&phy->roc_work); + + ieee80211_iterate_interfaces(mt76_hw(dev), + IEEE80211_IFACE_ITER_RESUME_ALL, + mt7921_roc_iter, (void *)phy); } EXPORT_SYMBOL_GPL(mt7921_roc_abort_sync); From 6939b97ddad3cf3dfbb3b5a0a12ef79cb886747e Mon Sep 17 00:00:00 2001 From: Chad Monroe Date: Mon, 8 Dec 2025 14:31:32 +0000 Subject: [PATCH 0740/1561] wifi: mt76: fix deadlock in remain-on-channel mt76_remain_on_channel() and mt76_roc_complete() call mt76_set_channel() while already holding dev->mutex. Since mt76_set_channel() also acquires dev->mutex, this results in a deadlock. Use __mt76_set_channel() instead of mt76_set_channel(). Add cancel_delayed_work_sync() for mac_work before acquiring the mutex in mt76_remain_on_channel() to prevent a secondary deadlock with the mac_work workqueue. Fixes: a8f424c1287c ("wifi: mt76: add multi-radio remain_on_channel functions") Signed-off-by: Chad Monroe Link: https://patch.msgid.link/ace737e7b621af7c2adb33b0188011a5c1de2166.1765204256.git.chad@monroe.io Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/channel.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/channel.c b/drivers/net/wireless/mediatek/mt76/channel.c index 2b705bdb7993..d9f8529db7ed 100644 --- a/drivers/net/wireless/mediatek/mt76/channel.c +++ b/drivers/net/wireless/mediatek/mt76/channel.c @@ -326,7 +326,7 @@ void mt76_roc_complete(struct mt76_phy *phy) mlink->mvif->roc_phy = NULL; if (phy->main_chandef.chan && !test_bit(MT76_MCU_RESET, &dev->phy.state)) - mt76_set_channel(phy, &phy->main_chandef, false); + __mt76_set_channel(phy, &phy->main_chandef, false); mt76_put_vif_phy_link(phy, phy->roc_vif, phy->roc_link); phy->roc_vif = NULL; phy->roc_link = NULL; @@ -370,6 +370,8 @@ int mt76_remain_on_channel(struct ieee80211_hw *hw, struct ieee80211_vif *vif, if (!phy) return -EINVAL; + cancel_delayed_work_sync(&phy->mac_work); + mutex_lock(&dev->mutex); if (phy->roc_vif || dev->scan.phy == phy || @@ -388,7 +390,14 @@ int mt76_remain_on_channel(struct ieee80211_hw *hw, struct ieee80211_vif *vif, phy->roc_vif = vif; phy->roc_link = mlink; cfg80211_chandef_create(&chandef, chan, NL80211_CHAN_HT20); - mt76_set_channel(phy, &chandef, true); + ret = __mt76_set_channel(phy, &chandef, true); + if (ret) { + mlink->mvif->roc_phy = NULL; + phy->roc_vif = NULL; + phy->roc_link = NULL; + mt76_put_vif_phy_link(phy, vif, mlink); + goto out; + } ieee80211_ready_on_channel(hw); ieee80211_queue_delayed_work(phy->hw, &phy->roc_work, msecs_to_jiffies(duration)); From d2b860454ea2df8f336e9b859da7ffb27f43444d Mon Sep 17 00:00:00 2001 From: Chad Monroe Date: Mon, 8 Dec 2025 14:24:00 +0000 Subject: [PATCH 0741/1561] wifi: mt76: mt7996: reset device after MCU message timeout Trigger a full reset after MCU message timeout. Signed-off-by: Chad Monroe Link: https://patch.msgid.link/6e05ed063f3763ad3457633c56b60a728a49a6f0.1765203753.git.chad@monroe.io Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7996/mac.c | 5 +++++ drivers/net/wireless/mediatek/mt76/mt7996/mcu.c | 9 +++++++++ 2 files changed, 14 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c index c8cfc343d37b..c6028fabd7d1 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c @@ -2710,6 +2710,11 @@ void mt7996_reset(struct mt7996_dev *dev) return; } + if (READ_ONCE(dev->recovery.state) & MT_MCU_CMD_STOP_DMA) { + set_bit(MT76_MCU_RESET, &dev->mphy.state); + wake_up(&dev->mt76.mcu.wait); + } + queue_work(dev->mt76.wq, &dev->reset_work); wake_up(&dev->reset_wait); } diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c index 82eea809c47b..7741ba0aa0bd 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c @@ -209,6 +209,7 @@ static int mt7996_mcu_parse_response(struct mt76_dev *mdev, int cmd, struct sk_buff *skb, int seq) { + struct mt7996_dev *dev = container_of(mdev, struct mt7996_dev, mt76); struct mt7996_mcu_rxd *rxd; struct mt7996_mcu_uni_event *event; int mcu_cmd = FIELD_GET(__MCU_CMD_FIELD_ID, cmd); @@ -217,6 +218,14 @@ mt7996_mcu_parse_response(struct mt76_dev *mdev, int cmd, if (!skb) { dev_err(mdev->dev, "Message %08x (seq %d) timeout\n", cmd, seq); + + if (!test_and_set_bit(MT76_MCU_RESET, &dev->mphy.state)) { + dev->recovery.restart = true; + wake_up(&dev->mt76.mcu.wait); + queue_work(dev->mt76.wq, &dev->reset_work); + wake_up(&dev->reset_wait); + } + return -ETIMEDOUT; } From 0176417d10ce964cd195e64eff9e079cc0a52b69 Mon Sep 17 00:00:00 2001 From: Chad Monroe Date: Mon, 8 Dec 2025 14:14:50 +0000 Subject: [PATCH 0742/1561] wifi: mt76: mt7996: increase txq memory limit to 32 MiB Prior to this change, both 2G and 6G radios would fall back to the mac80211 default of 4MB which is not enough for high data rates. Signed-off-by: Chad Monroe Link: https://patch.msgid.link/acfe2e25768b414518be2db22b1d3ba6f5db6fa1.1765203249.git.chad@monroe.io Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7996/init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/init.c b/drivers/net/wireless/mediatek/mt76/mt7996/init.c index 8aa9807b5087..2937e89ad0c9 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/init.c @@ -538,6 +538,7 @@ mt7996_init_wiphy(struct ieee80211_hw *hw, struct mtk_wed_device *wed) ieee80211_hw_set(hw, SUPPORTS_MULTI_BSSID); hw->max_tx_fragments = 4; + wiphy->txq_memory_limit = 32 << 20; /* 32 MiB */ /* init led callbacks */ if (IS_ENABLED(CONFIG_MT76_LEDS)) { From ee5bb35d2b83fadc6920aa2478326fb50ea653a9 Mon Sep 17 00:00:00 2001 From: Madhur Kumar Date: Mon, 8 Dec 2025 22:53:31 +0530 Subject: [PATCH 0743/1561] wifi: mt76: mt7921: Replace deprecated PCI function pcim_iomap_table() and pcim_iomap_regions() have been deprecated. Replace them with pcim_iomap_region(). Signed-off-by: Madhur Kumar Link: https://patch.msgid.link/20251208172331.89705-1-madhurkumar004@gmail.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7921/pci.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c index ec9686183251..65c7fe671137 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c @@ -276,6 +276,7 @@ static int mt7921_pci_probe(struct pci_dev *pdev, struct mt76_bus_ops *bus_ops; struct mt792x_dev *dev; struct mt76_dev *mdev; + void __iomem *regs; u16 cmd, chipid; u8 features; int ret; @@ -284,10 +285,6 @@ static int mt7921_pci_probe(struct pci_dev *pdev, if (ret) return ret; - ret = pcim_iomap_regions(pdev, BIT(0), pci_name(pdev)); - if (ret) - return ret; - pci_read_config_word(pdev, PCI_COMMAND, &cmd); if (!(cmd & PCI_COMMAND_MEMORY)) { cmd |= PCI_COMMAND_MEMORY; @@ -321,11 +318,15 @@ static int mt7921_pci_probe(struct pci_dev *pdev, pci_set_drvdata(pdev, mdev); + regs = pcim_iomap_region(pdev, 0, pci_name(pdev)); + if (IS_ERR(regs)) + return PTR_ERR(regs); + dev = container_of(mdev, struct mt792x_dev, mt76); dev->fw_features = features; dev->hif_ops = &mt7921_pcie_ops; dev->irq_map = &irq_map; - mt76_mmio_init(&dev->mt76, pcim_iomap_table(pdev)[0]); + mt76_mmio_init(&dev->mt76, regs); tasklet_init(&mdev->irq_tasklet, mt792x_irq_tasklet, (unsigned long)dev); dev->phy.dev = dev; From 37d5b68ab57c5b4fb1c40e62c6b32376c6a2ca2c Mon Sep 17 00:00:00 2001 From: Allen Ye Date: Wed, 18 Feb 2026 16:30:27 -0800 Subject: [PATCH 0744/1561] wifi: mt76: fix backoff fields and max_power calculation The maximum power value may exist in either the data or backoff field. Previously, backoff power limits were not considered in txpower reporting. This patch ensures mt76 also considers backoff values in the SKU table. Also, each RU entry (RU26, RU52, RU106, BW20, ...) in the DTS corresponds to 10 stream combinations (1T1ss, 2T1ss, 3T1ss, 4T1ss, 2T2ss, 3T2ss, 4T2ss, 3T3ss, 4T3ss, 4T4ss). For beamforming tables: - In connac2, beamforming entries for BW20~BW160, and OFDM do not include 1T1ss. - In connac3, beamforming entries for BW20~BW160, and RU include 1T1ss, but OFDM beamforming does not include 1T1ss. Non-beamforming and RU entries for both connac2 and connac3 include 1T1ss. Fixes: b05ab4be9fd7 ("wifi: mt76: mt7915: add bf backoff limit table support") Signed-off-by: Allen Ye Co-developed-by: Ryder Lee Signed-off-by: Ryder Lee Link: https://patch.msgid.link/8fa8ec500b3d4de7b1966c6887f1dfbe5c46a54c.1771205424.git.ryder.lee@mediatek.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/eeprom.c | 154 ++++++++++++++------ drivers/net/wireless/mediatek/mt76/mt76.h | 1 - 2 files changed, 109 insertions(+), 46 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/eeprom.c b/drivers/net/wireless/mediatek/mt76/eeprom.c index 573400d57ce7..afdb73661866 100644 --- a/drivers/net/wireless/mediatek/mt76/eeprom.c +++ b/drivers/net/wireless/mediatek/mt76/eeprom.c @@ -9,6 +9,13 @@ #include #include #include "mt76.h" +#include "mt76_connac.h" + +enum mt76_sku_type { + MT76_SKU_RATE, + MT76_SKU_BACKOFF, + MT76_SKU_BACKOFF_BF_OFFSET, +}; static int mt76_get_of_eeprom_data(struct mt76_dev *dev, void *eep, int len) { @@ -292,7 +299,6 @@ mt76_find_channel_node(struct device_node *np, struct ieee80211_channel *chan) } EXPORT_SYMBOL_GPL(mt76_find_channel_node); - static s8 mt76_get_txs_delta(struct device_node *np, u8 nss) { @@ -306,9 +312,24 @@ mt76_get_txs_delta(struct device_node *np, u8 nss) return be32_to_cpu(val[nss - 1]); } +static inline u8 mt76_backoff_n_chains(struct mt76_dev *dev, u8 idx) +{ + /* 0:1T1ss, 1:2T1ss, ..., 14:5T5ss */ + static const u8 connac3_table[] = { + 1, 2, 3, 4, 5, 2, 3, 4, 5, 3, 4, 5, 4, 5, 5}; + static const u8 connac2_table[] = { + 1, 2, 3, 4, 2, 3, 4, 3, 4, 4, 0, 0, 0, 0, 0}; + + if (idx >= ARRAY_SIZE(connac3_table)) + return 0; + + return is_mt799x(dev) ? connac3_table[idx] : connac2_table[idx]; +} + static void -mt76_apply_array_limit(s8 *pwr, size_t pwr_len, const s8 *data, - s8 target_power, s8 nss_delta, s8 *max_power) +mt76_apply_array_limit(struct mt76_dev *dev, s8 *pwr, size_t pwr_len, + const s8 *data, s8 target_power, s8 nss_delta, + s8 *max_power, int n_chains, enum mt76_sku_type type) { int i; @@ -316,18 +337,51 @@ mt76_apply_array_limit(s8 *pwr, size_t pwr_len, const s8 *data, return; for (i = 0; i < pwr_len; i++) { - pwr[i] = min_t(s8, target_power, data[i] + nss_delta); + u8 backoff_chain_idx = i; + int backoff_n_chains; + s8 backoff_delta; + s8 delta; + + switch (type) { + case MT76_SKU_RATE: + delta = 0; + backoff_delta = 0; + backoff_n_chains = 0; + break; + case MT76_SKU_BACKOFF_BF_OFFSET: + backoff_chain_idx += 1; + fallthrough; + case MT76_SKU_BACKOFF: + delta = mt76_tx_power_path_delta(n_chains); + backoff_n_chains = mt76_backoff_n_chains(dev, backoff_chain_idx); + backoff_delta = mt76_tx_power_path_delta(backoff_n_chains); + break; + default: + return; + } + + pwr[i] = min_t(s8, target_power + delta - backoff_delta, data[i] + nss_delta); + + /* used for padding, doesn't need to be considered */ + if (data[i] >= S8_MAX - 1) + continue; + + /* only consider backoff value for the configured chain number */ + if (type != MT76_SKU_RATE && n_chains != backoff_n_chains) + continue; + *max_power = max(*max_power, pwr[i]); } } static void -mt76_apply_multi_array_limit(s8 *pwr, size_t pwr_len, s8 pwr_num, - const s8 *data, size_t len, s8 target_power, - s8 nss_delta) +mt76_apply_multi_array_limit(struct mt76_dev *dev, s8 *pwr, size_t pwr_len, + s8 pwr_num, const s8 *data, size_t len, + s8 target_power, s8 nss_delta, s8 *max_power, + int n_chains, enum mt76_sku_type type) { + static const int connac2_backoff_ru_idx = 2; int i, cur; - s8 max_power = -128; if (!data) return; @@ -337,8 +391,26 @@ mt76_apply_multi_array_limit(s8 *pwr, size_t pwr_len, s8 pwr_num, if (len < pwr_len + 1) break; - mt76_apply_array_limit(pwr + pwr_len * i, pwr_len, data + 1, - target_power, nss_delta, &max_power); + /* Each RU entry (RU26, RU52, RU106, BW20, ...) in the DTS + * corresponds to 10 stream combinations (1T1ss, 2T1ss, 3T1ss, + * 4T1ss, 2T2ss, 3T2ss, 4T2ss, 3T3ss, 4T3ss, 4T4ss). + * + * For beamforming tables: + * - In connac2, beamforming entries for BW20~BW160 and OFDM + * do not include 1T1ss. + * - In connac3, beamforming entries for BW20~BW160 and RU + * include 1T1ss, but OFDM beamforming does not include 1T1ss. + * + * Non-beamforming and RU entries for both connac2 and connac3 + * include 1T1ss. + */ + if (!is_mt799x(dev) && type == MT76_SKU_BACKOFF && + i > connac2_backoff_ru_idx) + type = MT76_SKU_BACKOFF_BF_OFFSET; + + mt76_apply_array_limit(dev, pwr + pwr_len * i, pwr_len, data + 1, + target_power, nss_delta, max_power, + n_chains, type); if (--cur > 0) continue; @@ -360,18 +432,11 @@ s8 mt76_get_rate_power_limits(struct mt76_phy *phy, struct device_node *np; const s8 *val; char name[16]; - u32 mcs_rates = dev->drv->mcs_rates; - u32 ru_rates = ARRAY_SIZE(dest->ru[0]); char band; size_t len; - s8 max_power = 0; - s8 max_power_backoff = -127; + s8 max_power = -127; s8 txs_delta; int n_chains = hweight16(phy->chainmask); - s8 target_power_combine = target_power + mt76_tx_power_path_delta(n_chains); - - if (!mcs_rates) - mcs_rates = 10; memset(dest, target_power, sizeof(*dest) - sizeof(dest->path)); memset(&dest->path, 0, sizeof(dest->path)); @@ -409,46 +474,45 @@ s8 mt76_get_rate_power_limits(struct mt76_phy *phy, txs_delta = mt76_get_txs_delta(np, hweight16(phy->chainmask)); val = mt76_get_of_array_s8(np, "rates-cck", &len, ARRAY_SIZE(dest->cck)); - mt76_apply_array_limit(dest->cck, ARRAY_SIZE(dest->cck), val, - target_power, txs_delta, &max_power); + mt76_apply_array_limit(dev, dest->cck, ARRAY_SIZE(dest->cck), val, + target_power, txs_delta, &max_power, n_chains, MT76_SKU_RATE); - val = mt76_get_of_array_s8(np, "rates-ofdm", - &len, ARRAY_SIZE(dest->ofdm)); - mt76_apply_array_limit(dest->ofdm, ARRAY_SIZE(dest->ofdm), val, - target_power, txs_delta, &max_power); + val = mt76_get_of_array_s8(np, "rates-ofdm", &len, ARRAY_SIZE(dest->ofdm)); + mt76_apply_array_limit(dev, dest->ofdm, ARRAY_SIZE(dest->ofdm), val, + target_power, txs_delta, &max_power, n_chains, MT76_SKU_RATE); - val = mt76_get_of_array_s8(np, "rates-mcs", &len, mcs_rates + 1); - mt76_apply_multi_array_limit(dest->mcs[0], ARRAY_SIZE(dest->mcs[0]), - ARRAY_SIZE(dest->mcs), val, len, - target_power, txs_delta); + val = mt76_get_of_array_s8(np, "rates-mcs", &len, ARRAY_SIZE(dest->mcs[0]) + 1); + mt76_apply_multi_array_limit(dev, dest->mcs[0], ARRAY_SIZE(dest->mcs[0]), + ARRAY_SIZE(dest->mcs), val, len, target_power, + txs_delta, &max_power, n_chains, MT76_SKU_RATE); - val = mt76_get_of_array_s8(np, "rates-ru", &len, ru_rates + 1); - mt76_apply_multi_array_limit(dest->ru[0], ARRAY_SIZE(dest->ru[0]), - ARRAY_SIZE(dest->ru), val, len, - target_power, txs_delta); + val = mt76_get_of_array_s8(np, "rates-ru", &len, ARRAY_SIZE(dest->ru[0]) + 1); + mt76_apply_multi_array_limit(dev, dest->ru[0], ARRAY_SIZE(dest->ru[0]), + ARRAY_SIZE(dest->ru), val, len, target_power, + txs_delta, &max_power, n_chains, MT76_SKU_RATE); - max_power_backoff = max_power; val = mt76_get_of_array_s8(np, "paths-cck", &len, ARRAY_SIZE(dest->path.cck)); - mt76_apply_array_limit(dest->path.cck, ARRAY_SIZE(dest->path.cck), val, - target_power_combine, txs_delta, &max_power_backoff); + mt76_apply_array_limit(dev, dest->path.cck, ARRAY_SIZE(dest->path.cck), val, + target_power, txs_delta, &max_power, n_chains, MT76_SKU_BACKOFF); val = mt76_get_of_array_s8(np, "paths-ofdm", &len, ARRAY_SIZE(dest->path.ofdm)); - mt76_apply_array_limit(dest->path.ofdm, ARRAY_SIZE(dest->path.ofdm), val, - target_power_combine, txs_delta, &max_power_backoff); + mt76_apply_array_limit(dev, dest->path.ofdm, ARRAY_SIZE(dest->path.ofdm), val, + target_power, txs_delta, &max_power, n_chains, MT76_SKU_BACKOFF); val = mt76_get_of_array_s8(np, "paths-ofdm-bf", &len, ARRAY_SIZE(dest->path.ofdm_bf)); - mt76_apply_array_limit(dest->path.ofdm_bf, ARRAY_SIZE(dest->path.ofdm_bf), val, - target_power_combine, txs_delta, &max_power_backoff); + mt76_apply_array_limit(dev, dest->path.ofdm_bf, ARRAY_SIZE(dest->path.ofdm_bf), val, + target_power, txs_delta, &max_power, n_chains, + MT76_SKU_BACKOFF_BF_OFFSET); val = mt76_get_of_array_s8(np, "paths-ru", &len, ARRAY_SIZE(dest->path.ru[0]) + 1); - mt76_apply_multi_array_limit(dest->path.ru[0], ARRAY_SIZE(dest->path.ru[0]), - ARRAY_SIZE(dest->path.ru), val, len, - target_power_combine, txs_delta); + mt76_apply_multi_array_limit(dev, dest->path.ru[0], ARRAY_SIZE(dest->path.ru[0]), + ARRAY_SIZE(dest->path.ru), val, len, target_power, + txs_delta, &max_power, n_chains, MT76_SKU_BACKOFF); val = mt76_get_of_array_s8(np, "paths-ru-bf", &len, ARRAY_SIZE(dest->path.ru_bf[0]) + 1); - mt76_apply_multi_array_limit(dest->path.ru_bf[0], ARRAY_SIZE(dest->path.ru_bf[0]), - ARRAY_SIZE(dest->path.ru_bf), val, len, - target_power_combine, txs_delta); + mt76_apply_multi_array_limit(dev, dest->path.ru_bf[0], ARRAY_SIZE(dest->path.ru_bf[0]), + ARRAY_SIZE(dest->path.ru_bf), val, len, target_power, + txs_delta, &max_power, n_chains, MT76_SKU_BACKOFF); return max_power; } diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index 5e68efc367fc..23a1832812a2 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -542,7 +542,6 @@ struct mt76_driver_ops { u32 survey_flags; u16 txwi_size; u16 token_size; - u8 mcs_rates; unsigned int link_data_size; From 9a475dc71c38d6abc42ba722ace4a72372876d91 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 18 Mar 2026 16:06:11 +0000 Subject: [PATCH 0745/1561] net: stmmac: move default_an_inband to plat_stmmacenet_data Move the default_an_inband flag from struct mdio_bus_data to struct plat_stmmacenet_data. This is to allow platforms that do not use the integrated MDIO bus to enable inband mode. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w2tPP-0000000DYAX-0TKw@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 4 ++-- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 5 +---- include/linux/stmmac.h | 2 +- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index f621077c30a4..7898b5075a8b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -298,7 +298,7 @@ static void tgl_get_interfaces(struct stmmac_priv *priv, void *bsp_priv, if (FIELD_GET(SERDES_LINK_MODE_MASK, data) == SERDES_LINK_MODE_2G5) { dev_info(priv->device, "Link Speed Mode: 2.5Gbps\n"); - priv->plat->mdio_bus_data->default_an_inband = false; + priv->plat->default_an_inband = false; interface = PHY_INTERFACE_MODE_2500BASEX; } else { interface = PHY_INTERFACE_MODE_SGMII; @@ -700,7 +700,7 @@ static int intel_mgbe_common_data(struct pci_dev *pdev, if (plat->phy_interface == PHY_INTERFACE_MODE_SGMII || plat->phy_interface == PHY_INTERFACE_MODE_1000BASEX) { plat->mdio_bus_data->pcs_mask = BIT_U32(INTEL_MGBE_XPCS_ADDR); - plat->mdio_bus_data->default_an_inband = true; + plat->default_an_inband = true; plat->select_pcs = intel_mgbe_select_pcs; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 5062537f79e9..bd0f5d487e0f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1390,7 +1390,6 @@ static int stmmac_init_phy(struct net_device *dev) static int stmmac_phylink_setup(struct stmmac_priv *priv) { - struct stmmac_mdio_bus_data *mdio_bus_data; struct phylink_config *config; struct phylink_pcs *pcs; struct phylink *phylink; @@ -1415,9 +1414,7 @@ static int stmmac_phylink_setup(struct stmmac_priv *priv) priv->tx_lpi_clk_stop = priv->plat->flags & STMMAC_FLAG_EN_TX_LPI_CLOCKGATING; - mdio_bus_data = priv->plat->mdio_bus_data; - if (mdio_bus_data) - config->default_an_inband = mdio_bus_data->default_an_inband; + config->default_an_inband = priv->plat->default_an_inband; /* Get the PHY interface modes (at the PHY end of the link) that * are supported by the platform. diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 72febd246bdb..565bb394b194 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -89,7 +89,6 @@ struct stmmac_mdio_bus_data { int *irqs; int probed_phy_irq; bool needs_reset; - bool default_an_inband; }; struct stmmac_dma_cfg { @@ -250,6 +249,7 @@ struct plat_stmmacenet_data { struct stmmac_dma_cfg *dma_cfg; struct stmmac_safety_feature_cfg *safety_feat_cfg; int clk_csr; + bool default_an_inband; bool enh_desc; bool tx_coe; bool bugged_jumbo; From 7d5a2da501e0527f467216a248e28077c7b81153 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 18 Mar 2026 16:06:16 +0000 Subject: [PATCH 0746/1561] net: stmmac: add struct stmmac_pcs_info We need to describe one more register (offset and field bitmask) to the PCS code. Move the existing PCS offset and interrupt enable bits to a new struct and pass that in to stmmac_integrated_pcs_init(). Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w2tPU-0000000DYAd-0ssk@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c | 9 ++++++--- drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 8 ++++++-- drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.c | 8 ++++---- drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h | 9 +++++++-- 4 files changed, 23 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index 01f8353eb6ef..c851e3b88ce5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -22,14 +22,17 @@ #include "stmmac_ptp.h" #include "dwmac1000.h" +static const struct stmmac_pcs_info dwmac1000_pcs_info = { + .pcs_offset = GMAC_PCS_BASE, + .int_mask = GMAC_INT_DISABLE_PCSLINK | GMAC_INT_DISABLE_PCSAN, +}; + static int dwmac1000_pcs_init(struct stmmac_priv *priv) { if (!priv->dma_cap.pcs) return 0; - return stmmac_integrated_pcs_init(priv, GMAC_PCS_BASE, - GMAC_INT_DISABLE_PCSLINK | - GMAC_INT_DISABLE_PCSAN); + return stmmac_integrated_pcs_init(priv, &dwmac1000_pcs_info); } static void dwmac1000_core_init(struct mac_device_info *hw, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 4c6fed3ecbcf..ba5393beb7bb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -22,13 +22,17 @@ #include "dwmac4.h" #include "dwmac5.h" +static const struct stmmac_pcs_info dwmac4_pcs_info = { + .pcs_offset = GMAC_PCS_BASE, + .int_mask = GMAC_INT_PCS_LINK | GMAC_INT_PCS_ANE, +}; + static int dwmac4_pcs_init(struct stmmac_priv *priv) { if (!priv->dma_cap.pcs) return 0; - return stmmac_integrated_pcs_init(priv, GMAC_PCS_BASE, - GMAC_INT_PCS_LINK | GMAC_INT_PCS_ANE); + return stmmac_integrated_pcs_init(priv, &dwmac4_pcs_info); } static void dwmac4_core_init(struct mac_device_info *hw, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.c index 88fa359ea716..2695e0b9ed03 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.c @@ -90,8 +90,8 @@ int stmmac_integrated_pcs_get_phy_intf_sel(struct phylink_pcs *pcs, return -EINVAL; } -int stmmac_integrated_pcs_init(struct stmmac_priv *priv, unsigned int offset, - u32 int_mask) +int stmmac_integrated_pcs_init(struct stmmac_priv *priv, + const struct stmmac_pcs_info *pcs_info) { struct stmmac_pcs *spcs; @@ -100,8 +100,8 @@ int stmmac_integrated_pcs_init(struct stmmac_priv *priv, unsigned int offset, return -ENOMEM; spcs->priv = priv; - spcs->base = priv->ioaddr + offset; - spcs->int_mask = int_mask; + spcs->base = priv->ioaddr + pcs_info->pcs_offset; + spcs->int_mask = pcs_info->int_mask; spcs->pcs.ops = &dwmac_integrated_pcs_ops; __set_bit(PHY_INTERFACE_MODE_SGMII, spcs->pcs.supported_interfaces); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h index 23bbd4f10bf8..f1ee473d8e3e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h @@ -27,6 +27,11 @@ struct stmmac_priv; +struct stmmac_pcs_info { + unsigned int pcs_offset; + u32 int_mask; +}; + struct stmmac_pcs { struct stmmac_priv *priv; void __iomem *base; @@ -44,8 +49,8 @@ void stmmac_integrated_pcs_irq(struct stmmac_priv *priv, u32 status, struct stmmac_extra_stats *x); int stmmac_integrated_pcs_get_phy_intf_sel(struct phylink_pcs *pcs, phy_interface_t interface); -int stmmac_integrated_pcs_init(struct stmmac_priv *priv, unsigned int offset, - u32 int_mask); +int stmmac_integrated_pcs_init(struct stmmac_priv *priv, + const struct stmmac_pcs_info *pcs_info); /** * dwmac_ctrl_ane - To program the AN Control Register. From 0837578667358bbeeb0aa2fcc98935f4536696c5 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 18 Mar 2026 16:06:21 +0000 Subject: [PATCH 0747/1561] net: stmmac: add support for reading inband SGMII status Report the link, speed and duplex for SGMII links, read from the SGMII, RGMII and SMII status and control register. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w2tPZ-0000000DYAj-1MdI@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac1000.h | 12 +---- .../ethernet/stmicro/stmmac/dwmac1000_core.c | 2 + drivers/net/ethernet/stmicro/stmmac/dwmac4.h | 10 +--- .../net/ethernet/stmicro/stmmac/dwmac4_core.c | 2 + .../net/ethernet/stmicro/stmmac/stmmac_pcs.c | 49 ++++++++++++++++++- .../net/ethernet/stmicro/stmmac/stmmac_pcs.h | 4 ++ 6 files changed, 58 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h index 9fe639fb06bb..1de1f929d61c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h @@ -70,18 +70,8 @@ enum power_event { #define GMAC_RGSMIIIS 0x000000d8 /* RGMII/SMII status */ /* SGMII/RGMII status register */ -#define GMAC_RGSMIIIS_LNKMODE BIT(0) -#define GMAC_RGSMIIIS_SPEED GENMASK(2, 1) -#define GMAC_RGSMIIIS_LNKSTS BIT(3) -#define GMAC_RGSMIIIS_JABTO BIT(4) -#define GMAC_RGSMIIIS_FALSECARDET BIT(5) +#define GMAC_RSGMIIIS_MASK GENMASK(15, 0) #define GMAC_RGSMIIIS_SMIDRXS BIT(16) -/* LNKMOD */ -#define GMAC_RGSMIIIS_LNKMOD_MASK 0x1 -/* LNKSPEED */ -#define GMAC_RGSMIIIS_SPEED_125 0x2 -#define GMAC_RGSMIIIS_SPEED_25 0x1 -#define GMAC_RGSMIIIS_SPEED_2_5 0x0 /* GMAC Configuration defines */ #define GMAC_CONTROL_2K 0x08000000 /* IEEE 802.3as 2K packets */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index c851e3b88ce5..caac85fc08f1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -24,6 +24,8 @@ static const struct stmmac_pcs_info dwmac1000_pcs_info = { .pcs_offset = GMAC_PCS_BASE, + .rgsmii_offset = GMAC_RGSMIIIS, + .rgsmii_status_mask = GMAC_RSGMIIIS_MASK, .int_mask = GMAC_INT_DISABLE_PCSLINK | GMAC_INT_DISABLE_PCSAN, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h index d797d936aee1..ffcd036d4c02 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h @@ -470,15 +470,7 @@ static inline u32 mtl_low_credx_base_addr(const struct dwmac4_addrs *addrs, #define GMAC_PHYIF_CTRLSTATUS_TC BIT(0) #define GMAC_PHYIF_CTRLSTATUS_LUD BIT(1) #define GMAC_PHYIF_CTRLSTATUS_SMIDRXS BIT(4) -#define GMAC_PHYIF_CTRLSTATUS_LNKMOD BIT(16) -#define GMAC_PHYIF_CTRLSTATUS_SPEED GENMASK(18, 17) -#define GMAC_PHYIF_CTRLSTATUS_LNKSTS BIT(19) -#define GMAC_PHYIF_CTRLSTATUS_JABTO BIT(20) -#define GMAC_PHYIF_CTRLSTATUS_FALSECARDET BIT(21) -/* LNKSPEED */ -#define GMAC_PHYIF_CTRLSTATUS_SPEED_125 0x2 -#define GMAC_PHYIF_CTRLSTATUS_SPEED_25 0x1 -#define GMAC_PHYIF_CTRLSTATUS_SPEED_2_5 0x0 +#define GMAC_PHYIF_CTRLSTATUS_RSGMII_MASK GENMASK(31, 16) extern const struct stmmac_dma_ops dwmac4_dma_ops; extern const struct stmmac_dma_ops dwmac410_dma_ops; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index ba5393beb7bb..c6fcfae27c3d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -24,6 +24,8 @@ static const struct stmmac_pcs_info dwmac4_pcs_info = { .pcs_offset = GMAC_PCS_BASE, + .rgsmii_offset = GMAC_PHYIF_CONTROL_STATUS, + .rgsmii_status_mask = GMAC_PHYIF_CTRLSTATUS_RSGMII_MASK, .int_mask = GMAC_INT_PCS_LINK | GMAC_INT_PCS_ANE, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.c index 2695e0b9ed03..df72f7c5a6a7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.c @@ -16,6 +16,16 @@ #define GMAC_ANE_LPA 0x0c /* ANE link partener ability */ #define GMAC_TBI 0x14 /* TBI extend status */ +/* + * RGSMII status bitfield definitions. + */ +#define GMAC_RGSMII_LNKMOD BIT(0) +#define GMAC_RGSMII_SPEED_MASK GENMASK(2, 1) +#define GMAC_RGSMII_SPEED_125 2 +#define GMAC_RGSMII_SPEED_25 1 +#define GMAC_RGSMII_SPEED_2_5 0 +#define GMAC_RGSMII_LNKSTS BIT(3) + static int dwmac_integrated_pcs_enable(struct phylink_pcs *pcs) { struct stmmac_pcs *spcs = phylink_pcs_to_stmmac_pcs(pcs); @@ -36,7 +46,42 @@ static void dwmac_integrated_pcs_get_state(struct phylink_pcs *pcs, unsigned int neg_mode, struct phylink_link_state *state) { - state->link = false; + struct stmmac_pcs *spcs = phylink_pcs_to_stmmac_pcs(pcs); + u32 status, rgsmii; + + status = readl(spcs->base + GMAC_AN_STATUS); + + if (phy_interface_mode_is_8023z(state->interface)) { + state->link = false; + } else { + rgsmii = field_get(spcs->rgsmii_status_mask, + readl(spcs->rgsmii)); + + state->link = status & BMSR_LSTATUS && + rgsmii & GMAC_RGSMII_LNKSTS; + + if (state->link && neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED) { + state->duplex = rgsmii & GMAC_RGSMII_LNKMOD ? + DUPLEX_FULL : DUPLEX_HALF; + switch (FIELD_GET(GMAC_RGSMII_SPEED_MASK, rgsmii)) { + case GMAC_RGSMII_SPEED_2_5: + state->speed = SPEED_10; + break; + + case GMAC_RGSMII_SPEED_25: + state->speed = SPEED_100; + break; + + case GMAC_RGSMII_SPEED_125: + state->speed = SPEED_1000; + break; + + default: + state->link = false; + break; + } + } + } } static int dwmac_integrated_pcs_config(struct phylink_pcs *pcs, @@ -101,6 +146,8 @@ int stmmac_integrated_pcs_init(struct stmmac_priv *priv, spcs->priv = priv; spcs->base = priv->ioaddr + pcs_info->pcs_offset; + spcs->rgsmii = priv->ioaddr + pcs_info->rgsmii_offset; + spcs->rgsmii_status_mask = pcs_info->rgsmii_status_mask; spcs->int_mask = pcs_info->int_mask; spcs->pcs.ops = &dwmac_integrated_pcs_ops; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h index f1ee473d8e3e..09e609f111b1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h @@ -29,12 +29,16 @@ struct stmmac_priv; struct stmmac_pcs_info { unsigned int pcs_offset; + unsigned int rgsmii_offset; + u32 rgsmii_status_mask; u32 int_mask; }; struct stmmac_pcs { struct stmmac_priv *priv; void __iomem *base; + void __iomem *rgsmii; + u32 rgsmii_status_mask; u32 int_mask; struct phylink_pcs pcs; }; From 68cff4fff61fb69ef7bb1f6302d4766822a395cc Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 18 Mar 2026 16:06:26 +0000 Subject: [PATCH 0748/1561] net: stmmac: add BASE-X support to integrated PCS The integrated PCS supports 802.3z (BASE-X) modes when the Synopsys IP is coupled with an appropriate SerDes to provide the electrical interface. The PCS presents a TBI interface to the SerDes for this. Thus, the BASE-X related registers are only present when TBI mode is supported. dwmac-qcom-ethqos added support for using 2.5G with the integrated PCS by calling dwmac_ctrl_ane() directly. Add support for the following to the integrated PCS: - 1000BASE-X protocol unconditionally. - 2500BASE-X if the coupled SerDes supports 2.5G speed. - The above without autonegotiation. - If the PCS supports TBI, then optional BASE-X autonegotiation for each of the above. Reviewed-by: Maxime Chevallier Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w2tPe-0000000DYAp-1qpV@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_pcs.c | 103 +++++++++++++++++- .../net/ethernet/stmicro/stmmac/stmmac_pcs.h | 1 + include/linux/stmmac.h | 1 + 3 files changed, 101 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.c index df72f7c5a6a7..df37af5ab837 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.c @@ -26,6 +26,27 @@ #define GMAC_RGSMII_SPEED_2_5 0 #define GMAC_RGSMII_LNKSTS BIT(3) +static unsigned int dwmac_integrated_pcs_inband_caps(struct phylink_pcs *pcs, + phy_interface_t interface) +{ + struct stmmac_pcs *spcs = phylink_pcs_to_stmmac_pcs(pcs); + unsigned int ib_caps; + + if (phy_interface_mode_is_8023z(interface)) { + ib_caps = LINK_INBAND_DISABLE; + + /* If the PCS supports TBI/RTBI, then BASE-X negotiation is + * supported. + */ + if (spcs->support_tbi_rtbi) + ib_caps |= LINK_INBAND_ENABLE; + + return ib_caps; + } + + return 0; +} + static int dwmac_integrated_pcs_enable(struct phylink_pcs *pcs) { struct stmmac_pcs *spcs = phylink_pcs_to_stmmac_pcs(pcs); @@ -47,12 +68,20 @@ static void dwmac_integrated_pcs_get_state(struct phylink_pcs *pcs, struct phylink_link_state *state) { struct stmmac_pcs *spcs = phylink_pcs_to_stmmac_pcs(pcs); - u32 status, rgsmii; + u32 status, lpa, rgsmii; status = readl(spcs->base + GMAC_AN_STATUS); if (phy_interface_mode_is_8023z(state->interface)) { - state->link = false; + /* For BASE-X modes, the PCS block supports the advertisement + * and link partner advertisement registers using standard + * 802.3 format. The status register also has the link status + * and AN complete bits in the same bit location. This will + * only be used when AN is enabled. + */ + lpa = readl(spcs->base + GMAC_ANE_LPA); + + phylink_mii_c22_pcs_decode_state(state, neg_mode, status, lpa); } else { rgsmii = field_get(spcs->rgsmii_status_mask, readl(spcs->rgsmii)); @@ -84,6 +113,21 @@ static void dwmac_integrated_pcs_get_state(struct phylink_pcs *pcs, } } +static int dwmac_integrated_pcs_config_aneg(struct stmmac_pcs *spcs, + phy_interface_t interface, + const unsigned long *advertising) +{ + bool changed = false; + u32 adv; + + adv = phylink_mii_c22_pcs_encode_advertisement(interface, advertising); + if (readl(spcs->base + GMAC_ANE_ADV) != adv) + changed = true; + writel(adv, spcs->base + GMAC_ANE_ADV); + + return changed; +} + static int dwmac_integrated_pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, @@ -91,17 +135,46 @@ static int dwmac_integrated_pcs_config(struct phylink_pcs *pcs, bool permit_pause_to_mac) { struct stmmac_pcs *spcs = phylink_pcs_to_stmmac_pcs(pcs); + bool changed = false, ane = true; - dwmac_ctrl_ane(spcs->base, 0, 1, spcs->priv->hw->reverse_sgmii_enable); + /* Only configure the advertisement and allow AN in BASE-X mode if + * the core supports TBI/RTBI. AN will be filtered out by via phylink + * and the .pcs_inband_caps() method above. + */ + if (phy_interface_mode_is_8023z(interface) && + spcs->support_tbi_rtbi) { + ane = neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED; - return 0; + changed = dwmac_integrated_pcs_config_aneg(spcs, interface, + advertising); + } + + dwmac_ctrl_ane(spcs->base, 0, ane, + spcs->priv->hw->reverse_sgmii_enable); + + return changed; +} + +static void dwmac_integrated_pcs_an_restart(struct phylink_pcs *pcs) +{ + struct stmmac_pcs *spcs = phylink_pcs_to_stmmac_pcs(pcs); + void __iomem *an_control = spcs->base + GMAC_AN_CTRL(0); + u32 ctrl; + + /* We can only do AN restart if using TBI/RTBI mode */ + if (spcs->support_tbi_rtbi) { + ctrl = readl(an_control) | GMAC_AN_CTRL_RAN; + writel(ctrl, an_control); + } } static const struct phylink_pcs_ops dwmac_integrated_pcs_ops = { + .pcs_inband_caps = dwmac_integrated_pcs_inband_caps, .pcs_enable = dwmac_integrated_pcs_enable, .pcs_disable = dwmac_integrated_pcs_disable, .pcs_get_state = dwmac_integrated_pcs_get_state, .pcs_config = dwmac_integrated_pcs_config, + .pcs_an_restart = dwmac_integrated_pcs_an_restart, }; void stmmac_integrated_pcs_irq(struct stmmac_priv *priv, u32 status, @@ -129,9 +202,18 @@ void stmmac_integrated_pcs_irq(struct stmmac_priv *priv, u32 status, int stmmac_integrated_pcs_get_phy_intf_sel(struct phylink_pcs *pcs, phy_interface_t interface) { + struct stmmac_pcs *spcs = phylink_pcs_to_stmmac_pcs(pcs); + if (interface == PHY_INTERFACE_MODE_SGMII) return PHY_INTF_SEL_SGMII; + if (phy_interface_mode_is_8023z(interface)) { + if (spcs->support_tbi_rtbi) + return PHY_INTF_SEL_TBI; + else + return PHY_INTF_SEL_SGMII; + } + return -EINVAL; } @@ -151,7 +233,20 @@ int stmmac_integrated_pcs_init(struct stmmac_priv *priv, spcs->int_mask = pcs_info->int_mask; spcs->pcs.ops = &dwmac_integrated_pcs_ops; + /* If the PCS supports extended status, then it supports BASE-X AN + * with a TBI interface to the SerDes. Otherwise, we can support + * BASE-X without AN using SGMII, which is required for qcom-ethqos. + */ + if (readl(spcs->base + GMAC_AN_STATUS) & BMSR_ESTATEN) + spcs->support_tbi_rtbi = true; + __set_bit(PHY_INTERFACE_MODE_SGMII, spcs->pcs.supported_interfaces); + __set_bit(PHY_INTERFACE_MODE_1000BASEX, spcs->pcs.supported_interfaces); + + /* Only allow 2500BASE-X if the SerDes has support. */ + if (priv->plat->flags & STMMAC_FLAG_SERDES_SUPPORTS_2500M) + __set_bit(PHY_INTERFACE_MODE_2500BASEX, + spcs->pcs.supported_interfaces); priv->integrated_pcs = spcs; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h index 09e609f111b1..b2b12d34b3dd 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pcs.h @@ -41,6 +41,7 @@ struct stmmac_pcs { u32 rgsmii_status_mask; u32 int_mask; struct phylink_pcs pcs; + bool support_tbi_rtbi; }; static inline struct stmmac_pcs * diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 565bb394b194..5b2bece81448 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -212,6 +212,7 @@ enum dwmac_core_type { #define STMMAC_FLAG_EN_TX_LPI_CLK_PHY_CAP BIT(12) #define STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY BIT(13) #define STMMAC_FLAG_KEEP_PREAMBLE_BEFORE_SFD BIT(14) +#define STMMAC_FLAG_SERDES_SUPPORTS_2500M BIT(15) struct mac_device_info; From 365c62c8b5307ae9f9f976e07bd6b606c54cfabc Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 18 Mar 2026 16:06:31 +0000 Subject: [PATCH 0749/1561] net: stmmac: use integrated PCS for BASE-X modes dwmac-qcom-ethqos supports SGMII and 2500BASE-X using the integrated PCS, so we need to expand the PCS support to include support for BASE-X modes. Add support to the prereset configuration to detect 2500BASE-X, and arrange for stmmac_mac_select_pcs() to return the integrated PCS if its supported_interfaces bitmap reports support for the interface mode. This results in priv->hw->pcs now being write-only, so remove it. Reviewed-by: Maxime Chevallier Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w2tPj-0000000DYAv-2JcZ@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/common.h | 4 ---- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 8 ++------ 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 8166389c853f..927ea6230073 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -278,9 +278,6 @@ struct stmmac_safety_stats { #define FLOW_TX 2 #define FLOW_AUTO (FLOW_TX | FLOW_RX) -/* PCS defines */ -#define STMMAC_PCS_SGMII (1 << 1) - #define SF_DMA_MODE 1 /* DMA STORE-AND-FORWARD Operation Mode */ /* DMA HW feature register fields */ @@ -632,7 +629,6 @@ struct mac_device_info { unsigned int unicast_filter_entries; unsigned int mcast_bits_log2; unsigned int rx_csum; - unsigned int pcs; unsigned int num_vlan; u32 vlan_filter[32]; bool vlan_fail_q_en; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index bd0f5d487e0f..ecb6d9a27567 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1027,11 +1027,8 @@ static struct phylink_pcs *stmmac_mac_select_pcs(struct phylink_config *config, return pcs; } - /* The PCS control register is only relevant for SGMII, TBI and RTBI - * modes. We no longer support TBI or RTBI, so only configure this - * register when operating in SGMII mode with the integrated PCS. - */ - if (priv->hw->pcs & STMMAC_PCS_SGMII && priv->integrated_pcs) + if (priv->integrated_pcs && + test_bit(interface, priv->integrated_pcs->pcs.supported_interfaces)) return &priv->integrated_pcs->pcs; return NULL; @@ -1290,7 +1287,6 @@ static void stmmac_check_pcs_mode(struct stmmac_priv *priv) if (priv->dma_cap.pcs && interface == PHY_INTERFACE_MODE_SGMII) { netdev_dbg(priv->dev, "PCS SGMII support enabled\n"); - priv->hw->pcs = STMMAC_PCS_SGMII; switch (speed) { case SPEED_10: From 4e9b1994a105fefe558b245f2076a70cc6b498a9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 5 Mar 2026 11:43:13 +0100 Subject: [PATCH 0750/1561] ieee802154: atusb: drop redundant device reference Driver core holds a reference to the USB interface and its parent USB device while the interface is bound to a driver and there is no need to take additional references unless the structures are needed after disconnect. Drop the redundant device reference to reduce cargo culting, make it easier to spot drivers where an extra reference is needed, and reduce the risk of memory leaks when drivers fail to release it. Signed-off-by: Johan Hovold Reviewed-by: Miquel Raynal Acked-by: Stefan Schmidt Link: https://patch.msgid.link/20260305104313.15898-1-johan@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ieee802154/atusb.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ieee802154/atusb.c b/drivers/net/ieee802154/atusb.c index 95a4a3cdc8a4..5f7fc4ee7a07 100644 --- a/drivers/net/ieee802154/atusb.c +++ b/drivers/net/ieee802154/atusb.c @@ -961,7 +961,7 @@ static int atusb_probe(struct usb_interface *interface, atusb = hw->priv; atusb->hw = hw; - atusb->usb_dev = usb_get_dev(usb_dev); + atusb->usb_dev = usb_dev; usb_set_intfdata(interface, atusb); atusb->shutdown = 0; @@ -1055,7 +1055,6 @@ fail: atusb_free_urbs(atusb); usb_kill_urb(atusb->tx_urb); usb_free_urb(atusb->tx_urb); - usb_put_dev(usb_dev); ieee802154_free_hw(hw); return ret; } @@ -1076,8 +1075,6 @@ static void atusb_disconnect(struct usb_interface *interface) ieee802154_unregister_hw(atusb->hw); - usb_put_dev(atusb->usb_dev); - ieee802154_free_hw(atusb->hw); usb_set_intfdata(interface, NULL); From 0e10381626546e3c77a11c7972d1264301abd733 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 20 Mar 2026 22:52:29 +0100 Subject: [PATCH 0751/1561] nfc: microread: Drop unused include This driver includes the legacy header but does not use any symbols from it. Drop the inclusion. Signed-off-by: Andy Shevchenko Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260320215230.3236005-1-andriy.shevchenko@linux.intel.com Signed-off-by: Jakub Kicinski --- drivers/nfc/microread/i2c.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/nfc/microread/i2c.c b/drivers/nfc/microread/i2c.c index 113b2e306e35..f00cff7f9693 100644 --- a/drivers/nfc/microread/i2c.c +++ b/drivers/nfc/microread/i2c.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include From eb37011395f12138056a4d124159f1a8436662d3 Mon Sep 17 00:00:00 2001 From: Qingfang Deng Date: Fri, 20 Mar 2026 15:56:03 +0800 Subject: [PATCH 0752/1561] net: add netdev_from_priv() helper Add a helper to get netdev from private data pointer, so drivers won't have to store redundant netdev in priv. Signed-off-by: Qingfang Deng Link: https://patch.msgid.link/20260320075605.490832-1-dqfext@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7ca01eb3f7d2..6882b41bb3e8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2765,6 +2765,17 @@ static inline void *netdev_priv(const struct net_device *dev) return (void *)dev->priv; } +/** + * netdev_from_priv() - get network device from priv + * @priv: network device private data + * + * Returns: net_device to which @priv belongs + */ +static inline struct net_device *netdev_from_priv(const void *priv) +{ + return container_of(priv, struct net_device, priv); +} + /* Set the sysfs physical device reference for the network logical device * if set prior to registration will cause a symlink during initialization. */ From 9027497a25e3c92b5053b2643e0c18f910865625 Mon Sep 17 00:00:00 2001 From: Qingfang Deng Date: Fri, 20 Mar 2026 15:56:04 +0800 Subject: [PATCH 0753/1561] team: use netdev_from_priv() Use the new netdev_from_priv() helper to access the net device from struct team. Signed-off-by: Qingfang Deng Link: https://patch.msgid.link/20260320075605.490832-2-dqfext@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/team/team_core.c | 77 ++++++++++++++++++++---------------- include/linux/if_team.h | 3 +- 2 files changed, 43 insertions(+), 37 deletions(-) diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c index b7282f5c9632..3a745bfb228a 100644 --- a/drivers/net/team/team_core.c +++ b/drivers/net/team/team_core.c @@ -66,7 +66,7 @@ static int team_port_set_orig_dev_addr(struct team_port *port) static int team_port_set_team_dev_addr(struct team *team, struct team_port *port) { - return __set_port_dev_addr(port->dev, team->dev->dev_addr); + return __set_port_dev_addr(port->dev, netdev_from_priv(team)->dev_addr); } int team_modeop_port_enter(struct team *team, struct team_port *port) @@ -591,7 +591,7 @@ static int __team_change_mode(struct team *team, static int team_change_mode(struct team *team, const char *kind) { const struct team_mode *new_mode; - struct net_device *dev = team->dev; + struct net_device *dev = netdev_from_priv(team); int err; if (!list_empty(&team->port_list)) { @@ -642,7 +642,7 @@ static void team_notify_peers_work(struct work_struct *work) rtnl_unlock(); return; } - call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, team->dev); + call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, netdev_from_priv(team)); rtnl_unlock(); if (val) schedule_delayed_work(&team->notify_peers.dw, @@ -651,7 +651,7 @@ static void team_notify_peers_work(struct work_struct *work) static void team_notify_peers(struct team *team) { - if (!team->notify_peers.count || !netif_running(team->dev)) + if (!team->notify_peers.count || !netif_running(netdev_from_priv(team))) return; atomic_add(team->notify_peers.count, &team->notify_peers.count_pending); schedule_delayed_work(&team->notify_peers.dw, 0); @@ -688,7 +688,7 @@ static void team_mcast_rejoin_work(struct work_struct *work) rtnl_unlock(); return; } - call_netdevice_notifiers(NETDEV_RESEND_IGMP, team->dev); + call_netdevice_notifiers(NETDEV_RESEND_IGMP, netdev_from_priv(team)); rtnl_unlock(); if (val) schedule_delayed_work(&team->mcast_rejoin.dw, @@ -697,7 +697,7 @@ static void team_mcast_rejoin_work(struct work_struct *work) static void team_mcast_rejoin(struct team *team) { - if (!team->mcast_rejoin.count || !netif_running(team->dev)) + if (!team->mcast_rejoin.count || !netif_running(netdev_from_priv(team))) return; atomic_add(team->mcast_rejoin.count, &team->mcast_rejoin.count_pending); schedule_delayed_work(&team->mcast_rejoin.dw, 0); @@ -756,7 +756,7 @@ static rx_handler_result_t team_handle_frame(struct sk_buff **pskb) u64_stats_inc(&pcpu_stats->rx_multicast); u64_stats_update_end(&pcpu_stats->syncp); - skb->dev = team->dev; + skb->dev = netdev_from_priv(team); } else if (res == RX_HANDLER_EXACT) { this_cpu_inc(team->pcpu_stats->rx_nohandler); } else { @@ -774,7 +774,7 @@ static rx_handler_result_t team_handle_frame(struct sk_buff **pskb) static int team_queue_override_init(struct team *team) { struct list_head *listarr; - unsigned int queue_cnt = team->dev->num_tx_queues - 1; + unsigned int queue_cnt = netdev_from_priv(team)->num_tx_queues - 1; unsigned int i; if (!queue_cnt) @@ -868,7 +868,7 @@ static void __team_queue_override_enabled_check(struct team *team) } if (enabled == team->queue_override_enabled) return; - netdev_dbg(team->dev, "%s queue override\n", + netdev_dbg(netdev_from_priv(team), "%s queue override\n", enabled ? "Enabling" : "Disabling"); team->queue_override_enabled = enabled; } @@ -984,11 +984,12 @@ static int team_port_enter(struct team *team, struct team_port *port) { int err = 0; - dev_hold(team->dev); + dev_hold(netdev_from_priv(team)); if (team->ops.port_enter) { err = team->ops.port_enter(team, port); if (err) { - netdev_err(team->dev, "Device %s failed to enter team mode\n", + netdev_err(netdev_from_priv(team), + "Device %s failed to enter team mode\n", port->dev->name); goto err_port_enter; } @@ -997,7 +998,7 @@ static int team_port_enter(struct team *team, struct team_port *port) return 0; err_port_enter: - dev_put(team->dev); + dev_put(netdev_from_priv(team)); return err; } @@ -1006,7 +1007,7 @@ static void team_port_leave(struct team *team, struct team_port *port) { if (team->ops.port_leave) team->ops.port_leave(team, port); - dev_put(team->dev); + dev_put(netdev_from_priv(team)); } #ifdef CONFIG_NET_POLL_CONTROLLER @@ -1030,7 +1031,7 @@ static int __team_port_enable_netpoll(struct team_port *port) static int team_port_enable_netpoll(struct team_port *port) { - if (!port->team->dev->npinfo) + if (!netdev_from_priv(port->team)->npinfo) return 0; return __team_port_enable_netpoll(port); @@ -1064,8 +1065,8 @@ static int team_upper_dev_link(struct team *team, struct team_port *port, lag_upper_info.tx_type = team->mode->lag_tx_type; lag_upper_info.hash_type = NETDEV_LAG_HASH_UNKNOWN; - err = netdev_master_upper_dev_link(port->dev, team->dev, NULL, - &lag_upper_info, extack); + err = netdev_master_upper_dev_link(port->dev, netdev_from_priv(team), + NULL, &lag_upper_info, extack); if (err) return err; port->dev->priv_flags |= IFF_TEAM_PORT; @@ -1074,7 +1075,7 @@ static int team_upper_dev_link(struct team *team, struct team_port *port, static void team_upper_dev_unlink(struct team *team, struct team_port *port) { - netdev_upper_dev_unlink(port->dev, team->dev); + netdev_upper_dev_unlink(port->dev, netdev_from_priv(team)); port->dev->priv_flags &= ~IFF_TEAM_PORT; } @@ -1085,7 +1086,7 @@ static int team_dev_type_check_change(struct net_device *dev, static int team_port_add(struct team *team, struct net_device *port_dev, struct netlink_ext_ack *extack) { - struct net_device *dev = team->dev; + struct net_device *dev = netdev_from_priv(team); struct team_port *port; char *portname = port_dev->name; int err; @@ -1247,7 +1248,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev, port->index = -1; list_add_tail_rcu(&port->list, &team->port_list); team_port_enable(team, port); - netdev_compute_master_upper_features(team->dev, true); + netdev_compute_master_upper_features(dev, true); __team_port_change_port_added(port, !!netif_oper_up(port_dev)); __team_options_change_check(team); @@ -1292,7 +1293,7 @@ static void __team_port_change_port_removed(struct team_port *port); static int team_port_del(struct team *team, struct net_device *port_dev, bool unregister) { - struct net_device *dev = team->dev; + struct net_device *dev = netdev_from_priv(team); struct team_port *port; char *portname = port_dev->name; @@ -1337,7 +1338,7 @@ static int team_port_del(struct team *team, struct net_device *port_dev, bool un } kfree_rcu(port, rcu); netdev_info(dev, "Port device %s removed\n", portname); - netdev_compute_master_upper_features(team->dev, true); + netdev_compute_master_upper_features(dev, true); return 0; } @@ -1506,7 +1507,7 @@ static int team_queue_id_option_set(struct team *team, if (port->queue_id == new_queue_id) return 0; - if (new_queue_id >= team->dev->real_num_tx_queues) + if (new_queue_id >= netdev_from_priv(team)->real_num_tx_queues) return -EINVAL; team_queue_override_port_change_queue_id(team, port, new_queue_id); return 0; @@ -1587,7 +1588,6 @@ static int team_init(struct net_device *dev) int i; int err; - team->dev = dev; team_set_no_mode(team); team->notifier_ctx = false; @@ -2256,7 +2256,7 @@ static struct team *team_nl_team_get(struct genl_info *info) static void team_nl_team_put(struct team *team) { - dev_put(team->dev); + dev_put(netdev_from_priv(team)); } typedef int team_nl_send_func_t(struct sk_buff *skb, @@ -2264,7 +2264,7 @@ typedef int team_nl_send_func_t(struct sk_buff *skb, static int team_nl_send_unicast(struct sk_buff *skb, struct team *team, u32 portid) { - return genlmsg_unicast(dev_net(team->dev), skb, portid); + return genlmsg_unicast(dev_net(netdev_from_priv(team)), skb, portid); } static int team_nl_fill_one_option_get(struct sk_buff *skb, struct team *team, @@ -2393,7 +2393,8 @@ start_again: return -EMSGSIZE; } - if (nla_put_u32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex)) + if (nla_put_u32(skb, TEAM_ATTR_TEAM_IFINDEX, + netdev_from_priv(team)->ifindex)) goto nla_put_failure; option_list = nla_nest_start_noflag(skb, TEAM_ATTR_LIST_OPTION); if (!option_list) @@ -2681,7 +2682,8 @@ start_again: return -EMSGSIZE; } - if (nla_put_u32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex)) + if (nla_put_u32(skb, TEAM_ATTR_TEAM_IFINDEX, + netdev_from_priv(team)->ifindex)) goto nla_put_failure; port_list = nla_nest_start_noflag(skb, TEAM_ATTR_LIST_PORT); if (!port_list) @@ -2782,7 +2784,8 @@ static struct genl_family team_nl_family __ro_after_init = { static int team_nl_send_multicast(struct sk_buff *skb, struct team *team, u32 portid) { - return genlmsg_multicast_netns(&team_nl_family, dev_net(team->dev), + return genlmsg_multicast_netns(&team_nl_family, + dev_net(netdev_from_priv(team)), skb, 0, 0, GFP_KERNEL); } @@ -2827,7 +2830,8 @@ static void __team_options_change_check(struct team *team) } err = team_nl_send_event_options_get(team, &sel_opt_inst_list); if (err && err != -ESRCH) - netdev_warn(team->dev, "Failed to send options change via netlink (err %d)\n", + netdev_warn(netdev_from_priv(team), + "Failed to send options change via netlink (err %d)\n", err); } @@ -2856,7 +2860,8 @@ static void __team_port_change_send(struct team_port *port, bool linkup) send_event: err = team_nl_send_event_port_get(port->team, port); if (err && err != -ESRCH) - netdev_warn(port->team->dev, "Failed to send port change of device %s via netlink (err %d)\n", + netdev_warn(netdev_from_priv(port->team), + "Failed to send port change of device %s via netlink (err %d)\n", port->dev->name, err); } @@ -2878,9 +2883,9 @@ static void __team_carrier_check(struct team *team) } if (team_linkup) - netif_carrier_on(team->dev); + netif_carrier_on(netdev_from_priv(team)); else - netif_carrier_off(team->dev); + netif_carrier_off(netdev_from_priv(team)); } static void __team_port_change_check(struct team_port *port, bool linkup) @@ -2939,12 +2944,14 @@ static int team_device_event(struct notifier_block *unused, !!netif_oper_up(port->dev)); break; case NETDEV_UNREGISTER: - team_del_slave_on_unregister(port->team->dev, dev); + team_del_slave_on_unregister(netdev_from_priv(port->team), + dev); break; case NETDEV_FEAT_CHANGE: if (!port->team->notifier_ctx) { port->team->notifier_ctx = true; - netdev_compute_master_upper_features(port->team->dev, true); + netdev_compute_master_upper_features(netdev_from_priv(port->team), + true); port->team->notifier_ctx = false; } break; @@ -2958,7 +2965,7 @@ static int team_device_event(struct notifier_block *unused, return NOTIFY_BAD; case NETDEV_RESEND_IGMP: /* Propagate to master device */ - call_netdevice_notifiers(event, port->team->dev); + call_netdevice_notifiers(event, netdev_from_priv(port->team)); break; } return NOTIFY_DONE; diff --git a/include/linux/if_team.h b/include/linux/if_team.h index ce97d891cf72..ccb5327de26d 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -186,7 +186,6 @@ struct team_mode { #define TEAM_MODE_PRIV_SIZE (sizeof(long) * TEAM_MODE_PRIV_LONGS) struct team { - struct net_device *dev; /* associated netdevice */ struct team_pcpu_stats __percpu *pcpu_stats; const struct header_ops *header_ops_cache; @@ -232,7 +231,7 @@ static inline int team_dev_queue_xmit(struct team *team, struct team_port *port, skb_set_queue_mapping(skb, qdisc_skb_cb(skb)->slave_dev_queue_mapping); skb->dev = port->dev; - if (unlikely(netpoll_tx_running(team->dev))) { + if (unlikely(netpoll_tx_running(netdev_from_priv(team)))) { team_netpoll_send_skb(port, skb); return 0; } From 0475f9e779b456f934adbc44eeb98e3080a1893f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Fri, 20 Mar 2026 09:58:21 +0100 Subject: [PATCH 0754/1561] ethtool: Track user-provided RSS indirection table size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Track the number of indirection table entries the user originally provided (context 0/default as well!). Replace IFF_RXFH_CONFIGURED with rss_indir_user_size: the flag is redundant now that user_size captures the same information. Add ethtool_rxfh_indir_lost() for drivers that must reset the indirection table. Convert bnxt and mlx5 to use it. Signed-off-by: Björn Töpel Link: https://patch.msgid.link/20260320085826.1957255-2-bjorn@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 +- .../net/ethernet/mellanox/mlx5/core/en_main.c | 21 ++++++++------ include/linux/ethtool.h | 7 +++++ include/linux/netdevice.h | 7 +---- net/ethtool/common.c | 28 +++++++++++++++++++ net/ethtool/ioctl.c | 9 +++--- net/ethtool/rss.c | 24 ++++++++++------ 7 files changed, 70 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 604966a398f5..84eb53b4172b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -8118,8 +8118,7 @@ static int __bnxt_reserve_rings(struct bnxt *bp) (bnxt_get_nr_rss_ctxs(bp, bp->rx_nr_rings) != bnxt_get_nr_rss_ctxs(bp, rx_rings) || bnxt_get_max_rss_ring(bp) >= rx_rings)) { - netdev_warn(bp->dev, "RSS table entries reverting to default\n"); - bp->dev->priv_flags &= ~IFF_RXFH_CONFIGURED; + ethtool_rxfh_indir_lost(bp->dev); } } bp->rx_nr_rings = rx_rings; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 3eebdf402129..1238e5356012 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -6480,12 +6480,23 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv) /* max number of channels may have changed */ max_nch = mlx5e_calc_max_nch(priv->mdev, priv->netdev, profile); + + /* Locking is required by ethtool_rxfh_indir_lost() (sends + * ETHTOOL_MSG_RSS_NTF) and by netif_set_real_num_*_queues in case + * the netdev has been registered by this point (if this function + * was called in the reload or resume flow). + */ + if (need_lock) { + rtnl_lock(); + netdev_lock(priv->netdev); + } + if (priv->channels.params.num_channels > max_nch) { mlx5_core_warn(priv->mdev, "MLX5E: Reducing number of channels to %d\n", max_nch); /* Reducing the number of channels - RXFH has to be reset, and * mlx5e_num_channels_changed below will build the RQT. */ - priv->netdev->priv_flags &= ~IFF_RXFH_CONFIGURED; + ethtool_rxfh_indir_lost(priv->netdev); priv->channels.params.num_channels = max_nch; if (priv->channels.params.mqprio.mode == TC_MQPRIO_MODE_CHANNEL) { mlx5_core_warn(priv->mdev, "MLX5E: Disabling MQPRIO channel mode\n"); @@ -6502,15 +6513,7 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv) /* 1. Set the real number of queues in the kernel the first time. * 2. Set our default XPS cpumask. * 3. Build the RQT. - * - * Locking is required by netif_set_real_num_*_queues in case the - * netdev has been registered by this point (if this function was called - * in the reload or resume flow). */ - if (need_lock) { - rtnl_lock(); - netdev_lock(priv->netdev); - } err = mlx5e_num_channels_changed(priv); if (need_lock) { netdev_unlock(priv->netdev); diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 656d465bcd06..34ca9261de82 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -176,6 +176,8 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings) * struct ethtool_rxfh_context - a custom RSS context configuration * @indir_size: Number of u32 entries in indirection table * @key_size: Size of hash key, in bytes + * @indir_user_size: number of user provided entries for the + * indirection table * @priv_size: Size of driver private data, in bytes * @hfunc: RSS hash function identifier. One of the %ETH_RSS_HASH_* * @input_xfrm: Defines how the input data is transformed. Valid values are one @@ -186,6 +188,7 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings) struct ethtool_rxfh_context { u32 indir_size; u32 key_size; + u32 indir_user_size; u16 priv_size; u8 hfunc; u8 input_xfrm; @@ -214,6 +217,7 @@ static inline u8 *ethtool_rxfh_context_key(struct ethtool_rxfh_context *ctx) } void ethtool_rxfh_context_lost(struct net_device *dev, u32 context_id); +void ethtool_rxfh_indir_lost(struct net_device *dev); struct link_mode_info { int speed; @@ -1337,12 +1341,15 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev, * @rss_ctx: XArray of custom RSS contexts * @rss_lock: Protects entries in @rss_ctx. May be taken from * within RTNL. + * @rss_indir_user_size: Number of user provided entries for the default + * (context 0) indirection table. * @wol_enabled: Wake-on-LAN is enabled * @module_fw_flash_in_progress: Module firmware flashing is in progress. */ struct ethtool_netdev_state { struct xarray rss_ctx; struct mutex rss_lock; + u32 rss_indir_user_size; unsigned wol_enabled:1; unsigned module_fw_flash_in_progress:1; }; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6882b41bb3e8..e15367373f7c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1716,7 +1716,6 @@ struct net_device_ops { * @IFF_OPENVSWITCH: device is a Open vSwitch master * @IFF_L3MDEV_SLAVE: device is enslaved to an L3 master device * @IFF_TEAM: device is a team device - * @IFF_RXFH_CONFIGURED: device has had Rx Flow indirection table configured * @IFF_PHONY_HEADROOM: the headroom value is controlled by an external * entity (i.e. the master device for bridged veth) * @IFF_MACSEC: device is a MACsec device @@ -1752,7 +1751,6 @@ enum netdev_priv_flags { IFF_OPENVSWITCH = 1<<20, IFF_L3MDEV_SLAVE = 1<<21, IFF_TEAM = 1<<22, - IFF_RXFH_CONFIGURED = 1<<23, IFF_PHONY_HEADROOM = 1<<24, IFF_MACSEC = 1<<25, IFF_NO_RX_HANDLER = 1<<26, @@ -5580,10 +5578,7 @@ static inline bool netif_is_lag_port(const struct net_device *dev) return netif_is_bond_slave(dev) || netif_is_team_port(dev); } -static inline bool netif_is_rxfh_configured(const struct net_device *dev) -{ - return dev->priv_flags & IFF_RXFH_CONFIGURED; -} +bool netif_is_rxfh_configured(const struct net_device *dev); static inline bool netif_is_failover(const struct net_device *dev) { diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 6a4a3797a812..98e85f393f8c 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -1204,6 +1204,34 @@ void ethtool_rxfh_context_lost(struct net_device *dev, u32 context_id) } EXPORT_SYMBOL(ethtool_rxfh_context_lost); +bool netif_is_rxfh_configured(const struct net_device *dev) +{ + return dev->ethtool->rss_indir_user_size; +} +EXPORT_SYMBOL(netif_is_rxfh_configured); + +/** + * ethtool_rxfh_indir_lost - Notify core that the RSS indirection table was lost + * @dev: network device + * + * Drivers should call this when the device can no longer maintain the + * user-configured indirection table, typically after a HW fault recovery + * that reduced the maximum queue count. Marks the default RSS context + * indirection table as unconfigured and sends an %ETHTOOL_MSG_RSS_NTF + * notification. + */ +void ethtool_rxfh_indir_lost(struct net_device *dev) +{ + WARN_ONCE(!rtnl_is_locked() && + !lockdep_is_held_type(&dev->ethtool->rss_lock, -1), + "RSS context lock assertion failed\n"); + + netdev_err(dev, "device error, RSS indirection table lost\n"); + dev->ethtool->rss_indir_user_size = 0; + ethtool_rss_notify(dev, ETHTOOL_MSG_RSS_NTF, 0); +} +EXPORT_SYMBOL(ethtool_rxfh_indir_lost); + enum ethtool_link_medium ethtool_str_to_medium(const char *str) { int i; diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 11dfbf076b6d..3c713a91ad0d 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1405,9 +1405,9 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, /* indicate whether rxfh was set to default */ if (user_size == 0) - dev->priv_flags &= ~IFF_RXFH_CONFIGURED; + dev->ethtool->rss_indir_user_size = 0; else - dev->priv_flags |= IFF_RXFH_CONFIGURED; + dev->ethtool->rss_indir_user_size = rxfh_dev.indir_size; out_unlock: mutex_unlock(&dev->ethtool->rss_lock); @@ -1722,9 +1722,9 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, if (!rxfh_dev.rss_context) { /* indicate whether rxfh was set to default */ if (rxfh.indir_size == 0) - dev->priv_flags &= ~IFF_RXFH_CONFIGURED; + dev->ethtool->rss_indir_user_size = 0; else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) - dev->priv_flags |= IFF_RXFH_CONFIGURED; + dev->ethtool->rss_indir_user_size = dev_indir_size; } /* Update rss_ctx tracking */ if (rxfh_dev.rss_delete) { @@ -1737,6 +1737,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, ctx->indir_configured = rxfh.indir_size && rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE; + ctx->indir_user_size = dev_indir_size; } if (rxfh_dev.key) { memcpy(ethtool_rxfh_context_key(ctx), rxfh_dev.key, diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c index 0f4e5cd2ac71..5cf90d73e70b 100644 --- a/net/ethtool/rss.c +++ b/net/ethtool/rss.c @@ -686,7 +686,7 @@ rss_set_prep_indir(struct net_device *dev, struct genl_info *info, *mod |= memcmp(rxfh->indir, data->indir_table, data->indir_size); - return 0; + return user_size; err_free: kfree(rxfh->indir); @@ -833,6 +833,7 @@ ethnl_rss_set(struct ethnl_req_info *req_info, struct genl_info *info) struct nlattr **tb = info->attrs; struct rss_reply_data data = {}; const struct ethtool_ops *ops; + u32 indir_user_size; int ret; ops = dev->ethtool_ops; @@ -845,8 +846,9 @@ ethnl_rss_set(struct ethnl_req_info *req_info, struct genl_info *info) rxfh.rss_context = request->rss_context; ret = rss_set_prep_indir(dev, info, &data, &rxfh, &indir_reset, &mod); - if (ret) + if (ret < 0) goto exit_clean_data; + indir_user_size = ret; indir_mod = !!tb[ETHTOOL_A_RSS_INDIR]; rxfh.hfunc = data.hfunc; @@ -889,12 +891,15 @@ ethnl_rss_set(struct ethnl_req_info *req_info, struct genl_info *info) if (ret) goto exit_unlock; - if (ctx) + if (ctx) { rss_set_ctx_update(ctx, tb, &data, &rxfh); - else if (indir_reset) - dev->priv_flags &= ~IFF_RXFH_CONFIGURED; - else if (indir_mod) - dev->priv_flags |= IFF_RXFH_CONFIGURED; + if (indir_user_size) + ctx->indir_user_size = indir_user_size; + } else if (indir_reset) { + dev->ethtool->rss_indir_user_size = 0; + } else if (indir_mod) { + dev->ethtool->rss_indir_user_size = indir_user_size; + } exit_unlock: mutex_unlock(&dev->ethtool->rss_lock); @@ -999,6 +1004,7 @@ int ethnl_rss_create_doit(struct sk_buff *skb, struct genl_info *info) const struct ethtool_ops *ops; struct rss_req_info req = {}; struct net_device *dev; + u32 indir_user_size; struct sk_buff *rsp; void *hdr; u32 limit; @@ -1035,8 +1041,9 @@ int ethnl_rss_create_doit(struct sk_buff *skb, struct genl_info *info) goto exit_ops; ret = rss_set_prep_indir(dev, info, &data, &rxfh, &indir_dflt, &mod); - if (ret) + if (ret < 0) goto exit_clean_data; + indir_user_size = ret; ethnl_update_u8(&rxfh.hfunc, tb[ETHTOOL_A_RSS_HFUNC], &mod); @@ -1080,6 +1087,7 @@ int ethnl_rss_create_doit(struct sk_buff *skb, struct genl_info *info) /* Store the config from rxfh to Xarray.. */ rss_set_ctx_update(ctx, tb, &data, &rxfh); + ctx->indir_user_size = indir_user_size; /* .. copy from Xarray to data. */ __rss_prepare_ctx(dev, &data, ctx); From 02bcb20083b2780772cfb66cd426f31940296783 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Fri, 20 Mar 2026 09:58:22 +0100 Subject: [PATCH 0755/1561] ethtool: Add RSS indirection table resize helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The core locks ctx->indir_size when an RSS context is created. Some NICs (e.g. bnxt) change their indirection table size based on the channel count, because the hardware table is a shared resource. This forces drivers to reject channel changes when RSS contexts exist. Add driver helpers to resize indirection tables: ethtool_rxfh_indir_can_resize() checks whether the default context indirection table can be resized. ethtool_rxfh_indir_resize() resizes the default context table in place. Folding (shrink) requires the table to be periodic at the new size; non-periodic tables are rejected. Unfolding (grow) replicates the existing pattern. Sizes must be multiples of each other. ethtool_rxfh_ctxs_can_resize() validates all non-default RSS contexts can be resized. ethtool_rxfh_ctxs_resize() applies the resize. Signed-off-by: Björn Töpel Link: https://patch.msgid.link/20260320085826.1957255-3-bjorn@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 6 ++ net/ethtool/common.c | 155 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 161 insertions(+) diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 34ca9261de82..1cb0740ba331 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -218,6 +218,12 @@ static inline u8 *ethtool_rxfh_context_key(struct ethtool_rxfh_context *ctx) void ethtool_rxfh_context_lost(struct net_device *dev, u32 context_id); void ethtool_rxfh_indir_lost(struct net_device *dev); +bool ethtool_rxfh_indir_can_resize(struct net_device *dev, const u32 *tbl, + u32 old_size, u32 new_size); +void ethtool_rxfh_indir_resize(struct net_device *dev, u32 *tbl, + u32 old_size, u32 new_size); +int ethtool_rxfh_ctxs_can_resize(struct net_device *dev, u32 new_indir_size); +void ethtool_rxfh_ctxs_resize(struct net_device *dev, u32 new_indir_size); struct link_mode_info { int speed; diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 98e85f393f8c..84ec88dee05c 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -1232,6 +1232,161 @@ void ethtool_rxfh_indir_lost(struct net_device *dev) } EXPORT_SYMBOL(ethtool_rxfh_indir_lost); +static bool ethtool_rxfh_is_periodic(const u32 *tbl, u32 old_size, u32 new_size) +{ + u32 i; + + for (i = new_size; i < old_size; i++) + if (tbl[i] != tbl[i % new_size]) + return false; + return true; +} + +static bool ethtool_rxfh_can_resize(const u32 *tbl, u32 old_size, u32 new_size, + u32 user_size) +{ + if (new_size == old_size) + return true; + + if (!user_size) + return true; + + if (new_size < old_size) { + if (new_size < user_size) + return false; + if (old_size % new_size) + return false; + if (!ethtool_rxfh_is_periodic(tbl, old_size, new_size)) + return false; + return true; + } + + if (new_size % old_size) + return false; + return true; +} + +/* Resize without validation; caller must have called can_resize first */ +static void ethtool_rxfh_resize(u32 *tbl, u32 old_size, u32 new_size) +{ + u32 i; + + /* Grow: replicate existing pattern; shrink is a no-op on the data */ + for (i = old_size; i < new_size; i++) + tbl[i] = tbl[i % old_size]; +} + +/** + * ethtool_rxfh_indir_can_resize - Check if context 0 indir table can resize + * @dev: network device + * @tbl: indirection table + * @old_size: current number of entries in the table + * @new_size: desired number of entries + * + * Validate that @tbl can be resized from @old_size to @new_size without + * data loss. Uses the user_size floor from context 0. When user_size is + * zero the table is not user-configured and resize always succeeds. + * Read-only; does not modify the table. + * + * Return: true if resize is possible, false otherwise. + */ +bool ethtool_rxfh_indir_can_resize(struct net_device *dev, const u32 *tbl, + u32 old_size, u32 new_size) +{ + return ethtool_rxfh_can_resize(tbl, old_size, new_size, + dev->ethtool->rss_indir_user_size); +} +EXPORT_SYMBOL(ethtool_rxfh_indir_can_resize); + +/** + * ethtool_rxfh_indir_resize - Fold or unfold context 0 indirection table + * @dev: network device + * @tbl: indirection table (must have room for max(old_size, new_size) entries) + * @old_size: current number of entries in the table + * @new_size: desired number of entries + * + * Resize the default RSS context indirection table in place. Caller + * must have validated with ethtool_rxfh_indir_can_resize() first. + */ +void ethtool_rxfh_indir_resize(struct net_device *dev, u32 *tbl, + u32 old_size, u32 new_size) +{ + if (!dev->ethtool->rss_indir_user_size) + return; + + ethtool_rxfh_resize(tbl, old_size, new_size); +} +EXPORT_SYMBOL(ethtool_rxfh_indir_resize); + +/** + * ethtool_rxfh_ctxs_can_resize - Validate resize for all RSS contexts + * @dev: network device + * @new_indir_size: new indirection table size + * + * Validate that the indirection tables of all non-default RSS contexts + * can be resized to @new_indir_size. Read-only; does not modify any + * context. Intended to be paired with ethtool_rxfh_ctxs_resize(). + * + * Return: 0 if all contexts can be resized, negative errno on failure. + */ +int ethtool_rxfh_ctxs_can_resize(struct net_device *dev, u32 new_indir_size) +{ + struct ethtool_rxfh_context *ctx; + unsigned long context; + int ret = 0; + + if (!dev->ethtool_ops->rxfh_indir_space || + new_indir_size > dev->ethtool_ops->rxfh_indir_space) + return -EINVAL; + + mutex_lock(&dev->ethtool->rss_lock); + xa_for_each(&dev->ethtool->rss_ctx, context, ctx) { + u32 *indir = ethtool_rxfh_context_indir(ctx); + + if (!ethtool_rxfh_can_resize(indir, ctx->indir_size, + new_indir_size, + ctx->indir_user_size)) { + ret = -EINVAL; + goto unlock; + } + } +unlock: + mutex_unlock(&dev->ethtool->rss_lock); + return ret; +} +EXPORT_SYMBOL(ethtool_rxfh_ctxs_can_resize); + +/** + * ethtool_rxfh_ctxs_resize - Resize all RSS context indirection tables + * @dev: network device + * @new_indir_size: new indirection table size + * + * Resize the indirection table of every non-default RSS context to + * @new_indir_size. Caller must have validated with + * ethtool_rxfh_ctxs_can_resize() first. An %ETHTOOL_MSG_RSS_NTF is + * sent for each resized context. + * + * Notifications are sent outside the RSS lock to avoid holding the + * mutex during notification delivery. + */ +void ethtool_rxfh_ctxs_resize(struct net_device *dev, u32 new_indir_size) +{ + struct ethtool_rxfh_context *ctx; + unsigned long context; + + mutex_lock(&dev->ethtool->rss_lock); + xa_for_each(&dev->ethtool->rss_ctx, context, ctx) { + ethtool_rxfh_resize(ethtool_rxfh_context_indir(ctx), + ctx->indir_size, new_indir_size); + ctx->indir_size = new_indir_size; + } + mutex_unlock(&dev->ethtool->rss_lock); + + xa_for_each(&dev->ethtool->rss_ctx, context, ctx) + ethtool_rss_notify(dev, ETHTOOL_MSG_RSS_NTF, context); +} +EXPORT_SYMBOL(ethtool_rxfh_ctxs_resize); + enum ethtool_link_medium ethtool_str_to_medium(const char *str) { int i; From 57cdfe0dc70ba7cf234707e9e2c63613b9ce1e75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Fri, 20 Mar 2026 09:58:23 +0100 Subject: [PATCH 0756/1561] bnxt_en: Resize RSS contexts on channel count change MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bnxt_set_channels() previously rejected channel changes that alter the RSS table size when RSS contexts exist, because non-default context sizes were locked at creation. Replace the rejection with the new resize helpers. RSS table size only changes on P5 chips with older firmware; newer firmware always uses the largest table size. Signed-off-by: Björn Töpel Link: https://patch.msgid.link/20260320085826.1957255-4-bjorn@kernel.org Signed-off-by: Jakub Kicinski --- .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 35 +++++++++++++++---- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 48e8e3be70d3..b87ac2bb43dd 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -942,6 +942,7 @@ static int bnxt_set_channels(struct net_device *dev, { struct bnxt *bp = netdev_priv(dev); int req_tx_rings, req_rx_rings, tcs; + u32 new_tbl_size = 0, old_tbl_size; bool sh = false; int tx_xdp = 0; int rc = 0; @@ -977,19 +978,33 @@ static int bnxt_set_channels(struct net_device *dev, tx_xdp = req_rx_rings; } - if (bnxt_get_nr_rss_ctxs(bp, req_rx_rings) != - bnxt_get_nr_rss_ctxs(bp, bp->rx_nr_rings) && - (netif_is_rxfh_configured(dev) || bp->num_rss_ctx)) { - netdev_warn(dev, "RSS table size change required, RSS table entries must be default (with no additional RSS contexts present) to proceed\n"); - return -EINVAL; - } - rc = bnxt_check_rings(bp, req_tx_rings, req_rx_rings, sh, tcs, tx_xdp); if (rc) { netdev_warn(dev, "Unable to allocate the requested rings\n"); return rc; } + /* RSS table size only changes on P5 chips with older firmware; + * newer firmware always uses the largest table size. + */ + if (bnxt_get_nr_rss_ctxs(bp, req_rx_rings) != + bnxt_get_nr_rss_ctxs(bp, bp->rx_nr_rings)) { + new_tbl_size = bnxt_get_nr_rss_ctxs(bp, req_rx_rings) * + BNXT_RSS_TABLE_ENTRIES_P5; + old_tbl_size = bnxt_get_rxfh_indir_size(dev); + + if (!ethtool_rxfh_indir_can_resize(dev, bp->rss_indir_tbl, + old_tbl_size, + new_tbl_size)) { + netdev_warn(dev, "RSS table resize not possible\n"); + return -EINVAL; + } + + rc = ethtool_rxfh_ctxs_can_resize(dev, new_tbl_size); + if (rc) + return rc; + } + if (netif_running(dev)) { if (BNXT_PF(bp)) { /* TODO CHIMP_FW: Send message to all VF's @@ -999,6 +1014,12 @@ static int bnxt_set_channels(struct net_device *dev, bnxt_close_nic(bp, true, false); } + if (new_tbl_size) { + ethtool_rxfh_indir_resize(dev, bp->rss_indir_tbl, + old_tbl_size, new_tbl_size); + ethtool_rxfh_ctxs_resize(dev, new_tbl_size); + } + if (sh) { bp->flags |= BNXT_FLAG_SHARED_RINGS; bp->rx_nr_rings = channel->combined_count; From 10329ce49285e8548da25bdb1cdba3badccfb00c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Fri, 20 Mar 2026 09:58:24 +0100 Subject: [PATCH 0757/1561] selftests: rss_drv: Add RSS indirection table resize tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add resize tests to rss_drv.py. Devices without dynamic table sizing are skipped via _require_dynamic_indir_size(). resize_periodic: set a periodic 4-entry table, shrink channels to fold, grow back to unfold. Check the exact pattern is preserved. Has main and non-default context variants. resize_below_user_size_reject: send a periodic table with user_size between the big and small device table sizes. Verify that shrinking below user_size is rejected even though the table is periodic. Has main and non-default context variants. resize_nonperiodic_reject: set a non-periodic table (equal N), verify that channel reduction is rejected. resize_nonperiodic_no_corruption: verify a failed resize leaves both the indirection table contents and the channel count unchanged. Signed-off-by: Björn Töpel Link: https://patch.msgid.link/20260320085826.1957255-5-bjorn@kernel.org Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/hw/rss_drv.py | 233 +++++++++++++++++- 1 file changed, 229 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/drivers/net/hw/rss_drv.py b/tools/testing/selftests/drivers/net/hw/rss_drv.py index 2d1a33189076..bd59dace6e15 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_drv.py +++ b/tools/testing/selftests/drivers/net/hw/rss_drv.py @@ -5,9 +5,9 @@ Driver-related behavior tests for RSS. """ -from lib.py import ksft_run, ksft_exit, ksft_ge -from lib.py import ksft_variants, KsftNamedVariant, KsftSkipEx -from lib.py import defer, ethtool +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge +from lib.py import ksft_variants, KsftNamedVariant, KsftSkipEx, ksft_raises +from lib.py import defer, ethtool, CmdExitFailure from lib.py import EthtoolFamily, NlError from lib.py import NetDrvEnv @@ -45,6 +45,18 @@ def _maybe_create_context(cfg, create_context): return ctx_id +def _require_dynamic_indir_size(cfg, ch_max): + """Skip if the device does not dynamically size its indirection table.""" + ethtool(f"-X {cfg.ifname} default") + ethtool(f"-L {cfg.ifname} combined 2") + small = len(_get_rss(cfg)['rss-indirection-table']) + ethtool(f"-L {cfg.ifname} combined {ch_max}") + large = len(_get_rss(cfg)['rss-indirection-table']) + + if small == large: + raise KsftSkipEx("Device does not dynamically size indirection table") + + @ksft_variants([ KsftNamedVariant("main", False), KsftNamedVariant("ctx", True), @@ -76,11 +88,224 @@ def indir_size_4x(cfg, create_context): _test_rss_indir_size(cfg, test_max, context=ctx_id) +@ksft_variants([ + KsftNamedVariant("main", False), + KsftNamedVariant("ctx", True), +]) +def resize_periodic(cfg, create_context): + """Test that a periodic indirection table survives channel changes. + + Set a non-default periodic table ([3, 2, 1, 0] x N) via netlink, + reduce channels to trigger a fold, then increase to trigger an + unfold. Using a reversed pattern (instead of [0, 1, 2, 3]) ensures + the test can distinguish a correct fold from a driver that silently + resets the table to defaults. Verify the exact pattern is preserved + and the size tracks the channel count. + """ + channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + ch_max = channels.get('combined-max', 0) + qcnt = channels['combined-count'] + + if ch_max < 4: + raise KsftSkipEx(f"Not enough queues for the test: max={ch_max}") + + defer(ethtool, f"-L {cfg.ifname} combined {qcnt}") + + _require_dynamic_indir_size(cfg, ch_max) + + ctx_id = _maybe_create_context(cfg, create_context) + + # Set a non-default periodic pattern via netlink. + # Send only 4 entries (user_size=4) so the kernel replicates it + # to fill the device table. This allows folding down to 4 entries. + rss = _get_rss(cfg, context=ctx_id) + orig_size = len(rss['rss-indirection-table']) + pattern = [3, 2, 1, 0] + req = {'header': {'dev-index': cfg.ifindex}, 'indir': pattern} + if ctx_id: + req['context'] = ctx_id + else: + defer(ethtool, f"-X {cfg.ifname} default") + cfg.ethnl.rss_set(req) + + # Shrink — should fold + ethtool(f"-L {cfg.ifname} combined 4") + rss = _get_rss(cfg, context=ctx_id) + indir = rss['rss-indirection-table'] + + ksft_ge(orig_size, len(indir), "Table did not shrink") + ksft_eq(indir, [3, 2, 1, 0] * (len(indir) // 4), + "Folded table has wrong pattern") + + # Grow back — should unfold + ethtool(f"-L {cfg.ifname} combined {ch_max}") + rss = _get_rss(cfg, context=ctx_id) + indir = rss['rss-indirection-table'] + + ksft_eq(len(indir), orig_size, "Table size not restored") + ksft_eq(indir, [3, 2, 1, 0] * (len(indir) // 4), + "Unfolded table has wrong pattern") + + +@ksft_variants([ + KsftNamedVariant("main", False), + KsftNamedVariant("ctx", True), +]) +def resize_below_user_size_reject(cfg, create_context): + """Test that shrinking below user_size is rejected. + + Send a table via netlink whose size (user_size) sits between + the small and large device table sizes. The table is periodic, + so folding would normally succeed, but the user_size floor must + prevent it. + """ + channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + ch_max = channels.get('combined-max', 0) + qcnt = channels['combined-count'] + + if ch_max < 4: + raise KsftSkipEx(f"Not enough queues for the test: max={ch_max}") + + defer(ethtool, f"-L {cfg.ifname} combined {qcnt}") + + _require_dynamic_indir_size(cfg, ch_max) + + ctx_id = _maybe_create_context(cfg, create_context) + + # Measure the table size at max channels + rss = _get_rss(cfg, context=ctx_id) + big_size = len(rss['rss-indirection-table']) + + # Measure the table size at reduced channels + ethtool(f"-L {cfg.ifname} combined 4") + rss = _get_rss(cfg, context=ctx_id) + small_size = len(rss['rss-indirection-table']) + ethtool(f"-L {cfg.ifname} combined {ch_max}") + + if small_size >= big_size: + raise KsftSkipEx("Table did not shrink at reduced channels") + + # Find a user_size + user_size = None + for div in [2, 4]: + candidate = big_size // div + if candidate > small_size and big_size % candidate == 0: + user_size = candidate + break + if user_size is None: + raise KsftSkipEx("No suitable user_size between small and big table") + + # Send a periodic sub-table of exactly user_size entries. + # Pattern safe for 4 channels. + pattern = [0, 1, 2, 3] * (user_size // 4) + if len(pattern) != user_size: + raise KsftSkipEx(f"user_size ({user_size}) not divisible by 4") + req = {'header': {'dev-index': cfg.ifindex}, 'indir': pattern} + if ctx_id: + req['context'] = ctx_id + else: + defer(ethtool, f"-X {cfg.ifname} default") + cfg.ethnl.rss_set(req) + + # Shrink channels — table would go to small_size < user_size. + # The table is periodic so folding would work, but user_size + # floor must reject it. + with ksft_raises(CmdExitFailure): + ethtool(f"-L {cfg.ifname} combined 4") + + +@ksft_variants([ + KsftNamedVariant("main", False), + KsftNamedVariant("ctx", True), +]) +def resize_nonperiodic_reject(cfg, create_context): + """Test that a non-periodic table blocks channel reduction. + + Set equal weight across all queues so the table is not periodic + at any smaller size, then verify channel reduction is rejected. + An additional context with a periodic table is created to verify + that validation catches the non-periodic one even when others + are fine. + """ + channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + ch_max = channels.get('combined-max', 0) + qcnt = channels['combined-count'] + + if ch_max < 4: + raise KsftSkipEx(f"Not enough queues for the test: max={ch_max}") + + defer(ethtool, f"-L {cfg.ifname} combined {qcnt}") + + _require_dynamic_indir_size(cfg, ch_max) + + ctx_id = _maybe_create_context(cfg, create_context) + ctx_ref = f"context {ctx_id}" if ctx_id else "" + + # Create an extra context with a periodic (foldable) table so that + # the validation must iterate all contexts to find the bad one. + extra_ctx = _maybe_create_context(cfg, True) + ethtool(f"-X {cfg.ifname} context {extra_ctx} equal 2") + + ethtool(f"-X {cfg.ifname} {ctx_ref} equal {ch_max}") + if not create_context: + defer(ethtool, f"-X {cfg.ifname} default") + + with ksft_raises(CmdExitFailure): + ethtool(f"-L {cfg.ifname} combined 2") + + +@ksft_variants([ + KsftNamedVariant("main", False), + KsftNamedVariant("ctx", True), +]) +def resize_nonperiodic_no_corruption(cfg, create_context): + """Test that a failed resize does not corrupt table or channel count. + + Set a non-periodic table, attempt a channel reduction (which must + fail), then verify both the indirection table contents and the + channel count are unchanged. + """ + channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + ch_max = channels.get('combined-max', 0) + qcnt = channels['combined-count'] + + if ch_max < 4: + raise KsftSkipEx(f"Not enough queues for the test: max={ch_max}") + + defer(ethtool, f"-L {cfg.ifname} combined {qcnt}") + + _require_dynamic_indir_size(cfg, ch_max) + + ctx_id = _maybe_create_context(cfg, create_context) + ctx_ref = f"context {ctx_id}" if ctx_id else "" + + ethtool(f"-X {cfg.ifname} {ctx_ref} equal {ch_max}") + if not create_context: + defer(ethtool, f"-X {cfg.ifname} default") + + rss_before = _get_rss(cfg, context=ctx_id) + + with ksft_raises(CmdExitFailure): + ethtool(f"-L {cfg.ifname} combined 2") + + rss_after = _get_rss(cfg, context=ctx_id) + ksft_eq(rss_after['rss-indirection-table'], + rss_before['rss-indirection-table'], + "Indirection table corrupted after failed resize") + + channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + ksft_eq(channels['combined-count'], ch_max, + "Channel count changed after failed resize") + + def main() -> None: """ Ksft boiler plate main """ with NetDrvEnv(__file__) as cfg: cfg.ethnl = EthtoolFamily() - ksft_run([indir_size_4x], args=(cfg, )) + ksft_run([indir_size_4x, resize_periodic, + resize_below_user_size_reject, + resize_nonperiodic_reject, + resize_nonperiodic_no_corruption], args=(cfg, )) ksft_exit() From db3bd9e55c3c7b159dbd90772ee39441a3133d16 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Sat, 21 Mar 2026 04:26:11 +0000 Subject: [PATCH 0758/1561] selftests: forwarding: local_termination: fix PTP UDP cksums All six PTP-over-IP test frames (3x IPv4 + 3x IPv6) contain incorrect UDP checksums. The stored values are the ones-complement sums of just the pseudo-headers, not the complete UDP checksums over pseudo-header + UDP header + payload. This is characteristic of frames captured on the sender before TX checksum offload completion. For example, the IPv4 Sync and Follow-Up frames both store checksum 0xa3c8 despite having different UDP payloads and port numbers - 0xa3c8 is their shared pseudo-header sum (same src/dst IP, same protocol and UDP length). While most L2 switches forward frames without verifying transport checksums, hardware that performs deep packet inspection or has PTP awareness may validate UDP checksums and drop frames that fail verification. This causes the 1588v2 over IPv4/IPv6 tests to fail on such hardware even though L2 PTP (which has no UDP checksum) passes fine. Replace all six pseudo-header partial sums with the correctly computed full UDP checksums: IPv4 Sync: 0xa3c8 -> 0x9f41 IPv4 Follow-Up: 0xa3c8 -> 0xeb8a IPv4 Peer Delay Req: 0xa2bc -> 0x9ab9 IPv6 Sync: 0x2e92 -> 0x1476 IPv6 Follow-Up: 0x2e92 -> 0xf047 IPv6 Peer Delay Req: 0xb454 -> 0x891f Signed-off-by: Daniel Golle Reviewed-by: Alexander Sverdlin Link: https://patch.msgid.link/651c3decb80023e4395ec149fd81110afa3869a1.1774067006.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- .../selftests/net/forwarding/local_termination.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh index 9bc9d25e7136..15b1a1255a41 100755 --- a/tools/testing/selftests/net/forwarding/local_termination.sh +++ b/tools/testing/selftests/net/forwarding/local_termination.sh @@ -57,21 +57,21 @@ PTP_1588_L2_PDELAY_REQ=" \ PTP_1588_IPV4_SYNC=" \ 01:00:5e:00:01:81 00:00:de:ad:be:ef 08:00 45 00 \ 00 48 0a 9a 40 00 01 11 cb 88 c0 00 02 01 e0 00 \ -01 81 01 3f 01 3f 00 34 a3 c8 00 02 00 2c 00 00 \ +01 81 01 3f 01 3f 00 34 9f 41 00 02 00 2c 00 00 \ 02 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \ 63 ff fe cf 17 0e 00 01 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00" PTP_1588_IPV4_FOLLOW_UP=" 01:00:5e:00:01:81 00:00:de:ad:be:ef 08:00 45 00 \ 00 48 0a 9b 40 00 01 11 cb 87 c0 00 02 01 e0 00 \ -01 81 01 40 01 40 00 34 a3 c8 08 02 00 2c 00 00 \ +01 81 01 40 01 40 00 34 eb 8a 08 02 00 2c 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \ 63 ff fe cf 17 0e 00 01 00 00 02 00 00 00 66 83 \ c6 0f 1d 9a 61 87" PTP_1588_IPV4_PDELAY_REQ=" \ 01:00:5e:00:00:6b 00:00:de:ad:be:ef 08:00 45 00 \ 00 52 35 a9 40 00 01 11 a1 85 c0 00 02 01 e0 00 \ -00 6b 01 3f 01 3f 00 3e a2 bc 02 02 00 36 00 00 \ +00 6b 01 3f 01 3f 00 3e 9a b9 02 02 00 36 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \ 63 ff fe cf 17 0e 00 01 00 01 05 7f 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00" @@ -79,7 +79,7 @@ PTP_1588_IPV6_SYNC=" \ 33:33:00:00:01:81 00:00:de:ad:be:ef 86:dd 60 06 \ 7c 2f 00 36 11 01 20 01 0d b8 00 01 00 00 00 00 \ 00 00 00 00 00 01 ff 0e 00 00 00 00 00 00 00 00 \ -00 00 00 00 01 81 01 3f 01 3f 00 36 2e 92 00 02 \ +00 00 00 00 01 81 01 3f 01 3f 00 36 14 76 00 02 \ 00 2c 00 00 02 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00" @@ -87,7 +87,7 @@ PTP_1588_IPV6_FOLLOW_UP=" \ 33:33:00:00:01:81 00:00:de:ad:be:ef 86:dd 60 0a \ 00 bc 00 36 11 01 20 01 0d b8 00 01 00 00 00 00 \ 00 00 00 00 00 01 ff 0e 00 00 00 00 00 00 00 00 \ -00 00 00 00 01 81 01 40 01 40 00 36 2e 92 08 02 \ +00 00 00 00 01 81 01 40 01 40 00 36 f0 47 08 02 \ 00 2c 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 02 00 \ 00 00 66 83 c6 2a 32 09 bd 74 00 00" @@ -95,7 +95,7 @@ PTP_1588_IPV6_PDELAY_REQ=" \ 33:33:00:00:00:6b 00:00:de:ad:be:ef 86:dd 60 0c \ 5c fd 00 40 11 01 fe 80 00 00 00 00 00 00 3c 37 \ 63 ff fe cf 17 0e ff 02 00 00 00 00 00 00 00 00 \ -00 00 00 00 00 6b 01 3f 01 3f 00 40 b4 54 02 02 \ +00 00 00 00 00 6b 01 3f 01 3f 00 40 89 1f 02 02 \ 00 36 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 3e 37 63 ff fe cf 17 0e 00 01 00 01 05 7f \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ From e81cf512c1bdee07fce4a39854dde78cc2cd7b43 Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Fri, 20 Mar 2026 10:22:39 +0800 Subject: [PATCH 0759/1561] selftests: bonding: add test for stacked bond header_parse recursion Add a selftest to reproduce the infinite recursion in bond_header_parse() when bonds are stacked (bond1 -> bond0 -> gre). When a packet is received via AF_PACKET SOCK_DGRAM on the topmost bond, dev_parse_header() calls bond_header_parse() which used skb->dev (always the topmost bond) to get the bonding struct. This caused it to recurse back into itself indefinitely, leading to stack overflow. Before commit b7405dcf7385 ("bonding: prevent potential infinite loop in bond_header_parse()"), the test triggers: ./bond_stacked_header_parse.sh [ 71.999481] BUG: MAX_LOCK_DEPTH too low! [ 72.000170] turning off the locking correctness validator. [ 72.001029] Please attach the output of /proc/lock_stat to the bug report [ 72.002079] depth: 48 max: 48! ... After the fix, everything works fine: ./bond_stacked_header_parse.sh TEST: Stacked bond header_parse does not recurse [ OK ] Signed-off-by: Jiayuan Chen Link: https://patch.msgid.link/20260320022245.392384-1-jiayuan.chen@linux.dev Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/bonding/Makefile | 1 + .../net/bonding/bond_stacked_header_parse.sh | 72 +++++++++++++++++++ .../selftests/drivers/net/bonding/config | 1 + 3 files changed, 74 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/bonding/bond_stacked_header_parse.sh diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile index 6c5c60adb5e8..9af5f84edd37 100644 --- a/tools/testing/selftests/drivers/net/bonding/Makefile +++ b/tools/testing/selftests/drivers/net/bonding/Makefile @@ -11,6 +11,7 @@ TEST_PROGS := \ bond_macvlan_ipvlan.sh \ bond_options.sh \ bond_passive_lacp.sh \ + bond_stacked_header_parse.sh \ dev_addr_lists.sh \ mode-1-recovery-updelay.sh \ mode-2-recovery-updelay.sh \ diff --git a/tools/testing/selftests/drivers/net/bonding/bond_stacked_header_parse.sh b/tools/testing/selftests/drivers/net/bonding/bond_stacked_header_parse.sh new file mode 100755 index 000000000000..36bcdef711b0 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/bond_stacked_header_parse.sh @@ -0,0 +1,72 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test that bond_header_parse() does not infinitely recurse with stacked bonds. +# +# When a non-Ethernet device (e.g. GRE) is enslaved to a bond that is itself +# enslaved to another bond (bond1 -> bond0 -> gre), receiving a packet via +# AF_PACKET SOCK_DGRAM triggers dev_parse_header() -> bond_header_parse(). +# Since parse() used skb->dev (always the topmost bond) instead of a passed-in +# dev pointer, it would recurse back into itself indefinitely. + +# shellcheck disable=SC2034 +ALL_TESTS=" + bond_test_stacked_header_parse +" +REQUIRE_MZ=no +NUM_NETIFS=0 +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/forwarding/lib.sh + +# shellcheck disable=SC2329 +bond_test_stacked_header_parse() +{ + local devdummy="test-dummy0" + local devgre="test-gre0" + local devbond0="test-bond0" + local devbond1="test-bond1" + + # shellcheck disable=SC2034 + RET=0 + + # Setup: dummy -> gre -> bond0 -> bond1 + ip link add name "$devdummy" type dummy + ip addr add 10.0.0.1/24 dev "$devdummy" + ip link set "$devdummy" up + + ip link add name "$devgre" type gre local 10.0.0.1 + + ip link add name "$devbond0" type bond mode active-backup + ip link add name "$devbond1" type bond mode active-backup + + ip link set "$devgre" master "$devbond0" + ip link set "$devbond0" master "$devbond1" + + ip link set "$devgre" up + ip link set "$devbond0" up + ip link set "$devbond1" up + + # tcpdump on a non-Ethernet bond uses AF_PACKET SOCK_DGRAM (cooked + # capture), which triggers dev_parse_header() -> bond_header_parse() + # on receive. With the bug, this recurses infinitely. + timeout 5 tcpdump -c 1 -i "$devbond1" >/dev/null 2>&1 & + local tcpdump_pid=$! + sleep 1 + + # Send a GRE packet to 10.0.0.1 so it arrives via gre -> bond0 -> bond1 + python3 -c "from scapy.all import *; send(IP(src='10.0.0.2', dst='10.0.0.1')/GRE()/IP()/UDP(), verbose=0)" + check_err $? "failed to send GRE packet (scapy installed?)" + + wait "$tcpdump_pid" 2>/dev/null + + ip link del "$devbond1" 2>/dev/null + ip link del "$devbond0" 2>/dev/null + ip link del "$devgre" 2>/dev/null + ip link del "$devdummy" 2>/dev/null + + log_test "Stacked bond header_parse does not recurse" +} + +tests_run + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/drivers/net/bonding/config b/tools/testing/selftests/drivers/net/bonding/config index 991494376223..b62c70715293 100644 --- a/tools/testing/selftests/drivers/net/bonding/config +++ b/tools/testing/selftests/drivers/net/bonding/config @@ -14,6 +14,7 @@ CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y CONFIG_NETCONSOLE_EXTENDED_LOG=y CONFIG_NETDEVSIM=m +CONFIG_NET_IPGRE=y CONFIG_NET_SCH_INGRESS=y CONFIG_NLMON=y CONFIG_VETH=y From 0f5d68004780effdacf14b7346f235e212cf8ba6 Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Thu, 19 Mar 2026 21:18:33 -0700 Subject: [PATCH 0760/1561] selftests: rds: add tools/testing/selftests/net/rds/config The ksft CI runtime needs an rds specific config file to build a minimal kernel with the right options enabled. This patch adds an rds selftest config containing the required CONFIG_RDS* and CONFIG_NET_* options. Signed-off-by: Allison Henderson Link: https://patch.msgid.link/20260320041834.2761069-2-achender@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/rds/config | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 tools/testing/selftests/net/rds/config diff --git a/tools/testing/selftests/net/rds/config b/tools/testing/selftests/net/rds/config new file mode 100644 index 000000000000..97db7ecb892a --- /dev/null +++ b/tools/testing/selftests/net/rds/config @@ -0,0 +1,5 @@ +CONFIG_NET_NS=y +CONFIG_NET_SCH_NETEM=y +CONFIG_RDS=y +CONFIG_RDS_TCP=y +CONFIG_VETH=y From 381a503f0eb3a69e9a250330e2f09a5ae1688d2c Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Thu, 19 Mar 2026 21:18:34 -0700 Subject: [PATCH 0761/1561] selftests: rds: Add -c config option to rds/config.sh This patch adds a new -c flag to config.sh that enables callers to specify the file path of the config they would like to update. If no config is specified, the default will be the .config of the current directory. Signed-off-by: Allison Henderson Link: https://patch.msgid.link/20260320041834.2761069-3-achender@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/rds/README.txt | 5 ++- tools/testing/selftests/net/rds/config.sh | 37 +++++++++++++--------- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/tools/testing/selftests/net/rds/README.txt b/tools/testing/selftests/net/rds/README.txt index cbde2951ab13..c6fe003d503b 100644 --- a/tools/testing/selftests/net/rds/README.txt +++ b/tools/testing/selftests/net/rds/README.txt @@ -31,8 +31,11 @@ EXAMPLE: # Alternatly create a gcov disabled .config tools/testing/selftests/net/rds/config.sh + # Config paths may also be specified with the -c flag + tools/testing/selftests/net/rds/config.sh -c .config.local + # build the kernel - vng --build --config tools/testing/selftests/net/config + vng --build --config .config # launch the tests in a VM vng -v --rwdir ./ --run . --user root --cpus 4 -- \ diff --git a/tools/testing/selftests/net/rds/config.sh b/tools/testing/selftests/net/rds/config.sh index 791c8dbe1095..29a79314dd60 100755 --- a/tools/testing/selftests/net/rds/config.sh +++ b/tools/testing/selftests/net/rds/config.sh @@ -6,15 +6,20 @@ set -u set -x unset KBUILD_OUTPUT +CONF_FILE="" +FLAGS=() GENERATE_GCOV_REPORT=0 -while getopts "g" opt; do +while getopts "gc:" opt; do case ${opt} in g) GENERATE_GCOV_REPORT=1 ;; + c) + CONF_FILE=$OPTARG + ;; :) - echo "USAGE: config.sh [-g]" + echo "USAGE: config.sh [-g] [-c config]" exit 1 ;; ?) @@ -24,30 +29,32 @@ while getopts "g" opt; do esac done -CONF_FILE="tools/testing/selftests/net/config" +if [[ "$CONF_FILE" != "" ]]; then + FLAGS=(--file "$CONF_FILE") +fi # no modules -scripts/config --file "$CONF_FILE" --disable CONFIG_MODULES +scripts/config "${FLAGS[@]}" --disable CONFIG_MODULES # enable RDS -scripts/config --file "$CONF_FILE" --enable CONFIG_RDS -scripts/config --file "$CONF_FILE" --enable CONFIG_RDS_TCP +scripts/config "${FLAGS[@]}" --enable CONFIG_RDS +scripts/config "${FLAGS[@]}" --enable CONFIG_RDS_TCP if [ "$GENERATE_GCOV_REPORT" -eq 1 ]; then # instrument RDS and only RDS - scripts/config --file "$CONF_FILE" --enable CONFIG_GCOV_KERNEL - scripts/config --file "$CONF_FILE" --disable GCOV_PROFILE_ALL - scripts/config --file "$CONF_FILE" --enable GCOV_PROFILE_RDS + scripts/config "${FLAGS[@]}" --enable CONFIG_GCOV_KERNEL + scripts/config "${FLAGS[@]}" --disable GCOV_PROFILE_ALL + scripts/config "${FLAGS[@]}" --enable GCOV_PROFILE_RDS else - scripts/config --file "$CONF_FILE" --disable CONFIG_GCOV_KERNEL - scripts/config --file "$CONF_FILE" --disable GCOV_PROFILE_ALL - scripts/config --file "$CONF_FILE" --disable GCOV_PROFILE_RDS + scripts/config "${FLAGS[@]}" --disable CONFIG_GCOV_KERNEL + scripts/config "${FLAGS[@]}" --disable GCOV_PROFILE_ALL + scripts/config "${FLAGS[@]}" --disable GCOV_PROFILE_RDS fi # need network namespaces to run tests with veth network interfaces -scripts/config --file "$CONF_FILE" --enable CONFIG_NET_NS -scripts/config --file "$CONF_FILE" --enable CONFIG_VETH +scripts/config "${FLAGS[@]}" --enable CONFIG_NET_NS +scripts/config "${FLAGS[@]}" --enable CONFIG_VETH # simulate packet loss -scripts/config --file "$CONF_FILE" --enable CONFIG_NET_SCH_NETEM +scripts/config "${FLAGS[@]}" --enable CONFIG_NET_SCH_NETEM From a897e194c475d5e0d141e27df600dd43962308f1 Mon Sep 17 00:00:00 2001 From: Aleksei Oladko Date: Sat, 21 Mar 2026 21:59:05 +0000 Subject: [PATCH 0762/1561] selftests: net: run reuseport in an isolated netns The reuseport_* tests (bpf, bpf_cpu, bpf_numa, dualstack) currently use a fixed port range. This can cause intermittent test failures when the ports are already in use by other services: failed to bind receive socket: Address already in use To avoid conflicts, run these tests in separate network namespaces using unshare. Each test now has its own isolated network stack, preventing port collisions with the host services. Signed-off-by: Aleksei Oladko Link: https://patch.msgid.link/20260321215908.175465-2-aleksey.oladko@virtuozzo.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/reuseport_bpf.c | 11 +++++++++++ tools/testing/selftests/net/reuseport_bpf_cpu.c | 10 ++++++++++ tools/testing/selftests/net/reuseport_bpf_numa.c | 10 ++++++++++ tools/testing/selftests/net/reuseport_dualstack.c | 11 +++++++++++ 4 files changed, 42 insertions(+) diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c index b6634d6da3d6..12e48b97b862 100644 --- a/tools/testing/selftests/net/reuseport_bpf.c +++ b/tools/testing/selftests/net/reuseport_bpf.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "kselftest.h" @@ -455,8 +456,18 @@ static __attribute__((destructor)) void main_dtor(void) setrlimit(RLIMIT_MEMLOCK, &rlim_old); } +static void setup_netns(void) +{ + if (unshare(CLONE_NEWNET)) + error(1, errno, "failed to unshare netns"); + if (system("ip link set lo up")) + error(1, 0, "failed to bring up lo interface in netns"); +} + int main(void) { + setup_netns(); + fprintf(stderr, "---- IPv4 UDP ----\n"); /* NOTE: UDP socket lookups traverse a different code path when there * are > 10 sockets in a group. Run the bpf test through both paths. diff --git a/tools/testing/selftests/net/reuseport_bpf_cpu.c b/tools/testing/selftests/net/reuseport_bpf_cpu.c index 2d646174729f..ddfe92f6597a 100644 --- a/tools/testing/selftests/net/reuseport_bpf_cpu.c +++ b/tools/testing/selftests/net/reuseport_bpf_cpu.c @@ -228,10 +228,20 @@ static void test(int *rcv_fd, int len, int family, int proto) close(rcv_fd[cpu]); } +static void setup_netns(void) +{ + if (unshare(CLONE_NEWNET)) + error(1, errno, "failed to unshare netns"); + if (system("ip link set lo up")) + error(1, 0, "failed to bring up lo interface in netns"); +} + int main(void) { int *rcv_fd, cpus; + setup_netns(); + cpus = sysconf(_SC_NPROCESSORS_ONLN); if (cpus <= 0) error(1, errno, "failed counting cpus"); diff --git a/tools/testing/selftests/net/reuseport_bpf_numa.c b/tools/testing/selftests/net/reuseport_bpf_numa.c index 2ffd957ffb15..8ec52fc5ef41 100644 --- a/tools/testing/selftests/net/reuseport_bpf_numa.c +++ b/tools/testing/selftests/net/reuseport_bpf_numa.c @@ -230,10 +230,20 @@ static void test(int *rcv_fd, int len, int family, int proto) close(rcv_fd[node]); } +static void setup_netns(void) +{ + if (unshare(CLONE_NEWNET)) + error(1, errno, "failed to unshare netns"); + if (system("ip link set lo up")) + error(1, 0, "failed to bring up lo interface in netns"); +} + int main(void) { int *rcv_fd, nodes; + setup_netns(); + if (numa_available() < 0) ksft_exit_skip("no numa api support\n"); diff --git a/tools/testing/selftests/net/reuseport_dualstack.c b/tools/testing/selftests/net/reuseport_dualstack.c index fb7a59ed759e..0eaf739d0c85 100644 --- a/tools/testing/selftests/net/reuseport_dualstack.c +++ b/tools/testing/selftests/net/reuseport_dualstack.c @@ -25,6 +25,7 @@ #include #include #include +#include static const int PORT = 8888; @@ -156,10 +157,20 @@ static void test(int *rcv_fds, int count, int proto) close(epfd); } +static void setup_netns(void) +{ + if (unshare(CLONE_NEWNET)) + error(1, errno, "failed to unshare netns"); + if (system("ip link set lo up")) + error(1, 0, "failed to bring up lo interface in netns"); +} + int main(void) { int rcv_fds[32], i; + setup_netns(); + fprintf(stderr, "---- UDP IPv4 created before IPv6 ----\n"); build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds, 5); build_rcv_fd(AF_INET6, SOCK_DGRAM, &(rcv_fds[5]), 5); From 9d463f7863f2cc3722da9c670c88158409cb7c42 Mon Sep 17 00:00:00 2001 From: Aleksei Oladko Date: Sat, 21 Mar 2026 21:59:08 +0000 Subject: [PATCH 0763/1561] selftests: net: io_uring_zerocopy: enable io_uring for the test The io_uring_zerocopy.sh kselftest assumes that io_uring support is enabled on the host system. When io_uring is disabled via the kernel.io_uring_disabled sysctl, the test fails. Explicitly enable io_uring for the test by setting kernel.io_uring_disabled=0. Save the original value of kernel.io_uring_disabled before changing it and restore it in cleanup handler to ensure the system state is restored regardless of test outcome. Signed-off-by: Aleksei Oladko Signed-off-by: Konstantin Khorenko Link: https://patch.msgid.link/20260321215908.175465-5-aleksey.oladko@virtuozzo.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/io_uring_zerocopy_tx.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.sh b/tools/testing/selftests/net/io_uring_zerocopy_tx.sh index 123439545013..8c3647de9b4c 100755 --- a/tools/testing/selftests/net/io_uring_zerocopy_tx.sh +++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.sh @@ -77,9 +77,13 @@ esac # Start of state changes: install cleanup handler +old_io_uring_disabled=0 cleanup() { ip netns del "${NS2}" ip netns del "${NS1}" + if [ "$old_io_uring_disabled" -ne 0 ]; then + sysctl -w -q kernel.io_uring_disabled="$old_io_uring_disabled" 2>/dev/null || true + fi } trap cleanup EXIT @@ -122,5 +126,10 @@ do_test() { wait } +old_io_uring_disabled=$(sysctl -n kernel.io_uring_disabled 2>/dev/null || echo "0") +if [ "$old_io_uring_disabled" -ne 0 ]; then + sysctl -w -q kernel.io_uring_disabled=0 +fi + do_test "${EXTRA_ARGS}" echo ok From 31b2d4e00260ae3fca50779ac416dd9acaacbfb9 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Thu, 19 Mar 2026 22:02:49 +0530 Subject: [PATCH 0764/1561] amd-xgbe: add adaptive link status polling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement adaptive link status polling to enable fast link-down detection while conserving CPU resources during link-down periods. Currently, the driver polls link status at a fixed 1-second interval regardless of link state. This creates a trade-off: - Slow polling (1s): Misses rapid link state changes, causing delays - Fast polling: Wastes CPU when link is stable or down This enhancement introduces state-aware polling: When carrier is UP: Poll every 100ms to enable rapid link-down detection. This provides ~100-200ms response time to link failures, minimizing packet loss and enabling fast failover in link aggregation configurations. When carrier is DOWN: Poll every 1s to conserve CPU resources. Link-up detection is less time-critical since no traffic is flowing. Performance impact: - Link-down detection: 1000ms → 100-200ms (10x improvement) - CPU overhead when link up: 0.1% → 1% (acceptable for active links) - CPU overhead when link down: unchanged at 0.1% This is particularly valuable for: - Link aggregation deployments requiring sub-second failover - Environments with flaky links or cable issues - Applications sensitive to connection recovery time Signed-off-by: Raju Rangoju Link: https://patch.msgid.link/20260319163251.1808611-2-Raju.Rangoju@amd.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 2f39f38fecf9..6886d3b33ffe 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -607,11 +607,33 @@ static void xgbe_service_timer(struct timer_list *t) struct xgbe_prv_data *pdata = timer_container_of(pdata, t, service_timer); struct xgbe_channel *channel; + unsigned int poll_interval; unsigned int i; queue_work(pdata->dev_workqueue, &pdata->service_work); - mod_timer(&pdata->service_timer, jiffies + HZ); + /* Adaptive link status polling for fast failure detection: + * + * - When carrier is UP: poll every 100ms for rapid link-down detection + * Enables sub-second response to link failures, minimizing traffic + * loss. + * + * - When carrier is DOWN: poll every 1s to conserve CPU resources + * Link-up events are less time-critical. + * + * The 100ms active polling interval balances responsiveness with + * efficiency: + * - Provides ~100-200ms link-down detection (10x faster than 1s + * polling) + * - Minimal CPU overhead (1% vs 0.1% with 1s polling) + * - Enables fast failover in link aggregation deployments + */ + if (netif_running(pdata->netdev) && netif_carrier_ok(pdata->netdev)) + poll_interval = msecs_to_jiffies(100); /* 100ms when up */ + else + poll_interval = HZ; /* 1 second when down */ + + mod_timer(&pdata->service_timer, jiffies + poll_interval); if (!pdata->tx_usecs) return; From 0898849ad9715d163555b8f8bfd13b7691a2b3b8 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Thu, 19 Mar 2026 22:02:50 +0530 Subject: [PATCH 0765/1561] amd-xgbe: optimize TX shutdown on link-down Optimize the TX shutdown sequence when link goes down by skipping futile hardware wait operations and immediately stopping TX queues. Current behavior creates delays and resource issues during link-down: 1. xgbe_txq_prepare_tx_stop() waits up to XGBE_DMA_STOP_TIMEOUT for TX queues to drain, but when link is down, hardware will never complete the pending descriptors. This causes unnecessary delays during interface shutdown. 2. TX queues remain active after link-down, allowing the network stack to continue queuing packets that cannot be transmitted. This leads to resource buildup and complicates recovery. This patch adds two optimizations: Optimization 1: Skip TX queue drain when link is down In xgbe_txq_prepare_tx_stop(), detect link-down state and return immediately instead of waiting for hardware. Abandoned descriptors will be cleaned up by the force-cleanup mechanism (next patch). Optimization 2: Immediate TX queue stop on link-down In xgbe_phy_adjust_link(), call netif_tx_stop_all_queues() as soon as link-down is detected. Also wake TX queues on link-up to resume transmission. Benefits: - Faster interface shutdown (no pointless timeout waits) - Prevents packet queue buildup in network stack - Cleaner state management during link transitions - Enables orderly descriptor cleanup by NAPI poll Note: We do not call netdev_tx_reset_queue() on link-down because NAPI poll may still be running, which would trigger BQL assertions. BQL state is cleaned up naturally during descriptor reclamation. Signed-off-by: Raju Rangoju Link: https://patch.msgid.link/20260319163251.1808611-3-Raju.Rangoju@amd.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/amd/xgbe/xgbe-dev.c | 9 +++++++++ drivers/net/ethernet/amd/xgbe/xgbe-mdio.c | 18 ++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index f1357619097e..b7bf74c6bb47 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -3186,7 +3186,16 @@ static void xgbe_txq_prepare_tx_stop(struct xgbe_prv_data *pdata, /* The Tx engine cannot be stopped if it is actively processing * packets. Wait for the Tx queue to empty the Tx fifo. Don't * wait forever though... + * + * Optimization: Skip the wait when link is down. Hardware won't + * complete TX processing, so waiting serves no purpose and only + * delays interface shutdown. Descriptors will be reclaimed via + * the force-cleanup path in tx_poll. */ + + if (!pdata->phy.link) + return; + tx_timeout = jiffies + (XGBE_DMA_STOP_TIMEOUT * HZ); while (time_before(jiffies, tx_timeout)) { tx_status = XGMAC_MTL_IOREAD(pdata, queue, MTL_Q_TQDR); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c index 7675bb98f029..fa0df6181207 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c @@ -1047,11 +1047,29 @@ static void xgbe_phy_adjust_link(struct xgbe_prv_data *pdata) if (pdata->phy_link != pdata->phy.link) { new_state = 1; pdata->phy_link = pdata->phy.link; + + /* Link is coming up - wake TX queues */ + netif_tx_wake_all_queues(pdata->netdev); } } else if (pdata->phy_link) { new_state = 1; pdata->phy_link = 0; pdata->phy_speed = SPEED_UNKNOWN; + + /* Proactive TX queue management on link-down. + * + * Immediately stop TX queues to enable clean link-down + * handling: + * - Prevents queueing packets that can't be transmitted + * - Allows orderly descriptor cleanup by NAPI poll + * - Enables rapid failover in link aggregation configurations + * + * Note: We do NOT call netdev_tx_reset_queue() here because + * NAPI poll may still be running and would trigger BQL + * assertion. BQL state is cleaned up naturally during + * descriptor reclamation. + */ + netif_tx_stop_all_queues(pdata->netdev); } if (new_state && netif_msg_link(pdata)) From b7fb3677840d26b3fa4c5d0d63b578a2f44077d1 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Thu, 19 Mar 2026 22:02:51 +0530 Subject: [PATCH 0766/1561] amd-xgbe: add TX descriptor cleanup for link-down Add intelligent TX descriptor cleanup mechanism to reclaim abandoned descriptors when the physical link goes down. When the link goes down while TX packets are in-flight, the hardware stops processing descriptors with the OWN bit still set. The current driver waits indefinitely for these descriptors to complete, which never happens. This causes: - TX ring exhaustion (no descriptors available for new packets) - Memory leaks (skbs never freed) - DMA mapping leaks (mappings never unmapped) - Network stack backpressure buildup Add force-cleanup mechanism in xgbe_tx_poll() that detects link-down state and reclaims abandoned descriptors. The helper functions and DMA optimizations support efficient TX shutdown: - xgbe_wait_for_dma_tx_complete(): Wait for DMA completion with link-down optimization - Restructure xgbe_disable_tx() for proper shutdown sequence Implementation: 1. Check link state at the start of tx_poll 2. If link is down, set force_cleanup flag 3. For descriptors that hardware hasn't completed (!tx_complete): - If force_cleanup: treat as completed and reclaim resources - If link up: break and wait for hardware (normal behavior) The cleanup process: - Frees skbs that will never be transmitted - Unmaps DMA mappings - Resets descriptors for reuse - Does NOT count as successful transmission (correct statistics) Benefits: - Prevents TX ring starvation - Eliminates memory and DMA mapping leaks - Enables fast link recovery when link comes back up - Critical for link aggregation failover scenarios Signed-off-by: Raju Rangoju Link: https://patch.msgid.link/20260319163251.1808611-4-Raju.Rangoju@amd.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/amd/xgbe/xgbe-common.h | 4 ++ drivers/net/ethernet/amd/xgbe/xgbe-dev.c | 79 +++++++++++++++++---- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 43 +++++++++-- 3 files changed, 109 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h index c17900a49595..66807d67e984 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h @@ -330,6 +330,10 @@ #define MAC_ISR_SMI_WIDTH 1 #define MAC_ISR_TSIS_INDEX 12 #define MAC_ISR_TSIS_WIDTH 1 +#define MAC_ISR_LS_INDEX 24 +#define MAC_ISR_LS_WIDTH 2 +#define MAC_ISR_LSI_INDEX 0 +#define MAC_ISR_LSI_WIDTH 1 #define MAC_MACA1HR_AE_INDEX 31 #define MAC_MACA1HR_AE_WIDTH 1 #define MAC_MDIOIER_SNGLCOMPIE_INDEX 12 diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index b7bf74c6bb47..2de974213090 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -3276,28 +3276,83 @@ static void xgbe_enable_tx(struct xgbe_prv_data *pdata) XGMAC_IOWRITE_BITS(pdata, MAC_TCR, TE, 1); } +/** + * xgbe_wait_for_dma_tx_complete - Wait for DMA to complete pending TX + * @pdata: driver private data + * + * Wait for the DMA TX channels to complete all pending descriptors. + * This ensures no frames are in-flight before we disable the transmitter. + * If link is down, return immediately as TX will never complete. + * + * Return: 0 on success, -ETIMEDOUT on timeout + */ +static int xgbe_wait_for_dma_tx_complete(struct xgbe_prv_data *pdata) +{ + struct xgbe_channel *channel; + struct xgbe_ring *ring; + unsigned long timeout; + unsigned int i; + bool complete; + + /* If link is down, TX will never complete - skip waiting */ + if (!pdata->phy.link) + return 0; + + timeout = jiffies + (XGBE_DMA_STOP_TIMEOUT * HZ); + + do { + complete = true; + + for (i = 0; i < pdata->channel_count; i++) { + channel = pdata->channel[i]; + ring = channel->tx_ring; + if (!ring) + continue; + + /* Check if DMA has processed all descriptors */ + if (ring->dirty != ring->cur) { + complete = false; + break; + } + } + + if (complete) + return 0; + + usleep_range(100, 200); + } while (time_before(jiffies, timeout)); + + netif_warn(pdata, drv, pdata->netdev, + "timeout waiting for DMA TX to complete\n"); + return -ETIMEDOUT; +} + static void xgbe_disable_tx(struct xgbe_prv_data *pdata) { unsigned int i; - /* Prepare for Tx DMA channel stop */ - for (i = 0; i < pdata->tx_q_count; i++) - xgbe_prepare_tx_stop(pdata, i); + /* Step 1: Wait for DMA to complete pending descriptors */ + xgbe_wait_for_dma_tx_complete(pdata); - /* Disable MAC Tx */ - XGMAC_IOWRITE_BITS(pdata, MAC_TCR, TE, 0); - - /* Disable each Tx queue */ - for (i = 0; i < pdata->tx_q_count; i++) - XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_TQOMR, TXQEN, 0); - - /* Disable each Tx DMA channel */ + /* Step 2: Disable each Tx DMA channel to stop + * processing new descriptors + */ for (i = 0; i < pdata->channel_count; i++) { if (!pdata->channel[i]->tx_ring) break; - XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR, ST, 0); } + + /* Step 3: Wait for MTL TX queues to drain */ + for (i = 0; i < pdata->tx_q_count; i++) + xgbe_prepare_tx_stop(pdata, i); + + /* Step 4: Disable MTL TX queues */ + for (i = 0; i < pdata->tx_q_count; i++) + XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_TQOMR, TXQEN, 0); + + /* Step 5: Disable MAC TX last */ + XGMAC_IOWRITE_BITS(pdata, MAC_TCR, TE, 0); } static void xgbe_prepare_rx_stop(struct xgbe_prv_data *pdata, diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 6886d3b33ffe..2d6d00e3689b 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -2169,6 +2169,7 @@ static int xgbe_tx_poll(struct xgbe_channel *channel) struct net_device *netdev = pdata->netdev; struct netdev_queue *txq; int processed = 0; + int force_cleanup; unsigned int tx_packets = 0, tx_bytes = 0; unsigned int cur; @@ -2185,13 +2186,41 @@ static int xgbe_tx_poll(struct xgbe_channel *channel) txq = netdev_get_tx_queue(netdev, channel->queue_index); + /* Smart descriptor cleanup during link-down conditions. + * + * When link is down, hardware stops processing TX descriptors (OWN bit + * remains set). Enable intelligent cleanup to reclaim these abandoned + * descriptors and maintain TX queue health. + * + * This cleanup mechanism enables: + * - Continuous TX queue availability for new packets when link recovers + * - Clean resource management (skbs, DMA mappings, descriptors) + * - Fast failover in link aggregation scenarios + */ + force_cleanup = !pdata->phy.link; + while ((processed < XGBE_TX_DESC_MAX_PROC) && (ring->dirty != cur)) { rdata = XGBE_GET_DESC_DATA(ring, ring->dirty); rdesc = rdata->rdesc; - if (!hw_if->tx_complete(rdesc)) - break; + if (!hw_if->tx_complete(rdesc)) { + if (!force_cleanup) + break; + /* Link-down descriptor cleanup: reclaim abandoned + * resources. + * + * Hardware has stopped processing this descriptor, so + * perform intelligent cleanup to free skbs and reclaim + * descriptors for future use when link recovers. + * + * These are not counted as successful transmissions + * since packets never reached the wire. + */ + netif_dbg(pdata, tx_err, netdev, + "force-freeing stuck TX desc %u (link down)\n", + ring->dirty); + } /* Make sure descriptor fields are read after reading the OWN * bit */ @@ -2200,9 +2229,13 @@ static int xgbe_tx_poll(struct xgbe_channel *channel) if (netif_msg_tx_done(pdata)) xgbe_dump_tx_desc(pdata, ring, ring->dirty, 1, 0); - if (hw_if->is_last_desc(rdesc)) { - tx_packets += rdata->tx.packets; - tx_bytes += rdata->tx.bytes; + /* Only count packets actually transmitted (not force-cleaned) + */ + if (!force_cleanup || hw_if->is_last_desc(rdesc)) { + if (hw_if->is_last_desc(rdesc)) { + tx_packets += rdata->tx.packets; + tx_bytes += rdata->tx.bytes; + } } /* Free the SKB and reset the descriptor for re-use */ From 6e4235adfa8814afc361d26cd630b9af8c4f01a8 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Thu, 19 Mar 2026 16:17:10 -0400 Subject: [PATCH 0767/1561] octeontx2-af: simplify rvu_debugfs The driver uses bitmap_print_to_pagebuf() to store human-readable bitmaps representations in a temporary buffers; and then feed seq_printf() with it. Switch to using seq_printf("%*pb") directly and drop intermediate buffer. Signed-off-by: Yury Norov Link: https://patch.msgid.link/20260319201713.941956-2-ynorov@nvidia.com Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- .../marvell/octeontx2/af/rvu_debugfs.c | 28 ++++--------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index 413f9fa40b33..fa461489acdd 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -962,30 +962,21 @@ static bool rvu_dbg_is_valid_lf(struct rvu *rvu, int blkaddr, int lf, static void print_npa_qsize(struct seq_file *m, struct rvu_pfvf *pfvf) { - char *buf; - - buf = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!buf) - return; - if (!pfvf->aura_ctx) { seq_puts(m, "Aura context is not initialized\n"); } else { - bitmap_print_to_pagebuf(false, buf, pfvf->aura_bmap, - pfvf->aura_ctx->qsize); seq_printf(m, "Aura count : %d\n", pfvf->aura_ctx->qsize); - seq_printf(m, "Aura context ena/dis bitmap : %s\n", buf); + seq_printf(m, "Aura context ena/dis bitmap : %*pb\n", + pfvf->aura_ctx->qsize, pfvf->aura_bmap); } if (!pfvf->pool_ctx) { seq_puts(m, "Pool context is not initialized\n"); } else { - bitmap_print_to_pagebuf(false, buf, pfvf->pool_bmap, - pfvf->pool_ctx->qsize); seq_printf(m, "Pool count : %d\n", pfvf->pool_ctx->qsize); - seq_printf(m, "Pool context ena/dis bitmap : %s\n", buf); + seq_printf(m, "Pool context ena/dis bitmap : %*pb\n", + pfvf->pool_ctx->qsize, pfvf->pool_bmap); } - kfree(buf); } /* The 'qsize' entry dumps current Aura/Pool context Qsize @@ -2547,17 +2538,8 @@ RVU_DEBUG_SEQ_FOPS(nix_cq_ctx, nix_cq_ctx_display, nix_cq_ctx_write); static void print_nix_qctx_qsize(struct seq_file *filp, int qsize, unsigned long *bmap, char *qtype) { - char *buf; - - buf = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!buf) - return; - - bitmap_print_to_pagebuf(false, buf, bmap, qsize); seq_printf(filp, "%s context count : %d\n", qtype, qsize); - seq_printf(filp, "%s context ena/dis bitmap : %s\n", - qtype, buf); - kfree(buf); + seq_printf(filp, "%s context ena/dis bitmap : %*pb\n", qtype, qsize, bmap); } static void print_nix_qsize(struct seq_file *filp, struct rvu_pfvf *pfvf) From de69301dc2f6d4172c69c3d1c7ceaad7af89b0dc Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Thu, 19 Mar 2026 16:17:11 -0400 Subject: [PATCH 0768/1561] net-sysfs: switch xps_queue_show() to sysfs_emit() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Switch the function to use the proper sysfs_emit("%pb"). Suggested-by: Thomas Weißschuh Reviewed-by: Thomas Weißschuh Signed-off-by: Yury Norov Link: https://patch.msgid.link/20260319201713.941956-3-ynorov@nvidia.com Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- net/core/net-sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 2ce011fae249..e430645748a7 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1739,7 +1739,7 @@ static ssize_t xps_queue_show(struct net_device *dev, unsigned int index, out_no_maps: rcu_read_unlock(); - len = bitmap_print_to_pagebuf(false, buf, mask, nr_ids); + len = sysfs_emit(buf, "%*pb\n", nr_ids, mask); bitmap_free(mask); return len < PAGE_SIZE ? len : -EINVAL; From fcbf106189aaf69a2a2f6116672d442d473ecbcd Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 20 Mar 2026 16:47:12 +0000 Subject: [PATCH 0769/1561] net: stmmac: move stmmac_xmit() skb head handling The skb head buffer handling is delayed in stmmac_xmit() until after the skb fragments have been populated into the descriptors. The reason is this code used to set the OWN bit on the first descriptor, which then allows the TX DMA to process the first and subsequent descriptors. However, as of commit 579a25a854d4 ("net: stmmac: Initial support for TBS") this is now separated, but the comments weren't updated. Move the code populating the first descriptor along side the jumbo code which also populates the first descriptor. This gives a consistent location where we populate the descriptor(s) for the SKB head. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w3d0C-0000000DfLj-0BLb@rmk-PC.armlinux.org.uk Tested-by: Maxime Chevallier Signed-off-by: Paolo Abeni --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 63 +++++++++---------- 1 file changed, 30 insertions(+), 33 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index ecb6d9a27567..fb9719820b93 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4763,6 +4763,33 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) entry = stmmac_jumbo_frm(priv, tx_q, skb, csum_insertion); if (unlikely(entry < 0) && (entry != -EINVAL)) goto dma_map_err; + } else { + bool last_segment = (nfrags == 0); + + dma_addr = dma_map_single(priv->device, skb->data, + nopaged_len, DMA_TO_DEVICE); + if (dma_mapping_error(priv->device, dma_addr)) + goto dma_map_err; + + stmmac_set_tx_skb_dma_entry(tx_q, first_entry, dma_addr, + nopaged_len, false); + + stmmac_set_desc_addr(priv, first_desc, dma_addr); + + if (last_segment) + stmmac_set_tx_dma_last_segment(tx_q, first_entry); + + if (unlikely((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && + priv->hwts_tx_en)) { + /* declare that device is doing timestamping */ + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + stmmac_enable_tx_timestamp(priv, first_desc); + } + + /* Prepare the first descriptor without setting the OWN bit */ + stmmac_prepare_tx_desc(priv, first_desc, 1, nopaged_len, + csum_insertion, priv->descriptor_mode, + 0, last_segment, skb->len); } for (i = 0; i < nfrags; i++) { @@ -4854,39 +4881,6 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) if (priv->sarc_type) stmmac_set_desc_sarc(priv, first_desc, priv->sarc_type); - /* Ready to fill the first descriptor and set the OWN bit w/o any - * problems because all the descriptors are actually ready to be - * passed to the DMA engine. - */ - if (likely(!is_jumbo)) { - bool last_segment = (nfrags == 0); - - dma_addr = dma_map_single(priv->device, skb->data, - nopaged_len, DMA_TO_DEVICE); - if (dma_mapping_error(priv->device, dma_addr)) - goto dma_map_err; - - stmmac_set_tx_skb_dma_entry(tx_q, first_entry, dma_addr, - nopaged_len, false); - - stmmac_set_desc_addr(priv, first_desc, dma_addr); - - if (last_segment) - stmmac_set_tx_dma_last_segment(tx_q, first_entry); - - if (unlikely((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && - priv->hwts_tx_en)) { - /* declare that device is doing timestamping */ - skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; - stmmac_enable_tx_timestamp(priv, first_desc); - } - - /* Prepare the first descriptor setting the OWN bit too */ - stmmac_prepare_tx_desc(priv, first_desc, 1, nopaged_len, - csum_insertion, priv->descriptor_mode, - 0, last_segment, skb->len); - } - if (tx_q->tbs & STMMAC_TBS_EN) { struct timespec64 ts = ns_to_timespec64(skb->tstamp); @@ -4894,6 +4888,9 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) stmmac_set_desc_tbs(priv, tbs_desc, ts.tv_sec, ts.tv_nsec); } + /* Set the OWN bit on the first descriptor now that all descriptors + * for this skb are populated. + */ stmmac_set_tx_owner(priv, first_desc); netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len); From cafacdc48a74e16b2ad727b3f31a06c08d96e0b1 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 20 Mar 2026 16:47:17 +0000 Subject: [PATCH 0770/1561] net: stmmac: move first xmit descriptor SARC and TBS config Move the first transmit descriptor's SARC and TBS configuration alongside the code which populates the first descriptor, which helps to keep all the code specific to that descriptor together. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w3d0H-0000000DfLp-0gIx@rmk-PC.armlinux.org.uk Tested-by: Maxime Chevallier Signed-off-by: Paolo Abeni --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index fb9719820b93..8741513b152d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4792,6 +4792,16 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) 0, last_segment, skb->len); } + if (priv->sarc_type) + stmmac_set_desc_sarc(priv, first_desc, priv->sarc_type); + + if (tx_q->tbs & STMMAC_TBS_EN) { + struct timespec64 ts = ns_to_timespec64(skb->tstamp); + + tbs_desc = &tx_q->dma_entx[first_entry]; + stmmac_set_desc_tbs(priv, tbs_desc, ts.tv_sec, ts.tv_nsec); + } + for (i = 0; i < nfrags; i++) { const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; unsigned int frag_size = skb_frag_size(frag); @@ -4878,16 +4888,6 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) u64_stats_inc(&txq_stats->q.tx_set_ic_bit); u64_stats_update_end(&txq_stats->q_syncp); - if (priv->sarc_type) - stmmac_set_desc_sarc(priv, first_desc, priv->sarc_type); - - if (tx_q->tbs & STMMAC_TBS_EN) { - struct timespec64 ts = ns_to_timespec64(skb->tstamp); - - tbs_desc = &tx_q->dma_entx[first_entry]; - stmmac_set_desc_tbs(priv, tbs_desc, ts.tv_sec, ts.tv_nsec); - } - /* Set the OWN bit on the first descriptor now that all descriptors * for this skb are populated. */ From 7b8683115034360a2f7f7e5b0dac958249fa810a Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 20 Mar 2026 16:47:22 +0000 Subject: [PATCH 0771/1561] net: stmmac: move stmmac_xmit() first entry index code The handling of the first descriptor index/pointer is split around the checksum handling which makes no sense. Group this code together. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w3d0M-0000000DfLv-1C6S@rmk-PC.armlinux.org.uk Tested-by: Maxime Chevallier Signed-off-by: Paolo Abeni --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 8741513b152d..84de1134d581 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4728,10 +4728,6 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) /* Check if VLAN can be inserted by HW */ has_vlan = stmmac_vlan_insert(priv, skb, tx_q); - entry = tx_q->cur_tx; - first_entry = entry; - WARN_ON(tx_q->tx_skbuff[first_entry]); - csum_insertion = (skb->ip_summed == CHECKSUM_PARTIAL); /* DWMAC IPs can be synthesized to support tx coe only for a few tx * queues. In that case, checksum offloading for those queues that don't @@ -4748,6 +4744,10 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) csum_insertion = !csum_insertion; } + entry = tx_q->cur_tx; + first_entry = entry; + WARN_ON(tx_q->tx_skbuff[first_entry]); + desc = stmmac_get_tx_desc(priv, tx_q, entry); first_desc = desc; From 2e3bfeb1bc0c0a7cd51212ecc6533aca73cf8e2d Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 20 Mar 2026 16:47:27 +0000 Subject: [PATCH 0772/1561] net: stmmac: move stmmac_xmit() initial variable init Move tx_q, first_tx and txq_stats just before their first use. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w3d0R-0000000DfM1-1itN@rmk-PC.armlinux.org.uk Tested-by: Maxime Chevallier Signed-off-by: Paolo Abeni --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 84de1134d581..58a84570821c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4685,10 +4685,6 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) dma_addr_t dma_addr; u32 sdu_len; - tx_q = &priv->dma_conf.tx_queue[queue]; - txq_stats = &priv->xstats.txq_stats[queue]; - first_tx = tx_q->cur_tx; - if (priv->tx_path_in_lpi_mode && priv->eee_sw_timer_en) stmmac_stop_sw_lpi(priv); @@ -4725,6 +4721,9 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_BUSY; } + tx_q = &priv->dma_conf.tx_queue[queue]; + first_tx = tx_q->cur_tx; + /* Check if VLAN can be inserted by HW */ has_vlan = stmmac_vlan_insert(priv, skb, tx_q); @@ -4882,6 +4881,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) netif_tx_stop_queue(netdev_get_tx_queue(priv->dev, queue)); } + txq_stats = &priv->xstats.txq_stats[queue]; u64_stats_update_begin(&txq_stats->q_syncp); u64_stats_add(&txq_stats->q.tx_bytes, skb->len); if (set_ic) From 557ccd54ba141650cfd6962296c062089f3c6cd3 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 20 Mar 2026 16:47:32 +0000 Subject: [PATCH 0773/1561] net: stmmac: use first_desc for TBS Rather than freshly getting a pointer for the TBS descriptor (because we want to access its enhanced fields) convert the existing first_desc basic descriptor to a pointer to the enhanced descriptor. Add a comment explaining why it is safe to convert from the basic descriptor pointer to the enhanced descriptor pointer. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w3d0W-0000000DfM7-2BMA@rmk-PC.armlinux.org.uk Tested-by: Maxime Chevallier [pabeni@redhat.com: fixed comment typo] Signed-off-by: Paolo Abeni --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 58a84570821c..a8a18d79b6a8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4794,10 +4794,15 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) if (priv->sarc_type) stmmac_set_desc_sarc(priv, first_desc, priv->sarc_type); + /* STMMAC_TBS_EN can only be set if STMMAC_TBS_AVAIL has already + * been set, which means the underlying type of the descriptors + * will be struct stmmac_edesc. Therefore, it is safe to convert + * the basic descriptor to the enhanced descriptor here. + */ if (tx_q->tbs & STMMAC_TBS_EN) { struct timespec64 ts = ns_to_timespec64(skb->tstamp); - tbs_desc = &tx_q->dma_entx[first_entry]; + tbs_desc = dma_desc_to_edesc(first_desc); stmmac_set_desc_tbs(priv, tbs_desc, ts.tv_sec, ts.tv_nsec); } From cd1f306e28157bd0900cae52207839d210e61f61 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 20 Mar 2026 16:47:37 +0000 Subject: [PATCH 0774/1561] net: stmmac: elminate tbs_desc in stmmac_xmit() There is no need to have a local variable for tbs_desc, we can do the conversion when calling stmmac_set_desc_tbs(). Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w3d0b-0000000DfMD-2hrD@rmk-PC.armlinux.org.uk Tested-by: Maxime Chevallier Signed-off-by: Paolo Abeni --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index a8a18d79b6a8..9b6b49331639 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4678,7 +4678,6 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) int gso = skb_shinfo(skb)->gso_type; struct stmmac_txq_stats *txq_stats; struct dma_desc *desc, *first_desc; - struct dma_edesc *tbs_desc = NULL; struct stmmac_tx_queue *tx_q; int i, csum_insertion = 0; int entry, first_tx; @@ -4802,8 +4801,8 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) if (tx_q->tbs & STMMAC_TBS_EN) { struct timespec64 ts = ns_to_timespec64(skb->tstamp); - tbs_desc = dma_desc_to_edesc(first_desc); - stmmac_set_desc_tbs(priv, tbs_desc, ts.tv_sec, ts.tv_nsec); + stmmac_set_desc_tbs(priv, dma_desc_to_edesc(first_desc), + ts.tv_sec, ts.tv_nsec); } for (i = 0; i < nfrags; i++) { From 88c07dff9f6dd0b89b3e5364b202978eab2541c2 Mon Sep 17 00:00:00 2001 From: Kexin Sun Date: Sat, 21 Mar 2026 18:57:53 +0800 Subject: [PATCH 0775/1561] hv_sock: update outdated comment for renamed vsock_stream_recvmsg() The function vsock_stream_recvmsg() was renamed to vsock_connectible_recvmsg() by commit a9e29e5511b9 ("af_vsock: update functions for connectible socket"). Update the comment accordingly. Assisted-by: unnamed:deepseek-v3.2 coccinelle Signed-off-by: Kexin Sun Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260321105753.6751-1-kexinsun@smail.nju.edu.cn Signed-off-by: Paolo Abeni --- net/vmw_vsock/hyperv_transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index 069386a74557..2b7c0b5896ed 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -196,7 +196,7 @@ static int hvs_channel_readable_payload(struct vmbus_channel *chan) if (readable > HVS_PKT_LEN(0)) { /* At least we have 1 byte to read. We don't need to return - * the exact readable bytes: see vsock_stream_recvmsg() -> + * the exact readable bytes: see vsock_connectible_recvmsg() -> * vsock_stream_has_data(). */ return 1; From a4b908c89d88d4b7251c566457979c4167b9664c Mon Sep 17 00:00:00 2001 From: Kexin Sun Date: Sat, 21 Mar 2026 18:58:25 +0800 Subject: [PATCH 0776/1561] net: phy: update outdated comment for removed phy_package_read/write() The locked convenience functions phy_package_read() and phy_package_write() were removed (as they had no users) by commit e7f984e925d2 ("net: phy: move PHY package related code from phy.h to phy_package.c"). Update the comment to reference the existing unlocked counterparts __phy_package_read() and __phy_package_write(). Assisted-by: unnamed:deepseek-v3.2 coccinelle Signed-off-by: Kexin Sun Link: https://patch.msgid.link/20260321105825.7221-1-kexinsun@smail.nju.edu.cn Signed-off-by: Paolo Abeni --- drivers/net/phy/phy_package.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/phy_package.c b/drivers/net/phy/phy_package.c index 63e4f862f731..16ae8d1c1f89 100644 --- a/drivers/net/phy/phy_package.c +++ b/drivers/net/phy/phy_package.c @@ -194,8 +194,8 @@ EXPORT_SYMBOL_GPL(phy_package_probe_once); * for offset calculation to access generic registers of a PHY package. * Usually, one of the PHY addresses of the different PHYs in the package * provides access to these global registers. - * The address which is given here, will be used in the phy_package_read() - * and phy_package_write() convenience functions as base and added to the + * The address which is given here, will be used in the __phy_package_read() + * and __phy_package_write() convenience functions as base and added to the * passed offset in those functions. * * This will set the shared pointer of the phydev to the shared storage. From b1c803d5c8167026791abfaed96fd3e6a1fcd750 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sat, 21 Mar 2026 15:41:44 +0100 Subject: [PATCH 0777/1561] net: airoha: Rework the code flow in airoha_remove() and in airoha_probe() error path As suggested by Simon in [0], rework the code flow in airoha_remove() and in the airoha_probe() error path in order to rely on a more common approach un-registering configured net-devices first and destroying the hw resources at the end of the code. Introduce airoha_qdma_cleanup routine to release QDMA resources. [0] https://lore.kernel.org/netdev/20251214-airoha-fix-dev-registration-v1-1-860e027ad4c6@kernel.org/ Suggested-by: Simon Horman Signed-off-by: Lorenzo Bianconi Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260321-airoha-remove-rework-v2-1-16c7bade5fe5@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/airoha/airoha_eth.c | 76 ++++++++++++++---------- 1 file changed, 44 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 961325da833b..82e53c60f561 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -1368,6 +1368,33 @@ static int airoha_qdma_init(struct platform_device *pdev, return airoha_qdma_hw_init(qdma); } +static void airoha_qdma_cleanup(struct airoha_qdma *qdma) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(qdma->q_rx); i++) { + if (!qdma->q_rx[i].ndesc) + continue; + + netif_napi_del(&qdma->q_rx[i].napi); + airoha_qdma_cleanup_rx_queue(&qdma->q_rx[i]); + if (qdma->q_rx[i].page_pool) { + page_pool_destroy(qdma->q_rx[i].page_pool); + qdma->q_rx[i].page_pool = NULL; + } + } + + for (i = 0; i < ARRAY_SIZE(qdma->q_tx_irq); i++) + netif_napi_del(&qdma->q_tx_irq[i].napi); + + for (i = 0; i < ARRAY_SIZE(qdma->q_tx); i++) { + if (!qdma->q_tx[i].ndesc) + continue; + + airoha_qdma_cleanup_tx_queue(&qdma->q_tx[i]); + } +} + static int airoha_hw_init(struct platform_device *pdev, struct airoha_eth *eth) { @@ -1395,41 +1422,30 @@ static int airoha_hw_init(struct platform_device *pdev, for (i = 0; i < ARRAY_SIZE(eth->qdma); i++) { err = airoha_qdma_init(pdev, eth, ð->qdma[i]); if (err) - return err; + goto error; } err = airoha_ppe_init(eth); if (err) - return err; + goto error; set_bit(DEV_STATE_INITIALIZED, ð->state); return 0; +error: + for (i = 0; i < ARRAY_SIZE(eth->qdma); i++) + airoha_qdma_cleanup(ð->qdma[i]); + + return err; } -static void airoha_hw_cleanup(struct airoha_qdma *qdma) +static void airoha_hw_cleanup(struct airoha_eth *eth) { int i; - for (i = 0; i < ARRAY_SIZE(qdma->q_rx); i++) { - if (!qdma->q_rx[i].ndesc) - continue; - - netif_napi_del(&qdma->q_rx[i].napi); - airoha_qdma_cleanup_rx_queue(&qdma->q_rx[i]); - if (qdma->q_rx[i].page_pool) - page_pool_destroy(qdma->q_rx[i].page_pool); - } - - for (i = 0; i < ARRAY_SIZE(qdma->q_tx_irq); i++) - netif_napi_del(&qdma->q_tx_irq[i].napi); - - for (i = 0; i < ARRAY_SIZE(qdma->q_tx); i++) { - if (!qdma->q_tx[i].ndesc) - continue; - - airoha_qdma_cleanup_tx_queue(&qdma->q_tx[i]); - } + for (i = 0; i < ARRAY_SIZE(eth->qdma); i++) + airoha_qdma_cleanup(ð->qdma[i]); + airoha_ppe_deinit(eth); } static void airoha_qdma_start_napi(struct airoha_qdma *qdma) @@ -3012,7 +3028,7 @@ static int airoha_probe(struct platform_device *pdev) err = airoha_hw_init(pdev, eth); if (err) - goto error_hw_cleanup; + goto error_netdev_free; for (i = 0; i < ARRAY_SIZE(eth->qdma); i++) airoha_qdma_start_napi(ð->qdma[i]); @@ -3040,10 +3056,6 @@ static int airoha_probe(struct platform_device *pdev) error_napi_stop: for (i = 0; i < ARRAY_SIZE(eth->qdma); i++) airoha_qdma_stop_napi(ð->qdma[i]); - airoha_ppe_deinit(eth); -error_hw_cleanup: - for (i = 0; i < ARRAY_SIZE(eth->qdma); i++) - airoha_hw_cleanup(ð->qdma[i]); for (i = 0; i < ARRAY_SIZE(eth->ports); i++) { struct airoha_gdm_port *port = eth->ports[i]; @@ -3055,6 +3067,8 @@ error_hw_cleanup: unregister_netdev(port->dev); airoha_metadata_dst_free(port); } + airoha_hw_cleanup(eth); +error_netdev_free: free_netdev(eth->napi_dev); platform_set_drvdata(pdev, NULL); @@ -3066,10 +3080,8 @@ static void airoha_remove(struct platform_device *pdev) struct airoha_eth *eth = platform_get_drvdata(pdev); int i; - for (i = 0; i < ARRAY_SIZE(eth->qdma); i++) { + for (i = 0; i < ARRAY_SIZE(eth->qdma); i++) airoha_qdma_stop_napi(ð->qdma[i]); - airoha_hw_cleanup(ð->qdma[i]); - } for (i = 0; i < ARRAY_SIZE(eth->ports); i++) { struct airoha_gdm_port *port = eth->ports[i]; @@ -3080,9 +3092,9 @@ static void airoha_remove(struct platform_device *pdev) unregister_netdev(port->dev); airoha_metadata_dst_free(port); } - free_netdev(eth->napi_dev); + airoha_hw_cleanup(eth); - airoha_ppe_deinit(eth); + free_netdev(eth->napi_dev); platform_set_drvdata(pdev, NULL); } From 3f21614c20053e083f07de58202032a73b04a1b7 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 23 Mar 2026 10:20:16 -0700 Subject: [PATCH 0778/1561] wifi: mac80211: Replace strncpy() with strscpy_pad() in drv_switch_vif_chanctx tracepoint Replace the deprecated[1] strncpy() with strscpy_pad() for copying the interface name into a tracepoint entry. The source "sdata->name" is a NUL-terminated char[IFNAMSIZ] buffer populated via NUL-guaranteeing paths: strscpy() in ieee80211_if_add(), snprintf() in ieee80211_add_virtual_monitor(), or memcpy() from ndev->name in ieee80211_if_add() and netdev_notify() (net/mac80211/iface.c). In the memcpy() cases, the source ndev->name is itself always NUL-terminated (populated via snprintf() or strscpy() in __dev_alloc_name() and dev_prep_valid_name() in net/core/dev.c). The destination "local_vifs[i].vif.vif_name" is a char[IFNAMSIZ] field in struct trace_vif_entry, stored in a __dynamic_array within the trace ring buffer. Since ring buffer entries are not zeroed on allocation, strscpy_pad() is used to zero-fill trailing bytes and prevent exposing stale ring buffer contents to userspace readers of tracefs. No behavioral change: since interface names are always at most 15 characters plus a NUL terminator, strscpy_pad() with size IFNAMSIZ (16) produces identical output to the original strncpy(). Link: https://github.com/KSPP/linux/issues/90 [1] Signed-off-by: Kees Cook Link: https://patch.msgid.link/20260323172015.work.146-kees@kernel.org Signed-off-by: Johannes Berg --- net/mac80211/trace.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 1f0c07eaad1b..e5968d754f8b 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -1778,9 +1778,8 @@ TRACE_EVENT(drv_switch_vif_chanctx, SWITCH_ENTRY_ASSIGN(vif.vif_type, vif->type); SWITCH_ENTRY_ASSIGN(vif.p2p, vif->p2p); SWITCH_ENTRY_ASSIGN(link_id, link_conf->link_id); - strncpy(local_vifs[i].vif.vif_name, - sdata->name, - sizeof(local_vifs[i].vif.vif_name)); + strscpy_pad(local_vifs[i].vif.vif_name, + sdata->name); SWITCH_ENTRY_ASSIGN(old_chandef.control_freq, old_ctx->def.chan->center_freq); SWITCH_ENTRY_ASSIGN(old_chandef.freq_offset, From 6c65b234575610e9795dabe410509a8b61cd4b68 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 23 Mar 2026 23:02:50 +0200 Subject: [PATCH 0779/1561] wifi: cfg80211: Add support for additional 7 GHz channels Add support for channels 237, 241, 245, 249, 253 and support for additional 320 MHz segment. Signed-off-by: Ilan Peer Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260323230242.072942e8e55a.I20eba7b534c6402d5e55f862865ff1e6fef64d83@changeid Signed-off-by: Johannes Berg --- net/wireless/chan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/wireless/chan.c b/net/wireless/chan.c index fa0764ede9c5..2dcf18f5655e 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -317,7 +317,7 @@ static bool cfg80211_valid_center_freq(u32 center, int step; /* We only do strict verification on 6 GHz */ - if (center < 5955 || center > 7115) + if (center < 5955 || center > 7215) return true; bw = nl80211_chan_width_to_mhz(width); @@ -325,7 +325,7 @@ static bool cfg80211_valid_center_freq(u32 center, return false; /* Validate that the channels bw is entirely within the 6 GHz band */ - if (center - bw / 2 < 5945 || center + bw / 2 > 7125) + if (center - bw / 2 < 5945 || center + bw / 2 > 7225) return false; /* With 320 MHz the permitted channels overlap */ From 3f451a2cf56c407131c6bd34546348696f4f685e Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Fri, 20 Mar 2026 10:16:01 +0200 Subject: [PATCH 0780/1561] wifi: mac80211: use for_each_chanctx_user_* in one more place for_each_chanctx_user_* is an iterator that visits all types of chanctx users, including the (to be added) NAN channels, and not only the link. ieee80211_get_chanctx_max_required_bw wasn't changed to use this new iterator, do it now. Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260320101556.4691916c7877.I9660f3945f4dccdb6d41a06ec4e74161e5ac65a4@changeid Signed-off-by: Johannes Berg --- net/mac80211/chan.c | 123 +++++++++++++++++++++++++------------------- 1 file changed, 71 insertions(+), 52 deletions(-) diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 2f0c93f3ace6..b7604118bf57 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -447,74 +447,93 @@ ieee80211_get_max_required_bw(struct ieee80211_link_data *link) return max_bw; } +static enum nl80211_chan_width +ieee80211_get_width_of_link(struct ieee80211_link_data *link) +{ + struct ieee80211_local *local = link->sdata->local; + + switch (link->sdata->vif.type) { + case NL80211_IFTYPE_STATION: + if (!link->sdata->vif.cfg.assoc) { + /* + * The AP's sta->bandwidth may not yet be set + * at this point (pre-association), so simply + * take the width from the chandef. We cannot + * have TDLS peers yet (only after association). + */ + return link->conf->chanreq.oper.width; + } + /* + * otherwise just use min_def like in AP, depending on what + * we currently think the AP STA (and possibly TDLS peers) + * require(s) + */ + fallthrough; + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + return ieee80211_get_max_required_bw(link); + case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: + break; + case NL80211_IFTYPE_MONITOR: + WARN_ON_ONCE(!ieee80211_hw_check(&local->hw, + NO_VIRTUAL_MONITOR)); + fallthrough; + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_MESH_POINT: + case NL80211_IFTYPE_OCB: + return link->conf->chanreq.oper.width; + case NL80211_IFTYPE_WDS: + case NL80211_IFTYPE_UNSPECIFIED: + case NUM_NL80211_IFTYPES: + case NL80211_IFTYPE_P2P_CLIENT: + case NL80211_IFTYPE_P2P_GO: + WARN_ON_ONCE(1); + break; + } + + /* Take the lowest possible, so it won't change the max width */ + return NL80211_CHAN_WIDTH_20_NOHT; +} + static enum nl80211_chan_width ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local, struct ieee80211_chanctx *ctx, struct ieee80211_link_data *rsvd_for, bool check_reserved) { - struct ieee80211_sub_if_data *sdata; - struct ieee80211_link_data *link; enum nl80211_chan_width max_bw = NL80211_CHAN_WIDTH_20_NOHT; + struct ieee80211_chanctx_user_iter iter; + struct ieee80211_sub_if_data *sdata; + enum nl80211_chan_width width; if (WARN_ON(check_reserved && rsvd_for)) return ctx->conf.def.width; - for_each_sdata_link(local, link) { - enum nl80211_chan_width width = NL80211_CHAN_WIDTH_20_NOHT; - - if (check_reserved) { - if (link->reserved_chanctx != ctx) - continue; - } else if (link != rsvd_for && - rcu_access_pointer(link->conf->chanctx_conf) != &ctx->conf) - continue; - - switch (link->sdata->vif.type) { - case NL80211_IFTYPE_STATION: - if (!link->sdata->vif.cfg.assoc) { - /* - * The AP's sta->bandwidth may not yet be set - * at this point (pre-association), so simply - * take the width from the chandef. We cannot - * have TDLS peers yet (only after association). - */ - width = link->conf->chanreq.oper.width; - break; - } - /* - * otherwise just use min_def like in AP, depending on what - * we currently think the AP STA (and possibly TDLS peers) - * require(s) - */ - fallthrough; - case NL80211_IFTYPE_AP: - case NL80211_IFTYPE_AP_VLAN: - width = ieee80211_get_max_required_bw(link); - break; - case NL80211_IFTYPE_P2P_DEVICE: - case NL80211_IFTYPE_NAN: - continue; - case NL80211_IFTYPE_MONITOR: - WARN_ON_ONCE(!ieee80211_hw_check(&local->hw, - NO_VIRTUAL_MONITOR)); - fallthrough; - case NL80211_IFTYPE_ADHOC: - case NL80211_IFTYPE_MESH_POINT: - case NL80211_IFTYPE_OCB: - width = link->conf->chanreq.oper.width; - break; - case NL80211_IFTYPE_WDS: - case NL80211_IFTYPE_UNSPECIFIED: - case NUM_NL80211_IFTYPES: - case NL80211_IFTYPE_P2P_CLIENT: - case NL80211_IFTYPE_P2P_GO: - WARN_ON_ONCE(1); + /* When this is true we only care about the reserving links */ + if (check_reserved) { + for_each_chanctx_user_reserved(local, ctx, &iter) { + width = ieee80211_get_width_of_link(iter.link); + max_bw = max(max_bw, width); } + goto check_monitor; + } + /* Consider all assigned links */ + for_each_chanctx_user_assigned(local, ctx, &iter) { + width = ieee80211_get_width_of_link(iter.link); max_bw = max(max_bw, width); } + if (!rsvd_for || + rsvd_for->sdata == rcu_access_pointer(local->monitor_sdata)) + goto check_monitor; + + /* Consider the link for which this chanctx is reserved/going to be assigned */ + width = ieee80211_get_width_of_link(rsvd_for); + max_bw = max(max_bw, width); + +check_monitor: /* use the configured bandwidth in case of monitor interface */ sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata); if (sdata && From 4ca2253157925424b3ada17c96fa4a26e664bc0d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 20 Mar 2026 10:16:28 +0200 Subject: [PATCH 0781/1561] wifi: mac80211_hwsim: advertise basic UHR support Just add support for ELR, and nothing else since the spec isn't really all that well-specified yet. Signed-off-by: Johannes Berg Reviewed-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260320101624.77af6463920e.I257e525a461c350bed87cfaefc52de25e37afcfb@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/virtual/mac80211_hwsim.c | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/drivers/net/wireless/virtual/mac80211_hwsim.c b/drivers/net/wireless/virtual/mac80211_hwsim.c index 82adcc848189..fdadc6fa89bd 100644 --- a/drivers/net/wireless/virtual/mac80211_hwsim.c +++ b/drivers/net/wireless/virtual/mac80211_hwsim.c @@ -4641,6 +4641,11 @@ static const struct ieee80211_sband_iftype_data sband_capa_2ghz[] = { }, /* PPE threshold information is not supported */ }, + .uhr_cap = { + .has_uhr = true, + .phy.cap = IEEE80211_UHR_PHY_CAP_ELR_RX | + IEEE80211_UHR_PHY_CAP_ELR_TX, + }, }, { .types_mask = BIT(NL80211_IFTYPE_AP) | @@ -4749,6 +4754,11 @@ static const struct ieee80211_sband_iftype_data sband_capa_2ghz[] = { }, /* PPE threshold information is not supported */ }, + .uhr_cap = { + .has_uhr = true, + .phy.cap = IEEE80211_UHR_PHY_CAP_ELR_RX | + IEEE80211_UHR_PHY_CAP_ELR_TX, + }, }, #ifdef CONFIG_MAC80211_MESH { @@ -4918,6 +4928,11 @@ static const struct ieee80211_sband_iftype_data sband_capa_5ghz[] = { }, /* PPE threshold information is not supported */ }, + .uhr_cap = { + .has_uhr = true, + .phy.cap = IEEE80211_UHR_PHY_CAP_ELR_RX | + IEEE80211_UHR_PHY_CAP_ELR_TX, + }, }, { .types_mask = BIT(NL80211_IFTYPE_AP) | @@ -5043,6 +5058,11 @@ static const struct ieee80211_sband_iftype_data sband_capa_5ghz[] = { }, /* PPE threshold information is not supported */ }, + .uhr_cap = { + .has_uhr = true, + .phy.cap = IEEE80211_UHR_PHY_CAP_ELR_RX | + IEEE80211_UHR_PHY_CAP_ELR_TX, + }, }, #ifdef CONFIG_MAC80211_MESH { @@ -5236,6 +5256,11 @@ static const struct ieee80211_sband_iftype_data sband_capa_6ghz[] = { }, /* PPE threshold information is not supported */ }, + .uhr_cap = { + .has_uhr = true, + .phy.cap = IEEE80211_UHR_PHY_CAP_ELR_RX | + IEEE80211_UHR_PHY_CAP_ELR_TX, + }, }, { .types_mask = BIT(NL80211_IFTYPE_AP) | @@ -5382,6 +5407,11 @@ static const struct ieee80211_sband_iftype_data sband_capa_6ghz[] = { }, /* PPE threshold information is not supported */ }, + .uhr_cap = { + .has_uhr = true, + .phy.cap = IEEE80211_UHR_PHY_CAP_ELR_RX | + IEEE80211_UHR_PHY_CAP_ELR_TX, + }, }, #ifdef CONFIG_MAC80211_MESH { @@ -5473,6 +5503,11 @@ static const struct ieee80211_sband_iftype_data sband_capa_6ghz[] = { }, /* PPE threshold information is not supported */ }, + .uhr_cap = { + .has_uhr = true, + .phy.cap = IEEE80211_UHR_PHY_CAP_ELR_RX | + IEEE80211_UHR_PHY_CAP_ELR_TX, + }, }, #endif }; From c209f67233f1c6d895cb893f622e17a505d00397 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Fri, 20 Mar 2026 10:19:59 +0200 Subject: [PATCH 0782/1561] wifi: mac80211: make ieee80211_find_chanctx link-unaware Currently we have only one user for a channel context: the link. With NAN, a new type of the channel context user will be added - the NAN channel. To prepare for this, we need to separate the channel context code from the link code. Removes the link argument from ieee80211_find_chanctx. Since the issue that led to commit 5e0c422d12b5 ("wifi: mac80211: reserve chanctx during find") - that added the link argument - is relevant for any user of the channel context, add a boolean to the chanctx itself, indicating that the chanctx is in the process of getting used. When this indication is set, the reference count of the channel context will be incremented by one, so even if it is getting released from a link (or another user) it won't be freed. Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260320101954.232499e2a41f.I0b735a607e1ec7aa5749ab01c794ef99dbe82b7f@changeid Signed-off-by: Johannes Berg --- net/mac80211/chan.c | 26 ++++++++++++++------------ net/mac80211/ieee80211_i.h | 3 +++ 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index b7604118bf57..1bcf501cfe8e 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -166,6 +166,13 @@ int ieee80211_chanctx_refcount(struct ieee80211_local *local, for_each_chanctx_user_all(local, ctx, &iter) num++; + /* + * This ctx is in the process of getting used, + * take it into consideration + */ + if (ctx->will_be_used) + num++; + return num; } @@ -769,10 +776,9 @@ static void ieee80211_change_chanctx(struct ieee80211_local *local, _ieee80211_change_chanctx(local, ctx, old_ctx, chanreq, NULL); } -/* Note: if successful, the returned chanctx is reserved for the link */ +/* Note: if successful, the returned chanctx will_be_used flag is set */ static struct ieee80211_chanctx * ieee80211_find_chanctx(struct ieee80211_local *local, - struct ieee80211_link_data *link, const struct ieee80211_chan_req *chanreq, enum ieee80211_chanctx_mode mode) { @@ -784,9 +790,6 @@ ieee80211_find_chanctx(struct ieee80211_local *local, if (mode == IEEE80211_CHANCTX_EXCLUSIVE) return NULL; - if (WARN_ON(link->reserved_chanctx)) - return NULL; - list_for_each_entry(ctx, &local->chanctx_list, list) { const struct ieee80211_chan_req *compat; @@ -807,12 +810,12 @@ ieee80211_find_chanctx(struct ieee80211_local *local, continue; /* - * Reserve the chanctx temporarily, as the driver might change + * Mark the chanctx as will be used, as the driver might change * active links during callbacks we make into it below and/or * later during assignment, which could (otherwise) cause the * context to actually be removed. */ - link->reserved_chanctx = ctx; + ctx->will_be_used = true; ieee80211_change_chanctx(local, ctx, ctx, compat); @@ -2066,8 +2069,8 @@ int _ieee80211_link_use_channel(struct ieee80211_link_data *link, if (!local->in_reconfig) __ieee80211_link_release_channel(link, false); - ctx = ieee80211_find_chanctx(local, link, chanreq, mode); - /* Note: context is now reserved */ + ctx = ieee80211_find_chanctx(local, chanreq, mode); + /* Note: context will_be_used flag is now set */ if (ctx) reserved = true; else if (!ieee80211_find_available_radio(local, chanreq, @@ -2087,9 +2090,8 @@ int _ieee80211_link_use_channel(struct ieee80211_link_data *link, ret = ieee80211_assign_link_chanctx(link, ctx, assign_on_failure); if (reserved) { - /* remove reservation */ - WARN_ON(link->reserved_chanctx != ctx); - link->reserved_chanctx = NULL; + WARN_ON(!ctx->will_be_used); + ctx->will_be_used = false; } if (ret) { diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index d71e0c6d2165..fe53812eca95 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -928,6 +928,9 @@ struct ieee80211_chanctx { bool radar_detected; + /* This chanctx is in process of getting used */ + bool will_be_used; + /* MUST be last - ends in a flexible-array member. */ struct ieee80211_chanctx_conf conf; }; From 763677c52145efc4760c721078d5c0dadb60eb03 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Fri, 20 Mar 2026 10:20:40 +0200 Subject: [PATCH 0783/1561] wifi: cfg80211: support UNII-9 channels in ieee80211_channel_to_freq_khz Devices that support UNII-9 will call ieee80211_channel_to_freq_khz with a channel number that can go up to 253. Allow the new channel numbers in ieee80211_channel_to_freq_khz. Signed-off-by: Emmanuel Grumbach Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260320102034.efcb7ea1de3c.Ifa4b75a24466de2a1d5707181c9c487618236e4b@changeid Signed-off-by: Johannes Berg --- net/wireless/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/util.c b/net/wireless/util.c index 0a0cea018fc5..1a861a6ea380 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -90,7 +90,7 @@ u32 ieee80211_channel_to_freq_khz(int chan, enum nl80211_band band) /* see 802.11ax D6.1 27.3.23.2 */ if (chan == 2) return MHZ_TO_KHZ(5935); - if (chan <= 233) + if (chan <= 253) return MHZ_TO_KHZ(5950 + chan * 5); break; case NL80211_BAND_60GHZ: From 876565d4a826f3f04ef36f1cef6123ed4b150aa3 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Fri, 20 Mar 2026 14:13:46 +0200 Subject: [PATCH 0784/1561] wifi: mac80211: properly handle error in ieee80211_add_virtual_monitor In case of an error in ieee80211_add_virtual_monitor, SDATA_STATE_RUNNING should be cleared as it was set in this function. Do it there instead of in the error path of ieee80211_do_open. Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260320141312.5546126313b1.I689dba2f54069b259702e8d246cedf79a73b82c6@changeid Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 40ce0bb72726..232fc0b80e44 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1222,14 +1222,14 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local, } } - set_bit(SDATA_STATE_RUNNING, &sdata->state); - ret = ieee80211_check_queues(sdata, NL80211_IFTYPE_MONITOR); if (ret) { kfree(sdata); return ret; } + set_bit(SDATA_STATE_RUNNING, &sdata->state); + mutex_lock(&local->iflist_mtx); rcu_assign_pointer(local->monitor_sdata, sdata); mutex_unlock(&local->iflist_mtx); @@ -1242,6 +1242,7 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local, mutex_unlock(&local->iflist_mtx); synchronize_net(); drv_remove_interface(local, sdata); + clear_bit(SDATA_STATE_RUNNING, &sdata->state); kfree(sdata); return ret; } @@ -1550,8 +1551,6 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) sdata->bss = NULL; if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) list_del(&sdata->u.vlan.list); - /* might already be clear but that doesn't matter */ - clear_bit(SDATA_STATE_RUNNING, &sdata->state); return res; } From b5b5ffa94a3b0419193c1a7c35dad6a972a638a9 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Fri, 20 Mar 2026 14:15:32 +0200 Subject: [PATCH 0785/1561] wifi: mac80211: don't consider the sband when processing capabilities In NAN, we have one set of (HT, VHT, HE) capabilities for all bands, which means that we will need to process those capabilities without a given sband. To prepare for that, remove the sband argument from ieee80211_ht_cap_ie_to_sta_ht_cap and ieee80211_he_cap_ie_to_sta_he_cap and pass our own capabilities instead. For ieee80211_vht_cap_ie_to_sta_vht_cap, make the sband argument optional, since it is also used to check if there is at least one channel that supports 80 MHz. (Note that this check doesn't make much sense, but this can be handled in a different patch.) Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260320141504.e42ef1f0eabb.If994d6346f00219437e22043e7bf2395b827b34a@changeid Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 3 ++- net/mac80211/he.c | 37 ++++++++++++++++++++++++------------- net/mac80211/ht.c | 6 +++--- net/mac80211/ibss.c | 4 +++- net/mac80211/ieee80211_i.h | 9 ++++++++- net/mac80211/mesh_plink.c | 3 ++- net/mac80211/mlme.c | 3 ++- net/mac80211/vht.c | 33 ++++++++++++++++++--------------- 8 files changed, 62 insertions(+), 36 deletions(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index ee64ac8e0f61..9aa4ae0621be 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2140,12 +2140,13 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, return -EINVAL; if (params->ht_capa) - ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, + ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, &sband->ht_cap, params->ht_capa, link_sta); /* VHT can override some HT caps such as the A-MSDU max length */ if (params->vht_capa) ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, + &sband->vht_cap, params->vht_capa, NULL, link_sta); diff --git a/net/mac80211/he.c b/net/mac80211/he.c index f7b05e59374c..93e0342cff4f 100644 --- a/net/mac80211/he.c +++ b/net/mac80211/he.c @@ -108,14 +108,13 @@ static void ieee80211_he_mcs_intersection(__le16 *he_own_rx, __le16 *he_peer_rx, } void -ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, - struct ieee80211_supported_band *sband, - const u8 *he_cap_ie, u8 he_cap_len, - const struct ieee80211_he_6ghz_capa *he_6ghz_capa, - struct link_sta_info *link_sta) +_ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, + const struct ieee80211_sta_he_cap *own_he_cap_ptr, + const u8 *he_cap_ie, u8 he_cap_len, + const struct ieee80211_he_6ghz_capa *he_6ghz_capa, + struct link_sta_info *link_sta) { struct ieee80211_sta_he_cap *he_cap = &link_sta->pub->he_cap; - const struct ieee80211_sta_he_cap *own_he_cap_ptr; struct ieee80211_sta_he_cap own_he_cap; struct ieee80211_he_cap_elem *he_cap_ie_elem = (void *)he_cap_ie; u8 he_ppe_size; @@ -125,12 +124,7 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, memset(he_cap, 0, sizeof(*he_cap)); - if (!he_cap_ie) - return; - - own_he_cap_ptr = - ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif); - if (!own_he_cap_ptr) + if (!he_cap_ie || !own_he_cap_ptr || !own_he_cap_ptr->has_he) return; own_he_cap = *own_he_cap_ptr; @@ -164,7 +158,7 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, link_sta->cur_max_bandwidth = ieee80211_sta_cap_rx_bw(link_sta); link_sta->pub->bandwidth = ieee80211_sta_cur_vht_bw(link_sta); - if (sband->band == NL80211_BAND_6GHZ && he_6ghz_capa) + if (he_6ghz_capa) ieee80211_update_from_he_6ghz_capa(he_6ghz_capa, link_sta); ieee80211_he_mcs_intersection(&own_he_cap.he_mcs_nss_supp.rx_mcs_80, @@ -207,6 +201,23 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, } } +void +ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, + struct ieee80211_supported_band *sband, + const u8 *he_cap_ie, u8 he_cap_len, + const struct ieee80211_he_6ghz_capa *he_6ghz_capa, + struct link_sta_info *link_sta) +{ + const struct ieee80211_sta_he_cap *own_he_cap = + ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif); + + _ieee80211_he_cap_ie_to_sta_he_cap(sdata, own_he_cap, he_cap_ie, + he_cap_len, + (sband->band == NL80211_BAND_6GHZ) ? + he_6ghz_capa : NULL, + link_sta); +} + void ieee80211_he_op_ie_to_bss_conf(struct ieee80211_vif *vif, const struct ieee80211_he_operation *he_op_ie) diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 33f1e1b235e9..410e2354f33a 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -136,7 +136,7 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, - struct ieee80211_supported_band *sband, + const struct ieee80211_sta_ht_cap *own_cap_ptr, const struct ieee80211_ht_cap *ht_cap_ie, struct link_sta_info *link_sta) { @@ -151,12 +151,12 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, memset(&ht_cap, 0, sizeof(ht_cap)); - if (!ht_cap_ie || !sband->ht_cap.ht_supported) + if (!ht_cap_ie || !own_cap_ptr->ht_supported) goto apply; ht_cap.ht_supported = true; - own_cap = sband->ht_cap; + own_cap = *own_cap_ptr; /* * If user has specified capability over-rides, take care diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 0298272c37ec..1e1ab25d9d8d 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -1014,7 +1014,8 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata, ieee80211_chandef_ht_oper(elems->ht_operation, &chandef); memcpy(&htcap_ie, elems->ht_cap_elem, sizeof(htcap_ie)); - rates_updated |= ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, + rates_updated |= ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, + &sband->ht_cap, &htcap_ie, &sta->deflink); @@ -1033,6 +1034,7 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata, &chandef); memcpy(&cap_ie, elems->vht_cap_elem, sizeof(cap_ie)); ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, + &sband->vht_cap, &cap_ie, NULL, &sta->deflink); if (memcmp(&cap, &sta->sta.deflink.vht_cap, sizeof(cap))) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index fe53812eca95..bacb49ad2817 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2188,7 +2188,7 @@ void ieee80211_aggr_check(struct ieee80211_sub_if_data *sdata, void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, struct ieee80211_sta_ht_cap *ht_cap); bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, - struct ieee80211_supported_band *sband, + const struct ieee80211_sta_ht_cap *own_cap, const struct ieee80211_ht_cap *ht_cap_ie, struct link_sta_info *link_sta); void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, @@ -2273,6 +2273,7 @@ void ieee80211_ht_handle_chanwidth_notif(struct ieee80211_local *local, void ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, + const struct ieee80211_sta_vht_cap *own_vht_cap, const struct ieee80211_vht_cap *vht_cap_ie, const struct ieee80211_vht_cap *vht_cap_ie2, struct link_sta_info *link_sta); @@ -2313,6 +2314,12 @@ ieee80211_sta_rx_bw_to_chan_width(struct link_sta_info *sta); /* HE */ void +_ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, + const struct ieee80211_sta_he_cap *own_he_cap, + const u8 *he_cap_ie, u8 he_cap_len, + const struct ieee80211_he_6ghz_capa *he_6ghz_capa, + struct link_sta_info *link_sta); +void ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, const u8 *he_cap_ie, u8 he_cap_len, diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 7d823a55636f..803106fc3134 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -450,12 +450,13 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata, changed |= IEEE80211_RC_SUPP_RATES_CHANGED; sta->sta.deflink.supp_rates[sband->band] = rates; - if (ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, + if (ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, &sband->ht_cap, elems->ht_cap_elem, &sta->deflink)) changed |= IEEE80211_RC_BW_CHANGED; ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, + &sband->vht_cap, elems->vht_cap_elem, NULL, &sta->deflink); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 0cd8d07bf668..173a60360a45 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -5586,7 +5586,7 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, /* Set up internal HT/VHT capabilities */ if (elems->ht_cap_elem && link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HT) - ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, + ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, &sband->ht_cap, elems->ht_cap_elem, link_sta); @@ -5622,6 +5622,7 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, } ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, + &sband->vht_cap, elems->vht_cap_elem, bss_vht_cap, link_sta); rcu_read_unlock(); diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index 80120f9f17b6..a6570781740a 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -4,7 +4,7 @@ * * Portions of this file * Copyright(c) 2015 - 2016 Intel Deutschland GmbH - * Copyright (C) 2018-2026 Intel Corporation + * Copyright (C) 2018 - 2026 Intel Corporation */ #include @@ -115,6 +115,7 @@ void ieee80211_apply_vhtcap_overrides(struct ieee80211_sub_if_data *sdata, void ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, + const struct ieee80211_sta_vht_cap *own_vht_cap, const struct ieee80211_vht_cap *vht_cap_ie, const struct ieee80211_vht_cap *vht_cap_ie2, struct link_sta_info *link_sta) @@ -122,7 +123,6 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, struct ieee80211_sta_vht_cap *vht_cap = &link_sta->pub->vht_cap; struct ieee80211_sta_vht_cap own_cap; u32 cap_info, i; - bool have_80mhz; u32 mpdu_len; memset(vht_cap, 0, sizeof(*vht_cap)); @@ -130,23 +130,26 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, if (!link_sta->pub->ht_cap.ht_supported) return; - if (!vht_cap_ie || !sband->vht_cap.vht_supported) + if (!vht_cap_ie || !own_vht_cap->vht_supported) return; - /* Allow VHT if at least one channel on the sband supports 80 MHz */ - have_80mhz = false; - for (i = 0; i < sband->n_channels; i++) { - if (sband->channels[i].flags & (IEEE80211_CHAN_DISABLED | - IEEE80211_CHAN_NO_80MHZ)) - continue; + if (sband) { + /* Allow VHT if at least one channel on the sband supports 80 MHz */ + bool have_80mhz = false; - have_80mhz = true; - break; + for (i = 0; i < sband->n_channels; i++) { + if (sband->channels[i].flags & (IEEE80211_CHAN_DISABLED | + IEEE80211_CHAN_NO_80MHZ)) + continue; + + have_80mhz = true; + break; + } + + if (!have_80mhz) + return; } - if (!have_80mhz) - return; - /* * A VHT STA must support 40 MHz, but if we verify that here * then we break a few things - some APs (e.g. Netgear R6300v2 @@ -156,7 +159,7 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, vht_cap->vht_supported = true; - own_cap = sband->vht_cap; + own_cap = *own_vht_cap; /* * If user has specified capability overrides, take care * of that if the station we're setting up is the AP that From 300aaca3435cca255517b366bebc9d642da1b8cb Mon Sep 17 00:00:00 2001 From: Rosen Penev Date: Tue, 10 Mar 2026 17:47:36 -0700 Subject: [PATCH 0786/1561] wifi: b43: kzalloc + kcalloc to kzalloc_flex Simplifies allocation and allows using __counted_by for extra runtime analysis. Signed-off-by: Rosen Penev Link: https://patch.msgid.link/20260311004736.32730-1-rosenp@gmail.com Signed-off-by: Johannes Berg --- drivers/net/wireless/broadcom/b43/dma.c | 18 ++++++++---------- drivers/net/wireless/broadcom/b43/dma.h | 4 ++-- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/broadcom/b43/dma.c b/drivers/net/wireless/broadcom/b43/dma.c index 3a8df7a18042..05da6987a845 100644 --- a/drivers/net/wireless/broadcom/b43/dma.c +++ b/drivers/net/wireless/broadcom/b43/dma.c @@ -837,18 +837,19 @@ struct b43_dmaring *b43_setup_dmaring(struct b43_wldev *dev, struct b43_dmaring *ring; int i, err; dma_addr_t dma_test; + size_t nr_slots; - ring = kzalloc_obj(*ring); + if (for_tx) + nr_slots = B43_TXRING_SLOTS; + else + nr_slots = B43_RXRING_SLOTS; + + ring = kzalloc_flex(*ring, meta, nr_slots); if (!ring) goto out; - ring->nr_slots = B43_RXRING_SLOTS; - if (for_tx) - ring->nr_slots = B43_TXRING_SLOTS; + ring->nr_slots = nr_slots; - ring->meta = kzalloc_objs(struct b43_dmadesc_meta, ring->nr_slots); - if (!ring->meta) - goto err_kfree_ring; for (i = 0; i < ring->nr_slots; i++) ring->meta->skb = B43_DMA_PTR_POISON; @@ -943,8 +944,6 @@ struct b43_dmaring *b43_setup_dmaring(struct b43_wldev *dev, err_kfree_txhdr_cache: kfree(ring->txhdr_cache); err_kfree_meta: - kfree(ring->meta); - err_kfree_ring: kfree(ring); ring = NULL; goto out; @@ -1004,7 +1003,6 @@ static void b43_destroy_dmaring(struct b43_dmaring *ring, free_ringmemory(ring); kfree(ring->txhdr_cache); - kfree(ring->meta); kfree(ring); } diff --git a/drivers/net/wireless/broadcom/b43/dma.h b/drivers/net/wireless/broadcom/b43/dma.h index c2a357219d4b..f9f65bbe2d76 100644 --- a/drivers/net/wireless/broadcom/b43/dma.h +++ b/drivers/net/wireless/broadcom/b43/dma.h @@ -228,8 +228,6 @@ struct b43_dmaring { const struct b43_dma_ops *ops; /* Kernel virtual base address of the ring memory. */ void *descbase; - /* Meta data about all descriptors. */ - struct b43_dmadesc_meta *meta; /* Cache of TX headers for each TX frame. * This is to avoid an allocation on each TX. * This is NULL for an RX ring. @@ -273,6 +271,8 @@ struct b43_dmaring { /* Statistics: Total number of TX plus all retries. */ u64 nr_total_packet_tries; #endif /* CONFIG_B43_DEBUG */ + /* Meta data about all descriptors. */ + struct b43_dmadesc_meta meta[] __counted_by(nr_slots); }; static inline u32 b43_dma_read(struct b43_dmaring *ring, u16 offset) From f456e1f56c4cdda1f908f1b97b57f6d45f578f2c Mon Sep 17 00:00:00 2001 From: StanleyYP Wang Date: Thu, 12 Feb 2026 17:03:08 +0800 Subject: [PATCH 0787/1561] wifi: mt76: add external EEPROM support for mt799x chipsets For the MT7992 and MT7990 chipsets, efuse mode is not supported because there is insufficient space in the efuse to store the calibration data. Therefore, an additional on-chip EEPROM is added to address this limitation. Co-developed-by: Elwin Huang Signed-off-by: Elwin Huang Co-developed-by: Shayne Chen Signed-off-by: Shayne Chen Signed-off-by: StanleyYP Wang Link: https://patch.msgid.link/20260212090310.3335392-1-shayne.chen@mediatek.com Signed-off-by: Felix Fietkau --- .../wireless/mediatek/mt76/mt76_connac_mcu.h | 1 + .../wireless/mediatek/mt76/mt7996/eeprom.c | 59 +++++++------ .../net/wireless/mediatek/mt76/mt7996/init.c | 3 +- .../net/wireless/mediatek/mt76/mt7996/mcu.c | 83 ++++++++++++------- .../net/wireless/mediatek/mt76/mt7996/mcu.h | 43 +++++++++- .../wireless/mediatek/mt76/mt7996/mt7996.h | 20 ++++- 6 files changed, 148 insertions(+), 61 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h index f44977f9093d..e91966cd5efe 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h +++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h @@ -1308,6 +1308,7 @@ enum { MCU_UNI_CMD_PER_STA_INFO = 0x6d, MCU_UNI_CMD_ALL_STA_INFO = 0x6e, MCU_UNI_CMD_ASSERT_DUMP = 0x6f, + MCU_UNI_CMD_EXT_EEPROM_CTRL = 0x74, MCU_UNI_CMD_RADIO_STATUS = 0x80, MCU_UNI_CMD_SDO = 0x88, }; diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt7996/eeprom.c index 8f60772913b4..00c72be8498f 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/eeprom.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/eeprom.c @@ -153,7 +153,7 @@ mt7996_eeprom_check_or_use_default(struct mt7996_dev *dev, bool use_default) dev_warn(dev->mt76.dev, "eeprom load fail, use default bin\n"); memcpy(eeprom, fw->data, MT7996_EEPROM_SIZE); - dev->flash_mode = true; + dev->eeprom_mode = EEPROM_MODE_DEFAULT_BIN; out: release_firmware(fw); @@ -163,26 +163,31 @@ out: static int mt7996_eeprom_load(struct mt7996_dev *dev) { + u32 eeprom_blk_size, block_num; bool use_default = false; - int ret; + int ret, i; ret = mt76_eeprom_init(&dev->mt76, MT7996_EEPROM_SIZE); if (ret < 0) return ret; if (ret && !mt7996_check_eeprom(dev)) { - dev->flash_mode = true; + dev->eeprom_mode = EEPROM_MODE_FLASH; goto out; } - if (!dev->flash_mode) { - u32 eeprom_blk_size = MT7996_EEPROM_BLOCK_SIZE; - u32 block_num = DIV_ROUND_UP(MT7996_EEPROM_SIZE, eeprom_blk_size); + memset(dev->mt76.eeprom.data, 0, MT7996_EEPROM_SIZE); + if (mt7996_has_ext_eeprom(dev)) { + /* external eeprom mode */ + dev->eeprom_mode = EEPROM_MODE_EXT; + eeprom_blk_size = MT7996_EXT_EEPROM_BLOCK_SIZE; + } else { u8 free_block_num; - int i; - memset(dev->mt76.eeprom.data, 0, MT7996_EEPROM_SIZE); - ret = mt7996_mcu_get_eeprom_free_block(dev, &free_block_num); + /* efuse mode */ + dev->eeprom_mode = EEPROM_MODE_EFUSE; + eeprom_blk_size = MT7996_EEPROM_BLOCK_SIZE; + ret = mt7996_mcu_get_efuse_free_block(dev, &free_block_num); if (ret < 0) return ret; @@ -191,27 +196,29 @@ static int mt7996_eeprom_load(struct mt7996_dev *dev) use_default = true; goto out; } + } - /* check if eeprom data from fw is valid */ - if (mt7996_mcu_get_eeprom(dev, 0, NULL, 0) || - mt7996_check_eeprom(dev)) { + /* check if eeprom data from fw is valid */ + if (mt7996_mcu_get_eeprom(dev, 0, NULL, eeprom_blk_size, + dev->eeprom_mode) || + mt7996_check_eeprom(dev)) { + use_default = true; + goto out; + } + + /* read eeprom data from fw */ + block_num = DIV_ROUND_UP(MT7996_EEPROM_SIZE, eeprom_blk_size); + for (i = 1; i < block_num; i++) { + u32 len = eeprom_blk_size; + + if (i == block_num - 1) + len = MT7996_EEPROM_SIZE % eeprom_blk_size; + ret = mt7996_mcu_get_eeprom(dev, i * eeprom_blk_size, + NULL, len, dev->eeprom_mode); + if (ret && ret != -EINVAL) { use_default = true; goto out; } - - /* read eeprom data from fw */ - for (i = 1; i < block_num; i++) { - u32 len = eeprom_blk_size; - - if (i == block_num - 1) - len = MT7996_EEPROM_SIZE % eeprom_blk_size; - ret = mt7996_mcu_get_eeprom(dev, i * eeprom_blk_size, - NULL, len); - if (ret && ret != -EINVAL) { - use_default = true; - goto out; - } - } } out: diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/init.c b/drivers/net/wireless/mediatek/mt76/mt7996/init.c index 2937e89ad0c9..1fab04909831 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/init.c @@ -1207,7 +1207,8 @@ static int mt7996_variant_fem_init(struct mt7996_dev *dev) if (ret) return ret; - ret = mt7996_mcu_get_eeprom(dev, MT7976C_EFUSE_OFFSET, buf, sizeof(buf)); + ret = mt7996_mcu_get_eeprom(dev, MT7976C_EFUSE_OFFSET, buf, sizeof(buf), + EEPROM_MODE_EFUSE); if (ret && ret != -EINVAL) return ret; diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c index 7741ba0aa0bd..2a149f64c667 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c @@ -3954,7 +3954,7 @@ static int mt7996_mcu_set_eeprom_flash(struct mt7996_dev *dev) #define MAX_PAGE_IDX_MASK GENMASK(7, 5) #define PAGE_IDX_MASK GENMASK(4, 2) #define PER_PAGE_SIZE 0x400 - struct mt7996_mcu_eeprom req = { + struct mt7996_mcu_eeprom_update req = { .tag = cpu_to_le16(UNI_EFUSE_BUFFER_MODE), .buffer_mode = EE_MODE_BUFFER }; @@ -3996,57 +3996,80 @@ static int mt7996_mcu_set_eeprom_flash(struct mt7996_dev *dev) int mt7996_mcu_set_eeprom(struct mt7996_dev *dev) { - struct mt7996_mcu_eeprom req = { + struct mt7996_mcu_eeprom_update req = { .tag = cpu_to_le16(UNI_EFUSE_BUFFER_MODE), .len = cpu_to_le16(sizeof(req) - 4), .buffer_mode = EE_MODE_EFUSE, .format = EE_FORMAT_WHOLE }; - if (dev->flash_mode) + if (dev->eeprom_mode != EEPROM_MODE_EFUSE) return mt7996_mcu_set_eeprom_flash(dev); return mt76_mcu_send_msg(&dev->mt76, MCU_WM_UNI_CMD(EFUSE_CTRL), &req, sizeof(req), true); } -int mt7996_mcu_get_eeprom(struct mt7996_dev *dev, u32 offset, u8 *buf, u32 buf_len) +int mt7996_mcu_get_eeprom(struct mt7996_dev *dev, u32 offset, u8 *buf, u32 buf_len, + enum mt7996_eeprom_mode mode) { - struct { - u8 _rsv[4]; - - __le16 tag; - __le16 len; - __le32 addr; - __le32 valid; - u8 data[16]; - } __packed req = { - .tag = cpu_to_le16(UNI_EFUSE_ACCESS), - .len = cpu_to_le16(sizeof(req) - 4), - .addr = cpu_to_le32(round_down(offset, - MT7996_EEPROM_BLOCK_SIZE)), + struct mt7996_mcu_eeprom_access req = { + .info.len = cpu_to_le16(sizeof(req) - 4), }; + struct mt7996_mcu_eeprom_access_event *event; struct sk_buff *skb; - bool valid; - int ret; + int ret, cmd; + u32 addr; - ret = mt76_mcu_send_and_get_msg(&dev->mt76, - MCU_WM_UNI_CMD_QUERY(EFUSE_CTRL), - &req, sizeof(req), true, &skb); + switch (mode) { + case EEPROM_MODE_EFUSE: + addr = round_down(offset, MT7996_EEPROM_BLOCK_SIZE); + cmd = MCU_WM_UNI_CMD_QUERY(EFUSE_CTRL); + req.info.tag = cpu_to_le16(UNI_EFUSE_ACCESS); + break; + case EEPROM_MODE_EXT: + addr = round_down(offset, MT7996_EXT_EEPROM_BLOCK_SIZE); + cmd = MCU_WM_UNI_CMD_QUERY(EXT_EEPROM_CTRL); + req.info.tag = cpu_to_le16(UNI_EXT_EEPROM_ACCESS); + req.eeprom.ext_eeprom.data_len = cpu_to_le32(buf_len); + break; + default: + return -EINVAL; + } + + req.info.addr = cpu_to_le32(addr); + ret = mt76_mcu_send_and_get_msg(&dev->mt76, cmd, &req, sizeof(req), + true, &skb); if (ret) return ret; - valid = le32_to_cpu(*(__le32 *)(skb->data + 16)); - if (valid) { - u32 addr = le32_to_cpu(*(__le32 *)(skb->data + 12)); + event = (struct mt7996_mcu_eeprom_access_event *)skb->data; + if (event->valid) { + u32 ret_len = le32_to_cpu(event->eeprom.ext_eeprom.data_len); + + addr = le32_to_cpu(event->addr); if (!buf) buf = (u8 *)dev->mt76.eeprom.data + addr; - if (!buf_len || buf_len > MT7996_EEPROM_BLOCK_SIZE) - buf_len = MT7996_EEPROM_BLOCK_SIZE; - skb_pull(skb, 48); - memcpy(buf, skb->data, buf_len); + switch (mode) { + case EEPROM_MODE_EFUSE: + if (!buf_len || buf_len > MT7996_EEPROM_BLOCK_SIZE) + buf_len = MT7996_EEPROM_BLOCK_SIZE; + + memcpy(buf, event->eeprom.efuse, buf_len); + break; + case EEPROM_MODE_EXT: + if (!buf_len || buf_len > MT7996_EXT_EEPROM_BLOCK_SIZE) + buf_len = MT7996_EXT_EEPROM_BLOCK_SIZE; + + memcpy(buf, event->eeprom.ext_eeprom.data, + ret_len < buf_len ? ret_len : buf_len); + break; + default: + ret = -EINVAL; + break; + } } else { ret = -EINVAL; } @@ -4056,7 +4079,7 @@ int mt7996_mcu_get_eeprom(struct mt7996_dev *dev, u32 offset, u8 *buf, u32 buf_l return ret; } -int mt7996_mcu_get_eeprom_free_block(struct mt7996_dev *dev, u8 *block_num) +int mt7996_mcu_get_efuse_free_block(struct mt7996_dev *dev, u8 *block_num) { struct { u8 _rsv[4]; diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h index d9fb49f7b01b..905dafccc316 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h @@ -145,7 +145,7 @@ struct mt7996_mcu_background_chain_ctrl { u8 rsv[2]; } __packed; -struct mt7996_mcu_eeprom { +struct mt7996_mcu_eeprom_update { u8 _rsv[4]; __le16 tag; @@ -155,6 +155,43 @@ struct mt7996_mcu_eeprom { __le16 buf_len; } __packed; +union eeprom_data { + struct { + __le32 data_len; + DECLARE_FLEX_ARRAY(u8, data); + } ext_eeprom; + DECLARE_FLEX_ARRAY(u8, efuse); +} __packed; + +struct mt7996_mcu_eeprom_info { + u8 _rsv[4]; + + __le16 tag; + __le16 len; + __le32 addr; + __le32 valid; +} __packed; + +struct mt7996_mcu_eeprom_access { + struct mt7996_mcu_eeprom_info info; + union eeprom_data eeprom; +} __packed; + +struct mt7996_mcu_eeprom_access_event { + u8 _rsv[4]; + + __le16 tag; + __le16 len; + __le32 version; + __le32 addr; + __le32 valid; + __le32 size; + __le32 magic_no; + __le32 type; + __le32 rsv[4]; + union eeprom_data eeprom; +} __packed; + struct mt7996_mcu_phy_rx_info { u8 category; u8 rate; @@ -875,6 +912,10 @@ enum { UNI_EFUSE_BUFFER_RD, }; +enum { + UNI_EXT_EEPROM_ACCESS = 1, +}; + enum { UNI_VOW_DRR_CTRL, UNI_VOW_RX_AT_AIRTIME_EN = 0x0b, diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h index 3ff730e36fa6..ea1f656a9334 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h @@ -85,6 +85,7 @@ #define MT7996_EEPROM_SIZE 7680 #define MT7996_EEPROM_BLOCK_SIZE 16 +#define MT7996_EXT_EEPROM_BLOCK_SIZE 1024 #define MT7996_TOKEN_SIZE 16384 #define MT7996_HW_TOKEN_SIZE 8192 @@ -169,6 +170,13 @@ enum mt7996_fem_type { MT7996_FEM_MIX, }; +enum mt7996_eeprom_mode { + EEPROM_MODE_DEFAULT_BIN, + EEPROM_MODE_EFUSE, + EEPROM_MODE_FLASH, + EEPROM_MODE_EXT, +}; + enum mt7996_txq_id { MT7996_TXQ_FWDL = 16, MT7996_TXQ_MCU_WM, @@ -441,7 +449,7 @@ struct mt7996_dev { u32 hw_pattern; - bool flash_mode:1; + u8 eeprom_mode; bool has_eht:1; struct { @@ -717,8 +725,9 @@ int mt7996_mcu_set_fixed_rate_ctrl(struct mt7996_dev *dev, int mt7996_mcu_set_fixed_field(struct mt7996_dev *dev, struct mt7996_sta *msta, void *data, u8 link_id, u32 field); int mt7996_mcu_set_eeprom(struct mt7996_dev *dev); -int mt7996_mcu_get_eeprom(struct mt7996_dev *dev, u32 offset, u8 *buf, u32 buf_len); -int mt7996_mcu_get_eeprom_free_block(struct mt7996_dev *dev, u8 *block_num); +int mt7996_mcu_get_eeprom(struct mt7996_dev *dev, u32 offset, u8 *buf, u32 buf_len, + enum mt7996_eeprom_mode mode); +int mt7996_mcu_get_efuse_free_block(struct mt7996_dev *dev, u8 *block_num); int mt7996_mcu_get_chip_config(struct mt7996_dev *dev, u32 *cap); int mt7996_mcu_set_ser(struct mt7996_dev *dev, u8 action, u8 set, u8 band); int mt7996_mcu_set_txbf(struct mt7996_dev *dev, u8 action); @@ -816,6 +825,11 @@ static inline bool mt7996_has_wa(struct mt7996_dev *dev) return !is_mt7990(&dev->mt76); } +static inline bool mt7996_has_ext_eeprom(struct mt7996_dev *dev) +{ + return !is_mt7996(&dev->mt76); +} + void mt7996_mac_init(struct mt7996_dev *dev); u32 mt7996_mac_wtbl_lmac_addr(struct mt7996_dev *dev, u16 wcid, u8 dw); bool mt7996_mac_wtbl_update(struct mt7996_dev *dev, int idx, u32 mask); From 676d5d009bc68596a88104137a0bf785b8c2562a Mon Sep 17 00:00:00 2001 From: Shayne Chen Date: Thu, 12 Feb 2026 17:03:09 +0800 Subject: [PATCH 0788/1561] wifi: mt76: mt7996: add variant for MT7992 chipsets Introduce VAR_TYPE_24 for the MT7992 chipsets, a dual-band variant supporting 3T3R/2SS on the 2 GHz band and 5T5R/4SS on the 5GHz band. Signed-off-by: Shayne Chen Link: https://patch.msgid.link/20260212090310.3335392-2-shayne.chen@mediatek.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7996/eeprom.c | 5 ++++- drivers/net/wireless/mediatek/mt76/mt7996/init.c | 2 +- drivers/net/wireless/mediatek/mt76/mt7996/mcu.c | 3 +++ drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h | 7 +++++++ 4 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt7996/eeprom.c index 00c72be8498f..ac05f7d75d63 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/eeprom.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/eeprom.c @@ -33,6 +33,8 @@ static char *mt7996_eeprom_name(struct mt7996_dev *dev) if (dev->var.fem == MT7996_FEM_INT) return MT7992_EEPROM_DEFAULT_23_INT; return MT7992_EEPROM_DEFAULT_23; + case MT7992_VAR_TYPE_24: + return MT7992_EEPROM_DEFAULT_24; case MT7992_VAR_TYPE_44: default: if (dev->var.fem == MT7996_FEM_INT) @@ -392,7 +394,8 @@ bool mt7996_eeprom_has_background_radar(struct mt7996_dev *dev) return false; break; case MT7992_DEVICE_ID: - if (dev->var.type == MT7992_VAR_TYPE_23) + if (dev->var.type == MT7992_VAR_TYPE_23 || + dev->var.type == MT7992_VAR_TYPE_24) return false; break; case MT7990_DEVICE_ID: { diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/init.c b/drivers/net/wireless/mediatek/mt76/mt7996/init.c index 1fab04909831..3b4f808b968c 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/init.c @@ -1173,7 +1173,7 @@ static int mt7996_variant_type_init(struct mt7996_dev *dev) else if (u32_get_bits(val, MT_PAD_GPIO_ADIE_COMB_7992)) var_type = MT7992_VAR_TYPE_44; else - return -EINVAL; + var_type = MT7992_VAR_TYPE_24; break; case MT7990_DEVICE_ID: var_type = MT7990_VAR_TYPE_23; diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c index 2a149f64c667..81c4e0d4654b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c @@ -18,6 +18,9 @@ case MT7992_VAR_TYPE_23: \ _fw = MT7992_##name##_23; \ break; \ + case MT7992_VAR_TYPE_24: \ + _fw = MT7992_##name##_24; \ + break; \ default: \ _fw = MT7992_##name; \ } \ diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h index ea1f656a9334..d36fb5396141 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h @@ -64,6 +64,11 @@ #define MT7992_FIRMWARE_DSP_23 "mediatek/mt7996/mt7992_dsp_23.bin" #define MT7992_ROM_PATCH_23 "mediatek/mt7996/mt7992_rom_patch_23.bin" +#define MT7992_FIRMWARE_WA_24 "mediatek/mt7996/mt7992_wa_24.bin" +#define MT7992_FIRMWARE_WM_24 "mediatek/mt7996/mt7992_wm_24.bin" +#define MT7992_FIRMWARE_DSP_24 "mediatek/mt7996/mt7992_dsp_24.bin" +#define MT7992_ROM_PATCH_24 "mediatek/mt7996/mt7992_rom_patch_24.bin" + #define MT7990_FIRMWARE_WA "" #define MT7990_FIRMWARE_WM "mediatek/mt7996/mt7990_wm.bin" #define MT7990_FIRMWARE_DSP "" @@ -79,6 +84,7 @@ #define MT7992_EEPROM_DEFAULT_MIX "mediatek/mt7996/mt7992_eeprom_2i5e.bin" #define MT7992_EEPROM_DEFAULT_23 "mediatek/mt7996/mt7992_eeprom_23.bin" #define MT7992_EEPROM_DEFAULT_23_INT "mediatek/mt7996/mt7992_eeprom_23_2i5i.bin" +#define MT7992_EEPROM_DEFAULT_24 "mediatek/mt7996/mt7992_eeprom_24_2i5i.bin" #define MT7990_EEPROM_DEFAULT "mediatek/mt7996/mt7990_eeprom.bin" #define MT7990_EEPROM_DEFAULT_INT "mediatek/mt7996/mt7990_eeprom_2i5i.bin" @@ -158,6 +164,7 @@ enum mt7996_var_type { enum mt7992_var_type { MT7992_VAR_TYPE_44, MT7992_VAR_TYPE_23, + MT7992_VAR_TYPE_24, }; enum mt7990_var_type { From fa1063fc649c08b37f9a21d8bc38344ce8a128f5 Mon Sep 17 00:00:00 2001 From: StanleyYP Wang Date: Thu, 12 Feb 2026 17:03:10 +0800 Subject: [PATCH 0789/1561] wifi: mt76: mt7996: apply calibration-free data from OTP Before sending the current EEPROM data to the firmware, read the calibration-free data (FT data) from the efuse and merge it with the existing EEPROM data. Co-developed-by: Shayne Chen Signed-off-by: Shayne Chen Signed-off-by: StanleyYP Wang Link: https://patch.msgid.link/20260212090310.3335392-3-shayne.chen@mediatek.com Signed-off-by: Felix Fietkau --- .../net/wireless/mediatek/mt76/mt7996/mcu.c | 172 ++++++++++++++++-- .../wireless/mediatek/mt76/mt7996/mt7996.h | 5 + 2 files changed, 158 insertions(+), 19 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c index 81c4e0d4654b..27713399c318 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c @@ -3952,7 +3952,153 @@ int mt7996_mcu_set_chan_info(struct mt7996_phy *phy, u16 tag) &req, sizeof(req), true); } -static int mt7996_mcu_set_eeprom_flash(struct mt7996_dev *dev) +static int +mt7996_mcu_get_cal_free_data(struct mt7996_dev *dev) +{ +#define MT_EE_7977BN_OFFSET (0x1200 - 0x500) + struct cal_free_data { + u16 adie_offs; + u16 eep_offs; + }; + static const struct cal_free_data cal_7975[] = { + { 0x5cd, 0x451 }, { 0x5cf, 0x453 }, { 0x5d1, 0x455 }, + { 0x5d3, 0x457 }, { 0x6c0, 0x44c }, { 0x6c1, 0x44d }, + { 0x6c2, 0x44e }, { 0x6c3, 0x44f }, { 0x7a1, 0xba1 }, + { 0x7a6, 0xba6 }, { 0x7a8, 0xba8 }, { 0x7aa, 0xbaa }, + }; + static const struct cal_free_data cal_7976[] = { + { 0x4c, 0x44c }, { 0x4d, 0x44d }, { 0x4e, 0x44e }, + { 0x4f, 0x44f }, { 0x50, 0x450 }, { 0x51, 0x451 }, + { 0x53, 0x453 }, { 0x55, 0x455 }, { 0x57, 0x457 }, + { 0x59, 0x459 }, { 0x70, 0x470 }, { 0x71, 0x471 }, + { 0x790, 0xb90 }, { 0x791, 0xb91 }, { 0x794, 0xb94 }, + { 0x795, 0xb95 }, { 0x7a6, 0xba6 }, { 0x7a8, 0xba8 }, + { 0x7aa, 0xbaa }, + }; + static const struct cal_free_data cal_7977[] = { + { 0x4c, 0x124c }, { 0x4d, 0x124d }, { 0x4e, 0x124e }, + { 0x4f, 0x124f }, { 0x50, 0x1250 }, { 0x51, 0x1251 }, + { 0x53, 0x1253 }, { 0x55, 0x1255 }, { 0x57, 0x1257 }, + { 0x59, 0x1259 }, { 0x69, 0x1269 }, { 0x6a, 0x126a }, + { 0x7a, 0x127a }, { 0x7b, 0x127b }, { 0x7c, 0x127c }, + { 0x7d, 0x127d }, { 0x7e, 0x127e }, + }; + static const struct cal_free_data cal_7978[] = { + { 0x91, 0xb91 }, { 0x95, 0xb95 }, { 0x100, 0x480 }, + { 0x102, 0x482 }, { 0x104, 0x484 }, { 0x106, 0x486 }, + { 0x107, 0x487 }, { 0x108, 0x488 }, { 0x109, 0x489 }, + { 0x10a, 0x48a }, { 0x10b, 0x48b }, { 0x10c, 0x48c }, + { 0x10e, 0x48e }, { 0x110, 0x490 }, + }; + static const struct cal_free_data cal_7979[] = { + { 0x4c, 0x124c }, { 0x4d, 0x124d }, { 0x4e, 0x124e }, + { 0x4f, 0x124f }, { 0x50, 0x1250 }, { 0x51, 0x1251 }, + { 0x53, 0x1253 }, { 0x55, 0x1255 }, { 0x57, 0x1257 }, + { 0x59, 0x1259 }, { 0x69, 0x1269 }, { 0x6a, 0x126a }, + { 0x7a, 0x127a }, { 0x7b, 0x127b }, { 0x7c, 0x127c }, + { 0x7e, 0x127e }, { 0x80, 0x1280 }, + }; + const struct cal_free_data *cal_arr[__MT_MAX_BAND]; + u16 cal_arr_len[__MT_MAX_BAND] = {}; + u8 *eeprom = (u8 *)dev->mt76.eeprom.data; + int band, i, ret; + +#define CAL_ARR(_band, _adie) do { \ + cal_arr[_band] = cal_##_adie; \ + cal_arr_len[_band] = ARRAY_SIZE(cal_##_adie); \ + } while (0) + + switch (mt76_chip(&dev->mt76)) { + case MT7996_DEVICE_ID: + /* adie 0 */ + if (dev->var.fem == MT7996_FEM_INT && + dev->var.type != MT7996_VAR_TYPE_233) + CAL_ARR(0, 7975); + else + CAL_ARR(0, 7976); + + /* adie 1 */ + if (dev->var.type == MT7996_VAR_TYPE_444) + CAL_ARR(1, 7977); + + /* adie 2 */ + CAL_ARR(2, 7977); + break; + case MT7992_DEVICE_ID: + /* adie 0 */ + if (dev->var.type == MT7992_VAR_TYPE_44 && + dev->var.fem != MT7996_FEM_EXT) + CAL_ARR(0, 7975); + else if (dev->var.type == MT7992_VAR_TYPE_24) + CAL_ARR(0, 7978); + else + CAL_ARR(0, 7976); + + /* adie 1 */ + if (dev->var.type == MT7992_VAR_TYPE_44 && + dev->var.fem != MT7996_FEM_INT) + CAL_ARR(1, 7977); + else if (dev->var.type != MT7992_VAR_TYPE_23) + CAL_ARR(1, 7979); + break; + case MT7990_DEVICE_ID: + /* adie 0 */ + CAL_ARR(0, 7976); + break; + default: + return -EINVAL; + } + + for (band = 0; band < __MT_MAX_BAND; band++) { + u8 buf[MT7996_EEPROM_BLOCK_SIZE]; + const struct cal_free_data *cal; + u16 prev_block_idx = -1; + u16 adie_base; + + if (!cal_arr_len[band]) + continue; + + if (band == MT_BAND0) + adie_base = MT7996_EFUSE_BASE_OFFS_ADIE0; + else if (band == MT_BAND1 && is_mt7992(&dev->mt76)) + adie_base = MT7992_EFUSE_BASE_OFFS_ADIE1; + else if (band == MT_BAND1) + adie_base = MT7996_EFUSE_BASE_OFFS_ADIE1; + else + adie_base = MT7996_EFUSE_BASE_OFFS_ADIE2; + + cal = cal_arr[band]; + for (i = 0; i < cal_arr_len[band]; i++) { + u16 adie_offset = cal[i].adie_offs + adie_base; + u16 eep_offset = cal[i].eep_offs; + u16 block_idx = adie_offset / MT7996_EEPROM_BLOCK_SIZE; + u16 offset = adie_offset % MT7996_EEPROM_BLOCK_SIZE; + + if (is_mt7996(&dev->mt76) && band == MT_BAND1 && + dev->var.type == MT7996_VAR_TYPE_444) + eep_offset -= MT_EE_7977BN_OFFSET; + + if (prev_block_idx != block_idx) { + memset(buf, 0, sizeof(buf)); + ret = mt7996_mcu_get_eeprom(dev, adie_offset, buf, + MT7996_EEPROM_BLOCK_SIZE, + EEPROM_MODE_EFUSE); + if (ret) { + if (ret != -EINVAL) + return ret; + prev_block_idx = -1; + continue; + } + } + eeprom[eep_offset] = buf[offset]; + prev_block_idx = block_idx; + } + } + + return 0; +} + +int mt7996_mcu_set_eeprom(struct mt7996_dev *dev) { #define MAX_PAGE_IDX_MASK GENMASK(7, 5) #define PAGE_IDX_MASK GENMASK(4, 2) @@ -3964,11 +4110,15 @@ static int mt7996_mcu_set_eeprom_flash(struct mt7996_dev *dev) u16 eeprom_size = MT7996_EEPROM_SIZE; u8 total = DIV_ROUND_UP(eeprom_size, PER_PAGE_SIZE); u8 *eep = (u8 *)dev->mt76.eeprom.data; - int eep_len, i; + int ret, eep_len, i; + + ret = mt7996_mcu_get_cal_free_data(dev); + if (ret) + return ret; for (i = 0; i < total; i++, eep += eep_len) { struct sk_buff *skb; - int ret, msg_len; + int msg_len; if (i == total - 1 && !!(eeprom_size % PER_PAGE_SIZE)) eep_len = eeprom_size % PER_PAGE_SIZE; @@ -3997,22 +4147,6 @@ static int mt7996_mcu_set_eeprom_flash(struct mt7996_dev *dev) return 0; } -int mt7996_mcu_set_eeprom(struct mt7996_dev *dev) -{ - struct mt7996_mcu_eeprom_update req = { - .tag = cpu_to_le16(UNI_EFUSE_BUFFER_MODE), - .len = cpu_to_le16(sizeof(req) - 4), - .buffer_mode = EE_MODE_EFUSE, - .format = EE_FORMAT_WHOLE - }; - - if (dev->eeprom_mode != EEPROM_MODE_EFUSE) - return mt7996_mcu_set_eeprom_flash(dev); - - return mt76_mcu_send_msg(&dev->mt76, MCU_WM_UNI_CMD(EFUSE_CTRL), - &req, sizeof(req), true); -} - int mt7996_mcu_get_eeprom(struct mt7996_dev *dev, u32 offset, u8 *buf, u32 buf_len, enum mt7996_eeprom_mode mode) { diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h index d36fb5396141..5f574ebe81cc 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h @@ -184,6 +184,11 @@ enum mt7996_eeprom_mode { EEPROM_MODE_EXT, }; +#define MT7996_EFUSE_BASE_OFFS_ADIE0 0x400 +#define MT7996_EFUSE_BASE_OFFS_ADIE1 0x1e00 +#define MT7996_EFUSE_BASE_OFFS_ADIE2 0x1200 +#define MT7992_EFUSE_BASE_OFFS_ADIE1 0x1200 + enum mt7996_txq_id { MT7996_TXQ_FWDL = 16, MT7996_TXQ_MCU_WM, From 5c81a4f182d9b48f32e2548f4a39dd76eafb5404 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Wed, 18 Feb 2026 18:39:57 -0600 Subject: [PATCH 0790/1561] wifi: mt76: connac: use is_connac2() to replace is_mt7921() checks Unify all per-chip conditionals under the new is_connac2() helper. This avoids confusion caused by the previous is_mt7921() check, which implicitly covered multiple connac2 chipsets and no longer reflected its actual scope. This is a clean-up only change with no functional impact. Signed-off-by: Sean Wang Link: https://patch.msgid.link/20260219004007.19733-1-sean.wang@kernel.org Signed-off-by: Felix Fietkau --- .../net/wireless/mediatek/mt76/mt76_connac.h | 2 +- .../wireless/mediatek/mt76/mt76_connac_mac.c | 16 ++++++------ .../wireless/mediatek/mt76/mt76_connac_mcu.c | 26 +++++++++---------- .../wireless/mediatek/mt76/mt76_connac_mcu.h | 2 +- .../net/wireless/mediatek/mt76/mt792x_core.c | 2 +- .../net/wireless/mediatek/mt76/mt792x_dma.c | 2 +- 6 files changed, 25 insertions(+), 25 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac.h b/drivers/net/wireless/mediatek/mt76/mt76_connac.h index 813d61bffc2c..02bea67d37c3 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76_connac.h +++ b/drivers/net/wireless/mediatek/mt76/mt76_connac.h @@ -187,7 +187,7 @@ static inline bool is_mt7922(struct mt76_dev *dev) return mt76_chip(dev) == 0x7922; } -static inline bool is_mt7921(struct mt76_dev *dev) +static inline bool is_connac2(struct mt76_dev *dev) { return mt76_chip(dev) == 0x7961 || is_mt7922(dev) || is_mt7920(dev); } diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c index c46691248513..c2cf6893848b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c @@ -173,7 +173,7 @@ void mt76_connac_write_hw_txp(struct mt76_dev *dev, txp->msdu_id[0] = cpu_to_le16(id | MT_MSDU_ID_VALID); - if (is_mt7663(dev) || is_mt7921(dev) || is_mt7925(dev)) + if (is_mt7663(dev) || is_connac2(dev) || is_mt7925(dev)) last_mask = MT_TXD_LEN_LAST; else last_mask = MT_TXD_LEN_AMSDU_LAST | @@ -217,7 +217,7 @@ mt76_connac_txp_skb_unmap_hw(struct mt76_dev *dev, u32 last_mask; int i; - if (is_mt7663(dev) || is_mt7921(dev) || is_mt7925(dev)) + if (is_mt7663(dev) || is_connac2(dev) || is_mt7925(dev)) last_mask = MT_TXD_LEN_LAST; else last_mask = MT_TXD_LEN_MSDU_LAST; @@ -309,7 +309,7 @@ u16 mt76_connac2_mac_tx_rate_val(struct mt76_phy *mphy, chandef = mvif->ctx ? &mvif->ctx->def : &mphy->chandef; band = chandef->chan->band; - if (is_mt7921(mphy->dev)) { + if (is_connac2(mphy->dev)) { rateidx = ffs(conf->basic_rates) - 1; goto legacy; } @@ -548,7 +548,7 @@ void mt76_connac2_mac_write_txwi(struct mt76_dev *dev, __le32 *txwi, val = MT_TXD1_LONG_FORMAT | FIELD_PREP(MT_TXD1_WLAN_IDX, wcid->idx) | FIELD_PREP(MT_TXD1_OWN_MAC, omac_idx); - if (!is_mt7921(dev)) + if (!is_connac2(dev)) val |= MT_TXD1_VTA; if (phy_idx || band_idx) val |= MT_TXD1_TGID; @@ -557,7 +557,7 @@ void mt76_connac2_mac_write_txwi(struct mt76_dev *dev, __le32 *txwi, txwi[2] = 0; val = FIELD_PREP(MT_TXD3_REM_TX_COUNT, 15); - if (!is_mt7921(dev)) + if (!is_connac2(dev)) val |= MT_TXD3_SW_POWER_MGMT; if (key) val |= MT_TXD3_PROTECT_FRAME; @@ -599,7 +599,7 @@ void mt76_connac2_mac_write_txwi(struct mt76_dev *dev, __le32 *txwi, txwi[6] |= cpu_to_le32(val); txwi[3] |= cpu_to_le32(MT_TXD3_BA_DISABLE); - if (!is_mt7921(dev)) { + if (!is_connac2(dev)) { u8 spe_idx = mt76_connac_spe_idx(mphy->antenna_mask); if (!spe_idx) @@ -831,7 +831,7 @@ mt76_connac2_mac_decode_he_mu_radiotap(struct mt76_dev *dev, struct sk_buff *skb }; struct ieee80211_radiotap_he_mu *he_mu; - if (is_mt7921(dev)) { + if (is_connac2(dev)) { mu_known.flags1 |= HE_BITS(MU_FLAGS1_SIG_B_COMP_KNOWN); mu_known.flags2 |= HE_BITS(MU_FLAGS2_PUNC_FROM_SIG_A_BW_KNOWN); } @@ -1047,7 +1047,7 @@ int mt76_connac2_mac_fill_rx_rate(struct mt76_dev *dev, stbc = FIELD_GET(MT_PRXV_HT_STBC, v0); gi = FIELD_GET(MT_PRXV_HT_SGI, v0); *mode = FIELD_GET(MT_PRXV_TX_MODE, v0); - if (is_mt7921(dev)) + if (is_connac2(dev)) dcm = !!(idx & MT_PRXV_TX_DCM); else dcm = FIELD_GET(MT_PRXV_DCM, v0); diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c index 3f583e2a1dc1..d7fbf3454bb8 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c @@ -65,7 +65,7 @@ int mt76_connac_mcu_init_download(struct mt76_dev *dev, u32 addr, u32 len, int cmd; if ((!is_connac_v1(dev) && addr == MCU_PATCH_ADDRESS) || - (is_mt7921(dev) && addr == 0x900000) || + (is_connac2(dev) && addr == 0x900000) || (is_mt7925(dev) && (addr == 0x900000 || addr == 0xe0002800)) || (is_mt799x(dev) && addr == 0x900000)) cmd = MCU_CMD(PATCH_START_REQ); @@ -402,7 +402,7 @@ void mt76_connac_mcu_sta_basic_tlv(struct mt76_dev *dev, struct sk_buff *skb, switch (vif->type) { case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_AP: - if (vif->p2p && !is_mt7921(dev)) + if (vif->p2p && !is_connac2(dev)) conn_type = CONNECTION_P2P_GC; else conn_type = CONNECTION_INFRA_STA; @@ -410,7 +410,7 @@ void mt76_connac_mcu_sta_basic_tlv(struct mt76_dev *dev, struct sk_buff *skb, basic->aid = cpu_to_le16(link_sta->sta->aid); break; case NL80211_IFTYPE_STATION: - if (vif->p2p && !is_mt7921(dev)) + if (vif->p2p && !is_connac2(dev)) conn_type = CONNECTION_P2P_GO; else conn_type = CONNECTION_INFRA_AP; @@ -874,7 +874,7 @@ void mt76_connac_mcu_sta_tlv(struct mt76_phy *mphy, struct sk_buff *skb, struct sta_rec_vht *vht; int len; - len = is_mt7921(dev) ? sizeof(*vht) : sizeof(*vht) - 4; + len = is_connac2(dev) ? sizeof(*vht) : sizeof(*vht) - 4; tlv = mt76_connac_mcu_add_tlv(skb, STA_REC_VHT, len); vht = (struct sta_rec_vht *)tlv; vht->vht_cap = cpu_to_le32(sta->deflink.vht_cap.cap); @@ -885,7 +885,7 @@ void mt76_connac_mcu_sta_tlv(struct mt76_phy *mphy, struct sk_buff *skb, /* starec uapsd */ mt76_connac_mcu_sta_uapsd(skb, vif, sta); - if (!is_mt7921(dev)) + if (!is_connac2(dev)) return; if (sta->deflink.ht_cap.ht_supported || sta->deflink.he_cap.has_he) @@ -1778,7 +1778,7 @@ int mt76_connac_mcu_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif, req->ssid_type_ext = n_ssids ? BIT(0) : 0; req->ssids_num = n_ssids; - duration = is_mt7921(phy->dev) ? 0 : MT76_CONNAC_SCAN_CHANNEL_TIME; + duration = is_connac2(phy->dev) ? 0 : MT76_CONNAC_SCAN_CHANNEL_TIME; /* increase channel time for passive scan */ if (!sreq->n_ssids) duration *= 2; @@ -1821,7 +1821,7 @@ int mt76_connac_mcu_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif, req->ies_len = cpu_to_le16(sreq->ie_len); } - if (is_mt7921(phy->dev)) + if (is_connac2(phy->dev)) req->scan_func |= SCAN_FUNC_SPLIT_SCAN; memcpy(req->bssid, sreq->bssid, ETH_ALEN); @@ -1897,7 +1897,7 @@ int mt76_connac_mcu_sched_scan_req(struct mt76_phy *phy, get_random_mask_addr(addr, sreq->mac_addr, sreq->mac_addr_mask); } - if (is_mt7921(phy->dev)) { + if (is_connac2(phy->dev)) { req->mt7921.bss_idx = mvif->idx; req->mt7921.delay = cpu_to_le32(sreq->delay); } @@ -2037,7 +2037,7 @@ mt76_connac_mcu_build_sku(struct mt76_dev *dev, s8 *sku, struct mt76_power_limits *limits, enum nl80211_band band) { - int max_power = is_mt7921(dev) ? 127 : 63; + int max_power = is_connac2(dev) ? 127 : 63; int i, offset = sizeof(limits->cck); memset(sku, max_power, MT_SKU_POWER_LIMIT); @@ -2065,7 +2065,7 @@ mt76_connac_mcu_build_sku(struct mt76_dev *dev, s8 *sku, offset += 12; } - if (!is_mt7921(dev)) + if (!is_connac2(dev)) return; /* he */ @@ -2121,7 +2121,7 @@ mt76_connac_mcu_rate_txpower_band(struct mt76_phy *phy, enum nl80211_band band) { struct mt76_dev *dev = phy->dev; - int sku_len, batch_len = is_mt7921(dev) ? 8 : 16; + int sku_len, batch_len = is_connac2(dev) ? 8 : 16; static const u8 chan_list_2ghz[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 @@ -2162,7 +2162,7 @@ mt76_connac_mcu_rate_txpower_band(struct mt76_phy *phy, if (!limits) return -ENOMEM; - sku_len = is_mt7921(dev) ? sizeof(sku_tlbv) : sizeof(sku_tlbv) - 92; + sku_len = is_connac2(dev) ? sizeof(sku_tlbv) : sizeof(sku_tlbv) - 92; tx_power = 2 * phy->hw->conf.power_level; if (!tx_power) tx_power = 127; @@ -3080,7 +3080,7 @@ static u32 mt76_connac2_get_data_mode(struct mt76_dev *dev, u32 info) { u32 mode = DL_MODE_NEED_RSP; - if ((!is_mt7921(dev) && !is_mt7925(dev)) || info == PATCH_SEC_NOT_SUPPORT) + if ((!is_connac2(dev) && !is_mt7925(dev)) || info == PATCH_SEC_NOT_SUPPORT) return mode; switch (FIELD_GET(PATCH_SEC_ENC_TYPE_MASK, info)) { diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h index e91966cd5efe..0809318c1ec2 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h +++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h @@ -1867,7 +1867,7 @@ mt76_connac_mcu_gen_dl_mode(struct mt76_dev *dev, u8 feature_set, bool is_wa) ret |= feature_set & FW_FEATURE_SET_ENCRYPT ? DL_MODE_ENCRYPT | DL_MODE_RESET_SEC_IV : 0; - if (is_mt7921(dev) || is_mt7925(dev)) + if (is_connac2(dev) || is_mt7925(dev)) ret |= feature_set & FW_FEATURE_ENCRY_MODE ? DL_CONFIG_ENCRY_MODE_SEL : 0; ret |= FIELD_PREP(DL_MODE_KEY_IDX, diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_core.c b/drivers/net/wireless/mediatek/mt76/mt792x_core.c index f318a53e4deb..2142fcc4ae27 100644 --- a/drivers/net/wireless/mediatek/mt76/mt792x_core.c +++ b/drivers/net/wireless/mediatek/mt76/mt792x_core.c @@ -151,7 +151,7 @@ void mt792x_stop(struct ieee80211_hw *hw, bool suspend) cancel_work_sync(&dev->reset_work); mt76_connac_free_pending_tx_skbs(&dev->pm, NULL); - if (is_mt7921(&dev->mt76)) { + if (is_connac2(&dev->mt76)) { mt792x_mutex_acquire(dev); mt76_connac_mcu_set_mac_enable(&dev->mt76, 0, false, false); mt792x_mutex_release(dev); diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_dma.c b/drivers/net/wireless/mediatek/mt76/mt792x_dma.c index 1ddec7788b66..34f07bd3097d 100644 --- a/drivers/net/wireless/mediatek/mt76/mt792x_dma.c +++ b/drivers/net/wireless/mediatek/mt76/mt792x_dma.c @@ -356,7 +356,7 @@ EXPORT_SYMBOL_GPL(mt792x_poll_rx); int mt792x_wfsys_reset(struct mt792x_dev *dev) { - u32 addr = is_mt7921(&dev->mt76) ? 0x18000140 : 0x7c000140; + u32 addr = is_connac2(&dev->mt76) ? 0x18000140 : 0x7c000140; mt76_clear(dev, addr, WFSYS_SW_RST_B); msleep(50); From 918af1f87f7de988a7e84a7a85390a7d626ee189 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Wed, 18 Feb 2026 18:39:58 -0600 Subject: [PATCH 0791/1561] wifi: mt76: mt7921: use mt76_for_each_q_rx() in reset path Replace explicit napi_disable() calls for RX queues with mt76_for_each_q_rx() in mt7921e_mac_reset(). This removes hardcoded queue indices and disables all configured RX queues during reset. Signed-off-by: Sean Wang Link: https://patch.msgid.link/20260219004007.19733-2-sean.wang@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7921/pci_mac.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci_mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci_mac.c index 5ec084432ae3..0db7acb3a637 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/pci_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci_mac.c @@ -71,9 +71,9 @@ int mt7921e_mac_reset(struct mt792x_dev *dev) mt76_txq_schedule_all(&dev->mphy); mt76_worker_disable(&dev->mt76.tx_worker); - napi_disable(&dev->mt76.napi[MT_RXQ_MAIN]); - napi_disable(&dev->mt76.napi[MT_RXQ_MCU]); - napi_disable(&dev->mt76.napi[MT_RXQ_MCU_WA]); + mt76_for_each_q_rx(&dev->mt76, i) { + napi_disable(&dev->mt76.napi[i]); + } napi_disable(&dev->mt76.tx_napi); mt76_connac2_tx_token_put(&dev->mt76); From 222606f43b587c9fb4ae063d04db146100c8951c Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Wed, 18 Feb 2026 18:39:59 -0600 Subject: [PATCH 0792/1561] wifi: mt76: mt7921: handle MT7902 irq_map quirk with mutable copy MT7902 PCIe requires a different wm2_complete_mask value, so introduce a mutable per-device copy of the default irq_map and override the field only for this chip. Other devices continue using the shared const template. This is a prerequisite patch before enabling MT7902 PCIe support. Co-developed-by: Xiong Huang Signed-off-by: Xiong Huang Signed-off-by: Sean Wang Link: https://patch.msgid.link/20260219004007.19733-3-sean.wang@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7921/pci.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c index 65c7fe671137..5f857a21f362 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c @@ -327,6 +327,20 @@ static int mt7921_pci_probe(struct pci_dev *pdev, dev->hif_ops = &mt7921_pcie_ops; dev->irq_map = &irq_map; mt76_mmio_init(&dev->mt76, regs); + + if (id->device == 0x7902) { + struct mt792x_irq_map *map; + + /* MT7902 needs a mutable copy because wm2_complete_mask differs */ + map = devm_kmemdup(&pdev->dev, &irq_map, + sizeof(irq_map), GFP_KERNEL); + if (!map) + return -ENOMEM; + + map->rx.wm2_complete_mask = 0; + dev->irq_map = map; + } + tasklet_init(&mdev->irq_tasklet, mt792x_irq_tasklet, (unsigned long)dev); dev->phy.dev = dev; From d3bb1ca22896a28860009cb83dd8af09748bccac Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Wed, 18 Feb 2026 18:40:00 -0600 Subject: [PATCH 0793/1561] wifi: mt76: mt7921: add MT7902e DMA layout support Add MT7902 PCIe specific DMA layout overrides for MCU TXQ index, RX ring size, and MCU_WA usage. Common layout remains the default for other chips. This is a prerequisite patch before enabling MT7902 PCIe support. Co-developed-by: Xiong Huang Signed-off-by: Xiong Huang Signed-off-by: Sean Wang Link: https://patch.msgid.link/20260219004007.19733-4-sean.wang@kernel.org Signed-off-by: Felix Fietkau --- .../wireless/mediatek/mt76/mt7921/mt7921.h | 14 +++++++ .../net/wireless/mediatek/mt76/mt7921/pci.c | 41 +++++++++++++++---- 2 files changed, 46 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h index ad92af98e314..64f60c4fc60c 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h +++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h @@ -17,6 +17,9 @@ #define MT7921_RX_MCU_RING_SIZE 8 #define MT7921_RX_MCU_WA_RING_SIZE 512 +/* MT7902 Rx Ring0 is for both Rx Event and Tx Done Event */ +#define MT7902_RX_MCU_RING_SIZE 512 + #define MT7921_EEPROM_SIZE 3584 #define MT7921_TOKEN_SIZE 8192 @@ -119,6 +122,17 @@ enum mt7921_rxq_id { MT7921_RXQ_MCU_WM = 0, }; +/* MT7902 assigns its MCU-WM TXQ at index 15 */ +enum mt7902_txq_id { + MT7902_TXQ_MCU_WM = 15, +}; + +struct mt7921_dma_layout { + u8 mcu_wm_txq; + u16 mcu_rxdone_ring_size; + bool has_mcu_wa; +}; + enum { MT7921_CLC_POWER, MT7921_CLC_CHAN, diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c index 5f857a21f362..6bb3c6a1cf6a 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c @@ -167,8 +167,29 @@ static u32 mt7921_rmw(struct mt76_dev *mdev, u32 offset, u32 mask, u32 val) static int mt7921_dma_init(struct mt792x_dev *dev) { + struct mt7921_dma_layout layout = { + /* General case: MT7921 / MT7922 /MT7920 */ + .mcu_wm_txq = MT7921_TXQ_MCU_WM, + .mcu_rxdone_ring_size = MT7921_RX_MCU_RING_SIZE, + .has_mcu_wa = true, + }; + bool is_mt7902; int ret; + is_mt7902 = mt7921_l1_rr(dev, MT_HW_CHIPID) == 0x7902; + + /* + * MT7902 special case: + * - MCU-WM TXQ uses index 15 + * - RX Ring0 is larger and shared for event/TX-done + * - MT7902 does not use the MCU_WA ring + */ + if (is_mt7902) { + layout.mcu_wm_txq = MT7902_TXQ_MCU_WM; + layout.mcu_rxdone_ring_size = MT7902_RX_MCU_RING_SIZE; + layout.has_mcu_wa = false; + } + mt76_dma_attach(&dev->mt76); ret = mt792x_dma_disable(dev, true); @@ -185,7 +206,7 @@ static int mt7921_dma_init(struct mt792x_dev *dev) mt76_wr(dev, MT_WFDMA0_TX_RING0_EXT_CTRL, 0x4); /* command to WM */ - ret = mt76_init_mcu_queue(&dev->mt76, MT_MCUQ_WM, MT7921_TXQ_MCU_WM, + ret = mt76_init_mcu_queue(&dev->mt76, MT_MCUQ_WM, layout.mcu_wm_txq, MT7921_TX_MCU_RING_SIZE, MT_TX_RING_BASE); if (ret) return ret; @@ -199,18 +220,20 @@ static int mt7921_dma_init(struct mt792x_dev *dev) /* event from WM before firmware download */ ret = mt76_queue_alloc(dev, &dev->mt76.q_rx[MT_RXQ_MCU], MT7921_RXQ_MCU_WM, - MT7921_RX_MCU_RING_SIZE, + layout.mcu_rxdone_ring_size, MT_RX_BUF_SIZE, MT_RX_EVENT_RING_BASE); if (ret) return ret; - /* Change mcu queue after firmware download */ - ret = mt76_queue_alloc(dev, &dev->mt76.q_rx[MT_RXQ_MCU_WA], - MT7921_RXQ_MCU_WM, - MT7921_RX_MCU_WA_RING_SIZE, - MT_RX_BUF_SIZE, MT_WFDMA0(0x540)); - if (ret) - return ret; + if (layout.has_mcu_wa) { + /* Change mcu queue after firmware download */ + ret = mt76_queue_alloc(dev, &dev->mt76.q_rx[MT_RXQ_MCU_WA], + MT7921_RXQ_MCU_WM, + MT7921_RX_MCU_WA_RING_SIZE, + MT_RX_BUF_SIZE, MT_WFDMA0(0x540)); + if (ret) + return ret; + } /* rx data */ ret = mt76_queue_alloc(dev, &dev->mt76.q_rx[MT_RXQ_MAIN], From 0a7d2fca06afb036ff2d61540fc68e6e48eb9fbe Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Wed, 18 Feb 2026 18:40:01 -0600 Subject: [PATCH 0794/1561] wifi: mt76: connac: mark MT7902 as hw txp devices Add MT7902 to is_mt76_fw_txp() so it follows the legacy TX descriptor path like the other connac2 chips that return false. This is a prerequisite patch before enabling MT7902 pcie support. Co-developed-by: Xiong Huang Signed-off-by: Xiong Huang Signed-off-by: Sean Wang Link: https://patch.msgid.link/20260219004007.19733-5-sean.wang@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76_connac.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac.h b/drivers/net/wireless/mediatek/mt76/mt76_connac.h index 02bea67d37c3..d868bb7c7ab8 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76_connac.h +++ b/drivers/net/wireless/mediatek/mt76/mt76_connac.h @@ -271,6 +271,7 @@ static inline bool is_mt76_fw_txp(struct mt76_dev *dev) case 0x7961: case 0x7920: case 0x7922: + case 0x7902: case 0x7925: case 0x7663: case 0x7622: From 14a7ba034fcd9d1766503ef44cc491c6fda8db2c Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Wed, 18 Feb 2026 18:40:02 -0600 Subject: [PATCH 0795/1561] wifi: mt76: mt792x: add PSE handling barrier for the large MCU cmd Add a dummy register read in mt76_connac_mcu_rate_txpower_band() to act as a PSE barrier. This would release PSE pages and prevents buffer underflow issues when handling MCU commands with larger payloads without the response in mt76_connac_mcu_set_rate_txpower(). This is a prerequisite patch before enabling MT7902 PCIe and SDIO support. Co-developed-by: Xiong Huang Signed-off-by: Xiong Huang Signed-off-by: Sean Wang Link: https://patch.msgid.link/20260219004007.19733-6-sean.wang@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c | 4 ++++ drivers/net/wireless/mediatek/mt76/mt792x_regs.h | 2 ++ 2 files changed, 6 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c index d7fbf3454bb8..89bd52ea8bf7 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c @@ -4,6 +4,7 @@ #include #include "mt76_connac2_mac.h" #include "mt76_connac_mcu.h" +#include "mt792x_regs.h" int mt76_connac_mcu_start_firmware(struct mt76_dev *dev, u32 addr, u32 option) { @@ -2246,6 +2247,9 @@ mt76_connac_mcu_rate_txpower_band(struct mt76_phy *phy, false); if (err < 0) goto out; + + /* read a CR to avoid PSE buffer underflow */ + mt76_connac_mcu_reg_rr(dev, MT_PSE_BASE); } out: diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_regs.h b/drivers/net/wireless/mediatek/mt76/mt792x_regs.h index acf627aed609..7ddde9286861 100644 --- a/drivers/net/wireless/mediatek/mt76/mt792x_regs.h +++ b/drivers/net/wireless/mediatek/mt76/mt792x_regs.h @@ -25,6 +25,8 @@ #define MT_PLE_AC_QEMPTY(_n) MT_PLE(0x500 + 0x40 * (_n)) #define MT_PLE_AMSDU_PACK_MSDU_CNT(n) MT_PLE(0x10e0 + ((n) << 2)) +#define MT_PSE_BASE 0x820c8000 + /* TMAC: band 0(0x21000), band 1(0xa1000) */ #define MT_WF_TMAC_BASE(_band) ((_band) ? 0x820f4000 : 0x820e4000) #define MT_WF_TMAC(_band, ofs) (MT_WF_TMAC_BASE(_band) + (ofs)) From 9eef868b86db32d40e4a45f407af8aa1e4e4e830 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Wed, 18 Feb 2026 18:40:03 -0600 Subject: [PATCH 0796/1561] wifi: mt76: mt792x: ensure MCU ready before ROM patch download Restart the MCU and poll FW state to ensure correct MCU status before downloading the ROM patch. This is a prerequisite for enabling MT7902 PCIe and has been validated on MT7921 and MT7925 since they share the common code path. Signed-off-by: Sean Wang Link: https://patch.msgid.link/20260219004007.19733-7-sean.wang@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt792x_core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_core.c b/drivers/net/wireless/mediatek/mt76/mt792x_core.c index 2142fcc4ae27..152cfcca2f90 100644 --- a/drivers/net/wireless/mediatek/mt76/mt792x_core.c +++ b/drivers/net/wireless/mediatek/mt76/mt792x_core.c @@ -926,6 +926,13 @@ int mt792x_load_firmware(struct mt792x_dev *dev) { int ret; + mt76_connac_mcu_restart(&dev->mt76); + + if (!mt76_poll_msec(dev, MT_CONN_ON_MISC, MT_TOP_MISC_FW_STATE, + MT_TOP_MISC2_FW_PWR_ON, 1000)) + dev_warn(dev->mt76.dev, + "MCU is not ready for firmware download\n"); + ret = mt76_connac2_load_patch(&dev->mt76, mt792x_patch_name(dev)); if (ret) return ret; From 6d32fb25768946cf2bc8eef9ba77acf9406867ef Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Wed, 18 Feb 2026 18:40:04 -0600 Subject: [PATCH 0797/1561] wifi: mt76: mt7921: add MT7902 MCU support Add MCU support for the MT7902 chipset. runtime pm is not yet supported by the driver, but normal mac80211 operation is unaffected. Signed-off-by: Sean Wang Link: https://patch.msgid.link/20260219004007.19733-8-sean.wang@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76_connac.h | 8 +++++++- drivers/net/wireless/mediatek/mt76/mt7921/init.c | 4 +++- drivers/net/wireless/mediatek/mt76/mt792x.h | 6 ++++++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac.h b/drivers/net/wireless/mediatek/mt76/mt76_connac.h index d868bb7c7ab8..51423c7740bd 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76_connac.h +++ b/drivers/net/wireless/mediatek/mt76/mt76_connac.h @@ -182,6 +182,11 @@ static inline bool is_mt7920(struct mt76_dev *dev) return mt76_chip(dev) == 0x7920; } +static inline bool is_mt7902(struct mt76_dev *dev) +{ + return mt76_chip(dev) == 0x7902; +} + static inline bool is_mt7922(struct mt76_dev *dev) { return mt76_chip(dev) == 0x7922; @@ -189,7 +194,8 @@ static inline bool is_mt7922(struct mt76_dev *dev) static inline bool is_connac2(struct mt76_dev *dev) { - return mt76_chip(dev) == 0x7961 || is_mt7922(dev) || is_mt7920(dev); + return mt76_chip(dev) == 0x7961 || is_mt7922(dev) || is_mt7920(dev) || + is_mt7902(dev); } static inline bool is_mt7663(struct mt76_dev *dev) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c index 29732315af1c..8e7790702191 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c @@ -302,7 +302,9 @@ int mt7921_register_device(struct mt792x_dev *dev) dev->pm.idle_timeout = MT792x_PM_TIMEOUT; dev->pm.stats.last_wake_event = jiffies; dev->pm.stats.last_doze_event = jiffies; - if (!mt76_is_usb(&dev->mt76)) { + + if (!mt76_is_usb(&dev->mt76) && + !is_mt7902(&dev->mt76)) { dev->pm.enable_user = true; dev->pm.enable = true; dev->pm.ds_enable_user = true; diff --git a/drivers/net/wireless/mediatek/mt76/mt792x.h b/drivers/net/wireless/mediatek/mt76/mt792x.h index 8388638ed550..1f381ab356bc 100644 --- a/drivers/net/wireless/mediatek/mt76/mt792x.h +++ b/drivers/net/wireless/mediatek/mt76/mt792x.h @@ -41,11 +41,13 @@ #define MT792x_MCU_INIT_RETRY_COUNT 10 #define MT792x_WFSYS_INIT_RETRY_COUNT 2 +#define MT7902_FIRMWARE_WM "mediatek/WIFI_RAM_CODE_MT7902_1.bin" #define MT7920_FIRMWARE_WM "mediatek/WIFI_RAM_CODE_MT7961_1a.bin" #define MT7921_FIRMWARE_WM "mediatek/WIFI_RAM_CODE_MT7961_1.bin" #define MT7922_FIRMWARE_WM "mediatek/WIFI_RAM_CODE_MT7922_1.bin" #define MT7925_FIRMWARE_WM "mediatek/mt7925/WIFI_RAM_CODE_MT7925_1_1.bin" +#define MT7902_ROM_PATCH "mediatek/WIFI_MT7902_patch_mcu_1_1_hdr.bin" #define MT7920_ROM_PATCH "mediatek/WIFI_MT7961_patch_mcu_1a_2_hdr.bin" #define MT7921_ROM_PATCH "mediatek/WIFI_MT7961_patch_mcu_1_2_hdr.bin" #define MT7922_ROM_PATCH "mediatek/WIFI_MT7922_patch_mcu_1_1_hdr.bin" @@ -448,6 +450,8 @@ void mt792x_config_mac_addr_list(struct mt792x_dev *dev); static inline char *mt792x_ram_name(struct mt792x_dev *dev) { switch (mt76_chip(&dev->mt76)) { + case 0x7902: + return MT7902_FIRMWARE_WM; case 0x7920: return MT7920_FIRMWARE_WM; case 0x7922: @@ -462,6 +466,8 @@ static inline char *mt792x_ram_name(struct mt792x_dev *dev) static inline char *mt792x_patch_name(struct mt792x_dev *dev) { switch (mt76_chip(&dev->mt76)) { + case 0x7902: + return MT7902_ROM_PATCH; case 0x7920: return MT7920_ROM_PATCH; case 0x7922: From 199443b4015238f46c8a578b84c3834a48246355 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Wed, 18 Feb 2026 18:40:05 -0600 Subject: [PATCH 0798/1561] wifi: mt76: mt792x: add MT7902 WFDMA prefetch configuration Configure the RX/TX ring prefetch setting for MT7902 PCIe device. This is a prerequisite patch before enabling MT7902 PCIe support. Co-developed-by: Xiong Huang Signed-off-by: Xiong Huang Signed-off-by: Sean Wang Link: https://patch.msgid.link/20260219004007.19733-9-sean.wang@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt792x_dma.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_dma.c b/drivers/net/wireless/mediatek/mt76/mt792x_dma.c index 34f07bd3097d..002aece857b2 100644 --- a/drivers/net/wireless/mediatek/mt76/mt792x_dma.c +++ b/drivers/net/wireless/mediatek/mt76/mt792x_dma.c @@ -103,6 +103,22 @@ static void mt792x_dma_prefetch(struct mt792x_dev *dev) mt76_wr(dev, MT_WFDMA0_TX_RING3_EXT_CTRL, PREFETCH(0x0400, 0x10)); mt76_wr(dev, MT_WFDMA0_TX_RING15_EXT_CTRL, PREFETCH(0x0500, 0x4)); mt76_wr(dev, MT_WFDMA0_TX_RING16_EXT_CTRL, PREFETCH(0x0540, 0x4)); + } else if (is_mt7902(&dev->mt76)) { + /* rx ring */ + mt76_wr(dev, MT_WFDMA0_RX_RING0_EXT_CTRL, PREFETCH(0x0000, 0x4)); + mt76_wr(dev, MT_WFDMA0_RX_RING1_EXT_CTRL, PREFETCH(0x0040, 0x4)); + mt76_wr(dev, MT_WFDMA0_RX_RING2_EXT_CTRL, PREFETCH(0x0080, 0x4)); + mt76_wr(dev, MT_WFDMA0_RX_RING3_EXT_CTRL, PREFETCH(0x00c0, 0x4)); + /* tx ring */ + mt76_wr(dev, MT_WFDMA0_TX_RING0_EXT_CTRL, PREFETCH(0x0100, 0x4)); + mt76_wr(dev, MT_WFDMA0_TX_RING1_EXT_CTRL, PREFETCH(0x0140, 0x4)); + mt76_wr(dev, MT_WFDMA0_TX_RING2_EXT_CTRL, PREFETCH(0x0180, 0x4)); + mt76_wr(dev, MT_WFDMA0_TX_RING3_EXT_CTRL, PREFETCH(0x01c0, 0x4)); + mt76_wr(dev, MT_WFDMA0_TX_RING4_EXT_CTRL, PREFETCH(0x0200, 0x4)); + mt76_wr(dev, MT_WFDMA0_TX_RING5_EXT_CTRL, PREFETCH(0x0240, 0x4)); + mt76_wr(dev, MT_WFDMA0_TX_RING6_EXT_CTRL, PREFETCH(0x0280, 0x4)); + mt76_wr(dev, MT_WFDMA0_TX_RING15_EXT_CTRL, PREFETCH(0x02c0, 0x4)); + mt76_wr(dev, MT_WFDMA0_TX_RING16_EXT_CTRL, PREFETCH(0x0300, 0x4)); } else { /* rx ring */ mt76_wr(dev, MT_WFDMA0_RX_RING0_EXT_CTRL, PREFETCH(0x0, 0x4)); From c26319afb5fb403a0bb2a604cee95a5bab8bbf18 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Wed, 18 Feb 2026 18:40:06 -0600 Subject: [PATCH 0799/1561] wifi: mt76: mt7921: add MT7902 PCIe device support Register the MT7902 PCI device ID in the mt7921 driver and add its corresponding firmware and ROM patch names. Co-developed-by: Xiong Huang Signed-off-by: Xiong Huang Signed-off-by: Sean Wang Link: https://patch.msgid.link/20260219004007.19733-10-sean.wang@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7921/pci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c index 6bb3c6a1cf6a..7a790ddf43bb 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c @@ -26,6 +26,8 @@ static const struct pci_device_id mt7921_pci_device_table[] = { .driver_data = (kernel_ulong_t)MT7922_FIRMWARE_WM }, { PCI_DEVICE(PCI_VENDOR_ID_MEDIATEK, 0x7920), .driver_data = (kernel_ulong_t)MT7920_FIRMWARE_WM }, + { PCI_DEVICE(PCI_VENDOR_ID_MEDIATEK, 0x7902), + .driver_data = (kernel_ulong_t)MT7902_FIRMWARE_WM }, { }, }; @@ -617,6 +619,8 @@ MODULE_FIRMWARE(MT7921_FIRMWARE_WM); MODULE_FIRMWARE(MT7921_ROM_PATCH); MODULE_FIRMWARE(MT7922_FIRMWARE_WM); MODULE_FIRMWARE(MT7922_ROM_PATCH); +MODULE_FIRMWARE(MT7902_FIRMWARE_WM); +MODULE_FIRMWARE(MT7902_ROM_PATCH); MODULE_AUTHOR("Sean Wang "); MODULE_AUTHOR("Lorenzo Bianconi "); MODULE_DESCRIPTION("MediaTek MT7921E (PCIe) wireless driver"); From 02b7a65719a00b5edea5b60b00ff85440a3f5d38 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Wed, 18 Feb 2026 18:40:07 -0600 Subject: [PATCH 0800/1561] wifi: mt76: mt7921: add MT7902 SDIO device support Register the MT7902 SDIO device ID in the mt7921 driver and add its corresponding firmware and ROM patch names. Signed-off-by: Sean Wang Link: https://patch.msgid.link/20260219004007.19733-11-sean.wang@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7921/sdio.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/sdio.c b/drivers/net/wireless/mediatek/mt76/mt7921/sdio.c index 3421e53dc948..9150f185716c 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/sdio.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/sdio.c @@ -19,6 +19,8 @@ static const struct sdio_device_id mt7921s_table[] = { { SDIO_DEVICE(SDIO_VENDOR_ID_MEDIATEK, 0x7901), .driver_data = (kernel_ulong_t)MT7921_FIRMWARE_WM }, + { SDIO_DEVICE(SDIO_VENDOR_ID_MEDIATEK, 0x7902), + .driver_data = (kernel_ulong_t)MT7902_FIRMWARE_WM }, { } /* Terminating entry */ }; @@ -317,6 +319,8 @@ failed: MODULE_DEVICE_TABLE(sdio, mt7921s_table); MODULE_FIRMWARE(MT7921_FIRMWARE_WM); MODULE_FIRMWARE(MT7921_ROM_PATCH); +MODULE_FIRMWARE(MT7902_FIRMWARE_WM); +MODULE_FIRMWARE(MT7902_ROM_PATCH); static DEFINE_SIMPLE_DEV_PM_OPS(mt7921s_pm_ops, mt7921s_suspend, mt7921s_resume); From 97b9f9831bf297f3ffa62018721601ed2736f2c3 Mon Sep 17 00:00:00 2001 From: Shayne Chen Date: Tue, 3 Feb 2026 23:55:29 +0800 Subject: [PATCH 0801/1561] wifi: mt76: mt7996: fix wrong DMAD length when using MAC TXP The struct mt76_connac_fw_txp is used for HIF TXP. Change to use the struct mt76_connac_hw_txp to fix the wrong DMAD length for MAC TXP. Fixes: cb6ebbdffef2 ("wifi: mt76: mt7996: support writing MAC TXD for AddBA Request") Signed-off-by: Shayne Chen Link: https://patch.msgid.link/20260203155532.1098290-1-shayne.chen@mediatek.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7996/mac.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c index c6028fabd7d1..208ed20d0bf9 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c @@ -1100,10 +1100,10 @@ int mt7996_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr, * req */ if (le32_to_cpu(ptr[7]) & MT_TXD7_MAC_TXD) { - u32 val; + u32 val, mac_txp_size = sizeof(struct mt76_connac_hw_txp); ptr = (__le32 *)(txwi + MT_TXD_SIZE); - memset((void *)ptr, 0, sizeof(struct mt76_connac_fw_txp)); + memset((void *)ptr, 0, mac_txp_size); val = FIELD_PREP(MT_TXP0_TOKEN_ID0, id) | MT_TXP0_TOKEN_ID0_VALID_MASK; @@ -1122,6 +1122,8 @@ int mt7996_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr, tx_info->buf[1].addr >> 32); #endif ptr[3] = cpu_to_le32(val); + + tx_info->buf[0].len = MT_TXD_SIZE + mac_txp_size; } else { struct mt76_connac_txp_common *txp; From efbd5bf395f4e6b45a87f3835d4c2e28170c77c5 Mon Sep 17 00:00:00 2001 From: StanleyYP Wang Date: Tue, 3 Feb 2026 23:55:30 +0800 Subject: [PATCH 0802/1561] wifi: mt76: mt7996: fix struct mt7996_mcu_uni_event The cid field is defined as a two-byte value in the firmware. Fixes: 98686cd21624 ("wifi: mt76: mt7996: add driver for MediaTek Wi-Fi 7 (802.11be) devices") Signed-off-by: StanleyYP Wang Signed-off-by: Shayne Chen Link: https://patch.msgid.link/20260203155532.1098290-2-shayne.chen@mediatek.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7996/mcu.c | 2 +- drivers/net/wireless/mediatek/mt76/mt7996/mcu.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c index 27713399c318..13182a69eec9 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c @@ -245,7 +245,7 @@ mt7996_mcu_parse_response(struct mt76_dev *mdev, int cmd, event = (struct mt7996_mcu_uni_event *)skb->data; ret = le32_to_cpu(event->status); /* skip invalid event */ - if (mcu_cmd != event->cid) + if (mcu_cmd != le16_to_cpu(event->cid)) ret = -EAGAIN; } else { skb_pull(skb, sizeof(struct mt7996_mcu_rxd)); diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h index 905dafccc316..39df13679779 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h @@ -25,8 +25,8 @@ struct mt7996_mcu_rxd { }; struct mt7996_mcu_uni_event { - u8 cid; - u8 __rsv[3]; + __le16 cid; + u8 __rsv[2]; __le32 status; /* 0: success, others: fail */ } __packed; From 169c83d3df95b57e787174454332e01eb1b823ed Mon Sep 17 00:00:00 2001 From: StanleyYP Wang Date: Tue, 3 Feb 2026 23:55:31 +0800 Subject: [PATCH 0803/1561] wifi: mt76: avoid to set ACK for MCU command if wait_resp is not set When wait_resp is not set but the ACK option is enabled in the MCU TXD, the ACK event is enqueued to the MCU event queue without being dequeued by the original MCU command request. Any orphaned ACK events will only be removed from the queue when another MCU command requests a response. Due to sequence index mismatches, these events are discarded one by one until a matching sequence index is found. However, if several MCU commands that do not require a response continue to fill up the event queue, there is a risk that when an MCU command with wait_resp enabled is issued, it may dequeue the wrong event skb, especially if the queue contains events with all possible sequence indices. Signed-off-by: StanleyYP Wang Signed-off-by: Shayne Chen Link: https://patch.msgid.link/20260203155532.1098290-3-shayne.chen@mediatek.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mcu.c | 2 +- drivers/net/wireless/mediatek/mt76/mt7996/mcu.c | 11 +++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mcu.c b/drivers/net/wireless/mediatek/mt76/mcu.c index 535c3d8a9cc0..cbfb3bbec503 100644 --- a/drivers/net/wireless/mediatek/mt76/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mcu.c @@ -98,7 +98,7 @@ retry: /* orig skb might be needed for retry, mcu_skb_send_msg consumes it */ if (orig_skb) skb_get(orig_skb); - ret = dev->mcu_ops->mcu_skb_send_msg(dev, skb, cmd, &seq); + ret = dev->mcu_ops->mcu_skb_send_msg(dev, skb, cmd, wait_resp ? &seq : NULL); if (ret < 0) goto out; diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c index 13182a69eec9..ee0fb3c45ca2 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c @@ -325,13 +325,12 @@ mt7996_mcu_send_message(struct mt76_dev *mdev, struct sk_buff *skb, uni_txd->pkt_type = MCU_PKT_ID; uni_txd->seq = seq; - if (cmd & __MCU_CMD_FIELD_QUERY) - uni_txd->option = MCU_CMD_UNI_QUERY_ACK; - else - uni_txd->option = MCU_CMD_UNI_EXT_ACK; + uni_txd->option = MCU_CMD_UNI; + if (!(cmd & __MCU_CMD_FIELD_QUERY)) + uni_txd->option |= MCU_CMD_SET; - if (mcu_cmd == MCU_UNI_CMD_SDO) - uni_txd->option &= ~MCU_CMD_ACK; + if (wait_seq) + uni_txd->option |= MCU_CMD_ACK; if ((cmd & __MCU_CMD_FIELD_WA) && (cmd & __MCU_CMD_FIELD_WM)) uni_txd->s2d_index = MCU_S2D_H2CN; From 1f9017d19db38ad2cb9bedb5b078f6f4f60afa94 Mon Sep 17 00:00:00 2001 From: StanleyYP Wang Date: Tue, 3 Feb 2026 23:55:32 +0800 Subject: [PATCH 0804/1561] wifi: mt76: mt7996: fix queue pause after scan due to wrong channel switch reason Previously, we used the IEEE80211_CONF_IDLE flag to avoid setting the parking channel with the CH_SWITCH_NORMAL reason, which could trigger TX emission before bootup CAC. However, we found that this flag can be set after triggering scanning on a connected station interface, and the reason CH_SWITCH_SCAN_BYPASS_DPD will be used when switching back to the operating channel, which makes the firmware failed to resume paused AC queues. Seems that we should avoid relying on this flag after switching to single multi-radio architecture. Instead, use the existence of chanctx as the condition. Signed-off-by: StanleyYP Wang Signed-off-by: Shayne Chen Link: https://patch.msgid.link/20260203155532.1098290-4-shayne.chen@mediatek.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7996/mcu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c index ee0fb3c45ca2..c632abe54707 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c @@ -3929,8 +3929,7 @@ int mt7996_mcu_set_chan_info(struct mt7996_phy *phy, u16 tag) if (phy->mt76->hw->conf.flags & IEEE80211_CONF_MONITOR) req.switch_reason = CH_SWITCH_NORMAL; - else if (phy->mt76->offchannel || - phy->mt76->hw->conf.flags & IEEE80211_CONF_IDLE) + else if (phy->mt76->offchannel || !phy->mt76->chanctx) req.switch_reason = CH_SWITCH_SCAN_BYPASS_DPD; else if (!cfg80211_reg_can_beacon(phy->mt76->hw->wiphy, chandef, NL80211_IFTYPE_AP)) From 964f870e090e9c88a41e2890333421204cc0bdf4 Mon Sep 17 00:00:00 2001 From: David Bauer Date: Fri, 30 Jan 2026 00:23:20 +0100 Subject: [PATCH 0805/1561] wifi: mt76: don't return TXQ when exceeding max non-AQL packets mt76_txq_send_burst does check if the number of non-AQL frames exceeds the maximum. In this case the queue is returned to ieee80211_return_txq when iterating over the scheduled TXQs in mt76_txq_schedule_list. This has the effect of inserting said TXQ at the head of the list. This means the loop will get the same TXQ again, which will terminate the scheduling round. TXQs following in the list thus never get scheduled for transmission. This can manifest in high latency low throughput or broken connections for said STAs. Check if the non-AQL packet count exceeds the limit and not return the TXQ in this case. Schedule all TXQs for the STA in case the non-AQL limit can be satisfied again. Signed-off-by: David Bauer Link: https://patch.msgid.link/20260129232321.276575-1-mail@david-bauer.net Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/tx.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c index 9ec6d0b53a84..0753acf2eccb 100644 --- a/drivers/net/wireless/mediatek/mt76/tx.c +++ b/drivers/net/wireless/mediatek/mt76/tx.c @@ -227,7 +227,9 @@ mt76_tx_check_non_aql(struct mt76_dev *dev, struct mt76_wcid *wcid, struct sk_buff *skb) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_sta *sta; int pending; + int i; if (!wcid || info->tx_time_est) return; @@ -235,6 +237,17 @@ mt76_tx_check_non_aql(struct mt76_dev *dev, struct mt76_wcid *wcid, pending = atomic_dec_return(&wcid->non_aql_packets); if (pending < 0) atomic_cmpxchg(&wcid->non_aql_packets, pending, 0); + + sta = wcid_to_sta(wcid); + if (!sta || pending != MT_MAX_NON_AQL_PKT - 1) + return; + + for (i = 0; i < ARRAY_SIZE(sta->txq); i++) { + if (!sta->txq[i]) + continue; + + ieee80211_schedule_txq(dev->hw, sta->txq[i]); + } } void __mt76_tx_complete_skb(struct mt76_dev *dev, u16 wcid_idx, struct sk_buff *skb, @@ -542,6 +555,9 @@ mt76_txq_schedule_list(struct mt76_phy *phy, enum mt76_txq_id qid) if (!wcid || test_bit(MT_WCID_FLAG_PS, &wcid->flags)) continue; + if (atomic_read(&wcid->non_aql_packets) >= MT_MAX_NON_AQL_PKT) + continue; + phy = mt76_dev_phy(dev, wcid->phy_idx); if (test_bit(MT76_RESET, &phy->state) || phy->offchannel) continue; From 1146d0946b5358fad24812bd39d68f31cd40cc34 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Fri, 30 Jan 2026 22:57:59 +0800 Subject: [PATCH 0806/1561] wifi: mt76: mt7915: fix use-after-free bugs in mt7915_mac_dump_work() When the mt7915 pci chip is detaching, the mt7915_crash_data is released in mt7915_coredump_unregister(). However, the work item dump_work may still be running or pending, leading to UAF bugs when the already freed crash_data is dereferenced again in mt7915_mac_dump_work(). The race condition can occur as follows: CPU 0 (removal path) | CPU 1 (workqueue) mt7915_pci_remove() | mt7915_sys_recovery_set() mt7915_unregister_device() | mt7915_reset() mt7915_coredump_unregister() | queue_work() vfree(dev->coredump.crash_data) | mt7915_mac_dump_work() | crash_data-> // UAF Fix this by ensuring dump_work is properly canceled before the crash_data is deallocated. Add cancel_work_sync() in mt7915_unregister_device() to synchronize with any pending or executing dump work. Fixes: 4dbcb9125cc3 ("wifi: mt76: mt7915: enable coredump support") Signed-off-by: Duoming Zhou Link: https://patch.msgid.link/20260130145759.84272-1-duoming@zju.edu.cn Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7915/init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c index 22443cbc74ad..250c2d2479b0 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c @@ -1294,6 +1294,7 @@ free_phy2: void mt7915_unregister_device(struct mt7915_dev *dev) { + cancel_work_sync(&dev->dump_work); mt7915_unregister_ext_phy(dev); mt7915_coredump_unregister(dev); mt7915_unregister_thermal(&dev->phy); From c8f62f73bbced3a79894655bdb0b625462d956fc Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Sat, 31 Jan 2026 10:47:31 +0800 Subject: [PATCH 0807/1561] wifi: mt76: mt7996: fix use-after-free bugs in mt7996_mac_dump_work() When the mt7996 pci chip is detaching, the mt7996_crash_data is released in mt7996_coredump_unregister(). However, the work item dump_work may still be running or pending, leading to UAF bugs when the already freed crash_data is dereferenced again in mt7996_mac_dump_work(). The race condition can occur as follows: CPU 0 (removal path) | CPU 1 (workqueue) mt7996_pci_remove() | mt7996_sys_recovery_set() mt7996_unregister_device() | mt7996_reset() mt7996_coredump_unregister() | queue_work() vfree(dev->coredump.crash_data) | mt7996_mac_dump_work() | crash_data-> // UAF Fix this by ensuring dump_work is properly canceled before the crash_data is deallocated. Add cancel_work_sync() in mt7996_unregister_device() to synchronize with any pending or executing dump work. Fixes: 878161d5d4a4 ("wifi: mt76: mt7996: enable coredump support") Signed-off-by: Duoming Zhou Link: https://patch.msgid.link/20260131024731.18741-1-duoming@zju.edu.cn Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7996/init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/init.c b/drivers/net/wireless/mediatek/mt76/mt7996/init.c index 3b4f808b968c..29bce7cffbb5 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/init.c @@ -1773,6 +1773,7 @@ error: void mt7996_unregister_device(struct mt7996_dev *dev) { + cancel_work_sync(&dev->dump_work); cancel_work_sync(&dev->wed_rro.work); mt7996_unregister_phy(mt7996_phy3(dev)); mt7996_unregister_phy(mt7996_phy2(dev)); From c9ce833d7891804f618c3c8349d9c96e4fe62774 Mon Sep 17 00:00:00 2001 From: MeiChia Chiu Date: Tue, 3 Feb 2026 09:32:02 +0100 Subject: [PATCH 0808/1561] wifi: mt76: mt7996: Add eMLSR support Implement set_eml_op_mode mac80211 callback in order to introduce eMLSR support. Tested-by: Christian Marangi Signed-off-by: MeiChia Chiu Co-developed-by: Lorenzo Bianconi Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260203-mt7996-emlsr-v1-1-38ffb3d5110c@kernel.org Signed-off-by: Felix Fietkau --- .../wireless/mediatek/mt76/mt76_connac_mcu.h | 9 +++ .../net/wireless/mediatek/mt76/mt7996/main.c | 16 ++++++ .../net/wireless/mediatek/mt76/mt7996/mcu.c | 55 +++++++++++++++++++ .../wireless/mediatek/mt76/mt7996/mt7996.h | 4 ++ 4 files changed, 84 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h index 0809318c1ec2..fd9cf9c0c32f 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h +++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h @@ -628,6 +628,13 @@ struct sta_rec_tx_proc { __le32 flag; } __packed; +struct sta_rec_eml_op { + __le16 tag; + __le16 len; + u8 link_bitmap; + u8 link_ant_num[3]; +} __packed; + /* wtbl_rec */ struct wtbl_req_hdr { @@ -796,6 +803,7 @@ struct wtbl_raw { sizeof(struct sta_rec_he_6g_capa) + \ sizeof(struct sta_rec_pn_info) + \ sizeof(struct sta_rec_tx_proc) + \ + sizeof(struct sta_rec_eml_op) + \ sizeof(struct tlv) + \ MT76_CONNAC_WTBL_UPDATE_MAX_SIZE) @@ -832,6 +840,7 @@ enum { STA_REC_PN_INFO = 0x26, STA_REC_KEY_V3 = 0x27, STA_REC_HDRT = 0x28, + STA_REC_EML_OP = 0x29, STA_REC_HDR_TRANS = 0x2B, STA_REC_MAX_NUM }; diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index c0fae7aec1ae..c6d14f09fd10 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -2366,6 +2366,21 @@ mt7996_reconfig_complete(struct ieee80211_hw *hw, MT7996_WATCHDOG_TIME); } +static int +mt7996_set_eml_op_mode(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + struct ieee80211_eml_params *eml_params) +{ + struct mt7996_dev *dev = mt7996_hw_dev(hw); + int ret; + + mutex_lock(&dev->mt76.mutex); + ret = mt7996_mcu_set_emlsr_mode(dev, vif, sta, eml_params); + mutex_unlock(&dev->mt76.mutex); + + return ret; +} + const struct ieee80211_ops mt7996_ops = { .add_chanctx = mt76_add_chanctx, .remove_chanctx = mt76_remove_chanctx, @@ -2429,4 +2444,5 @@ const struct ieee80211_ops mt7996_ops = { .change_vif_links = mt7996_change_vif_links, .change_sta_links = mt7996_mac_sta_change_links, .reconfig_complete = mt7996_reconfig_complete, + .set_eml_op_mode = mt7996_set_eml_op_mode, }; diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c index c632abe54707..8e06f7fe479c 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c @@ -1304,6 +1304,61 @@ int mt7996_mcu_set_protection(struct mt7996_phy *phy, struct mt7996_vif_link *li MCU_WM_UNI_CMD(BSS_INFO_UPDATE), true); } +int mt7996_mcu_set_emlsr_mode(struct mt7996_dev *dev, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + struct ieee80211_eml_params *eml_params) +{ + struct mt7996_sta *msta = (struct mt7996_sta *)sta->drv_priv; + struct mt7996_sta_link *msta_link; + struct sta_rec_eml_op *eml_op; + struct mt7996_vif_link *link; + struct sk_buff *skb; + struct tlv *tlv; + + msta_link = mt76_dereference(msta->link[eml_params->link_id], + &dev->mt76); + if (!msta_link) + return -EINVAL; + + link = mt7996_vif_link(dev, vif, eml_params->link_id); + if (!link) + return -EINVAL; + + skb = __mt76_connac_mcu_alloc_sta_req(&dev->mt76, &link->mt76, + &msta_link->wcid, + MT7996_STA_UPDATE_MAX_SIZE); + if (IS_ERR(skb)) + return PTR_ERR(skb); + + tlv = mt76_connac_mcu_add_tlv(skb, STA_REC_EML_OP, sizeof(*eml_op)); + eml_op = (struct sta_rec_eml_op *)tlv; + eml_op->link_bitmap = 0; + + if (eml_params->control & IEEE80211_EML_CTRL_EMLSR_MODE) { + unsigned long link_bitmap = eml_params->link_bitmap; + unsigned int link_id; + + for_each_set_bit(link_id, &link_bitmap, + IEEE80211_MLD_MAX_NUM_LINKS) { + struct mt76_phy *mphy; + + link = mt7996_vif_link(dev, vif, link_id); + if (!link) + continue; + + mphy = mt76_vif_link_phy(&link->mt76); + if (!mphy) + continue; + + eml_op->link_bitmap |= BIT(mphy->band_idx); + } + } + + return mt76_mcu_skb_send_msg(&dev->mt76, skb, + MCU_WMWA_UNI_CMD(STA_REC_UPDATE), true); +} + int mt7996_mcu_set_timing(struct mt7996_phy *phy, struct ieee80211_vif *vif, struct ieee80211_bss_conf *link_conf) { diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h index 5f574ebe81cc..ee3564c0115a 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h @@ -901,6 +901,10 @@ int mt7996_mcu_wtbl_update_hdr_trans(struct mt7996_dev *dev, struct mt7996_vif_link *link, struct mt7996_sta_link *msta_link); int mt7996_mcu_cp_support(struct mt7996_dev *dev, u8 mode); +int mt7996_mcu_set_emlsr_mode(struct mt7996_dev *dev, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + struct ieee80211_eml_params *eml_params); #ifdef CONFIG_MAC80211_DEBUGFS void mt7996_sta_add_debugfs(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct dentry *dir); From 947d63d8cd3b03c7be16875ca90273edbdbe7ce5 Mon Sep 17 00:00:00 2001 From: Ryder Lee Date: Fri, 13 Feb 2026 00:00:29 -0800 Subject: [PATCH 0809/1561] wifi: mt76: mt7996: Disable Rx hdr_trans in monitor mode Ensure raw frames are captured without header modification. Signed-off-by: Ryder Lee Link: https://patch.msgid.link/04008426d6cd5de3995beefb98f9d13f35526c25.1770969275.git.ryder.lee@mediatek.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7996/main.c | 2 ++ drivers/net/wireless/mediatek/mt76/mt7996/regs.h | 3 +++ 2 files changed, 5 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index c6d14f09fd10..06f4c653ed67 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -482,6 +482,8 @@ static void mt7996_set_monitor(struct mt7996_phy *phy, bool enabled) mt76_rmw_field(dev, MT_DMA_DCR0(phy->mt76->band_idx), MT_DMA_DCR0_RXD_G5_EN, enabled); + mt76_rmw_field(dev, MT_MDP_DCR0, + MT_MDP_DCR0_RX_HDR_TRANS_EN, !enabled); mt7996_phy_set_rxfilter(phy); mt7996_mcu_set_sniffer_mode(phy, enabled); } diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/regs.h b/drivers/net/wireless/mediatek/mt76/mt7996/regs.h index e48e0e575b64..393faae2d52b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/regs.h +++ b/drivers/net/wireless/mediatek/mt76/mt7996/regs.h @@ -159,6 +159,9 @@ enum offs_rev { #define MT_MDP_BASE 0x820cc000 #define MT_MDP(ofs) (MT_MDP_BASE + (ofs)) +#define MT_MDP_DCR0 MT_MDP(0x800) +#define MT_MDP_DCR0_RX_HDR_TRANS_EN BIT(19) + #define MT_MDP_DCR2 MT_MDP(0x8e8) #define MT_MDP_DCR2_RX_TRANS_SHORT BIT(2) From 3dc0c40d7806c72cfe88cf4e1e2650c1673f9db4 Mon Sep 17 00:00:00 2001 From: Michael Lo Date: Wed, 11 Feb 2026 17:50:25 +0800 Subject: [PATCH 0810/1561] wifi: mt76: mt7921: fix 6GHz regulatory update on connection Call mt7921_regd_update() instead of mt7921_mcu_set_clc() when setting the 6GHz power type after connection, so that regulatory limits and SAR power are also applied. Fixes: 51ba0e3a15eb ("wifi: mt76: mt7921: add 6GHz power type support for clc") Signed-off-by: Michael Lo Link: https://patch.msgid.link/20260211095025.2415624-1-leon.yen@mediatek.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7921/main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c index 42b9514e04e7..3d74fabe7408 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c @@ -800,7 +800,8 @@ mt7921_regd_set_6ghz_power_type(struct ieee80211_vif *vif, bool is_add) } out: - mt7921_mcu_set_clc(dev, dev->mt76.alpha2, dev->country_ie_env); + if (vif->bss_conf.chanreq.oper.chan->band == NL80211_BAND_6GHZ) + mt7921_regd_update(dev); } int mt7921_mac_sta_add(struct mt76_dev *mdev, struct ieee80211_vif *vif, From f0168f2f9a1eca55d3ae09d8250b94e82b67cac3 Mon Sep 17 00:00:00 2001 From: Ziyi Guo Date: Sat, 31 Jan 2026 03:52:10 +0000 Subject: [PATCH 0811/1561] wifi: mt76: add missing lock protection in mt76_sta_state for sta_event callback mt76_sta_state() calls the sta_event callback without holding dev->mutex. However, mt7915_mac_sta_event() (MT7915 implementation of this callback) calls mt7915_mac_twt_teardown_flow() which has lockdep_assert_held(&dev->mt76.mutex) indicating that callers must hold this lock. The locking pattern in mt76_sta_state() is inconsistent: - mt76_sta_add() acquires dev->mutex before calling dev->drv->sta_add - mt76_sta_remove() acquires dev->mutex before calling __mt76_sta_remove - But sta_event callback is called without acquiring the lock Add mutex_lock()/mutex_unlock() around the mt7915_mac_twt_teardown_flow invocation to fix the missing lock protection and maintain consistency with the existing locking pattern. Signed-off-by: Ziyi Guo Link: https://patch.msgid.link/20260131035210.2198259-1-n7l8m4@u.northwestern.edu Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7915/main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c index 0892291616ea..e1d83052aa6d 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c @@ -852,8 +852,10 @@ int mt7915_mac_sta_event(struct mt76_dev *mdev, struct ieee80211_vif *vif, return mt7915_mcu_add_sta(dev, vif, sta, CONN_STATE_PORT_SECURE, false); case MT76_STA_EVENT_DISASSOC: + mutex_lock(&dev->mt76.mutex); for (i = 0; i < ARRAY_SIZE(msta->twt.flow); i++) mt7915_mac_twt_teardown_flow(dev, msta, i); + mutex_unlock(&dev->mt76.mutex); mt7915_mcu_add_sta(dev, vif, sta, CONN_STATE_DISCONNECT, false); msta->wcid.sta_disabled = 1; From 62e037aa8cf5a69b7ea63336705a35c897b9db2b Mon Sep 17 00:00:00 2001 From: Quan Zhou Date: Wed, 25 Feb 2026 17:47:22 +0800 Subject: [PATCH 0812/1561] wifi: mt76: mt7925: fix incorrect TLV length in CLC command The previous implementation of __mt7925_mcu_set_clc() set the TLV length field (.len) incorrectly during CLC command construction. The length was initialized as sizeof(req) - 4, regardless of the actual segment length. This could cause the WiFi firmware to misinterpret the command payload, resulting in command execution errors. This patch moves the TLV length assignment to after the segment is selected, and sets .len to sizeof(req) + seg->len - 4, matching the actual command content. This ensures the firmware receives the correct TLV length and parses the command properly. Fixes: c948b5da6bbe ("wifi: mt76: mt7925: add Mediatek Wi-Fi7 driver for mt7925 chips") Cc: stable@vger.kernel.org Signed-off-by: Quan Zhou Acked-by: Sean Wang Link: https://patch.msgid.link/f56ae0e705774dfa8aab3b99e5bbdc92cd93523e.1772011204.git.quan.zhou@mediatek.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7925/mcu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c index 1379bf6a26b5..abcdd0e0b3b5 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c @@ -3380,7 +3380,6 @@ __mt7925_mcu_set_clc(struct mt792x_dev *dev, u8 *alpha2, u8 rsvd[64]; } __packed req = { .tag = cpu_to_le16(0x3), - .len = cpu_to_le16(sizeof(req) - 4), .idx = idx, .env = env_cap, @@ -3409,6 +3408,7 @@ __mt7925_mcu_set_clc(struct mt792x_dev *dev, u8 *alpha2, memcpy(req.type, rule->type, 2); req.size = cpu_to_le16(seg->len); + req.len = cpu_to_le16(sizeof(req) + seg->len - 4); dev->phy.clc_chan_conf = clc->ver == 1 ? 0xff : rule->flag; skb = __mt76_mcu_msg_alloc(&dev->mt76, &req, le16_to_cpu(req.size) + sizeof(req), From c0a47ffc4caaf5161955add553322112c3a211b0 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sun, 28 Sep 2025 18:27:01 +0200 Subject: [PATCH 0813/1561] wifi: mt76: mt7996: Add missing CHANCTX_STA_CSA property Enable missing CHANCTX_STA_CSA property required for MLO. Fixes: f5160304d57c ("wifi: mt76: mt7996: Enable MLO support for client interfaces") Signed-off-by: Lorenzo Bianconi Reviewed-by: AngeloGioacchino Del Regno Link: https://patch.msgid.link/20250928-mt7996_chanctx_sta_csa-v1-1-82e455185990@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7996/init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/init.c b/drivers/net/wireless/mediatek/mt76/mt7996/init.c index 29bce7cffbb5..5aaa93767109 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/init.c @@ -536,6 +536,7 @@ mt7996_init_wiphy(struct ieee80211_hw *hw, struct mtk_wed_device *wed) ieee80211_hw_set(hw, SUPPORTS_RX_DECAP_OFFLOAD); ieee80211_hw_set(hw, NO_VIRTUAL_MONITOR); ieee80211_hw_set(hw, SUPPORTS_MULTI_BSSID); + ieee80211_hw_set(hw, CHANCTX_STA_CSA); hw->max_tx_fragments = 4; wiphy->txq_memory_limit = 32 << 20; /* 32 MiB */ From 569ce4340268915911fc356ec9ad27e92fb82289 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 6 Mar 2026 11:27:52 +0100 Subject: [PATCH 0814/1561] wifi: mt76: mt7996: Remove link pointer dependency in mt7996_mac_sta_remove_links() Remove link pointer dependency in mt7996_mac_sta_remove_links routine to get the mt7996_phy pointer since the link can be already offchannel running mt7996_mac_sta_remove_links(). Rely on __mt7996_phy routine instead. Fixes: 344dd6a4c919 ("wifi: mt76: mt7996: Move num_sta accounting in mt7996_mac_sta_{add,remove}_links") Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260306-mt7996-deflink-lookup-link-remove-v1-1-7162b332873c@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7996/main.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index 06f4c653ed67..f063888b9980 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -1097,8 +1097,7 @@ mt7996_mac_sta_remove_links(struct mt7996_dev *dev, struct ieee80211_vif *vif, for_each_set_bit(link_id, &links, IEEE80211_MLD_MAX_NUM_LINKS) { struct mt7996_sta_link *msta_link = NULL; - struct mt7996_vif_link *link; - struct mt76_phy *mphy; + struct mt7996_phy *phy; msta_link = rcu_replace_pointer(msta->link[link_id], msta_link, lockdep_is_held(&mdev->mutex)); @@ -1107,17 +1106,12 @@ mt7996_mac_sta_remove_links(struct mt7996_dev *dev, struct ieee80211_vif *vif, mt7996_mac_wtbl_update(dev, msta_link->wcid.idx, MT_WTBL_UPDATE_ADM_COUNT_CLEAR); - mt7996_mac_sta_deinit_link(dev, msta_link); - link = mt7996_vif_link(dev, vif, link_id); - if (!link) - continue; - mphy = mt76_vif_link_phy(&link->mt76); - if (!mphy) - continue; + phy = __mt7996_phy(dev, msta_link->wcid.phy_idx); + if (phy) + phy->mt76->num_sta--; - mphy->num_sta--; if (msta->deflink_id == link_id) { msta->deflink_id = IEEE80211_LINK_UNSPECIFIED; if (msta->seclink_id == link_id) { From 5806c91a3f0d21d233f8c386c892e1ffaf64d7b2 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 6 Mar 2026 11:27:55 +0100 Subject: [PATCH 0815/1561] wifi: mt76: mt7996: Remove unnecessary phy filed in mt7996_vif_link struct Remove unnecessary phy pointer in mt7996_vif_link struct and rely on mt7996_vif_link_phy() utility routine. Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260306-mt7996-deflink-lookup-link-remove-v1-4-7162b332873c@kernel.org Signed-off-by: Felix Fietkau --- .../wireless/mediatek/mt76/mt7996/debugfs.c | 14 +++- .../net/wireless/mediatek/mt76/mt7996/mac.c | 7 +- .../net/wireless/mediatek/mt76/mt7996/main.c | 43 +++++++---- .../net/wireless/mediatek/mt76/mt7996/mcu.c | 74 +++++++++++++------ .../wireless/mediatek/mt76/mt7996/mt7996.h | 2 - 5 files changed, 101 insertions(+), 39 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7996/debugfs.c index 76d623b2cafb..6cc63f87b222 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/debugfs.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/debugfs.c @@ -626,13 +626,18 @@ mt7996_sta_hw_queue_read(void *data, struct ieee80211_sta *sta) { struct mt7996_sta *msta = (struct mt7996_sta *)sta->drv_priv; struct mt7996_vif *mvif = msta->vif; - struct mt7996_dev *dev = mvif->deflink.phy->dev; + struct mt7996_phy *phy = mt7996_vif_link_phy(&mvif->deflink); struct ieee80211_link_sta *link_sta; struct seq_file *s = data; struct ieee80211_vif *vif; + struct mt7996_dev *dev; unsigned int link_id; + if (!phy) + return; + vif = container_of((void *)mvif, struct ieee80211_vif, drv_priv); + dev = phy->dev; rcu_read_lock(); @@ -979,13 +984,17 @@ static ssize_t mt7996_link_sta_fixed_rate_set(struct file *file, #define LONG_PREAMBLE 1 struct ieee80211_link_sta *link_sta = file->private_data; struct mt7996_sta *msta = (struct mt7996_sta *)link_sta->sta->drv_priv; - struct mt7996_dev *dev = msta->vif->deflink.phy->dev; + struct mt7996_phy *link_phy = mt7996_vif_link_phy(&msta->vif->deflink); struct mt7996_sta_link *msta_link; struct ra_rate phy = {}; + struct mt7996_dev *dev; char buf[100]; int ret; u16 gi, ltf; + if (!link_phy) + return -EINVAL; + if (count >= sizeof(buf)) return -EINVAL; @@ -1008,6 +1017,7 @@ static ssize_t mt7996_link_sta_fixed_rate_set(struct file *file, * spe - off: 0, on: 1 * ltf - 1xltf: 0, 2xltf: 1, 4xltf: 2 */ + dev = link_phy->dev; if (sscanf(buf, "%hhu %hhu %hhu %hhu %hu %hhu %hhu %hhu %hhu %hu", &phy.mode, &phy.bw, &phy.mcs, &phy.nss, &gi, &phy.preamble, &phy.stbc, &phy.ldpc, &phy.spe, <f) != 10) { diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c index 208ed20d0bf9..7587e7b4e8fe 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c @@ -2167,9 +2167,14 @@ mt7996_update_vif_beacon(void *priv, u8 *mac, struct ieee80211_vif *vif) for_each_vif_active_link(vif, link_conf, link_id) { struct mt7996_vif_link *link; + struct mt7996_phy *link_phy; link = mt7996_vif_link(dev, vif, link_id); - if (!link || link->phy != phy) + if (!link) + continue; + + link_phy = mt7996_vif_link_phy(link); + if (link_phy != phy) continue; mt7996_mcu_add_beacon(dev->mt76.hw, vif, link_conf, diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index f063888b9980..8ecbc0511848 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -239,10 +239,13 @@ mt7996_set_hw_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, link_conf = &vif->bss_conf; if (cmd == SET_KEY && !sta && !link->mt76.cipher) { + struct mt7996_phy *phy = mt7996_vif_link_phy(link); + link->mt76.cipher = mt76_connac_mcu_get_cipher(key->cipher); - mt7996_mcu_add_bss_info(link->phy, vif, link_conf, - &link->mt76, msta_link, true); + if (phy) + mt7996_mcu_add_bss_info(phy, vif, link_conf, + &link->mt76, msta_link, true); } if (cmd == SET_KEY) @@ -316,7 +319,6 @@ int mt7996_vif_link_add(struct mt76_phy *mphy, struct ieee80211_vif *vif, return -ENOSPC; link->mld_idx = mld_idx; - link->phy = phy; mlink->omac_idx = idx; mlink->band_idx = band_idx; mlink->wmm_idx = vif->type == NL80211_IFTYPE_AP ? 0 : 3; @@ -823,15 +825,17 @@ mt7996_vif_cfg_changed(struct ieee80211_hw *hw, struct ieee80211_vif *vif, for_each_vif_active_link(vif, link_conf, link_id) { struct mt7996_vif_link *link; + struct mt7996_phy *phy; link = mt7996_vif_link(dev, vif, link_id); if (!link) continue; - if (!link->phy) + phy = mt7996_vif_link_phy(link); + if (!phy) continue; - mt7996_mcu_add_bss_info(link->phy, vif, link_conf, + mt7996_mcu_add_bss_info(phy, vif, link_conf, &link->mt76, &link->msta_link, true); mt7996_mcu_add_sta(dev, link_conf, NULL, link, NULL, @@ -950,10 +954,15 @@ mt7996_channel_switch_beacon(struct ieee80211_hw *hw, mutex_lock(&dev->mt76.mutex); for_each_vif_active_link(vif, link_conf, link_id) { - struct mt7996_vif_link *link = - mt7996_vif_link(dev, vif, link_id); + struct mt7996_vif_link *link; + struct mt7996_phy *link_phy; - if (!link || link->phy != phy) + link = mt7996_vif_link(dev, vif, link_id); + if (!link) + continue; + + link_phy = mt7996_vif_link_phy(link); + if (link_phy != phy) continue; /* Reset beacon when channel switch triggered during CAC to let @@ -1024,10 +1033,13 @@ mt7996_mac_sta_init_link(struct mt7996_dev *dev, { struct ieee80211_sta *sta = link_sta->sta; struct mt7996_sta *msta = (struct mt7996_sta *)sta->drv_priv; - struct mt7996_phy *phy = link->phy; + struct mt7996_phy *phy = mt7996_vif_link_phy(link); struct mt7996_sta_link *msta_link; int idx; + if (!phy) + return -EINVAL; + idx = mt76_wcid_alloc(dev->mt76.wcid_mask, MT7996_WTBL_STA); if (idx < 0) return -ENOSPC; @@ -1572,8 +1584,8 @@ mt7996_get_stats(struct ieee80211_hw *hw, u64 __mt7996_get_tsf(struct ieee80211_hw *hw, struct mt7996_vif_link *link) { + struct mt7996_phy *phy = mt7996_vif_link_phy(link); struct mt7996_dev *dev = mt7996_hw_dev(hw); - struct mt7996_phy *phy = link->phy; union { u64 t64; u32 t32[2]; @@ -1632,7 +1644,7 @@ mt7996_set_tsf(struct ieee80211_hw *hw, struct ieee80211_vif *vif, n = link->mt76.omac_idx > HW_BSSID_MAX ? HW_BSSID_0 : link->mt76.omac_idx; - phy = link->phy; + phy = mt7996_vif_link_phy(link); if (!phy) goto unlock; @@ -1666,7 +1678,7 @@ mt7996_offset_tsf(struct ieee80211_hw *hw, struct ieee80211_vif *vif, if (!link) goto unlock; - phy = link->phy; + phy = mt7996_vif_link_phy(link); if (!phy) goto unlock; @@ -1796,9 +1808,14 @@ static void mt7996_link_rate_ctrl_update(void *data, struct mt7996_sta_link *msta_link) { struct mt7996_sta *msta = msta_link->sta; - struct mt7996_dev *dev = msta->vif->deflink.phy->dev; + struct mt7996_phy *phy = mt7996_vif_link_phy(&msta->vif->deflink); + struct mt7996_dev *dev; u32 *changed = data; + if (!phy) + return; + + dev = phy->dev; spin_lock_bh(&dev->mt76.sta_poll_lock); msta_link->changed |= *changed; diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c index 8e06f7fe479c..2a50d0758d9c 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c @@ -128,9 +128,16 @@ mt7996_mcu_set_sta_he_mcs(struct ieee80211_link_sta *link_sta, struct mt7996_vif_link *link, __le16 *he_mcs, u16 mcs_map) { + struct mt76_phy *mphy = mt76_vif_link_phy(&link->mt76); int nss, max_nss = link_sta->rx_nss > 3 ? 4 : link_sta->rx_nss; - enum nl80211_band band = link->phy->mt76->chandef.chan->band; - const u16 *mask = link->bitrate_mask.control[band].he_mcs; + enum nl80211_band band; + const u16 *mask; + + if (!mphy) + return; + + band = mphy->chandef.chan->band; + mask = link->bitrate_mask.control[band].he_mcs; for (nss = 0; nss < max_nss; nss++) { int mcs; @@ -1968,9 +1975,8 @@ mt7996_mcu_sta_bfer_tlv(struct mt7996_dev *dev, struct sk_buff *skb, #define EBF_MODE BIT(0) #define IBF_MODE BIT(1) #define BF_MAT_ORDER 4 + struct mt7996_phy *phy = mt7996_vif_link_phy(link); struct ieee80211_vif *vif = link_conf->vif; - struct mt7996_phy *phy = link->phy; - int tx_ant = hweight16(phy->mt76->chainmask) - 1; struct sta_rec_bf *bf; struct tlv *tlv; static const u8 matrix[BF_MAT_ORDER][BF_MAT_ORDER] = { @@ -1979,8 +1985,12 @@ mt7996_mcu_sta_bfer_tlv(struct mt7996_dev *dev, struct sk_buff *skb, {2, 4, 4, 0}, /* 3x1, 3x2, 3x3, 3x4 */ {3, 5, 6, 0} /* 4x1, 4x2, 4x3, 4x4 */ }; + int tx_ant; bool ebf; + if (!phy) + return; + if (!(link_sta->ht_cap.ht_supported || link_sta->he_cap.has_he)) return; @@ -1996,17 +2006,18 @@ mt7996_mcu_sta_bfer_tlv(struct mt7996_dev *dev, struct sk_buff *skb, * ht: iBF only, since mac80211 lacks of eBF support */ if (link_sta->eht_cap.has_eht) - mt7996_mcu_sta_bfer_eht(link_sta, vif, link->phy, bf, ebf); + mt7996_mcu_sta_bfer_eht(link_sta, vif, phy, bf, ebf); else if (link_sta->he_cap.has_he) - mt7996_mcu_sta_bfer_he(link_sta, vif, link->phy, bf, ebf); + mt7996_mcu_sta_bfer_he(link_sta, vif, phy, bf, ebf); else if (link_sta->vht_cap.vht_supported) - mt7996_mcu_sta_bfer_vht(link_sta, link->phy, bf, ebf); + mt7996_mcu_sta_bfer_vht(link_sta, phy, bf, ebf); else if (link_sta->ht_cap.ht_supported) - mt7996_mcu_sta_bfer_ht(link_sta, link->phy, bf, ebf); + mt7996_mcu_sta_bfer_ht(link_sta, phy, bf, ebf); else return; bf->bf_cap = ebf ? EBF_MODE : (dev->ibf ? IBF_MODE : 0); + tx_ant = hweight16(phy->mt76->chainmask) - 1; if (is_mt7992(&dev->mt76) && tx_ant == 4) bf->bf_cap |= IBF_MODE; @@ -2038,11 +2049,14 @@ mt7996_mcu_sta_bfee_tlv(struct mt7996_dev *dev, struct sk_buff *skb, struct ieee80211_link_sta *link_sta, struct mt7996_vif_link *link) { - struct mt7996_phy *phy = link->phy; - int tx_ant = hweight8(phy->mt76->antenna_mask) - 1; + struct mt7996_phy *phy = mt7996_vif_link_phy(link); struct sta_rec_bfee *bfee; struct tlv *tlv; u8 nrow = 0; + int tx_ant; + + if (!phy) + return; if (!(link_sta->vht_cap.vht_supported || link_sta->he_cap.has_he)) return; @@ -2066,6 +2080,7 @@ mt7996_mcu_sta_bfee_tlv(struct mt7996_dev *dev, struct sk_buff *skb, } /* reply with identity matrix to avoid 2x2 BF negative gain */ + tx_ant = hweight8(phy->mt76->antenna_mask) - 1; bfee->fb_identity_matrix = (nrow == 1 && tx_ant == 2); } @@ -2249,6 +2264,7 @@ mt7996_mcu_add_rate_ctrl_fixed(struct mt7996_dev *dev, struct mt7996_sta *msta, struct ieee80211_sta *sta; int ret, nrates = 0, idx; enum nl80211_band band; + struct mt76_phy *mphy; bool has_he; #define __sta_phy_bitrate_mask_check(_mcs, _gi, _ht, _he) \ @@ -2282,7 +2298,11 @@ mt7996_mcu_add_rate_ctrl_fixed(struct mt7996_dev *dev, struct mt7996_sta *msta, if (!link_sta) goto error_unlock; - band = link->phy->mt76->chandef.chan->band; + mphy = mt76_vif_link_phy(&link->mt76); + if (!mphy) + goto error_unlock; + + band = mphy->chandef.chan->band; has_he = link_sta->he_cap.has_he; mask = link->bitrate_mask; idx = msta_link->wcid.idx; @@ -2362,18 +2382,25 @@ mt7996_mcu_sta_rate_ctrl_tlv(struct sk_buff *skb, struct mt7996_dev *dev, struct mt7996_vif_link *link) { #define INIT_RCPI 180 - struct mt76_phy *mphy = link->phy->mt76; - struct cfg80211_chan_def *chandef = &mphy->chandef; + struct mt76_phy *mphy = mt76_vif_link_phy(&link->mt76); struct cfg80211_bitrate_mask *mask = &link->bitrate_mask; u32 cap = link_sta->sta->wme ? STA_CAP_WMM : 0; - enum nl80211_band band = chandef->chan->band; + struct cfg80211_chan_def *chandef; struct sta_rec_ra_uni *ra; + enum nl80211_band band; struct tlv *tlv; - u32 supp_rate = link_sta->supp_rates[band]; + u32 supp_rate; + + if (!mphy) + return; tlv = mt76_connac_mcu_add_tlv(skb, STA_REC_RA, sizeof(*ra)); ra = (struct sta_rec_ra_uni *)tlv; + chandef = &mphy->chandef; + band = chandef->chan->band; + supp_rate = link_sta->supp_rates[band]; + ra->valid = true; ra->auto_rate = true; ra->phy_mode = mt76_connac_get_phy_mode(mphy, vif, band, link_sta); @@ -2722,6 +2749,7 @@ void mt7996_mcu_update_sta_rec_bw(void *data, struct ieee80211_sta *sta) { struct mt7996_vif_link *link = (struct mt7996_vif_link *)data; struct mt7996_sta *msta = (struct mt7996_sta *)sta->drv_priv; + struct mt7996_phy *phy = mt7996_vif_link_phy(link); struct mt7996_sta_link *msta_link; struct mt7996_dev *dev; struct ieee80211_bss_conf *link_conf; @@ -2730,10 +2758,13 @@ void mt7996_mcu_update_sta_rec_bw(void *data, struct ieee80211_sta *sta) struct sk_buff *skb; int link_id; + if (!phy) + return; + if (link->mt76.mvif != &msta->vif->mt76) return; - dev = link->phy->dev; + dev = phy->dev; link_id = link->msta_link.wcid.link_id; link_sta = link_sta_dereference_protected(sta, link_id); if (!link_sta) @@ -3010,6 +3041,7 @@ int mt7996_mcu_add_beacon(struct ieee80211_hw *hw, struct ieee80211_vif *vif, { struct mt7996_dev *dev = mt7996_hw_dev(hw); struct mt7996_vif_link *link = mt7996_vif_conf_link(dev, vif, link_conf); + struct mt76_phy *mphy = link ? mt76_vif_link_phy(&link->mt76) : NULL; struct mt76_vif_link *mlink = link ? &link->mt76 : NULL; struct ieee80211_mutable_offsets offs; struct ieee80211_tx_info *info; @@ -3024,7 +3056,7 @@ int mt7996_mcu_add_beacon(struct ieee80211_hw *hw, struct ieee80211_vif *vif, if (!mlink) return -EINVAL; - if (link->phy && link->phy->mt76->offchannel) + if (mphy && mphy->offchannel) enabled = false; rskb = __mt7996_mcu_alloc_bss_req(&dev->mt76, mlink, @@ -3075,9 +3107,9 @@ int mt7996_mcu_beacon_inband_discov(struct mt7996_dev *dev, { #define OFFLOAD_TX_MODE_SU BIT(0) #define OFFLOAD_TX_MODE_MU BIT(1) + struct mt76_phy *mphy = mt76_vif_link_phy(&link->mt76); struct ieee80211_vif *vif = link_conf->vif; struct ieee80211_hw *hw = mt76_hw(dev); - struct mt7996_phy *phy = link->phy; struct mt76_wcid *wcid = &dev->mt76.global_wcid; struct bss_inband_discovery_tlv *discov; struct ieee80211_tx_info *info; @@ -3088,10 +3120,10 @@ int mt7996_mcu_beacon_inband_discov(struct mt7996_dev *dev, u8 *buf, interval; int len; - if (!phy) + if (!mphy) return -EINVAL; - chandef = &phy->mt76->chandef; + chandef = &mphy->chandef; band = chandef->chan->band; if (link_conf->nontransmitted) @@ -3129,7 +3161,7 @@ int mt7996_mcu_beacon_inband_discov(struct mt7996_dev *dev, info = IEEE80211_SKB_CB(skb); info->control.vif = vif; info->band = band; - info->hw_queue |= FIELD_PREP(MT_TX_HW_QUEUE_PHY, phy->mt76->band_idx); + info->hw_queue |= FIELD_PREP(MT_TX_HW_QUEUE_PHY, mphy->band_idx); len = ALIGN(sizeof(*discov) + MT_TXD_SIZE + skb->len, 4); tlv = mt7996_mcu_add_uni_tlv(rskb, UNI_BSS_INFO_OFFLOAD, len); diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h index ee3564c0115a..d18f8794351e 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h @@ -276,8 +276,6 @@ struct mt7996_vif_link { struct mt76_vif_link mt76; /* must be first */ struct mt7996_sta_link msta_link; - struct mt7996_phy *phy; - struct cfg80211_bitrate_mask bitrate_mask; u8 mld_idx; From e648051d52afbdb360bd586218961f5fffff63e8 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sun, 8 Mar 2026 14:25:20 +0100 Subject: [PATCH 0816/1561] wifi: mt76: mt7996: Decrement sta counter removing the link in mt7996_mac_reset_sta_iter() Fixes tracking per-phy stations for offchannel switching. Fixes: ace5d3b6b49e8 ("wifi: mt76: mt7996: improve hardware restart reliability") Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260308-mt7996_mac_reset_vif_iter-fix-v1-1-57f640aa2dcf@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7996/mac.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c index 7587e7b4e8fe..be70c0f4fc30 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c @@ -2366,6 +2366,7 @@ mt7996_mac_reset_sta_iter(void *data, struct ieee80211_sta *sta) for (i = 0; i < ARRAY_SIZE(msta->link); i++) { struct mt7996_sta_link *msta_link = NULL; + struct mt7996_phy *phy; msta_link = rcu_replace_pointer(msta->link[i], msta_link, lockdep_is_held(&dev->mt76.mutex)); @@ -2373,6 +2374,10 @@ mt7996_mac_reset_sta_iter(void *data, struct ieee80211_sta *sta) continue; mt7996_mac_sta_deinit_link(dev, msta_link); + phy = __mt7996_phy(dev, msta_link->wcid.phy_idx); + if (phy) + phy->mt76->num_sta--; + if (msta_link != &msta->deflink) kfree_rcu(msta_link, rcu_head); } From 0420180df092419a96351fb2afec1e2a74d385c3 Mon Sep 17 00:00:00 2001 From: Chad Monroe Date: Mon, 9 Mar 2026 06:07:20 +0000 Subject: [PATCH 0817/1561] wifi: mt76: fix multi-radio on-channel scanning avoid unnecessary channel switch when performing an on-channel scan using a multi-radio device. Fixes: c56d6edebc1f ("wifi: mt76: mt7996: use emulated hardware scan support") Signed-off-by: Chad Monroe Link: https://patch.msgid.link/20251118102723.47997-1-nbd@nbd.name Link: https://patch.msgid.link/20260309060730.87840-1-nbd@nbd.name Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/scan.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/scan.c b/drivers/net/wireless/mediatek/mt76/scan.c index ff9176cdee3d..89f16c23e352 100644 --- a/drivers/net/wireless/mediatek/mt76/scan.c +++ b/drivers/net/wireless/mediatek/mt76/scan.c @@ -16,7 +16,7 @@ static void mt76_scan_complete(struct mt76_dev *dev, bool abort) clear_bit(MT76_SCANNING, &phy->state); - if (dev->scan.chan && phy->main_chandef.chan && + if (dev->scan.chan && phy->main_chandef.chan && phy->offchannel && !test_bit(MT76_MCU_RESET, &dev->phy.state)) mt76_set_channel(phy, &phy->main_chandef, false); mt76_put_vif_phy_link(phy, dev->scan.vif, dev->scan.mlink); @@ -87,6 +87,7 @@ void mt76_scan_work(struct work_struct *work) struct cfg80211_chan_def chandef = {}; struct mt76_phy *phy = dev->scan.phy; int duration = HZ / 9; /* ~110 ms */ + bool offchannel = true; int i; if (dev->scan.chan_idx >= req->n_channels) { @@ -94,7 +95,7 @@ void mt76_scan_work(struct work_struct *work) return; } - if (dev->scan.chan && phy->num_sta) { + if (dev->scan.chan && phy->num_sta && phy->offchannel) { dev->scan.chan = NULL; mt76_set_channel(phy, &phy->main_chandef, false); goto out; @@ -102,20 +103,26 @@ void mt76_scan_work(struct work_struct *work) dev->scan.chan = req->channels[dev->scan.chan_idx++]; cfg80211_chandef_create(&chandef, dev->scan.chan, NL80211_CHAN_HT20); - mt76_set_channel(phy, &chandef, true); + if (phy->main_chandef.chan == dev->scan.chan) { + chandef = phy->main_chandef; + offchannel = false; + } + + mt76_set_channel(phy, &chandef, offchannel); if (!req->n_ssids || chandef.chan->flags & (IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_RADAR)) goto out; - duration = HZ / 16; /* ~60 ms */ + if (phy->offchannel) + duration = HZ / 16; /* ~60 ms */ local_bh_disable(); for (i = 0; i < req->n_ssids; i++) mt76_scan_send_probe(dev, &req->ssids[i]); local_bh_enable(); out: - if (dev->scan.chan) + if (dev->scan.chan && phy->offchannel) duration = max_t(int, duration, msecs_to_jiffies(req->duration + (req->duration >> 5))); From 360552c8592dab3c69e0bbff786b55137f1a81bb Mon Sep 17 00:00:00 2001 From: Chad Monroe Date: Mon, 9 Mar 2026 06:07:21 +0000 Subject: [PATCH 0818/1561] wifi: mt76: support upgrading passive scans to active On channels with NO_IR or RADAR flags, wait for beacon before sending probe requests. Allows active scanning and WPS on restricted channels if another AP is already present. Fixes: c56d6edebc1f ("wifi: mt76: mt7996: use emulated hardware scan support") Tested-by: Piotr Kubik Signed-off-by: Chad Monroe Link: https://patch.msgid.link/20251118102723.47997-2-nbd@nbd.name Link: https://patch.msgid.link/20260309060730.87840-2-nbd@nbd.name Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mac80211.c | 1 + drivers/net/wireless/mediatek/mt76/mt76.h | 4 ++ .../net/wireless/mediatek/mt76/mt7996/mac.c | 3 ++ drivers/net/wireless/mediatek/mt76/scan.c | 51 +++++++++++++++++-- 4 files changed, 56 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c index d0c522909e98..3c539c263238 100644 --- a/drivers/net/wireless/mediatek/mt76/mac80211.c +++ b/drivers/net/wireless/mediatek/mt76/mac80211.c @@ -726,6 +726,7 @@ mt76_alloc_device(struct device *pdev, unsigned int size, INIT_LIST_HEAD(&dev->rxwi_cache); dev->token_size = dev->drv->token_size; INIT_DELAYED_WORK(&dev->scan_work, mt76_scan_work); + spin_lock_init(&dev->scan_lock); for (i = 0; i < ARRAY_SIZE(dev->q_rx); i++) skb_queue_head_init(&dev->rx_skb[i]); diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index 23a1832812a2..ffeb4b4c425b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -1003,6 +1003,7 @@ struct mt76_dev { u32 rxfilter; struct delayed_work scan_work; + spinlock_t scan_lock; struct { struct cfg80211_scan_request *req; struct ieee80211_channel *chan; @@ -1010,6 +1011,8 @@ struct mt76_dev { struct mt76_vif_link *mlink; struct mt76_phy *phy; int chan_idx; + bool beacon_wait; + bool beacon_received; } scan; #ifdef CONFIG_NL80211_TESTMODE @@ -1597,6 +1600,7 @@ int mt76_get_rate(struct mt76_dev *dev, int mt76_hw_scan(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_scan_request *hw_req); void mt76_cancel_hw_scan(struct ieee80211_hw *hw, struct ieee80211_vif *vif); +void mt76_scan_rx_beacon(struct mt76_dev *dev, struct ieee80211_channel *chan); void mt76_sw_scan(struct ieee80211_hw *hw, struct ieee80211_vif *vif, const u8 *mac); void mt76_sw_scan_complete(struct ieee80211_hw *hw, diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c index be70c0f4fc30..f5537eba9c6b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c @@ -515,6 +515,9 @@ mt7996_mac_fill_rx(struct mt7996_dev *dev, enum mt76_rxq_id q, qos_ctl = FIELD_GET(MT_RXD10_QOS_CTL, v2); seq_ctrl = FIELD_GET(MT_RXD10_SEQ_CTRL, v2); + if (ieee80211_is_beacon(fc)) + mt76_scan_rx_beacon(&dev->mt76, mphy->chandef.chan); + rxd += 4; if ((u8 *)rxd - skb->data >= skb->len) return -EINVAL; diff --git a/drivers/net/wireless/mediatek/mt76/scan.c b/drivers/net/wireless/mediatek/mt76/scan.c index 89f16c23e352..2b5f30cef795 100644 --- a/drivers/net/wireless/mediatek/mt76/scan.c +++ b/drivers/net/wireless/mediatek/mt76/scan.c @@ -27,6 +27,10 @@ static void mt76_scan_complete(struct mt76_dev *dev, bool abort) void mt76_abort_scan(struct mt76_dev *dev) { + spin_lock_bh(&dev->scan_lock); + dev->scan.beacon_wait = false; + spin_unlock_bh(&dev->scan_lock); + cancel_delayed_work_sync(&dev->scan_work); mt76_scan_complete(dev, true); } @@ -79,6 +83,28 @@ out: rcu_read_unlock(); } +void mt76_scan_rx_beacon(struct mt76_dev *dev, struct ieee80211_channel *chan) +{ + struct mt76_phy *phy; + + spin_lock(&dev->scan_lock); + + if (!dev->scan.beacon_wait || dev->scan.beacon_received || + dev->scan.chan != chan) + goto out; + + phy = dev->scan.phy; + if (!phy) + goto out; + + dev->scan.beacon_received = true; + ieee80211_queue_delayed_work(phy->hw, &dev->scan_work, 0); + +out: + spin_unlock(&dev->scan_lock); +} +EXPORT_SYMBOL_GPL(mt76_scan_rx_beacon); + void mt76_scan_work(struct work_struct *work) { struct mt76_dev *dev = container_of(work, struct mt76_dev, @@ -87,9 +113,20 @@ void mt76_scan_work(struct work_struct *work) struct cfg80211_chan_def chandef = {}; struct mt76_phy *phy = dev->scan.phy; int duration = HZ / 9; /* ~110 ms */ - bool offchannel = true; + bool beacon_rx, offchannel = true; int i; + if (!phy || !req) + return; + + spin_lock_bh(&dev->scan_lock); + beacon_rx = dev->scan.beacon_wait && dev->scan.beacon_received; + dev->scan.beacon_wait = false; + spin_unlock_bh(&dev->scan_lock); + + if (beacon_rx) + goto probe; + if (dev->scan.chan_idx >= req->n_channels) { mt76_scan_complete(dev, false); return; @@ -110,10 +147,18 @@ void mt76_scan_work(struct work_struct *work) mt76_set_channel(phy, &chandef, offchannel); - if (!req->n_ssids || - chandef.chan->flags & (IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_RADAR)) + if (!req->n_ssids) goto out; + if (chandef.chan->flags & (IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_RADAR)) { + spin_lock_bh(&dev->scan_lock); + dev->scan.beacon_received = false; + dev->scan.beacon_wait = true; + spin_unlock_bh(&dev->scan_lock); + goto out; + } + +probe: if (phy->offchannel) duration = HZ / 16; /* ~60 ms */ local_bh_disable(); From ec0a9b01ef88b5f5bdf74140f8c987f7a96693af Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 9 Mar 2026 06:07:22 +0000 Subject: [PATCH 0819/1561] wifi: mt76: add offchannel check to mt76_roc_complete mt76_roc_complete() unconditionally calls __mt76_set_channel() to restore the operating channel. The scan equivalent mt76_scan_complete() checks phy->offchannel first, skipping the restore if the phy is already back on-channel. Without this check, ROC completion performs a redundant full hardware channel switch when something has already moved the phy back. Link: https://patch.msgid.link/20260309060730.87840-3-nbd@nbd.name Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/channel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/channel.c b/drivers/net/wireless/mediatek/mt76/channel.c index d9f8529db7ed..ae7b4ed27a5c 100644 --- a/drivers/net/wireless/mediatek/mt76/channel.c +++ b/drivers/net/wireless/mediatek/mt76/channel.c @@ -324,7 +324,7 @@ void mt76_roc_complete(struct mt76_phy *phy) if (mlink) mlink->mvif->roc_phy = NULL; - if (phy->main_chandef.chan && + if (phy->main_chandef.chan && phy->offchannel && !test_bit(MT76_MCU_RESET, &dev->phy.state)) __mt76_set_channel(phy, &phy->main_chandef, false); mt76_put_vif_phy_link(phy, phy->roc_vif, phy->roc_link); From f0fb9afb74ec5bec49585772502db62613321fc6 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 9 Mar 2026 06:07:23 +0000 Subject: [PATCH 0820/1561] wifi: mt76: check chanctx before restoring channel after ROC mt76_remove_chanctx() sets phy->chanctx to NULL but does not clear phy->main_chandef. If ROC is later performed on that phy, completion tries to restore the stale main_chandef channel, programming the hardware to sit on a channel with no active context. Add a chanctx check to avoid restoring a channel when no context is active. Link: https://patch.msgid.link/20260309060730.87840-4-nbd@nbd.name Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/channel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/channel.c b/drivers/net/wireless/mediatek/mt76/channel.c index ae7b4ed27a5c..cfff50892a6e 100644 --- a/drivers/net/wireless/mediatek/mt76/channel.c +++ b/drivers/net/wireless/mediatek/mt76/channel.c @@ -324,7 +324,7 @@ void mt76_roc_complete(struct mt76_phy *phy) if (mlink) mlink->mvif->roc_phy = NULL; - if (phy->main_chandef.chan && phy->offchannel && + if (phy->chanctx && phy->main_chandef.chan && phy->offchannel && !test_bit(MT76_MCU_RESET, &dev->phy.state)) __mt76_set_channel(phy, &phy->main_chandef, false); mt76_put_vif_phy_link(phy, phy->roc_vif, phy->roc_link); From de62b24224ac1533c17b3d5bae77164a82ae2e49 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 9 Mar 2026 06:07:24 +0000 Subject: [PATCH 0821/1561] wifi: mt76: abort ROC on chanctx changes mt76_change_chanctx() calls mt76_phy_update_channel() which switches the hardware channel. If ROC is active on the same phy, this switches away from the ROC channel and clears offchannel, but leaves ROC state intact. Mac80211 still thinks the phy is on the ROC channel. Abort any active ROC before proceeding, matching the pattern already used in add, remove, assign, unassign, and switch chanctx functions. Link: https://patch.msgid.link/20260309060730.87840-5-nbd@nbd.name Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/channel.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/channel.c b/drivers/net/wireless/mediatek/mt76/channel.c index cfff50892a6e..8d2e72c68c6b 100644 --- a/drivers/net/wireless/mediatek/mt76/channel.c +++ b/drivers/net/wireless/mediatek/mt76/channel.c @@ -88,6 +88,9 @@ void mt76_change_chanctx(struct ieee80211_hw *hw, IEEE80211_CHANCTX_CHANGE_RADAR))) return; + if (phy->roc_vif) + mt76_abort_roc(phy); + cancel_delayed_work_sync(&phy->mac_work); mutex_lock(&dev->mutex); From f72dd74dd0b69e3a87b4702f3c860e9a7318d5dd Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 9 Mar 2026 06:07:25 +0000 Subject: [PATCH 0822/1561] wifi: mt76: optimize ROC for same-channel case mt76_remain_on_channel() always creates an HT20 chandef and goes offchannel, even when the ROC channel matches the operating channel. This unnecessarily narrows bandwidth and triggers beacon stop/restart. When the ROC channel matches the current operating channel, preserve the full chandef and skip the offchannel transition, matching the optimization already present in the scan code. Extract the shared same-channel detection into mt76_offchannel_chandef() and use it in both ROC and scan paths. Link: https://patch.msgid.link/20260309060730.87840-6-nbd@nbd.name Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/channel.c | 4 ++-- drivers/net/wireless/mediatek/mt76/mt76.h | 12 ++++++++++++ drivers/net/wireless/mediatek/mt76/scan.c | 6 +----- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/channel.c b/drivers/net/wireless/mediatek/mt76/channel.c index 8d2e72c68c6b..f42f25101544 100644 --- a/drivers/net/wireless/mediatek/mt76/channel.c +++ b/drivers/net/wireless/mediatek/mt76/channel.c @@ -392,8 +392,8 @@ int mt76_remain_on_channel(struct ieee80211_hw *hw, struct ieee80211_vif *vif, mlink->mvif->roc_phy = phy; phy->roc_vif = vif; phy->roc_link = mlink; - cfg80211_chandef_create(&chandef, chan, NL80211_CHAN_HT20); - ret = __mt76_set_channel(phy, &chandef, true); + ret = __mt76_set_channel(phy, &chandef, + mt76_offchannel_chandef(phy, chan, &chandef)); if (ret) { mlink->mvif->roc_phy = NULL; phy->roc_vif = NULL; diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index ffeb4b4c425b..c7c9ffd0dc3b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -1790,6 +1790,18 @@ void mt76_queue_tx_complete(struct mt76_dev *dev, struct mt76_queue *q, struct mt76_queue_entry *e); int __mt76_set_channel(struct mt76_phy *phy, struct cfg80211_chan_def *chandef, bool offchannel); + +static inline bool +mt76_offchannel_chandef(struct mt76_phy *phy, struct ieee80211_channel *chan, + struct cfg80211_chan_def *chandef) +{ + cfg80211_chandef_create(chandef, chan, NL80211_CHAN_HT20); + if (phy->main_chandef.chan != chan) + return true; + + *chandef = phy->main_chandef; + return false; +} int mt76_set_channel(struct mt76_phy *phy, struct cfg80211_chan_def *chandef, bool offchannel); void mt76_scan_work(struct work_struct *work); diff --git a/drivers/net/wireless/mediatek/mt76/scan.c b/drivers/net/wireless/mediatek/mt76/scan.c index 2b5f30cef795..5a67e9b8183a 100644 --- a/drivers/net/wireless/mediatek/mt76/scan.c +++ b/drivers/net/wireless/mediatek/mt76/scan.c @@ -139,11 +139,7 @@ void mt76_scan_work(struct work_struct *work) } dev->scan.chan = req->channels[dev->scan.chan_idx++]; - cfg80211_chandef_create(&chandef, dev->scan.chan, NL80211_CHAN_HT20); - if (phy->main_chandef.chan == dev->scan.chan) { - chandef = phy->main_chandef; - offchannel = false; - } + offchannel = mt76_offchannel_chandef(phy, dev->scan.chan, &chandef); mt76_set_channel(phy, &chandef, offchannel); From 381733b3a14aaef36b421571c1e99856304311f1 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 9 Mar 2026 06:07:26 +0000 Subject: [PATCH 0823/1561] wifi: mt76: send nullfunc PS frames on offchannel transitions Since mt76 uses chanctx, mac80211 does not send nullfunc power save notifications when the driver goes offchannel for scan or ROC. Add mt76_offchannel_notify() to send nullfunc PM=1 before going offchannel and PM=0 after returning, so that the AP can buffer frames during the absence. For MLO, iterate all vif links on the phy and set IEEE80211_TX_CTRL_MLO_LINK so that the driver's tx_prepare_skb resolves the correct per-link wcid. Link: https://patch.msgid.link/20260309060730.87840-7-nbd@nbd.name Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/channel.c | 12 ++- drivers/net/wireless/mediatek/mt76/mac80211.c | 99 +++++++++++++++++++ drivers/net/wireless/mediatek/mt76/mt76.h | 1 + drivers/net/wireless/mediatek/mt76/scan.c | 7 +- 4 files changed, 115 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/channel.c b/drivers/net/wireless/mediatek/mt76/channel.c index f42f25101544..3072e11e2688 100644 --- a/drivers/net/wireless/mediatek/mt76/channel.c +++ b/drivers/net/wireless/mediatek/mt76/channel.c @@ -328,8 +328,10 @@ void mt76_roc_complete(struct mt76_phy *phy) if (mlink) mlink->mvif->roc_phy = NULL; if (phy->chanctx && phy->main_chandef.chan && phy->offchannel && - !test_bit(MT76_MCU_RESET, &dev->phy.state)) + !test_bit(MT76_MCU_RESET, &dev->phy.state)) { __mt76_set_channel(phy, &phy->main_chandef, false); + mt76_offchannel_notify(phy, false); + } mt76_put_vif_phy_link(phy, phy->roc_vif, phy->roc_link); phy->roc_vif = NULL; phy->roc_link = NULL; @@ -367,6 +369,7 @@ int mt76_remain_on_channel(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct mt76_phy *phy = hw->priv; struct mt76_dev *dev = phy->dev; struct mt76_vif_link *mlink; + bool offchannel; int ret = 0; phy = dev->band_phys[chan->band]; @@ -392,8 +395,11 @@ int mt76_remain_on_channel(struct ieee80211_hw *hw, struct ieee80211_vif *vif, mlink->mvif->roc_phy = phy; phy->roc_vif = vif; phy->roc_link = mlink; - ret = __mt76_set_channel(phy, &chandef, - mt76_offchannel_chandef(phy, chan, &chandef)); + + offchannel = mt76_offchannel_chandef(phy, chan, &chandef); + if (offchannel) + mt76_offchannel_notify(phy, true); + ret = __mt76_set_channel(phy, &chandef, offchannel); if (ret) { mlink->mvif->roc_phy = NULL; phy->roc_vif = NULL; diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c index 3c539c263238..63a42fe16f73 100644 --- a/drivers/net/wireless/mediatek/mt76/mac80211.c +++ b/drivers/net/wireless/mediatek/mt76/mac80211.c @@ -2132,3 +2132,102 @@ u16 mt76_select_links(struct ieee80211_vif *vif, int max_active_links) return sel_links; } EXPORT_SYMBOL_GPL(mt76_select_links); + +struct mt76_offchannel_cb_data { + struct mt76_phy *phy; + bool offchannel; +}; + +static void +mt76_offchannel_send_nullfunc(struct mt76_offchannel_cb_data *data, + struct ieee80211_vif *vif, int link_id) +{ + struct mt76_phy *phy = data->phy; + struct ieee80211_tx_info *info; + struct ieee80211_sta *sta = NULL; + struct ieee80211_hdr *hdr; + struct mt76_wcid *wcid; + struct sk_buff *skb; + + skb = ieee80211_nullfunc_get(phy->hw, vif, link_id, true); + if (!skb) + return; + + hdr = (struct ieee80211_hdr *)skb->data; + if (data->offchannel) + hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PM); + + skb->priority = 7; + skb_set_queue_mapping(skb, IEEE80211_AC_VO); + + if (!ieee80211_tx_prepare_skb(phy->hw, vif, skb, + phy->main_chandef.chan->band, + &sta)) + return; + + if (sta) + wcid = (struct mt76_wcid *)sta->drv_priv; + else + wcid = ((struct mt76_vif_link *)vif->drv_priv)->wcid; + + if (link_id >= 0) { + info = IEEE80211_SKB_CB(skb); + info->control.flags &= ~IEEE80211_TX_CTRL_MLO_LINK; + info->control.flags |= + u32_encode_bits(link_id, IEEE80211_TX_CTRL_MLO_LINK); + } + + mt76_tx(phy, sta, wcid, skb); +} + +static void +mt76_offchannel_notify_iter(void *_data, u8 *mac, struct ieee80211_vif *vif) +{ + struct mt76_offchannel_cb_data *data = _data; + struct mt76_vif_link *mlink; + struct mt76_vif_data *mvif; + int link_id; + + if (vif->type != NL80211_IFTYPE_STATION || !vif->cfg.assoc) + return; + + mlink = (struct mt76_vif_link *)vif->drv_priv; + mvif = mlink->mvif; + + if (!ieee80211_vif_is_mld(vif)) { + if (mt76_vif_link_phy(mlink) == data->phy) + mt76_offchannel_send_nullfunc(data, vif, -1); + return; + } + + for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { + if (link_id == mvif->deflink_id) + mlink = (struct mt76_vif_link *)vif->drv_priv; + else + mlink = rcu_dereference(mvif->link[link_id]); + if (!mlink) + continue; + if (mt76_vif_link_phy(mlink) != data->phy) + continue; + + mt76_offchannel_send_nullfunc(data, vif, link_id); + } +} + +void mt76_offchannel_notify(struct mt76_phy *phy, bool offchannel) +{ + struct mt76_offchannel_cb_data data = { + .phy = phy, + .offchannel = offchannel, + }; + + if (!phy->num_sta) + return; + + local_bh_disable(); + ieee80211_iterate_active_interfaces_atomic(phy->hw, + IEEE80211_IFACE_ITER_NORMAL, + mt76_offchannel_notify_iter, &data); + local_bh_enable(); +} +EXPORT_SYMBOL_GPL(mt76_offchannel_notify); diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index c7c9ffd0dc3b..c9084c0ae522 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -1813,6 +1813,7 @@ struct mt76_vif_link *mt76_get_vif_phy_link(struct mt76_phy *phy, struct ieee80211_vif *vif); void mt76_put_vif_phy_link(struct mt76_phy *phy, struct ieee80211_vif *vif, struct mt76_vif_link *mlink); +void mt76_offchannel_notify(struct mt76_phy *phy, bool offchannel); /* usb */ static inline bool mt76u_urb_error(struct urb *urb) diff --git a/drivers/net/wireless/mediatek/mt76/scan.c b/drivers/net/wireless/mediatek/mt76/scan.c index 5a67e9b8183a..04cf8a01f20d 100644 --- a/drivers/net/wireless/mediatek/mt76/scan.c +++ b/drivers/net/wireless/mediatek/mt76/scan.c @@ -17,8 +17,10 @@ static void mt76_scan_complete(struct mt76_dev *dev, bool abort) clear_bit(MT76_SCANNING, &phy->state); if (dev->scan.chan && phy->main_chandef.chan && phy->offchannel && - !test_bit(MT76_MCU_RESET, &dev->phy.state)) + !test_bit(MT76_MCU_RESET, &dev->phy.state)) { mt76_set_channel(phy, &phy->main_chandef, false); + mt76_offchannel_notify(phy, false); + } mt76_put_vif_phy_link(phy, dev->scan.vif, dev->scan.mlink); memset(&dev->scan, 0, sizeof(dev->scan)); if (!test_bit(MT76_MCU_RESET, &dev->phy.state)) @@ -135,12 +137,15 @@ void mt76_scan_work(struct work_struct *work) if (dev->scan.chan && phy->num_sta && phy->offchannel) { dev->scan.chan = NULL; mt76_set_channel(phy, &phy->main_chandef, false); + mt76_offchannel_notify(phy, false); goto out; } dev->scan.chan = req->channels[dev->scan.chan_idx++]; offchannel = mt76_offchannel_chandef(phy, dev->scan.chan, &chandef); + if (offchannel) + mt76_offchannel_notify(phy, true); mt76_set_channel(phy, &chandef, offchannel); if (!req->n_ssids) From 0dcef1cbae27d806cd29c296cc03ad6e8ece771d Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 9 Mar 2026 06:07:27 +0000 Subject: [PATCH 0824/1561] wifi: mt76: flush pending TX before channel switch mt76_tx() queues frames on wcid->tx_pending for async processing by tx_worker. In __mt76_set_channel(), the worker gets disabled before it may have run, and the subsequent wait only checks DMA ring queues, not the software pending list. This means frames like nullfunc PS frames from mt76_offchannel_notify() may never be transmitted on the correct channel. Fix this by running mt76_txq_schedule_pending() synchronously after disabling the tx_worker but before setting MT76_RESET, which would otherwise cause mt76_txq_schedule_pending_wcid() to bail out. Link: https://patch.msgid.link/20260309060730.87840-8-nbd@nbd.name Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mac80211.c | 5 +++-- drivers/net/wireless/mediatek/mt76/mt76.h | 1 + drivers/net/wireless/mediatek/mt76/tx.c | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c index 63a42fe16f73..2ddbfdcae939 100644 --- a/drivers/net/wireless/mediatek/mt76/mac80211.c +++ b/drivers/net/wireless/mediatek/mt76/mac80211.c @@ -1031,9 +1031,10 @@ int __mt76_set_channel(struct mt76_phy *phy, struct cfg80211_chan_def *chandef, int timeout = HZ / 5; int ret; - set_bit(MT76_RESET, &phy->state); - mt76_worker_disable(&dev->tx_worker); + mt76_txq_schedule_pending(phy); + + set_bit(MT76_RESET, &phy->state); wait_event_timeout(dev->tx_wait, !mt76_has_tx_pending(phy), timeout); mt76_update_survey(phy); diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index c9084c0ae522..210aafc1e21e 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -1522,6 +1522,7 @@ void mt76_stop_tx_queues(struct mt76_phy *phy, struct ieee80211_sta *sta, void mt76_tx_check_agg_ssn(struct ieee80211_sta *sta, struct sk_buff *skb); void mt76_txq_schedule(struct mt76_phy *phy, enum mt76_txq_id qid); void mt76_txq_schedule_all(struct mt76_phy *phy); +void mt76_txq_schedule_pending(struct mt76_phy *phy); void mt76_tx_worker_run(struct mt76_dev *dev); void mt76_tx_worker(struct mt76_worker *w); void mt76_release_buffered_frames(struct ieee80211_hw *hw, diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c index 0753acf2eccb..ab62591b7a26 100644 --- a/drivers/net/wireless/mediatek/mt76/tx.c +++ b/drivers/net/wireless/mediatek/mt76/tx.c @@ -660,7 +660,7 @@ mt76_txq_schedule_pending_wcid(struct mt76_phy *phy, struct mt76_wcid *wcid, return ret; } -static void mt76_txq_schedule_pending(struct mt76_phy *phy) +void mt76_txq_schedule_pending(struct mt76_phy *phy) { LIST_HEAD(tx_list); int ret = 0; From 331e766e75d2a64b5c1f38aadfcfcf31d264f43e Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 9 Mar 2026 06:07:28 +0000 Subject: [PATCH 0825/1561] wifi: mt76: route nullfunc frames to PSD/ALTX queue ieee80211_is_data() returns true for nullfunc/QoS-nullfunc frames, so they bypass the PSD queue routing and go through the regular VO data queue. This means firmware processes them through the normal TID queue instead of the ALTX queue, which doesn't guarantee immediate transmission. Use ieee80211_is_data_present() instead, which returns false for both management frames and nullfunc/QoS-nullfunc (no payload), routing them to MT_TXQ_PSD. Firmware maps PSD to the ALTX queue, which transmits immediately without PS buffering. This only affects frames from the mt76_tx() pending path. Regular mac80211 TXQ scheduling is unchanged. Link: https://patch.msgid.link/20260309060730.87840-9-nbd@nbd.name Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c index ab62591b7a26..7b0fae694f12 100644 --- a/drivers/net/wireless/mediatek/mt76/tx.c +++ b/drivers/net/wireless/mediatek/mt76/tx.c @@ -632,7 +632,7 @@ mt76_txq_schedule_pending_wcid(struct mt76_phy *phy, struct mt76_wcid *wcid, if ((dev->drv->drv_flags & MT_DRV_HW_MGMT_TXQ) && !(info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP) && - !ieee80211_is_data(hdr->frame_control) && + !ieee80211_is_data_present(hdr->frame_control) && (!ieee80211_is_bufferable_mmpdu(skb) || ieee80211_is_deauth(hdr->frame_control) || head == &wcid->tx_offchannel)) From e765bd6708cdeeab01121247165cae04a254868e Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 9 Mar 2026 06:07:29 +0000 Subject: [PATCH 0826/1561] wifi: mt76: wait for firmware TX completion of mgmt frames before channel switch After flushing software-pending frames to DMA, mt76_has_tx_pending() only checks DMA ring q->queued. For token-based drivers, q->queued is decremented at DMA consumption, but firmware may not have transmitted the frame yet. Waiting for all tokens is not feasible because data frames may be stuck in firmware powersave/aggregation queues. Track PSD queue tokens (firmware ALTX) per phy using an atomic counter. These frames are sent by firmware immediately without PS buffering, so the counter reliably reaches zero after transmission. Increment the counter in mt76_token_consume() and decrement it in mt76_token_release(), only for PSD queue tokens. Include the counter in mt76_has_tx_pending() so channel switch waits for firmware TX completion of management and nullfunc frames. mt7615 (uses mt76_token_get/put) and non-token drivers are unaffected as they never call mt76_token_consume/release. Link: https://patch.msgid.link/20260309060730.87840-10-nbd@nbd.name Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/dma.c | 2 ++ drivers/net/wireless/mediatek/mt76/mac80211.c | 3 +++ drivers/net/wireless/mediatek/mt76/mt76.h | 3 +++ .../net/wireless/mediatek/mt76/mt76_connac_mac.c | 6 ++++++ drivers/net/wireless/mediatek/mt76/mt7996/mac.c | 6 ++++++ drivers/net/wireless/mediatek/mt76/tx.c | 14 +++++++++++++- 6 files changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c index 2d133ace7c33..f8c2fe5f2f58 100644 --- a/drivers/net/wireless/mediatek/mt76/dma.c +++ b/drivers/net/wireless/mediatek/mt76/dma.c @@ -666,6 +666,8 @@ mt76_dma_tx_queue_skb(struct mt76_phy *phy, struct mt76_queue *q, if (!t) goto free_skb; + t->phy_idx = phy->band_idx; + t->qid = qid; txwi = mt76_get_txwi_ptr(dev, t); skb->prev = skb->next = NULL; diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c index 2ddbfdcae939..54ed1cec0228 100644 --- a/drivers/net/wireless/mediatek/mt76/mac80211.c +++ b/drivers/net/wireless/mediatek/mt76/mac80211.c @@ -971,6 +971,9 @@ bool mt76_has_tx_pending(struct mt76_phy *phy) return true; } + if (atomic_read(&phy->mgmt_tx_pending)) + return true; + return false; } EXPORT_SYMBOL_GPL(mt76_has_tx_pending); diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index 210aafc1e21e..bc1153142b71 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -450,6 +450,7 @@ struct mt76_txwi_cache { }; u8 qid; + u8 phy_idx; }; struct mt76_rx_tid { @@ -860,6 +861,8 @@ struct mt76_phy { struct list_head tx_list; struct mt76_queue *q_tx[__MT_TXQ_MAX]; + atomic_t mgmt_tx_pending; + struct cfg80211_chan_def chandef; struct cfg80211_chan_def main_chandef; bool offchannel; diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c index c2cf6893848b..0339e2e7ab60 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c @@ -1209,5 +1209,11 @@ void mt76_connac2_tx_token_put(struct mt76_dev *dev) } spin_unlock_bh(&dev->token_lock); idr_destroy(&dev->token); + + for (id = 0; id < __MT_MAX_BAND; id++) { + struct mt76_phy *phy = dev->phys[id]; + if (phy) + atomic_set(&phy->mgmt_tx_pending, 0); + } } EXPORT_SYMBOL_GPL(mt76_connac2_tx_token_put); diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c index f5537eba9c6b..7deedd3fcb19 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c @@ -2220,6 +2220,12 @@ void mt7996_tx_token_put(struct mt7996_dev *dev) } spin_unlock_bh(&dev->mt76.token_lock); idr_destroy(&dev->mt76.token); + + for (id = 0; id < __MT_MAX_BAND; id++) { + struct mt76_phy *phy = dev->mt76.phys[id]; + if (phy) + atomic_set(&phy->mgmt_tx_pending, 0); + } } static int diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c index 7b0fae694f12..22f9690634c9 100644 --- a/drivers/net/wireless/mediatek/mt76/tx.c +++ b/drivers/net/wireless/mediatek/mt76/tx.c @@ -866,9 +866,15 @@ int mt76_token_consume(struct mt76_dev *dev, struct mt76_txwi_cache **ptxwi) token = idr_alloc(&dev->token, *ptxwi, dev->token_start, dev->token_start + dev->token_size, GFP_ATOMIC); - if (token >= dev->token_start) + if (token >= dev->token_start) { dev->token_count++; + if ((*ptxwi)->qid == MT_TXQ_PSD) { + struct mt76_phy *mphy = mt76_dev_phy(dev, (*ptxwi)->phy_idx); + atomic_inc(&mphy->mgmt_tx_pending); + } + } + #ifdef CONFIG_NET_MEDIATEK_SOC_WED if (mtk_wed_device_active(&dev->mmio.wed) && token >= dev->mmio.wed.wlan.token_start) @@ -913,6 +919,12 @@ mt76_token_release(struct mt76_dev *dev, int token, bool *wake) if (txwi) { dev->token_count--; + if (txwi->qid == MT_TXQ_PSD) { + struct mt76_phy *mphy = mt76_dev_phy(dev, txwi->phy_idx); + if (atomic_dec_and_test(&mphy->mgmt_tx_pending)) + wake_up(&dev->tx_wait); + } + #ifdef CONFIG_NET_MEDIATEK_SOC_WED if (mtk_wed_device_active(&dev->mmio.wed) && token >= dev->mmio.wed.wlan.token_start && From 4df75606ece504a0cef3552ae88217a4af1b7dba Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 9 Mar 2026 06:07:30 +0000 Subject: [PATCH 0827/1561] wifi: mt76: add per-link beacon monitoring for MLO With chanctx drivers using hardware scan or remain-on-channel, mac80211 does not know when the radio goes off-channel, which breaks its software beacon loss detection. Implement per-link beacon monitoring in the driver. Track the last beacon timestamp per link and check for beacon loss periodically from the mac_work handler. Beacon monitoring is initialized on association and on late link activation, and cleared on disassociation. The beacon_mon_last timestamp is reset when returning from offchannel and after channel switches to prevent false beacon loss detection. Link: https://patch.msgid.link/20260309060730.87840-11-nbd@nbd.name Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/channel.c | 2 + drivers/net/wireless/mediatek/mt76/mac80211.c | 114 +++++++++++++++++- drivers/net/wireless/mediatek/mt76/mt76.h | 5 + .../net/wireless/mediatek/mt76/mt7996/mac.c | 6 +- .../net/wireless/mediatek/mt76/mt7996/main.c | 32 +++++ drivers/net/wireless/mediatek/mt76/scan.c | 1 - 6 files changed, 155 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/channel.c b/drivers/net/wireless/mediatek/mt76/channel.c index 3072e11e2688..cf3fc09e5d5a 100644 --- a/drivers/net/wireless/mediatek/mt76/channel.c +++ b/drivers/net/wireless/mediatek/mt76/channel.c @@ -257,6 +257,8 @@ skip_link_replace: continue; mlink->ctx = vifs->new_ctx; + if (mlink->beacon_mon_interval) + WRITE_ONCE(mlink->beacon_mon_last, jiffies); } out: diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c index 54ed1cec0228..4ae5e4715a9c 100644 --- a/drivers/net/wireless/mediatek/mt76/mac80211.c +++ b/drivers/net/wireless/mediatek/mt76/mac80211.c @@ -2199,8 +2199,11 @@ mt76_offchannel_notify_iter(void *_data, u8 *mac, struct ieee80211_vif *vif) mvif = mlink->mvif; if (!ieee80211_vif_is_mld(vif)) { - if (mt76_vif_link_phy(mlink) == data->phy) + if (mt76_vif_link_phy(mlink) == data->phy) { + if (!data->offchannel && mlink->beacon_mon_interval) + WRITE_ONCE(mlink->beacon_mon_last, jiffies); mt76_offchannel_send_nullfunc(data, vif, -1); + } return; } @@ -2214,6 +2217,9 @@ mt76_offchannel_notify_iter(void *_data, u8 *mac, struct ieee80211_vif *vif) if (mt76_vif_link_phy(mlink) != data->phy) continue; + if (!data->offchannel && mlink->beacon_mon_interval) + WRITE_ONCE(mlink->beacon_mon_last, jiffies); + mt76_offchannel_send_nullfunc(data, vif, link_id); } } @@ -2235,3 +2241,109 @@ void mt76_offchannel_notify(struct mt76_phy *phy, bool offchannel) local_bh_enable(); } EXPORT_SYMBOL_GPL(mt76_offchannel_notify); + +struct mt76_rx_beacon_data { + struct mt76_phy *phy; + const u8 *bssid; +}; + +static void mt76_rx_beacon_iter(void *_data, u8 *mac, + struct ieee80211_vif *vif) +{ + struct mt76_rx_beacon_data *data = _data; + struct mt76_vif_link *mlink = (struct mt76_vif_link *)vif->drv_priv; + struct mt76_vif_data *mvif = mlink->mvif; + int link_id; + + if (vif->type != NL80211_IFTYPE_STATION || !vif->cfg.assoc) + return; + + for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { + struct ieee80211_bss_conf *link_conf; + + if (link_id == mvif->deflink_id) + mlink = (struct mt76_vif_link *)vif->drv_priv; + else + mlink = rcu_dereference(mvif->link[link_id]); + if (!mlink || !mlink->beacon_mon_interval) + continue; + + if (mt76_vif_link_phy(mlink) != data->phy) + continue; + + link_conf = rcu_dereference(vif->link_conf[link_id]); + if (!link_conf) + continue; + + if (!ether_addr_equal(link_conf->bssid, data->bssid) && + (!link_conf->nontransmitted || + !ether_addr_equal(link_conf->transmitter_bssid, + data->bssid))) + continue; + + WRITE_ONCE(mlink->beacon_mon_last, jiffies); + } +} + +void mt76_rx_beacon(struct mt76_phy *phy, struct sk_buff *skb) +{ + struct mt76_rx_status *status = (struct mt76_rx_status *)skb->cb; + struct ieee80211_hdr *hdr = mt76_skb_get_hdr(skb); + struct mt76_rx_beacon_data data = { + .phy = phy, + .bssid = hdr->addr3, + }; + + mt76_scan_rx_beacon(phy->dev, phy->chandef.chan); + + if (!phy->num_sta) + return; + + if (status->flag & (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_ONLY_MONITOR)) + return; + + ieee80211_iterate_active_interfaces_atomic(phy->hw, + IEEE80211_IFACE_ITER_RESUME_ALL, + mt76_rx_beacon_iter, &data); +} +EXPORT_SYMBOL_GPL(mt76_rx_beacon); + +static void mt76_beacon_mon_iter(void *data, u8 *mac, + struct ieee80211_vif *vif) +{ + struct mt76_phy *phy = data; + struct mt76_vif_link *mlink = (struct mt76_vif_link *)vif->drv_priv; + struct mt76_vif_data *mvif = mlink->mvif; + int link_id; + + if (vif->type != NL80211_IFTYPE_STATION || !vif->cfg.assoc) + return; + + for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { + if (link_id == mvif->deflink_id) + mlink = (struct mt76_vif_link *)vif->drv_priv; + else + mlink = rcu_dereference(mvif->link[link_id]); + if (!mlink || !mlink->beacon_mon_interval) + continue; + + if (mt76_vif_link_phy(mlink) != phy) + continue; + + if (time_after(jiffies, + READ_ONCE(mlink->beacon_mon_last) + + MT76_BEACON_MON_MAX_MISS * mlink->beacon_mon_interval)) + ieee80211_beacon_loss(vif); + } +} + +void mt76_beacon_mon_check(struct mt76_phy *phy) +{ + if (phy->offchannel) + return; + + ieee80211_iterate_active_interfaces_atomic(phy->hw, + IEEE80211_IFACE_ITER_RESUME_ALL, + mt76_beacon_mon_iter, phy); +} +EXPORT_SYMBOL_GPL(mt76_beacon_mon_check); diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index bc1153142b71..527bef97e122 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -364,6 +364,7 @@ enum mt76_wcid_flags { }; #define MT76_N_WCIDS 1088 +#define MT76_BEACON_MON_MAX_MISS 7 /* stored in ieee80211_tx_info::hw_queue */ #define MT_TX_HW_QUEUE_PHY GENMASK(3, 2) @@ -833,6 +834,8 @@ struct mt76_vif_link { u8 mcast_rates_idx; u8 beacon_rates_idx; bool offchannel; + unsigned long beacon_mon_last; + u16 beacon_mon_interval; struct ieee80211_chanctx_conf *ctx; struct mt76_wcid *wcid; struct mt76_vif_data *mvif; @@ -1605,6 +1608,8 @@ int mt76_hw_scan(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_scan_request *hw_req); void mt76_cancel_hw_scan(struct ieee80211_hw *hw, struct ieee80211_vif *vif); void mt76_scan_rx_beacon(struct mt76_dev *dev, struct ieee80211_channel *chan); +void mt76_rx_beacon(struct mt76_phy *phy, struct sk_buff *skb); +void mt76_beacon_mon_check(struct mt76_phy *phy); void mt76_sw_scan(struct ieee80211_hw *hw, struct ieee80211_vif *vif, const u8 *mac); void mt76_sw_scan_complete(struct ieee80211_hw *hw, diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c index 7deedd3fcb19..e8bf58dc50b1 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c @@ -515,9 +515,6 @@ mt7996_mac_fill_rx(struct mt7996_dev *dev, enum mt76_rxq_id q, qos_ctl = FIELD_GET(MT_RXD10_QOS_CTL, v2); seq_ctrl = FIELD_GET(MT_RXD10_SEQ_CTRL, v2); - if (ieee80211_is_beacon(fc)) - mt76_scan_rx_beacon(&dev->mt76, mphy->chandef.chan); - rxd += 4; if ((u8 *)rxd - skb->data >= skb->len) return -EINVAL; @@ -664,6 +661,8 @@ mt7996_mac_fill_rx(struct mt7996_dev *dev, enum mt76_rxq_id q, hdr = mt76_skb_get_hdr(skb); fc = hdr->frame_control; + if (ieee80211_is_beacon(fc)) + mt76_rx_beacon(mphy, skb); if (ieee80211_is_data_qos(fc)) { u8 *qos = ieee80211_get_qos_ctl(hdr); @@ -2954,6 +2953,7 @@ void mt7996_mac_work(struct work_struct *work) mutex_unlock(&mphy->dev->mutex); + mt76_beacon_mon_check(mphy); mt76_tx_status_check(mphy->dev, false); ieee80211_queue_delayed_work(mphy->hw, &mphy->mac_work, diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index 8ecbc0511848..39e1999143da 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -375,6 +375,17 @@ int mt7996_vif_link_add(struct mt76_phy *mphy, struct ieee80211_vif *vif, mtxq->wcid = idx; } + if (vif->type == NL80211_IFTYPE_STATION) { + vif->driver_flags |= IEEE80211_VIF_BEACON_FILTER; + + if (vif->cfg.assoc && link_conf->beacon_int) { + mlink->beacon_mon_interval = + msecs_to_jiffies(ieee80211_tu_to_usec( + link_conf->beacon_int) / 1000); + WRITE_ONCE(mlink->beacon_mon_last, jiffies); + } + } + return 0; } @@ -831,6 +842,13 @@ mt7996_vif_cfg_changed(struct ieee80211_hw *hw, struct ieee80211_vif *vif, if (!link) continue; + if (vif->type == NL80211_IFTYPE_STATION) { + link->mt76.beacon_mon_interval = + msecs_to_jiffies(ieee80211_tu_to_usec( + link_conf->beacon_int) / 1000); + WRITE_ONCE(link->mt76.beacon_mon_last, jiffies); + } + phy = mt7996_vif_link_phy(link); if (!phy) continue; @@ -844,6 +862,20 @@ mt7996_vif_cfg_changed(struct ieee80211_hw *hw, struct ieee80211_vif *vif, } } + if ((changed & BSS_CHANGED_ASSOC) && !vif->cfg.assoc && + vif->type == NL80211_IFTYPE_STATION) { + struct ieee80211_bss_conf *link_conf; + unsigned long link_id; + + for_each_vif_active_link(vif, link_conf, link_id) { + struct mt7996_vif_link *link; + + link = mt7996_vif_link(dev, vif, link_id); + if (link) + link->mt76.beacon_mon_interval = 0; + } + } + mutex_unlock(&dev->mt76.mutex); } diff --git a/drivers/net/wireless/mediatek/mt76/scan.c b/drivers/net/wireless/mediatek/mt76/scan.c index 04cf8a01f20d..fbc10c9657cf 100644 --- a/drivers/net/wireless/mediatek/mt76/scan.c +++ b/drivers/net/wireless/mediatek/mt76/scan.c @@ -105,7 +105,6 @@ void mt76_scan_rx_beacon(struct mt76_dev *dev, struct ieee80211_channel *chan) out: spin_unlock(&dev->scan_lock); } -EXPORT_SYMBOL_GPL(mt76_scan_rx_beacon); void mt76_scan_work(struct work_struct *work) { From e6f48512c1ceebcd1ce6bb83df3b3d56a261507d Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Tue, 10 Mar 2026 19:28:24 -0500 Subject: [PATCH 0828/1561] wifi: mt76: mt792x: describe USB WFSYS reset with a descriptor Prepare mt792xu_wfsys_reset() for chips that share the same USB WFSYS reset flow but use different register definitions. This is a pure refactor of the current mt7921u path and keeps the reset sequence unchanged. Signed-off-by: Sean Wang Link: https://patch.msgid.link/20260311002825.15502-1-sean.wang@kernel.org Signed-off-by: Felix Fietkau --- .../net/wireless/mediatek/mt76/mt792x_usb.c | 40 +++++++++++++++---- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_usb.c b/drivers/net/wireless/mediatek/mt76/mt792x_usb.c index 552808458138..a92e872226cf 100644 --- a/drivers/net/wireless/mediatek/mt76/mt792x_usb.c +++ b/drivers/net/wireless/mediatek/mt76/mt792x_usb.c @@ -206,6 +206,24 @@ static void mt792xu_epctl_rst_opt(struct mt792x_dev *dev, bool reset) mt792xu_uhw_wr(&dev->mt76, MT_SSUSB_EPCTL_CSR_EP_RST_OPT, val); } +struct mt792xu_wfsys_desc { + u32 rst_reg; + u32 done_reg; + u32 done_mask; + u32 done_val; + u32 delay_ms; + bool need_status_sel; +}; + +static const struct mt792xu_wfsys_desc mt7921_wfsys_desc = { + .rst_reg = MT_CBTOP_RGU_WF_SUBSYS_RST, + .done_reg = MT_UDMA_CONN_INFRA_STATUS, + .done_mask = MT_UDMA_CONN_WFSYS_INIT_DONE, + .done_val = MT_UDMA_CONN_WFSYS_INIT_DONE, + .delay_ms = 0, + .need_status_sel = true, +}; + int mt792xu_dma_init(struct mt792x_dev *dev, bool resume) { int err; @@ -236,25 +254,31 @@ EXPORT_SYMBOL_GPL(mt792xu_dma_init); int mt792xu_wfsys_reset(struct mt792x_dev *dev) { + const struct mt792xu_wfsys_desc *desc = &mt7921_wfsys_desc; u32 val; int i; mt792xu_epctl_rst_opt(dev, false); - val = mt792xu_uhw_rr(&dev->mt76, MT_CBTOP_RGU_WF_SUBSYS_RST); + val = mt792xu_uhw_rr(&dev->mt76, desc->rst_reg); val |= MT_CBTOP_RGU_WF_SUBSYS_RST_WF_WHOLE_PATH; - mt792xu_uhw_wr(&dev->mt76, MT_CBTOP_RGU_WF_SUBSYS_RST, val); + mt792xu_uhw_wr(&dev->mt76, desc->rst_reg, val); - usleep_range(10, 20); + if (desc->delay_ms) + msleep(desc->delay_ms); + else + usleep_range(10, 20); - val = mt792xu_uhw_rr(&dev->mt76, MT_CBTOP_RGU_WF_SUBSYS_RST); + val = mt792xu_uhw_rr(&dev->mt76, desc->rst_reg); val &= ~MT_CBTOP_RGU_WF_SUBSYS_RST_WF_WHOLE_PATH; - mt792xu_uhw_wr(&dev->mt76, MT_CBTOP_RGU_WF_SUBSYS_RST, val); + mt792xu_uhw_wr(&dev->mt76, desc->rst_reg, val); + + if (desc->need_status_sel) + mt792xu_uhw_wr(&dev->mt76, MT_UDMA_CONN_INFRA_STATUS_SEL, 0); - mt792xu_uhw_wr(&dev->mt76, MT_UDMA_CONN_INFRA_STATUS_SEL, 0); for (i = 0; i < MT792x_WFSYS_INIT_RETRY_COUNT; i++) { - val = mt792xu_uhw_rr(&dev->mt76, MT_UDMA_CONN_INFRA_STATUS); - if (val & MT_UDMA_CONN_WFSYS_INIT_DONE) + val = mt792xu_uhw_rr(&dev->mt76, desc->done_reg); + if ((val & desc->done_mask) == desc->done_val) break; msleep(100); From 56154fef47d104effa9f29ed3db4f805cbc0d640 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Tue, 10 Mar 2026 19:28:25 -0500 Subject: [PATCH 0829/1561] wifi: mt76: mt792x: fix mt7925u USB WFSYS reset handling mt7925u uses different reset/status registers from mt7921u. Reusing the mt7921u register set causes the WFSYS reset to fail. Add a chip-specific descriptor in mt792xu_wfsys_reset() to select the correct registers and fix mt7925u failing to initialize after a warm reboot. Fixes: d28e1a48952e ("wifi: mt76: mt792x: introduce mt792x-usb module") Cc: stable@vger.kernel.org Signed-off-by: Sean Wang Link: https://patch.msgid.link/20260311002825.15502-2-sean.wang@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt792x_regs.h | 4 ++++ drivers/net/wireless/mediatek/mt76/mt792x_usb.c | 13 ++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_regs.h b/drivers/net/wireless/mediatek/mt76/mt792x_regs.h index 7ddde9286861..d2a8b2b0df32 100644 --- a/drivers/net/wireless/mediatek/mt76/mt792x_regs.h +++ b/drivers/net/wireless/mediatek/mt76/mt792x_regs.h @@ -392,6 +392,10 @@ #define MT_CBTOP_RGU_WF_SUBSYS_RST MT_CBTOP_RGU(0x600) #define MT_CBTOP_RGU_WF_SUBSYS_RST_WF_WHOLE_PATH BIT(0) +#define MT7925_CBTOP_RGU_WF_SUBSYS_RST 0x70028600 +#define MT7925_WFSYS_INIT_DONE_ADDR 0x184c1604 +#define MT7925_WFSYS_INIT_DONE 0x00001d1e + #define MT_HW_BOUND 0x70010020 #define MT_HW_CHIPID 0x70010200 #define MT_HW_REV 0x70010204 diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_usb.c b/drivers/net/wireless/mediatek/mt76/mt792x_usb.c index a92e872226cf..47827d1c5ccb 100644 --- a/drivers/net/wireless/mediatek/mt76/mt792x_usb.c +++ b/drivers/net/wireless/mediatek/mt76/mt792x_usb.c @@ -224,6 +224,15 @@ static const struct mt792xu_wfsys_desc mt7921_wfsys_desc = { .need_status_sel = true, }; +static const struct mt792xu_wfsys_desc mt7925_wfsys_desc = { + .rst_reg = MT7925_CBTOP_RGU_WF_SUBSYS_RST, + .done_reg = MT7925_WFSYS_INIT_DONE_ADDR, + .done_mask = U32_MAX, + .done_val = MT7925_WFSYS_INIT_DONE, + .delay_ms = 20, + .need_status_sel = false, +}; + int mt792xu_dma_init(struct mt792x_dev *dev, bool resume) { int err; @@ -254,7 +263,9 @@ EXPORT_SYMBOL_GPL(mt792xu_dma_init); int mt792xu_wfsys_reset(struct mt792x_dev *dev) { - const struct mt792xu_wfsys_desc *desc = &mt7921_wfsys_desc; + const struct mt792xu_wfsys_desc *desc = is_mt7925(&dev->mt76) ? + &mt7925_wfsys_desc : + &mt7921_wfsys_desc; u32 val; int i; From 73b46379e5231138025b271ce8e158d2a8aa0768 Mon Sep 17 00:00:00 2001 From: Peter Chiu Date: Thu, 12 Mar 2026 17:57:19 +0800 Subject: [PATCH 0830/1561] wifi: mt76: mt7996: fix RRO EMU configuration Use the correct helper to update specific bitfields instead of overwriting the entire register. Fixes: eedb427eb260 ("wifi: mt76: mt7996: Enable HW RRO for MT7992 chipset") Signed-off-by: Peter Chiu Signed-off-by: Shayne Chen Acked-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260312095724.2117448-1-shayne.chen@mediatek.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7996/init.c | 3 +-- drivers/net/wireless/mediatek/mt76/mt7996/mac.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/init.c b/drivers/net/wireless/mediatek/mt76/mt7996/init.c index 5aaa93767109..f3239f530aea 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/init.c @@ -873,8 +873,7 @@ void mt7996_rro_hw_init(struct mt7996_dev *dev) } } else { /* set emul 3.0 function */ - mt76_wr(dev, MT_RRO_3_0_EMU_CONF, - MT_RRO_3_0_EMU_CONF_EN_MASK); + mt76_set(dev, MT_RRO_3_0_EMU_CONF, MT_RRO_3_0_EMU_CONF_EN_MASK); mt76_wr(dev, MT_RRO_ADDR_ARRAY_BASE0, dev->wed_rro.addr_elem[0].phy_addr); diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c index e8bf58dc50b1..c895e8a5de4d 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c @@ -2577,7 +2577,7 @@ void mt7996_mac_reset_work(struct work_struct *work) mt7996_dma_start(dev, false, false); if (!is_mt7996(&dev->mt76) && dev->mt76.hwrro_mode == MT76_HWRRO_V3) - mt76_wr(dev, MT_RRO_3_0_EMU_CONF, MT_RRO_3_0_EMU_CONF_EN_MASK); + mt76_set(dev, MT_RRO_3_0_EMU_CONF, MT_RRO_3_0_EMU_CONF_EN_MASK); if (mtk_wed_device_active(&dev->mt76.mmio.wed)) { u32 wed_irq_mask = MT_INT_TX_DONE_BAND2 | From cf909557c1ba1215328db41f883ca5af5849f2fd Mon Sep 17 00:00:00 2001 From: Howard Hsu Date: Thu, 12 Mar 2026 17:57:20 +0800 Subject: [PATCH 0831/1561] wifi: mt76: mt7996: support critical packet mode for MT7990 chipsets For MT7990 chipsets, critical packet mode must be enabled. Without this, some higher priority packets may be placed in the wrong AC queue. Signed-off-by: Howard Hsu Signed-off-by: Shayne Chen Link: https://patch.msgid.link/20260312095724.2117448-2-shayne.chen@mediatek.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7996/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index 39e1999143da..834edd31458d 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -56,7 +56,7 @@ static int mt7996_start(struct ieee80211_hw *hw) mutex_lock(&dev->mt76.mutex); ret = mt7996_mcu_set_hdr_trans(dev, true); - if (!ret && is_mt7992(&dev->mt76)) { + if (!ret && !is_mt7996(&dev->mt76)) { u8 queue = mt76_connac_lmac_mapping(IEEE80211_AC_VI); ret = mt7996_mcu_cp_support(dev, queue); From 22f9abaf3656cf08d36196bab950668e7fc64381 Mon Sep 17 00:00:00 2001 From: Peter Chiu Date: Thu, 12 Mar 2026 17:57:21 +0800 Subject: [PATCH 0832/1561] wifi: mt76: mt7996: update WFSYS reset flow for MT7990 chipsets Skip WFSYS reset during bootup for MT7990 chipsets; only reset if L0.5 recovery is triggered. Without this fix, the following kernel error may occur: Internal error: synchronous external abort. Signed-off-by: Peter Chiu Signed-off-by: Shayne Chen Link: https://patch.msgid.link/20260312095724.2117448-3-shayne.chen@mediatek.com Signed-off-by: Felix Fietkau --- .../net/wireless/mediatek/mt76/mt7996/init.c | 29 +++++++++++++++++-- .../net/wireless/mediatek/mt76/mt7996/regs.h | 8 +++++ 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/init.c b/drivers/net/wireless/mediatek/mt76/mt7996/init.c index f3239f530aea..8dfb81eabc9a 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/init.c @@ -791,11 +791,34 @@ static void mt7996_init_work(struct work_struct *work) void mt7996_wfsys_reset(struct mt7996_dev *dev) { - mt76_set(dev, MT_WF_SUBSYS_RST, 0x1); + if (!is_mt7990(&dev->mt76)) { + mt76_set(dev, MT_WF_SUBSYS_RST, 0x1); + msleep(20); + + mt76_clear(dev, MT_WF_SUBSYS_RST, 0x1); + msleep(20); + + return; + } + + if (!dev->recovery.hw_full_reset) + return; + + mt76_set(dev, MT_WF_SUBSYS_RST, + MT_WF_SUBSYS_RST_WHOLE_PATH_RST_REVERT | + MT_WF_SUBSYS_RST_BYPASS_WFDMA_SLP_PROT | + MT_WF_SUBSYS_RST_BYPASS_WFDMA2_SLP_PROT); + mt76_rmw(dev, MT_WF_SUBSYS_RST, + MT_WF_SUBSYS_RST_WHOLE_PATH_RST_REVERT_CYCLE, + u32_encode_bits(0x20, MT_WF_SUBSYS_RST_WHOLE_PATH_RST_REVERT_CYCLE)); + mt76_clear(dev, MT_WF_L05_RST, MT_WF_L05_RST_WF_RST_MASK); + mt76_set(dev, MT_WF_SUBSYS_RST, MT_WF_SUBSYS_RST_WHOLE_PATH_RST); msleep(20); - mt76_clear(dev, MT_WF_SUBSYS_RST, 0x1); - msleep(20); + if (mt76_poll(dev, MT_WF_L05_RST, MT_WF_L05_RST_WF_RST_MASK, 0x1a, 1000)) + return; + + dev_err(dev->mt76.dev, "wfsys reset fail\n"); } static void mt7996_rro_hw_init_v3(struct mt7996_dev *dev) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/regs.h b/drivers/net/wireless/mediatek/mt76/mt7996/regs.h index 393faae2d52b..c6379933b6c3 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/regs.h +++ b/drivers/net/wireless/mediatek/mt76/mt7996/regs.h @@ -736,7 +736,15 @@ enum offs_rev { #define MT_HW_REV 0x70010204 #define MT_HW_REV1 0x8a00 +#define MT_WF_L05_RST 0x70028550 +#define MT_WF_L05_RST_WF_RST_MASK GENMASK(4, 0) + #define MT_WF_SUBSYS_RST 0x70028600 +#define MT_WF_SUBSYS_RST_WHOLE_PATH_RST BIT(0) +#define MT_WF_SUBSYS_RST_WHOLE_PATH_RST_REVERT BIT(5) +#define MT_WF_SUBSYS_RST_BYPASS_WFDMA_SLP_PROT BIT(6) +#define MT_WF_SUBSYS_RST_BYPASS_WFDMA2_SLP_PROT BIT(16) +#define MT_WF_SUBSYS_RST_WHOLE_PATH_RST_REVERT_CYCLE GENMASK(15, 8) /* PCIE MAC */ #define MT_PCIE_MAC_BASE 0x74030000 From 9eeea2984c309f4c21c697e163abbef00c4d2b5e Mon Sep 17 00:00:00 2001 From: Rex Lu Date: Thu, 12 Mar 2026 17:57:22 +0800 Subject: [PATCH 0833/1561] wifi: mt76: mt7996: adjust timeout value for boot-up calibration commands Align the vendor driver by adjusting the timeout values for the MCU_UNI_CMD_EFUSE_CTRL and MCU_UNI_CMD_EXT_EEPROM_CTRL commands. Without this adjustment, false positive command timeout errors may occur, especially on some iPA variants. Signed-off-by: Rex Lu Signed-off-by: Shayne Chen Link: https://patch.msgid.link/20260312095724.2117448-4-shayne.chen@mediatek.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7996/mcu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c index 2a50d0758d9c..4bf22318396f 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c @@ -278,7 +278,8 @@ mt7996_mcu_set_timeout(struct mt76_dev *mdev, int cmd) mdev->mcu.timeout = 2 * HZ; return; case MCU_UNI_CMD_EFUSE_CTRL: - mdev->mcu.timeout = 20 * HZ; + case MCU_UNI_CMD_EXT_EEPROM_CTRL: + mdev->mcu.timeout = 30 * HZ; return; default: break; From fc4533b5db80792fccc2bf4a14322e7af1e55980 Mon Sep 17 00:00:00 2001 From: StanleyYP Wang Date: Thu, 12 Mar 2026 17:57:24 +0800 Subject: [PATCH 0834/1561] wifi: mt76: mt7996: fix issues with manually triggered radar detection Disallow triggering radar detection on non-DFS channels to prevent paused TX queues from failing to resume, as a channel switch is not performed in this case. Signed-off-by: StanleyYP Wang Signed-off-by: Shayne Chen Link: https://patch.msgid.link/20260312095724.2117448-6-shayne.chen@mediatek.com Signed-off-by: Felix Fietkau --- .../wireless/mediatek/mt76/mt7996/debugfs.c | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7996/debugfs.c index 6cc63f87b222..34af800964d1 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/debugfs.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/debugfs.c @@ -226,14 +226,23 @@ mt7996_radar_trigger(void *data, u64 val) #define RADAR_BACKGROUND 2 struct mt7996_dev *dev = data; struct mt7996_phy *phy = mt7996_band_phy(dev, NL80211_BAND_5GHZ); - int rdd_idx; + struct cfg80211_chan_def *chandef; + int rdd_idx, ret; if (!phy || !val || val > RADAR_BACKGROUND) return -EINVAL; - if (val == RADAR_BACKGROUND && !dev->rdd2_phy) { - dev_err(dev->mt76.dev, "Background radar is not enabled\n"); - return -EINVAL; + if (test_bit(MT76_SCANNING, &phy->mt76->state)) + return -EBUSY; + + if (val == RADAR_BACKGROUND) { + if (!dev->rdd2_phy || !cfg80211_chandef_valid(&dev->rdd2_chandef)) { + dev_err(dev->mt76.dev, "Background radar is not enabled\n"); + return -EINVAL; + } + chandef = &dev->rdd2_chandef; + } else { + chandef = &phy->mt76->chandef; } rdd_idx = mt7996_get_rdd_idx(phy, val == RADAR_BACKGROUND); @@ -242,6 +251,11 @@ mt7996_radar_trigger(void *data, u64 val) return -EINVAL; } + ret = cfg80211_chandef_dfs_required(dev->mt76.hw->wiphy, chandef, + NL80211_IFTYPE_AP); + if (ret <= 0) + return ret; + return mt7996_mcu_rdd_cmd(dev, RDD_RADAR_EMULATE, rdd_idx, 0); } From a1353d994c167c818ef4165653a5ccec091ba534 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Fri, 6 Mar 2026 17:22:20 -0600 Subject: [PATCH 0835/1561] wifi: mt76: mt7925: pass mlink to sta_amsdu_tlv() Drop the mt792x_sta_to_link() lookup in mt7925_mcu_sta_amsdu_tlv() and pass mlink from the caller instead. The link context is already known so the lookup is redundant. This makes link ownership explicit and keeps the helper lookup-free. No functional change intended. Signed-off-by: Sean Wang Link: https://patch.msgid.link/20260306232238.2039675-2-sean.wang@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7925/mcu.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c index abcdd0e0b3b5..fa5f79004a6e 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c @@ -1726,10 +1726,9 @@ mt7925_mcu_sta_vht_tlv(struct sk_buff *skb, struct ieee80211_link_sta *link_sta) static void mt7925_mcu_sta_amsdu_tlv(struct sk_buff *skb, struct ieee80211_vif *vif, - struct ieee80211_link_sta *link_sta) + struct ieee80211_link_sta *link_sta, + struct mt792x_link_sta *mlink) { - struct mt792x_sta *msta = (struct mt792x_sta *)link_sta->sta->drv_priv; - struct mt792x_link_sta *mlink; struct sta_rec_amsdu *amsdu; struct tlv *tlv; @@ -1745,7 +1744,6 @@ mt7925_mcu_sta_amsdu_tlv(struct sk_buff *skb, amsdu->max_amsdu_num = 8; amsdu->amsdu_en = true; - mlink = mt792x_sta_to_link(msta, link_sta->link_id); mlink->wcid.amsdu = true; switch (link_sta->agg.max_amsdu_len) { @@ -1966,6 +1964,7 @@ mt7925_mcu_sta_cmd(struct mt76_phy *phy, struct mt792x_vif *mvif = (struct mt792x_vif *)info->vif->drv_priv; struct mt76_dev *dev = phy->dev; struct mt792x_bss_conf *mconf; + struct mt792x_link_sta *mlink; struct sk_buff *skb; int conn_state; @@ -1980,6 +1979,8 @@ mt7925_mcu_sta_cmd(struct mt76_phy *phy, CONN_STATE_DISCONNECT; if (info->enable && info->link_sta) { + mlink = container_of(info->wcid, struct mt792x_link_sta, wcid); + mt76_connac_mcu_sta_basic_tlv(dev, skb, info->link_conf, info->link_sta, conn_state, info->newly); @@ -1987,7 +1988,7 @@ mt7925_mcu_sta_cmd(struct mt76_phy *phy, mt7925_mcu_sta_ht_tlv(skb, info->link_sta); mt7925_mcu_sta_vht_tlv(skb, info->link_sta); mt76_connac_mcu_sta_uapsd(skb, info->vif, info->link_sta->sta); - mt7925_mcu_sta_amsdu_tlv(skb, info->vif, info->link_sta); + mt7925_mcu_sta_amsdu_tlv(skb, info->vif, info->link_sta, mlink); mt7925_mcu_sta_he_tlv(skb, info->link_sta); mt7925_mcu_sta_he_6g_tlv(skb, info->link_sta); mt7925_mcu_sta_eht_tlv(skb, info->link_sta); From ea757740dd87c0b00c4844dd3282dff4d83fa3c7 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Fri, 6 Mar 2026 17:22:21 -0600 Subject: [PATCH 0836/1561] wifi: mt76: mt7925: pass WCID indices to bss_basic_tlv() Drop the mt792x_sta_to_link() lookup in mt7925_mcu_bss_basic_tlv() and pass the resolved WCID indices from the caller instead. The link context is already known, so the lookup is redundant. This makes link ownership explicit and keeps the helper lookup-free. No functional change intended. Signed-off-by: Sean Wang Link: https://patch.msgid.link/20260306232238.2039675-3-sean.wang@kernel.org Signed-off-by: Felix Fietkau --- .../net/wireless/mediatek/mt76/mt7925/main.c | 34 ++++++---- .../net/wireless/mediatek/mt76/mt7925/mcu.c | 65 ++++++++++++------- .../net/wireless/mediatek/mt76/mt7925/mcu.h | 7 ++ 3 files changed, 70 insertions(+), 36 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/main.c b/drivers/net/wireless/mediatek/mt76/mt7925/main.c index afcc0fa4aa35..353461f0e169 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/main.c @@ -850,20 +850,22 @@ mt7925_get_rates_table(struct ieee80211_hw *hw, struct ieee80211_vif *vif, static int mt7925_mac_link_sta_add(struct mt76_dev *mdev, struct ieee80211_vif *vif, - struct ieee80211_link_sta *link_sta) + struct ieee80211_link_sta *link_sta, + struct mt792x_link_sta *mlink) { struct mt792x_dev *dev = container_of(mdev, struct mt792x_dev, mt76); struct mt792x_vif *mvif = (struct mt792x_vif *)vif->drv_priv; struct ieee80211_bss_conf *link_conf; struct mt792x_bss_conf *mconf; u8 link_id = link_sta->link_id; - struct mt792x_link_sta *mlink; struct mt792x_sta *msta; struct mt76_wcid *wcid; int ret, idx; msta = (struct mt792x_sta *)link_sta->sta->drv_priv; - mlink = mt792x_sta_to_link(msta, link_id); + + if (WARN_ON_ONCE(!mlink)) + return -EINVAL; idx = mt76_wcid_alloc(dev->mt76.wcid_mask, MT792x_WTBL_STA - 1); if (idx < 0) @@ -898,12 +900,21 @@ static int mt7925_mac_link_sta_add(struct mt76_dev *mdev, /* should update bss info before STA add */ if (vif->type == NL80211_IFTYPE_STATION && !link_sta->sta->tdls) { - if (ieee80211_vif_is_mld(vif)) - mt7925_mcu_add_bss_info(&dev->phy, mconf->mt76.ctx, - link_conf, link_sta, link_sta != mlink->pri_link); - else - mt7925_mcu_add_bss_info(&dev->phy, mconf->mt76.ctx, - link_conf, link_sta, false); + struct mt792x_link_sta *mlink_bc; + + mlink_bc = mt792x_sta_to_link(&mvif->sta, mconf->link_id); + + if (ieee80211_vif_is_mld(vif)) { + mt7925_mcu_add_bss_info_sta(&dev->phy, mconf->mt76.ctx, + link_conf, link_sta, + mlink_bc->wcid.idx, mlink->wcid.idx, + link_sta != mlink->pri_link); + } else { + mt7925_mcu_add_bss_info_sta(&dev->phy, mconf->mt76.ctx, + link_conf, link_sta, + mlink_bc->wcid.idx, mlink->wcid.idx, + false); + } } if (ieee80211_vif_is_mld(vif) && @@ -965,7 +976,7 @@ mt7925_mac_sta_add_links(struct mt792x_dev *dev, struct ieee80211_vif *vif, mlink->wcid.def_wcid = &msta->deflink.wcid; link_sta = mt792x_sta_to_link_sta(vif, sta, link_id); - mt7925_mac_link_sta_add(&dev->mt76, vif, link_sta); + mt7925_mac_link_sta_add(&dev->mt76, vif, link_sta, mlink); } return err; @@ -989,7 +1000,8 @@ int mt7925_mac_sta_add(struct mt76_dev *mdev, struct ieee80211_vif *vif, err = mt7925_mac_sta_add_links(dev, vif, sta, sta->valid_links); } else { - err = mt7925_mac_link_sta_add(mdev, vif, &sta->deflink); + err = mt7925_mac_link_sta_add(mdev, vif, &sta->deflink, + &msta->deflink); } return err; diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c index fa5f79004a6e..76bcfaf8ebfa 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c @@ -2476,7 +2476,9 @@ mt7925_mcu_bss_basic_tlv(struct sk_buff *skb, struct ieee80211_bss_conf *link_conf, struct ieee80211_link_sta *link_sta, struct ieee80211_chanctx_conf *ctx, - struct mt76_phy *phy, u16 wlan_idx, + struct mt76_phy *phy, + u16 bmc_tx_wlan_idx, + u16 sta_wlan_idx, bool enable) { struct ieee80211_vif *vif = link_conf->vif; @@ -2485,7 +2487,6 @@ mt7925_mcu_bss_basic_tlv(struct sk_buff *skb, &link_conf->chanreq.oper; enum nl80211_band band = chandef->chan->band; struct mt76_connac_bss_basic_tlv *basic_req; - struct mt792x_link_sta *mlink; struct tlv *tlv; int conn_type; u8 idx; @@ -2509,20 +2510,9 @@ mt7925_mcu_bss_basic_tlv(struct sk_buff *skb, basic_req->phymode = mt76_connac_get_phy_mode(phy, vif, band, link_sta); basic_req->bcn_interval = cpu_to_le16(link_conf->beacon_int); basic_req->dtim_period = link_conf->dtim_period; - basic_req->bmc_tx_wlan_idx = cpu_to_le16(wlan_idx); + basic_req->bmc_tx_wlan_idx = cpu_to_le16(bmc_tx_wlan_idx); basic_req->link_idx = mconf->mt76.idx; - - if (link_sta) { - struct mt792x_sta *msta; - - msta = (struct mt792x_sta *)link_sta->sta->drv_priv; - mlink = mt792x_sta_to_link(msta, link_sta->link_id); - - } else { - mlink = &mconf->vif->sta.deflink; - } - - basic_req->sta_idx = cpu_to_le16(mlink->wcid.idx); + basic_req->sta_idx = cpu_to_le16(sta_wlan_idx); basic_req->omac_idx = mconf->mt76.omac_idx; basic_req->band_idx = mconf->mt76.band_idx; basic_req->wmm_idx = mconf->mt76.wmm_idx; @@ -2829,16 +2819,16 @@ void mt7925_mcu_del_dev(struct mt76_dev *mdev, &dev_req, sizeof(dev_req), true); } -int mt7925_mcu_add_bss_info(struct mt792x_phy *phy, - struct ieee80211_chanctx_conf *ctx, - struct ieee80211_bss_conf *link_conf, - struct ieee80211_link_sta *link_sta, - int enable) +int mt7925_mcu_add_bss_info_sta(struct mt792x_phy *phy, + struct ieee80211_chanctx_conf *ctx, + struct ieee80211_bss_conf *link_conf, + struct ieee80211_link_sta *link_sta, + u16 bmc_tx_wlan_idx, + u16 sta_wlan_idx, + int enable) { - struct mt792x_vif *mvif = (struct mt792x_vif *)link_conf->vif->drv_priv; struct mt792x_bss_conf *mconf = mt792x_link_conf_to_mconf(link_conf); struct mt792x_dev *dev = phy->dev; - struct mt792x_link_sta *mlink_bc; struct sk_buff *skb; skb = __mt7925_mcu_alloc_bss_req(&dev->mt76, &mconf->mt76, @@ -2846,11 +2836,9 @@ int mt7925_mcu_add_bss_info(struct mt792x_phy *phy, if (IS_ERR(skb)) return PTR_ERR(skb); - mlink_bc = mt792x_sta_to_link(&mvif->sta, mconf->link_id); - /* bss_basic must be first */ mt7925_mcu_bss_basic_tlv(skb, link_conf, link_sta, ctx, phy->mt76, - mlink_bc->wcid.idx, enable); + bmc_tx_wlan_idx, sta_wlan_idx, enable); mt7925_mcu_bss_sec_tlv(skb, link_conf); mt7925_mcu_bss_bmc_tlv(skb, phy, ctx, link_conf); mt7925_mcu_bss_qos_tlv(skb, link_conf); @@ -2871,6 +2859,33 @@ int mt7925_mcu_add_bss_info(struct mt792x_phy *phy, MCU_UNI_CMD(BSS_INFO_UPDATE), true); } +int mt7925_mcu_add_bss_info(struct mt792x_phy *phy, + struct ieee80211_chanctx_conf *ctx, + struct ieee80211_bss_conf *link_conf, + struct ieee80211_link_sta *link_sta, + int enable) +{ + struct mt792x_vif *mvif = (struct mt792x_vif *)link_conf->vif->drv_priv; + struct mt792x_bss_conf *mconf = mt792x_link_conf_to_mconf(link_conf); + struct mt792x_link_sta *mlink_bc; + struct mt792x_link_sta *mlink; + + mlink_bc = mt792x_sta_to_link(&mvif->sta, mconf->link_id); + + if (link_sta) { + struct mt792x_sta *msta = (void *)link_sta->sta->drv_priv; + + mlink = mt792x_sta_to_link(msta, link_sta->link_id); + if (WARN_ON(!mlink)) + return -EINVAL; + } else { + mlink = &mconf->vif->sta.deflink; + } + + return mt7925_mcu_add_bss_info_sta(phy, ctx, link_conf, link_sta, + mlink_bc->wcid.idx, mlink->wcid.idx, enable); +} + int mt7925_mcu_set_dbdc(struct mt76_phy *phy, bool enable) { struct mt76_dev *mdev = phy->dev; diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.h index e09e0600534a..56e2772f3ffe 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.h +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.h @@ -693,6 +693,13 @@ int mt7925_mcu_add_bss_info(struct mt792x_phy *phy, struct ieee80211_bss_conf *link_conf, struct ieee80211_link_sta *link_sta, int enable); +int mt7925_mcu_add_bss_info_sta(struct mt792x_phy *phy, + struct ieee80211_chanctx_conf *ctx, + struct ieee80211_bss_conf *link_conf, + struct ieee80211_link_sta *link_sta, + u16 bmc_tx_wlan_idx, + u16 sta_wlan_idx, + int enable); int mt7925_mcu_set_timing(struct mt792x_phy *phy, struct ieee80211_bss_conf *link_conf); int mt7925_mcu_set_deep_sleep(struct mt792x_dev *dev, bool enable); From ff643b81bc38eaff6c0ab783a62e4ba9e10d2476 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Fri, 6 Mar 2026 17:22:22 -0600 Subject: [PATCH 0837/1561] wifi: mt76: mt7925: pass mlink and mconf to sta_mld_tlv() Drop the mt792x_sta_to_link() lookup in mt7925_mcu_sta_mld_tlv() and pass mlink and mconf from the caller instead. The link context is already known at the call site, making the lookup redundant. This keeps the helper lookup-free and makes MLD link selection explicit. No functional change intended. Signed-off-by: Sean Wang Link: https://patch.msgid.link/20260306232238.2039675-4-sean.wang@kernel.org Signed-off-by: Felix Fietkau --- .../net/wireless/mediatek/mt76/mt7925/mcu.c | 53 +++++++++++++------ 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c index 76bcfaf8ebfa..1e46adf7ddd9 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c @@ -1914,36 +1914,53 @@ mt7925_mcu_sta_eht_mld_tlv(struct sk_buff *skb, static void mt7925_mcu_sta_mld_tlv(struct sk_buff *skb, - struct ieee80211_vif *vif, struct ieee80211_sta *sta) + struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + struct mt792x_bss_conf *mconf, + struct mt792x_link_sta *mlink) { struct mt792x_vif *mvif = (struct mt792x_vif *)vif->drv_priv; struct mt792x_sta *msta = (struct mt792x_sta *)sta->drv_priv; - unsigned long valid = mvif->valid_links; - struct mt792x_bss_conf *mconf; - struct mt792x_link_sta *mlink; + struct mt792x_dev *dev = mvif->phy->dev; + struct mt792x_bss_conf *mconf_pri; struct sta_rec_mld *mld; struct tlv *tlv; - int i, cnt = 0; + u8 cnt = 0; + + /* Primary link always uses driver's deflink WCID. */ + mconf_pri = (msta->deflink_id != IEEE80211_LINK_UNSPECIFIED) ? + mt792x_vif_to_link(mvif, msta->deflink_id) : NULL; + + /* If caller is operating on deflink, reuse its mconf as primary. */ + if (!mconf_pri && mlink == &msta->deflink) + mconf_pri = mconf; + + if (!mconf_pri) { + dev_warn_ratelimited(dev->mt76.dev, + "mt7925: MLD_TLV_LINK skip (no primary mconf) sta=%pM\n", + sta->addr); + return; + } tlv = mt76_connac_mcu_add_tlv(skb, STA_REC_MLD, sizeof(*mld)); mld = (struct sta_rec_mld *)tlv; memcpy(mld->mac_addr, sta->addr, ETH_ALEN); + mld->primary_id = cpu_to_le16(msta->deflink.wcid.idx); mld->wlan_id = cpu_to_le16(msta->deflink.wcid.idx); - mld->link_num = min_t(u8, hweight16(mvif->valid_links), 2); - for_each_set_bit(i, &valid, IEEE80211_MLD_MAX_NUM_LINKS) { - if (cnt == mld->link_num) - break; + /* Always encode primary link first. */ + mld->link[cnt].wlan_id = cpu_to_le16(msta->deflink.wcid.idx); + mld->link[cnt++].bss_idx = mconf_pri->mt76.idx; - mconf = mt792x_vif_to_link(mvif, i); - mlink = mt792x_sta_to_link(msta, i); + /* Optionally encode the currently-updated secondary link. */ + if (mlink && mlink != &msta->deflink && mconf) { + mld->secondary_id = cpu_to_le16(mlink->wcid.idx); mld->link[cnt].wlan_id = cpu_to_le16(mlink->wcid.idx); mld->link[cnt++].bss_idx = mconf->mt76.idx; - - if (mlink != &msta->deflink) - mld->secondary_id = cpu_to_le16(mlink->wcid.idx); } + + mld->link_num = cnt; } static void @@ -1969,6 +1986,7 @@ mt7925_mcu_sta_cmd(struct mt76_phy *phy, int conn_state; mconf = mt792x_vif_to_link(mvif, info->wcid->link_id); + mlink = container_of(info->wcid, struct mt792x_link_sta, wcid); skb = __mt76_connac_mcu_alloc_sta_req(dev, &mconf->mt76, info->wcid, MT7925_STA_UPDATE_MAX_SIZE); @@ -1979,8 +1997,6 @@ mt7925_mcu_sta_cmd(struct mt76_phy *phy, CONN_STATE_DISCONNECT; if (info->enable && info->link_sta) { - mlink = container_of(info->wcid, struct mt792x_link_sta, wcid); - mt76_connac_mcu_sta_basic_tlv(dev, skb, info->link_conf, info->link_sta, conn_state, info->newly); @@ -1999,7 +2015,10 @@ mt7925_mcu_sta_cmd(struct mt76_phy *phy, info->state); if (info->state != MT76_STA_INFO_STATE_NONE) { - mt7925_mcu_sta_mld_tlv(skb, info->vif, info->link_sta->sta); + mt7925_mcu_sta_mld_tlv(skb, info->vif, + info->link_sta->sta, + mconf, mlink); + mt7925_mcu_sta_eht_mld_tlv(skb, info->vif, info->link_sta->sta); } } From dc019e3294c7e3c6e997bb11732d46ce0d9211e9 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Fri, 6 Mar 2026 17:22:23 -0600 Subject: [PATCH 0838/1561] wifi: mt76: mt7925: pass mlink to mcu_sta_update() Drop the mt792x_sta_to_link() lookup in mt7925_mcu_sta_update() and pass the resolved mlink from the caller instead. The link context is already known at the call site, making the lookup redundant. This keeps the helper lookup-free and makes WCID selection explicit. No functional change intended. Signed-off-by: Sean Wang Link: https://patch.msgid.link/20260306232238.2039675-5-sean.wang@kernel.org Signed-off-by: Felix Fietkau --- .../net/wireless/mediatek/mt76/mt7925/mac.c | 3 ++- .../net/wireless/mediatek/mt76/mt7925/main.c | 27 +++++++++++++------ .../net/wireless/mediatek/mt76/mt7925/mcu.c | 12 +++------ .../wireless/mediatek/mt76/mt7925/mt7925.h | 4 ++- 4 files changed, 28 insertions(+), 18 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mac.c b/drivers/net/wireless/mediatek/mt76/mt7925/mac.c index 0bafa8e770a6..c47bd812b66b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mac.c @@ -1285,7 +1285,8 @@ mt7925_vif_connect_iter(void *priv, u8 *mac, if (vif->type == NL80211_IFTYPE_AP) { mt76_connac_mcu_uni_add_bss(dev->phy.mt76, vif, &mvif->sta.deflink.wcid, true, NULL); - mt7925_mcu_sta_update(dev, NULL, vif, true, + mt7925_mcu_sta_update(dev, NULL, vif, + &mvif->sta.deflink, true, MT76_STA_INFO_STATE_NONE); mt7925_mcu_uni_add_beacon_offload(dev, hw, vif, true); } diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/main.c b/drivers/net/wireless/mediatek/mt76/mt7925/main.c index 353461f0e169..c65e32a14c01 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/main.c @@ -919,23 +919,31 @@ static int mt7925_mac_link_sta_add(struct mt76_dev *mdev, if (ieee80211_vif_is_mld(vif) && link_sta == mlink->pri_link) { - ret = mt7925_mcu_sta_update(dev, link_sta, vif, true, + ret = mt7925_mcu_sta_update(dev, link_sta, vif, + mlink, true, MT76_STA_INFO_STATE_NONE); if (ret) return ret; } else if (ieee80211_vif_is_mld(vif) && link_sta != mlink->pri_link) { + struct mt792x_link_sta *pri_mlink; + + pri_mlink = mt792x_sta_to_link(msta, mlink->pri_link->link_id); + ret = mt7925_mcu_sta_update(dev, mlink->pri_link, vif, - true, MT76_STA_INFO_STATE_ASSOC); + pri_mlink, true, + MT76_STA_INFO_STATE_ASSOC); if (ret) return ret; - ret = mt7925_mcu_sta_update(dev, link_sta, vif, true, + ret = mt7925_mcu_sta_update(dev, link_sta, vif, + mlink, true, MT76_STA_INFO_STATE_ASSOC); if (ret) return ret; } else { - ret = mt7925_mcu_sta_update(dev, link_sta, vif, true, + ret = mt7925_mcu_sta_update(dev, link_sta, vif, + mlink, true, MT76_STA_INFO_STATE_NONE); if (ret) return ret; @@ -1075,7 +1083,8 @@ static void mt7925_mac_link_sta_assoc(struct mt76_dev *mdev, MT_WTBL_UPDATE_ADM_COUNT_CLEAR); memset(mlink->airtime_ac, 0, sizeof(mlink->airtime_ac)); - mt7925_mcu_sta_update(dev, link_sta, vif, true, MT76_STA_INFO_STATE_ASSOC); + mt7925_mcu_sta_update(dev, link_sta, vif, mlink, true, + MT76_STA_INFO_STATE_ASSOC); mt792x_mutex_release(dev); } @@ -1119,7 +1128,7 @@ static void mt7925_mac_link_sta_remove(struct mt76_dev *mdev, mt76_connac_free_pending_tx_skbs(&dev->pm, &mlink->wcid); mt76_connac_pm_wake(&dev->mphy, &dev->pm); - mt7925_mcu_sta_update(dev, link_sta, vif, false, + mt7925_mcu_sta_update(dev, link_sta, vif, mlink, false, MT76_STA_INFO_STATE_NONE); mt7925_mac_wtbl_update(dev, mlink->wcid.idx, MT_WTBL_UPDATE_ADM_COUNT_CLEAR); @@ -1744,7 +1753,8 @@ mt7925_start_ap(struct ieee80211_hw *hw, struct ieee80211_vif *vif, if (err) goto out; - err = mt7925_mcu_sta_update(dev, NULL, vif, true, + err = mt7925_mcu_sta_update(dev, NULL, vif, + &mvif->sta.deflink, true, MT76_STA_INFO_STATE_NONE); out: mt792x_mutex_release(dev); @@ -1887,7 +1897,8 @@ static void mt7925_vif_cfg_changed(struct ieee80211_hw *hw, mt792x_mutex_acquire(dev); if (changed & BSS_CHANGED_ASSOC) { - mt7925_mcu_sta_update(dev, NULL, vif, true, + mt7925_mcu_sta_update(dev, NULL, vif, + &mvif->sta.deflink, true, MT76_STA_INFO_STATE_ASSOC); mt7925_mcu_set_beacon_filter(dev, vif, vif->cfg.assoc); diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c index 1e46adf7ddd9..c97f5917c854 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c @@ -2036,7 +2036,9 @@ mt7925_mcu_sta_cmd(struct mt76_phy *phy, int mt7925_mcu_sta_update(struct mt792x_dev *dev, struct ieee80211_link_sta *link_sta, - struct ieee80211_vif *vif, bool enable, + struct ieee80211_vif *vif, + struct mt792x_link_sta *mlink, + bool enable, enum mt76_sta_info_state state) { struct mt792x_vif *mvif = (struct mt792x_vif *)vif->drv_priv; @@ -2051,14 +2053,8 @@ int mt7925_mcu_sta_update(struct mt792x_dev *dev, .offload_fw = true, .rcpi = to_rcpi(rssi), }; - struct mt792x_sta *msta; - struct mt792x_link_sta *mlink; - if (link_sta) { - msta = (struct mt792x_sta *)link_sta->sta->drv_priv; - mlink = mt792x_sta_to_link(msta, link_sta->link_id); - } - info.wcid = link_sta ? &mlink->wcid : &mvif->sta.deflink.wcid; + info.wcid = &mlink->wcid; info.newly = state != MT76_STA_INFO_STATE_ASSOC; return mt7925_mcu_sta_cmd(&dev->mphy, &info); diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mt7925.h b/drivers/net/wireless/mediatek/mt76/mt7925/mt7925.h index 0f0eff748bb7..95f29dae4d9d 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mt7925.h +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mt7925.h @@ -250,7 +250,9 @@ int mt7925_mcu_set_bss_pm(struct mt792x_dev *dev, bool enable); int mt7925_mcu_sta_update(struct mt792x_dev *dev, struct ieee80211_link_sta *link_sta, - struct ieee80211_vif *vif, bool enable, + struct ieee80211_vif *vif, + struct mt792x_link_sta *mlink, + bool enable, enum mt76_sta_info_state state); int mt7925_mcu_set_chan_info(struct mt792x_phy *phy, u16 tag); int mt7925_mcu_set_tx(struct mt792x_dev *dev, struct ieee80211_bss_conf *bss_conf); From ecc57c9899e60456015fb355bfcf7650af7d13c1 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Fri, 6 Mar 2026 17:22:24 -0600 Subject: [PATCH 0839/1561] wifi: mt76: mt7925: resolve primary mlink via def_wcid Use mlink->wcid.def_wcid to obtain the primary mlink in mt7925_mac_link_sta_add() instead of calling mt792x_sta_to_link(). The primary link context is already carried by the WCID, so the extra lookup is redundant. This makes the add path follow the existing WCID association directly. No functional change intended. Signed-off-by: Sean Wang Link: https://patch.msgid.link/20260306232238.2039675-6-sean.wang@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7925/main.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/main.c b/drivers/net/wireless/mediatek/mt76/mt7925/main.c index c65e32a14c01..d7a09a0a79fb 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/main.c @@ -927,8 +927,17 @@ static int mt7925_mac_link_sta_add(struct mt76_dev *mdev, } else if (ieee80211_vif_is_mld(vif) && link_sta != mlink->pri_link) { struct mt792x_link_sta *pri_mlink; + struct mt76_wcid *pri_wcid; - pri_mlink = mt792x_sta_to_link(msta, mlink->pri_link->link_id); + /* alternative lookup via def_wcid */ + pri_wcid = mlink->wcid.def_wcid; + + pri_mlink = pri_wcid ? + container_of(pri_wcid, struct mt792x_link_sta, wcid) : + NULL; + + if (WARN_ON_ONCE(!pri_mlink)) + return -EINVAL; ret = mt7925_mcu_sta_update(dev, mlink->pri_link, vif, pri_mlink, true, From 9e4d518a4707175e1154876b760d4f2b39967e9d Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Fri, 6 Mar 2026 17:22:25 -0600 Subject: [PATCH 0840/1561] wifi: mt76: mt7925: pass mlink to mac_link_sta_remove() Drop the mt792x_sta_to_link() lookup in mt7925_mac_link_sta_remove() and pass mlink from mt7925_mac_sta_remove_links() instead. The link is already resolved there, making the extra lookup redundant. This keeps the remove helper lookup-free and avoids hidden dependence on msta->link[link_id] during teardown. No functional change intended. Signed-off-by: Sean Wang Link: https://patch.msgid.link/20260306232238.2039675-7-sean.wang@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7925/main.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/main.c b/drivers/net/wireless/mediatek/mt76/mt7925/main.c index d7a09a0a79fb..ddff6c5ab876 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/main.c @@ -1121,16 +1121,12 @@ EXPORT_SYMBOL_GPL(mt7925_mac_sta_event); static void mt7925_mac_link_sta_remove(struct mt76_dev *mdev, struct ieee80211_vif *vif, - struct ieee80211_link_sta *link_sta) + struct ieee80211_link_sta *link_sta, + struct mt792x_link_sta *mlink) { struct mt792x_dev *dev = container_of(mdev, struct mt792x_dev, mt76); struct ieee80211_bss_conf *link_conf; u8 link_id = link_sta->link_id; - struct mt792x_link_sta *mlink; - struct mt792x_sta *msta; - - msta = (struct mt792x_sta *)link_sta->sta->drv_priv; - mlink = mt792x_sta_to_link(msta, link_id); mt7925_roc_abort_sync(dev); @@ -1213,7 +1209,7 @@ mt7925_mac_sta_remove_links(struct mt792x_dev *dev, struct ieee80211_vif *vif, if (!mlink) continue; - mt7925_mac_link_sta_remove(&dev->mt76, vif, link_sta); + mt7925_mac_link_sta_remove(&dev->mt76, vif, link_sta, mlink); wcid = &mlink->wcid; rcu_assign_pointer(msta->link[link_id], NULL); From 8951131c18979b9d40d0f8a164be0432e8d1083b Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Fri, 6 Mar 2026 17:22:26 -0600 Subject: [PATCH 0841/1561] wifi: mt76: mt7925: pass mlink to sta_hdr_trans_tlv() Drop the mt792x_sta_to_link() lookup in mt7925_mcu_sta_hdr_trans_tlv() and pass the resolved mlink from the caller instead. The link is already known at the call site, making the lookup redundant. This keeps the helper lookup-free and makes WCID selection explicit. No functional change intended. Signed-off-by: Sean Wang Link: https://patch.msgid.link/20260306232238.2039675-8-sean.wang@kernel.org Signed-off-by: Felix Fietkau --- .../net/wireless/mediatek/mt76/mt7925/mcu.c | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c index c97f5917c854..476aec8337a9 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c @@ -1066,9 +1066,9 @@ EXPORT_SYMBOL_GPL(mt7925_run_firmware); static void mt7925_mcu_sta_hdr_trans_tlv(struct sk_buff *skb, struct ieee80211_vif *vif, - struct ieee80211_link_sta *link_sta) + struct ieee80211_link_sta *link_sta, + struct mt792x_link_sta *mlink) { - struct mt792x_vif *mvif = (struct mt792x_vif *)vif->drv_priv; struct sta_rec_hdr_trans *hdr_trans; struct mt76_wcid *wcid; struct tlv *tlv; @@ -1082,15 +1082,7 @@ mt7925_mcu_sta_hdr_trans_tlv(struct sk_buff *skb, else hdr_trans->from_ds = true; - if (link_sta) { - struct mt792x_sta *msta = (struct mt792x_sta *)link_sta->sta->drv_priv; - struct mt792x_link_sta *mlink; - - mlink = mt792x_sta_to_link(msta, link_sta->link_id); - wcid = &mlink->wcid; - } else { - wcid = &mvif->sta.deflink.wcid; - } + wcid = &mlink->wcid; if (!wcid) return; @@ -1127,7 +1119,10 @@ int mt7925_mcu_wtbl_update_hdr_trans(struct mt792x_dev *dev, return PTR_ERR(skb); /* starec hdr trans */ - mt7925_mcu_sta_hdr_trans_tlv(skb, vif, link_sta); + if (!link_sta) + mlink = &mvif->sta.deflink; + + mt7925_mcu_sta_hdr_trans_tlv(skb, vif, link_sta, mlink); return mt76_mcu_skb_send_msg(&dev->mt76, skb, MCU_WMWA_UNI_CMD(STA_REC_UPDATE), true); } @@ -2028,7 +2023,10 @@ mt7925_mcu_sta_cmd(struct mt76_phy *phy, mt76_connac_mcu_add_tlv(skb, STA_REC_MLD_OFF, sizeof(struct tlv)); } else { - mt7925_mcu_sta_hdr_trans_tlv(skb, info->vif, info->link_sta); + if (!info->link_sta) + mlink = &mvif->sta.deflink; + + mt7925_mcu_sta_hdr_trans_tlv(skb, info->vif, info->link_sta, mlink); } return mt76_mcu_skb_send_msg(dev, skb, info->cmd, true); From 292651cafac012ebad3d9d68b38f69117da4685d Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Fri, 6 Mar 2026 17:22:27 -0600 Subject: [PATCH 0842/1561] wifi: mt76: mt7925: validate mlink in sta_hdr_trans_tlv() Replace the dead wcid NULL check in mt7925_mcu_sta_hdr_trans_tlv() with a WARN_ON_ONCE() guard on mlink before dereferencing mlink->wcid. wcid is always derived from mlink, so mlink is the only meaningful object to validate here. Signed-off-by: Sean Wang Link: https://patch.msgid.link/20260306232238.2039675-9-sean.wang@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7925/mcu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c index 476aec8337a9..ba471341e8d0 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c @@ -1082,11 +1082,11 @@ mt7925_mcu_sta_hdr_trans_tlv(struct sk_buff *skb, else hdr_trans->from_ds = true; - wcid = &mlink->wcid; - - if (!wcid) + if (WARN_ON_ONCE(!mlink)) return; + wcid = &mlink->wcid; + hdr_trans->dis_rx_hdr_tran = !test_bit(MT_WCID_FLAG_HDR_TRANS, &wcid->flags); if (test_bit(MT_WCID_FLAG_4ADDR, &wcid->flags)) { hdr_trans->to_ds = true; From 46a2264681500f3fff9ebf7ad64f5704d2b568bb Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Fri, 6 Mar 2026 17:22:28 -0600 Subject: [PATCH 0843/1561] wifi: mt76: mt7925: pass mlink to wtbl_update_hdr_trans() Drop the mt792x_sta_to_link() lookup in mt7925_mcu_wtbl_update_hdr_trans() and pass the resolved mlink from the caller instead. The link context is already known at the call site, making the lookup redundant. This keeps the helper lookup-free and makes link ownership explicit. No functional change intended. Signed-off-by: Sean Wang Link: https://patch.msgid.link/20260306232238.2039675-10-sean.wang@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7925/main.c | 2 +- drivers/net/wireless/mediatek/mt76/mt7925/mcu.c | 8 ++------ drivers/net/wireless/mediatek/mt76/mt7925/mt7925.h | 1 + 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/main.c b/drivers/net/wireless/mediatek/mt76/mt7925/main.c index ddff6c5ab876..f5dd91dacca5 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/main.c @@ -1598,7 +1598,7 @@ static void mt7925_sta_set_decap_offload(struct ieee80211_hw *hw, if (!mlink->wcid.sta) continue; - mt7925_mcu_wtbl_update_hdr_trans(dev, vif, sta, i); + mt7925_mcu_wtbl_update_hdr_trans(dev, vif, sta, mlink, i); } mt792x_mutex_release(dev); diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c index ba471341e8d0..04650e201071 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c @@ -1097,18 +1097,14 @@ mt7925_mcu_sta_hdr_trans_tlv(struct sk_buff *skb, int mt7925_mcu_wtbl_update_hdr_trans(struct mt792x_dev *dev, struct ieee80211_vif *vif, struct ieee80211_sta *sta, + struct mt792x_link_sta *mlink, int link_id) { - struct mt792x_vif *mvif = (struct mt792x_vif *)vif->drv_priv; struct ieee80211_link_sta *link_sta = sta ? &sta->deflink : NULL; - struct mt792x_link_sta *mlink; + struct mt792x_vif *mvif = (struct mt792x_vif *)vif->drv_priv; struct mt792x_bss_conf *mconf; - struct mt792x_sta *msta; struct sk_buff *skb; - msta = sta ? (struct mt792x_sta *)sta->drv_priv : &mvif->sta; - - mlink = mt792x_sta_to_link(msta, link_id); link_sta = mt792x_sta_to_link_sta(vif, sta, link_id); mconf = mt792x_vif_to_link(mvif, link_id); diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mt7925.h b/drivers/net/wireless/mediatek/mt76/mt7925/mt7925.h index 95f29dae4d9d..e28972f0615b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mt7925.h +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mt7925.h @@ -371,6 +371,7 @@ int mt7925_mcu_set_rts_thresh(struct mt792x_phy *phy, u32 val); int mt7925_mcu_wtbl_update_hdr_trans(struct mt792x_dev *dev, struct ieee80211_vif *vif, struct ieee80211_sta *sta, + struct mt792x_link_sta *mlink, int link_id); int mt7925_mcu_wf_rf_pin_ctrl(struct mt792x_phy *phy); From cf9db836b1e069a3d6a80c72b9bdc12df78b0dd1 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Fri, 6 Mar 2026 17:22:29 -0600 Subject: [PATCH 0844/1561] wifi: mt76: mt7925: pass mlink to set_link_key() Drop the mt792x_sta_to_link() lookup in mt7925_set_link_key() and pass the resolved mlink from the caller instead. The link context is already known at the call site, making the lookup redundant. This keeps the helper lookup-free and makes link ownership explicit. No functional change intended. Signed-off-by: Sean Wang Link: https://patch.msgid.link/20260306232238.2039675-11-sean.wang@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7925/main.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/main.c b/drivers/net/wireless/mediatek/mt76/mt7925/main.c index f5dd91dacca5..68f168a093f2 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/main.c @@ -594,7 +594,8 @@ static int mt7925_cancel_remain_on_channel(struct ieee80211_hw *hw, static int mt7925_set_link_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, struct ieee80211_vif *vif, struct ieee80211_sta *sta, - struct ieee80211_key_conf *key, int link_id) + struct ieee80211_key_conf *key, int link_id, + struct mt792x_link_sta *mlink) { struct mt792x_dev *dev = mt792x_hw_dev(hw); struct mt792x_vif *mvif = (struct mt792x_vif *)vif->drv_priv; @@ -603,7 +604,6 @@ static int mt7925_set_link_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, struct ieee80211_bss_conf *link_conf; struct ieee80211_link_sta *link_sta; int idx = key->keyidx, err = 0; - struct mt792x_link_sta *mlink; struct mt792x_bss_conf *mconf; struct mt76_wcid *wcid; u8 *wcid_keyidx; @@ -611,7 +611,6 @@ static int mt7925_set_link_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, link_conf = mt792x_vif_to_bss_conf(vif, link_id); link_sta = sta ? mt792x_sta_to_link_sta(vif, sta, link_id) : NULL; mconf = mt792x_vif_to_link(mvif, link_id); - mlink = mt792x_sta_to_link(msta, link_id); wcid = &mlink->wcid; wcid_keyidx = &wcid->hw_key_idx; @@ -679,6 +678,7 @@ static int mt7925_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, struct mt792x_vif *mvif = (struct mt792x_vif *)vif->drv_priv; struct mt792x_sta *msta = sta ? (struct mt792x_sta *)sta->drv_priv : &mvif->sta; + struct mt792x_link_sta *mlink; int err; /* The hardware does not support per-STA RX GTK, fallback @@ -700,12 +700,16 @@ static int mt7925_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, add = key->link_id != -1 ? BIT(key->link_id) : msta->valid_links; for_each_set_bit(link_id, &add, IEEE80211_MLD_MAX_NUM_LINKS) { - err = mt7925_set_link_key(hw, cmd, vif, sta, key, link_id); + mlink = mt792x_sta_to_link(msta, link_id); + err = mt7925_set_link_key(hw, cmd, vif, sta, key, link_id, + mlink); if (err < 0) break; } } else { - err = mt7925_set_link_key(hw, cmd, vif, sta, key, vif->bss_conf.link_id); + mlink = mt792x_sta_to_link(msta, vif->bss_conf.link_id); + err = mt7925_set_link_key(hw, cmd, vif, sta, key, + vif->bss_conf.link_id, mlink); } mt792x_mutex_release(dev); From beec58f36983f826fe90287a90edff46b32e8a89 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Fri, 6 Mar 2026 17:22:30 -0600 Subject: [PATCH 0845/1561] wifi: mt76: mt7925: resolve link after acquiring mt76 mutex mt792x_sta_to_link() uses rcu_dereference_protected() and therefore expects mt76.mutex to be held. Move the lookup after mt792x_mutex_acquire() to make the locking explicit and correct. Signed-off-by: Sean Wang Link: https://patch.msgid.link/20260306232238.2039675-12-sean.wang@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7925/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/main.c b/drivers/net/wireless/mediatek/mt76/mt7925/main.c index 68f168a093f2..135a803b4382 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/main.c @@ -1071,11 +1071,11 @@ static void mt7925_mac_link_sta_assoc(struct mt76_dev *mdev, struct mt792x_link_sta *mlink; struct mt792x_sta *msta; + mt792x_mutex_acquire(dev); + msta = (struct mt792x_sta *)link_sta->sta->drv_priv; mlink = mt792x_sta_to_link(msta, link_sta->link_id); - mt792x_mutex_acquire(dev); - if (ieee80211_vif_is_mld(vif)) { link_conf = mt792x_vif_to_bss_conf(vif, msta->deflink_id); } else { From 9763ead5b9f8fd69033fbe77046a2c5bdd749cf5 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Fri, 6 Mar 2026 17:22:31 -0600 Subject: [PATCH 0846/1561] wifi: mt76: mt7925: pass mconf and mlink to wtbl_update_hdr_trans() Drop the mt792x_vif_to_link() lookup in mt7925_mcu_wtbl_update_hdr_trans() and pass the resolved mconf and mlink from the caller instead. The link context is already known at the call site, making the lookup redundant. This keeps the helper lookup-free and makes link ownership explicit. No functional change intended. Signed-off-by: Sean Wang Link: https://patch.msgid.link/20260306232238.2039675-13-sean.wang@kernel.org Signed-off-by: Felix Fietkau --- .../net/wireless/mediatek/mt76/mt7925/main.c | 4 +++- .../net/wireless/mediatek/mt76/mt7925/mcu.c | 20 ++++--------------- .../wireless/mediatek/mt76/mt7925/mt7925.h | 5 ++--- 3 files changed, 9 insertions(+), 20 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/main.c b/drivers/net/wireless/mediatek/mt76/mt7925/main.c index 135a803b4382..151dc79f7c12 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/main.c @@ -1590,8 +1590,10 @@ static void mt7925_sta_set_decap_offload(struct ieee80211_hw *hw, valid = ieee80211_vif_is_mld(vif) ? mvif->valid_links : BIT(0); for_each_set_bit(i, &valid, IEEE80211_MLD_MAX_NUM_LINKS) { + struct mt792x_bss_conf *mconf; struct mt792x_link_sta *mlink; + mconf = mt792x_vif_to_link(mvif, i); mlink = mt792x_sta_to_link(msta, i); if (enabled) @@ -1602,7 +1604,7 @@ static void mt7925_sta_set_decap_offload(struct ieee80211_hw *hw, if (!mlink->wcid.sta) continue; - mt7925_mcu_wtbl_update_hdr_trans(dev, vif, sta, mlink, i); + mt7925_mcu_wtbl_update_hdr_trans(dev, vif, mconf, mlink); } mt792x_mutex_release(dev); diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c index 04650e201071..37cdf3e8a067 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c @@ -1066,7 +1066,6 @@ EXPORT_SYMBOL_GPL(mt7925_run_firmware); static void mt7925_mcu_sta_hdr_trans_tlv(struct sk_buff *skb, struct ieee80211_vif *vif, - struct ieee80211_link_sta *link_sta, struct mt792x_link_sta *mlink) { struct sta_rec_hdr_trans *hdr_trans; @@ -1096,29 +1095,18 @@ mt7925_mcu_sta_hdr_trans_tlv(struct sk_buff *skb, int mt7925_mcu_wtbl_update_hdr_trans(struct mt792x_dev *dev, struct ieee80211_vif *vif, - struct ieee80211_sta *sta, - struct mt792x_link_sta *mlink, - int link_id) + struct mt792x_bss_conf *mconf, + struct mt792x_link_sta *mlink) { - struct ieee80211_link_sta *link_sta = sta ? &sta->deflink : NULL; - struct mt792x_vif *mvif = (struct mt792x_vif *)vif->drv_priv; - struct mt792x_bss_conf *mconf; struct sk_buff *skb; - link_sta = mt792x_sta_to_link_sta(vif, sta, link_id); - mconf = mt792x_vif_to_link(mvif, link_id); - skb = __mt76_connac_mcu_alloc_sta_req(&dev->mt76, &mconf->mt76, &mlink->wcid, MT7925_STA_UPDATE_MAX_SIZE); if (IS_ERR(skb)) return PTR_ERR(skb); - /* starec hdr trans */ - if (!link_sta) - mlink = &mvif->sta.deflink; - - mt7925_mcu_sta_hdr_trans_tlv(skb, vif, link_sta, mlink); + mt7925_mcu_sta_hdr_trans_tlv(skb, vif, mlink); return mt76_mcu_skb_send_msg(&dev->mt76, skb, MCU_WMWA_UNI_CMD(STA_REC_UPDATE), true); } @@ -2022,7 +2010,7 @@ mt7925_mcu_sta_cmd(struct mt76_phy *phy, if (!info->link_sta) mlink = &mvif->sta.deflink; - mt7925_mcu_sta_hdr_trans_tlv(skb, info->vif, info->link_sta, mlink); + mt7925_mcu_sta_hdr_trans_tlv(skb, info->vif, mlink); } return mt76_mcu_skb_send_msg(dev, skb, info->cmd, true); diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mt7925.h b/drivers/net/wireless/mediatek/mt76/mt7925/mt7925.h index e28972f0615b..46b480f7d813 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mt7925.h +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mt7925.h @@ -370,9 +370,8 @@ int mt7925_mcu_add_key(struct mt76_dev *dev, struct ieee80211_vif *vif, int mt7925_mcu_set_rts_thresh(struct mt792x_phy *phy, u32 val); int mt7925_mcu_wtbl_update_hdr_trans(struct mt792x_dev *dev, struct ieee80211_vif *vif, - struct ieee80211_sta *sta, - struct mt792x_link_sta *mlink, - int link_id); + struct mt792x_bss_conf *mconf, + struct mt792x_link_sta *mlink); int mt7925_mcu_wf_rf_pin_ctrl(struct mt792x_phy *phy); int mt7925_testmode_cmd(struct ieee80211_hw *hw, struct ieee80211_vif *vif, From da14a9349746bae246621f4fbab9e2720b800f6c Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Fri, 6 Mar 2026 17:22:32 -0600 Subject: [PATCH 0847/1561] wifi: mt76: mt7925: make WCID cleanup unconditional in sta_remove_links() Drop the dead pri_link check in mt7925_mac_sta_remove_links() and perform WCID cleanup unconditionally. mlink->pri_link is already cleared before the test, making the branch ineffective. This matches the actual teardown behaviour and simplifies the remove path. No functional change intended. Signed-off-by: Sean Wang Link: https://patch.msgid.link/20260306232238.2039675-14-sean.wang@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7925/main.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/main.c b/drivers/net/wireless/mediatek/mt76/mt7925/main.c index 151dc79f7c12..584d989fb4e8 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/main.c @@ -1221,10 +1221,8 @@ mt7925_mac_sta_remove_links(struct mt792x_dev *dev, struct ieee80211_vif *vif, mlink->sta = NULL; mlink->pri_link = NULL; - if (link_sta != mlink->pri_link) { - mt76_wcid_cleanup(mdev, wcid); - mt76_wcid_mask_clear(mdev->wcid_mask, wcid->idx); - } + mt76_wcid_cleanup(mdev, wcid); + mt76_wcid_mask_clear(mdev->wcid_mask, wcid->idx); if (msta->deflink_id == link_id) msta->deflink_id = IEEE80211_LINK_UNSPECIFIED; From 75e2d6bfd9ac69c79adfe6fa2854558158b260de Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Fri, 6 Mar 2026 17:22:33 -0600 Subject: [PATCH 0848/1561] wifi: mt76: mt7925: unwind WCID setup on link STA add failure Undo the published WCID state when mt7925_mac_link_sta_add() fails after WCID setup. The add path can fail after dev->mt76.wcid[] is published, so the error path must clear the partial host-side WCID state to avoid leaving stale entries behind. No functional change intended. Signed-off-by: Sean Wang Link: https://patch.msgid.link/20260306232238.2039675-15-sean.wang@kernel.org Signed-off-by: Felix Fietkau --- .../net/wireless/mediatek/mt76/mt7925/main.c | 53 +++++++++++++------ 1 file changed, 38 insertions(+), 15 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/main.c b/drivers/net/wireless/mediatek/mt76/mt7925/main.c index 584d989fb4e8..dbde91727cd0 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/main.c @@ -862,8 +862,10 @@ static int mt7925_mac_link_sta_add(struct mt76_dev *mdev, struct ieee80211_bss_conf *link_conf; struct mt792x_bss_conf *mconf; u8 link_id = link_sta->link_id; + bool wcid_published = false; struct mt792x_sta *msta; struct mt76_wcid *wcid; + bool pm_woken = false; int ret, idx; msta = (struct mt792x_sta *)link_sta->sta->drv_priv; @@ -888,6 +890,7 @@ static int mt7925_mac_link_sta_add(struct mt76_dev *mdev, wcid = &mlink->wcid; ewma_signal_init(&wcid->rssi); rcu_assign_pointer(dev->mt76.wcid[wcid->idx], wcid); + wcid_published = true; mt76_wcid_init(wcid, 0); ewma_avg_signal_init(&mlink->avg_ack_signal); memset(mlink->airtime_ac, 0, @@ -895,7 +898,8 @@ static int mt7925_mac_link_sta_add(struct mt76_dev *mdev, ret = mt76_connac_pm_wake(&dev->mphy, &dev->pm); if (ret) - return ret; + goto out_wcid; + pm_woken = true; mt7925_mac_wtbl_update(dev, idx, MT_WTBL_UPDATE_ADM_COUNT_CLEAR); @@ -909,15 +913,19 @@ static int mt7925_mac_link_sta_add(struct mt76_dev *mdev, mlink_bc = mt792x_sta_to_link(&mvif->sta, mconf->link_id); if (ieee80211_vif_is_mld(vif)) { - mt7925_mcu_add_bss_info_sta(&dev->phy, mconf->mt76.ctx, - link_conf, link_sta, - mlink_bc->wcid.idx, mlink->wcid.idx, - link_sta != mlink->pri_link); + ret = mt7925_mcu_add_bss_info_sta(&dev->phy, mconf->mt76.ctx, + link_conf, link_sta, + mlink_bc->wcid.idx, mlink->wcid.idx, + link_sta != mlink->pri_link); + if (ret) + goto out_pm; } else { - mt7925_mcu_add_bss_info_sta(&dev->phy, mconf->mt76.ctx, - link_conf, link_sta, - mlink_bc->wcid.idx, mlink->wcid.idx, - false); + ret = mt7925_mcu_add_bss_info_sta(&dev->phy, mconf->mt76.ctx, + link_conf, link_sta, + mlink_bc->wcid.idx, mlink->wcid.idx, + false); + if (ret) + goto out_pm; } } @@ -927,7 +935,7 @@ static int mt7925_mac_link_sta_add(struct mt76_dev *mdev, mlink, true, MT76_STA_INFO_STATE_NONE); if (ret) - return ret; + goto out_pm; } else if (ieee80211_vif_is_mld(vif) && link_sta != mlink->pri_link) { struct mt792x_link_sta *pri_mlink; @@ -940,31 +948,46 @@ static int mt7925_mac_link_sta_add(struct mt76_dev *mdev, container_of(pri_wcid, struct mt792x_link_sta, wcid) : NULL; - if (WARN_ON_ONCE(!pri_mlink)) - return -EINVAL; + if (WARN_ON_ONCE(!pri_mlink)) { + ret = -EINVAL; + goto out_pm; + } ret = mt7925_mcu_sta_update(dev, mlink->pri_link, vif, pri_mlink, true, MT76_STA_INFO_STATE_ASSOC); if (ret) - return ret; + goto out_pm; ret = mt7925_mcu_sta_update(dev, link_sta, vif, mlink, true, MT76_STA_INFO_STATE_ASSOC); if (ret) - return ret; + goto out_pm; } else { ret = mt7925_mcu_sta_update(dev, link_sta, vif, mlink, true, MT76_STA_INFO_STATE_NONE); if (ret) - return ret; + goto out_pm; } mt76_connac_power_save_sched(&dev->mphy, &dev->pm); return 0; + +out_pm: + if (pm_woken) + mt76_connac_power_save_sched(&dev->mphy, &dev->pm); +out_wcid: + if (wcid_published) { + u16 idx = wcid->idx; + + rcu_assign_pointer(dev->mt76.wcid[idx], NULL); + mt76_wcid_cleanup(mdev, wcid); + mt76_wcid_mask_clear(mdev->wcid_mask, wcid->idx); + } + return ret; } static int From 0fff5b5e2786a4fed7c67087bbaa44ff4a2e200d Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Fri, 6 Mar 2026 17:22:34 -0600 Subject: [PATCH 0849/1561] wifi: mt76: mt7925: drop WCID reinit after publish Remove the redundant mt76_wcid_init() call after publishing the WCID in mt7925_mac_link_sta_add(). WCID is already initialized before publication, so reinitializing it afterward is unnecessary and makes the setup ordering less clear. No functional change intended. Signed-off-by: Sean Wang Link: https://patch.msgid.link/20260306232238.2039675-16-sean.wang@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7925/main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/main.c b/drivers/net/wireless/mediatek/mt76/mt7925/main.c index dbde91727cd0..d99dbd707fcd 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/main.c @@ -891,7 +891,6 @@ static int mt7925_mac_link_sta_add(struct mt76_dev *mdev, ewma_signal_init(&wcid->rssi); rcu_assign_pointer(dev->mt76.wcid[wcid->idx], wcid); wcid_published = true; - mt76_wcid_init(wcid, 0); ewma_avg_signal_init(&mlink->avg_ack_signal); memset(mlink->airtime_ac, 0, sizeof(msta->deflink.airtime_ac)); From 52f088a2e6bfae6d30eea274738e82898c246bd6 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Fri, 6 Mar 2026 17:22:35 -0600 Subject: [PATCH 0850/1561] wifi: mt76: mt7925: move WCID teardown into link_sta_remove() Move WCID teardown into mt7925_mac_link_sta_remove() to mirror the dev->mt76.wcid[] publish done during link add. This clears the published WCID before the rest of teardown, so WCID lookups no longer expose a link that is being removed. No functional change intended. Signed-off-by: Sean Wang Link: https://patch.msgid.link/20260306232238.2039675-17-sean.wang@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7925/main.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/main.c b/drivers/net/wireless/mediatek/mt76/mt7925/main.c index d99dbd707fcd..9e3f3874d0b3 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/main.c @@ -1151,12 +1151,14 @@ static void mt7925_mac_link_sta_remove(struct mt76_dev *mdev, struct mt792x_link_sta *mlink) { struct mt792x_dev *dev = container_of(mdev, struct mt792x_dev, mt76); + struct mt76_wcid *wcid = &mlink->wcid; struct ieee80211_bss_conf *link_conf; u8 link_id = link_sta->link_id; + u16 idx = wcid->idx; mt7925_roc_abort_sync(dev); - mt76_connac_free_pending_tx_skbs(&dev->pm, &mlink->wcid); + mt76_connac_free_pending_tx_skbs(&dev->pm, wcid); mt76_connac_pm_wake(&dev->mphy, &dev->pm); mt7925_mcu_sta_update(dev, link_sta, vif, mlink, false, @@ -1183,6 +1185,10 @@ static void mt7925_mac_link_sta_remove(struct mt76_dev *mdev, list_del_init(&mlink->wcid.poll_list); spin_unlock_bh(&mdev->sta_poll_lock); + rcu_assign_pointer(dev->mt76.wcid[idx], NULL); + mt76_wcid_cleanup(mdev, wcid); + mt76_wcid_mask_clear(mdev->wcid_mask, idx); + mt76_connac_power_save_sched(&dev->mphy, &dev->pm); } @@ -1191,8 +1197,6 @@ mt7925_mac_sta_remove_links(struct mt792x_dev *dev, struct ieee80211_vif *vif, struct ieee80211_sta *sta, unsigned long old_links) { struct mt792x_sta *msta = (struct mt792x_sta *)sta->drv_priv; - struct mt76_dev *mdev = &dev->mt76; - struct mt76_wcid *wcid; unsigned int link_id; /* clean up bss before starec */ @@ -1235,16 +1239,12 @@ mt7925_mac_sta_remove_links(struct mt792x_dev *dev, struct ieee80211_vif *vif, if (!mlink) continue; - mt7925_mac_link_sta_remove(&dev->mt76, vif, link_sta, mlink); - - wcid = &mlink->wcid; rcu_assign_pointer(msta->link[link_id], NULL); msta->valid_links &= ~BIT(link_id); mlink->sta = NULL; mlink->pri_link = NULL; - mt76_wcid_cleanup(mdev, wcid); - mt76_wcid_mask_clear(mdev->wcid_mask, wcid->idx); + mt7925_mac_link_sta_remove(&dev->mt76, vif, link_sta, mlink); if (msta->deflink_id == link_id) msta->deflink_id = IEEE80211_LINK_UNSPECIFIED; From 6ace866cf6d2c5db3bcc8a0d55dcb2d705f2e7f8 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Fri, 6 Mar 2026 17:22:36 -0600 Subject: [PATCH 0851/1561] wifi: mt76: mt7925: switch link STA allocation to RCU lifetime Allocate mt792x_link_sta with kzalloc() and free it with kfree_rcu() instead of devm-managed memory. msta->link[] is published via RCU, so the link STA must remain valid until readers have quiesced after teardown. Manage the object lifetime with kfree_rcu() to match its RCU-visible publication. Signed-off-by: Sean Wang Link: https://patch.msgid.link/20260306232238.2039675-18-sean.wang@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7925/main.c | 10 +++++++--- drivers/net/wireless/mediatek/mt76/mt792x.h | 1 + 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/main.c b/drivers/net/wireless/mediatek/mt76/mt7925/main.c index 9e3f3874d0b3..eb16c4683100 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/main.c @@ -1005,7 +1005,7 @@ mt7925_mac_sta_add_links(struct mt792x_dev *dev, struct ieee80211_vif *vif, mlink = &msta->deflink; msta->deflink_id = link_id; } else { - mlink = devm_kzalloc(dev->mt76.dev, sizeof(*mlink), GFP_KERNEL); + mlink = kzalloc(sizeof(*mlink), GFP_KERNEL); if (!mlink) { err = -ENOMEM; break; @@ -1197,6 +1197,7 @@ mt7925_mac_sta_remove_links(struct mt792x_dev *dev, struct ieee80211_vif *vif, struct ieee80211_sta *sta, unsigned long old_links) { struct mt792x_sta *msta = (struct mt792x_sta *)sta->drv_priv; + struct mt76_dev *mdev = &dev->mt76; unsigned int link_id; /* clean up bss before starec */ @@ -1235,17 +1236,20 @@ mt7925_mac_sta_remove_links(struct mt792x_dev *dev, struct ieee80211_vif *vif, if (!link_sta) continue; - mlink = mt792x_sta_to_link(msta, link_id); + mlink = rcu_replace_pointer(msta->link[link_id], NULL, + lockdep_is_held(&mdev->mutex)); if (!mlink) continue; - rcu_assign_pointer(msta->link[link_id], NULL); msta->valid_links &= ~BIT(link_id); mlink->sta = NULL; mlink->pri_link = NULL; mt7925_mac_link_sta_remove(&dev->mt76, vif, link_sta, mlink); + if (mlink != &msta->deflink) + kfree_rcu(mlink, rcu_head); + if (msta->deflink_id == link_id) msta->deflink_id = IEEE80211_LINK_UNSPECIFIED; } diff --git a/drivers/net/wireless/mediatek/mt76/mt792x.h b/drivers/net/wireless/mediatek/mt76/mt792x.h index 1f381ab356bc..4ff93f2cd624 100644 --- a/drivers/net/wireless/mediatek/mt76/mt792x.h +++ b/drivers/net/wireless/mediatek/mt76/mt792x.h @@ -97,6 +97,7 @@ DECLARE_EWMA(avg_signal, 10, 8) struct mt792x_link_sta { struct mt76_wcid wcid; /* must be first */ + struct rcu_head rcu_head; u32 airtime_ac[8]; From db134691924fb19535ad6f27e09354c8ad001964 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Fri, 6 Mar 2026 17:22:37 -0600 Subject: [PATCH 0852/1561] wifi: mt76: mt7925: publish msta->link after successful link add Move the msta->link[link_id] publication until after mt7925_mac_link_sta_add() succeeds. msta->link[] is RCU-visible, so publishing it before setup completes can expose a link whose add path later fails. Publish it only after success to avoid partially initialized link state becoming visible. Signed-off-by: Sean Wang Link: https://patch.msgid.link/20260306232238.2039675-19-sean.wang@kernel.org Signed-off-by: Felix Fietkau --- .../net/wireless/mediatek/mt76/mt7925/main.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/main.c b/drivers/net/wireless/mediatek/mt76/mt7925/main.c index eb16c4683100..95c631b57894 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/main.c @@ -1000,10 +1000,11 @@ mt7925_mac_sta_add_links(struct mt792x_dev *dev, struct ieee80211_vif *vif, for_each_set_bit(link_id, &new_links, IEEE80211_MLD_MAX_NUM_LINKS) { struct ieee80211_link_sta *link_sta; struct mt792x_link_sta *mlink; + bool is_deflink = false; if (msta->deflink_id == IEEE80211_LINK_UNSPECIFIED) { mlink = &msta->deflink; - msta->deflink_id = link_id; + is_deflink = true; } else { mlink = kzalloc(sizeof(*mlink), GFP_KERNEL); if (!mlink) { @@ -1012,14 +1013,23 @@ mt7925_mac_sta_add_links(struct mt792x_dev *dev, struct ieee80211_vif *vif, } } - msta->valid_links |= BIT(link_id); - rcu_assign_pointer(msta->link[link_id], mlink); mlink->sta = msta; mlink->pri_link = &sta->deflink; mlink->wcid.def_wcid = &msta->deflink.wcid; link_sta = mt792x_sta_to_link_sta(vif, sta, link_id); - mt7925_mac_link_sta_add(&dev->mt76, vif, link_sta, mlink); + err = mt7925_mac_link_sta_add(&dev->mt76, vif, link_sta, mlink); + if (err) { + if (!is_deflink) + kfree_rcu(mlink, rcu_head); + break; + } + + if (is_deflink) + msta->deflink_id = link_id; + + rcu_assign_pointer(msta->link[link_id], mlink); + msta->valid_links |= BIT(link_id); } return err; From 9fc83205a62eeeb775739ba6d8efcfdda9b7780a Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Fri, 6 Mar 2026 17:22:38 -0600 Subject: [PATCH 0853/1561] wifi: mt76: mt7925: host-only unwind published links on add failure Release host link resources when mt7925_mac_sta_add_links() fails after partial success. msta->link[] and dev->mt76.wcid[] may already be published, so unwind the host state to avoid leaving stale links behind. Signed-off-by: Sean Wang Link: https://patch.msgid.link/20260306232238.2039675-20-sean.wang@kernel.org Signed-off-by: Felix Fietkau --- .../net/wireless/mediatek/mt76/mt7925/main.c | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/main.c b/drivers/net/wireless/mediatek/mt76/mt7925/main.c index 95c631b57894..73d3722739d0 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/main.c @@ -989,11 +989,51 @@ out_wcid: return ret; } +/* + * Host-only unwind for sta_add_links() failures. + * + * If add_links fail due to MCU/firmware timeouts; calling the full remove + * path would send more firmware commands and may hang again. So only rollback + * host-published state here (msta->link/valid_links, dev->mt76.wcid[idx]) and + * free mlink objects (RCU-safe). Firmware state is left for reset/recovery. + */ +static void +mt7925_mac_sta_unwind_links_host(struct mt792x_dev *dev, + struct ieee80211_sta *sta, + unsigned long links) +{ + struct mt792x_sta *msta = (struct mt792x_sta *)sta->drv_priv; + unsigned int link_id; + + for_each_set_bit(link_id, &links, IEEE80211_MLD_MAX_NUM_LINKS) { + struct mt792x_link_sta *mlink; + u16 idx; + + mlink = rcu_replace_pointer(msta->link[link_id], NULL, + lockdep_is_held(&dev->mt76.mutex)); + if (!mlink) + continue; + + msta->valid_links &= ~BIT(link_id); + if (msta->deflink_id == link_id) + msta->deflink_id = IEEE80211_LINK_UNSPECIFIED; + + idx = mlink->wcid.idx; + rcu_assign_pointer(dev->mt76.wcid[idx], NULL); + mt76_wcid_cleanup(&dev->mt76, &mlink->wcid); + mt76_wcid_mask_clear(dev->mt76.wcid_mask, idx); + + if (mlink != &msta->deflink) + kfree_rcu(mlink, rcu_head); + } +} + static int mt7925_mac_sta_add_links(struct mt792x_dev *dev, struct ieee80211_vif *vif, struct ieee80211_sta *sta, unsigned long new_links) { struct mt792x_sta *msta = (struct mt792x_sta *)sta->drv_priv; + unsigned long added_links = 0; unsigned int link_id; int err = 0; @@ -1030,8 +1070,13 @@ mt7925_mac_sta_add_links(struct mt792x_dev *dev, struct ieee80211_vif *vif, rcu_assign_pointer(msta->link[link_id], mlink); msta->valid_links |= BIT(link_id); + + added_links |= BIT(link_id); } + if (err && added_links) + mt7925_mac_sta_unwind_links_host(dev, sta, added_links); + return err; } From 59a295335021f6973a34566554b2b9371f1c6f7d Mon Sep 17 00:00:00 2001 From: Peter Chiu Date: Mon, 16 Mar 2026 12:44:27 +0100 Subject: [PATCH 0854/1561] wifi: mt76: mt7996: fix frequency separation for station STR mode Fix frequency separation field for STR in MLD capabilities to get the correct chip capability. Signed-off-by: Peter Chiu Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260316-mt7996-sta-str-v1-1-666814e6ab2d@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7996/init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/init.c b/drivers/net/wireless/mediatek/mt76/mt7996/init.c index 8dfb81eabc9a..d6f9aa1ab52d 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/init.c @@ -99,6 +99,7 @@ static const struct wiphy_iftype_ext_capab iftypes_ext_capa[] = { .extended_capabilities_mask = if_types_ext_capa_ap, .extended_capabilities_len = sizeof(if_types_ext_capa_ap), .mld_capa_and_ops = + FIELD_PREP_CONST(IEEE80211_MLD_CAP_OP_FREQ_SEP_TYPE_IND, 1) | FIELD_PREP_CONST(IEEE80211_MLD_CAP_OP_MAX_SIMUL_LINKS, MT7996_MAX_RADIOS - 1), }, From 76ceccd60bdd1e496e0e70700f3e045d7bc339bf Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sun, 15 Mar 2026 11:26:24 +0100 Subject: [PATCH 0855/1561] wifi: mt76: mt7996: Rely on msta_link link_id in mt7996_vif_link_remove() Rely on msta_link link_id value in mt7996_vif_link_remove routine instead of using link_conf pointer. This assumption is correct since msta_link link_id is set to link_conf link_id value in mt7996_vif_link_add routine. Moreover, fallback to default ieee80211_bss_conf struct if the link_conf pointer in mt7996_vif_link_remove() is NULL. MT7996 hw requires to remove AP MLD links from MCU configuration during AP tear-down process (e.g. running mt7996_remove_interface()). Doing so, we can't assume link_conf pointer is always non-NULL running mt7996_vif_link_remove routine. Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260315-mt7996-mlo-link-reconf-v1-1-a8a634fbc927@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7996/main.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index 834edd31458d..21a240f0c8c2 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -396,17 +396,21 @@ void mt7996_vif_link_remove(struct mt76_phy *mphy, struct ieee80211_vif *vif, struct mt7996_vif_link *link = container_of(mlink, struct mt7996_vif_link, mt76); struct mt7996_vif *mvif = (struct mt7996_vif *)vif->drv_priv; struct mt7996_sta_link *msta_link = &link->msta_link; + unsigned int link_id = msta_link->wcid.link_id; struct mt7996_phy *phy = mphy->priv; struct mt7996_dev *dev = phy->dev; struct mt7996_key_iter_data it = { .cmd = SET_KEY, - .link_id = link_conf->link_id, + .link_id = link_id, }; int idx = msta_link->wcid.idx; if (!mlink->wcid->offchannel) ieee80211_iter_keys(mphy->hw, vif, mt7996_key_iter, &it); + if (!link_conf) + link_conf = &vif->bss_conf; + mt7996_mcu_add_sta(dev, link_conf, NULL, link, NULL, CONN_STATE_DISCONNECT, false); mt7996_mcu_add_bss_info(phy, vif, link_conf, mlink, msta_link, false); @@ -416,10 +420,9 @@ void mt7996_vif_link_remove(struct mt76_phy *mphy, struct ieee80211_vif *vif, rcu_assign_pointer(dev->mt76.wcid[idx], NULL); if (vif->txq && !mlink->wcid->offchannel && - mvif->mt76.deflink_id == link_conf->link_id) { + mvif->mt76.deflink_id == link_id) { struct ieee80211_bss_conf *iter; struct mt76_txq *mtxq; - unsigned int link_id; mvif->mt76.deflink_id = IEEE80211_LINK_UNSPECIFIED; mtxq = (struct mt76_txq *)vif->txq->drv_priv; @@ -427,7 +430,7 @@ void mt7996_vif_link_remove(struct mt76_phy *mphy, struct ieee80211_vif *vif, for_each_vif_active_link(vif, iter, link_id) { struct mt7996_vif_link *link; - if (link_id == link_conf->link_id) + if (link_id == msta_link->wcid.link_id) continue; link = mt7996_vif_link(dev, vif, link_id); From 108cb0c43fdc93bad105dcd00c30d9729520c71f Mon Sep 17 00:00:00 2001 From: Shayne Chen Date: Sun, 15 Mar 2026 11:26:25 +0100 Subject: [PATCH 0856/1561] wifi: mt76: mt7996: Account active links in valid_links fields Track active vif links in mt7996_vif_link_add and mt7996_vif_link_remove routines. This is a preliminary patch in order to remove AP MLD links from MCU configuration during AP tear-down process and to support MLO link reconfiguration in MT7996 driver. Signed-off-by: Shayne Chen Co-developed-by: Lorenzo Bianconi Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260315-mt7996-mlo-link-reconf-v1-2-a8a634fbc927@kernel.org Signed-off-by: Felix Fietkau --- .../net/wireless/mediatek/mt76/mt7996/main.c | 50 +++++++++++-------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index 21a240f0c8c2..07a266f7670c 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -367,12 +367,16 @@ int mt7996_vif_link_add(struct mt76_phy *mphy, struct ieee80211_vif *vif, ieee80211_iter_keys(mphy->hw, vif, mt7996_key_iter, &it); - if (vif->txq && !mlink->wcid->offchannel && - mvif->mt76.deflink_id == IEEE80211_LINK_UNSPECIFIED) { - struct mt76_txq *mtxq = (struct mt76_txq *)vif->txq->drv_priv; + if (!mlink->wcid->offchannel) { + if (vif->txq && + mvif->mt76.deflink_id == IEEE80211_LINK_UNSPECIFIED) { + struct mt76_txq *mtxq; - mvif->mt76.deflink_id = link_conf->link_id; - mtxq->wcid = idx; + mtxq = (struct mt76_txq *)vif->txq->drv_priv; + mvif->mt76.deflink_id = link_conf->link_id; + mtxq->wcid = idx; + } + mvif->mt76.valid_links |= BIT(link_conf->link_id); } if (vif->type == NL80211_IFTYPE_STATION) { @@ -419,28 +423,30 @@ void mt7996_vif_link_remove(struct mt76_phy *mphy, struct ieee80211_vif *vif, rcu_assign_pointer(dev->mt76.wcid[idx], NULL); - if (vif->txq && !mlink->wcid->offchannel && - mvif->mt76.deflink_id == link_id) { - struct ieee80211_bss_conf *iter; - struct mt76_txq *mtxq; + if (!mlink->wcid->offchannel) { + if (vif->txq && mvif->mt76.deflink_id == link_id) { + struct ieee80211_bss_conf *iter; + struct mt76_txq *mtxq; - mvif->mt76.deflink_id = IEEE80211_LINK_UNSPECIFIED; - mtxq = (struct mt76_txq *)vif->txq->drv_priv; - /* Primary link will be removed, look for a new one */ - for_each_vif_active_link(vif, iter, link_id) { - struct mt7996_vif_link *link; + mvif->mt76.deflink_id = IEEE80211_LINK_UNSPECIFIED; + mtxq = (struct mt76_txq *)vif->txq->drv_priv; + /* Primary link will be removed, look for a new one */ + for_each_vif_active_link(vif, iter, link_id) { + struct mt7996_vif_link *link; - if (link_id == msta_link->wcid.link_id) - continue; + if (link_id == msta_link->wcid.link_id) + continue; - link = mt7996_vif_link(dev, vif, link_id); - if (!link) - continue; + link = mt7996_vif_link(dev, vif, link_id); + if (!link) + continue; - mtxq->wcid = link->msta_link.wcid.idx; - mvif->mt76.deflink_id = link_id; - break; + mtxq->wcid = link->msta_link.wcid.idx; + mvif->mt76.deflink_id = link_id; + break; + } } + mvif->mt76.valid_links &= ~BIT(link_id); } dev->mt76.vif_mask &= ~BIT_ULL(mlink->idx); From c8d22f28ea583bda31b7db8243e4e1eb042ac38a Mon Sep 17 00:00:00 2001 From: Shayne Chen Date: Sun, 15 Mar 2026 11:26:26 +0100 Subject: [PATCH 0857/1561] wifi: mt76: mt7996: Move mlink deallocation in mt7996_vif_link_remove() Destroy mt76_vif_link struct in mt7996_vif_link_remove routine and not in mt76_unassign_vif_chanctx(). This is necessary since, in order to properly support MLO link reconfiguration, we will destroy mt76_vif_link struct during AP tear-down process and not running unassign_vif_chanctx mac80211 callback. This patch does not introduce any regression since mt76_assign_vif_chanctx/mt76_unassign_vif_chanctx APIs are currently used just by MT7996 driver. Signed-off-by: Shayne Chen Co-developed-by: Lorenzo Bianconi Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260315-mt7996-mlo-link-reconf-v1-3-a8a634fbc927@kernel.org Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/channel.c | 9 --------- drivers/net/wireless/mediatek/mt76/mt7996/main.c | 6 ++++++ 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/channel.c b/drivers/net/wireless/mediatek/mt76/channel.c index cf3fc09e5d5a..05eee64706ea 100644 --- a/drivers/net/wireless/mediatek/mt76/channel.c +++ b/drivers/net/wireless/mediatek/mt76/channel.c @@ -158,8 +158,6 @@ void mt76_unassign_vif_chanctx(struct ieee80211_hw *hw, { struct mt76_chanctx *ctx = (struct mt76_chanctx *)conf->drv_priv; struct mt76_vif_link *mlink = (struct mt76_vif_link *)vif->drv_priv; - struct mt76_vif_data *mvif = mlink->mvif; - int link_id = link_conf->link_id; struct mt76_phy *phy = ctx->phy; struct mt76_dev *dev = phy->dev; @@ -176,15 +174,8 @@ void mt76_unassign_vif_chanctx(struct ieee80211_hw *hw, if (!mlink) goto out; - if (mlink != (struct mt76_vif_link *)vif->drv_priv) - rcu_assign_pointer(mvif->link[link_id], NULL); - dev->drv->vif_link_remove(phy, vif, link_conf, mlink); mlink->ctx = NULL; - - if (mlink != (struct mt76_vif_link *)vif->drv_priv) - kfree_rcu(mlink, rcu_head); - out: mutex_unlock(&dev->mutex); } diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index 07a266f7670c..feee93340a6c 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -459,6 +459,12 @@ void mt7996_vif_link_remove(struct mt76_phy *mphy, struct ieee80211_vif *vif, spin_unlock_bh(&dev->mt76.sta_poll_lock); mt76_wcid_cleanup(&dev->mt76, &msta_link->wcid); + + if (mlink != (struct mt76_vif_link *)vif->drv_priv && + !mlink->wcid->offchannel) { + rcu_assign_pointer(mlink->mvif->link[link_id], NULL); + kfree_rcu(mlink, rcu_head); + } } static void mt7996_phy_set_rxfilter(struct mt7996_phy *phy) From 08813703ac412360e060cc9abd4a60e3c6668781 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sun, 15 Mar 2026 11:26:27 +0100 Subject: [PATCH 0858/1561] wifi: mt76: mt7996: Destroy vif active links in mt7996_remove_interface() MT7996 hw requires to remove active links from the mcu BSSINFO table destroying the interface. For this reason introduce mt7996_vif_link_destroy routine and remove active (non-offchannel) vif links running mt7996_remove_interface routine. This is a preliminary patch in order to support MLO link reconfiguration in MT7996 driver. Co-developed-by: Shayne Chen Signed-off-by: Shayne Chen Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260315-mt7996-mlo-link-reconf-v1-4-a8a634fbc927@kernel.org Signed-off-by: Felix Fietkau --- .../net/wireless/mediatek/mt76/mt7996/main.c | 108 ++++++++++++------ 1 file changed, 72 insertions(+), 36 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index feee93340a6c..d8ef41c39a7f 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -306,6 +306,10 @@ int mt7996_vif_link_add(struct mt76_phy *mphy, struct ieee80211_vif *vif, }; int mld_idx, idx, ret; + if ((mvif->mt76.valid_links & BIT(link_conf->link_id)) && + !mlink->offchannel) + return 0; + mlink->idx = __ffs64(~dev->mt76.vif_mask); if (mlink->idx >= mt7996_max_interface_num(dev)) return -ENOSPC; @@ -393,65 +397,40 @@ int mt7996_vif_link_add(struct mt76_phy *mphy, struct ieee80211_vif *vif, return 0; } -void mt7996_vif_link_remove(struct mt76_phy *mphy, struct ieee80211_vif *vif, - struct ieee80211_bss_conf *link_conf, - struct mt76_vif_link *mlink) +static void mt7996_vif_link_destroy(struct mt7996_phy *phy, + struct mt7996_vif_link *link, + struct ieee80211_vif *vif, + struct ieee80211_bss_conf *link_conf) { - struct mt7996_vif_link *link = container_of(mlink, struct mt7996_vif_link, mt76); struct mt7996_vif *mvif = (struct mt7996_vif *)vif->drv_priv; struct mt7996_sta_link *msta_link = &link->msta_link; unsigned int link_id = msta_link->wcid.link_id; - struct mt7996_phy *phy = mphy->priv; - struct mt7996_dev *dev = phy->dev; + struct mt76_vif_link *mlink = &link->mt76; struct mt7996_key_iter_data it = { .cmd = SET_KEY, .link_id = link_id, }; + struct mt7996_dev *dev = phy->dev; int idx = msta_link->wcid.idx; - if (!mlink->wcid->offchannel) - ieee80211_iter_keys(mphy->hw, vif, mt7996_key_iter, &it); - if (!link_conf) link_conf = &vif->bss_conf; + if (!mlink->wcid->offchannel) + ieee80211_iter_keys(phy->mt76->hw, vif, mt7996_key_iter, &it); + mt7996_mcu_add_sta(dev, link_conf, NULL, link, NULL, CONN_STATE_DISCONNECT, false); mt7996_mcu_add_bss_info(phy, vif, link_conf, mlink, msta_link, false); - mt7996_mcu_add_dev_info(phy, vif, link_conf, mlink, false); rcu_assign_pointer(dev->mt76.wcid[idx], NULL); - if (!mlink->wcid->offchannel) { - if (vif->txq && mvif->mt76.deflink_id == link_id) { - struct ieee80211_bss_conf *iter; - struct mt76_txq *mtxq; - - mvif->mt76.deflink_id = IEEE80211_LINK_UNSPECIFIED; - mtxq = (struct mt76_txq *)vif->txq->drv_priv; - /* Primary link will be removed, look for a new one */ - for_each_vif_active_link(vif, iter, link_id) { - struct mt7996_vif_link *link; - - if (link_id == msta_link->wcid.link_id) - continue; - - link = mt7996_vif_link(dev, vif, link_id); - if (!link) - continue; - - mtxq->wcid = link->msta_link.wcid.idx; - mvif->mt76.deflink_id = link_id; - break; - } - } - mvif->mt76.valid_links &= ~BIT(link_id); - } - dev->mt76.vif_mask &= ~BIT_ULL(mlink->idx); dev->mld_idx_mask &= ~BIT_ULL(link->mld_idx); phy->omac_mask &= ~BIT_ULL(mlink->omac_idx); + if (!mlink->wcid->offchannel) + mvif->mt76.valid_links &= ~BIT(link_id); spin_lock_bh(&dev->mt76.sta_poll_lock); if (!list_empty(&msta_link->wcid.poll_list)) @@ -467,6 +446,44 @@ void mt7996_vif_link_remove(struct mt76_phy *mphy, struct ieee80211_vif *vif, } } +void mt7996_vif_link_remove(struct mt76_phy *mphy, struct ieee80211_vif *vif, + struct ieee80211_bss_conf *link_conf, + struct mt76_vif_link *mlink) +{ + struct mt7996_vif_link *link = container_of(mlink, struct mt7996_vif_link, mt76); + struct mt7996_vif *mvif = (struct mt7996_vif *)vif->drv_priv; + struct mt7996_sta_link *msta_link = &link->msta_link; + struct mt7996_phy *phy = mphy->priv; + + /* Hw requires to destroy active links tearing down the interface, so + * postpone it removing the interface. + */ + if (mlink->wcid->offchannel) { + mt7996_vif_link_destroy(phy, link, vif, link_conf); + } else if (vif->txq && + mvif->mt76.deflink_id == msta_link->wcid.link_id) { + struct ieee80211_bss_conf *iter; + struct mt76_txq *mtxq; + unsigned int link_id; + + mvif->mt76.deflink_id = IEEE80211_LINK_UNSPECIFIED; + mtxq = (struct mt76_txq *)vif->txq->drv_priv; + /* Primary link will be removed, look for a new one */ + for_each_vif_active_link(vif, iter, link_id) { + if (link_id == msta_link->wcid.link_id) + continue; + + link = mt7996_vif_link(phy->dev, vif, link_id); + if (!link) + continue; + + mtxq->wcid = link->msta_link.wcid.idx; + mvif->mt76.deflink_id = link_id; + break; + } + } +} + static void mt7996_phy_set_rxfilter(struct mt7996_phy *phy) { struct mt7996_dev *dev = phy->dev; @@ -570,10 +587,29 @@ static void mt7996_remove_iter(void *data, u8 *mac, struct ieee80211_vif *vif) static void mt7996_remove_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif) { + struct mt7996_vif *mvif = (struct mt7996_vif *)vif->drv_priv; + unsigned long rem_links = mvif->mt76.valid_links; struct mt7996_dev *dev = mt7996_hw_dev(hw); struct mt7996_radio_data rdata = {}; + unsigned int link_id; int i; + /* Remove all active links */ + for_each_set_bit(link_id, &rem_links, IEEE80211_MLD_MAX_NUM_LINKS) { + struct mt7996_vif_link *link; + struct mt7996_phy *phy; + + link = mt7996_vif_link(dev, vif, link_id); + if (!link) + continue; + + phy = __mt7996_phy(dev, link->msta_link.wcid.phy_idx); + if (!phy) + continue; + + mt7996_vif_link_destroy(phy, link, vif, NULL); + } + ieee80211_iterate_active_interfaces_mtx(hw, 0, mt7996_remove_iter, &rdata); mt76_vif_cleanup(&dev->mt76, vif); From e7ec71d9f8fafe9b431c6b4673465390273d744d Mon Sep 17 00:00:00 2001 From: Shayne Chen Date: Sun, 15 Mar 2026 11:26:28 +0100 Subject: [PATCH 0859/1561] wifi: mt76: mt7996: Add mcu APIs to enable/disable vif links. Introduce mt7996_mcu_mld_reconf_stop_link and mt7996_mcu_mld_link_oper utility routines in order to communicate to the mcu fw to disable/enable a specific vif link. Please note these APIs are currently supported by the MT7996 firmware only in AP mode. Signed-off-by: Shayne Chen Co-developed-by: Lorenzo Bianconi Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260315-mt7996-mlo-link-reconf-v1-5-a8a634fbc927@kernel.org Signed-off-by: Felix Fietkau --- .../wireless/mediatek/mt76/mt76_connac_mcu.h | 2 + .../net/wireless/mediatek/mt76/mt7996/main.c | 48 +++++++++----- .../net/wireless/mediatek/mt76/mt7996/mcu.c | 66 +++++++++++++++++++ .../net/wireless/mediatek/mt76/mt7996/mcu.h | 34 ++++++++++ .../wireless/mediatek/mt76/mt7996/mt7996.h | 6 ++ 5 files changed, 138 insertions(+), 18 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h index fd9cf9c0c32f..ac5126ab68ff 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h +++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h @@ -1319,6 +1319,7 @@ enum { MCU_UNI_CMD_ASSERT_DUMP = 0x6f, MCU_UNI_CMD_EXT_EEPROM_CTRL = 0x74, MCU_UNI_CMD_RADIO_STATUS = 0x80, + MCU_UNI_CMD_MLD = 0x82, MCU_UNI_CMD_SDO = 0x88, }; @@ -1394,6 +1395,7 @@ enum { UNI_BSS_INFO_MLD = 26, UNI_BSS_INFO_PM_DISABLE = 27, UNI_BSS_INFO_EHT = 30, + UNI_BSS_INFO_MLD_LINK_OP = 36, }; enum { diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index d8ef41c39a7f..ac82ea3f066a 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -307,8 +307,12 @@ int mt7996_vif_link_add(struct mt76_phy *mphy, struct ieee80211_vif *vif, int mld_idx, idx, ret; if ((mvif->mt76.valid_links & BIT(link_conf->link_id)) && - !mlink->offchannel) + !mlink->offchannel) { + if (vif->type == NL80211_IFTYPE_AP) + return mt7996_mcu_mld_link_oper(dev, link_conf, link, + true); return 0; + } mlink->idx = __ffs64(~dev->mt76.vif_mask); if (mlink->idx >= mt7996_max_interface_num(dev)) @@ -453,6 +457,7 @@ void mt7996_vif_link_remove(struct mt76_phy *mphy, struct ieee80211_vif *vif, struct mt7996_vif_link *link = container_of(mlink, struct mt7996_vif_link, mt76); struct mt7996_vif *mvif = (struct mt7996_vif *)vif->drv_priv; struct mt7996_sta_link *msta_link = &link->msta_link; + unsigned int link_id = msta_link->wcid.link_id; struct mt7996_phy *phy = mphy->priv; /* Hw requires to destroy active links tearing down the interface, so @@ -460,26 +465,33 @@ void mt7996_vif_link_remove(struct mt76_phy *mphy, struct ieee80211_vif *vif, */ if (mlink->wcid->offchannel) { mt7996_vif_link_destroy(phy, link, vif, link_conf); - } else if (vif->txq && - mvif->mt76.deflink_id == msta_link->wcid.link_id) { - struct ieee80211_bss_conf *iter; - struct mt76_txq *mtxq; - unsigned int link_id; + } else { + if (vif->type == NL80211_IFTYPE_AP) { + mt7996_mcu_mld_reconf_stop_link(phy->dev, vif, + BIT(link_id)); + mt7996_mcu_mld_link_oper(phy->dev, link_conf, link, + false); + } - mvif->mt76.deflink_id = IEEE80211_LINK_UNSPECIFIED; - mtxq = (struct mt76_txq *)vif->txq->drv_priv; - /* Primary link will be removed, look for a new one */ - for_each_vif_active_link(vif, iter, link_id) { - if (link_id == msta_link->wcid.link_id) - continue; + if (vif->txq && mvif->mt76.deflink_id == link_id) { + struct ieee80211_bss_conf *iter; + struct mt76_txq *mtxq; - link = mt7996_vif_link(phy->dev, vif, link_id); - if (!link) - continue; + mvif->mt76.deflink_id = IEEE80211_LINK_UNSPECIFIED; + mtxq = (struct mt76_txq *)vif->txq->drv_priv; + /* Primary link will be removed, look for a new one */ + for_each_vif_active_link(vif, iter, link_id) { + if (link_id == msta_link->wcid.link_id) + continue; - mtxq->wcid = link->msta_link.wcid.idx; - mvif->mt76.deflink_id = link_id; - break; + link = mt7996_vif_link(phy->dev, vif, link_id); + if (!link) + continue; + + mtxq->wcid = link->msta_link.wcid.idx; + mvif->mt76.deflink_id = link_id; + break; + } } } } diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c index 4bf22318396f..16420375112d 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c @@ -2583,6 +2583,72 @@ mt7996_mcu_add_group(struct mt7996_dev *dev, struct mt7996_vif_link *link, sizeof(req), true); } +int mt7996_mcu_mld_reconf_stop_link(struct mt7996_dev *dev, + struct ieee80211_vif *vif, + u16 removed_links) +{ + unsigned long rem_links = removed_links; + struct mld_reconf_stop_link *sl; + struct mld_req_hdr hdr = {}; + unsigned int link_id; + struct sk_buff *skb; + struct tlv *tlv; + + skb = mt76_mcu_msg_alloc(&dev->mt76, NULL, sizeof(hdr) + sizeof(*sl)); + if (!skb) + return -ENOMEM; + + memcpy(hdr.mld_addr, vif->addr, ETH_ALEN); + skb_put_data(skb, &hdr, sizeof(hdr)); + + tlv = mt7996_mcu_add_uni_tlv(skb, UNI_CMD_MLD_RECONF_STOP_LINK, + sizeof(*sl)); + sl = (struct mld_reconf_stop_link *)tlv; + sl->link_bitmap = cpu_to_le16(removed_links); + + for_each_set_bit(link_id, &rem_links, IEEE80211_MLD_MAX_NUM_LINKS) { + struct mt7996_vif_link *link; + + link = mt7996_vif_link(dev, vif, link_id); + if (!link) + continue; + + sl->bss_idx[link_id] = link->mt76.idx; + } + + return mt76_mcu_skb_send_msg(&dev->mt76, skb, MCU_WM_UNI_CMD(MLD), + true); +} + +int mt7996_mcu_mld_link_oper(struct mt7996_dev *dev, + struct ieee80211_bss_conf *link_conf, + struct mt7996_vif_link *link, bool add) +{ + struct ieee80211_vif *vif = link_conf->vif; + struct mt7996_vif *mvif = (struct mt7996_vif *)vif->drv_priv; + struct bss_mld_link_op_tlv *mld_op; + struct sk_buff *skb; + struct tlv *tlv; + + skb = __mt7996_mcu_alloc_bss_req(&dev->mt76, &link->mt76, + MT7996_BSS_UPDATE_MAX_SIZE); + if (IS_ERR(skb)) + return PTR_ERR(skb); + + tlv = mt7996_mcu_add_uni_tlv(skb, UNI_BSS_INFO_MLD_LINK_OP, + sizeof(*mld_op)); + mld_op = (struct bss_mld_link_op_tlv *)tlv; + mld_op->link_operation = add; + mld_op->own_mld_id = link->mld_idx; + mld_op->link_id = link_conf->link_id; + mld_op->group_mld_id = add ? mvif->mld_group_idx : 0xff; + mld_op->remap_idx = add ? mvif->mld_remap_idx : 0xff; + memcpy(mld_op->mac_addr, vif->addr, ETH_ALEN); + + return mt76_mcu_skb_send_msg(&dev->mt76, skb, + MCU_WMWA_UNI_CMD(BSS_INFO_UPDATE), true); +} + static void mt7996_mcu_sta_mld_setup_tlv(struct mt7996_dev *dev, struct sk_buff *skb, struct ieee80211_vif *vif, diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h index 39df13679779..8902e16508b7 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.h @@ -524,6 +524,18 @@ struct bss_prot_tlv { __le32 prot_mode; } __packed; +struct bss_mld_link_op_tlv { + __le16 tag; + __le16 len; + u8 group_mld_id; + u8 own_mld_id; + u8 mac_addr[ETH_ALEN]; + u8 remap_idx; + u8 link_operation; + u8 link_id; + u8 rsv[2]; +} __packed; + struct sta_rec_ht_uni { __le16 tag; __le16 len; @@ -697,6 +709,28 @@ struct mld_setup_link { u8 __rsv; } __packed; +struct mld_req_hdr { + u8 ver; + u8 mld_addr[ETH_ALEN]; + u8 mld_idx; + u8 flag; + u8 rsv[3]; + u8 buf[]; +} __packed; + +struct mld_reconf_stop_link { + __le16 tag; + __le16 len; + __le16 link_bitmap; + u8 rsv[2]; + u8 bss_idx[16]; +} __packed; + +enum { + UNI_CMD_MLD_RECONF_AP_REM_TIMER = 0x03, + UNI_CMD_MLD_RECONF_STOP_LINK = 0x04, +}; + struct hdr_trans_en { __le16 tag; __le16 len; diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h index d18f8794351e..e0a5c4eeb516 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h @@ -776,6 +776,12 @@ int mt7996_mcu_get_all_sta_info(struct mt7996_phy *phy, u16 tag); int mt7996_mcu_wed_rro_reset_sessions(struct mt7996_dev *dev, u16 id); int mt7996_mcu_set_sniffer_mode(struct mt7996_phy *phy, bool enabled); int mt7996_mcu_set_dup_wtbl(struct mt7996_dev *dev); +int mt7996_mcu_mld_reconf_stop_link(struct mt7996_dev *dev, + struct ieee80211_vif *vif, + u16 removed_links); +int mt7996_mcu_mld_link_oper(struct mt7996_dev *dev, + struct ieee80211_bss_conf *link_conf, + struct mt7996_vif_link *link, bool add); static inline bool mt7996_has_hwrro(struct mt7996_dev *dev) { From e8c819df02436f2c2379766946735e1f06a7c923 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sun, 15 Mar 2026 11:26:29 +0100 Subject: [PATCH 0860/1561] wifi: mt76: mt7996: Destroy active sta links in mt7996_mac_sta_remove() Similar to vif link management, postpone sta link destuction in mt7996_mac_sta_remove() introducing mt7996_mac_sta_remove_link utility routine and just disable sta link running mt7996_mac_sta_remove_links routine. This is a preliminary patch in order to support MLO link reconfiguration in MT7996 driver. Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260315-mt7996-mlo-link-reconf-v1-6-a8a634fbc927@kernel.org Signed-off-by: Felix Fietkau --- .../net/wireless/mediatek/mt76/mt7996/mac.c | 22 +----- .../net/wireless/mediatek/mt76/mt7996/main.c | 67 +++++++++++-------- .../wireless/mediatek/mt76/mt7996/mt7996.h | 5 +- 3 files changed, 45 insertions(+), 49 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c index c895e8a5de4d..e2a83da3a09c 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c @@ -2372,26 +2372,8 @@ mt7996_mac_reset_sta_iter(void *data, struct ieee80211_sta *sta) struct mt7996_dev *dev = data; int i; - for (i = 0; i < ARRAY_SIZE(msta->link); i++) { - struct mt7996_sta_link *msta_link = NULL; - struct mt7996_phy *phy; - - msta_link = rcu_replace_pointer(msta->link[i], msta_link, - lockdep_is_held(&dev->mt76.mutex)); - if (!msta_link) - continue; - - mt7996_mac_sta_deinit_link(dev, msta_link); - phy = __mt7996_phy(dev, msta_link->wcid.phy_idx); - if (phy) - phy->mt76->num_sta--; - - if (msta_link != &msta->deflink) - kfree_rcu(msta_link, rcu_head); - } - - msta->deflink_id = IEEE80211_LINK_UNSPECIFIED; - msta->seclink_id = msta->deflink_id; + for (i = 0; i < ARRAY_SIZE(msta->link); i++) + mt7996_mac_sta_remove_link(dev, sta, i, true); } static void diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index ac82ea3f066a..a8a6552d49f6 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -1179,9 +1179,17 @@ mt7996_mac_sta_init_link(struct mt7996_dev *dev, return 0; } -void mt7996_mac_sta_deinit_link(struct mt7996_dev *dev, - struct mt7996_sta_link *msta_link) +void mt7996_mac_sta_remove_link(struct mt7996_dev *dev, + struct ieee80211_sta *sta, + unsigned int link_id, bool flush) { + struct mt7996_sta *msta = (struct mt7996_sta *)sta->drv_priv; + struct mt7996_sta_link *msta_link; + + msta_link = mt76_dereference(msta->link[link_id], &dev->mt76); + if (!msta_link) + return; + spin_lock_bh(&dev->mt76.sta_poll_lock); if (!list_empty(&msta_link->wcid.poll_list)) list_del_init(&msta_link->wcid.poll_list); @@ -1189,31 +1197,13 @@ void mt7996_mac_sta_deinit_link(struct mt7996_dev *dev, list_del_init(&msta_link->rc_list); spin_unlock_bh(&dev->mt76.sta_poll_lock); - rcu_assign_pointer(dev->mt76.wcid[msta_link->wcid.idx], NULL); mt76_wcid_cleanup(&dev->mt76, &msta_link->wcid); - mt76_wcid_mask_clear(dev->mt76.wcid_mask, msta_link->wcid.idx); -} -static void -mt7996_mac_sta_remove_links(struct mt7996_dev *dev, struct ieee80211_vif *vif, - struct ieee80211_sta *sta, unsigned long links) -{ - struct mt7996_sta *msta = (struct mt7996_sta *)sta->drv_priv; - struct mt76_dev *mdev = &dev->mt76; - unsigned int link_id; - - for_each_set_bit(link_id, &links, IEEE80211_MLD_MAX_NUM_LINKS) { - struct mt7996_sta_link *msta_link = NULL; + if (msta_link->wcid.link_valid) { struct mt7996_phy *phy; - msta_link = rcu_replace_pointer(msta->link[link_id], msta_link, - lockdep_is_held(&mdev->mutex)); - if (!msta_link) - continue; - mt7996_mac_wtbl_update(dev, msta_link->wcid.idx, MT_WTBL_UPDATE_ADM_COUNT_CLEAR); - mt7996_mac_sta_deinit_link(dev, msta_link); phy = __mt7996_phy(dev, msta_link->wcid.phy_idx); if (phy) @@ -1230,7 +1220,7 @@ mt7996_mac_sta_remove_links(struct mt7996_dev *dev, struct ieee80211_vif *vif, /* switch to the secondary link */ msta_seclink = mt76_dereference( msta->link[msta->seclink_id], - mdev); + &dev->mt76); if (msta_seclink) { msta->deflink_id = msta->seclink_id; mt7996_sta_init_txq_wcid(sta, @@ -1240,12 +1230,29 @@ mt7996_mac_sta_remove_links(struct mt7996_dev *dev, struct ieee80211_vif *vif, } else if (msta->seclink_id == link_id) { msta->seclink_id = msta->deflink_id; } + msta_link->wcid.link_valid = false; + } + if (flush) { + rcu_assign_pointer(msta->link[link_id], NULL); + rcu_assign_pointer(dev->mt76.wcid[msta_link->wcid.idx], NULL); + mt76_wcid_mask_clear(dev->mt76.wcid_mask, msta_link->wcid.idx); if (msta_link != &msta->deflink) kfree_rcu(msta_link, rcu_head); } } +static void +mt7996_mac_sta_remove_links(struct mt7996_dev *dev, struct ieee80211_vif *vif, + struct ieee80211_sta *sta, unsigned long links, + bool flush) +{ + unsigned int link_id; + + for_each_set_bit(link_id, &links, IEEE80211_MLD_MAX_NUM_LINKS) + mt7996_mac_sta_remove_link(dev, sta, link_id, flush); +} + static int mt7996_mac_sta_add_links(struct mt7996_dev *dev, struct ieee80211_vif *vif, struct ieee80211_sta *sta, unsigned long new_links) @@ -1257,11 +1264,15 @@ mt7996_mac_sta_add_links(struct mt7996_dev *dev, struct ieee80211_vif *vif, for_each_set_bit(link_id, &new_links, IEEE80211_MLD_MAX_NUM_LINKS) { struct ieee80211_bss_conf *link_conf; struct ieee80211_link_sta *link_sta; + struct mt7996_sta_link *msta_link; struct mt7996_vif_link *link; struct mt76_phy *mphy; - if (rcu_access_pointer(msta->link[link_id])) + msta_link = mt76_dereference(msta->link[link_id], &dev->mt76); + if (msta_link) { + msta_link->wcid.link_valid = true; continue; + } link_conf = link_conf_dereference_protected(vif, link_id); if (!link_conf) { @@ -1298,7 +1309,7 @@ mt7996_mac_sta_add_links(struct mt7996_dev *dev, struct ieee80211_vif *vif, return 0; error_unlink: - mt7996_mac_sta_remove_links(dev, vif, sta, new_links); + mt7996_mac_sta_remove_links(dev, vif, sta, new_links, true); return err; } @@ -1315,7 +1326,7 @@ mt7996_mac_sta_change_links(struct ieee80211_hw *hw, struct ieee80211_vif *vif, mutex_lock(&dev->mt76.mutex); - mt7996_mac_sta_remove_links(dev, vif, sta, rem); + mt7996_mac_sta_remove_links(dev, vif, sta, rem, false); ret = mt7996_mac_sta_add_links(dev, vif, sta, add); mutex_unlock(&dev->mt76.mutex); @@ -1424,10 +1435,12 @@ static void mt7996_mac_sta_remove(struct mt7996_dev *dev, struct ieee80211_vif *vif, struct ieee80211_sta *sta) { - unsigned long links = sta->valid_links ? sta->valid_links : BIT(0); + struct mt7996_sta *msta = (struct mt7996_sta *)sta->drv_priv; + int i; mutex_lock(&dev->mt76.mutex); - mt7996_mac_sta_remove_links(dev, vif, sta, links); + for (i = 0; i < ARRAY_SIZE(msta->link); i++) + mt7996_mac_sta_remove_link(dev, sta, i, true); mutex_unlock(&dev->mt76.mutex); } diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h index e0a5c4eeb516..bdcf72457954 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h @@ -867,8 +867,9 @@ void mt7996_mac_twt_teardown_flow(struct mt7996_dev *dev, struct mt7996_vif_link *link, struct mt7996_sta_link *msta_link, u8 flowid); -void mt7996_mac_sta_deinit_link(struct mt7996_dev *dev, - struct mt7996_sta_link *msta_link); +void mt7996_mac_sta_remove_link(struct mt7996_dev *dev, + struct ieee80211_sta *sta, + unsigned int link_id, bool flush); void mt7996_mac_add_twt_setup(struct ieee80211_hw *hw, struct ieee80211_sta *sta, struct ieee80211_twt_setup *twt); From 10171b938330f6e625e7dc8dd24a15cc96218172 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Mon, 23 Mar 2026 10:58:32 +0100 Subject: [PATCH 0861/1561] net: ethtool: pass genl_info to the ethnl parse_request operation The .parse_request() ethnl operation extracts the relevant attributes from the netlink request to populate the private req_info. By passing genl_info as a parameter to this callback, we can use the GENL_REQ_ATTR_CHECK() macro to check for missing mandatory parameters. This macro has the advantage of returning a better error explanation through the netlink_ext_ack struct. Convert the eeprom ethnl code to this macro, as it's the only command yet that has mandatory request parameters. Suggested-by: Jakub Kicinski Signed-off-by: Maxime Chevallier Link: https://patch.msgid.link/20260323095833.136266-1-maxime.chevallier@bootlin.com Signed-off-by: Jakub Kicinski --- net/ethtool/eeprom.c | 12 +++++++----- net/ethtool/netlink.c | 3 ++- net/ethtool/netlink.h | 1 + net/ethtool/pause.c | 1 + net/ethtool/rss.c | 4 +++- net/ethtool/stats.c | 1 + net/ethtool/strset.c | 1 + net/ethtool/tsinfo.c | 4 +++- 8 files changed, 19 insertions(+), 8 deletions(-) diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c index caf1e41e676b..a557e3996c85 100644 --- a/net/ethtool/eeprom.c +++ b/net/ethtool/eeprom.c @@ -150,15 +150,17 @@ err_free: return ret; } -static int eeprom_parse_request(struct ethnl_req_info *req_info, struct nlattr **tb, +static int eeprom_parse_request(struct ethnl_req_info *req_info, + const struct genl_info *info, + struct nlattr **tb, struct netlink_ext_ack *extack) { struct eeprom_req_info *request = MODULE_EEPROM_REQINFO(req_info); - if (!tb[ETHTOOL_A_MODULE_EEPROM_OFFSET] || - !tb[ETHTOOL_A_MODULE_EEPROM_LENGTH] || - !tb[ETHTOOL_A_MODULE_EEPROM_PAGE] || - !tb[ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS]) + if (GENL_REQ_ATTR_CHECK(info, ETHTOOL_A_MODULE_EEPROM_OFFSET) || + GENL_REQ_ATTR_CHECK(info, ETHTOOL_A_MODULE_EEPROM_LENGTH) || + GENL_REQ_ATTR_CHECK(info, ETHTOOL_A_MODULE_EEPROM_PAGE) || + GENL_REQ_ATTR_CHECK(info, ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS)) return -EINVAL; request->i2c_address = nla_get_u8(tb[ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS]); diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 14cb06ec5171..5046023a30b1 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -462,7 +462,8 @@ static int ethnl_default_parse(struct ethnl_req_info *req_info, return ret; if (request_ops->parse_request) { - ret = request_ops->parse_request(req_info, tb, info->extack); + ret = request_ops->parse_request(req_info, info, tb, + info->extack); if (ret < 0) goto err_dev; } diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 89010eaa67df..aaf6f2468768 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -396,6 +396,7 @@ struct ethnl_request_ops { u8 set_ntf_cmd; int (*parse_request)(struct ethnl_req_info *req_info, + const struct genl_info *info, struct nlattr **tb, struct netlink_ext_ack *extack); int (*prepare_data)(const struct ethnl_req_info *req_info, diff --git a/net/ethtool/pause.c b/net/ethtool/pause.c index 29f812e935b6..ccaec8e878b2 100644 --- a/net/ethtool/pause.c +++ b/net/ethtool/pause.c @@ -28,6 +28,7 @@ const struct nla_policy ethnl_pause_get_policy[] = { }; static int pause_parse_request(struct ethnl_req_info *req_base, + const struct genl_info *info, struct nlattr **tb, struct netlink_ext_ack *extack) { diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c index 5cf90d73e70b..353110b862ab 100644 --- a/net/ethtool/rss.c +++ b/net/ethtool/rss.c @@ -66,7 +66,9 @@ const struct nla_policy ethnl_rss_get_policy[] = { }; static int -rss_parse_request(struct ethnl_req_info *req_info, struct nlattr **tb, +rss_parse_request(struct ethnl_req_info *req_info, + const struct genl_info *info, + struct nlattr **tb, struct netlink_ext_ack *extack) { struct rss_req_info *request = RSS_REQINFO(req_info); diff --git a/net/ethtool/stats.c b/net/ethtool/stats.c index 38136f19b2ec..9b0d8cb07675 100644 --- a/net/ethtool/stats.c +++ b/net/ethtool/stats.c @@ -99,6 +99,7 @@ const struct nla_policy ethnl_stats_get_policy[ETHTOOL_A_STATS_SRC + 1] = { }; static int stats_parse_request(struct ethnl_req_info *req_base, + const struct genl_info *info, struct nlattr **tb, struct netlink_ext_ack *extack) { diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c index aa1e50c98f34..9271aba8255e 100644 --- a/net/ethtool/strset.c +++ b/net/ethtool/strset.c @@ -190,6 +190,7 @@ static const struct nla_policy strset_stringsets_policy[] = { }; static int strset_parse_request(struct ethnl_req_info *req_base, + const struct genl_info *info, struct nlattr **tb, struct netlink_ext_ack *extack) { diff --git a/net/ethtool/tsinfo.c b/net/ethtool/tsinfo.c index 9aad62695dfb..a865f0fdd26b 100644 --- a/net/ethtool/tsinfo.c +++ b/net/ethtool/tsinfo.c @@ -70,7 +70,9 @@ int ts_parse_hwtst_provider(const struct nlattr *nest, } static int -tsinfo_parse_request(struct ethnl_req_info *req_base, struct nlattr **tb, +tsinfo_parse_request(struct ethnl_req_info *req_base, + const struct genl_info *info, + struct nlattr **tb, struct netlink_ext_ack *extack) { struct tsinfo_req_info *req = TSINFO_REQINFO(req_base); From a319d0c8c8cede3b63538c9f111f84651d078bf6 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Sun, 22 Mar 2026 13:27:20 +0000 Subject: [PATCH 0862/1561] net: dsa: mxl862xx: add CRC for MDIO communication Enable the firmware's opt-in CRC validation on the MDIO/MMD command interface to detect bit errors on the bus. The firmware bundles CRC-6 and CRC-16 under a single enable flag, so both are implemented together. CRC-6 protects the ctrl and len_ret command registers using a table- driven 3GPP algorithm. It is applied to every command exchange including SET_DATA/GET_DATA batch transfers. With CRC enabled, the firmware encodes its return value as a signed 11-bit integer within the CRC- protected register fields, replacing the previous 16-bit interpretation. CRC-16 protects the data payload using the kernel's crc16() library. The driver appends a CRC-16 checksum to outgoing data and verifies the firmware-appended checksum on responses. The checksum is placed at the exact byte offset where the struct data ends, correctly handling packed structs with odd sizes by splitting the checksum across word boundaries. SET_DATA/GET_DATA sub-commands carry only CRC-6. Upon detection of a CRC error on either side all conduit interfaces are taken down, triggering all user ports to go down as well. This is the most feasible option: CRC errors are likely caused either by broken hardware, or are symptom of overheating. In either case, trying to resume normal operation isn't reasonable. Signed-off-by: Daniel Golle Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/620453b9a150bbe5b7ea4224331cb5dc5e57263b.1774185953.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mxl862xx/Kconfig | 1 + drivers/net/dsa/mxl862xx/mxl862xx-host.c | 345 ++++++++++++++++++----- drivers/net/dsa/mxl862xx/mxl862xx-host.h | 2 + drivers/net/dsa/mxl862xx/mxl862xx.c | 11 + drivers/net/dsa/mxl862xx/mxl862xx.h | 2 + 5 files changed, 296 insertions(+), 65 deletions(-) diff --git a/drivers/net/dsa/mxl862xx/Kconfig b/drivers/net/dsa/mxl862xx/Kconfig index 3e772298cc89..e51a67a3cf9b 100644 --- a/drivers/net/dsa/mxl862xx/Kconfig +++ b/drivers/net/dsa/mxl862xx/Kconfig @@ -2,6 +2,7 @@ config NET_DSA_MXL862 tristate "MaxLinear MxL862xx" depends on NET_DSA + select CRC16 select NET_DSA_TAG_MXL_862XX help This enables support for the MaxLinear MxL862xx switch family. diff --git a/drivers/net/dsa/mxl862xx/mxl862xx-host.c b/drivers/net/dsa/mxl862xx/mxl862xx-host.c index 4eefd2a759a7..0b7f8db31825 100644 --- a/drivers/net/dsa/mxl862xx/mxl862xx-host.c +++ b/drivers/net/dsa/mxl862xx/mxl862xx-host.c @@ -7,7 +7,9 @@ * Copyright (C) 2024 MaxLinear Inc. */ +#include #include +#include #include #include #include @@ -15,6 +17,9 @@ #include "mxl862xx-host.h" #define CTRL_BUSY_MASK BIT(15) +#define CTRL_CRC_FLAG BIT(14) + +#define LEN_RET_LEN_MASK GENMASK(9, 0) #define MXL862XX_MMD_REG_CTRL 0 #define MXL862XX_MMD_REG_LEN_RET 1 @@ -27,7 +32,159 @@ #define MMD_API_GET_DATA_0 5 #define MMD_API_RST_DATA 8 -#define MXL862XX_SWITCH_RESET 0x9907 +#define MXL862XX_SWITCH_RESET 0x9907 + +static void mxl862xx_crc_err_work_fn(struct work_struct *work) +{ + struct mxl862xx_priv *priv = container_of(work, struct mxl862xx_priv, + crc_err_work); + struct dsa_port *dp; + + dev_warn(&priv->mdiodev->dev, + "MDIO CRC error detected, shutting down all ports\n"); + + rtnl_lock(); + dsa_switch_for_each_cpu_port(dp, priv->ds) + dev_close(dp->conduit); + rtnl_unlock(); + + clear_bit(0, &priv->crc_err); +} + +/* Firmware CRC error codes (outside normal Zephyr errno range). */ +#define MXL862XX_FW_CRC6_ERR (-1024) +#define MXL862XX_FW_CRC16_ERR (-1023) + +/* 3GPP CRC-6 lookup table (polynomial 0x6F). + * Matches the firmware's default CRC-6 implementation. + */ +static const u8 mxl862xx_crc6_table[256] = { + 0x00, 0x2f, 0x31, 0x1e, 0x0d, 0x22, 0x3c, 0x13, + 0x1a, 0x35, 0x2b, 0x04, 0x17, 0x38, 0x26, 0x09, + 0x34, 0x1b, 0x05, 0x2a, 0x39, 0x16, 0x08, 0x27, + 0x2e, 0x01, 0x1f, 0x30, 0x23, 0x0c, 0x12, 0x3d, + 0x07, 0x28, 0x36, 0x19, 0x0a, 0x25, 0x3b, 0x14, + 0x1d, 0x32, 0x2c, 0x03, 0x10, 0x3f, 0x21, 0x0e, + 0x33, 0x1c, 0x02, 0x2d, 0x3e, 0x11, 0x0f, 0x20, + 0x29, 0x06, 0x18, 0x37, 0x24, 0x0b, 0x15, 0x3a, + 0x0e, 0x21, 0x3f, 0x10, 0x03, 0x2c, 0x32, 0x1d, + 0x14, 0x3b, 0x25, 0x0a, 0x19, 0x36, 0x28, 0x07, + 0x3a, 0x15, 0x0b, 0x24, 0x37, 0x18, 0x06, 0x29, + 0x20, 0x0f, 0x11, 0x3e, 0x2d, 0x02, 0x1c, 0x33, + 0x09, 0x26, 0x38, 0x17, 0x04, 0x2b, 0x35, 0x1a, + 0x13, 0x3c, 0x22, 0x0d, 0x1e, 0x31, 0x2f, 0x00, + 0x3d, 0x12, 0x0c, 0x23, 0x30, 0x1f, 0x01, 0x2e, + 0x27, 0x08, 0x16, 0x39, 0x2a, 0x05, 0x1b, 0x34, + 0x1c, 0x33, 0x2d, 0x02, 0x11, 0x3e, 0x20, 0x0f, + 0x06, 0x29, 0x37, 0x18, 0x0b, 0x24, 0x3a, 0x15, + 0x28, 0x07, 0x19, 0x36, 0x25, 0x0a, 0x14, 0x3b, + 0x32, 0x1d, 0x03, 0x2c, 0x3f, 0x10, 0x0e, 0x21, + 0x1b, 0x34, 0x2a, 0x05, 0x16, 0x39, 0x27, 0x08, + 0x01, 0x2e, 0x30, 0x1f, 0x0c, 0x23, 0x3d, 0x12, + 0x2f, 0x00, 0x1e, 0x31, 0x22, 0x0d, 0x13, 0x3c, + 0x35, 0x1a, 0x04, 0x2b, 0x38, 0x17, 0x09, 0x26, + 0x12, 0x3d, 0x23, 0x0c, 0x1f, 0x30, 0x2e, 0x01, + 0x08, 0x27, 0x39, 0x16, 0x05, 0x2a, 0x34, 0x1b, + 0x26, 0x09, 0x17, 0x38, 0x2b, 0x04, 0x1a, 0x35, + 0x3c, 0x13, 0x0d, 0x22, 0x31, 0x1e, 0x00, 0x2f, + 0x15, 0x3a, 0x24, 0x0b, 0x18, 0x37, 0x29, 0x06, + 0x0f, 0x20, 0x3e, 0x11, 0x02, 0x2d, 0x33, 0x1c, + 0x21, 0x0e, 0x10, 0x3f, 0x2c, 0x03, 0x1d, 0x32, + 0x3b, 0x14, 0x0a, 0x25, 0x36, 0x19, 0x07, 0x28, +}; + +/* Compute 3GPP CRC-6 over the ctrl register (16 bits) and the lower + * 10 bits of the len_ret register. The 26-bit input is packed as + * { len_ret[9:0], ctrl[15:0] } and processed LSB-first through the + * lookup table. + */ +static u8 mxl862xx_crc6(u16 ctrl, u16 len_ret) +{ + u32 data = ((u32)(len_ret & LEN_RET_LEN_MASK) << 16) | ctrl; + u8 crc = 0; + int i; + + for (i = 0; i < sizeof(data); i++, data >>= 8) + crc = mxl862xx_crc6_table[(crc << 2) ^ (data & 0xff)] & 0x3f; + + return crc; +} + +/* Encode CRC-6 into the ctrl and len_ret registers before writing them + * to MDIO. The caller must set ctrl = API_ID | CTRL_BUSY_MASK | + * CTRL_CRC_FLAG, and len_ret = parameter length (bits 0-9 only). + * + * After encoding: + * ctrl[12:0] = API ID (unchanged) + * ctrl[14:13] = CRC-6 bits 5-4 + * ctrl[15] = busy flag (unchanged) + * len_ret[9:0] = parameter length (unchanged) + * len_ret[13:10] = CRC-6 bits 3-0 + * len_ret[14] = original ctrl[14] (CRC check flag, forwarded to FW) + * len_ret[15] = original ctrl[13] (magic bit, always 1) + */ +static void mxl862xx_crc6_encode(u16 *pctrl, u16 *plen_ret) +{ + u16 crc, ctrl, len_ret; + + /* Set magic bit before CRC computation */ + *pctrl |= BIT(13); + + crc = mxl862xx_crc6(*pctrl, *plen_ret); + + /* Place CRC MSB (bits 5-4) into ctrl bits 13-14 */ + ctrl = (*pctrl & ~GENMASK(14, 13)); + ctrl |= (crc & 0x30) << 9; + + /* Place CRC LSB (bits 3-0) into len_ret bits 10-13 */ + len_ret = *plen_ret | ((crc & 0x0f) << 10); + + /* Forward ctrl[14] (CRC check flag) to len_ret[14], + * and ctrl[13] (magic, always 1) to len_ret[15]. + */ + len_ret |= (*pctrl & BIT(14)) | ((*pctrl & BIT(13)) << 2); + + *pctrl = ctrl; + *plen_ret = len_ret; +} + +/* Verify CRC-6 on a firmware response and extract the return value. + * + * The firmware encodes the return value as a signed 11-bit integer: + * - Sign bit (bit 10) in ctrl[14] + * - Magnitude (bits 9-0) in len_ret[9:0] + * These are recoverable after CRC-6 verification by restoring the + * original ctrl from the auxiliary copies in len_ret[15:14]. + * + * Return: 0 on CRC match (with *result set), or -EIO on mismatch. + */ +static int mxl862xx_crc6_verify(u16 ctrl, u16 len_ret, int *result) +{ + u16 crc_recv, crc_calc; + + /* Extract the received CRC-6 */ + crc_recv = ((ctrl >> 9) & 0x30) | ((len_ret >> 10) & 0x0f); + + /* Reconstruct the original ctrl for re-computation: + * ctrl[14] = len_ret[14] (sign bit / CRC check flag) + * ctrl[13] = len_ret[15] >> 2 (magic bit) + */ + ctrl &= ~GENMASK(14, 13); + ctrl |= len_ret & BIT(14); + ctrl |= (len_ret & BIT(15)) >> 2; + + crc_calc = mxl862xx_crc6(ctrl, len_ret); + if (crc_recv != crc_calc) + return -EIO; + + /* Extract signed 11-bit return value: + * bit 10 (sign) from ctrl[14], bits 9-0 from len_ret[9:0] + */ + *result = sign_extend32((len_ret & LEN_RET_LEN_MASK) | + ((ctrl & CTRL_CRC_FLAG) >> 4), 10); + + return 0; +} static int mxl862xx_reg_read(struct mxl862xx_priv *priv, u32 addr) { @@ -52,60 +209,78 @@ static int mxl862xx_busy_wait(struct mxl862xx_priv *priv) !(val & CTRL_BUSY_MASK), 15, 500000); } -static int mxl862xx_set_data(struct mxl862xx_priv *priv, u16 words) +/* Issue a firmware command with CRC-6 protection on the ctrl and len_ret + * registers, wait for completion, and verify the response CRC-6. + * + * Return: firmware result value (>= 0) on success, or negative errno. + */ +static int mxl862xx_issue_cmd(struct mxl862xx_priv *priv, u16 cmd, u16 len) { - int ret; - u16 cmd; + u16 ctrl_enc, len_enc; + int ret, fw_result; - ret = mxl862xx_reg_write(priv, MXL862XX_MMD_REG_LEN_RET, - MXL862XX_MMD_REG_DATA_MAX_SIZE * sizeof(u16)); + ctrl_enc = cmd | CTRL_BUSY_MASK | CTRL_CRC_FLAG; + len_enc = len; + mxl862xx_crc6_encode(&ctrl_enc, &len_enc); + + ret = mxl862xx_reg_write(priv, MXL862XX_MMD_REG_LEN_RET, len_enc); if (ret < 0) return ret; + ret = mxl862xx_reg_write(priv, MXL862XX_MMD_REG_CTRL, ctrl_enc); + if (ret < 0) + return ret; + + ret = mxl862xx_busy_wait(priv); + if (ret < 0) + return ret; + + ret = mxl862xx_reg_read(priv, MXL862XX_MMD_REG_CTRL); + if (ret < 0) + return ret; + ctrl_enc = ret; + + ret = mxl862xx_reg_read(priv, MXL862XX_MMD_REG_LEN_RET); + if (ret < 0) + return ret; + len_enc = ret; + + ret = mxl862xx_crc6_verify(ctrl_enc, len_enc, &fw_result); + if (ret) { + if (!test_and_set_bit(0, &priv->crc_err)) + schedule_work(&priv->crc_err_work); + return -EIO; + } + + return fw_result; +} + +static int mxl862xx_set_data(struct mxl862xx_priv *priv, u16 words) +{ + u16 cmd; + cmd = words / MXL862XX_MMD_REG_DATA_MAX_SIZE - 1; if (!(cmd < 2)) return -EINVAL; cmd += MMD_API_SET_DATA_0; - ret = mxl862xx_reg_write(priv, MXL862XX_MMD_REG_CTRL, - cmd | CTRL_BUSY_MASK); - if (ret < 0) - return ret; - return mxl862xx_busy_wait(priv); + return mxl862xx_issue_cmd(priv, cmd, + MXL862XX_MMD_REG_DATA_MAX_SIZE * sizeof(u16)); } static int mxl862xx_get_data(struct mxl862xx_priv *priv, u16 words) { - int ret; u16 cmd; - ret = mxl862xx_reg_write(priv, MXL862XX_MMD_REG_LEN_RET, - MXL862XX_MMD_REG_DATA_MAX_SIZE * sizeof(u16)); - if (ret < 0) - return ret; - cmd = words / MXL862XX_MMD_REG_DATA_MAX_SIZE; if (!(cmd > 0 && cmd < 3)) return -EINVAL; cmd += MMD_API_GET_DATA_0; - ret = mxl862xx_reg_write(priv, MXL862XX_MMD_REG_CTRL, - cmd | CTRL_BUSY_MASK); - if (ret < 0) - return ret; - return mxl862xx_busy_wait(priv); -} - -static int mxl862xx_firmware_return(int ret) -{ - /* Only 16-bit values are valid. */ - if (WARN_ON(ret & GENMASK(31, 16))) - return -EINVAL; - - /* Interpret value as signed 16-bit integer. */ - return (s16)ret; + return mxl862xx_issue_cmd(priv, cmd, + MXL862XX_MMD_REG_DATA_MAX_SIZE * sizeof(u16)); } static int mxl862xx_send_cmd(struct mxl862xx_priv *priv, u16 cmd, u16 size, @@ -113,30 +288,23 @@ static int mxl862xx_send_cmd(struct mxl862xx_priv *priv, u16 cmd, u16 size, { int ret; - ret = mxl862xx_reg_write(priv, MXL862XX_MMD_REG_LEN_RET, size); - if (ret) - return ret; + ret = mxl862xx_issue_cmd(priv, cmd, size); - ret = mxl862xx_reg_write(priv, MXL862XX_MMD_REG_CTRL, - cmd | CTRL_BUSY_MASK); - if (ret) - return ret; - - ret = mxl862xx_busy_wait(priv); - if (ret) - return ret; - - ret = mxl862xx_reg_read(priv, MXL862XX_MMD_REG_LEN_RET); - if (ret < 0) - return ret; - - /* handle errors returned by the firmware as -EIO + /* Handle errors returned by the firmware as -EIO. * The firmware is based on Zephyr OS and uses the errors as * defined in errno.h of Zephyr OS. See * https://github.com/zephyrproject-rtos/zephyr/blob/v3.7.0/lib/libc/minimal/include/errno.h + * + * The firmware signals CRC validation failures with dedicated + * error codes outside the normal Zephyr errno range: + * -1024: CRC-6 mismatch on ctrl/len_ret registers + * -1023: CRC-16 mismatch on data payload */ - ret = mxl862xx_firmware_return(ret); if (ret < 0) { + if ((ret == MXL862XX_FW_CRC6_ERR || + ret == MXL862XX_FW_CRC16_ERR) && + !test_and_set_bit(0, &priv->crc_err)) + schedule_work(&priv->crc_err_work); if (!quiet) dev_err(&priv->mdiodev->dev, "CMD %04x returned error %d\n", cmd, ret); @@ -151,7 +319,7 @@ int mxl862xx_api_wrap(struct mxl862xx_priv *priv, u16 cmd, void *_data, { __le16 *data = _data; int ret, cmd_ret; - u16 max, i; + u16 max, crc, i; dev_dbg(&priv->mdiodev->dev, "CMD %04x DATA %*ph\n", cmd, size, data); @@ -163,26 +331,45 @@ int mxl862xx_api_wrap(struct mxl862xx_priv *priv, u16 cmd, void *_data, if (ret < 0) goto out; - for (i = 0; i < max; i++) { + /* Compute CRC-16 over the data payload; written as an extra word + * after the data so the firmware can verify the transfer. + */ + crc = crc16(0xffff, (const u8 *)data, size); + + for (i = 0; i < max + 1; i++) { u16 off = i % MXL862XX_MMD_REG_DATA_MAX_SIZE; + u16 val; if (i && off == 0) { /* Send command to set data when every * MXL862XX_MMD_REG_DATA_MAX_SIZE of WORDs are written. - */ + */ ret = mxl862xx_set_data(priv, i); if (ret < 0) goto out; } - if ((i * 2 + 1) == size) - ret = mxl862xx_reg_write(priv, - MXL862XX_MMD_REG_DATA_FIRST + off, - *(u8 *)&data[i]); - else - ret = mxl862xx_reg_write(priv, - MXL862XX_MMD_REG_DATA_FIRST + off, - le16_to_cpu(data[i])); + if (i == max) { + /* Even size: full CRC word. + * Odd size: only CRC high byte remains (low byte + * was packed into the previous word). + */ + val = (size & 1) ? crc >> 8 : crc; + } else if ((i * 2 + 1) == size) { + /* Special handling for last BYTE if it's not WORD + * aligned to avoid reading beyond the allocated data + * structure. Pack the CRC low byte into the high + * byte of this word so it sits at byte offset 'size' + * in the firmware's contiguous buffer. + */ + val = *(u8 *)&data[i] | ((crc & 0xff) << 8); + } else { + val = le16_to_cpu(data[i]); + } + + ret = mxl862xx_reg_write(priv, + MXL862XX_MMD_REG_DATA_FIRST + off, + val); if (ret < 0) goto out; } @@ -194,13 +381,13 @@ int mxl862xx_api_wrap(struct mxl862xx_priv *priv, u16 cmd, void *_data, /* store result of mxl862xx_send_cmd() */ cmd_ret = ret; - for (i = 0; i < max; i++) { + for (i = 0; i < max + 1; i++) { u16 off = i % MXL862XX_MMD_REG_DATA_MAX_SIZE; if (i && off == 0) { /* Send command to fetch next batch of data when every * MXL862XX_MMD_REG_DATA_MAX_SIZE of WORDs are read. - */ + */ ret = mxl862xx_get_data(priv, i); if (ret < 0) goto out; @@ -210,17 +397,35 @@ int mxl862xx_api_wrap(struct mxl862xx_priv *priv, u16 cmd, void *_data, if (ret < 0) goto out; - if ((i * 2 + 1) == size) { + if (i == max) { + /* Even size: full CRC word. + * Odd size: only CRC high byte remains (low byte + * was in the previous word). + */ + if (size & 1) + crc = (crc & 0x00ff) | + (((u16)ret & 0xff) << 8); + else + crc = (u16)ret; + } else if ((i * 2 + 1) == size) { /* Special handling for last BYTE if it's not WORD * aligned to avoid writing beyond the allocated data - * structure. + * structure. The high byte carries the CRC low byte. */ *(uint8_t *)&data[i] = ret & 0xff; + crc = (ret >> 8) & 0xff; } else { data[i] = cpu_to_le16((u16)ret); } } + if (crc16(0xffff, (const u8 *)data, size) != crc) { + if (!test_and_set_bit(0, &priv->crc_err)) + schedule_work(&priv->crc_err_work); + ret = -EIO; + goto out; + } + /* on success return the result of the mxl862xx_send_cmd() */ ret = cmd_ret; @@ -249,3 +454,13 @@ out: return ret; } + +void mxl862xx_host_init(struct mxl862xx_priv *priv) +{ + INIT_WORK(&priv->crc_err_work, mxl862xx_crc_err_work_fn); +} + +void mxl862xx_host_shutdown(struct mxl862xx_priv *priv) +{ + cancel_work_sync(&priv->crc_err_work); +} diff --git a/drivers/net/dsa/mxl862xx/mxl862xx-host.h b/drivers/net/dsa/mxl862xx/mxl862xx-host.h index 7cc496f6be5c..84512a30bc18 100644 --- a/drivers/net/dsa/mxl862xx/mxl862xx-host.h +++ b/drivers/net/dsa/mxl862xx/mxl862xx-host.h @@ -5,6 +5,8 @@ #include "mxl862xx.h" +void mxl862xx_host_init(struct mxl862xx_priv *priv); +void mxl862xx_host_shutdown(struct mxl862xx_priv *priv); int mxl862xx_api_wrap(struct mxl862xx_priv *priv, u16 cmd, void *data, u16 size, bool read, bool quiet); int mxl862xx_reset(struct mxl862xx_priv *priv); diff --git a/drivers/net/dsa/mxl862xx/mxl862xx.c b/drivers/net/dsa/mxl862xx/mxl862xx.c index c825704c9e9e..78eef639628a 100644 --- a/drivers/net/dsa/mxl862xx/mxl862xx.c +++ b/drivers/net/dsa/mxl862xx/mxl862xx.c @@ -424,6 +424,7 @@ static int mxl862xx_probe(struct mdio_device *mdiodev) ds->ops = &mxl862xx_switch_ops; ds->phylink_mac_ops = &mxl862xx_phylink_mac_ops; ds->num_ports = MXL862XX_MAX_PORTS; + mxl862xx_host_init(priv); dev_set_drvdata(dev, ds); @@ -433,22 +434,32 @@ static int mxl862xx_probe(struct mdio_device *mdiodev) static void mxl862xx_remove(struct mdio_device *mdiodev) { struct dsa_switch *ds = dev_get_drvdata(&mdiodev->dev); + struct mxl862xx_priv *priv; if (!ds) return; + priv = ds->priv; + dsa_unregister_switch(ds); + + mxl862xx_host_shutdown(priv); } static void mxl862xx_shutdown(struct mdio_device *mdiodev) { struct dsa_switch *ds = dev_get_drvdata(&mdiodev->dev); + struct mxl862xx_priv *priv; if (!ds) return; + priv = ds->priv; + dsa_switch_shutdown(ds); + mxl862xx_host_shutdown(priv); + dev_set_drvdata(&mdiodev->dev, NULL); } diff --git a/drivers/net/dsa/mxl862xx/mxl862xx.h b/drivers/net/dsa/mxl862xx/mxl862xx.h index bfeb436942d5..3ca0d386f3e8 100644 --- a/drivers/net/dsa/mxl862xx/mxl862xx.h +++ b/drivers/net/dsa/mxl862xx/mxl862xx.h @@ -11,6 +11,8 @@ struct mxl862xx_priv { struct dsa_switch *ds; struct mdio_device *mdiodev; + struct work_struct crc_err_work; + unsigned long crc_err; }; #endif /* __MXL862XX_H */ From 7c20f6c1cfb15ad6adc9a312973a48113cbf49f0 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Sun, 22 Mar 2026 13:27:26 +0000 Subject: [PATCH 0863/1561] net: dsa: mxl862xx: use RST_DATA to skip writing zero words Issue the firmware's RST_DATA command before writing data payloads that contain many zero words. RST_DATA zeroes both the firmware's internal buffer and the MMD data registers in a single command, allowing the driver to skip individual MDIO writes for zero-valued words. This reduces bus traffic for the common case where API structs have many unused or default-zero fields. The optimization is applied when at least 5 zero words are found in the payload, roughly the break-even point against the cost of the extra RST_DATA command round-trip. Signed-off-by: Daniel Golle Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/d10bd6ad5df062d0da342c3e0d330550b3d2432b.1774185953.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mxl862xx/mxl862xx-host.c | 38 ++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/drivers/net/dsa/mxl862xx/mxl862xx-host.c b/drivers/net/dsa/mxl862xx/mxl862xx-host.c index 0b7f8db31825..cadbdb590cf4 100644 --- a/drivers/net/dsa/mxl862xx/mxl862xx-host.c +++ b/drivers/net/dsa/mxl862xx/mxl862xx-host.c @@ -283,6 +283,17 @@ static int mxl862xx_get_data(struct mxl862xx_priv *priv, u16 words) MXL862XX_MMD_REG_DATA_MAX_SIZE * sizeof(u16)); } +static int mxl862xx_rst_data(struct mxl862xx_priv *priv) +{ + return mxl862xx_issue_cmd(priv, MMD_API_RST_DATA, 0); +} + +/* Minimum number of zero words in the data payload before issuing a + * RST_DATA command is worthwhile. RST_DATA costs one full command + * round-trip (~5 MDIO transactions), so the threshold must offset that. + */ +#define RST_DATA_THRESHOLD 5 + static int mxl862xx_send_cmd(struct mxl862xx_priv *priv, u16 cmd, u16 size, bool quiet) { @@ -318,6 +329,8 @@ int mxl862xx_api_wrap(struct mxl862xx_priv *priv, u16 cmd, void *_data, u16 size, bool read, bool quiet) { __le16 *data = _data; + bool use_rst = false; + unsigned int zeros; int ret, cmd_ret; u16 max, crc, i; @@ -331,6 +344,24 @@ int mxl862xx_api_wrap(struct mxl862xx_priv *priv, u16 cmd, void *_data, if (ret < 0) goto out; + /* If the data contains enough zero words, issue RST_DATA to zero + * both the firmware buffer and MMD registers, then skip writing + * zero words individually. + */ + for (i = 0, zeros = 0; i < size / 2 && zeros < RST_DATA_THRESHOLD; i++) + if (!data[i]) + zeros++; + + if (zeros < RST_DATA_THRESHOLD && (size & 1) && !*(u8 *)&data[i]) + zeros++; + + if (zeros >= RST_DATA_THRESHOLD) { + ret = mxl862xx_rst_data(priv); + if (ret < 0) + goto out; + use_rst = true; + } + /* Compute CRC-16 over the data payload; written as an extra word * after the data so the firmware can verify the transfer. */ @@ -367,6 +398,13 @@ int mxl862xx_api_wrap(struct mxl862xx_priv *priv, u16 cmd, void *_data, val = le16_to_cpu(data[i]); } + /* After RST_DATA, skip zero data words as the registers + * already contain zeros, but never skip the CRC word at the + * final word. + */ + if (use_rst && i < max && val == 0) + continue; + ret = mxl862xx_reg_write(priv, MXL862XX_MMD_REG_DATA_FIRST + off, val); From 112f4c6320070b19e7d49cba758400adc279e377 Mon Sep 17 00:00:00 2001 From: Bobby Eshleman Date: Mon, 23 Mar 2026 17:08:10 -0700 Subject: [PATCH 0864/1561] selftests: drv-net: add missing tc config options for netkit tests The NetDrvContEnv env context uses tc clsact qdiscs and BPF tc filters for traffic redirection, but the kernel config options are missing from the selftests config. Without them, the tc qdisc installation trips on: CMD: tc qdisc add dev enp1s0 clsact EXIT: 2 STDERR: Error: Specified qdisc kind is unknown. net.lib.py.utils.CmdExitFailure: Command failed Add CONFIG_NET_CLS_ACT and CONFIG_NET_SCH_INGRESS to enable these tc options. Fixes: 3f74d5bb807e ("selftests/net: Add env for container based tests") Signed-off-by: Bobby Eshleman Link: https://patch.msgid.link/20260323-config-fixes-for-nk-tests-v2-1-6c505d83e52d@meta.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/drivers/net/hw/config b/tools/testing/selftests/drivers/net/hw/config index 235c0a1cd21e..dd50cb8a7911 100644 --- a/tools/testing/selftests/drivers/net/hw/config +++ b/tools/testing/selftests/drivers/net/hw/config @@ -6,9 +6,11 @@ CONFIG_FUNCTION_ERROR_INJECTION=y CONFIG_IO_URING=y CONFIG_IPV6=y CONFIG_IPV6_GRE=y +CONFIG_NET_CLS_ACT=y CONFIG_NET_CLS_BPF=y CONFIG_NET_IPGRE=y CONFIG_NET_IPGRE_DEMUX=y CONFIG_NETKIT=y +CONFIG_NET_SCH_INGRESS=y CONFIG_UDMABUF=y CONFIG_VXLAN=y From d1e59a46973719e458bec78d00dd767d7a7ba71f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 23 Mar 2026 23:49:20 +0000 Subject: [PATCH 0865/1561] tcp: add cwnd_event_tx_start to tcp_congestion_ops (tcp_congestion_ops)->cwnd_event() is called very often, with @event oscillating between CA_EVENT_TX_START and other values. This is not branch prediction friendly. Provide a new cwnd_event_tx_start pointer dedicated for CA_EVENT_TX_START. Both BBR and CUBIC benefit from this change, since they only care about CA_EVENT_TX_START. No change in kernel size: $ scripts/bloat-o-meter -t vmlinux.0 vmlinux add/remove: 4/4 grow/shrink: 3/1 up/down: 564/-568 (-4) Function old new delta bbr_cwnd_event_tx_start - 450 +450 cubictcp_cwnd_event_tx_start - 70 +70 __pfx_cubictcp_cwnd_event_tx_start - 16 +16 __pfx_bbr_cwnd_event_tx_start - 16 +16 tcp_unregister_congestion_control 93 99 +6 tcp_update_congestion_control 518 521 +3 tcp_register_congestion_control 422 425 +3 __tcp_transmit_skb 3308 3306 -2 __pfx_cubictcp_cwnd_event 16 - -16 __pfx_bbr_cwnd_event 16 - -16 cubictcp_cwnd_event 80 - -80 bbr_cwnd_event 454 - -454 Total: Before=25240512, After=25240508, chg -0.00% Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260323234920.1097858-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 8 +++++ net/ipv4/bpf_tcp_ca.c | 5 +++ net/ipv4/tcp_bbr.c | 8 ++--- net/ipv4/tcp_cubic.c | 31 +++++++++---------- net/ipv4/tcp_dctcp.c | 12 +++++-- net/ipv4/tcp_vegas.c | 9 ++++-- net/ipv4/tcp_vegas.h | 1 + net/ipv4/tcp_veno.c | 8 ++++- net/ipv4/tcp_yeah.c | 1 + .../selftests/bpf/progs/bpf_cc_cubic.c | 8 ++--- tools/testing/selftests/bpf/progs/bpf_cubic.c | 29 ++++++++--------- .../selftests/bpf/progs/tcp_ca_kfunc.c | 16 +++++++--- 12 files changed, 85 insertions(+), 51 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index f87bdacb5a69..39ff4cf3c810 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1341,6 +1341,9 @@ struct tcp_congestion_ops { /* call when cwnd event occurs (optional) */ void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev); + /* call when CA_EVENT_TX_START cwnd event occurs (optional) */ + void (*cwnd_event_tx_start)(struct sock *sk); + /* call when ack arrives (optional) */ void (*in_ack_event)(struct sock *sk, u32 flags); @@ -1440,6 +1443,11 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) { const struct inet_connection_sock *icsk = inet_csk(sk); + if (event == CA_EVENT_TX_START) { + if (icsk->icsk_ca_ops->cwnd_event_tx_start) + icsk->icsk_ca_ops->cwnd_event_tx_start(sk); + return; + } if (icsk->icsk_ca_ops->cwnd_event) icsk->icsk_ca_ops->cwnd_event(sk, event); } diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index e01492234b0b..008edc7f6688 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -272,6 +272,10 @@ static void bpf_tcp_ca_cwnd_event(struct sock *sk, enum tcp_ca_event ev) { } +static void bpf_tcp_ca_cwnd_event_tx_start(struct sock *sk) +{ +} + static void bpf_tcp_ca_in_ack_event(struct sock *sk, u32 flags) { } @@ -313,6 +317,7 @@ static struct tcp_congestion_ops __bpf_ops_tcp_congestion_ops = { .cong_avoid = bpf_tcp_ca_cong_avoid, .set_state = bpf_tcp_ca_set_state, .cwnd_event = bpf_tcp_ca_cwnd_event, + .cwnd_event_tx_start = bpf_tcp_ca_cwnd_event_tx_start, .in_ack_event = bpf_tcp_ca_in_ack_event, .pkts_acked = bpf_tcp_ca_pkts_acked, .min_tso_segs = bpf_tcp_ca_min_tso_segs, diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 05d52372ca8f..1ddc20a399b0 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -330,12 +330,12 @@ static void bbr_save_cwnd(struct sock *sk) bbr->prior_cwnd = max(bbr->prior_cwnd, tcp_snd_cwnd(tp)); } -__bpf_kfunc static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) +__bpf_kfunc static void bbr_cwnd_event_tx_start(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); - if (event == CA_EVENT_TX_START && tp->app_limited) { + if (tp->app_limited) { bbr->idle_restart = 1; bbr->ack_epoch_mstamp = tp->tcp_mstamp; bbr->ack_epoch_acked = 0; @@ -1149,7 +1149,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = { .cong_control = bbr_main, .sndbuf_expand = bbr_sndbuf_expand, .undo_cwnd = bbr_undo_cwnd, - .cwnd_event = bbr_cwnd_event, + .cwnd_event_tx_start = bbr_cwnd_event_tx_start, .ssthresh = bbr_ssthresh, .min_tso_segs = bbr_min_tso_segs, .get_info = bbr_get_info, @@ -1161,7 +1161,7 @@ BTF_ID_FLAGS(func, bbr_init) BTF_ID_FLAGS(func, bbr_main) BTF_ID_FLAGS(func, bbr_sndbuf_expand) BTF_ID_FLAGS(func, bbr_undo_cwnd) -BTF_ID_FLAGS(func, bbr_cwnd_event) +BTF_ID_FLAGS(func, bbr_cwnd_event_tx_start) BTF_ID_FLAGS(func, bbr_ssthresh) BTF_ID_FLAGS(func, bbr_min_tso_segs) BTF_ID_FLAGS(func, bbr_set_state) diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 76c23675ae50..ab78b5ae8d0e 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -139,24 +139,21 @@ __bpf_kfunc static void cubictcp_init(struct sock *sk) tcp_sk(sk)->snd_ssthresh = initial_ssthresh; } -__bpf_kfunc static void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event) +__bpf_kfunc static void cubictcp_cwnd_event_tx_start(struct sock *sk) { - if (event == CA_EVENT_TX_START) { - struct bictcp *ca = inet_csk_ca(sk); - u32 now = tcp_jiffies32; - s32 delta; + struct bictcp *ca = inet_csk_ca(sk); + u32 now = tcp_jiffies32; + s32 delta; - delta = now - tcp_sk(sk)->lsndtime; + delta = now - tcp_sk(sk)->lsndtime; - /* We were application limited (idle) for a while. - * Shift epoch_start to keep cwnd growth to cubic curve. - */ - if (ca->epoch_start && delta > 0) { - ca->epoch_start += delta; - if (after(ca->epoch_start, now)) - ca->epoch_start = now; - } - return; + /* We were application limited (idle) for a while. + * Shift epoch_start to keep cwnd growth to cubic curve. + */ + if (ca->epoch_start && delta > 0) { + ca->epoch_start += delta; + if (after(ca->epoch_start, now)) + ca->epoch_start = now; } } @@ -481,7 +478,7 @@ static struct tcp_congestion_ops cubictcp __read_mostly = { .cong_avoid = cubictcp_cong_avoid, .set_state = cubictcp_state, .undo_cwnd = tcp_reno_undo_cwnd, - .cwnd_event = cubictcp_cwnd_event, + .cwnd_event_tx_start = cubictcp_cwnd_event_tx_start, .pkts_acked = cubictcp_acked, .owner = THIS_MODULE, .name = "cubic", @@ -492,7 +489,7 @@ BTF_ID_FLAGS(func, cubictcp_init) BTF_ID_FLAGS(func, cubictcp_recalc_ssthresh) BTF_ID_FLAGS(func, cubictcp_cong_avoid) BTF_ID_FLAGS(func, cubictcp_state) -BTF_ID_FLAGS(func, cubictcp_cwnd_event) +BTF_ID_FLAGS(func, cubictcp_cwnd_event_tx_start) BTF_ID_FLAGS(func, cubictcp_acked) BTF_KFUNCS_END(tcp_cubic_check_kfunc_ids) diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 03abe0848420..96c99999e09d 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -203,15 +203,19 @@ __bpf_kfunc static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev) tcp_plb_update_state_upon_rto(sk, &ca->plb); dctcp_react_to_loss(sk); break; - case CA_EVENT_TX_START: - tcp_plb_check_rehash(sk, &ca->plb); /* Maybe rehash when inflight is 0 */ - break; default: /* Don't care for the rest. */ break; } } +__bpf_kfunc static void dctcp_cwnd_event_tx_start(struct sock *sk) +{ + struct dctcp *ca = inet_csk_ca(sk); + + tcp_plb_check_rehash(sk, &ca->plb); /* Maybe rehash when inflight is 0 */ +} + static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr, union tcp_cc_info *info) { @@ -252,6 +256,7 @@ static struct tcp_congestion_ops dctcp __read_mostly = { .init = dctcp_init, .in_ack_event = dctcp_update_alpha, .cwnd_event = dctcp_cwnd_event, + .cwnd_event_tx_start = dctcp_cwnd_event_tx_start, .ssthresh = dctcp_ssthresh, .cong_avoid = tcp_reno_cong_avoid, .undo_cwnd = dctcp_cwnd_undo, @@ -275,6 +280,7 @@ BTF_KFUNCS_START(tcp_dctcp_check_kfunc_ids) BTF_ID_FLAGS(func, dctcp_init) BTF_ID_FLAGS(func, dctcp_update_alpha) BTF_ID_FLAGS(func, dctcp_cwnd_event) +BTF_ID_FLAGS(func, dctcp_cwnd_event_tx_start) BTF_ID_FLAGS(func, dctcp_ssthresh) BTF_ID_FLAGS(func, dctcp_cwnd_undo) BTF_ID_FLAGS(func, dctcp_state) diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 786848ad37ea..cf12fb6be079 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -151,12 +151,17 @@ EXPORT_SYMBOL_GPL(tcp_vegas_state); */ void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event) { - if (event == CA_EVENT_CWND_RESTART || - event == CA_EVENT_TX_START) + if (event == CA_EVENT_CWND_RESTART) tcp_vegas_init(sk); } EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event); +void tcp_vegas_cwnd_event_tx_start(struct sock *sk) +{ + tcp_vegas_init(sk); +} +EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event_tx_start); + static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp) { return min(tp->snd_ssthresh, tcp_snd_cwnd(tp)); diff --git a/net/ipv4/tcp_vegas.h b/net/ipv4/tcp_vegas.h index 4f24d0e37d9c..602af8e600c7 100644 --- a/net/ipv4/tcp_vegas.h +++ b/net/ipv4/tcp_vegas.h @@ -20,6 +20,7 @@ void tcp_vegas_init(struct sock *sk); void tcp_vegas_state(struct sock *sk, u8 ca_state); void tcp_vegas_pkts_acked(struct sock *sk, const struct ack_sample *sample); void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event); +void tcp_vegas_cwnd_event_tx_start(struct sock *sk); size_t tcp_vegas_get_info(struct sock *sk, u32 ext, int *attr, union tcp_cc_info *info); diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index 366ff6f214b2..1b2e1b947901 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -112,10 +112,15 @@ static void tcp_veno_state(struct sock *sk, u8 ca_state) */ static void tcp_veno_cwnd_event(struct sock *sk, enum tcp_ca_event event) { - if (event == CA_EVENT_CWND_RESTART || event == CA_EVENT_TX_START) + if (event == CA_EVENT_CWND_RESTART) tcp_veno_init(sk); } +static void tcp_veno_cwnd_event_tx_start(struct sock *sk) +{ + tcp_veno_init(sk); +} + static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); @@ -213,6 +218,7 @@ static struct tcp_congestion_ops tcp_veno __read_mostly = { .pkts_acked = tcp_veno_pkts_acked, .set_state = tcp_veno_state, .cwnd_event = tcp_veno_cwnd_event, + .cwnd_event_tx_start = tcp_veno_cwnd_event_tx_start, .owner = THIS_MODULE, .name = "veno", diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index 18b07ff5d20e..b22b3dccd05e 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -212,6 +212,7 @@ static struct tcp_congestion_ops tcp_yeah __read_mostly = { .cong_avoid = tcp_yeah_cong_avoid, .set_state = tcp_vegas_state, .cwnd_event = tcp_vegas_cwnd_event, + .cwnd_event_tx_start = tcp_vegas_cwnd_event_tx_start, .get_info = tcp_vegas_get_info, .pkts_acked = tcp_vegas_pkts_acked, diff --git a/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c index 9af19dfe4e80..bccf677b94b6 100644 --- a/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c +++ b/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c @@ -23,7 +23,7 @@ #define TCP_REORDERING (12) extern void cubictcp_init(struct sock *sk) __ksym; -extern void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event) __ksym; +extern void cubictcp_cwnd_event_tx_start(struct sock *sk) __ksym; extern __u32 cubictcp_recalc_ssthresh(struct sock *sk) __ksym; extern void cubictcp_state(struct sock *sk, __u8 new_state) __ksym; extern __u32 tcp_reno_undo_cwnd(struct sock *sk) __ksym; @@ -108,9 +108,9 @@ void BPF_PROG(bpf_cubic_init, struct sock *sk) } SEC("struct_ops") -void BPF_PROG(bpf_cubic_cwnd_event, struct sock *sk, enum tcp_ca_event event) +void BPF_PROG(bpf_cubic_cwnd_event_tx_start, struct sock *sk) { - cubictcp_cwnd_event(sk, event); + cubictcp_cwnd_event_tx_start(sk); } SEC("struct_ops") @@ -172,7 +172,7 @@ struct tcp_congestion_ops cc_cubic = { .cong_control = (void *)bpf_cubic_cong_control, .set_state = (void *)bpf_cubic_state, .undo_cwnd = (void *)bpf_cubic_undo_cwnd, - .cwnd_event = (void *)bpf_cubic_cwnd_event, + .cwnd_event_tx_start = (void *)bpf_cubic_cwnd_event_tx_start, .pkts_acked = (void *)bpf_cubic_acked, .name = "bpf_cc_cubic", }; diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c index 46fb2b37d3a7..ce18a4db813f 100644 --- a/tools/testing/selftests/bpf/progs/bpf_cubic.c +++ b/tools/testing/selftests/bpf/progs/bpf_cubic.c @@ -185,24 +185,21 @@ void BPF_PROG(bpf_cubic_init, struct sock *sk) } SEC("struct_ops") -void BPF_PROG(bpf_cubic_cwnd_event, struct sock *sk, enum tcp_ca_event event) +void BPF_PROG(bpf_cubic_cwnd_event_tx_start, struct sock *sk) { - if (event == CA_EVENT_TX_START) { - struct bpf_bictcp *ca = inet_csk_ca(sk); - __u32 now = tcp_jiffies32; - __s32 delta; + struct bpf_bictcp *ca = inet_csk_ca(sk); + __u32 now = tcp_jiffies32; + __s32 delta; - delta = now - tcp_sk(sk)->lsndtime; + delta = now - tcp_sk(sk)->lsndtime; - /* We were application limited (idle) for a while. - * Shift epoch_start to keep cwnd growth to cubic curve. - */ - if (ca->epoch_start && delta > 0) { - ca->epoch_start += delta; - if (after(ca->epoch_start, now)) - ca->epoch_start = now; - } - return; + /* We were application limited (idle) for a while. + * Shift epoch_start to keep cwnd growth to cubic curve. + */ + if (ca->epoch_start && delta > 0) { + ca->epoch_start += delta; + if (after(ca->epoch_start, now)) + ca->epoch_start = now; } } @@ -537,7 +534,7 @@ struct tcp_congestion_ops cubic = { .cong_avoid = (void *)bpf_cubic_cong_avoid, .set_state = (void *)bpf_cubic_state, .undo_cwnd = (void *)bpf_cubic_undo_cwnd, - .cwnd_event = (void *)bpf_cubic_cwnd_event, + .cwnd_event_tx_start = (void *)bpf_cubic_cwnd_event_tx_start, .pkts_acked = (void *)bpf_cubic_acked, .name = "bpf_cubic", }; diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_kfunc.c b/tools/testing/selftests/bpf/progs/tcp_ca_kfunc.c index f95862f570b7..0a3e9d35bf6f 100644 --- a/tools/testing/selftests/bpf/progs/tcp_ca_kfunc.c +++ b/tools/testing/selftests/bpf/progs/tcp_ca_kfunc.c @@ -8,7 +8,7 @@ extern void bbr_init(struct sock *sk) __ksym; extern void bbr_main(struct sock *sk, u32 ack, int flag, const struct rate_sample *rs) __ksym; extern u32 bbr_sndbuf_expand(struct sock *sk) __ksym; extern u32 bbr_undo_cwnd(struct sock *sk) __ksym; -extern void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) __ksym; +extern void bbr_cwnd_event_tx_start(struct sock *sk) __ksym; extern u32 bbr_ssthresh(struct sock *sk) __ksym; extern u32 bbr_min_tso_segs(struct sock *sk) __ksym; extern void bbr_set_state(struct sock *sk, u8 new_state) __ksym; @@ -16,6 +16,7 @@ extern void bbr_set_state(struct sock *sk, u8 new_state) __ksym; extern void dctcp_init(struct sock *sk) __ksym; extern void dctcp_update_alpha(struct sock *sk, u32 flags) __ksym; extern void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev) __ksym; +extern void dctcp_cwnd_event_tx_start(struct sock *sk) __ksym; extern u32 dctcp_ssthresh(struct sock *sk) __ksym; extern u32 dctcp_cwnd_undo(struct sock *sk) __ksym; extern void dctcp_state(struct sock *sk, u8 new_state) __ksym; @@ -24,7 +25,7 @@ extern void cubictcp_init(struct sock *sk) __ksym; extern u32 cubictcp_recalc_ssthresh(struct sock *sk) __ksym; extern void cubictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) __ksym; extern void cubictcp_state(struct sock *sk, u8 new_state) __ksym; -extern void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event) __ksym; +extern void cubictcp_cwnd_event_tx_start(struct sock *sk) __ksym; extern void cubictcp_acked(struct sock *sk, const struct ack_sample *sample) __ksym; SEC("struct_ops") @@ -69,9 +70,15 @@ u32 BPF_PROG(undo_cwnd, struct sock *sk) SEC("struct_ops") void BPF_PROG(cwnd_event, struct sock *sk, enum tcp_ca_event event) { - bbr_cwnd_event(sk, event); dctcp_cwnd_event(sk, event); - cubictcp_cwnd_event(sk, event); +} + +SEC("struct_ops") +void BPF_PROG(cwnd_event_tx_start, struct sock *sk) +{ + bbr_cwnd_event_tx_start(sk); + dctcp_cwnd_event_tx_start(sk); + cubictcp_cwnd_event_tx_start(sk); } SEC("struct_ops") @@ -111,6 +118,7 @@ struct tcp_congestion_ops tcp_ca_kfunc = { .sndbuf_expand = (void *)sndbuf_expand, .undo_cwnd = (void *)undo_cwnd, .cwnd_event = (void *)cwnd_event, + .cwnd_event_tx_start = (void *)cwnd_event_tx_start, .ssthresh = (void *)ssthresh, .min_tso_segs = (void *)min_tso_segs, .set_state = (void *)set_state, From 5dac9abd3f187f0ed9bd3cc4f9c06a0729c8607b Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Thu, 20 Nov 2025 11:08:48 +0100 Subject: [PATCH 0866/1561] wifi: iwlwifi: replace use of system_unbound_wq with system_dfl_wq This patch continues the effort to refactor workqueue APIs, which has begun with the changes introducing new workqueues and a new alloc_workqueue flag: commit 128ea9f6ccfb ("workqueue: Add system_percpu_wq and system_dfl_wq") commit 930c2ea566af ("workqueue: Add new WQ_PERCPU flag") The point of the refactoring is to eventually alter the default behavior of workqueues to become unbound by default so that their workload placement is optimized by the scheduler. Before that to happen after a careful review and conversion of each individual case, workqueue users must be converted to the better named new workqueues with no intended behaviour changes: system_wq -> system_percpu_wq system_unbound_wq -> system_dfl_wq This way the old obsolete workqueues (system_wq, system_unbound_wq) can be removed in the future. Suggested-by: Tejun Heo Signed-off-by: Marco Crivellari Link: https://patch.msgid.link/20251120100850.66192-2-marco.crivellari@suse.com Signed-off-by: Miri Korenblit --- drivers/net/wireless/intel/iwlwifi/iwl-trans.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h index 688f9fee2821..aa0952a011e0 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h @@ -1088,7 +1088,7 @@ static inline void iwl_trans_schedule_reset(struct iwl_trans *trans, */ trans->restart.during_reset = test_bit(STATUS_IN_SW_RESET, &trans->status); - queue_delayed_work(system_unbound_wq, &trans->restart.wk, 0); + queue_delayed_work(system_dfl_wq, &trans->restart.wk, 0); } static inline void iwl_trans_fw_error(struct iwl_trans *trans, From 900c899bd8dd06e4fd5343521dfca565b93adfed Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Thu, 20 Nov 2025 11:08:49 +0100 Subject: [PATCH 0867/1561] wifi: iwlwifi: fw: replace use of system_unbound_wq with system_dfl_wq This patch continues the effort to refactor workqueue APIs, which has begun with the changes introducing new workqueues and a new alloc_workqueue flag: commit 128ea9f6ccfb ("workqueue: Add system_percpu_wq and system_dfl_wq") commit 930c2ea566af ("workqueue: Add new WQ_PERCPU flag") The point of the refactoring is to eventually alter the default behavior of workqueues to become unbound by default so that their workload placement is optimized by the scheduler. Before that to happen after a careful review and conversion of each individual case, workqueue users must be converted to the better named new workqueues with no intended behaviour changes: system_wq -> system_percpu_wq system_unbound_wq -> system_dfl_wq This way the old obsolete workqueues (system_wq, system_unbound_wq) can be removed in the future. Suggested-by: Tejun Heo Signed-off-by: Marco Crivellari Link: https://patch.msgid.link/20251120100850.66192-3-marco.crivellari@suse.com Signed-off-by: Miri Korenblit --- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index 1f26d89fc908..0cffa5493704 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -2933,7 +2933,7 @@ int iwl_fw_dbg_collect_desc(struct iwl_fw_runtime *fwrt, IWL_WARN(fwrt, "Collecting data: trigger %d fired.\n", le32_to_cpu(desc->trig_desc.type)); - queue_delayed_work(system_unbound_wq, &wk_data->wk, + queue_delayed_work(system_dfl_wq, &wk_data->wk, usecs_to_jiffies(delay)); return 0; @@ -3236,7 +3236,7 @@ int iwl_fw_dbg_ini_collect(struct iwl_fw_runtime *fwrt, if (sync) iwl_fw_dbg_collect_sync(fwrt, idx); else - queue_delayed_work(system_unbound_wq, + queue_delayed_work(system_dfl_wq, &fwrt->dump.wks[idx].wk, usecs_to_jiffies(delay)); From bb0c0aa30ff8c6f558433ab62a012bd620cec889 Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Tue, 10 Feb 2026 15:33:32 +0100 Subject: [PATCH 0868/1561] wifi: iwlwifi: mvm: replace use of system_wq with system_percpu_wq This patch continues the effort to refactor workqueue APIs, which has begun with the changes introducing new workqueues and a new alloc_workqueue flag: commit 128ea9f6ccfb ("workqueue: Add system_percpu_wq and system_dfl_wq") commit 930c2ea566af ("workqueue: Add new WQ_PERCPU flag") The point of the refactoring is to eventually alter the default behavior of workqueues to become unbound by default so that their workload placement is optimized by the scheduler. Before that to happen after a careful review and conversion of each individual case, workqueue users must be converted to the better named new workqueues with no intended behaviour changes: system_wq -> system_percpu_wq system_unbound_wq -> system_dfl_wq This way the old obsolete workqueues (system_wq, system_unbound_wq) can be removed in the future. Suggested-by: Tejun Heo Signed-off-by: Marco Crivellari Link: https://patch.msgid.link/20260210143332.206146-4-marco.crivellari@suse.com Signed-off-by: Miri Korenblit --- drivers/net/wireless/intel/iwlwifi/mvm/tdls.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tdls.c b/drivers/net/wireless/intel/iwlwifi/mvm/tdls.c index 4945ebf19f6b..a7cd2e4ba1ae 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tdls.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tdls.c @@ -234,7 +234,7 @@ void iwl_mvm_rx_tdls_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb) * Also convert TU to msec. */ delay = TU_TO_MS(vif->bss_conf.dtim_period * vif->bss_conf.beacon_int); - mod_delayed_work(system_wq, &mvm->tdls_cs.dwork, + mod_delayed_work(system_percpu_wq, &mvm->tdls_cs.dwork, msecs_to_jiffies(delay)); iwl_mvm_tdls_update_cs_state(mvm, IWL_MVM_TDLS_SW_ACTIVE); @@ -548,7 +548,7 @@ iwl_mvm_tdls_channel_switch(struct ieee80211_hw *hw, */ delay = 2 * TU_TO_MS(vif->bss_conf.dtim_period * vif->bss_conf.beacon_int); - mod_delayed_work(system_wq, &mvm->tdls_cs.dwork, + mod_delayed_work(system_percpu_wq, &mvm->tdls_cs.dwork, msecs_to_jiffies(delay)); return 0; } @@ -659,6 +659,6 @@ retry: /* register a timeout in case we don't succeed in switching */ delay = vif->bss_conf.dtim_period * vif->bss_conf.beacon_int * 1024 / 1000; - mod_delayed_work(system_wq, &mvm->tdls_cs.dwork, + mod_delayed_work(system_percpu_wq, &mvm->tdls_cs.dwork, msecs_to_jiffies(delay)); } From 078df640ef057d57d22c064f5d980aead29ba23d Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 19 Mar 2026 11:09:13 +0200 Subject: [PATCH 0869/1561] wifi: iwlwifi: mld: add support for iwl_mcc_allowed_ap_type_cmd v2 There is a new version of this command to indicate which AP type in UNII-9 is supported per country. This adds support for a new UEFI table that will include that data to be filled in the new AP type table. Rename the uats_table field in firmware_runtime structure since it includes now the UATS and the new UNEB table coming from UEFI. For the same reason, rename iwl_mld_init_uats. Signed-off-by: Emmanuel Grumbach Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260319110722.b839655712c5.I3dfca54bd19d6bd5f7ca385ea63be086ece9c1d0@changeid --- .../wireless/intel/iwlwifi/fw/api/nvm-reg.h | 14 ++++++-- .../net/wireless/intel/iwlwifi/fw/runtime.h | 8 ++--- drivers/net/wireless/intel/iwlwifi/fw/uefi.c | 36 +++++++++++++++++-- drivers/net/wireless/intel/iwlwifi/fw/uefi.h | 11 ++++++ drivers/net/wireless/intel/iwlwifi/mld/fw.c | 2 +- .../wireless/intel/iwlwifi/mld/regulatory.c | 32 ++++++++++++++--- .../wireless/intel/iwlwifi/mld/regulatory.h | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 24 +++++++------ 8 files changed, 102 insertions(+), 27 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h b/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h index bd6bf931866f..25c860a05b0e 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h @@ -701,13 +701,23 @@ struct iwl_pnvm_init_complete_ntfy { #define UATS_TABLE_COL_SIZE 13 /** - * struct iwl_mcc_allowed_ap_type_cmd - struct for MCC_ALLOWED_AP_TYPE_CMD + * struct iwl_mcc_allowed_ap_type_cmd_v1 - struct for MCC_ALLOWED_AP_TYPE_CMD * @mcc_to_ap_type_map: mapping an MCC to 6 GHz AP type support (UATS) * @reserved: reserved */ -struct iwl_mcc_allowed_ap_type_cmd { +struct iwl_mcc_allowed_ap_type_cmd_v1 { u8 mcc_to_ap_type_map[UATS_TABLE_ROW_SIZE][UATS_TABLE_COL_SIZE]; __le16 reserved; } __packed; /* MCC_ALLOWED_AP_TYPE_CMD_API_S_VER_1 */ +/** + * struct iwl_mcc_allowed_ap_type_cmd - struct for MCC_ALLOWED_AP_TYPE_CMD + * @mcc_to_ap_type_map: mapping an MCC to 6 GHz AP type support (UATS) + * @mcc_to_ap_type_unii9_map: mapping an MCC to UNII-9 AP type support allowed + */ +struct iwl_mcc_allowed_ap_type_cmd { + u8 mcc_to_ap_type_map[UATS_TABLE_ROW_SIZE][UATS_TABLE_COL_SIZE]; + u8 mcc_to_ap_type_unii9_map[UATS_TABLE_ROW_SIZE][UATS_TABLE_COL_SIZE]; +} __packed; /* MCC_ALLOWED_AP_TYPE_CMD_API_S_VER_2 */ + #endif /* __iwl_fw_api_nvm_reg_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/fw/runtime.h b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h index ff186fb2e0da..411e75b45530 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/runtime.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h @@ -106,8 +106,8 @@ struct iwl_txf_iter_data { * @cur_fw_img: current firmware image, must be maintained by * the driver by calling &iwl_fw_set_current_image() * @dump: debug dump data - * @uats_table: AP type table - * @uats_valid: is AP type table valid + * @ap_type_cmd: AP type tables (for enablement on 6 GHz) + * @ap_type_cmd_valid: if &ap_type_cmd is valid * @uefi_tables_lock_status: The status of the WIFI GUID UEFI variables lock: * 0: Unlocked, 1 and 2: Locked. * Only read the UEFI variables if locked. @@ -213,8 +213,8 @@ struct iwl_fw_runtime { u8 ppag_bios_source; struct iwl_sar_offset_mapping_cmd sgom_table; bool sgom_enabled; - struct iwl_mcc_allowed_ap_type_cmd uats_table; - bool uats_valid; + struct iwl_mcc_allowed_ap_type_cmd ap_type_cmd; + bool ap_type_cmd_valid; u8 uefi_tables_lock_status; struct iwl_phy_specific_cfg phy_filters; enum bios_source dsm_source; diff --git a/drivers/net/wireless/intel/iwlwifi/fw/uefi.c b/drivers/net/wireless/intel/iwlwifi/fw/uefi.c index a7ba86e06c09..d4e1ab1f7c84 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/uefi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/uefi.c @@ -402,11 +402,11 @@ static int iwl_uefi_uats_parse(struct uefi_cnv_wlan_uats_data *uats_data, if (uats_data->revision != 1) return -EINVAL; - memcpy(fwrt->uats_table.mcc_to_ap_type_map, + memcpy(fwrt->ap_type_cmd.mcc_to_ap_type_map, uats_data->mcc_to_ap_type_map, - sizeof(fwrt->uats_table.mcc_to_ap_type_map)); + sizeof(fwrt->ap_type_cmd.mcc_to_ap_type_map)); - fwrt->uats_valid = true; + fwrt->ap_type_cmd_valid = true; return 0; } @@ -429,6 +429,36 @@ void iwl_uefi_get_uats_table(struct iwl_trans *trans, } IWL_EXPORT_SYMBOL(iwl_uefi_get_uats_table); +void iwl_uefi_get_uneb_table(struct iwl_trans *trans, + struct iwl_fw_runtime *fwrt) +{ + struct uefi_cnv_wlan_uneb_data *data; + + data = iwl_uefi_get_verified_variable(trans, IWL_UEFI_UNEB_NAME, + "UNEB", sizeof(*data), NULL); + if (IS_ERR(data)) + return; + + if (data->revision != 1) { + IWL_DEBUG_RADIO(fwrt, + "Cannot read UNEB table. rev is invalid\n"); + goto out; + } + + BUILD_BUG_ON(sizeof(data->mcc_to_ap_type_map) != + sizeof(fwrt->ap_type_cmd.mcc_to_ap_type_unii9_map)); + + memcpy(fwrt->ap_type_cmd.mcc_to_ap_type_unii9_map, + data->mcc_to_ap_type_map, + sizeof(fwrt->ap_type_cmd.mcc_to_ap_type_unii9_map)); + + fwrt->ap_type_cmd_valid = true; + +out: + kfree(data); +} +IWL_EXPORT_SYMBOL(iwl_uefi_get_uneb_table); + static void iwl_uefi_set_sar_profile(struct iwl_fw_runtime *fwrt, struct uefi_sar_profile *uefi_sar_prof, u8 prof_index, bool enabled) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/uefi.h b/drivers/net/wireless/intel/iwlwifi/fw/uefi.h index 349ac1505ad7..99170a72c3f1 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/uefi.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/uefi.h @@ -25,6 +25,7 @@ #define IWL_UEFI_PUNCTURING_NAME L"UefiCnvWlanPuncturing" #define IWL_UEFI_DSBR_NAME L"UefiCnvCommonDSBR" #define IWL_UEFI_WPFC_NAME L"WPFC" +#define IWL_UEFI_UNEB_NAME L"CnvUefiWlanUNEB" #define IWL_SGOM_MAP_SIZE 339 @@ -63,6 +64,9 @@ struct uefi_cnv_wlan_uats_data { u8 mcc_to_ap_type_map[IWL_UATS_MAP_SIZE - 1]; } __packed; +/* UNEB's layout is identical to UATS's */ +#define uefi_cnv_wlan_uneb_data uefi_cnv_wlan_uats_data + struct uefi_cnv_common_step_data { u8 revision; u8 step_mode; @@ -274,6 +278,8 @@ int iwl_uefi_get_dsm(struct iwl_fw_runtime *fwrt, enum iwl_dsm_funcs func, void iwl_uefi_get_sgom_table(struct iwl_trans *trans, struct iwl_fw_runtime *fwrt); void iwl_uefi_get_uats_table(struct iwl_trans *trans, struct iwl_fw_runtime *fwrt); +void iwl_uefi_get_uneb_table(struct iwl_trans *trans, + struct iwl_fw_runtime *fwrt); int iwl_uefi_get_puncturing(struct iwl_fw_runtime *fwrt); int iwl_uefi_get_dsbr(struct iwl_fw_runtime *fwrt, u32 *value); int iwl_uefi_get_phy_filters(struct iwl_fw_runtime *fwrt); @@ -373,6 +379,11 @@ iwl_uefi_get_uats_table(struct iwl_trans *trans, struct iwl_fw_runtime *fwrt) { } +static inline void +iwl_uefi_get_uneb_table(struct iwl_trans *trans, struct iwl_fw_runtime *fwrt) +{ +} + static inline int iwl_uefi_get_puncturing(struct iwl_fw_runtime *fwrt) { diff --git a/drivers/net/wireless/intel/iwlwifi/mld/fw.c b/drivers/net/wireless/intel/iwlwifi/mld/fw.c index 19da521a4bab..7b1fb84a641c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/fw.c @@ -513,7 +513,7 @@ static int iwl_mld_config_fw(struct iwl_mld *mld) return ret; iwl_mld_init_tas(mld); - iwl_mld_init_uats(mld); + iwl_mld_init_ap_type_tables(mld); return 0; } diff --git a/drivers/net/wireless/intel/iwlwifi/mld/regulatory.c b/drivers/net/wireless/intel/iwlwifi/mld/regulatory.c index 6ab5a3410353..d1a55b565898 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/regulatory.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/regulatory.c @@ -64,6 +64,7 @@ void iwl_mld_get_bios_tables(struct iwl_mld *mld) } iwl_uefi_get_uats_table(mld->trans, &mld->fwrt); + iwl_uefi_get_uneb_table(mld->trans, &mld->fwrt); iwl_bios_get_phy_filters(&mld->fwrt); } @@ -352,21 +353,42 @@ void iwl_mld_configure_lari(struct iwl_mld *mld) ret); } -void iwl_mld_init_uats(struct iwl_mld *mld) +void iwl_mld_init_ap_type_tables(struct iwl_mld *mld) { int ret; struct iwl_host_cmd cmd = { .id = WIDE_ID(REGULATORY_AND_NVM_GROUP, MCC_ALLOWED_AP_TYPE_CMD), - .data[0] = &mld->fwrt.uats_table, - .len[0] = sizeof(mld->fwrt.uats_table), + .data[0] = &mld->fwrt.ap_type_cmd, + .len[0] = sizeof(mld->fwrt.ap_type_cmd), .dataflags[0] = IWL_HCMD_DFL_NOCOPY, }; - if (!mld->fwrt.uats_valid) + if (!mld->fwrt.ap_type_cmd_valid) return; - ret = iwl_mld_send_cmd(mld, &cmd); + if (iwl_fw_lookup_cmd_ver(mld->fw, cmd.id, 1) == 1) { + struct iwl_mcc_allowed_ap_type_cmd_v1 *cmd_v1 = + kzalloc(sizeof(*cmd_v1), GFP_KERNEL); + + if (!cmd_v1) + return; + + BUILD_BUG_ON(sizeof(mld->fwrt.ap_type_cmd.mcc_to_ap_type_map) != + sizeof(cmd_v1->mcc_to_ap_type_map)); + + memcpy(cmd_v1->mcc_to_ap_type_map, + mld->fwrt.ap_type_cmd.mcc_to_ap_type_map, + sizeof(mld->fwrt.ap_type_cmd.mcc_to_ap_type_map)); + + cmd.data[0] = cmd_v1; + cmd.len[0] = sizeof(*cmd_v1); + ret = iwl_mld_send_cmd(mld, &cmd); + kfree(cmd_v1); + } else { + ret = iwl_mld_send_cmd(mld, &cmd); + } + if (ret) IWL_ERR(mld, "failed to send MCC_ALLOWED_AP_TYPE_CMD (%d)\n", ret); diff --git a/drivers/net/wireless/intel/iwlwifi/mld/regulatory.h b/drivers/net/wireless/intel/iwlwifi/mld/regulatory.h index 3b01c645adda..5498c19789f4 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/regulatory.h +++ b/drivers/net/wireless/intel/iwlwifi/mld/regulatory.h @@ -9,7 +9,7 @@ void iwl_mld_get_bios_tables(struct iwl_mld *mld); void iwl_mld_configure_lari(struct iwl_mld *mld); -void iwl_mld_init_uats(struct iwl_mld *mld); +void iwl_mld_init_ap_type_tables(struct iwl_mld *mld); void iwl_mld_init_tas(struct iwl_mld *mld); int iwl_mld_init_ppag(struct iwl_mld *mld); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 43cf94c9a36b..f5e5c10cc581 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -459,23 +459,18 @@ static void iwl_mvm_phy_filter_init(struct iwl_mvm *mvm, static void iwl_mvm_uats_init(struct iwl_mvm *mvm) { + int cmd_id = WIDE_ID(REGULATORY_AND_NVM_GROUP, + MCC_ALLOWED_AP_TYPE_CMD); + struct iwl_mcc_allowed_ap_type_cmd_v1 cmd = {}; u8 cmd_ver; int ret; - struct iwl_host_cmd cmd = { - .id = WIDE_ID(REGULATORY_AND_NVM_GROUP, - MCC_ALLOWED_AP_TYPE_CMD), - .flags = 0, - .data[0] = &mvm->fwrt.uats_table, - .len[0] = sizeof(mvm->fwrt.uats_table), - .dataflags[0] = IWL_HCMD_DFL_NOCOPY, - }; if (mvm->trans->mac_cfg->device_family < IWL_DEVICE_FAMILY_AX210) { IWL_DEBUG_RADIO(mvm, "UATS feature is not supported\n"); return; } - cmd_ver = iwl_fw_lookup_cmd_ver(mvm->fw, cmd.id, + cmd_ver = iwl_fw_lookup_cmd_ver(mvm->fw, cmd_id, IWL_FW_CMD_VER_UNKNOWN); if (cmd_ver != 1) { IWL_DEBUG_RADIO(mvm, @@ -486,10 +481,17 @@ static void iwl_mvm_uats_init(struct iwl_mvm *mvm) iwl_uefi_get_uats_table(mvm->trans, &mvm->fwrt); - if (!mvm->fwrt.uats_valid) + if (!mvm->fwrt.ap_type_cmd_valid) return; - ret = iwl_mvm_send_cmd(mvm, &cmd); + BUILD_BUG_ON(sizeof(mvm->fwrt.ap_type_cmd.mcc_to_ap_type_map) != + sizeof(cmd.mcc_to_ap_type_map)); + + memcpy(cmd.mcc_to_ap_type_map, + mvm->fwrt.ap_type_cmd.mcc_to_ap_type_map, + sizeof(mvm->fwrt.ap_type_cmd.mcc_to_ap_type_map)); + + ret = iwl_mvm_send_cmd_pdu(mvm, cmd_id, 0, sizeof(cmd), &cmd); if (ret < 0) IWL_ERR(mvm, "failed to send MCC_ALLOWED_AP_TYPE_CMD (%d)\n", ret); From 07c82a4e5beed28a9d2f69bc687a4668ca2754c4 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 19 Mar 2026 11:09:14 +0200 Subject: [PATCH 0870/1561] wifi: iwlwifi: ensure we don't read SAR values past the limit When we fill the SAR values, we read values from the BIOS store in the firmware runtime object and write them into the command that we send to the firmware. We assumed that the size of the firmware command is not longer than the BIOS tables. This has been true until now, but this is not really safe. We will soon have an firmware API change that will increase the size of the table in the command and we want to make sure that we don't have a buffer overrun when we read the firmware runtime object. Add this safety measure. Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260319110722.99aaf2df072a.I5942590b81324b17e2a369f0c354cafee0f70ef5@changeid --- drivers/net/wireless/intel/iwlwifi/fw/regulatory.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/regulatory.c b/drivers/net/wireless/intel/iwlwifi/fw/regulatory.c index 958e71a3c958..5793c267daf7 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/regulatory.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/regulatory.c @@ -241,6 +241,10 @@ static int iwl_sar_fill_table(struct iwl_fw_runtime *fwrt, int profs[BIOS_SAR_NUM_CHAINS] = { prof_a, prof_b }; int i, j; + if (WARN_ON_ONCE(n_subbands > + ARRAY_SIZE(fwrt->sar_profiles[0].chains[0].subbands))) + return -EINVAL; + for (i = 0; i < BIOS_SAR_NUM_CHAINS; i++) { struct iwl_sar_profile *prof; From 5971e08b1324a9a9f3d530ce6dda4982401a6120 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 19 Mar 2026 11:09:15 +0200 Subject: [PATCH 0871/1561] wifi: iwlwifi: uefi: decouple UEFI and firmware APIs The APIs in uefi.h are not firmware API files nor are they pure software objects. They really reflect a specific layout we expect to see in the UEFI tables. Since the UEFI objects are encoded into the BIOS, we can't use the same values for the declaration of the UEFI objects and for the pure software object like iwl_sar_profile in the firmware runtime object. Decouple the two types. Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260319110722.db39a64073db.I21486dedb7357570151437cb0211b697e0efb61d@changeid --- drivers/net/wireless/intel/iwlwifi/fw/uefi.h | 23 ++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/uefi.h b/drivers/net/wireless/intel/iwlwifi/fw/uefi.h index 99170a72c3f1..c6940a3c03ea 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/uefi.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/uefi.h @@ -76,13 +76,24 @@ struct uefi_cnv_common_step_data { u8 radio2; } __packed; +#define UEFI_SAR_MAX_SUB_BANDS_NUM 11 +#define UEFI_SAR_MAX_CHAINS_PER_PROFILE 4 + +/* + * struct uefi_sar_profile_chain - per-chain values of a SAR profile + * @subbands: the SAR value for each subband + */ +struct uefi_sar_profile_chain { + u8 subbands[UEFI_SAR_MAX_SUB_BANDS_NUM]; +}; + /* * struct uefi_sar_profile - a SAR profile as defined in UEFI * * @chains: a per-chain table of SAR values */ struct uefi_sar_profile { - struct iwl_sar_profile_chain chains[BIOS_SAR_MAX_CHAINS_PER_PROFILE]; + struct uefi_sar_profile_chain chains[UEFI_SAR_MAX_CHAINS_PER_PROFILE]; } __packed; /* @@ -125,6 +136,14 @@ struct uefi_cnv_var_wgds { struct iwl_geo_profile geo_profiles[BIOS_GEO_MAX_PROFILE_NUM]; } __packed; +/* + * struct uefi_ppag_chain - PPAG table for a specific chain + * @subbands: the PPAG values for band + */ +struct uefi_ppag_chain { + s8 subbands[UEFI_SAR_MAX_SUB_BANDS_NUM]; +}; + /* * struct uefi_cnv_var_ppag - PPAG table as defined in UEFI * @revision: the revision of the table @@ -134,7 +153,7 @@ struct uefi_cnv_var_wgds { struct uefi_cnv_var_ppag { u8 revision; u32 ppag_modes; - struct iwl_ppag_chain ppag_chains[IWL_NUM_CHAIN_LIMITS]; + struct uefi_ppag_chain ppag_chains[IWL_NUM_CHAIN_LIMITS]; } __packed; /* struct uefi_cnv_var_wtas - WTAS tabled as defined in UEFI From 64b992ebf1e8be3dc89611357522d09fcaba5387 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 19 Mar 2026 11:09:16 +0200 Subject: [PATCH 0872/1561] wifi: iwlwifi: acpi: better use ARRAY_SIZE than a define Since we'll have to change things in this area, use the safer option to define the size of an array. Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260319110722.1acfc3b6f2b8.I2185e7850146e15628f8ec2c579d93f536c83d83@changeid --- drivers/net/wireless/intel/iwlwifi/fw/acpi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c index de9aef0d924c..b64abb8439b7 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c @@ -504,7 +504,8 @@ iwl_acpi_parse_chains_table(union acpi_object *table, u8 num_chains, u8 num_sub_bands) { for (u8 chain = 0; chain < num_chains; chain++) { - for (u8 subband = 0; subband < BIOS_SAR_MAX_SUB_BANDS_NUM; + for (u8 subband = 0; + subband < ARRAY_SIZE(chains[chain].subbands); subband++) { /* if we don't have the values, use the default */ if (subband >= num_sub_bands) { From 97cbd93e364fcb9bef0db13055a4d2fd103df9d4 Mon Sep 17 00:00:00 2001 From: Nidhish A N Date: Thu, 19 Mar 2026 11:09:17 +0200 Subject: [PATCH 0873/1561] wifi: iwlwifi: mvm: cleanup some more MLO code iwlmld is now the op mode that is used for EHT devices, so iwlmvm code can never run in MLO. Clean up some more MLO code. Signed-off-by: Nidhish A N Reviewed-by: Pagadala Yesu Anjaneyulu Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260319110722.8efcec472e91.Icaf4f4d6b9008e12310f408cfef7f35643f27ca5@changeid --- .../net/wireless/intel/iwlwifi/mvm/mld-key.c | 46 --- .../wireless/intel/iwlwifi/mvm/mld-mac80211.c | 132 -------- .../net/wireless/intel/iwlwifi/mvm/mld-sta.c | 285 ------------------ drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 5 - drivers/net/wireless/intel/iwlwifi/mvm/sta.h | 4 - 5 files changed, 472 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-key.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-key.c index 9bb253dcf4a7..4869a5fa8abc 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-key.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-key.c @@ -121,52 +121,6 @@ struct iwl_mvm_sta_key_update_data { int err; }; -static void iwl_mvm_mld_update_sta_key(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct ieee80211_sta *sta, - struct ieee80211_key_conf *key, - void *_data) -{ - u32 cmd_id = WIDE_ID(DATA_PATH_GROUP, SEC_KEY_CMD); - struct iwl_mvm_sta_key_update_data *data = _data; - struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); - struct iwl_sec_key_cmd cmd = { - .action = cpu_to_le32(FW_CTXT_ACTION_MODIFY), - .u.modify.old_sta_mask = cpu_to_le32(data->old_sta_mask), - .u.modify.new_sta_mask = cpu_to_le32(data->new_sta_mask), - .u.modify.key_id = cpu_to_le32(key->keyidx), - .u.modify.key_flags = - cpu_to_le32(iwl_mvm_get_sec_flags(mvm, vif, sta, key)), - }; - int err; - - /* only need to do this for pairwise keys (link_id == -1) */ - if (sta != data->sta || key->link_id >= 0) - return; - - err = iwl_mvm_send_cmd_pdu(mvm, cmd_id, 0, sizeof(cmd), &cmd); - - if (err) - data->err = err; -} - -int iwl_mvm_mld_update_sta_keys(struct iwl_mvm *mvm, - struct ieee80211_vif *vif, - struct ieee80211_sta *sta, - u32 old_sta_mask, - u32 new_sta_mask) -{ - struct iwl_mvm_sta_key_update_data data = { - .sta = sta, - .old_sta_mask = old_sta_mask, - .new_sta_mask = new_sta_mask, - }; - - ieee80211_iter_keys(mvm->hw, vif, iwl_mvm_mld_update_sta_key, - &data); - return data.err; -} - static int __iwl_mvm_sec_key_del(struct iwl_mvm *mvm, u32 sta_mask, u32 key_flags, u32 keyidx, u32 flags) { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c index 896ed9823021..f1dbfeae20bc 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c @@ -886,133 +886,6 @@ static int iwl_mvm_mld_roc(struct ieee80211_hw *hw, struct ieee80211_vif *vif, return iwl_mvm_roc_common(hw, vif, channel, duration, type, &ops); } -static int -iwl_mvm_mld_change_vif_links(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - u16 old_links, u16 new_links, - struct ieee80211_bss_conf *old[IEEE80211_MLD_MAX_NUM_LINKS]) -{ - struct iwl_mvm_vif_link_info *new_link[IEEE80211_MLD_MAX_NUM_LINKS] = {}; - struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); - struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); - u16 removed = old_links & ~new_links; - u16 added = new_links & ~old_links; - int err, i; - - for (i = 0; i < IEEE80211_MLD_MAX_NUM_LINKS; i++) { - if (test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status)) - break; - - if (!(added & BIT(i))) - continue; - new_link[i] = kzalloc_obj(*new_link[i]); - if (!new_link[i]) { - err = -ENOMEM; - goto free; - } - - new_link[i]->fw_link_id = IWL_MVM_FW_LINK_ID_INVALID; - iwl_mvm_init_link(new_link[i]); - } - - mutex_lock(&mvm->mutex); - - /* If we're in RESTART flow, the default link wasn't added in - * drv_add_interface(), and link[0] doesn't point to it. - */ - if (old_links == 0 && !test_bit(IWL_MVM_STATUS_IN_HW_RESTART, - &mvm->status)) { - err = iwl_mvm_disable_link(mvm, vif, &vif->bss_conf); - if (err) - goto out_err; - mvmvif->link[0] = NULL; - } - - for (i = 0; i < IEEE80211_MLD_MAX_NUM_LINKS; i++) { - if (removed & BIT(i)) { - struct ieee80211_bss_conf *link_conf = old[i]; - - err = iwl_mvm_disable_link(mvm, vif, link_conf); - if (err) - goto out_err; - kfree(mvmvif->link[i]); - mvmvif->link[i] = NULL; - } else if (added & BIT(i)) { - struct ieee80211_bss_conf *link_conf; - - link_conf = link_conf_dereference_protected(vif, i); - if (WARN_ON(!link_conf)) - continue; - - if (!test_bit(IWL_MVM_STATUS_IN_HW_RESTART, - &mvm->status)) - mvmvif->link[i] = new_link[i]; - new_link[i] = NULL; - err = iwl_mvm_add_link(mvm, vif, link_conf); - if (err) - goto out_err; - } - } - - err = 0; - if (new_links == 0) { - mvmvif->link[0] = &mvmvif->deflink; - err = iwl_mvm_add_link(mvm, vif, &vif->bss_conf); - } - -out_err: - /* we really don't have a good way to roll back here ... */ - mutex_unlock(&mvm->mutex); - -free: - for (i = 0; i < IEEE80211_MLD_MAX_NUM_LINKS; i++) - kfree(new_link[i]); - return err; -} - -static int -iwl_mvm_mld_change_sta_links(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct ieee80211_sta *sta, - u16 old_links, u16 new_links) -{ - struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); - - guard(mvm)(mvm); - return iwl_mvm_mld_update_sta_links(mvm, vif, sta, old_links, new_links); -} - -static bool iwl_mvm_mld_can_activate_links(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - u16 desired_links) -{ - int n_links = hweight16(desired_links); - - if (n_links <= 1) - return true; - - WARN_ON(1); - return false; -} - -static enum ieee80211_neg_ttlm_res -iwl_mvm_mld_can_neg_ttlm(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - struct ieee80211_neg_ttlm *neg_ttlm) -{ - u16 map; - u8 i; - - /* Verify all TIDs are mapped to the same links set */ - map = neg_ttlm->downlink[0]; - for (i = 0; i < IEEE80211_TTLM_NUM_TIDS; i++) { - if (neg_ttlm->downlink[i] != neg_ttlm->uplink[i] || - neg_ttlm->uplink[i] != map) - return NEG_TTLM_RES_REJECT; - } - - return NEG_TTLM_RES_ACCEPT; -} - const struct ieee80211_ops iwl_mvm_mld_hw_ops = { .tx = iwl_mvm_mac_tx, .wake_tx_queue = iwl_mvm_mac_wake_tx_queue, @@ -1102,9 +975,4 @@ const struct ieee80211_ops iwl_mvm_mld_hw_ops = { .link_sta_add_debugfs = iwl_mvm_link_sta_add_debugfs, #endif .set_hw_timestamp = iwl_mvm_set_hw_timestamp, - - .change_vif_links = iwl_mvm_mld_change_vif_links, - .change_sta_links = iwl_mvm_mld_change_sta_links, - .can_activate_links = iwl_mvm_mld_can_activate_links, - .can_neg_ttlm = iwl_mvm_mld_can_neg_ttlm, }; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c index 3359e02e151f..44e16ee9514e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c @@ -913,288 +913,3 @@ void iwl_mvm_mld_modify_all_sta_disable_tx(struct iwl_mvm *mvm, rcu_read_unlock(); } - -static int iwl_mvm_mld_update_sta_queues(struct iwl_mvm *mvm, - struct ieee80211_sta *sta, - u32 old_sta_mask, - u32 new_sta_mask) -{ - struct iwl_mvm_sta *mvm_sta = iwl_mvm_sta_from_mac80211(sta); - struct iwl_scd_queue_cfg_cmd cmd = { - .operation = cpu_to_le32(IWL_SCD_QUEUE_MODIFY), - .u.modify.old_sta_mask = cpu_to_le32(old_sta_mask), - .u.modify.new_sta_mask = cpu_to_le32(new_sta_mask), - }; - struct iwl_host_cmd hcmd = { - .id = WIDE_ID(DATA_PATH_GROUP, SCD_QUEUE_CONFIG_CMD), - .len[0] = sizeof(cmd), - .data[0] = &cmd - }; - int tid; - int ret; - - lockdep_assert_held(&mvm->mutex); - - for (tid = 0; tid <= IWL_MAX_TID_COUNT; tid++) { - struct iwl_mvm_tid_data *tid_data = &mvm_sta->tid_data[tid]; - int txq_id = tid_data->txq_id; - - if (txq_id == IWL_MVM_INVALID_QUEUE) - continue; - - if (tid == IWL_MAX_TID_COUNT) - cmd.u.modify.tid = cpu_to_le32(IWL_MGMT_TID); - else - cmd.u.modify.tid = cpu_to_le32(tid); - - ret = iwl_mvm_send_cmd(mvm, &hcmd); - if (ret) - return ret; - } - - return 0; -} - -static int iwl_mvm_mld_update_sta_baids(struct iwl_mvm *mvm, - u32 old_sta_mask, - u32 new_sta_mask) -{ - struct iwl_rx_baid_cfg_cmd cmd = { - .action = cpu_to_le32(IWL_RX_BAID_ACTION_MODIFY), - .modify.old_sta_id_mask = cpu_to_le32(old_sta_mask), - .modify.new_sta_id_mask = cpu_to_le32(new_sta_mask), - }; - u32 cmd_id = WIDE_ID(DATA_PATH_GROUP, RX_BAID_ALLOCATION_CONFIG_CMD); - int baid; - - /* mac80211 will remove sessions later, but we ignore all that */ - if (test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status)) - return 0; - - BUILD_BUG_ON(sizeof(struct iwl_rx_baid_cfg_resp) != sizeof(baid)); - - for (baid = 0; baid < ARRAY_SIZE(mvm->baid_map); baid++) { - struct iwl_mvm_baid_data *data; - int ret; - - data = rcu_dereference_protected(mvm->baid_map[baid], - lockdep_is_held(&mvm->mutex)); - if (!data) - continue; - - if (!(data->sta_mask & old_sta_mask)) - continue; - - WARN_ONCE(data->sta_mask != old_sta_mask, - "BAID data for %d corrupted - expected 0x%x found 0x%x\n", - baid, old_sta_mask, data->sta_mask); - - cmd.modify.tid = cpu_to_le32(data->tid); - - ret = iwl_mvm_send_cmd_pdu(mvm, cmd_id, CMD_SEND_IN_RFKILL, - sizeof(cmd), &cmd); - data->sta_mask = new_sta_mask; - if (ret) - return ret; - } - - return 0; -} - -static int iwl_mvm_mld_update_sta_resources(struct iwl_mvm *mvm, - struct ieee80211_vif *vif, - struct ieee80211_sta *sta, - u32 old_sta_mask, - u32 new_sta_mask) -{ - int ret; - - ret = iwl_mvm_mld_update_sta_queues(mvm, sta, - old_sta_mask, - new_sta_mask); - if (ret) - return ret; - - ret = iwl_mvm_mld_update_sta_keys(mvm, vif, sta, - old_sta_mask, - new_sta_mask); - if (ret) - return ret; - - return iwl_mvm_mld_update_sta_baids(mvm, old_sta_mask, new_sta_mask); -} - -int iwl_mvm_mld_update_sta_links(struct iwl_mvm *mvm, - struct ieee80211_vif *vif, - struct ieee80211_sta *sta, - u16 old_links, u16 new_links) -{ - struct iwl_mvm_sta *mvm_sta = iwl_mvm_sta_from_mac80211(sta); - struct iwl_mvm_vif *mvm_vif = iwl_mvm_vif_from_mac80211(vif); - struct iwl_mvm_link_sta *mvm_sta_link; - struct iwl_mvm_vif_link_info *mvm_vif_link; - unsigned long links_to_add = ~old_links & new_links; - unsigned long links_to_rem = old_links & ~new_links; - unsigned long old_links_long = old_links; - u32 current_sta_mask = 0, sta_mask_added = 0, sta_mask_to_rem = 0; - unsigned long link_sta_added_to_fw = 0, link_sta_allocated = 0; - unsigned int link_id; - int ret; - - lockdep_assert_wiphy(mvm->hw->wiphy); - lockdep_assert_held(&mvm->mutex); - - for_each_set_bit(link_id, &old_links_long, - IEEE80211_MLD_MAX_NUM_LINKS) { - mvm_sta_link = - rcu_dereference_protected(mvm_sta->link[link_id], - lockdep_is_held(&mvm->mutex)); - - if (WARN_ON(!mvm_sta_link)) { - ret = -EINVAL; - goto err; - } - - current_sta_mask |= BIT(mvm_sta_link->sta_id); - if (links_to_rem & BIT(link_id)) - sta_mask_to_rem |= BIT(mvm_sta_link->sta_id); - } - - if (sta_mask_to_rem) { - ret = iwl_mvm_mld_update_sta_resources(mvm, vif, sta, - current_sta_mask, - current_sta_mask & - ~sta_mask_to_rem); - if (WARN_ON(ret)) - goto err; - - current_sta_mask &= ~sta_mask_to_rem; - } - - for_each_set_bit(link_id, &links_to_rem, IEEE80211_MLD_MAX_NUM_LINKS) { - mvm_sta_link = - rcu_dereference_protected(mvm_sta->link[link_id], - lockdep_is_held(&mvm->mutex)); - mvm_vif_link = mvm_vif->link[link_id]; - - if (WARN_ON(!mvm_sta_link || !mvm_vif_link)) { - ret = -EINVAL; - goto err; - } - - ret = iwl_mvm_mld_rm_sta_from_fw(mvm, mvm_sta_link->sta_id); - if (WARN_ON(ret)) - goto err; - - if (vif->type == NL80211_IFTYPE_STATION) - mvm_vif_link->ap_sta_id = IWL_INVALID_STA; - - iwl_mvm_mld_free_sta_link(mvm, mvm_sta, mvm_sta_link, link_id); - } - - for_each_set_bit(link_id, &links_to_add, IEEE80211_MLD_MAX_NUM_LINKS) { - struct ieee80211_bss_conf *link_conf = - link_conf_dereference_protected(vif, link_id); - struct ieee80211_link_sta *link_sta = - link_sta_dereference_protected(sta, link_id); - mvm_vif_link = mvm_vif->link[link_id]; - - if (WARN_ON(!mvm_vif_link || !link_conf || !link_sta)) { - ret = -EINVAL; - goto err; - } - - if (test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status)) { - struct iwl_mvm_link_sta *mvm_link_sta = - rcu_dereference_protected(mvm_sta->link[link_id], - lockdep_is_held(&mvm->mutex)); - u32 sta_id; - - if (WARN_ON(!mvm_link_sta)) { - ret = -EINVAL; - goto err; - } - - sta_id = mvm_link_sta->sta_id; - - rcu_assign_pointer(mvm->fw_id_to_mac_id[sta_id], sta); - rcu_assign_pointer(mvm->fw_id_to_link_sta[sta_id], - link_sta); - } else { - if (WARN_ON(mvm_sta->link[link_id])) { - ret = -EINVAL; - goto err; - } - ret = iwl_mvm_mld_alloc_sta_link(mvm, vif, sta, - link_id); - if (WARN_ON(ret)) - goto err; - } - - link_sta->agg.max_rc_amsdu_len = 1; - ieee80211_sta_recalc_aggregates(sta); - - mvm_sta_link = - rcu_dereference_protected(mvm_sta->link[link_id], - lockdep_is_held(&mvm->mutex)); - - if (WARN_ON(!mvm_sta_link)) { - ret = -EINVAL; - goto err; - } - - if (vif->type == NL80211_IFTYPE_STATION) - iwl_mvm_mld_set_ap_sta_id(sta, mvm_vif_link, - mvm_sta_link); - - link_sta_allocated |= BIT(link_id); - - sta_mask_added |= BIT(mvm_sta_link->sta_id); - - ret = iwl_mvm_mld_cfg_sta(mvm, sta, vif, link_sta, link_conf, - mvm_sta_link); - if (WARN_ON(ret)) - goto err; - - link_sta_added_to_fw |= BIT(link_id); - - iwl_mvm_rs_add_sta_link(mvm, mvm_sta_link); - - iwl_mvm_rs_rate_init(mvm, vif, sta, link_conf, link_sta, - link_conf->chanreq.oper.chan->band); - } - - if (sta_mask_added) { - ret = iwl_mvm_mld_update_sta_resources(mvm, vif, sta, - current_sta_mask, - current_sta_mask | - sta_mask_added); - if (WARN_ON(ret)) - goto err; - } - - return 0; - -err: - /* remove all already allocated stations in FW */ - for_each_set_bit(link_id, &link_sta_added_to_fw, - IEEE80211_MLD_MAX_NUM_LINKS) { - mvm_sta_link = - rcu_dereference_protected(mvm_sta->link[link_id], - lockdep_is_held(&mvm->mutex)); - - iwl_mvm_mld_rm_sta_from_fw(mvm, mvm_sta_link->sta_id); - } - - /* remove all already allocated station links in driver */ - for_each_set_bit(link_id, &link_sta_allocated, - IEEE80211_MLD_MAX_NUM_LINKS) { - mvm_sta_link = - rcu_dereference_protected(mvm_sta->link[link_id], - lockdep_is_held(&mvm->mutex)); - - iwl_mvm_mld_free_sta_link(mvm, mvm_sta, mvm_sta_link, link_id); - } - - return ret; -} diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 46a9dfa58a53..402ba5dee8b2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -2450,11 +2450,6 @@ void iwl_mvm_sec_key_remove_ap(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct iwl_mvm_vif_link_info *link, unsigned int link_id); -int iwl_mvm_mld_update_sta_keys(struct iwl_mvm *mvm, - struct ieee80211_vif *vif, - struct ieee80211_sta *sta, - u32 old_sta_mask, - u32 new_sta_mask); int iwl_mvm_mld_send_key(struct iwl_mvm *mvm, u32 sta_mask, u32 key_flags, struct ieee80211_key_conf *keyconf); u32 iwl_mvm_get_sec_flags(struct iwl_mvm *mvm, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h index c25edc7c1813..ff099aec7886 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h @@ -637,10 +637,6 @@ void iwl_mvm_mld_free_sta_link(struct iwl_mvm *mvm, struct iwl_mvm_link_sta *mvm_sta_link, unsigned int link_id); int iwl_mvm_mld_rm_sta_id(struct iwl_mvm *mvm, u8 sta_id); -int iwl_mvm_mld_update_sta_links(struct iwl_mvm *mvm, - struct ieee80211_vif *vif, - struct ieee80211_sta *sta, - u16 old_links, u16 new_links); u32 iwl_mvm_sta_fw_id_mask(struct iwl_mvm *mvm, struct ieee80211_sta *sta, int filter_link_id); int iwl_mvm_mld_add_int_sta_with_queue(struct iwl_mvm *mvm, From 5ebf0b1d7bd31996cc3f63e3751c05bef7eaeaae Mon Sep 17 00:00:00 2001 From: Pagadala Yesu Anjaneyulu Date: Thu, 19 Mar 2026 11:09:18 +0200 Subject: [PATCH 0874/1561] wifi: iwlwifi: mld: remove unused scan expire time constants Remove the unused IWL_MLD_SCAN_EXPIRE_TIME_SEC constant from constants.h and its corresponding IWL_MLD_SCAN_EXPIRE_TIME macro definition from mlo.c. These definitions are no longer referenced. Signed-off-by: Pagadala Yesu Anjaneyulu Reviewed-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260319110722.4be7221113cf.I13e32d575bb854709af374519332b998bc1fed4a@changeid --- drivers/net/wireless/intel/iwlwifi/mld/constants.h | 1 - drivers/net/wireless/intel/iwlwifi/mld/mlo.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/constants.h b/drivers/net/wireless/intel/iwlwifi/mld/constants.h index 5d23a618ae3c..e2a5eecc18c3 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/constants.h +++ b/drivers/net/wireless/intel/iwlwifi/mld/constants.h @@ -36,7 +36,6 @@ #define IWL_MLD_PS_HEAVY_RX_THLD_PACKETS 8 #define IWL_MLD_TRIGGER_LINK_SEL_TIME_SEC 30 -#define IWL_MLD_SCAN_EXPIRE_TIME_SEC 20 #define IWL_MLD_TPT_COUNT_WINDOW (5 * HZ) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mlo.c b/drivers/net/wireless/intel/iwlwifi/mld/mlo.c index f842f5183223..f693f92e42b4 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mlo.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mlo.c @@ -110,7 +110,6 @@ void iwl_mld_emlsr_tmp_non_bss_done_wk(struct wiphy *wiphy, } #define IWL_MLD_TRIGGER_LINK_SEL_TIME (HZ * IWL_MLD_TRIGGER_LINK_SEL_TIME_SEC) -#define IWL_MLD_SCAN_EXPIRE_TIME (HZ * IWL_MLD_SCAN_EXPIRE_TIME_SEC) /* Exit reasons that can cause longer EMLSR prevention */ #define IWL_MLD_PREVENT_EMLSR_REASONS (IWL_MLD_EMLSR_EXIT_MISSED_BEACON | \ From b6045c899e371dda801ae33727c9da112f422645 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Thu, 19 Mar 2026 11:09:19 +0200 Subject: [PATCH 0875/1561] wifi: iwlwifi: mld: Refactor scan command handling As a preparation for a new scan command version, refactor the scan command building such that it would allow introducing new scan command structures in a simpler way. Signed-off-by: Ilan Peer Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260319110722.a3e9589769f0.If458023e234ed79db7474107d98f0b6e28e565e5@changeid --- drivers/net/wireless/intel/iwlwifi/mld/scan.c | 159 ++++++++++++------ drivers/net/wireless/intel/iwlwifi/mld/scan.h | 2 + 2 files changed, 114 insertions(+), 47 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/scan.c b/drivers/net/wireless/intel/iwlwifi/mld/scan.c index a1a4cf3ab3d3..7f4679134def 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/scan.c @@ -116,6 +116,13 @@ struct iwl_mld_scan_params { u8 bssid[ETH_ALEN] __aligned(2); }; +struct iwl_scan_req_params_ptrs { + struct iwl_scan_general_params_v11 *general_params; + struct iwl_scan_channel_params_v7 *channel_params; + struct iwl_scan_periodic_parms_v1 *periodic_params; + struct iwl_scan_probe_params_v4 *probe_params; +}; + struct iwl_mld_scan_respect_p2p_go_iter_data { struct ieee80211_vif *current_vif; bool p2p_go; @@ -512,9 +519,10 @@ iwl_mld_scan_get_cmd_gen_flags2(struct iwl_mld *mld, static void iwl_mld_scan_cmd_set_dwell(struct iwl_mld *mld, - struct iwl_scan_general_params_v11 *gp, - struct iwl_mld_scan_params *params) + struct iwl_mld_scan_params *params, + struct iwl_scan_req_params_ptrs *scan_ptrs) { + struct iwl_scan_general_params_v11 *gp = scan_ptrs->general_params; const struct iwl_mld_scan_timing_params *timing = &scan_timing[params->type]; @@ -551,9 +559,10 @@ static void iwl_mld_scan_cmd_set_gen_params(struct iwl_mld *mld, struct iwl_mld_scan_params *params, struct ieee80211_vif *vif, - struct iwl_scan_general_params_v11 *gp, + struct iwl_scan_req_params_ptrs *scan_ptrs, enum iwl_mld_scan_status scan_status) { + struct iwl_scan_general_params_v11 *gp = scan_ptrs->general_params; u16 gen_flags = iwl_mld_scan_get_cmd_gen_flags(mld, params, vif, scan_status); u8 gen_flags2 = iwl_mld_scan_get_cmd_gen_flags2(mld, params, vif, @@ -566,7 +575,7 @@ iwl_mld_scan_cmd_set_gen_params(struct iwl_mld *mld, gp->flags = cpu_to_le16(gen_flags); gp->flags2 = gen_flags2; - iwl_mld_scan_cmd_set_dwell(mld, gp, params); + iwl_mld_scan_cmd_set_dwell(mld, params, scan_ptrs); if (gen_flags & IWL_UMAC_SCAN_GEN_FLAGS_V2_FRAGMENTED_LMAC1) gp->num_of_fragments[SCAN_LB_LMAC_IDX] = IWL_SCAN_NUM_OF_FRAGS; @@ -577,9 +586,12 @@ iwl_mld_scan_cmd_set_gen_params(struct iwl_mld *mld, static int iwl_mld_scan_cmd_set_sched_params(struct iwl_mld_scan_params *params, - struct iwl_scan_umac_schedule *schedule, - __le16 *delay) + struct iwl_scan_req_params_ptrs *scan_ptrs) { + struct iwl_scan_umac_schedule *schedule = + scan_ptrs->periodic_params->schedule; + __le16 *delay = &scan_ptrs->periodic_params->delay; + if (WARN_ON(!params->n_scan_plans || params->n_scan_plans > IWL_MAX_SCHED_SCAN_PLANS)) return -EINVAL; @@ -657,11 +669,12 @@ iwl_mld_scan_cmd_build_ssids(struct iwl_mld_scan_params *params, static void iwl_mld_scan_fill_6g_chan_list(struct iwl_mld_scan_params *params, - struct iwl_scan_probe_params_v4 *pp) + struct iwl_scan_req_params_ptrs *scan_ptrs) { int j, idex_s = 0, idex_b = 0; struct cfg80211_scan_6ghz_params *scan_6ghz_params = params->scan_6ghz_params; + struct iwl_scan_probe_params_v4 *pp = scan_ptrs->probe_params; for (j = 0; j < params->n_ssids && idex_s < SCAN_SHORT_SSID_MAX_SIZE; @@ -725,13 +738,15 @@ iwl_mld_scan_fill_6g_chan_list(struct iwl_mld_scan_params *params, static void iwl_mld_scan_cmd_set_probe_params(struct iwl_mld_scan_params *params, - struct iwl_scan_probe_params_v4 *pp, + struct iwl_scan_req_params_ptrs *scan_ptrs, u32 *bitmap_ssid) { + struct iwl_scan_probe_params_v4 *pp = scan_ptrs->probe_params; + pp->preq = params->preq; if (params->scan_6ghz) { - iwl_mld_scan_fill_6g_chan_list(params, pp); + iwl_mld_scan_fill_6g_chan_list(params, scan_ptrs); return; } @@ -821,10 +836,12 @@ static u32 iwl_mld_scan_ch_n_aps_flag(enum nl80211_iftype vif_type, u8 ch_id) static void iwl_mld_scan_cmd_set_channels(struct iwl_mld *mld, struct ieee80211_channel **channels, - struct iwl_scan_channel_params_v7 *cp, + struct iwl_scan_req_params_ptrs *scan_ptrs, int n_channels, u32 flags, enum nl80211_iftype vif_type) { + struct iwl_scan_channel_params_v7 *cp = scan_ptrs->channel_params; + for (int i = 0; i < n_channels; i++) { enum nl80211_band band = channels[i]->band; struct iwl_scan_channel_cfg_umac *cfg = &cp->channel_config[i]; @@ -862,10 +879,11 @@ static u8 iwl_mld_scan_cfg_channels_6g(struct iwl_mld *mld, struct iwl_mld_scan_params *params, u32 n_channels, - struct iwl_scan_probe_params_v4 *pp, - struct iwl_scan_channel_params_v7 *cp, + struct iwl_scan_req_params_ptrs *scan_ptrs, enum nl80211_iftype vif_type) { + struct iwl_scan_probe_params_v4 *pp = scan_ptrs->probe_params; + struct iwl_scan_channel_params_v7 *cp = scan_ptrs->channel_params; struct cfg80211_scan_6ghz_params *scan_6ghz_params = params->scan_6ghz_params; u32 i; @@ -1063,25 +1081,23 @@ static int iwl_mld_scan_cmd_set_6ghz_chan_params(struct iwl_mld *mld, struct iwl_mld_scan_params *params, struct ieee80211_vif *vif, - struct iwl_scan_req_params_v17 *scan_p) + struct iwl_scan_req_params_ptrs *scan_ptrs) { - struct iwl_scan_channel_params_v7 *chan_p = &scan_p->channel_params; - struct iwl_scan_probe_params_v4 *probe_p = &scan_p->probe_params; + struct iwl_scan_channel_params_v7 *cp = scan_ptrs->channel_params; /* Explicitly clear the flags since most of them are not * relevant for 6 GHz scan. */ - chan_p->flags = 0; - chan_p->count = iwl_mld_scan_cfg_channels_6g(mld, params, - params->n_channels, - probe_p, chan_p, - vif->type); - if (!chan_p->count) + cp->flags = 0; + cp->count = iwl_mld_scan_cfg_channels_6g(mld, params, + params->n_channels, + scan_ptrs, vif->type); + if (!cp->count) return -EINVAL; if (!params->n_ssids || (params->n_ssids == 1 && !params->ssids[0].ssid_len)) - chan_p->flags |= IWL_SCAN_CHANNEL_FLAG_6G_PSC_NO_FILTER; + cp->flags |= IWL_SCAN_CHANNEL_FLAG_6G_PSC_NO_FILTER; return 0; } @@ -1090,12 +1106,12 @@ static int iwl_mld_scan_cmd_set_chan_params(struct iwl_mld *mld, struct iwl_mld_scan_params *params, struct ieee80211_vif *vif, - struct iwl_scan_req_params_v17 *scan_p, + struct iwl_scan_req_params_ptrs *scan_ptrs, bool low_latency, enum iwl_mld_scan_status scan_status, u32 channel_cfg_flags) { - struct iwl_scan_channel_params_v7 *cp = &scan_p->channel_params; + struct iwl_scan_channel_params_v7 *cp = scan_ptrs->channel_params; struct ieee80211_supported_band *sband = &mld->nvm_data->bands[NL80211_BAND_6GHZ]; @@ -1107,14 +1123,14 @@ iwl_mld_scan_cmd_set_chan_params(struct iwl_mld *mld, if (params->scan_6ghz) return iwl_mld_scan_cmd_set_6ghz_chan_params(mld, params, - vif, scan_p); + vif, scan_ptrs); /* relevant only for 2.4 GHz/5 GHz scan */ cp->flags = iwl_mld_scan_cmd_set_chan_flags(mld, params, vif, low_latency); cp->count = params->n_channels; - iwl_mld_scan_cmd_set_channels(mld, params->channels, cp, + iwl_mld_scan_cmd_set_channels(mld, params->channels, scan_ptrs, params->n_channels, channel_cfg_flags, vif->type); @@ -1144,41 +1160,50 @@ iwl_mld_scan_cmd_set_chan_params(struct iwl_mld *mld, return 0; } -static int -iwl_mld_scan_build_cmd(struct iwl_mld *mld, struct ieee80211_vif *vif, +struct iwl_scan_umac_handler { + u8 version; + int (*handler)(struct iwl_mld *mld, struct ieee80211_vif *vif, struct iwl_mld_scan_params *params, enum iwl_mld_scan_status scan_status, - bool low_latency) + int uid, u32 ooc_priority, bool low_latency); +}; + +#define IWL_SCAN_UMAC_HANDLER(_ver) { \ + .version = _ver, \ + .handler = iwl_mld_scan_umac_v##_ver, \ +} + +static int iwl_mld_scan_umac_v17(struct iwl_mld *mld, struct ieee80211_vif *vif, + struct iwl_mld_scan_params *params, + enum iwl_mld_scan_status scan_status, + int uid, u32 ooc_priority, bool low_latency) { struct iwl_scan_req_umac_v17 *cmd = mld->scan.cmd; - struct iwl_scan_req_params_v17 *scan_p = &cmd->scan_params; + struct iwl_scan_req_params_ptrs scan_ptrs = { + .general_params = &cmd->scan_params.general_params, + .probe_params = &cmd->scan_params.probe_params, + .channel_params = &cmd->scan_params.channel_params, + .periodic_params = &cmd->scan_params.periodic_params + }; u32 bitmap_ssid = 0; - int uid, ret; + int ret; - memset(mld->scan.cmd, 0, mld->scan.cmd_size); - - /* find a free UID entry */ - uid = iwl_mld_scan_uid_by_status(mld, IWL_MLD_SCAN_NONE); - if (uid < 0) - return uid; + if (WARN_ON(params->n_channels > SCAN_MAX_NUM_CHANS_V3)) + return -EINVAL; cmd->uid = cpu_to_le32(uid); - cmd->ooc_priority = - cpu_to_le32(iwl_mld_scan_ooc_priority(scan_status)); + cmd->ooc_priority = cpu_to_le32(ooc_priority); - iwl_mld_scan_cmd_set_gen_params(mld, params, vif, - &scan_p->general_params, scan_status); + iwl_mld_scan_cmd_set_gen_params(mld, params, vif, &scan_ptrs, + scan_status); - ret = iwl_mld_scan_cmd_set_sched_params(params, - scan_p->periodic_params.schedule, - &scan_p->periodic_params.delay); + ret = iwl_mld_scan_cmd_set_sched_params(params, &scan_ptrs); if (ret) return ret; - iwl_mld_scan_cmd_set_probe_params(params, &scan_p->probe_params, - &bitmap_ssid); + iwl_mld_scan_cmd_set_probe_params(params, &scan_ptrs, &bitmap_ssid); - ret = iwl_mld_scan_cmd_set_chan_params(mld, params, vif, scan_p, + ret = iwl_mld_scan_cmd_set_chan_params(mld, params, vif, &scan_ptrs, low_latency, scan_status, bitmap_ssid); if (ret) @@ -1187,6 +1212,45 @@ iwl_mld_scan_build_cmd(struct iwl_mld *mld, struct ieee80211_vif *vif, return uid; } +static const struct iwl_scan_umac_handler iwl_scan_umac_handlers[] = { + /* set the newest version first to shorten the list traverse time */ + IWL_SCAN_UMAC_HANDLER(17), +}; + +static int +iwl_mld_scan_build_cmd(struct iwl_mld *mld, struct ieee80211_vif *vif, + struct iwl_mld_scan_params *params, + enum iwl_mld_scan_status scan_status, + bool low_latency) +{ + int uid, err; + u32 ooc_priority; + + memset(mld->scan.cmd, 0, mld->scan.cmd_size); + uid = iwl_mld_scan_uid_by_status(mld, IWL_MLD_SCAN_NONE); + if (uid < 0) + return uid; + + ooc_priority = iwl_mld_scan_ooc_priority(scan_status); + + for (size_t i = 0; i < ARRAY_SIZE(iwl_scan_umac_handlers); i++) { + const struct iwl_scan_umac_handler *ver_handler = + &iwl_scan_umac_handlers[i]; + + if (ver_handler->version != mld->scan.cmd_ver) + continue; + + err = ver_handler->handler(mld, vif, params, scan_status, + uid, ooc_priority, low_latency); + return err ? : uid; + } + + IWL_ERR(mld, "No handler for UMAC scan cmd version %d\n", + mld->scan.cmd_ver); + + return -EINVAL; +} + static bool iwl_mld_scan_pass_all(struct iwl_mld *mld, struct cfg80211_sched_scan_request *req) @@ -2041,6 +2105,7 @@ int iwl_mld_alloc_scan_cmd(struct iwl_mld *mld) return -ENOMEM; mld->scan.cmd_size = scan_cmd_size; + mld->scan.cmd_ver = scan_cmd_ver; return 0; } diff --git a/drivers/net/wireless/intel/iwlwifi/mld/scan.h b/drivers/net/wireless/intel/iwlwifi/mld/scan.h index 69110f0cfc8e..772b3a02c4c4 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/scan.h +++ b/drivers/net/wireless/intel/iwlwifi/mld/scan.h @@ -109,6 +109,7 @@ enum iwl_mld_traffic_load { * @traffic_load.status: The current traffic load status, see * &enum iwl_mld_traffic_load * @cmd_size: size of %cmd. + * @cmd_ver: version of the scan command format. * @cmd: pointer to scan cmd buffer (allocated once in op mode start). * @last_6ghz_passive_jiffies: stores the last 6GHz passive scan time * in jiffies. @@ -134,6 +135,7 @@ struct iwl_mld_scan { /* And here fields that survive a fw restart */ size_t cmd_size; void *cmd; + u8 cmd_ver; unsigned long last_6ghz_passive_jiffies; unsigned long last_start_time_jiffies; u64 last_mlo_scan_time; From 6af32104003e86959044e61821e579b6cf160a48 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Thu, 19 Mar 2026 11:09:20 +0200 Subject: [PATCH 0876/1561] wifi: iwlwifi: mld: Introduce scan command version 18 The FW scan logic was extended to support new channels in the 7 GHz band, as such, the scan command was modified to support scanning more PSC channels. Introduce scan command version 18 handling, which is different from scan command version 17 only in the number of supported channel configurations. Signed-off-by: Ilan Peer Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260319110722.c995b4e8bbc5.Ie401d9cf02daaa5e6adf2b3c309643589e3ead71@changeid --- .../net/wireless/intel/iwlwifi/fw/api/scan.h | 45 +++++++++ drivers/net/wireless/intel/iwlwifi/mld/scan.c | 91 +++++++++++++++---- 2 files changed, 117 insertions(+), 19 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h b/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h index 60f0a4924ddf..c2bb400c834c 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h @@ -985,6 +985,7 @@ struct iwl_scan_probe_params_v4 { } __packed; /* SCAN_PROBE_PARAMS_API_S_VER_4 */ #define SCAN_MAX_NUM_CHANS_V3 67 +#define SCAN_MAX_NUM_CHANS_V4 68 /** * struct iwl_scan_channel_params_v4 - channel params @@ -1027,6 +1028,24 @@ struct iwl_scan_channel_params_v7 { struct iwl_scan_channel_cfg_umac channel_config[SCAN_MAX_NUM_CHANS_V3]; } __packed; /* SCAN_CHANNEL_PARAMS_API_S_VER_6 */ +/** + * struct iwl_scan_channel_params_v8 - channel params + * @flags: channel flags &enum iwl_scan_channel_flags + * @count: num of channels in scan request + * @n_aps_override: override the number of APs the FW uses to calculate dwell + * time when adaptive dwell is used. + * Channel k will use n_aps_override[i] when BIT(20 + i) is set in + * channel_config[k].flags + * @channel_config: array of explicit channel configurations + * for 2.4Ghz and 5.2Ghz bands + */ +struct iwl_scan_channel_params_v8 { + u8 flags; + u8 count; + u8 n_aps_override[2]; + struct iwl_scan_channel_cfg_umac channel_config[SCAN_MAX_NUM_CHANS_V4]; +} __packed; /* SCAN_CHANNEL_PARAMS_API_S_VER_8 */ + /** * struct iwl_scan_general_params_v11 - channel params * @flags: &enum iwl_umac_scan_general_flags_v2 @@ -1109,6 +1128,20 @@ struct iwl_scan_req_params_v17 { struct iwl_scan_probe_params_v4 probe_params; } __packed; /* SCAN_REQUEST_PARAMS_API_S_VER_17 - 14 */ +/** + * struct iwl_scan_req_params_v18 - scan request parameters (v18) + * @general_params: &struct iwl_scan_general_params_v11 + * @channel_params: &struct iwl_scan_channel_params_v8 + * @periodic_params: &struct iwl_scan_periodic_parms_v1 + * @probe_params: &struct iwl_scan_probe_params_v4 + */ +struct iwl_scan_req_params_v18 { + struct iwl_scan_general_params_v11 general_params; + struct iwl_scan_channel_params_v8 channel_params; + struct iwl_scan_periodic_parms_v1 periodic_params; + struct iwl_scan_probe_params_v4 probe_params; +} __packed; /* SCAN_REQUEST_PARAMS_API_S_VER_18 */ + /** * struct iwl_scan_req_umac_v12 - scan request command (v12) * @uid: scan id, &enum iwl_umac_scan_uid_offsets @@ -1133,6 +1166,18 @@ struct iwl_scan_req_umac_v17 { struct iwl_scan_req_params_v17 scan_params; } __packed; /* SCAN_REQUEST_CMD_UMAC_API_S_VER_17 - 14 */ +/** + * struct iwl_scan_req_umac_v18 - scan request command (v18) + * @uid: scan id, &enum iwl_umac_scan_uid_offsets + * @ooc_priority: out of channel priority - &enum iwl_scan_priority + * @scan_params: scan parameters + */ +struct iwl_scan_req_umac_v18 { + __le32 uid; + __le32 ooc_priority; + struct iwl_scan_req_params_v18 scan_params; +} __packed; /* SCAN_REQUEST_CMD_UMAC_API_S_VER_18 */ + /** * struct iwl_umac_scan_abort - scan abort command * @uid: scan id, &enum iwl_umac_scan_uid_offsets diff --git a/drivers/net/wireless/intel/iwlwifi/mld/scan.c b/drivers/net/wireless/intel/iwlwifi/mld/scan.c index 7f4679134def..96cd970cceb4 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/scan.c @@ -118,7 +118,7 @@ struct iwl_mld_scan_params { struct iwl_scan_req_params_ptrs { struct iwl_scan_general_params_v11 *general_params; - struct iwl_scan_channel_params_v7 *channel_params; + struct iwl_scan_channel_params_v8 *channel_params; struct iwl_scan_periodic_parms_v1 *periodic_params; struct iwl_scan_probe_params_v4 *probe_params; }; @@ -840,7 +840,7 @@ iwl_mld_scan_cmd_set_channels(struct iwl_mld *mld, int n_channels, u32 flags, enum nl80211_iftype vif_type) { - struct iwl_scan_channel_params_v7 *cp = scan_ptrs->channel_params; + struct iwl_scan_channel_params_v8 *cp = scan_ptrs->channel_params; for (int i = 0; i < n_channels; i++) { enum nl80211_band band = channels[i]->band; @@ -883,7 +883,7 @@ iwl_mld_scan_cfg_channels_6g(struct iwl_mld *mld, enum nl80211_iftype vif_type) { struct iwl_scan_probe_params_v4 *pp = scan_ptrs->probe_params; - struct iwl_scan_channel_params_v7 *cp = scan_ptrs->channel_params; + struct iwl_scan_channel_params_v8 *cp = scan_ptrs->channel_params; struct cfg80211_scan_6ghz_params *scan_6ghz_params = params->scan_6ghz_params; u32 i; @@ -1083,7 +1083,7 @@ iwl_mld_scan_cmd_set_6ghz_chan_params(struct iwl_mld *mld, struct ieee80211_vif *vif, struct iwl_scan_req_params_ptrs *scan_ptrs) { - struct iwl_scan_channel_params_v7 *cp = scan_ptrs->channel_params; + struct iwl_scan_channel_params_v8 *cp = scan_ptrs->channel_params; /* Explicitly clear the flags since most of them are not * relevant for 6 GHz scan. @@ -1111,7 +1111,7 @@ iwl_mld_scan_cmd_set_chan_params(struct iwl_mld *mld, enum iwl_mld_scan_status scan_status, u32 channel_cfg_flags) { - struct iwl_scan_channel_params_v7 *cp = scan_ptrs->channel_params; + struct iwl_scan_channel_params_v8 *cp = scan_ptrs->channel_params; struct ieee80211_supported_band *sband = &mld->nvm_data->bands[NL80211_BAND_6GHZ]; @@ -1173,6 +1173,58 @@ struct iwl_scan_umac_handler { .handler = iwl_mld_scan_umac_v##_ver, \ } +static int iwl_mld_scan_umac_common(struct iwl_mld *mld, + struct ieee80211_vif *vif, + struct iwl_mld_scan_params *params, + struct iwl_scan_req_params_ptrs *scan_ptrs, + enum iwl_mld_scan_status scan_status, + bool low_latency) +{ + u32 bitmap_ssid = 0; + int ret; + + iwl_mld_scan_cmd_set_gen_params(mld, params, vif, scan_ptrs, + scan_status); + + ret = iwl_mld_scan_cmd_set_sched_params(params, scan_ptrs); + if (ret) + return ret; + + iwl_mld_scan_cmd_set_probe_params(params, scan_ptrs, &bitmap_ssid); + + return iwl_mld_scan_cmd_set_chan_params(mld, params, vif, scan_ptrs, + low_latency, scan_status, + bitmap_ssid); +} + +static int iwl_mld_scan_umac_v18(struct iwl_mld *mld, struct ieee80211_vif *vif, + struct iwl_mld_scan_params *params, + enum iwl_mld_scan_status scan_status, + int uid, u32 ooc_priority, bool low_latency) +{ + struct iwl_scan_req_umac_v18 *cmd = mld->scan.cmd; + struct iwl_scan_req_params_ptrs scan_ptrs = { + .general_params = &cmd->scan_params.general_params, + .probe_params = &cmd->scan_params.probe_params, + .channel_params = &cmd->scan_params.channel_params, + .periodic_params = &cmd->scan_params.periodic_params + }; + int ret; + + if (WARN_ON(params->n_channels > SCAN_MAX_NUM_CHANS_V4)) + return -EINVAL; + + cmd->uid = cpu_to_le32(uid); + cmd->ooc_priority = cpu_to_le32(ooc_priority); + + ret = iwl_mld_scan_umac_common(mld, vif, params, &scan_ptrs, + scan_status, low_latency); + if (ret) + return ret; + + return uid; +} + static int iwl_mld_scan_umac_v17(struct iwl_mld *mld, struct ieee80211_vif *vif, struct iwl_mld_scan_params *params, enum iwl_mld_scan_status scan_status, @@ -1182,10 +1234,18 @@ static int iwl_mld_scan_umac_v17(struct iwl_mld *mld, struct ieee80211_vif *vif, struct iwl_scan_req_params_ptrs scan_ptrs = { .general_params = &cmd->scan_params.general_params, .probe_params = &cmd->scan_params.probe_params, - .channel_params = &cmd->scan_params.channel_params, + + /* struct iwl_scan_channel_params_v8 and struct + * iwl_scan_channel_params_v7 are almost identical. The only + * difference is that the newer version allows configuration of + * more channels. So casting here is ok as long as we ensure + * that we don't exceed the max number of channels supported by + * the older version (see the WARN_ON below). + */ + .channel_params = (struct iwl_scan_channel_params_v8 *) + &cmd->scan_params.channel_params, .periodic_params = &cmd->scan_params.periodic_params }; - u32 bitmap_ssid = 0; int ret; if (WARN_ON(params->n_channels > SCAN_MAX_NUM_CHANS_V3)) @@ -1194,18 +1254,8 @@ static int iwl_mld_scan_umac_v17(struct iwl_mld *mld, struct ieee80211_vif *vif, cmd->uid = cpu_to_le32(uid); cmd->ooc_priority = cpu_to_le32(ooc_priority); - iwl_mld_scan_cmd_set_gen_params(mld, params, vif, &scan_ptrs, - scan_status); - - ret = iwl_mld_scan_cmd_set_sched_params(params, &scan_ptrs); - if (ret) - return ret; - - iwl_mld_scan_cmd_set_probe_params(params, &scan_ptrs, &bitmap_ssid); - - ret = iwl_mld_scan_cmd_set_chan_params(mld, params, vif, &scan_ptrs, - low_latency, scan_status, - bitmap_ssid); + ret = iwl_mld_scan_umac_common(mld, vif, params, &scan_ptrs, + scan_status, low_latency); if (ret) return ret; @@ -1214,6 +1264,7 @@ static int iwl_mld_scan_umac_v17(struct iwl_mld *mld, struct ieee80211_vif *vif, static const struct iwl_scan_umac_handler iwl_scan_umac_handlers[] = { /* set the newest version first to shorten the list traverse time */ + IWL_SCAN_UMAC_HANDLER(18), IWL_SCAN_UMAC_HANDLER(17), }; @@ -2095,6 +2146,8 @@ int iwl_mld_alloc_scan_cmd(struct iwl_mld *mld) if (scan_cmd_ver == 17) { scan_cmd_size = sizeof(struct iwl_scan_req_umac_v17); + } else if (scan_cmd_ver == 18) { + scan_cmd_size = sizeof(struct iwl_scan_req_umac_v18); } else { IWL_ERR(mld, "Unexpected scan cmd version %d\n", scan_cmd_ver); return -EINVAL; From f983c7308e271b1b5c70147ab743b587c8362b44 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 19 Mar 2026 11:09:21 +0200 Subject: [PATCH 0877/1561] wifi: iwlwifi: uefi: open code the PPAG table store operation The structure in firmware runtime will need to grow because we're adding a subband for UNII-9. This means that we will soon no longer be able to just memcpy the data from the UEFI table. The layout of the array will change. Tediously copy the data byte-byte to make sure things get to the right place even when we'll increase the number of subbands. Make it easier for the uefi_cnv_var_ppag structure to grow by simpiflying the layout it becomes an array of s8. The layout of the structure becomes less obvious from the structure's declaration, but then the code is more flexible. Don't use UEFI_SAR_MAX_SUB_BANDS_NUM for the number of bands for PPAG. Of course, SAR related structures will grow in future patches, but decouple SAR and PPAG to make the work easier. Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260319110722.61e729ea2bde.I9d9cda29f576290bf966f780bf7ad5af34970e6f@changeid --- drivers/net/wireless/intel/iwlwifi/fw/uefi.c | 23 ++++++++++++++++---- drivers/net/wireless/intel/iwlwifi/fw/uefi.h | 16 +++++--------- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/uefi.c b/drivers/net/wireless/intel/iwlwifi/fw/uefi.c index d4e1ab1f7c84..38f9d9adf90e 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/uefi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/uefi.c @@ -571,9 +571,11 @@ int iwl_uefi_get_ppag_table(struct iwl_fw_runtime *fwrt) { struct uefi_cnv_var_ppag *data; int ret = 0; + int data_sz = sizeof(*data) + sizeof(data->vals[0]) * + IWL_NUM_CHAIN_LIMITS * UEFI_PPAG_SUB_BANDS_NUM; data = iwl_uefi_get_verified_variable(fwrt->trans, IWL_UEFI_PPAG_NAME, - "PPAG", sizeof(*data), NULL); + "PPAG", data_sz, NULL); if (IS_ERR(data)) return -EINVAL; @@ -589,9 +591,22 @@ int iwl_uefi_get_ppag_table(struct iwl_fw_runtime *fwrt) fwrt->ppag_flags = iwl_bios_get_ppag_flags(data->ppag_modes, fwrt->ppag_bios_rev); - BUILD_BUG_ON(sizeof(fwrt->ppag_chains) != sizeof(data->ppag_chains)); - memcpy(&fwrt->ppag_chains, &data->ppag_chains, - sizeof(data->ppag_chains)); + /* + * Make sure fwrt has enough room to hold + * data coming from the UEFI table + */ + BUILD_BUG_ON(ARRAY_SIZE(fwrt->ppag_chains) * + ARRAY_SIZE(fwrt->ppag_chains[0].subbands) < + IWL_NUM_CHAIN_LIMITS * UEFI_PPAG_SUB_BANDS_NUM); + + for (int chain = 0; chain < IWL_NUM_CHAIN_LIMITS; chain++) { + for (int subband = 0; + subband < UEFI_PPAG_SUB_BANDS_NUM; + subband++) + fwrt->ppag_chains[chain].subbands[subband] = + data->vals[chain * UEFI_PPAG_SUB_BANDS_NUM + subband]; + } + fwrt->ppag_bios_source = BIOS_SOURCE_UEFI; out: kfree(data); diff --git a/drivers/net/wireless/intel/iwlwifi/fw/uefi.h b/drivers/net/wireless/intel/iwlwifi/fw/uefi.h index c6940a3c03ea..4f0ce068a589 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/uefi.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/uefi.h @@ -77,6 +77,7 @@ struct uefi_cnv_common_step_data { } __packed; #define UEFI_SAR_MAX_SUB_BANDS_NUM 11 +#define UEFI_PPAG_SUB_BANDS_NUM 11 #define UEFI_SAR_MAX_CHAINS_PER_PROFILE 4 /* @@ -136,24 +137,19 @@ struct uefi_cnv_var_wgds { struct iwl_geo_profile geo_profiles[BIOS_GEO_MAX_PROFILE_NUM]; } __packed; -/* - * struct uefi_ppag_chain - PPAG table for a specific chain - * @subbands: the PPAG values for band - */ -struct uefi_ppag_chain { - s8 subbands[UEFI_SAR_MAX_SUB_BANDS_NUM]; -}; - /* * struct uefi_cnv_var_ppag - PPAG table as defined in UEFI * @revision: the revision of the table * @ppag_modes: values from &enum iwl_ppag_flags - * @ppag_chains: the PPAG values per chain and band + * @vals: the PPAG values per chain and band as an array. + * vals[chain * num_of_subbands + subband] will return the right value. + * num_of_subbands is %UEFI_PPAG_SUB_BANDS_NUM. + * the max number of chains is currently 2 */ struct uefi_cnv_var_ppag { u8 revision; u32 ppag_modes; - struct uefi_ppag_chain ppag_chains[IWL_NUM_CHAIN_LIMITS]; + s8 vals[]; } __packed; /* struct uefi_cnv_var_wtas - WTAS tabled as defined in UEFI From 1a7d1830be843af332081833223f4be536c00e3b Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 19 Mar 2026 11:09:22 +0200 Subject: [PATCH 0878/1561] wifi: iwlwifi: bring iwl_fill_ppag_table to the iwlmvm iwl_fill_ppag_table fills a command that is sent to the firmware. This command has several versions and handling those different versions is the responsibility of the op_mode. Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260319110722.1f9b38ff7d22.I5c7482c074d63cd18533ac83289cc0b26c1be3d2@changeid --- .../wireless/intel/iwlwifi/fw/regulatory.c | 126 ----------------- .../wireless/intel/iwlwifi/fw/regulatory.h | 4 - drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 129 +++++++++++++++++- 3 files changed, 128 insertions(+), 131 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/regulatory.c b/drivers/net/wireless/intel/iwlwifi/fw/regulatory.c index 5793c267daf7..9e834cc1b054 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/regulatory.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/regulatory.c @@ -304,132 +304,6 @@ int iwl_sar_fill_profile(struct iwl_fw_runtime *fwrt, } IWL_EXPORT_SYMBOL(iwl_sar_fill_profile); -static bool iwl_ppag_value_valid(struct iwl_fw_runtime *fwrt, int chain, - int subband) -{ - s8 ppag_val = fwrt->ppag_chains[chain].subbands[subband]; - - if ((subband == 0 && - (ppag_val > IWL_PPAG_MAX_LB || ppag_val < IWL_PPAG_MIN_LB)) || - (subband != 0 && - (ppag_val > IWL_PPAG_MAX_HB || ppag_val < IWL_PPAG_MIN_HB))) { - IWL_DEBUG_RADIO(fwrt, "Invalid PPAG value: %d\n", ppag_val); - return false; - } - return true; -} - -/* Utility function for iwlmvm and iwlxvt */ -int iwl_fill_ppag_table(struct iwl_fw_runtime *fwrt, - union iwl_ppag_table_cmd *cmd, int *cmd_size) -{ - u8 cmd_ver; - int i, j, num_sub_bands; - s8 *gain; - bool send_ppag_always; - - /* many firmware images for JF lie about this */ - if (CSR_HW_RFID_TYPE(fwrt->trans->info.hw_rf_id) == - CSR_HW_RFID_TYPE(CSR_HW_RF_ID_TYPE_JF)) - return -EOPNOTSUPP; - - if (!fw_has_capa(&fwrt->fw->ucode_capa, IWL_UCODE_TLV_CAPA_SET_PPAG)) { - IWL_DEBUG_RADIO(fwrt, - "PPAG capability not supported by FW, command not sent.\n"); - return -EINVAL; - } - - cmd_ver = iwl_fw_lookup_cmd_ver(fwrt->fw, - WIDE_ID(PHY_OPS_GROUP, - PER_PLATFORM_ANT_GAIN_CMD), 1); - /* - * Starting from ver 4, driver needs to send the PPAG CMD regardless - * if PPAG is enabled/disabled or valid/invalid. - */ - send_ppag_always = cmd_ver > 3; - - /* Don't send PPAG if it is disabled */ - if (!send_ppag_always && !fwrt->ppag_flags) { - IWL_DEBUG_RADIO(fwrt, "PPAG not enabled, command not sent.\n"); - return -EINVAL; - } - - IWL_DEBUG_RADIO(fwrt, "PPAG cmd ver is %d\n", cmd_ver); - if (cmd_ver == 1) { - num_sub_bands = IWL_NUM_SUB_BANDS_V1; - gain = cmd->v1.gain[0]; - *cmd_size = sizeof(cmd->v1); - cmd->v1.flags = cpu_to_le32(fwrt->ppag_flags & IWL_PPAG_CMD_V1_MASK); - if (fwrt->ppag_bios_rev >= 1) { - /* in this case FW supports revision 0 */ - IWL_DEBUG_RADIO(fwrt, - "PPAG table rev is %d, send truncated table\n", - fwrt->ppag_bios_rev); - } - } else if (cmd_ver == 5) { - num_sub_bands = IWL_NUM_SUB_BANDS_V2; - gain = cmd->v5.gain[0]; - *cmd_size = sizeof(cmd->v5); - cmd->v5.flags = cpu_to_le32(fwrt->ppag_flags & IWL_PPAG_CMD_V5_MASK); - if (fwrt->ppag_bios_rev == 0) { - /* in this case FW supports revisions 1,2 or 3 */ - IWL_DEBUG_RADIO(fwrt, - "PPAG table rev is 0, send padded table\n"); - } - } else if (cmd_ver == 7) { - num_sub_bands = IWL_NUM_SUB_BANDS_V2; - gain = cmd->v7.gain[0]; - *cmd_size = sizeof(cmd->v7); - cmd->v7.ppag_config_info.hdr.table_source = - fwrt->ppag_bios_source; - cmd->v7.ppag_config_info.hdr.table_revision = - fwrt->ppag_bios_rev; - cmd->v7.ppag_config_info.value = cpu_to_le32(fwrt->ppag_flags); - } else { - IWL_DEBUG_RADIO(fwrt, "Unsupported PPAG command version\n"); - return -EINVAL; - } - - /* ppag mode */ - IWL_DEBUG_RADIO(fwrt, - "PPAG MODE bits were read from bios: %d\n", - fwrt->ppag_flags); - - if (cmd_ver == 1 && - !fw_has_capa(&fwrt->fw->ucode_capa, - IWL_UCODE_TLV_CAPA_PPAG_CHINA_BIOS_SUPPORT)) { - cmd->v1.flags &= cpu_to_le32(IWL_PPAG_ETSI_MASK); - IWL_DEBUG_RADIO(fwrt, "masking ppag China bit\n"); - } else { - IWL_DEBUG_RADIO(fwrt, "isn't masking ppag China bit\n"); - } - - /* The 'flags' field is the same in v1 and v5 so we can just - * use v1 to access it. - */ - IWL_DEBUG_RADIO(fwrt, - "PPAG MODE bits going to be sent: %d\n", - (cmd_ver < 7) ? le32_to_cpu(cmd->v1.flags) : - le32_to_cpu(cmd->v7.ppag_config_info.value)); - - for (i = 0; i < IWL_NUM_CHAIN_LIMITS; i++) { - for (j = 0; j < num_sub_bands; j++) { - if (!send_ppag_always && - !iwl_ppag_value_valid(fwrt, i, j)) - return -EINVAL; - - gain[i * num_sub_bands + j] = - fwrt->ppag_chains[i].subbands[j]; - IWL_DEBUG_RADIO(fwrt, - "PPAG table: chain[%d] band[%d]: gain = %d\n", - i, j, gain[i * num_sub_bands + j]); - } - } - - return 0; -} -IWL_EXPORT_SYMBOL(iwl_fill_ppag_table); - bool iwl_is_ppag_approved(struct iwl_fw_runtime *fwrt) { if (!dmi_check_system(dmi_ppag_approved_list)) { diff --git a/drivers/net/wireless/intel/iwlwifi/fw/regulatory.h b/drivers/net/wireless/intel/iwlwifi/fw/regulatory.h index 1489031687b7..8e04b0e2d507 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/regulatory.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/regulatory.h @@ -190,10 +190,6 @@ int iwl_sar_fill_profile(struct iwl_fw_runtime *fwrt, __le16 *per_chain, u32 n_tables, u32 n_subbands, int prof_a, int prof_b); -int iwl_fill_ppag_table(struct iwl_fw_runtime *fwrt, - union iwl_ppag_table_cmd *cmd, - int *cmd_size); - bool iwl_is_ppag_approved(struct iwl_fw_runtime *fwrt); bool iwl_is_tas_approved(void); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index f5e5c10cc581..d46715abd7a5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -1034,12 +1034,139 @@ static int iwl_mvm_sar_geo_init(struct iwl_mvm *mvm) return iwl_mvm_send_cmd_pdu(mvm, cmd_id, 0, len, &cmd); } +static bool iwl_mvm_ppag_value_valid(struct iwl_fw_runtime *fwrt, int chain, + int subband) +{ + s8 ppag_val = fwrt->ppag_chains[chain].subbands[subband]; + + if ((subband == 0 && + (ppag_val > IWL_PPAG_MAX_LB || ppag_val < IWL_PPAG_MIN_LB)) || + (subband != 0 && + (ppag_val > IWL_PPAG_MAX_HB || ppag_val < IWL_PPAG_MIN_HB))) { + IWL_DEBUG_RADIO(fwrt, "Invalid PPAG value: %d\n", ppag_val); + return false; + } + return true; +} + +static int iwl_mvm_fill_ppag_table(struct iwl_fw_runtime *fwrt, + union iwl_ppag_table_cmd *cmd, + int *cmd_size) +{ + u8 cmd_ver; + int i, j, num_sub_bands; + s8 *gain; + bool send_ppag_always; + + /* many firmware images for JF lie about this */ + if (CSR_HW_RFID_TYPE(fwrt->trans->info.hw_rf_id) == + CSR_HW_RFID_TYPE(CSR_HW_RF_ID_TYPE_JF)) + return -EOPNOTSUPP; + + if (!fw_has_capa(&fwrt->fw->ucode_capa, IWL_UCODE_TLV_CAPA_SET_PPAG)) { + IWL_DEBUG_RADIO(fwrt, + "PPAG capability not supported by FW, command not sent.\n"); + return -EINVAL; + } + + cmd_ver = iwl_fw_lookup_cmd_ver(fwrt->fw, + WIDE_ID(PHY_OPS_GROUP, + PER_PLATFORM_ANT_GAIN_CMD), 1); + /* + * Starting from ver 4, driver needs to send the PPAG CMD regardless + * if PPAG is enabled/disabled or valid/invalid. + */ + send_ppag_always = cmd_ver > 3; + + /* Don't send PPAG if it is disabled */ + if (!send_ppag_always && !fwrt->ppag_flags) { + IWL_DEBUG_RADIO(fwrt, "PPAG not enabled, command not sent.\n"); + return -EINVAL; + } + + IWL_DEBUG_RADIO(fwrt, "PPAG cmd ver is %d\n", cmd_ver); + if (cmd_ver == 1) { + num_sub_bands = IWL_NUM_SUB_BANDS_V1; + gain = cmd->v1.gain[0]; + *cmd_size = sizeof(cmd->v1); + cmd->v1.flags = + cpu_to_le32(fwrt->ppag_flags & IWL_PPAG_CMD_V1_MASK); + if (fwrt->ppag_bios_rev >= 1) { + /* in this case FW supports revision 0 */ + IWL_DEBUG_RADIO(fwrt, + "PPAG table rev is %d, send truncated table\n", + fwrt->ppag_bios_rev); + } + } else if (cmd_ver == 5) { + num_sub_bands = IWL_NUM_SUB_BANDS_V2; + gain = cmd->v5.gain[0]; + *cmd_size = sizeof(cmd->v5); + cmd->v5.flags = + cpu_to_le32(fwrt->ppag_flags & IWL_PPAG_CMD_V5_MASK); + if (fwrt->ppag_bios_rev == 0) { + /* in this case FW supports revisions 1,2 or 3 */ + IWL_DEBUG_RADIO(fwrt, + "PPAG table rev is 0, send padded table\n"); + } + } else if (cmd_ver == 7) { + num_sub_bands = IWL_NUM_SUB_BANDS_V2; + gain = cmd->v7.gain[0]; + *cmd_size = sizeof(cmd->v7); + cmd->v7.ppag_config_info.hdr.table_source = + fwrt->ppag_bios_source; + cmd->v7.ppag_config_info.hdr.table_revision = + fwrt->ppag_bios_rev; + cmd->v7.ppag_config_info.value = cpu_to_le32(fwrt->ppag_flags); + } else { + IWL_DEBUG_RADIO(fwrt, "Unsupported PPAG command version\n"); + return -EINVAL; + } + + /* ppag mode */ + IWL_DEBUG_RADIO(fwrt, + "PPAG MODE bits were read from bios: %d\n", + fwrt->ppag_flags); + + if (cmd_ver == 1 && + !fw_has_capa(&fwrt->fw->ucode_capa, + IWL_UCODE_TLV_CAPA_PPAG_CHINA_BIOS_SUPPORT)) { + cmd->v1.flags &= cpu_to_le32(IWL_PPAG_ETSI_MASK); + IWL_DEBUG_RADIO(fwrt, "masking ppag China bit\n"); + } else { + IWL_DEBUG_RADIO(fwrt, "isn't masking ppag China bit\n"); + } + + /* The 'flags' field is the same in v1 and v5 so we can just + * use v1 to access it. + */ + IWL_DEBUG_RADIO(fwrt, + "PPAG MODE bits going to be sent: %d\n", + (cmd_ver < 7) ? le32_to_cpu(cmd->v1.flags) : + le32_to_cpu(cmd->v7.ppag_config_info.value)); + + for (i = 0; i < IWL_NUM_CHAIN_LIMITS; i++) { + for (j = 0; j < num_sub_bands; j++) { + if (!send_ppag_always && + !iwl_mvm_ppag_value_valid(fwrt, i, j)) + return -EINVAL; + + gain[i * num_sub_bands + j] = + fwrt->ppag_chains[i].subbands[j]; + IWL_DEBUG_RADIO(fwrt, + "PPAG table: chain[%d] band[%d]: gain = %d\n", + i, j, gain[i * num_sub_bands + j]); + } + } + + return 0; +} + int iwl_mvm_ppag_send_cmd(struct iwl_mvm *mvm) { union iwl_ppag_table_cmd cmd; int ret, cmd_size; - ret = iwl_fill_ppag_table(&mvm->fwrt, &cmd, &cmd_size); + ret = iwl_mvm_fill_ppag_table(&mvm->fwrt, &cmd, &cmd_size); /* Not supporting PPAG table is a valid scenario */ if (ret < 0) return 0; From 99d602b2d7906dac4322ad3e50615931a3180d9a Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Thu, 19 Mar 2026 11:09:23 +0200 Subject: [PATCH 0879/1561] wifi: iwlwifi: mld: add support for sta command version 3 In this version, the link_id becomes a link_mask to support multiple links that are used to communicate with the station in question. This is needed for NAN, in which we can communicate on multiple channels with the same station. Also add a new STA type - NAN peer. Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260319110722.382a30bd1b70.Id6271e7eba233a11dc214ed2e07c2b186b167c66@changeid --- .../wireless/intel/iwlwifi/fw/api/mac-cfg.h | 98 ++++++++++++++++++- drivers/net/wireless/intel/iwlwifi/mld/sta.c | 42 ++++++-- .../net/wireless/intel/iwlwifi/mvm/mld-sta.c | 6 +- 3 files changed, 132 insertions(+), 14 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h b/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h index c7a833f8041a..2e3f437686b9 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h @@ -42,7 +42,8 @@ enum iwl_mac_conf_subcmd_ids { */ LINK_CONFIG_CMD = 0x9, /** - * @STA_CONFIG_CMD: &struct iwl_sta_cfg_cmd + * @STA_CONFIG_CMD: &struct iwl_sta_cfg_cmd_v1, + * &struct iwl_sta_cfg_cmd_v2, or &struct iwl_sta_cfg_cmd */ STA_CONFIG_CMD = 0xA, /** @@ -662,6 +663,13 @@ struct iwl_link_config_cmd { * @STATION_TYPE_MCAST: the station used for BCAST / MCAST in GO. Will be * suspended / resumed at the right timing depending on the clients' * power save state and the DTIM timing + * @STATION_TYPE_NAN_PEER_NMI: NAN management peer station type. A station + * of this type can have any number of links (even none) set in the + * link_mask. (Supported since version 3.) + * @STATION_TYPE_NAN_PEER_NDI: NAN data peer station type. A station + * of this type can have any number of links (even none) set in the + * link_mask. (Supported since version 3.) + * @STATION_TYPE_MAX: maximum number of FW station types * @STATION_TYPE_AUX: aux sta. In the FW there is no need for a special type * for the aux sta, so this type is only for driver - internal use. */ @@ -669,8 +677,11 @@ enum iwl_fw_sta_type { STATION_TYPE_PEER, STATION_TYPE_BCAST_MGMT, STATION_TYPE_MCAST, - STATION_TYPE_AUX, -}; /* STATION_TYPE_E_VER_1 */ + STATION_TYPE_NAN_PEER_NMI, + STATION_TYPE_NAN_PEER_NDI, + STATION_TYPE_MAX, + STATION_TYPE_AUX = STATION_TYPE_MAX /* this doesn't exist in FW */ +}; /* STATION_TYPE_E_VER_1, _VER_2 */ /** * struct iwl_sta_cfg_cmd_v1 - cmd structure to add a peer sta to the uCode's @@ -729,7 +740,7 @@ struct iwl_sta_cfg_cmd_v1 { } __packed; /* STA_CMD_API_S_VER_1 */ /** - * struct iwl_sta_cfg_cmd - cmd structure to add a peer sta to the uCode's + * struct iwl_sta_cfg_cmd_v2 - cmd structure to add a peer sta to the uCode's * station table * ( STA_CONFIG_CMD = 0xA ) * @@ -769,7 +780,7 @@ struct iwl_sta_cfg_cmd_v1 { * @mic_compute_pad_delay: MIC compute time padding * @reserved: Reserved for alignment */ -struct iwl_sta_cfg_cmd { +struct iwl_sta_cfg_cmd_v2 { __le32 sta_id; __le32 link_id; u8 peer_mld_address[ETH_ALEN]; @@ -799,6 +810,83 @@ struct iwl_sta_cfg_cmd { u8 reserved[2]; } __packed; /* STA_CMD_API_S_VER_2 */ +/** + * struct iwl_sta_cfg_cmd - cmd structure to add a peer sta to the uCode's + * station table + * ( STA_CONFIG_CMD = 0xA ) + * + * @sta_id: index of station in uCode's station table + * @link_mask: bitmap of link FW IDs used with this STA + * @peer_mld_address: the peers mld address + * @reserved_for_peer_mld_address: reserved + * @peer_link_address: the address of the link that is used to communicate + * with this sta + * @reserved_for_peer_link_address: reserved + * @station_type: type of this station. See &enum iwl_fw_sta_type + * @assoc_id: for GO only + * @beamform_flags: beam forming controls + * @mfp: indicates whether the STA uses management frame protection or not. + * @mimo: indicates whether the sta uses mimo or not + * @mimo_protection: indicates whether the sta uses mimo protection or not + * @ack_enabled: indicates that the AP supports receiving ACK- + * enabled AGG, i.e. both BACK and non-BACK frames in a single AGG + * @trig_rnd_alloc: indicates that trigger based random allocation + * is enabled according to UORA element existence + * @tx_ampdu_spacing: minimum A-MPDU spacing: + * 4 - 2us density, 5 - 4us density, 6 - 8us density, 7 - 16us density + * @tx_ampdu_max_size: maximum A-MPDU length: 0 - 8K, 1 - 16K, 2 - 32K, + * 3 - 64K, 4 - 128K, 5 - 256K, 6 - 512K, 7 - 1024K. + * @sp_length: the size of the SP in actual number of frames + * @uapsd_acs: 4 LS bits are trigger enabled ACs, 4 MS bits are the deliver + * enabled ACs. + * @pkt_ext: optional, exists according to PPE-present bit in the HE/EHT-PHY + * capa + * @htc_flags: which features are supported in HTC + * @use_ldpc_x2_cw: Indicates whether to use LDPC with double CW + * @use_icf: Indicates whether to use ICF instead of RTS + * @dps_pad_time: DPS (Dynamic Power Save) padding delay resolution to ensure + * proper timing alignment + * @dps_trans_delay: DPS minimal time that takes the peer to return to low power + * @dps_enabled: flag indicating whether or not DPS is enabled + * @mic_prep_pad_delay: MIC prep time padding + * @mic_compute_pad_delay: MIC compute time padding + * @nmi_sta_id: for an NDI peer STA, the NMI peer STA ID it relates to + * @ndi_local_addr: for an NDI peer STA, the local NDI interface MAC address + * @reserved: Reserved for alignment + */ +struct iwl_sta_cfg_cmd { + __le32 sta_id; + __le32 link_mask; + u8 peer_mld_address[ETH_ALEN]; + __le16 reserved_for_peer_mld_address; + u8 peer_link_address[ETH_ALEN]; + __le16 reserved_for_peer_link_address; + __le32 station_type; + __le32 assoc_id; + __le32 beamform_flags; + __le32 mfp; + __le32 mimo; + __le32 mimo_protection; + __le32 ack_enabled; + __le32 trig_rnd_alloc; + __le32 tx_ampdu_spacing; + __le32 tx_ampdu_max_size; + __le32 sp_length; + __le32 uapsd_acs; + struct iwl_he_pkt_ext_v2 pkt_ext; + __le32 htc_flags; + u8 use_ldpc_x2_cw; + u8 use_icf; + u8 dps_pad_time; + u8 dps_trans_delay; + u8 dps_enabled; + u8 mic_prep_pad_delay; + u8 mic_compute_pad_delay; + u8 nmi_sta_id; + u8 ndi_local_addr[ETH_ALEN]; + u8 reserved[2]; +} __packed; /* STA_CMD_API_S_VER_3 */ + /** * struct iwl_aux_sta_cmd - command for AUX STA configuration * ( AUX_STA_CMD = 0xB ) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/sta.c b/drivers/net/wireless/intel/iwlwifi/mld/sta.c index 6b7a89e050e6..f40c49377466 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/sta.c @@ -398,12 +398,42 @@ static u32 iwl_mld_get_htc_flags(struct ieee80211_link_sta *link_sta) return htc_flags; } +/* Note: modifies the command depending on FW command version */ static int iwl_mld_send_sta_cmd(struct iwl_mld *mld, - const struct iwl_sta_cfg_cmd *cmd) + struct iwl_sta_cfg_cmd *cmd) { - int ret = iwl_mld_send_cmd_pdu(mld, - WIDE_ID(MAC_CONF_GROUP, STA_CONFIG_CMD), - cmd); + int cmd_id = WIDE_ID(MAC_CONF_GROUP, STA_CONFIG_CMD); + int cmd_ver = iwl_fw_lookup_cmd_ver(mld->fw, cmd_id, 0); + int len = sizeof(*cmd); + int ret; + + if (cmd_ver < 2) { + IWL_ERR(mld, "Unsupported STA_CONFIG_CMD version %d\n", + cmd_ver); + return -EINVAL; + } else if (cmd_ver == 2) { + struct iwl_sta_cfg_cmd_v2 *cmd_v2 = (void *)cmd; + + if (WARN_ON(cmd->station_type == cpu_to_le32(STATION_TYPE_NAN_PEER_NMI) || + cmd->station_type == cpu_to_le32(STATION_TYPE_NAN_PEER_NDI) || + hweight32(le32_to_cpu(cmd->link_mask)) != 1)) + return -EINVAL; + /* + * These fields are located in a different place in the struct of v2. + * The assumption is that UHR won't be used with FW that has v2. + */ + if (WARN_ON(cmd->mic_prep_pad_delay || cmd->mic_compute_pad_delay)) + return -EINVAL; + + len = sizeof(struct iwl_sta_cfg_cmd_v2); + cmd_v2->link_id = cpu_to_le32(__ffs(le32_to_cpu(cmd->link_mask))); + } else if (WARN_ON(cmd->station_type != cpu_to_le32(STATION_TYPE_NAN_PEER_NMI) && + cmd->station_type != cpu_to_le32(STATION_TYPE_NAN_PEER_NDI) && + hweight32(le32_to_cpu(cmd->link_mask)) != 1)) { + return -EINVAL; + } + + ret = iwl_mld_send_cmd_pdu(mld, cmd_id, cmd, len); if (ret) IWL_ERR(mld, "STA_CONFIG_CMD send failed, ret=0x%x\n", ret); return ret; @@ -431,8 +461,8 @@ iwl_mld_add_modify_sta_cmd(struct iwl_mld *mld, return -EINVAL; cmd.sta_id = cpu_to_le32(fw_id); + cmd.link_mask = cpu_to_le32(BIT(mld_link->fw_id)); cmd.station_type = cpu_to_le32(mld_sta->sta_type); - cmd.link_id = cpu_to_le32(mld_link->fw_id); memcpy(&cmd.peer_mld_address, sta->addr, ETH_ALEN); memcpy(&cmd.peer_link_address, link_sta->addr, ETH_ALEN); @@ -982,7 +1012,7 @@ iwl_mld_add_internal_sta_to_fw(struct iwl_mld *mld, return iwl_mld_send_aux_sta_cmd(mld, internal_sta); cmd.sta_id = cpu_to_le32((u8)internal_sta->sta_id); - cmd.link_id = cpu_to_le32(fw_link_id); + cmd.link_mask = cpu_to_le32(BIT(fw_link_id)); cmd.station_type = cpu_to_le32(internal_sta->sta_type); /* FW doesn't allow to add a IGTK/BIGTK if the sta isn't marked as MFP. diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c index 44e16ee9514e..da7ed4639a93 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c @@ -20,7 +20,7 @@ u32 iwl_mvm_sta_fw_id_mask(struct iwl_mvm *mvm, struct ieee80211_sta *sta, } static int iwl_mvm_mld_send_sta_cmd(struct iwl_mvm *mvm, - struct iwl_sta_cfg_cmd *cmd) + struct iwl_sta_cfg_cmd_v2 *cmd) { u32 cmd_id = WIDE_ID(MAC_CONF_GROUP, STA_CONFIG_CMD); int cmd_len = iwl_fw_lookup_cmd_ver(mvm->fw, cmd_id, 0) > 1 ? @@ -41,7 +41,7 @@ static int iwl_mvm_mld_add_int_sta_to_fw(struct iwl_mvm *mvm, struct iwl_mvm_int_sta *sta, const u8 *addr, int link_id) { - struct iwl_sta_cfg_cmd cmd; + struct iwl_sta_cfg_cmd_v2 cmd; lockdep_assert_held(&mvm->mutex); @@ -416,7 +416,7 @@ static int iwl_mvm_mld_cfg_sta(struct iwl_mvm *mvm, struct ieee80211_sta *sta, struct iwl_mvm_vif *mvm_vif = iwl_mvm_vif_from_mac80211(vif); struct iwl_mvm_vif_link_info *link_info = mvm_vif->link[link_conf->link_id]; - struct iwl_sta_cfg_cmd cmd = { + struct iwl_sta_cfg_cmd_v2 cmd = { .sta_id = cpu_to_le32(mvm_link_sta->sta_id), .station_type = cpu_to_le32(mvm_sta->sta_type), }; From 442c707c3c6663e7b3185716a3464dc99e72b727 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 19 Mar 2026 11:09:25 +0200 Subject: [PATCH 0880/1561] wifi: iwlwifi: regulatory: support a new command for PPAG Per Platform Antenna Gain is getting support for UNII-9. Add a new version of PER_PLATFORM_ANT_GAIN_CMD. This requires to increase the number of subbands in the firmware runtime object. Pass the number of subbands to iwl_bios_print_ppag to avoid printing invalid values. Introduce BIOS_PPAG_MAX_SUB_BANDS_NUM to avoid impacting BIOS_SAR_MAX_SUB_BANDS_NUM which was used until now for PPAG as well. SAR will get support for the new subband in future patches. While at it, print the PPAG table as it was read from BIOS. Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260319110722.2e577236d3c9.I042697a73893d79ef761796354b5d1dd8522f734@changeid --- drivers/net/wireless/intel/iwlwifi/fw/acpi.c | 1 + .../net/wireless/intel/iwlwifi/fw/api/power.h | 8 +++ .../wireless/intel/iwlwifi/fw/regulatory.c | 21 ++++++ .../wireless/intel/iwlwifi/fw/regulatory.h | 6 +- drivers/net/wireless/intel/iwlwifi/fw/uefi.c | 1 + .../wireless/intel/iwlwifi/mld/regulatory.c | 66 +++++++++++++++---- 6 files changed, 91 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c index b64abb8439b7..d00191e84f20 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c @@ -981,6 +981,7 @@ read_table: } } + iwl_bios_print_ppag(fwrt, num_sub_bands); fwrt->ppag_bios_source = BIOS_SOURCE_ACPI; ret = 0; diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/power.h b/drivers/net/wireless/intel/iwlwifi/fw/api/power.h index 0cd8a12e0f7c..118c08f95649 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/power.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/power.h @@ -269,6 +269,7 @@ enum iwl_dev_tx_power_cmd_mode { #define IWL_NUM_CHAIN_LIMITS 2 #define IWL_NUM_SUB_BANDS_V1 5 #define IWL_NUM_SUB_BANDS_V2 11 +#define IWL_NUM_SUB_BANDS_V3 12 /** * struct iwl_dev_tx_power_common - Common part of the TX power reduction cmd @@ -573,6 +574,7 @@ enum iwl_ppag_flags { * @v1: command version 1 structure. * @v5: command version 5 structure. * @v7: command version 7 structure. + * @v8: command version 8 structure. * @v1.flags: values from &enum iwl_ppag_flags * @v1.gain: table of antenna gain values per chain and sub-band * @v1.reserved: reserved @@ -581,6 +583,8 @@ enum iwl_ppag_flags { * @v7.ppag_config_info: see @struct bios_value_u32 * @v7.gain: table of antenna gain values per chain and sub-band * @v7.reserved: reserved + * @v8.ppag_config_info: see @struct bios_value_u32 + * @v8.gain: table of antenna gain values per chain and sub-band */ union iwl_ppag_table_cmd { struct { @@ -598,6 +602,10 @@ union iwl_ppag_table_cmd { s8 gain[IWL_NUM_CHAIN_LIMITS][IWL_NUM_SUB_BANDS_V2]; s8 reserved[2]; } __packed v7; /* PER_PLAT_ANTENNA_GAIN_CMD_API_S_VER_7 */ + struct { + struct bios_value_u32 ppag_config_info; + s8 gain[IWL_NUM_CHAIN_LIMITS][IWL_NUM_SUB_BANDS_V3]; + } __packed v8; /* PER_PLAT_ANTENNA_GAIN_CMD_API_S_VER_8 */ } __packed; #define IWL_PPAG_CMD_V1_MASK (IWL_PPAG_ETSI_MASK | IWL_PPAG_CHINA_MASK) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/regulatory.c b/drivers/net/wireless/intel/iwlwifi/fw/regulatory.c index 9e834cc1b054..55128caac7ed 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/regulatory.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/regulatory.c @@ -318,6 +318,27 @@ bool iwl_is_ppag_approved(struct iwl_fw_runtime *fwrt) } IWL_EXPORT_SYMBOL(iwl_is_ppag_approved); +/* Print the PPAG table as read from BIOS */ +void iwl_bios_print_ppag(struct iwl_fw_runtime *fwrt, int n_subbands) +{ + int i, j; + + IWL_DEBUG_RADIO(fwrt, "PPAG table as read from BIOS:\n"); + IWL_DEBUG_RADIO(fwrt, "PPAG revision = %d\n", fwrt->ppag_bios_rev); + IWL_DEBUG_RADIO(fwrt, "PPAG flags = 0x%x\n", fwrt->ppag_flags); + + if (WARN_ON_ONCE(n_subbands > + ARRAY_SIZE(fwrt->ppag_chains[0].subbands))) + return; + + for (i = 0; i < ARRAY_SIZE(fwrt->ppag_chains); i++) + for (j = 0; j < n_subbands; j++) + IWL_DEBUG_RADIO(fwrt, + "ppag_chains[%d].subbands[%d] = %d\n", + i, j, + fwrt->ppag_chains[i].subbands[j]); +} + bool iwl_is_tas_approved(void) { return dmi_check_system(dmi_tas_approved_list); diff --git a/drivers/net/wireless/intel/iwlwifi/fw/regulatory.h b/drivers/net/wireless/intel/iwlwifi/fw/regulatory.h index 8e04b0e2d507..446c8a2c4f9d 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/regulatory.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/regulatory.h @@ -22,6 +22,7 @@ #define BIOS_SAR_MAX_CHAINS_PER_PROFILE 4 #define BIOS_SAR_NUM_CHAINS 2 #define BIOS_SAR_MAX_SUB_BANDS_NUM 11 +#define BIOS_PPAG_MAX_SUB_BANDS_NUM 12 #define BIOS_GEO_NUM_CHAINS 2 #define BIOS_GEO_MAX_NUM_BANDS 3 @@ -100,7 +101,7 @@ struct iwl_geo_profile { /* Same thing as with SAR, all revisions fit in revision 2 */ struct iwl_ppag_chain { - s8 subbands[BIOS_SAR_MAX_SUB_BANDS_NUM]; + s8 subbands[BIOS_PPAG_MAX_SUB_BANDS_NUM]; }; struct iwl_tas_data { @@ -180,6 +181,9 @@ enum iwl_dsm_masks_reg { struct iwl_fw_runtime; +/* Print the PPAG table as read from BIOS */ +void iwl_bios_print_ppag(struct iwl_fw_runtime *fwrt, int n_subbands); + bool iwl_sar_geo_support(struct iwl_fw_runtime *fwrt); int iwl_sar_geo_fill_table(struct iwl_fw_runtime *fwrt, diff --git a/drivers/net/wireless/intel/iwlwifi/fw/uefi.c b/drivers/net/wireless/intel/iwlwifi/fw/uefi.c index 38f9d9adf90e..fba41976be6b 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/uefi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/uefi.c @@ -607,6 +607,7 @@ int iwl_uefi_get_ppag_table(struct iwl_fw_runtime *fwrt) data->vals[chain * UEFI_PPAG_SUB_BANDS_NUM + subband]; } + iwl_bios_print_ppag(fwrt, UEFI_PPAG_SUB_BANDS_NUM); fwrt->ppag_bios_source = BIOS_SOURCE_UEFI; out: kfree(data); diff --git a/drivers/net/wireless/intel/iwlwifi/mld/regulatory.c b/drivers/net/wireless/intel/iwlwifi/mld/regulatory.c index d1a55b565898..27059ec93847 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/regulatory.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/regulatory.c @@ -166,30 +166,74 @@ static int iwl_mld_ppag_send_cmd(struct iwl_mld *mld) { struct iwl_fw_runtime *fwrt = &mld->fwrt; union iwl_ppag_table_cmd cmd = { - .v7.ppag_config_info.hdr.table_source = fwrt->ppag_bios_source, - .v7.ppag_config_info.hdr.table_revision = fwrt->ppag_bios_rev, - .v7.ppag_config_info.value = cpu_to_le32(fwrt->ppag_flags), + /* v7 and v8 have the same layout for the ppag_config_info */ + .v8.ppag_config_info.hdr.table_source = fwrt->ppag_bios_source, + .v8.ppag_config_info.hdr.table_revision = fwrt->ppag_bios_rev, + .v8.ppag_config_info.value = cpu_to_le32(fwrt->ppag_flags), }; + int cmd_ver = + iwl_fw_lookup_cmd_ver(mld->fw, + WIDE_ID(PHY_OPS_GROUP, + PER_PLATFORM_ANT_GAIN_CMD), 1); + int cmd_len = sizeof(cmd.v8); int ret; + BUILD_BUG_ON(offsetof(typeof(cmd), v8.ppag_config_info.hdr) != + offsetof(typeof(cmd), v7.ppag_config_info.hdr)); + BUILD_BUG_ON(offsetof(typeof(cmd), v8.gain) != + offsetof(typeof(cmd), v7.gain)); + + BUILD_BUG_ON(ARRAY_SIZE(cmd.v7.gain) > ARRAY_SIZE(fwrt->ppag_chains)); + BUILD_BUG_ON(ARRAY_SIZE(cmd.v7.gain[0]) > + ARRAY_SIZE(fwrt->ppag_chains[0].subbands)); + BUILD_BUG_ON(ARRAY_SIZE(cmd.v8.gain) > ARRAY_SIZE(fwrt->ppag_chains)); + BUILD_BUG_ON(ARRAY_SIZE(cmd.v8.gain[0]) > + ARRAY_SIZE(fwrt->ppag_chains[0].subbands)); + IWL_DEBUG_RADIO(fwrt, "PPAG MODE bits going to be sent: %d\n", fwrt->ppag_flags); - for (int chain = 0; chain < IWL_NUM_CHAIN_LIMITS; chain++) { - for (int subband = 0; subband < IWL_NUM_SUB_BANDS_V2; subband++) { - cmd.v7.gain[chain][subband] = - fwrt->ppag_chains[chain].subbands[subband]; - IWL_DEBUG_RADIO(fwrt, - "PPAG table: chain[%d] band[%d]: gain = %d\n", - chain, subband, cmd.v7.gain[chain][subband]); + /* Since ver 7 will be deprecated at some point, don't bother making + * this code generic for both ver 7 and ver 8: duplicate the code. + */ + if (cmd_ver == 7) { + for (int chain = 0; chain < ARRAY_SIZE(cmd.v7.gain); chain++) { + for (int subband = 0; + subband < ARRAY_SIZE(cmd.v7.gain[0]); + subband++) { + cmd.v7.gain[chain][subband] = + fwrt->ppag_chains[chain].subbands[subband]; + IWL_DEBUG_RADIO(fwrt, + "PPAG table: chain[%d] band[%d]: gain = %d\n", + chain, subband, + cmd.v7.gain[chain][subband]); + } } + cmd_len = sizeof(cmd.v7); + } else if (cmd_ver == 8) { + for (int chain = 0; chain < ARRAY_SIZE(cmd.v8.gain); chain++) { + for (int subband = 0; + subband < ARRAY_SIZE(cmd.v8.gain[0]); + subband++) { + cmd.v8.gain[chain][subband] = + fwrt->ppag_chains[chain].subbands[subband]; + IWL_DEBUG_RADIO(fwrt, + "PPAG table: chain[%d] band[%d]: gain = %d\n", + chain, subband, + cmd.v8.gain[chain][subband]); + } + } + } else { + WARN(1, "Bad version for PER_PLATFORM_ANT_GAIN_CMD %d\n", + cmd_ver); + return -EINVAL; } IWL_DEBUG_RADIO(mld, "Sending PER_PLATFORM_ANT_GAIN_CMD\n"); ret = iwl_mld_send_cmd_pdu(mld, WIDE_ID(PHY_OPS_GROUP, PER_PLATFORM_ANT_GAIN_CMD), - &cmd, sizeof(cmd.v7)); + &cmd, cmd_len); if (ret < 0) IWL_ERR(mld, "failed to send PER_PLATFORM_ANT_GAIN_CMD (%d)\n", ret); From feb27e5abb72fe8fc0a4e6e672374f4f1cd46ecc Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 19 Mar 2026 11:09:26 +0200 Subject: [PATCH 0881/1561] wifi: iwlwifi: acpi: check the size of the ACPI PPAG tables We need to make sure we don't have a buffer overflow while reading the PPAG tables from ACPI into the firmware runtime object. Add an ACPI specific define for the number of chains in order to decouple the ACPI layout from the other objects. Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260319110722.451808698662.I91234c8a662608674679ce490b51be792332cd43@changeid --- drivers/net/wireless/intel/iwlwifi/fw/acpi.c | 11 ++++++++++- drivers/net/wireless/intel/iwlwifi/fw/acpi.h | 5 +++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c index d00191e84f20..de30799519cd 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c @@ -951,6 +951,15 @@ int iwl_acpi_get_ppag_table(struct iwl_fw_runtime *fwrt) goto out_free; read_table: + if (WARN_ON_ONCE(num_sub_bands > + ARRAY_SIZE(fwrt->ppag_chains[0].subbands))) { + ret = -EINVAL; + goto out_free; + } + + BUILD_BUG_ON(ACPI_PPAG_NUM_CHAINS > + ARRAY_SIZE(fwrt->ppag_chains)); + fwrt->ppag_bios_rev = tbl_rev; flags = &wifi_pkg->package.elements[1]; @@ -967,7 +976,7 @@ read_table: * first sub-band (j=0) corresponds to Low-Band (2.4GHz), and the * following sub-bands to High-Band (5GHz). */ - for (i = 0; i < IWL_NUM_CHAIN_LIMITS; i++) { + for (i = 0; i < ACPI_PPAG_NUM_CHAINS; i++) { for (j = 0; j < num_sub_bands; j++) { union acpi_object *ent; diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h index 06cece4ea6d9..c34dc17ff608 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h @@ -96,9 +96,10 @@ */ #define ACPI_WTAS_WIFI_DATA_SIZE (3 + IWL_WTAS_BLACK_LIST_MAX) -#define ACPI_PPAG_WIFI_DATA_SIZE_V1 ((IWL_NUM_CHAIN_LIMITS * \ +#define ACPI_PPAG_NUM_CHAINS 2 +#define ACPI_PPAG_WIFI_DATA_SIZE_V1 ((ACPI_PPAG_NUM_CHAINS * \ IWL_NUM_SUB_BANDS_V1) + 2) -#define ACPI_PPAG_WIFI_DATA_SIZE_V2 ((IWL_NUM_CHAIN_LIMITS * \ +#define ACPI_PPAG_WIFI_DATA_SIZE_V2 ((ACPI_PPAG_NUM_CHAINS * \ IWL_NUM_SUB_BANDS_V2) + 2) #define IWL_SAR_ENABLE_MSK BIT(0) From f90c8ea27cbe0522e8cd979a32ab8488298e98e6 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 19 Mar 2026 11:09:27 +0200 Subject: [PATCH 0882/1561] wifi: iwlwifi: acpi: add support for PPAG rev5 This adds support for UNII9 which requires to add a subband. Just increase the number of subbands that we need to read. Replace the usage of the IWL_NUM_SUB_BANDS_VX macros in acpi.h since those macros are defined in the firmware API and ACPI declarations have nothing to do the firmware API. Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260319110722.a5e7f805d0f2.I0e3ee3258b77b339234692ceccf0d25d1e6dd67e@changeid --- drivers/net/wireless/intel/iwlwifi/fw/acpi.c | 16 ++++++++++++++++ drivers/net/wireless/intel/iwlwifi/fw/acpi.h | 16 +++++++++------- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c index de30799519cd..4d0a93832336 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c @@ -916,6 +916,22 @@ int iwl_acpi_get_ppag_table(struct iwl_fw_runtime *fwrt) if (IS_ERR(data)) return PTR_ERR(data); + /* try to read ppag table rev 5 */ + wifi_pkg = iwl_acpi_get_wifi_pkg(fwrt->dev, data, + ACPI_PPAG_WIFI_DATA_SIZE_V3, &tbl_rev); + if (!IS_ERR(wifi_pkg)) { + if (tbl_rev == 5) { + num_sub_bands = IWL_NUM_SUB_BANDS_V3; + IWL_DEBUG_RADIO(fwrt, + "Reading PPAG table (tbl_rev=%d)\n", + tbl_rev); + goto read_table; + } else { + ret = -EINVAL; + goto out_free; + } + } + /* try to read ppag table rev 1 to 4 (all have the same data size) */ wifi_pkg = iwl_acpi_get_wifi_pkg(fwrt->dev, data, ACPI_PPAG_WIFI_DATA_SIZE_V2, &tbl_rev); diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h index c34dc17ff608..138fdb9a5273 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h @@ -8,11 +8,6 @@ #include #include "fw/regulatory.h" -#include "fw/api/commands.h" -#include "fw/api/power.h" -#include "fw/api/phy.h" -#include "fw/api/nvm-reg.h" -#include "fw/api/config.h" #include "fw/img.h" #include "iwl-trans.h" @@ -97,10 +92,17 @@ #define ACPI_WTAS_WIFI_DATA_SIZE (3 + IWL_WTAS_BLACK_LIST_MAX) #define ACPI_PPAG_NUM_CHAINS 2 +#define ACPI_PPAG_NUM_BANDS_V1 5 +#define ACPI_PPAG_NUM_BANDS_V2 11 +#define ACPI_PPAG_NUM_BANDS_V3 12 #define ACPI_PPAG_WIFI_DATA_SIZE_V1 ((ACPI_PPAG_NUM_CHAINS * \ - IWL_NUM_SUB_BANDS_V1) + 2) + ACPI_PPAG_NUM_BANDS_V1) + 2) #define ACPI_PPAG_WIFI_DATA_SIZE_V2 ((ACPI_PPAG_NUM_CHAINS * \ - IWL_NUM_SUB_BANDS_V2) + 2) + ACPI_PPAG_NUM_BANDS_V2) + 2) + +/* used for ACPI PPAG table rev 5 */ +#define ACPI_PPAG_WIFI_DATA_SIZE_V3 ((ACPI_PPAG_NUM_CHAINS * \ + ACPI_PPAG_NUM_BANDS_V3) + 2) #define IWL_SAR_ENABLE_MSK BIT(0) #define IWL_REDUCE_POWER_FLAGS_POS 1 From 1fd4e6478ccde5c89f47191170b6e655808e450d Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 19 Mar 2026 20:48:41 +0200 Subject: [PATCH 0883/1561] wifi: iwlwifi: uefi: add support for PPAG table rev5 This table has another subband for UNII-9. Add defines for the sizes of rev4 and rev5 to easily know how much data to ask from iwl_uefi_get_verified_variable. In case rev5 doesn't exist, fallback to rev4. Check that the revision advertised by the fetched table matches the size that we got. Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260319204647.b9ebcff37599.I1e8bb9cee5a028ed416b6094c0fdbf9f859c6dd8@changeid --- drivers/net/wireless/intel/iwlwifi/fw/uefi.c | 66 +++++++++++++------- drivers/net/wireless/intel/iwlwifi/fw/uefi.h | 17 +++-- 2 files changed, 57 insertions(+), 26 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/uefi.c b/drivers/net/wireless/intel/iwlwifi/fw/uefi.c index fba41976be6b..84b6f8b7eda9 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/uefi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/uefi.c @@ -570,44 +570,66 @@ out: int iwl_uefi_get_ppag_table(struct iwl_fw_runtime *fwrt) { struct uefi_cnv_var_ppag *data; + int n_subbands; + u32 valid_rev; int ret = 0; - int data_sz = sizeof(*data) + sizeof(data->vals[0]) * - IWL_NUM_CHAIN_LIMITS * UEFI_PPAG_SUB_BANDS_NUM; data = iwl_uefi_get_verified_variable(fwrt->trans, IWL_UEFI_PPAG_NAME, - "PPAG", data_sz, NULL); - if (IS_ERR(data)) - return -EINVAL; + "PPAG", UEFI_PPAG_DATA_SIZE_V5, + NULL); + if (!IS_ERR(data)) { + n_subbands = UEFI_PPAG_SUB_BANDS_NUM_REV5; + valid_rev = BIT(5); - if (data->revision < IWL_UEFI_MIN_PPAG_REV || - data->revision > IWL_UEFI_MAX_PPAG_REV) { + goto parse_table; + } + + data = iwl_uefi_get_verified_variable(fwrt->trans, + IWL_UEFI_PPAG_NAME, + "PPAG", + UEFI_PPAG_DATA_SIZE_V4, + NULL); + if (!IS_ERR(data)) { + n_subbands = UEFI_PPAG_SUB_BANDS_NUM_REV4; + /* revisions 1-4 have all the same size */ + valid_rev = BIT(1) | BIT(2) | BIT(3) | BIT(4); + + goto parse_table; + } + + return -EINVAL; + +parse_table: + if (!(BIT(data->revision) & valid_rev)) { ret = -EINVAL; - IWL_DEBUG_RADIO(fwrt, "Unsupported UEFI PPAG revision:%d\n", + IWL_DEBUG_RADIO(fwrt, + "Unsupported UEFI PPAG revision:%d\n", data->revision); goto out; } + /* + * Make sure fwrt has enough room to hold + * data coming from the UEFI table + */ + if (WARN_ON(ARRAY_SIZE(fwrt->ppag_chains) * + ARRAY_SIZE(fwrt->ppag_chains[0].subbands) < + UEFI_PPAG_NUM_CHAINS * n_subbands)) { + ret = -EINVAL; + goto out; + } + fwrt->ppag_bios_rev = data->revision; fwrt->ppag_flags = iwl_bios_get_ppag_flags(data->ppag_modes, fwrt->ppag_bios_rev); - /* - * Make sure fwrt has enough room to hold - * data coming from the UEFI table - */ - BUILD_BUG_ON(ARRAY_SIZE(fwrt->ppag_chains) * - ARRAY_SIZE(fwrt->ppag_chains[0].subbands) < - IWL_NUM_CHAIN_LIMITS * UEFI_PPAG_SUB_BANDS_NUM); - - for (int chain = 0; chain < IWL_NUM_CHAIN_LIMITS; chain++) { - for (int subband = 0; - subband < UEFI_PPAG_SUB_BANDS_NUM; - subband++) + for (int chain = 0; chain < UEFI_PPAG_NUM_CHAINS; chain++) { + for (int subband = 0; subband < n_subbands; subband++) fwrt->ppag_chains[chain].subbands[subband] = - data->vals[chain * UEFI_PPAG_SUB_BANDS_NUM + subband]; + data->vals[chain * n_subbands + subband]; } - iwl_bios_print_ppag(fwrt, UEFI_PPAG_SUB_BANDS_NUM); + iwl_bios_print_ppag(fwrt, n_subbands); fwrt->ppag_bios_source = BIOS_SOURCE_UEFI; out: kfree(data); diff --git a/drivers/net/wireless/intel/iwlwifi/fw/uefi.h b/drivers/net/wireless/intel/iwlwifi/fw/uefi.h index 4f0ce068a589..5046b6a45419 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/uefi.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/uefi.h @@ -34,8 +34,6 @@ #define IWL_UEFI_WRDS_REVISION 2 #define IWL_UEFI_EWRD_REVISION 2 #define IWL_UEFI_WGDS_REVISION 3 -#define IWL_UEFI_MIN_PPAG_REV 1 -#define IWL_UEFI_MAX_PPAG_REV 4 #define IWL_UEFI_MIN_WTAS_REVISION 1 #define IWL_UEFI_MAX_WTAS_REVISION 2 #define IWL_UEFI_SPLC_REVISION 0 @@ -77,7 +75,9 @@ struct uefi_cnv_common_step_data { } __packed; #define UEFI_SAR_MAX_SUB_BANDS_NUM 11 -#define UEFI_PPAG_SUB_BANDS_NUM 11 +#define UEFI_PPAG_SUB_BANDS_NUM_REV4 11 +#define UEFI_PPAG_SUB_BANDS_NUM_REV5 12 +#define UEFI_PPAG_NUM_CHAINS 2 #define UEFI_SAR_MAX_CHAINS_PER_PROFILE 4 /* @@ -143,7 +143,9 @@ struct uefi_cnv_var_wgds { * @ppag_modes: values from &enum iwl_ppag_flags * @vals: the PPAG values per chain and band as an array. * vals[chain * num_of_subbands + subband] will return the right value. - * num_of_subbands is %UEFI_PPAG_SUB_BANDS_NUM. + * num_of_subbands depends on the revision. For revision 5, it is + * %UEFI_PPAG_SUB_BANDS_NUM_REV5, for earlier revision it is + * %UEFI_PPAG_SUB_BANDS_NUM_REV4. * the max number of chains is currently 2 */ struct uefi_cnv_var_ppag { @@ -152,6 +154,13 @@ struct uefi_cnv_var_ppag { s8 vals[]; } __packed; +#define UEFI_PPAG_DATA_SIZE_V4 \ + (offsetof(struct uefi_cnv_var_ppag, vals) + \ + sizeof(s8) * UEFI_PPAG_NUM_CHAINS * UEFI_PPAG_SUB_BANDS_NUM_REV4) +#define UEFI_PPAG_DATA_SIZE_V5 \ + (offsetof(struct uefi_cnv_var_ppag, vals) + \ + sizeof(s8) * UEFI_PPAG_NUM_CHAINS * UEFI_PPAG_SUB_BANDS_NUM_REV5) + /* struct uefi_cnv_var_wtas - WTAS tabled as defined in UEFI * @revision: the revision of the table * @tas_selection: different options of TAS enablement. From f473f609164ee9907497ac55934689110c248e23 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 19 Mar 2026 20:48:42 +0200 Subject: [PATCH 0884/1561] wifi: iwlwifi: restrict TOP reset to some devices Due to the Bluetooth implementation needing to match, not all devices can actually do TOP reset. Restrict it to Sc2/Sc2f or later, with Wh RF or later. Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260319204647.6c4479f4e49d.I5023d70cb33f1e18f7cb15981fc3acfbb00862b7@changeid --- drivers/net/wireless/intel/iwlwifi/iwl-trans.c | 10 +++++----- drivers/net/wireless/intel/iwlwifi/iwl-trans.h | 18 ++++++++++++++++++ .../wireless/intel/iwlwifi/pcie/gen1_2/trans.c | 2 +- 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c index 89901786fd68..16b2c313e72b 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c @@ -138,7 +138,7 @@ iwl_trans_determine_restart_mode(struct iwl_trans *trans) IWL_RESET_MODE_FUNC_RESET, IWL_RESET_MODE_PROD_RESET, }; - static const enum iwl_reset_mode escalation_list_sc[] = { + static const enum iwl_reset_mode escalation_list_top[] = { IWL_RESET_MODE_SW_RESET, IWL_RESET_MODE_REPROBE, IWL_RESET_MODE_REPROBE, @@ -159,14 +159,14 @@ iwl_trans_determine_restart_mode(struct iwl_trans *trans) if (trans->request_top_reset) { trans->request_top_reset = 0; - if (trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_SC) + if (iwl_trans_is_top_reset_supported(trans)) return IWL_RESET_MODE_TOP_RESET; return IWL_RESET_MODE_PROD_RESET; } - if (trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_SC) { - escalation_list = escalation_list_sc; - escalation_list_size = ARRAY_SIZE(escalation_list_sc); + if (iwl_trans_is_top_reset_supported(trans)) { + escalation_list = escalation_list_top; + escalation_list_size = ARRAY_SIZE(escalation_list_top); } else { escalation_list = escalation_list_old; escalation_list_size = ARRAY_SIZE(escalation_list_old); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h index aa0952a011e0..61e4f4776dcb 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h @@ -1258,4 +1258,22 @@ bool iwl_trans_is_pm_supported(struct iwl_trans *trans); bool iwl_trans_is_ltr_enabled(struct iwl_trans *trans); +static inline bool iwl_trans_is_top_reset_supported(struct iwl_trans *trans) +{ + /* not supported before Sc family */ + if (trans->mac_cfg->device_family < IWL_DEVICE_FAMILY_SC) + return false; + + /* for Sc family only supported for Sc2/Sc2f */ + if (trans->mac_cfg->device_family == IWL_DEVICE_FAMILY_SC && + CSR_HW_REV_TYPE(trans->info.hw_rev) == IWL_CFG_MAC_TYPE_SC) + return false; + + /* so far these numbers are increasing - not before Pe */ + if (CSR_HW_RFID_TYPE(trans->info.hw_rf_id) < IWL_CFG_RF_TYPE_PE) + return false; + + return true; +} + #endif /* __iwl_trans_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/trans.c index 4560d92d76fe..a05f60f9224b 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/trans.c @@ -3197,7 +3197,7 @@ static ssize_t iwl_dbgfs_reset_write(struct file *file, if (!test_bit(STATUS_DEVICE_ENABLED, &trans->status)) return -EINVAL; if (mode == IWL_RESET_MODE_TOP_RESET) { - if (trans->mac_cfg->device_family < IWL_DEVICE_FAMILY_SC) + if (!iwl_trans_is_top_reset_supported(trans)) return -EINVAL; trans->request_top_reset = 1; } From 5562b3bbeede8be25092064720e4a942e9fd3e3e Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 19 Mar 2026 20:48:43 +0200 Subject: [PATCH 0885/1561] wifi: iwlwifi: mvm: zero iwl_geo_tx_power_profiles_cmd before sending Otherwise we may send garbage. Signed-off-by: Emmanuel Grumbach Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260319204647.2d494b0f4692.I9afd0fa6b2ea5a27118144ac4e3bbbedc2089c10@changeid --- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index d46715abd7a5..0c643f0b7105 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -908,7 +908,7 @@ int iwl_mvm_sar_select_profile(struct iwl_mvm *mvm, int prof_a, int prof_b) int iwl_mvm_get_sar_geo_profile(struct iwl_mvm *mvm) { - union iwl_geo_tx_power_profiles_cmd geo_tx_cmd; + union iwl_geo_tx_power_profiles_cmd geo_tx_cmd = {}; struct iwl_geo_tx_power_profiles_resp *resp; u16 len; int ret; @@ -960,7 +960,7 @@ int iwl_mvm_get_sar_geo_profile(struct iwl_mvm *mvm) static int iwl_mvm_sar_geo_init(struct iwl_mvm *mvm) { u32 cmd_id = WIDE_ID(PHY_OPS_GROUP, PER_CHAIN_LIMIT_OFFSET_CMD); - union iwl_geo_tx_power_profiles_cmd cmd; + union iwl_geo_tx_power_profiles_cmd cmd = {}; u16 len; u32 n_bands; u32 n_profiles; From b00294e66e1faafa16d635580ee1b7c382519758 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 19 Mar 2026 20:48:44 +0200 Subject: [PATCH 0886/1561] wifi: iwlwifi: uefi: support the new WRDS and EWRD tables Those tables now have support for UNII-9 subband. Refactor iwl_uefi_set_sar_profile to get an array of values that makes it easier to use when the number of subbands can vary. Revamp a bit the code that fetches the tables to ask for the smaller table, then we can check the size of the object that we got and compare to the expected sizes to determine what revision to expect. Signed-off-by: Emmanuel Grumbach Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260319204647.6948f69e6ae4.Icf990e13de6905c35a8de69f1445f8eb4aa43ee4@changeid --- .../wireless/intel/iwlwifi/fw/regulatory.h | 2 +- drivers/net/wireless/intel/iwlwifi/fw/uefi.c | 92 +++++++++++++++---- drivers/net/wireless/intel/iwlwifi/fw/uefi.h | 64 ++++++++----- 3 files changed, 117 insertions(+), 41 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/regulatory.h b/drivers/net/wireless/intel/iwlwifi/fw/regulatory.h index 446c8a2c4f9d..a3684514c904 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/regulatory.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/regulatory.h @@ -21,7 +21,7 @@ */ #define BIOS_SAR_MAX_CHAINS_PER_PROFILE 4 #define BIOS_SAR_NUM_CHAINS 2 -#define BIOS_SAR_MAX_SUB_BANDS_NUM 11 +#define BIOS_SAR_MAX_SUB_BANDS_NUM 12 #define BIOS_PPAG_MAX_SUB_BANDS_NUM 12 #define BIOS_GEO_NUM_CHAINS 2 diff --git a/drivers/net/wireless/intel/iwlwifi/fw/uefi.c b/drivers/net/wireless/intel/iwlwifi/fw/uefi.c index 84b6f8b7eda9..3d3d698bacd0 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/uefi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/uefi.c @@ -460,11 +460,30 @@ out: IWL_EXPORT_SYMBOL(iwl_uefi_get_uneb_table); static void iwl_uefi_set_sar_profile(struct iwl_fw_runtime *fwrt, - struct uefi_sar_profile *uefi_sar_prof, - u8 prof_index, bool enabled) + const u8 *vals, u8 prof_index, + u8 num_subbands, bool enabled) { - memcpy(&fwrt->sar_profiles[prof_index].chains, uefi_sar_prof, - sizeof(struct uefi_sar_profile)); + struct iwl_sar_profile *sar_prof = &fwrt->sar_profiles[prof_index]; + + /* + * Make sure fwrt has enough room to hold the data + * coming from the UEFI table + */ + if (WARN_ON(ARRAY_SIZE(sar_prof->chains) * + ARRAY_SIZE(sar_prof->chains[0].subbands) < + UEFI_SAR_MAX_CHAINS_PER_PROFILE * num_subbands)) + return; + + BUILD_BUG_ON(ARRAY_SIZE(sar_prof->chains) != + UEFI_SAR_MAX_CHAINS_PER_PROFILE); + + for (int chain = 0; + chain < UEFI_SAR_MAX_CHAINS_PER_PROFILE; + chain++) { + for (int subband = 0; subband < num_subbands; subband++) + sar_prof->chains[chain].subbands[subband] = + vals[chain * num_subbands + subband]; + } fwrt->sar_profiles[prof_index].enabled = enabled & IWL_SAR_ENABLE_MSK; } @@ -472,24 +491,46 @@ static void iwl_uefi_set_sar_profile(struct iwl_fw_runtime *fwrt, int iwl_uefi_get_wrds_table(struct iwl_fw_runtime *fwrt) { struct uefi_cnv_var_wrds *data; + unsigned long size; + unsigned long expected_size; + int num_subbands; int ret = 0; data = iwl_uefi_get_verified_variable(fwrt->trans, IWL_UEFI_WRDS_NAME, - "WRDS", sizeof(*data), NULL); + "WRDS", + UEFI_SAR_WRDS_TABLE_SIZE_REV2, + &size); + if (IS_ERR(data)) return -EINVAL; - if (data->revision != IWL_UEFI_WRDS_REVISION) { - ret = -EINVAL; - IWL_DEBUG_RADIO(fwrt, "Unsupported UEFI WRDS revision:%d\n", + switch (data->revision) { + case 2: + expected_size = UEFI_SAR_WRDS_TABLE_SIZE_REV2; + num_subbands = UEFI_SAR_SUB_BANDS_NUM_REV2; + break; + case 3: + expected_size = UEFI_SAR_WRDS_TABLE_SIZE_REV3; + num_subbands = UEFI_SAR_SUB_BANDS_NUM_REV3; + break; + default: + IWL_DEBUG_RADIO(fwrt, + "Unsupported UEFI WRDS revision:%d\n", data->revision); + ret = -EINVAL; + goto out; + } + + if (size != expected_size) { + ret = -EINVAL; goto out; } /* The profile from WRDS is officially profile 1, but goes * into sar_profiles[0] (because we don't have a profile 0). */ - iwl_uefi_set_sar_profile(fwrt, &data->sar_profile, 0, data->mode); + iwl_uefi_set_sar_profile(fwrt, data->vals, 0, + num_subbands, data->mode); out: kfree(data); return ret; @@ -498,21 +539,40 @@ out: int iwl_uefi_get_ewrd_table(struct iwl_fw_runtime *fwrt) { struct uefi_cnv_var_ewrd *data; + unsigned long expected_size; int i, ret = 0; + unsigned long size; + int num_subbands; + int profile_size; data = iwl_uefi_get_verified_variable(fwrt->trans, IWL_UEFI_EWRD_NAME, - "EWRD", sizeof(*data), NULL); + "EWRD", + UEFI_SAR_EWRD_TABLE_SIZE_REV2, + &size); if (IS_ERR(data)) return -EINVAL; - if (data->revision != IWL_UEFI_EWRD_REVISION) { - ret = -EINVAL; - IWL_DEBUG_RADIO(fwrt, "Unsupported UEFI EWRD revision:%d\n", + switch (data->revision) { + case 2: + expected_size = UEFI_SAR_EWRD_TABLE_SIZE_REV2; + num_subbands = UEFI_SAR_SUB_BANDS_NUM_REV2; + profile_size = UEFI_SAR_PROFILE_SIZE_REV2; + break; + case 3: + expected_size = UEFI_SAR_EWRD_TABLE_SIZE_REV3; + num_subbands = UEFI_SAR_SUB_BANDS_NUM_REV3; + profile_size = UEFI_SAR_PROFILE_SIZE_REV3; + break; + default: + IWL_DEBUG_RADIO(fwrt, + "Unsupported UEFI EWRD revision:%d\n", data->revision); + ret = -EINVAL; goto out; } - if (data->num_profiles >= BIOS_SAR_MAX_PROFILE_NUM) { + if (size != expected_size || + data->num_profiles >= BIOS_SAR_MAX_PROFILE_NUM) { ret = -EINVAL; goto out; } @@ -522,8 +582,8 @@ int iwl_uefi_get_ewrd_table(struct iwl_fw_runtime *fwrt) * save them in sar_profiles[1-3] (because we don't * have profile 0). So in the array we start from 1. */ - iwl_uefi_set_sar_profile(fwrt, &data->sar_profiles[i], i + 1, - data->mode); + iwl_uefi_set_sar_profile(fwrt, &data->vals[i * profile_size], + i + 1, num_subbands, data->mode); out: kfree(data); diff --git a/drivers/net/wireless/intel/iwlwifi/fw/uefi.h b/drivers/net/wireless/intel/iwlwifi/fw/uefi.h index 5046b6a45419..aa5a4c5a7392 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/uefi.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/uefi.h @@ -31,8 +31,6 @@ #define IWL_SGOM_MAP_SIZE 339 #define IWL_UATS_MAP_SIZE 339 -#define IWL_UEFI_WRDS_REVISION 2 -#define IWL_UEFI_EWRD_REVISION 2 #define IWL_UEFI_WGDS_REVISION 3 #define IWL_UEFI_MIN_WTAS_REVISION 1 #define IWL_UEFI_MAX_WTAS_REVISION 2 @@ -74,56 +72,74 @@ struct uefi_cnv_common_step_data { u8 radio2; } __packed; -#define UEFI_SAR_MAX_SUB_BANDS_NUM 11 #define UEFI_PPAG_SUB_BANDS_NUM_REV4 11 #define UEFI_PPAG_SUB_BANDS_NUM_REV5 12 #define UEFI_PPAG_NUM_CHAINS 2 + +#define UEFI_SAR_SUB_BANDS_NUM_REV2 11 +#define UEFI_SAR_SUB_BANDS_NUM_REV3 12 + #define UEFI_SAR_MAX_CHAINS_PER_PROFILE 4 -/* - * struct uefi_sar_profile_chain - per-chain values of a SAR profile - * @subbands: the SAR value for each subband - */ -struct uefi_sar_profile_chain { - u8 subbands[UEFI_SAR_MAX_SUB_BANDS_NUM]; -}; - -/* - * struct uefi_sar_profile - a SAR profile as defined in UEFI - * - * @chains: a per-chain table of SAR values - */ -struct uefi_sar_profile { - struct uefi_sar_profile_chain chains[UEFI_SAR_MAX_CHAINS_PER_PROFILE]; -} __packed; - /* * struct uefi_cnv_var_wrds - WRDS table as defined in UEFI * * @revision: the revision of the table * @mode: is WRDS enbaled/disabled - * @sar_profile: sar profile #1 + * @vals: values for sar profile #1 as an array: + * vals[chain * num_of_subbands + subband] will return the right value. + * num_of_subbands depends on the revision. For revision 3, it is + * %UEFI_SAR_SUB_BANDS_NUM_REV3, for earlier revision, it is + * %UEFI_SAR_SUB_BANDS_NUM_REV2. + * The max number of chains is currently 2 */ struct uefi_cnv_var_wrds { u8 revision; u32 mode; - struct uefi_sar_profile sar_profile; + u8 vals[]; } __packed; +#define UEFI_SAR_PROFILE_SIZE_REV2 \ + (sizeof(u8) * UEFI_SAR_MAX_CHAINS_PER_PROFILE * \ + UEFI_SAR_SUB_BANDS_NUM_REV2) + +#define UEFI_SAR_PROFILE_SIZE_REV3 \ + (sizeof(u8) * UEFI_SAR_MAX_CHAINS_PER_PROFILE * \ + UEFI_SAR_SUB_BANDS_NUM_REV3) + +#define UEFI_SAR_WRDS_TABLE_SIZE_REV2 \ + (offsetof(struct uefi_cnv_var_wrds, vals) + \ + UEFI_SAR_PROFILE_SIZE_REV2) + +#define UEFI_SAR_WRDS_TABLE_SIZE_REV3 \ + (offsetof(struct uefi_cnv_var_wrds, vals) + \ + UEFI_SAR_PROFILE_SIZE_REV3) + /* * struct uefi_cnv_var_ewrd - EWRD table as defined in UEFI * @revision: the revision of the table * @mode: is WRDS enbaled/disabled * @num_profiles: how many additional profiles we have in this table (0-3) - * @sar_profiles: the additional SAR profiles (#2-#4) + * @vals: the additional SAR profiles (#2-#4) as an array of SAR profiles. + * A SAR profile is defined the &struct uefi_cnv_var_wrds::vals. The size + * of each profile depends on the number of subbands which depends on the + * revision. This is explained in &struct uefi_cnv_var_wrds. */ struct uefi_cnv_var_ewrd { u8 revision; u32 mode; u32 num_profiles; - struct uefi_sar_profile sar_profiles[BIOS_SAR_MAX_PROFILE_NUM - 1]; + u8 vals[]; } __packed; +#define UEFI_SAR_EWRD_TABLE_SIZE_REV2 \ + (offsetof(struct uefi_cnv_var_ewrd, vals) + \ + UEFI_SAR_PROFILE_SIZE_REV2 * (BIOS_SAR_MAX_PROFILE_NUM - 1)) + +#define UEFI_SAR_EWRD_TABLE_SIZE_REV3 \ + (offsetof(struct uefi_cnv_var_ewrd, vals) + \ + UEFI_SAR_PROFILE_SIZE_REV3 * (BIOS_SAR_MAX_PROFILE_NUM - 1)) + /* * struct uefi_cnv_var_wgds - WGDS table as defined in UEFI * @revision: the revision of the table From 60db0a1a70fa44ac85dcb90fc986235f9d4c9fb0 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 19 Mar 2026 20:48:45 +0200 Subject: [PATCH 0887/1561] wifi: iwlwifi: acpi: add support for WRDS rev 3 table This table includes another sub-band for UNII-9. Signed-off-by: Emmanuel Grumbach Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260319204647.06543ec65e00.I73135c7d61bff9b46ad9862d93f4faf923983fd4@changeid --- drivers/net/wireless/intel/iwlwifi/fw/acpi.c | 25 +++++++++++++++++++- drivers/net/wireless/intel/iwlwifi/fw/acpi.h | 3 +++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c index 4d0a93832336..debbba22a909 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c @@ -535,7 +535,23 @@ int iwl_acpi_get_wrds_table(struct iwl_fw_runtime *fwrt) if (IS_ERR(data)) return PTR_ERR(data); - /* start by trying to read revision 2 */ + /* start by trying to read revision 3 */ + wifi_pkg = iwl_acpi_get_wifi_pkg(fwrt->dev, data, + ACPI_WRDS_WIFI_DATA_SIZE_REV3, + &tbl_rev); + if (!IS_ERR(wifi_pkg)) { + if (tbl_rev != 3) { + ret = -EINVAL; + goto out_free; + } + + num_chains = ACPI_SAR_NUM_CHAINS_REV2; + num_sub_bands = ACPI_SAR_NUM_SUB_BANDS_REV3; + + goto read_table; + } + + /* then try revision 2 */ wifi_pkg = iwl_acpi_get_wifi_pkg(fwrt->dev, data, ACPI_WRDS_WIFI_DATA_SIZE_REV2, &tbl_rev); @@ -592,6 +608,13 @@ read_table: goto out_free; } + if (WARN_ON(num_chains * num_sub_bands > + ARRAY_SIZE(fwrt->sar_profiles[0].chains) * + ARRAY_SIZE(fwrt->sar_profiles[0].chains[0].subbands))) { + ret = -EINVAL; + goto out_free; + } + IWL_DEBUG_RADIO(fwrt, "Reading WRDS tbl_rev=%d\n", tbl_rev); flags = wifi_pkg->package.elements[1].integer.value; diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h index 138fdb9a5273..ec6af1b58098 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h @@ -39,6 +39,7 @@ #define ACPI_SAR_NUM_SUB_BANDS_REV0 5 #define ACPI_SAR_NUM_SUB_BANDS_REV1 11 #define ACPI_SAR_NUM_SUB_BANDS_REV2 11 +#define ACPI_SAR_NUM_SUB_BANDS_REV3 12 #define ACPI_WRDS_WIFI_DATA_SIZE_REV0 (ACPI_SAR_NUM_CHAINS_REV0 * \ ACPI_SAR_NUM_SUB_BANDS_REV0 + 2) @@ -46,6 +47,8 @@ ACPI_SAR_NUM_SUB_BANDS_REV1 + 2) #define ACPI_WRDS_WIFI_DATA_SIZE_REV2 (ACPI_SAR_NUM_CHAINS_REV2 * \ ACPI_SAR_NUM_SUB_BANDS_REV2 + 2) +#define ACPI_WRDS_WIFI_DATA_SIZE_REV3 (ACPI_SAR_NUM_CHAINS_REV2 * \ + ACPI_SAR_NUM_SUB_BANDS_REV3 + 2) #define ACPI_EWRD_WIFI_DATA_SIZE_REV0 ((ACPI_SAR_PROFILE_NUM - 1) * \ ACPI_SAR_NUM_CHAINS_REV0 * \ ACPI_SAR_NUM_SUB_BANDS_REV0 + 3) From 6481a02f94e28dbb88a395db581984457da22683 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 19 Mar 2026 20:48:46 +0200 Subject: [PATCH 0888/1561] wifi: iwlwifi: acpi: add support for EWRD rev 3 table This table includes another sub-band for UNII-9. Signed-off-by: Emmanuel Grumbach Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260319204647.9182284f007e.Ibbe7c1f8442933d29695b9bf56b8e775394c71f8@changeid --- drivers/net/wireless/intel/iwlwifi/fw/acpi.c | 31 +++++++++++++++++++- drivers/net/wireless/intel/iwlwifi/fw/acpi.h | 3 ++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c index debbba22a909..721bd014bbaa 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c @@ -648,7 +648,22 @@ int iwl_acpi_get_ewrd_table(struct iwl_fw_runtime *fwrt) if (IS_ERR(data)) return PTR_ERR(data); - /* start by trying to read revision 2 */ + /* start by trying to read revision 3 */ + wifi_pkg = iwl_acpi_get_wifi_pkg(fwrt->dev, data, + ACPI_EWRD_WIFI_DATA_SIZE_REV3, + &tbl_rev); + if (!IS_ERR(wifi_pkg)) { + if (tbl_rev != 3) { + ret = -EINVAL; + goto out_free; + } + + num_sub_bands = ACPI_SAR_NUM_SUB_BANDS_REV3; + + goto read_table; + } + + /* then try revision 2 */ wifi_pkg = iwl_acpi_get_wifi_pkg(fwrt->dev, data, ACPI_EWRD_WIFI_DATA_SIZE_REV2, &tbl_rev); @@ -703,6 +718,13 @@ read_table: goto out_free; } + if (WARN_ON(ACPI_SAR_NUM_CHAINS_REV0 * num_sub_bands > + ARRAY_SIZE(fwrt->sar_profiles[0].chains) * + ARRAY_SIZE(fwrt->sar_profiles[0].chains[0].subbands))) { + ret = -EINVAL; + goto out_free; + } + enabled = !!(wifi_pkg->package.elements[1].integer.value); n_profiles = wifi_pkg->package.elements[2].integer.value; @@ -745,6 +767,13 @@ read_table: if (tbl_rev < 2) goto set_enabled; + if (WARN_ON(ACPI_SAR_NUM_CHAINS_REV0 * 2 * num_sub_bands > + ARRAY_SIZE(fwrt->sar_profiles[0].chains) * + ARRAY_SIZE(fwrt->sar_profiles[0].chains[0].subbands))) { + ret = -EINVAL; + goto out_free; + } + /* parse cdb chains for all profiles */ for (i = 0; i < n_profiles; i++) { struct iwl_sar_profile_chain *chains; diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h index ec6af1b58098..8e5ed72d4d8d 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h @@ -58,6 +58,9 @@ #define ACPI_EWRD_WIFI_DATA_SIZE_REV2 ((ACPI_SAR_PROFILE_NUM - 1) * \ ACPI_SAR_NUM_CHAINS_REV2 * \ ACPI_SAR_NUM_SUB_BANDS_REV2 + 3) +#define ACPI_EWRD_WIFI_DATA_SIZE_REV3 ((ACPI_SAR_PROFILE_NUM - 1) * \ + ACPI_SAR_NUM_CHAINS_REV2 * \ + ACPI_SAR_NUM_SUB_BANDS_REV3 + 3) #define ACPI_WPFC_WIFI_DATA_SIZE 5 /* domain and 4 filter config words */ /* revision 0 and 1 are identical, except for the semantics in the FW */ From c239a0d8d75e83e28e5866f15234fff34f4103c5 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 19 Mar 2026 20:48:47 +0200 Subject: [PATCH 0889/1561] wifi: iwlwifi: mld: support version 11 of REDUCE_TX_POWER_CMD This introduces support for UNII-9. After we increased the size of the arrays of the subbands in SAR structure, we now support the new firmware command. Signed-off-by: Emmanuel Grumbach Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260319204647.9cea60b78a1b.Ia91c59829af0dc4d6c351c5b09ce33800c1f9e44@changeid --- .../net/wireless/intel/iwlwifi/fw/api/power.h | 15 ++++++++- .../net/wireless/intel/iwlwifi/mld/power.c | 5 ++- .../wireless/intel/iwlwifi/mld/regulatory.c | 32 +++++++++++++++---- 3 files changed, 44 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/power.h b/drivers/net/wireless/intel/iwlwifi/fw/api/power.h index 118c08f95649..ec923162a44b 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/power.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/power.h @@ -426,19 +426,32 @@ struct iwl_dev_tx_power_cmd_v10 { __le32 flags; } __packed; /* TX_REDUCED_POWER_API_S_VER_10 */ +struct iwl_dev_tx_power_cmd_v11 { + __le16 per_chain[IWL_NUM_CHAIN_TABLES_V2][IWL_NUM_CHAIN_LIMITS][IWL_NUM_SUB_BANDS_V3]; + u8 per_chain_restriction_changed; + u8 reserved; + __le32 timer_period; + __le32 flags; +} __packed; /* TX_REDUCED_POWER_API_S_VER_11 */ + /* * struct iwl_dev_tx_power_cmd - TX power reduction command (multiversion) * @common: common part of the command * @v9: version 9 part of the command * @v10: version 10 part of the command + * @v11: version 11 part of the command */ struct iwl_dev_tx_power_cmd { struct iwl_dev_tx_power_common common; union { struct iwl_dev_tx_power_cmd_v9 v9; struct iwl_dev_tx_power_cmd_v10 v10; + struct iwl_dev_tx_power_cmd_v11 v11; }; -} __packed; /* TX_REDUCED_POWER_API_S_VER_9_VER10 */ +} __packed; /* TX_REDUCED_POWER_API_S_VER_9 + * TX_REDUCED_POWER_API_S_VER_10 + * TX_REDUCED_POWER_API_S_VER_11 + */ #define IWL_NUM_GEO_PROFILES 3 #define IWL_NUM_GEO_PROFILES_V3 8 diff --git a/drivers/net/wireless/intel/iwlwifi/mld/power.c b/drivers/net/wireless/intel/iwlwifi/mld/power.c index c3318e84f4a2..49b0d9f8f865 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/power.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/power.c @@ -405,7 +405,10 @@ int iwl_mld_set_tx_power(struct iwl_mld *mld, .common.set_mode = cpu_to_le32(IWL_TX_POWER_MODE_SET_LINK), .common.pwr_restriction = cpu_to_le16(u_tx_power), }; - int len = sizeof(cmd.common) + sizeof(cmd.v10); + int len = sizeof(cmd.common) + sizeof(cmd.v11); + + if (iwl_fw_lookup_cmd_ver(mld->fw, cmd_id, 10) == 10) + len = sizeof(cmd.common) + sizeof(cmd.v10); if (WARN_ON(!mld_link)) return -ENODEV; diff --git a/drivers/net/wireless/intel/iwlwifi/mld/regulatory.c b/drivers/net/wireless/intel/iwlwifi/mld/regulatory.c index 27059ec93847..f009c884e6cd 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/regulatory.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/regulatory.c @@ -95,23 +95,43 @@ static int iwl_mld_geo_sar_init(struct iwl_mld *mld) int iwl_mld_config_sar_profile(struct iwl_mld *mld, int prof_a, int prof_b) { - u32 cmd_id = REDUCE_TX_POWER_CMD; struct iwl_dev_tx_power_cmd cmd = { .common.set_mode = cpu_to_le32(IWL_TX_POWER_MODE_SET_CHAINS), - .v10.flags = cpu_to_le32(mld->fwrt.reduced_power_flags), }; + u8 cmd_ver = iwl_fw_lookup_cmd_ver(mld->fw, REDUCE_TX_POWER_CMD, 10); + int num_subbands; + int cmd_size; int ret; + switch (cmd_ver) { + case 10: + cmd.v10.flags = cpu_to_le32(mld->fwrt.reduced_power_flags); + cmd_size = sizeof(cmd.common) + sizeof(cmd.v10); + num_subbands = IWL_NUM_SUB_BANDS_V2; + break; + case 11: + cmd.v11.flags = cpu_to_le32(mld->fwrt.reduced_power_flags); + cmd_size = sizeof(cmd.common) + sizeof(cmd.v11); + num_subbands = IWL_NUM_SUB_BANDS_V3; + break; + default: + WARN_ONCE(1, "Bad version for REDUCE_TX_POWER_CMD: %d\n", + cmd_ver); + return -EOPNOTSUPP; + } + /* TODO: CDB - support IWL_NUM_CHAIN_TABLES_V2 */ - ret = iwl_sar_fill_profile(&mld->fwrt, &cmd.v10.per_chain[0][0][0], - IWL_NUM_CHAIN_TABLES, IWL_NUM_SUB_BANDS_V2, + /* v10 and v11 have the same position for per_chain */ + BUILD_BUG_ON(offsetof(typeof(cmd), v11.per_chain) != + offsetof(typeof(cmd), v10.per_chain)); + ret = iwl_sar_fill_profile(&mld->fwrt, &cmd.v11.per_chain[0][0][0], + IWL_NUM_CHAIN_TABLES, num_subbands, prof_a, prof_b); /* return on error or if the profile is disabled (positive number) */ if (ret) return ret; - return iwl_mld_send_cmd_pdu(mld, cmd_id, &cmd, - sizeof(cmd.common) + sizeof(cmd.v10)); + return iwl_mld_send_cmd_pdu(mld, REDUCE_TX_POWER_CMD, &cmd, cmd_size); } int iwl_mld_init_sar(struct iwl_mld *mld) From de985774e2175483bfffb5598bea4e39c12a181a Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 19 Mar 2026 20:48:48 +0200 Subject: [PATCH 0890/1561] wifi: iwlwifi: uefi: open code the parsing of the WGDS table We will soon add support for UNII-9 band in the WGDS table. We need to decouple the UEFI code from the firmware runtime code. The firmware runtime is just a software object which will need to grow and UEFI objects need a new revision to grow. Existing systems will keep the same UEFI objects. Just like PPAG and SAR, stop using structures to parse the UEFI tables since the layout depends on the revision. The support for the new revision will be added in the next patch, for now, just do the ground work. Signed-off-by: Emmanuel Grumbach Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260319204647.140706e6e91f.I83ca04932bc21aa358119890001e876ced1e1bda@changeid --- drivers/net/wireless/intel/iwlwifi/fw/uefi.c | 33 +++++++++++++++++--- drivers/net/wireless/intel/iwlwifi/fw/uefi.h | 18 +++++++++-- 2 files changed, 44 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/uefi.c b/drivers/net/wireless/intel/iwlwifi/fw/uefi.c index 3d3d698bacd0..ccac50385175 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/uefi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/uefi.c @@ -593,10 +593,11 @@ out: int iwl_uefi_get_wgds_table(struct iwl_fw_runtime *fwrt) { struct uefi_cnv_var_wgds *data; - int i, ret = 0; + int ret = 0; data = iwl_uefi_get_verified_variable(fwrt->trans, IWL_UEFI_WGDS_NAME, - "WGDS", sizeof(*data), NULL); + "WGDS", UEFI_WGDS_TABLE_SIZE_REV3, + NULL); if (IS_ERR(data)) return -EINVAL; @@ -615,10 +616,32 @@ int iwl_uefi_get_wgds_table(struct iwl_fw_runtime *fwrt) goto out; } + if (WARN_ON(BIOS_GEO_MAX_PROFILE_NUM > + ARRAY_SIZE(fwrt->geo_profiles) || + UEFI_GEO_NUM_BANDS_REV3 > + ARRAY_SIZE(fwrt->geo_profiles[0].bands) || + BIOS_GEO_NUM_CHAINS > + ARRAY_SIZE(fwrt->geo_profiles[0].bands[0].chains))) { + ret = -EINVAL; + goto out; + } + fwrt->geo_rev = data->revision; - for (i = 0; i < data->num_profiles; i++) - memcpy(&fwrt->geo_profiles[i], &data->geo_profiles[i], - sizeof(struct iwl_geo_profile)); + for (int prof = 0; prof < data->num_profiles; prof++) { + const u8 *val = &data->vals[UEFI_WGDS_PROFILE_SIZE_REV3 * prof]; + struct iwl_geo_profile *geo_prof = &fwrt->geo_profiles[prof]; + + for (int subband = 0; + subband < UEFI_GEO_NUM_BANDS_REV3; + subband++) { + geo_prof->bands[subband].max = *val++; + + for (int chain = 0; + chain < BIOS_GEO_NUM_CHAINS; + chain++) + geo_prof->bands[subband].chains[chain] = *val++; + } + } fwrt->geo_num_profiles = data->num_profiles; fwrt->geo_enabled = true; diff --git a/drivers/net/wireless/intel/iwlwifi/fw/uefi.h b/drivers/net/wireless/intel/iwlwifi/fw/uefi.h index aa5a4c5a7392..3959937242d8 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/uefi.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/uefi.h @@ -81,6 +81,8 @@ struct uefi_cnv_common_step_data { #define UEFI_SAR_MAX_CHAINS_PER_PROFILE 4 +#define UEFI_GEO_NUM_BANDS_REV3 3 + /* * struct uefi_cnv_var_wrds - WRDS table as defined in UEFI * @@ -145,14 +147,26 @@ struct uefi_cnv_var_ewrd { * @revision: the revision of the table * @num_profiles: the number of geo profiles we have in the table. * The first 3 are mandatory, and can have up to 8. - * @geo_profiles: a per-profile table of the offsets to add to SAR values. + * @vals: a per-profile table of the offsets to add to SAR values. This is an + * array of profiles, each profile is an array of + * &struct iwl_geo_profile_band, one for each subband. + * There are %UEFI_GEO_NUM_BANDS_REV3 subbands. */ struct uefi_cnv_var_wgds { u8 revision; u8 num_profiles; - struct iwl_geo_profile geo_profiles[BIOS_GEO_MAX_PROFILE_NUM]; + u8 vals[]; } __packed; +/* struct iwl_geo_profile_band is 3 bytes-long, but since it is not packed, + * we can't use sizeof() + */ +#define UEFI_WGDS_PROFILE_SIZE_REV3 (sizeof(u8) * 3 * UEFI_GEO_NUM_BANDS_REV3) + +#define UEFI_WGDS_TABLE_SIZE_REV3 \ + (offsetof(struct uefi_cnv_var_wgds, vals) + \ + UEFI_WGDS_PROFILE_SIZE_REV3 * BIOS_GEO_MAX_PROFILE_NUM) + /* * struct uefi_cnv_var_ppag - PPAG table as defined in UEFI * @revision: the revision of the table From c30e4e03721dc75adfff6b4c2ad671dab71f319c Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 19 Mar 2026 20:48:49 +0200 Subject: [PATCH 0891/1561] wifi: iwlwifi: uefi: add support for WGDS rev4 This new revision includes support for UNII-9. It adds a subband. Signed-off-by: Emmanuel Grumbach Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260319204647.ad8e49c3a9e1.I51170ba78a706f976e93918d6473185d41e4306d@changeid --- .../wireless/intel/iwlwifi/fw/regulatory.h | 2 +- drivers/net/wireless/intel/iwlwifi/fw/uefi.c | 32 ++++++++++++++----- drivers/net/wireless/intel/iwlwifi/fw/uefi.h | 11 +++++-- 3 files changed, 34 insertions(+), 11 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/regulatory.h b/drivers/net/wireless/intel/iwlwifi/fw/regulatory.h index a3684514c904..6fffc032efd3 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/regulatory.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/regulatory.h @@ -25,7 +25,7 @@ #define BIOS_PPAG_MAX_SUB_BANDS_NUM 12 #define BIOS_GEO_NUM_CHAINS 2 -#define BIOS_GEO_MAX_NUM_BANDS 3 +#define BIOS_GEO_MAX_NUM_BANDS 4 #define BIOS_GEO_MAX_PROFILE_NUM 8 #define BIOS_GEO_MIN_PROFILE_NUM 3 diff --git a/drivers/net/wireless/intel/iwlwifi/fw/uefi.c b/drivers/net/wireless/intel/iwlwifi/fw/uefi.c index ccac50385175..f73340c7d537 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/uefi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/uefi.c @@ -593,21 +593,39 @@ out: int iwl_uefi_get_wgds_table(struct iwl_fw_runtime *fwrt) { struct uefi_cnv_var_wgds *data; + unsigned long expected_size; + unsigned long size; + int profile_size; + int n_subbands; int ret = 0; data = iwl_uefi_get_verified_variable(fwrt->trans, IWL_UEFI_WGDS_NAME, "WGDS", UEFI_WGDS_TABLE_SIZE_REV3, - NULL); + &size); if (IS_ERR(data)) return -EINVAL; - if (data->revision != IWL_UEFI_WGDS_REVISION) { + switch (data->revision) { + case 3: + expected_size = UEFI_WGDS_TABLE_SIZE_REV3; + n_subbands = UEFI_GEO_NUM_BANDS_REV3; + break; + case 4: + expected_size = UEFI_WGDS_TABLE_SIZE_REV4; + n_subbands = UEFI_GEO_NUM_BANDS_REV4; + break; + default: ret = -EINVAL; IWL_DEBUG_RADIO(fwrt, "Unsupported UEFI WGDS revision:%d\n", data->revision); goto out; } + if (size != expected_size) { + ret = -EINVAL; + goto out; + } + if (data->num_profiles < BIOS_GEO_MIN_PROFILE_NUM || data->num_profiles > BIOS_GEO_MAX_PROFILE_NUM) { ret = -EINVAL; @@ -618,8 +636,7 @@ int iwl_uefi_get_wgds_table(struct iwl_fw_runtime *fwrt) if (WARN_ON(BIOS_GEO_MAX_PROFILE_NUM > ARRAY_SIZE(fwrt->geo_profiles) || - UEFI_GEO_NUM_BANDS_REV3 > - ARRAY_SIZE(fwrt->geo_profiles[0].bands) || + n_subbands > ARRAY_SIZE(fwrt->geo_profiles[0].bands) || BIOS_GEO_NUM_CHAINS > ARRAY_SIZE(fwrt->geo_profiles[0].bands[0].chains))) { ret = -EINVAL; @@ -627,13 +644,12 @@ int iwl_uefi_get_wgds_table(struct iwl_fw_runtime *fwrt) } fwrt->geo_rev = data->revision; + profile_size = 3 * n_subbands; for (int prof = 0; prof < data->num_profiles; prof++) { - const u8 *val = &data->vals[UEFI_WGDS_PROFILE_SIZE_REV3 * prof]; + const u8 *val = &data->vals[profile_size * prof]; struct iwl_geo_profile *geo_prof = &fwrt->geo_profiles[prof]; - for (int subband = 0; - subband < UEFI_GEO_NUM_BANDS_REV3; - subband++) { + for (int subband = 0; subband < n_subbands; subband++) { geo_prof->bands[subband].max = *val++; for (int chain = 0; diff --git a/drivers/net/wireless/intel/iwlwifi/fw/uefi.h b/drivers/net/wireless/intel/iwlwifi/fw/uefi.h index 3959937242d8..0d3dac65178c 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/uefi.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/uefi.h @@ -31,7 +31,6 @@ #define IWL_SGOM_MAP_SIZE 339 #define IWL_UATS_MAP_SIZE 339 -#define IWL_UEFI_WGDS_REVISION 3 #define IWL_UEFI_MIN_WTAS_REVISION 1 #define IWL_UEFI_MAX_WTAS_REVISION 2 #define IWL_UEFI_SPLC_REVISION 0 @@ -82,6 +81,7 @@ struct uefi_cnv_common_step_data { #define UEFI_SAR_MAX_CHAINS_PER_PROFILE 4 #define UEFI_GEO_NUM_BANDS_REV3 3 +#define UEFI_GEO_NUM_BANDS_REV4 4 /* * struct uefi_cnv_var_wrds - WRDS table as defined in UEFI @@ -150,7 +150,8 @@ struct uefi_cnv_var_ewrd { * @vals: a per-profile table of the offsets to add to SAR values. This is an * array of profiles, each profile is an array of * &struct iwl_geo_profile_band, one for each subband. - * There are %UEFI_GEO_NUM_BANDS_REV3 subbands. + * There are %UEFI_GEO_NUM_BANDS_REV3 or %UEFI_GEO_NUM_BANDS_REV4 subbands + * depending on the revision. */ struct uefi_cnv_var_wgds { u8 revision; @@ -163,10 +164,16 @@ struct uefi_cnv_var_wgds { */ #define UEFI_WGDS_PROFILE_SIZE_REV3 (sizeof(u8) * 3 * UEFI_GEO_NUM_BANDS_REV3) +#define UEFI_WGDS_PROFILE_SIZE_REV4 (sizeof(u8) * 3 * UEFI_GEO_NUM_BANDS_REV4) + #define UEFI_WGDS_TABLE_SIZE_REV3 \ (offsetof(struct uefi_cnv_var_wgds, vals) + \ UEFI_WGDS_PROFILE_SIZE_REV3 * BIOS_GEO_MAX_PROFILE_NUM) +#define UEFI_WGDS_TABLE_SIZE_REV4 \ + (offsetof(struct uefi_cnv_var_wgds, vals) + \ + UEFI_WGDS_PROFILE_SIZE_REV4 * BIOS_GEO_MAX_PROFILE_NUM) + /* * struct uefi_cnv_var_ppag - PPAG table as defined in UEFI * @revision: the revision of the table From f951689793e6c5e908be143fcc506aafa5bf3c36 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 19 Mar 2026 20:48:50 +0200 Subject: [PATCH 0892/1561] wifi: iwlwifi: acpi: validate the WGDS table Prefer to use ARRAY_SIZE when we check array-length. Make sure num_profile isn't bigger than the number of profiles we can actually store in the firmware runtime object. Same of the number of bands. Signed-off-by: Emmanuel Grumbach Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260319204647.a398511514ed.Ie4e62e2008f7e117ae7e305967ffadf1a30fc2b1@changeid --- drivers/net/wireless/intel/iwlwifi/fw/acpi.c | 38 ++++++++++++++------ 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c index 721bd014bbaa..1c416d3f75ea 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c @@ -865,6 +865,18 @@ int iwl_acpi_get_wgds_table(struct iwl_fw_runtime *fwrt) num_bands = rev_data[idx].bands; num_profiles = rev_data[idx].profiles; + if (WARN_ON(num_profiles > + ARRAY_SIZE(fwrt->geo_profiles))) { + ret = -EINVAL; + goto out_free; + } + + if (WARN_ON(num_bands > + ARRAY_SIZE(fwrt->geo_profiles[0].bands))) { + ret = -EINVAL; + goto out_free; + } + if (rev_data[idx].min_profiles) { /* read header that says # of profiles */ union acpi_object *entry; @@ -904,18 +916,20 @@ int iwl_acpi_get_wgds_table(struct iwl_fw_runtime *fwrt) read_table: fwrt->geo_rev = tbl_rev; + for (i = 0; i < num_profiles; i++) { - for (j = 0; j < BIOS_GEO_MAX_NUM_BANDS; j++) { + struct iwl_geo_profile *prof = &fwrt->geo_profiles[i]; + + for (j = 0; j < ARRAY_SIZE(prof->bands); j++) { union acpi_object *entry; /* - * num_bands is either 2 or 3, if it's only 2 then - * fill the third band (6 GHz) with the values from - * 5 GHz (second band) + * num_bands is either 2 or 3 or 4, if it's lower + * than 4, fill the third band (6 GHz) with the values + * from 5 GHz (second band) */ if (j >= num_bands) { - fwrt->geo_profiles[i].bands[j].max = - fwrt->geo_profiles[i].bands[1].max; + prof->bands[j].max = prof->bands[1].max; } else { entry = &wifi_pkg->package.elements[entry_idx]; entry_idx++; @@ -925,15 +939,17 @@ read_table: goto out_free; } - fwrt->geo_profiles[i].bands[j].max = + prof->bands[j].max = entry->integer.value; } - for (k = 0; k < BIOS_GEO_NUM_CHAINS; k++) { + for (k = 0; + k < ARRAY_SIZE(prof->bands[0].chains); + k++) { /* same here as above */ if (j >= num_bands) { - fwrt->geo_profiles[i].bands[j].chains[k] = - fwrt->geo_profiles[i].bands[1].chains[k]; + prof->bands[j].chains[k] = + prof->bands[1].chains[k]; } else { entry = &wifi_pkg->package.elements[entry_idx]; entry_idx++; @@ -943,7 +959,7 @@ read_table: goto out_free; } - fwrt->geo_profiles[i].bands[j].chains[k] = + prof->bands[j].chains[k] = entry->integer.value; } } From c5cc3d37177835650d0c58609527a7550dfa0a66 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 19 Mar 2026 20:48:51 +0200 Subject: [PATCH 0893/1561] wifi: iwlwifi: acpi: add support for WGDS revision 4 This adds support for UNII-9. WGDS tables will now have 4 subbands. Signed-off-by: Emmanuel Grumbach Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260319204647.721e9fbabfc9.Ie8bd641cf84aa659d93893438c172c172b67214b@changeid --- drivers/net/wireless/intel/iwlwifi/fw/acpi.c | 6 ++++++ drivers/net/wireless/intel/iwlwifi/fw/acpi.h | 1 + 2 files changed, 7 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c index 1c416d3f75ea..16d91c6915f0 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c @@ -812,6 +812,12 @@ int iwl_acpi_get_wgds_table(struct iwl_fw_runtime *fwrt) u8 profiles; u8 min_profiles; } rev_data[] = { + { + .revisions = BIT(4), + .bands = ACPI_GEO_NUM_BANDS_REV4, + .profiles = ACPI_NUM_GEO_PROFILES_REV3, + .min_profiles = BIOS_GEO_MIN_PROFILE_NUM, + }, { .revisions = BIT(3), .bands = ACPI_GEO_NUM_BANDS_REV2, diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h index 8e5ed72d4d8d..51a57e57de7a 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h @@ -66,6 +66,7 @@ /* revision 0 and 1 are identical, except for the semantics in the FW */ #define ACPI_GEO_NUM_BANDS_REV0 2 #define ACPI_GEO_NUM_BANDS_REV2 3 +#define ACPI_GEO_NUM_BANDS_REV4 4 #define ACPI_WRDD_WIFI_DATA_SIZE 2 #define ACPI_SPLC_WIFI_DATA_SIZE 2 From f863093a914c4b371f5c82a173d5b804318c91ef Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 19 Mar 2026 20:48:52 +0200 Subject: [PATCH 0894/1561] wifi: iwlwifi: support PER_CHAIN_LIMIT_OFFSET_CMD v6 This includes support for UNII-9. Store the source of the WGDS table in the firmware runtime object to be able to pass the information to the firmware. Signed-off-by: Emmanuel Grumbach Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260319204647.eaff31760dd7.Ic7f56fbbe310833723094f965e7ba3f8624d0ef9@changeid --- drivers/net/wireless/intel/iwlwifi/fw/acpi.c | 1 + .../net/wireless/intel/iwlwifi/fw/api/power.h | 14 ++++++++ .../net/wireless/intel/iwlwifi/fw/runtime.h | 2 ++ drivers/net/wireless/intel/iwlwifi/fw/uefi.c | 1 + .../wireless/intel/iwlwifi/mld/regulatory.c | 32 +++++++++++++++---- 5 files changed, 44 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c index 16d91c6915f0..bf0f851a9075 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c @@ -973,6 +973,7 @@ read_table: } fwrt->geo_num_profiles = num_profiles; + fwrt->geo_bios_source = BIOS_SOURCE_ACPI; fwrt->geo_enabled = true; ret = 0; out_free: diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/power.h b/drivers/net/wireless/intel/iwlwifi/fw/api/power.h index ec923162a44b..a3f916630df2 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/power.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/power.h @@ -457,6 +457,7 @@ struct iwl_dev_tx_power_cmd { #define IWL_NUM_GEO_PROFILES_V3 8 #define IWL_NUM_BANDS_PER_CHAIN_V1 2 #define IWL_NUM_BANDS_PER_CHAIN_V2 3 +#define IWL_NUM_BANDS_PER_CHAIN_V6 4 /** * enum iwl_geo_per_chain_offset_operation - type of operation @@ -538,12 +539,25 @@ struct iwl_geo_tx_power_profiles_cmd_v5 { __le32 table_revision; } __packed; /* PER_CHAIN_LIMIT_OFFSET_CMD_VER_5 */ +/** + * struct iwl_geo_tx_power_profiles_cmd_v6 - struct for PER_CHAIN_LIMIT_OFFSET_CMD cmd. + * @ops: operations, value from &enum iwl_geo_per_chain_offset_operation + * @table: offset profile per band. + * @bios_hdr: describes the revision and the source of the BIOS + */ +struct iwl_geo_tx_power_profiles_cmd_v6 { + __le32 ops; + struct iwl_per_chain_offset table[IWL_NUM_GEO_PROFILES_V3][IWL_NUM_BANDS_PER_CHAIN_V6]; + struct iwl_bios_config_hdr bios_hdr; +} __packed; /* PER_CHAIN_LIMIT_OFFSET_CMD_VER_6 */ + union iwl_geo_tx_power_profiles_cmd { struct iwl_geo_tx_power_profiles_cmd_v1 v1; struct iwl_geo_tx_power_profiles_cmd_v2 v2; struct iwl_geo_tx_power_profiles_cmd_v3 v3; struct iwl_geo_tx_power_profiles_cmd_v4 v4; struct iwl_geo_tx_power_profiles_cmd_v5 v5; + struct iwl_geo_tx_power_profiles_cmd_v6 v6; }; /** diff --git a/drivers/net/wireless/intel/iwlwifi/fw/runtime.h b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h index 411e75b45530..d80ae610e56c 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/runtime.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h @@ -113,6 +113,7 @@ struct iwl_txf_iter_data { * Only read the UEFI variables if locked. * @sar_profiles: sar profiles as read from WRDS/EWRD BIOS tables * @geo_profiles: geographic profiles as read from WGDS BIOS table + * @geo_bios_source: see &enum bios_source * @phy_filters: specific phy filters as read from WPFC BIOS table * @ppag_bios_rev: PPAG BIOS revision * @ppag_bios_source: see &enum bios_source @@ -204,6 +205,7 @@ struct iwl_fw_runtime { u8 sar_chain_b_profile; u8 reduced_power_flags; struct iwl_geo_profile geo_profiles[BIOS_GEO_MAX_PROFILE_NUM]; + enum bios_source geo_bios_source; u32 geo_rev; u32 geo_num_profiles; bool geo_enabled; diff --git a/drivers/net/wireless/intel/iwlwifi/fw/uefi.c b/drivers/net/wireless/intel/iwlwifi/fw/uefi.c index f73340c7d537..2ef0a7a920ad 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/uefi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/uefi.c @@ -644,6 +644,7 @@ int iwl_uefi_get_wgds_table(struct iwl_fw_runtime *fwrt) } fwrt->geo_rev = data->revision; + fwrt->geo_bios_source = BIOS_SOURCE_UEFI; profile_size = 3 * n_subbands; for (int prof = 0; prof < data->num_profiles; prof++) { const u8 *val = &data->vals[profile_size * prof]; diff --git a/drivers/net/wireless/intel/iwlwifi/mld/regulatory.c b/drivers/net/wireless/intel/iwlwifi/mld/regulatory.c index f009c884e6cd..2486d78d6fc3 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/regulatory.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/regulatory.c @@ -73,16 +73,36 @@ static int iwl_mld_geo_sar_init(struct iwl_mld *mld) { u32 cmd_id = WIDE_ID(PHY_OPS_GROUP, PER_CHAIN_LIMIT_OFFSET_CMD); /* Only set to South Korea if the table revision is 1 */ - __le32 sk = cpu_to_le32(mld->fwrt.geo_rev == 1 ? 1 : 0); + u8 sk = mld->fwrt.geo_rev == 1 ? 1 : 0; union iwl_geo_tx_power_profiles_cmd cmd = { .v5.ops = cpu_to_le32(IWL_PER_CHAIN_OFFSET_SET_TABLES), - .v5.table_revision = sk, }; + u32 cmd_ver = iwl_fw_lookup_cmd_ver(mld->fw, cmd_id, 0); + int n_subbands; + int cmd_size; int ret; - ret = iwl_sar_geo_fill_table(&mld->fwrt, &cmd.v5.table[0][0], - ARRAY_SIZE(cmd.v5.table[0]), - BIOS_GEO_MAX_PROFILE_NUM); + switch (cmd_ver) { + case 5: + n_subbands = ARRAY_SIZE(cmd.v5.table[0]); + cmd.v5.table_revision = cpu_to_le32(sk); + cmd_size = sizeof(cmd.v5); + break; + case 6: + n_subbands = ARRAY_SIZE(cmd.v6.table[0]); + cmd.v6.bios_hdr.table_revision = mld->fwrt.geo_rev; + cmd.v6.bios_hdr.table_source = mld->fwrt.geo_bios_source; + cmd_size = sizeof(cmd.v6); + break; + default: + WARN(false, "unsupported version: %d", cmd_ver); + return -EINVAL; + } + + BUILD_BUG_ON(offsetof(typeof(cmd), v6.table) != + offsetof(typeof(cmd), v5.table)); + ret = iwl_sar_geo_fill_table(&mld->fwrt, &cmd.v6.table[0][0], + n_subbands, BIOS_GEO_MAX_PROFILE_NUM); /* It is a valid scenario to not support SAR, or miss wgds table, * but in that case there is no need to send the command. @@ -90,7 +110,7 @@ static int iwl_mld_geo_sar_init(struct iwl_mld *mld) if (ret) return 0; - return iwl_mld_send_cmd_pdu(mld, cmd_id, &cmd, sizeof(cmd.v5)); + return iwl_mld_send_cmd_pdu(mld, cmd_id, &cmd, cmd_size); } int iwl_mld_config_sar_profile(struct iwl_mld *mld, int prof_a, int prof_b) From 769b5b1ed59084b653f423904b7a486b1c86672a Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 19 Mar 2026 20:48:53 +0200 Subject: [PATCH 0895/1561] wifi: iwlwifi: uefi: mode the comments valid kerneldoc comments This will allow to get warnings if we make mistakes while documenting the uefi structures Signed-off-by: Emmanuel Grumbach Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260319204647.e9ad48c2cf4a.I867e3eb9581ac2a87772fd2534502c008543bafb@changeid --- drivers/net/wireless/intel/iwlwifi/fw/uefi.h | 27 ++++++++++++-------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/uefi.h b/drivers/net/wireless/intel/iwlwifi/fw/uefi.h index 0d3dac65178c..474f06db4d43 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/uefi.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/uefi.h @@ -83,7 +83,7 @@ struct uefi_cnv_common_step_data { #define UEFI_GEO_NUM_BANDS_REV3 3 #define UEFI_GEO_NUM_BANDS_REV4 4 -/* +/** * struct uefi_cnv_var_wrds - WRDS table as defined in UEFI * * @revision: the revision of the table @@ -117,7 +117,7 @@ struct uefi_cnv_var_wrds { (offsetof(struct uefi_cnv_var_wrds, vals) + \ UEFI_SAR_PROFILE_SIZE_REV3) -/* +/** * struct uefi_cnv_var_ewrd - EWRD table as defined in UEFI * @revision: the revision of the table * @mode: is WRDS enbaled/disabled @@ -142,7 +142,7 @@ struct uefi_cnv_var_ewrd { (offsetof(struct uefi_cnv_var_ewrd, vals) + \ UEFI_SAR_PROFILE_SIZE_REV3 * (BIOS_SAR_MAX_PROFILE_NUM - 1)) -/* +/** * struct uefi_cnv_var_wgds - WGDS table as defined in UEFI * @revision: the revision of the table * @num_profiles: the number of geo profiles we have in the table. @@ -174,7 +174,7 @@ struct uefi_cnv_var_wgds { (offsetof(struct uefi_cnv_var_wgds, vals) + \ UEFI_WGDS_PROFILE_SIZE_REV4 * BIOS_GEO_MAX_PROFILE_NUM) -/* +/** * struct uefi_cnv_var_ppag - PPAG table as defined in UEFI * @revision: the revision of the table * @ppag_modes: values from &enum iwl_ppag_flags @@ -198,7 +198,8 @@ struct uefi_cnv_var_ppag { (offsetof(struct uefi_cnv_var_ppag, vals) + \ sizeof(s8) * UEFI_PPAG_NUM_CHAINS * UEFI_PPAG_SUB_BANDS_NUM_REV5) -/* struct uefi_cnv_var_wtas - WTAS tabled as defined in UEFI +/** + * struct uefi_cnv_var_wtas - WTAS tabled as defined in UEFI * @revision: the revision of the table * @tas_selection: different options of TAS enablement. * @black_list_size: the number of defined entried in the black list @@ -211,7 +212,8 @@ struct uefi_cnv_var_wtas { u16 black_list[IWL_WTAS_BLACK_LIST_MAX]; } __packed; -/* struct uefi_cnv_var_splc - SPLC tabled as defined in UEFI +/** + * struct uefi_cnv_var_splc - SPLC tabled as defined in UEFI * @revision: the revision of the table * @default_pwr_limit: The default maximum power per device */ @@ -220,7 +222,8 @@ struct uefi_cnv_var_splc { u32 default_pwr_limit; } __packed; -/* struct uefi_cnv_var_wrdd - WRDD table as defined in UEFI +/** + * struct uefi_cnv_var_wrdd - WRDD table as defined in UEFI * @revision: the revision of the table * @mcc: country identifier as defined in ISO/IEC 3166-1 Alpha 2 code */ @@ -229,7 +232,8 @@ struct uefi_cnv_var_wrdd { u32 mcc; } __packed; -/* struct uefi_cnv_var_eckv - ECKV table as defined in UEFI +/** + * struct uefi_cnv_var_eckv - ECKV table as defined in UEFI * @revision: the revision of the table * @ext_clock_valid: indicates if external 32KHz clock is valid */ @@ -240,7 +244,8 @@ struct uefi_cnv_var_eckv { #define UEFI_MAX_DSM_FUNCS 32 -/* struct uefi_cnv_var_general_cfg - DSM-like table as defined in UEFI +/** + * struct uefi_cnv_var_general_cfg - DSM-like table as defined in UEFI * @revision: the revision of the table * @functions: payload of the different DSM functions */ @@ -250,7 +255,9 @@ struct uefi_cnv_var_general_cfg { } __packed; #define IWL_UEFI_WBEM_REV0_MASK (BIT(0) | BIT(1)) -/* struct uefi_cnv_wlan_wbem_data - Bandwidth enablement per MCC as defined + +/** + * struct uefi_cnv_wlan_wbem_data - Bandwidth enablement per MCC as defined * in UEFI * @revision: the revision of the table * @wbem_320mhz_per_mcc: enablement of 320MHz bandwidth per MCC From f199ee6078a9677c872e09bd40c4cea8db6fbc28 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 19 Mar 2026 20:48:54 +0200 Subject: [PATCH 0896/1561] wifi: iwlwifi: remove IWL_MAX_WD_TIMEOUT This define is not used, remove it. Signed-off-by: Emmanuel Grumbach Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260319204647.15341c4081ed.I639a1d65799ce5502e5c83e8889bcc5eda5ec4dc@changeid --- drivers/net/wireless/intel/iwlwifi/iwl-config.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index 45cf2bc68e41..5f40cd15e27f 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -85,7 +85,6 @@ enum iwl_nvm_type { #define IWL_WATCHDOG_DISABLED 0 #define IWL_DEF_WD_TIMEOUT 2500 #define IWL_LONG_WD_TIMEOUT 10000 -#define IWL_MAX_WD_TIMEOUT 120000 #define IWL_DEFAULT_MAX_TX_POWER 22 #define IWL_TX_CSUM_NETIF_FLAGS (NETIF_F_IPV6_CSUM | NETIF_F_IP_CSUM |\ From a8023eb7038cffb3f7f481ee6c26f7f854b33795 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 19 Mar 2026 20:48:55 +0200 Subject: [PATCH 0897/1561] wifi: iwlwifi: mld: remove SCAN_TIMEOUT_MSEC It has no users Signed-off-by: Emmanuel Grumbach Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260319204647.ea8d4a381474.I82c1d17faa6de6f16f08573ebb180de8db837bee@changeid --- drivers/net/wireless/intel/iwlwifi/mld/scan.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/scan.c b/drivers/net/wireless/intel/iwlwifi/mld/scan.c index 96cd970cceb4..17e0b13b5ce8 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/scan.c @@ -47,8 +47,6 @@ /* adaptive dwell number of APs override mask for social channels */ #define IWL_SCAN_ADWELL_N_APS_SOCIAL_CHS_BIT BIT(21) -#define SCAN_TIMEOUT_MSEC (30000 * HZ) - /* minimal number of 2GHz and 5GHz channels in the regular scan request */ #define IWL_MLD_6GHZ_PASSIVE_SCAN_MIN_CHANS 4 From 364273359ef354d315157f78fef1b72f75d08468 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 20 Mar 2026 10:09:04 +0200 Subject: [PATCH 0898/1561] wifi: iwlwifi: mld: enable UHR in TLC Tell the firmware if UHR is supported, including ELR (enhanced long range) MCS support. Note that the spec currently doesn't differentiate between 1.5 and 3 Mbps ELR MCSes, unlike the firmware. Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260320100746.7117009d7c39.If4e8cdc63fdf4c5f14d923a5c59fb7b43df72a67@changeid --- drivers/net/wireless/intel/iwlwifi/mld/tlc.c | 28 +++++++++++++++++--- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/tlc.c b/drivers/net/wireless/intel/iwlwifi/mld/tlc.c index 62a54c37a98c..ede385909e38 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/tlc.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/tlc.c @@ -36,7 +36,8 @@ iwl_mld_get_tlc_cmd_flags(struct iwl_mld *mld, struct ieee80211_vif *vif, struct ieee80211_link_sta *link_sta, const struct ieee80211_sta_he_cap *own_he_cap, - const struct ieee80211_sta_eht_cap *own_eht_cap) + const struct ieee80211_sta_eht_cap *own_eht_cap, + const struct ieee80211_sta_uhr_cap *own_uhr_cap) { struct ieee80211_sta_ht_cap *ht_cap = &link_sta->ht_cap; struct ieee80211_sta_vht_cap *vht_cap = &link_sta->vht_cap; @@ -90,6 +91,12 @@ iwl_mld_get_tlc_cmd_flags(struct iwl_mld *mld, flags |= IWL_TLC_MNG_CFG_FLAGS_EHT_EXTRA_LTF_MSK; } + if (link_sta->uhr_cap.has_uhr && own_uhr_cap && + link_sta->uhr_cap.phy.cap & IEEE80211_UHR_PHY_CAP_ELR_RX && + own_uhr_cap->phy.cap & IEEE80211_UHR_PHY_CAP_ELR_TX) + flags |= IWL_TLC_MNG_CFG_FLAGS_UHR_ELR_1_5_MBPS_MSK | + IWL_TLC_MNG_CFG_FLAGS_UHR_ELR_3_MBPS_MSK; + return cpu_to_le16(flags); } @@ -406,6 +413,7 @@ iwl_mld_fill_supp_rates(struct iwl_mld *mld, struct ieee80211_vif *vif, struct ieee80211_supported_band *sband, const struct ieee80211_sta_he_cap *own_he_cap, const struct ieee80211_sta_eht_cap *own_eht_cap, + const struct ieee80211_sta_uhr_cap *own_uhr_cap, struct iwl_tlc_config_cmd *cmd) { int i; @@ -423,7 +431,16 @@ iwl_mld_fill_supp_rates(struct iwl_mld *mld, struct ieee80211_vif *vif, cmd->non_ht_rates = cpu_to_le16(non_ht_rates); cmd->mode = IWL_TLC_MNG_MODE_NON_HT; - if (link_sta->eht_cap.has_eht && own_he_cap && own_eht_cap) { + if (link_sta->uhr_cap.has_uhr && own_uhr_cap) { + cmd->mode = IWL_TLC_MNG_MODE_UHR; + /* + * FIXME: spec currently inherits from EHT but has no + * finer MCS bits. Once that's there, need to add them + * to the bitmaps (and maybe copy this to UHR, or so.) + */ + iwl_mld_fill_eht_rates(vif, link_sta, own_he_cap, + own_eht_cap, cmd); + } else if (link_sta->eht_cap.has_eht && own_he_cap && own_eht_cap) { cmd->mode = IWL_TLC_MNG_MODE_EHT; iwl_mld_fill_eht_rates(vif, link_sta, own_he_cap, own_eht_cap, cmd); @@ -519,13 +536,16 @@ static void iwl_mld_send_tlc_cmd(struct iwl_mld *mld, ieee80211_get_he_iftype_cap_vif(sband, vif); const struct ieee80211_sta_eht_cap *own_eht_cap = ieee80211_get_eht_iftype_cap_vif(sband, vif); + const struct ieee80211_sta_uhr_cap *own_uhr_cap = + ieee80211_get_uhr_iftype_cap_vif(sband, vif); struct iwl_tlc_config_cmd cmd = { /* For AP mode, use 20 MHz until the STA is authorized */ .max_ch_width = mld_sta->sta_state > IEEE80211_STA_ASSOC ? iwl_mld_fw_bw_from_sta_bw(link_sta) : IWL_TLC_MNG_CH_WIDTH_20MHZ, .flags = iwl_mld_get_tlc_cmd_flags(mld, vif, link_sta, - own_he_cap, own_eht_cap), + own_he_cap, own_eht_cap, + own_uhr_cap), .chains = iwl_mld_get_fw_chains(mld), .sgi_ch_width_supp = iwl_mld_get_fw_sgi(link_sta), .max_mpdu_len = cpu_to_le16(link_sta->agg.max_amsdu_len), @@ -555,7 +575,7 @@ static void iwl_mld_send_tlc_cmd(struct iwl_mld *mld, iwl_mld_fill_supp_rates(mld, vif, link_sta, sband, own_he_cap, own_eht_cap, - &cmd); + own_uhr_cap, &cmd); if (cmd_ver == 6) { cmd_ptr = &cmd; From d60cbe9ef68c0fb2a46a5f89e8f77f419f62a1d6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 20 Mar 2026 10:09:05 +0200 Subject: [PATCH 0899/1561] wifi: iwlwifi: mld: set UHR MCS in RX status Handle UHR MCSes in the RX status when receiving UHR frames. Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260320100746.7d235ea6a4f2.Ibc8c7e1af45cae2756e4ddcdf6dc5424b3992f7b@changeid --- drivers/net/wireless/intel/iwlwifi/mld/rx.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/rx.c b/drivers/net/wireless/intel/iwlwifi/mld/rx.c index 214dcfde2fb4..ff6e71e3ff6e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/rx.c @@ -1407,6 +1407,7 @@ static void iwl_mld_set_rx_rate(struct iwl_mld *mld, u32 rate_n_flags = phy_data->rate_n_flags; u8 stbc = u32_get_bits(rate_n_flags, RATE_MCS_STBC_MSK); u32 format = rate_n_flags & RATE_MCS_MOD_TYPE_MSK; + u32 he_type = u32_get_bits(rate_n_flags, RATE_MCS_HE_TYPE_MSK); bool is_sgi = rate_n_flags & RATE_MCS_SGI_MSK; /* bandwidth may be overridden to RU by PHY ntfy */ @@ -1481,6 +1482,12 @@ static void iwl_mld_set_rx_rate(struct iwl_mld *mld, rx_status->encoding = RX_ENC_EHT; iwl_mld_set_rx_nonlegacy_rate_info(rate_n_flags, rx_status); break; + case RATE_MCS_MOD_TYPE_UHR: + rx_status->encoding = RX_ENC_UHR; + iwl_mld_set_rx_nonlegacy_rate_info(rate_n_flags, rx_status); + if (he_type == RATE_MCS_HE_TYPE_UHR_ELR) + rx_status->uhr.elr = 1; + break; default: WARN_ON_ONCE(1); } From 7fa6fcfca7ac3e09ae279f63111f59e990126a52 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 20 Mar 2026 10:09:07 +0200 Subject: [PATCH 0900/1561] wifi: iwlwifi: mld: support changing iftype at runtime While the interface isn't really operating, which is already required by mac80211, we can simply remove the MAC and add it again to change the type. Implement this simple handling. We could almost consider moving this to mac80211 itself, as this kind of flow should be supportable by any device, but for now keep it here. Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260320100746.2fb530f9d825.I7cc68fa36e40c9f3bef3be9c2982061cb9ea2300@changeid --- .../net/wireless/intel/iwlwifi/mld/mac80211.c | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c index 0c53d6bd9651..79f4dc271c34 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c @@ -754,6 +754,30 @@ void iwl_mld_mac80211_remove_interface(struct ieee80211_hw *hw, mld->monitor.phy.valid = false; } +static +int iwl_mld_mac80211_change_interface(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + enum nl80211_iftype new_type, bool p2p) +{ + enum nl80211_iftype old_type = vif->type; + bool old_p2p = vif->p2p; + int ret; + + iwl_mld_mac80211_remove_interface(hw, vif); + + /* set the new type for adding it cleanly */ + vif->type = new_type; + vif->p2p = p2p; + + ret = iwl_mld_mac80211_add_interface(hw, vif); + + /* restore for mac80211, it will change it again */ + vif->type = old_type; + vif->p2p = old_p2p; + + return ret; +} + struct iwl_mld_mc_iter_data { struct iwl_mld *mld; int port_id; @@ -2716,6 +2740,7 @@ const struct ieee80211_ops iwl_mld_hw_ops = { .get_antenna = iwl_mld_get_antenna, .set_antenna = iwl_mld_set_antenna, .add_interface = iwl_mld_mac80211_add_interface, + .change_interface = iwl_mld_mac80211_change_interface, .remove_interface = iwl_mld_mac80211_remove_interface, .conf_tx = iwl_mld_mac80211_conf_tx, .prepare_multicast = iwl_mld_mac80211_prepare_multicast, From c74abe31142db03f942f12b28033f2b8b14e3acb Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Fri, 20 Mar 2026 10:09:09 +0200 Subject: [PATCH 0901/1561] wifi: iwlwifi: bump core version for BZ/SC/DR Start supporting Core 102 FW on these devices. Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260320100746.3b6540a99c1c.Ie2d3bdb3dc1865ad7c865cdcbeefa41d21ea4482@changeid --- drivers/net/wireless/intel/iwlwifi/cfg/bz.c | 2 +- drivers/net/wireless/intel/iwlwifi/cfg/dr.c | 2 +- drivers/net/wireless/intel/iwlwifi/cfg/sc.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/bz.c b/drivers/net/wireless/intel/iwlwifi/cfg/bz.c index 77db8c75e6e2..3653ddbf3ce9 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/bz.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/bz.c @@ -10,7 +10,7 @@ #include "fw/api/txq.h" /* Highest firmware core release supported */ -#define IWL_BZ_UCODE_CORE_MAX 101 +#define IWL_BZ_UCODE_CORE_MAX 102 /* Lowest firmware API version supported */ #define IWL_BZ_UCODE_API_MIN 100 diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/dr.c b/drivers/net/wireless/intel/iwlwifi/cfg/dr.c index a279dcfd3083..83d893b10f8e 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/dr.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/dr.c @@ -9,7 +9,7 @@ #include "fw/api/txq.h" /* Highest firmware core release supported */ -#define IWL_DR_UCODE_CORE_MAX 101 +#define IWL_DR_UCODE_CORE_MAX 102 /* Lowest firmware API version supported */ #define IWL_DR_UCODE_API_MIN 100 diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/sc.c b/drivers/net/wireless/intel/iwlwifi/cfg/sc.c index ee00b2af7a1d..749d46dc0236 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/sc.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/sc.c @@ -10,7 +10,7 @@ #include "fw/api/txq.h" /* Highest firmware core release supported */ -#define IWL_SC_UCODE_CORE_MAX 101 +#define IWL_SC_UCODE_CORE_MAX 102 /* Lowest firmware API version supported */ #define IWL_SC_UCODE_API_MIN 100 From 3fd645e6a153e2ce45928ec32442ab4719c3afa6 Mon Sep 17 00:00:00 2001 From: Pagadala Yesu Anjaneyulu Date: Fri, 20 Mar 2026 10:09:10 +0200 Subject: [PATCH 0902/1561] wifi: iwlwifi: fw: Add TLV support for BIOS revision of command Add support for newer firmware API versions that support multiple BIOS revisions. Use the new TLV provided by firmware to determine which BIOS revision it supports. Future patches will use this information to either drop commands when the BIOS revision is higher than supported or convert commands based on the command specific implementation. Since we are including now nvm-reg.h in img.h, this causes a re-definition error of IWL_NUM_CHANNELS which is also defined in eeprom.c, so rename IWL_NUM_CHANNELS to IWL_NUM_CHANNELS_V2 Signed-off-by: Pagadala Yesu Anjaneyulu Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260320100746.76c8a9589ea0.I7f9157115de702e07511f2c3ed5fcb9ae4c667aa@changeid --- .../wireless/intel/iwlwifi/fw/api/nvm-reg.h | 4 +-- drivers/net/wireless/intel/iwlwifi/fw/file.h | 15 +++++++++ drivers/net/wireless/intel/iwlwifi/fw/img.c | 32 ++++++++++++++++++- drivers/net/wireless/intel/iwlwifi/fw/img.h | 8 +++++ drivers/net/wireless/intel/iwlwifi/iwl-drv.c | 21 ++++++++++++ .../wireless/intel/iwlwifi/iwl-nvm-parse.c | 2 +- 6 files changed, 78 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h b/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h index 25c860a05b0e..443a9a416325 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h @@ -204,7 +204,7 @@ struct iwl_nvm_get_info_phy { } __packed; /* REGULATORY_NVM_GET_INFO_PHY_SKU_SECTION_S_VER_1 */ #define IWL_NUM_CHANNELS_V1 51 -#define IWL_NUM_CHANNELS 110 +#define IWL_NUM_CHANNELS_V2 110 /** * struct iwl_nvm_get_info_regulatory_v1 - regulatory information @@ -227,7 +227,7 @@ struct iwl_nvm_get_info_regulatory_v1 { struct iwl_nvm_get_info_regulatory { __le32 lar_enabled; __le32 n_channels; - __le32 channel_profile[IWL_NUM_CHANNELS]; + __le32 channel_profile[IWL_NUM_CHANNELS_V2]; } __packed; /* REGULATORY_NVM_GET_INFO_REGULATORY_S_VER_2 */ /** diff --git a/drivers/net/wireless/intel/iwlwifi/fw/file.h b/drivers/net/wireless/intel/iwlwifi/fw/file.h index 378788de1d74..f7a6f21267e9 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/file.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/file.h @@ -103,6 +103,7 @@ enum iwl_ucode_tlv_type { IWL_UCODE_TLV_D3_KEK_KCK_ADDR = 67, IWL_UCODE_TLV_CURRENT_PC = 68, IWL_UCODE_TLV_FSEQ_BIN_VERSION = 72, + IWL_UCODE_TLV_CMD_BIOS_TABLE = 73, /* contains sub-sections like PNVM file does (did) */ IWL_UCODE_TLV_PNVM_DATA = 74, @@ -1040,6 +1041,20 @@ struct iwl_fw_cmd_version { u8 notif_ver; } __packed; +/** + * struct iwl_fw_cmd_bios_table - firmware command BIOS revision entry + * @cmd: command ID + * @group: group ID + * @max_acpi_revision: max supported ACPI revision of command. + * @max_uefi_revision: max supported UEFI revision of command. + */ +struct iwl_fw_cmd_bios_table { + u8 cmd; + u8 group; + u8 max_acpi_revision; + u8 max_uefi_revision; +} __packed; + struct iwl_fw_tcm_error_addr { __le32 addr; }; /* FW_TLV_TCM_ERROR_INFO_ADDRS_S */ diff --git a/drivers/net/wireless/intel/iwlwifi/fw/img.c b/drivers/net/wireless/intel/iwlwifi/fw/img.c index c2f4fc83a22c..3cc1e3ae0858 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/img.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/img.c @@ -1,11 +1,41 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* * Copyright(c) 2019 - 2021 Intel Corporation - * Copyright(c) 2024 Intel Corporation + * Copyright(c) 2024 - 2025 Intel Corporation */ #include #include "img.h" +u8 iwl_fw_lookup_cmd_bios_supported_revision(const struct iwl_fw *fw, + enum bios_source table_source, + u32 cmd_id, u8 def) +{ + const struct iwl_fw_cmd_bios_table *entry; + /* prior to LONG_GROUP, we never used this CMD version API */ + u8 grp = iwl_cmd_groupid(cmd_id) ?: LONG_GROUP; + u8 cmd = iwl_cmd_opcode(cmd_id); + + if (table_source != BIOS_SOURCE_ACPI && + table_source != BIOS_SOURCE_UEFI) + return def; + + if (!fw->ucode_capa.cmd_bios_tables || + !fw->ucode_capa.n_cmd_bios_tables) + return def; + + entry = fw->ucode_capa.cmd_bios_tables; + for (int i = 0; i < fw->ucode_capa.n_cmd_bios_tables; i++, entry++) { + if (entry->group == grp && entry->cmd == cmd) { + if (table_source == BIOS_SOURCE_ACPI) + return entry->max_acpi_revision; + return entry->max_uefi_revision; + } + } + + return def; +} +EXPORT_SYMBOL_GPL(iwl_fw_lookup_cmd_bios_supported_revision); + u8 iwl_fw_lookup_cmd_ver(const struct iwl_fw *fw, u32 cmd_id, u8 def) { const struct iwl_fw_cmd_version *entry; diff --git a/drivers/net/wireless/intel/iwlwifi/fw/img.h b/drivers/net/wireless/intel/iwlwifi/fw/img.h index 045a3e009429..94113d1db8e1 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/img.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/img.h @@ -9,6 +9,7 @@ #include #include "api/dbg-tlv.h" +#include "api/nvm-reg.h" #include "file.h" #include "error-dump.h" @@ -57,6 +58,9 @@ struct iwl_ucode_capabilities { const struct iwl_fw_cmd_version *cmd_versions; u32 n_cmd_versions; + + const struct iwl_fw_cmd_bios_table *cmd_bios_tables; + u32 n_cmd_bios_tables; }; static inline bool @@ -274,6 +278,10 @@ iwl_get_ucode_image(const struct iwl_fw *fw, enum iwl_ucode_type ucode_type) return &fw->img[ucode_type]; } +u8 iwl_fw_lookup_cmd_bios_supported_revision(const struct iwl_fw *fw, + enum bios_source table_source, + u32 cmd_id, u8 def); + u8 iwl_fw_lookup_cmd_ver(const struct iwl_fw *fw, u32 cmd_id, u8 def); u8 iwl_fw_lookup_notif_ver(const struct iwl_fw *fw, u8 grp, u8 cmd, u8 def); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c index 475b3e417efa..4cdd0fe1b788 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c @@ -133,6 +133,7 @@ static void iwl_dealloc_ucode(struct iwl_drv *drv) kfree(drv->fw.dbg.mem_tlv); kfree(drv->fw.iml); kfree(drv->fw.ucode_capa.cmd_versions); + kfree(drv->fw.ucode_capa.cmd_bios_tables); kfree(drv->fw.phy_integration_ver); kfree(drv->trans->dbg.pc_data); drv->trans->dbg.pc_data = NULL; @@ -1426,6 +1427,26 @@ static int iwl_parse_tlv_firmware(struct iwl_drv *drv, return -ENOMEM; drv->fw.pnvm_size = tlv_len; break; + case IWL_UCODE_TLV_CMD_BIOS_TABLE: + if (tlv_len % sizeof(struct iwl_fw_cmd_bios_table)) { + IWL_ERR(drv, + "Invalid length for command bios table: %u\n", + tlv_len); + return -EINVAL; + } + + if (capa->cmd_bios_tables) { + IWL_ERR(drv, "Duplicate TLV type 0x%02X detected\n", + tlv_type); + return -EINVAL; + } + capa->cmd_bios_tables = kmemdup(tlv_data, tlv_len, + GFP_KERNEL); + if (!capa->cmd_bios_tables) + return -ENOMEM; + capa->n_cmd_bios_tables = + tlv_len / sizeof(struct iwl_fw_cmd_bios_table); + break; default: IWL_DEBUG_INFO(drv, "unknown TLV: %d\n", tlv_type); break; diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index 6d235c417fdd..e5b08c4e7f15 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -2031,7 +2031,7 @@ struct iwl_nvm_data *iwl_get_nvm(struct iwl_trans *trans, if (empty_otp) IWL_INFO(trans, "OTP is empty\n"); - nvm = kzalloc_flex(*nvm, channels, IWL_NUM_CHANNELS); + nvm = kzalloc_flex(*nvm, channels, IWL_NUM_CHANNELS_V2); if (!nvm) { ret = -ENOMEM; goto out; From 5e35b749fe25ee963b5766d2c217e6f3b0e41b18 Mon Sep 17 00:00:00 2001 From: Pagadala Yesu Anjaneyulu Date: Fri, 20 Mar 2026 10:09:11 +0200 Subject: [PATCH 0903/1561] wifi: iwlwifi: mld: eliminate duplicate WIDE_ID in PPAG command handling Extract the PER_PLATFORM_ANT_GAIN_CMD command ID into a local variable to avoid duplicating WIDE_ID(PHY_OPS_GROUP, PER_PLATFORM_ANT_GAIN_CMD). Signed-off-by: Pagadala Yesu Anjaneyulu Reviewed-by: Emmanuel Grumbach Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260320100746.7b7e6315e2cc.Icffcc47ac1e876708b6219a89fd546a018797d44@changeid --- drivers/net/wireless/intel/iwlwifi/mld/regulatory.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/regulatory.c b/drivers/net/wireless/intel/iwlwifi/mld/regulatory.c index 2486d78d6fc3..f91f61ca9b2e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/regulatory.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/regulatory.c @@ -211,10 +211,8 @@ static int iwl_mld_ppag_send_cmd(struct iwl_mld *mld) .v8.ppag_config_info.hdr.table_revision = fwrt->ppag_bios_rev, .v8.ppag_config_info.value = cpu_to_le32(fwrt->ppag_flags), }; - int cmd_ver = - iwl_fw_lookup_cmd_ver(mld->fw, - WIDE_ID(PHY_OPS_GROUP, - PER_PLATFORM_ANT_GAIN_CMD), 1); + u32 cmd_id = WIDE_ID(PHY_OPS_GROUP, PER_PLATFORM_ANT_GAIN_CMD); + int cmd_ver = iwl_fw_lookup_cmd_ver(mld->fw, cmd_id, 1); int cmd_len = sizeof(cmd.v8); int ret; @@ -271,9 +269,7 @@ static int iwl_mld_ppag_send_cmd(struct iwl_mld *mld) } IWL_DEBUG_RADIO(mld, "Sending PER_PLATFORM_ANT_GAIN_CMD\n"); - ret = iwl_mld_send_cmd_pdu(mld, WIDE_ID(PHY_OPS_GROUP, - PER_PLATFORM_ANT_GAIN_CMD), - &cmd, cmd_len); + ret = iwl_mld_send_cmd_pdu(mld, cmd_id, &cmd, cmd_len); if (ret < 0) IWL_ERR(mld, "failed to send PER_PLATFORM_ANT_GAIN_CMD (%d)\n", ret); From 454e9141ae96691ad380f7f3f7d11fe652a03cb5 Mon Sep 17 00:00:00 2001 From: Avinash Bhatt Date: Fri, 20 Mar 2026 10:09:12 +0200 Subject: [PATCH 0904/1561] wifi: iwlwifi: add CQM event support for per-link RSSI changes Implement CQM RSSI threshold handling by tracking the last reported RSSI and issuing CQM low/high events when the RSSI crosses the configured threshold with the required hysteresis. This provides proper CQM support and enables userspace to receive per-link RSSI notifications. Signed-off-by: Avinash Bhatt Reviewed-by: Emmanuel Grumbach Reviewed-by: Johannes Berg Reviewed-by: Pagadala Yesu Anjaneyulu Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260320100746.08697e34bf66.Ic1a68537ef0d37be62c73c138efe9c5cf09bd24c@changeid --- drivers/net/wireless/intel/iwlwifi/mld/link.h | 2 ++ .../net/wireless/intel/iwlwifi/mld/stats.c | 26 ++++++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/link.h b/drivers/net/wireless/intel/iwlwifi/mld/link.h index 9e4da8e4de93..ca691259fc5e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/link.h +++ b/drivers/net/wireless/intel/iwlwifi/mld/link.h @@ -40,6 +40,7 @@ struct iwl_probe_resp_data { * @bcast_sta: station used for broadcast packets. Used in AP, GO and IBSS. * @mcast_sta: station used for multicast packets. Used in AP, GO and IBSS. * @mon_sta: station used for TX injection in monitor interface. + * @last_cqm_rssi_event: rssi of the last cqm rssi event * @average_beacon_energy: average beacon energy for beacons received during * client connections * @ap_early_keys: The firmware cannot install keys before bcast/mcast STAs, @@ -66,6 +67,7 @@ struct iwl_mld_link { struct iwl_mld_int_sta bcast_sta; struct iwl_mld_int_sta mcast_sta; struct iwl_mld_int_sta mon_sta; + int last_cqm_rssi_event; /* we can only have 2 GTK + 2 IGTK + 2 BIGTK active at a time */ struct ieee80211_key_conf *ap_early_keys[6]; diff --git a/drivers/net/wireless/intel/iwlwifi/mld/stats.c b/drivers/net/wireless/intel/iwlwifi/mld/stats.c index 7b8709716324..9b3149b9d2c2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/stats.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/stats.c @@ -369,15 +369,39 @@ out: static void iwl_mld_update_link_sig(struct ieee80211_vif *vif, int sig, struct ieee80211_bss_conf *bss_conf) { + struct iwl_mld_link *link = iwl_mld_link_from_mac80211(bss_conf); struct iwl_mld *mld = iwl_mld_vif_from_mac80211(vif)->mld; int exit_emlsr_thresh; + int last_event; if (sig == 0) { IWL_DEBUG_RX(mld, "RSSI is 0 - skip signal based decision\n"); return; } - /* TODO: task=statistics handle CQM notifications */ + if (WARN_ON(!link)) + return; + + /* CQM Notification */ + if (vif->driver_flags & IEEE80211_VIF_SUPPORTS_CQM_RSSI) { + int thold = bss_conf->cqm_rssi_thold; + int hyst = bss_conf->cqm_rssi_hyst; + + last_event = link->last_cqm_rssi_event; + if (thold && sig < thold && + (last_event == 0 || sig < last_event - hyst)) { + link->last_cqm_rssi_event = sig; + ieee80211_cqm_rssi_notify(vif, + NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW, + sig, GFP_KERNEL); + } else if (sig > thold && + (last_event == 0 || sig > last_event + hyst)) { + link->last_cqm_rssi_event = sig; + ieee80211_cqm_rssi_notify(vif, + NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH, + sig, GFP_KERNEL); + } + } if (!iwl_mld_emlsr_active(vif)) { /* We're not in EMLSR and our signal is bad, From 6e6d8f344dbb00f232e73dbd0813cfabdd35b15f Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Fri, 20 Mar 2026 10:09:13 +0200 Subject: [PATCH 0905/1561] wifi: iwlwifi: validate the channels received in iwl_mcc_update_resp_v* Check with IWL_FW_CHECK that the FW doesn't send a channel that we don't support. Otherwise, the center frequency will be 0, leading to a warning since is_valid_reg_rule will return false, of course. Although the warning is verbose enough, the IWL_FW_CHECK will spare some of the debug. Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260320100746.0e83cdd88cea.Ic86852e622ed3ec06110f9e6525f72679236cf1e@changeid --- drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index e5b08c4e7f15..8f3f651451bb 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -23,6 +23,8 @@ #include "fw/api/commands.h" #include "fw/api/cmdhdr.h" #include "fw/img.h" +#include "fw/dbg.h" + #include "mei/iwl-mei.h" /* NVM offsets (in words) definitions */ @@ -1702,6 +1704,11 @@ iwl_parse_nvm_mcc_info(struct iwl_trans *trans, band); new_rule = false; + if (IWL_FW_CHECK(trans, !center_freq, + "Invalid channel %d (idx %d) in NVM\n", + nvm_chan[ch_idx], ch_idx)) + continue; + if (!(ch_flags & NVM_CHANNEL_VALID)) { iwl_nvm_print_channel_flags(dev, IWL_DL_LAR, nvm_chan[ch_idx], ch_flags); From 4aece67f1cb049b5f42e18d76979a558a5705890 Mon Sep 17 00:00:00 2001 From: Pagadala Yesu Anjaneyulu Date: Fri, 20 Mar 2026 10:09:14 +0200 Subject: [PATCH 0906/1561] wifi: iwlwifi: mld: add BIOS revision compatibility check for PPAG command Prevent potential issues when newer BIOS revisions are used with firmware that doesn't support them for PER_PLATFORM_ANT_GAIN_CMD. Without this check, the driver may attempt to use BIOS configurations that are incompatible with the current firmware version, leading to dropping of command in firmware without any failure notification to driver. Signed-off-by: Pagadala Yesu Anjaneyulu Reviewed-by: Emmanuel Grumbach Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260320100746.79bf2bf398d0.I8161dbe1a04af3738e00ab0fc13fe3dbfa9094ec@changeid --- .../net/wireless/intel/iwlwifi/mld/regulatory.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/regulatory.c b/drivers/net/wireless/intel/iwlwifi/mld/regulatory.c index f91f61ca9b2e..659243ada86c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/regulatory.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/regulatory.c @@ -214,6 +214,7 @@ static int iwl_mld_ppag_send_cmd(struct iwl_mld *mld) u32 cmd_id = WIDE_ID(PHY_OPS_GROUP, PER_PLATFORM_ANT_GAIN_CMD); int cmd_ver = iwl_fw_lookup_cmd_ver(mld->fw, cmd_id, 1); int cmd_len = sizeof(cmd.v8); + u8 cmd_bios_rev; int ret; BUILD_BUG_ON(offsetof(typeof(cmd), v8.ppag_config_info.hdr) != @@ -249,6 +250,10 @@ static int iwl_mld_ppag_send_cmd(struct iwl_mld *mld) } } cmd_len = sizeof(cmd.v7); + cmd_bios_rev = + iwl_fw_lookup_cmd_bios_supported_revision(fwrt->fw, + fwrt->ppag_bios_source, + cmd_id, 4); } else if (cmd_ver == 8) { for (int chain = 0; chain < ARRAY_SIZE(cmd.v8.gain); chain++) { for (int subband = 0; @@ -262,12 +267,23 @@ static int iwl_mld_ppag_send_cmd(struct iwl_mld *mld) cmd.v8.gain[chain][subband]); } } + cmd_bios_rev = + iwl_fw_lookup_cmd_bios_supported_revision(fwrt->fw, + fwrt->ppag_bios_source, + cmd_id, 5); } else { WARN(1, "Bad version for PER_PLATFORM_ANT_GAIN_CMD %d\n", cmd_ver); return -EINVAL; } + if (cmd_bios_rev < fwrt->ppag_bios_rev) { + IWL_ERR(mld, + "BIOS revision compatibility check failed - Supported: %d, Current: %d\n", + cmd_bios_rev, fwrt->ppag_bios_rev); + return 0; + } + IWL_DEBUG_RADIO(mld, "Sending PER_PLATFORM_ANT_GAIN_CMD\n"); ret = iwl_mld_send_cmd_pdu(mld, cmd_id, &cmd, cmd_len); if (ret < 0) From 1793f23177b6e0543618051f00a402f0dbf5f88c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 20 Mar 2026 10:09:15 +0200 Subject: [PATCH 0907/1561] wifi: iwlwifi: use IWL_FW_CHECK for sync timeout This could be a firmware issue, it didn't send all the responses quickly enough. There are other potential issues (interrupts not being delivered, etc.) but the FW debug data will at least give some better information, and it's not a WARN condition anyway. Signed-off-by: Johannes Berg Reviewed-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260320100746.2188e2efbead.I7dc5bd6f581a31ac51d8a854f3b3af4cb980223a@changeid --- drivers/net/wireless/intel/iwlwifi/mld/rx.c | 5 +++-- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 7 ++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/rx.c b/drivers/net/wireless/intel/iwlwifi/mld/rx.c index ff6e71e3ff6e..6f40d6e47083 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/rx.c @@ -2211,8 +2211,9 @@ void iwl_mld_sync_rx_queues(struct iwl_mld *mld, ret = wait_event_timeout(mld->rxq_sync.waitq, READ_ONCE(mld->rxq_sync.state) == 0, SYNC_RX_QUEUE_TIMEOUT); - WARN_ONCE(!ret, "RXQ sync failed: state=0x%lx, cookie=%d\n", - mld->rxq_sync.state, mld->rxq_sync.cookie); + IWL_FW_CHECK(mld, !ret, + "RXQ sync failed: state=0x%lx, cookie=%d\n", + mld->rxq_sync.state, mld->rxq_sync.cookie); out: mld->rxq_sync.state = 0; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 090791fe0638..1ec9807e4827 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -6229,9 +6229,10 @@ void iwl_mvm_sync_rx_queues_internal(struct iwl_mvm *mvm, ret = wait_event_timeout(mvm->rx_sync_waitq, READ_ONCE(mvm->queue_sync_state) == 0, SYNC_RX_QUEUE_TIMEOUT); - WARN_ONCE(!ret, "queue sync: failed to sync, state is 0x%lx, cookie %d\n", - mvm->queue_sync_state, - mvm->queue_sync_cookie); + IWL_FW_CHECK(mvm, !ret, + "queue sync: failed to sync, state is 0x%lx, cookie %d\n", + mvm->queue_sync_state, + mvm->queue_sync_cookie); } out: From 4a481720106d6bad1521d0e0322fd74fa2f6c464 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 20 Mar 2026 10:09:16 +0200 Subject: [PATCH 0908/1561] wifi: iwlwifi: pcie: don't dump on reset handshake in dump When a FW dump happens, possibly even because of a reset handshake timeout, there's no point in attempting to dump again. Since all the callers of the function outside the transport itself are from the FW dump infrastructure, just split the internal function and make the external one not dump on timeout. Signed-off-by: Johannes Berg Reviewed-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260320100746.f36ba3893899.I063ccc3a037ae6dabcde61941acb162c4b33f127@changeid --- .../wireless/intel/iwlwifi/pcie/gen1_2/trans-gen2.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/trans-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/trans-gen2.c index b15c5d486527..a50e845cea42 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/trans-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/trans-gen2.c @@ -95,7 +95,9 @@ static void iwl_pcie_gen2_apm_stop(struct iwl_trans *trans, bool op_mode_leave) CSR_GP_CNTRL_REG_FLAG_INIT_DONE); } -void iwl_trans_pcie_fw_reset_handshake(struct iwl_trans *trans) +static void +_iwl_trans_pcie_fw_reset_handshake(struct iwl_trans *trans, + bool dump_on_timeout) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); int ret; @@ -133,7 +135,7 @@ void iwl_trans_pcie_fw_reset_handshake(struct iwl_trans *trans) "timeout waiting for FW reset ACK (inta_hw=0x%x, reset_done %d)\n", inta_hw, reset_done); - if (!reset_done) { + if (!reset_done && dump_on_timeout) { struct iwl_fw_error_dump_mode mode = { .type = IWL_ERR_TYPE_RESET_HS_TIMEOUT, .context = IWL_ERR_CONTEXT_FROM_OPMODE, @@ -147,6 +149,11 @@ void iwl_trans_pcie_fw_reset_handshake(struct iwl_trans *trans) trans_pcie->fw_reset_state = FW_RESET_IDLE; } +void iwl_trans_pcie_fw_reset_handshake(struct iwl_trans *trans) +{ + _iwl_trans_pcie_fw_reset_handshake(trans, false); +} + static void _iwl_trans_pcie_gen2_stop_device(struct iwl_trans *trans) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); @@ -163,7 +170,7 @@ static void _iwl_trans_pcie_gen2_stop_device(struct iwl_trans *trans) * should assume that the firmware is already dead. */ trans->state = IWL_TRANS_NO_FW; - iwl_trans_pcie_fw_reset_handshake(trans); + _iwl_trans_pcie_fw_reset_handshake(trans, true); } trans_pcie->is_down = true; From d86aeb5f6fbcdb4b678380fa4b02c663e394488e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 20 Mar 2026 10:09:17 +0200 Subject: [PATCH 0909/1561] wifi: iwlwifi: mld: make iwl_mld_mac80211_iftype_to_fw() static This function is only used within the file, so make it static. Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260320100746.45867b060b3d.Iee64056fab7881ea5146433bacef8c2e936c45b1@changeid --- drivers/net/wireless/intel/iwlwifi/mld/iface.c | 2 +- drivers/net/wireless/intel/iwlwifi/mld/iface.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/iface.c b/drivers/net/wireless/intel/iwlwifi/mld/iface.c index 29df747c8938..5c7f2435ecb9 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/iface.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/iface.c @@ -74,7 +74,7 @@ static int iwl_mld_send_mac_cmd(struct iwl_mld *mld, return ret; } -int iwl_mld_mac80211_iftype_to_fw(const struct ieee80211_vif *vif) +static int iwl_mld_mac80211_iftype_to_fw(const struct ieee80211_vif *vif) { switch (vif->type) { case NL80211_IFTYPE_STATION: diff --git a/drivers/net/wireless/intel/iwlwifi/mld/iface.h b/drivers/net/wireless/intel/iwlwifi/mld/iface.h index 62fca166afd1..3e106c93f0db 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/iface.h +++ b/drivers/net/wireless/intel/iwlwifi/mld/iface.h @@ -219,8 +219,6 @@ iwl_mld_link_from_mac80211(struct ieee80211_bss_conf *bss_conf) return iwl_mld_link_dereference_check(mld_vif, bss_conf->link_id); } -int iwl_mld_mac80211_iftype_to_fw(const struct ieee80211_vif *vif); - /* Cleanup function for struct iwl_mld_vif, will be called in restart */ void iwl_mld_cleanup_vif(void *data, u8 *mac, struct ieee80211_vif *vif); int iwl_mld_mac_fw_action(struct iwl_mld *mld, struct ieee80211_vif *vif, From f1011219309730a26e13e9b190168fceef6b8679 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 20 Mar 2026 10:09:18 +0200 Subject: [PATCH 0910/1561] wifi: iwlwifi: mld: remove type argument from iwl_mld_add_sta() This is used only in a single place, and the caller always sets the type to STATION_TYPE_PEER right now. We need to change some of this for NAN in the future, removing the type argument will simplify that. Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260320100746.71841a054f16.I1851148e582eb710261740459a46d22720788926@changeid --- drivers/net/wireless/intel/iwlwifi/mld/mac80211.c | 2 +- drivers/net/wireless/intel/iwlwifi/mld/sta.c | 4 ++-- drivers/net/wireless/intel/iwlwifi/mld/sta.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c index 79f4dc271c34..8ab56788a491 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c @@ -1751,7 +1751,7 @@ static int iwl_mld_move_sta_state_up(struct iwl_mld *mld, return -EBUSY; } - ret = iwl_mld_add_sta(mld, sta, vif, STATION_TYPE_PEER); + ret = iwl_mld_add_sta(mld, sta, vif); if (ret) return ret; diff --git a/drivers/net/wireless/intel/iwlwifi/mld/sta.c b/drivers/net/wireless/intel/iwlwifi/mld/sta.c index f40c49377466..619f302076ad 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/sta.c @@ -755,14 +755,14 @@ iwl_mld_init_sta(struct iwl_mld *mld, struct ieee80211_sta *sta, } int iwl_mld_add_sta(struct iwl_mld *mld, struct ieee80211_sta *sta, - struct ieee80211_vif *vif, enum iwl_fw_sta_type type) + struct ieee80211_vif *vif) { struct iwl_mld_sta *mld_sta = iwl_mld_sta_from_mac80211(sta); struct ieee80211_link_sta *link_sta; int link_id; int ret; - ret = iwl_mld_init_sta(mld, sta, vif, type); + ret = iwl_mld_init_sta(mld, sta, vif, STATION_TYPE_PEER); if (ret) return ret; diff --git a/drivers/net/wireless/intel/iwlwifi/mld/sta.h b/drivers/net/wireless/intel/iwlwifi/mld/sta.h index 1897b121aae2..5f6c440bf058 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/sta.h +++ b/drivers/net/wireless/intel/iwlwifi/mld/sta.h @@ -190,7 +190,7 @@ iwl_mld_link_sta_from_mac80211(struct ieee80211_link_sta *link_sta) } int iwl_mld_add_sta(struct iwl_mld *mld, struct ieee80211_sta *sta, - struct ieee80211_vif *vif, enum iwl_fw_sta_type type); + struct ieee80211_vif *vif); void iwl_mld_remove_sta(struct iwl_mld *mld, struct ieee80211_sta *sta); int iwl_mld_fw_sta_id_from_link_sta(struct iwl_mld *mld, struct ieee80211_link_sta *link_sta); From bac6907b2dadae522dfbe25faa365f1b081adf6a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 21 Mar 2026 19:29:08 +0200 Subject: [PATCH 0911/1561] wifi: iwlwifi: mld: rename iwl_mld_phy_from_mac80211() argument Calling the channel context just "channel" is confusing since it's a different struct, rename it to the more appropriate "chanctx". Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260321192637.b2cf8cfd5902.I9e0006481454445058b96ec3e7ae338e917e2c50@changeid --- drivers/net/wireless/intel/iwlwifi/mld/phy.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/phy.h b/drivers/net/wireless/intel/iwlwifi/mld/phy.h index 0deaf179f07c..6887f9feaa5c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/phy.h +++ b/drivers/net/wireless/intel/iwlwifi/mld/phy.h @@ -32,9 +32,9 @@ struct iwl_mld_phy { }; static inline struct iwl_mld_phy * -iwl_mld_phy_from_mac80211(struct ieee80211_chanctx_conf *channel) +iwl_mld_phy_from_mac80211(struct ieee80211_chanctx_conf *chanctx) { - return (void *)channel->drv_priv; + return (void *)chanctx->drv_priv; } /* Cleanup function for struct iwl_mld_phy, will be called in restart */ From 282fb8c6b1d1a8aa260c467eb067f0c94c80486f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 21 Mar 2026 19:29:09 +0200 Subject: [PATCH 0912/1561] wifi: iwlwifi: mld: make alloc functions not forced static In preparation for NAN needing the link ID allocation, have the macro not automatically make the ID allocation functions static so we can remove that later from the link allocation function. Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260321192637.cbfd202c255f.I4dd4d4416d30bed35bc7b7caa3de50071906830a@changeid --- drivers/net/wireless/intel/iwlwifi/mld/iface.c | 2 +- drivers/net/wireless/intel/iwlwifi/mld/link.c | 2 +- drivers/net/wireless/intel/iwlwifi/mld/mld.h | 4 ++-- drivers/net/wireless/intel/iwlwifi/mld/sta.c | 2 +- drivers/net/wireless/intel/iwlwifi/mld/tests/utils.c | 6 +++--- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/iface.c b/drivers/net/wireless/intel/iwlwifi/mld/iface.c index 5c7f2435ecb9..968247977605 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/iface.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/iface.c @@ -386,7 +386,7 @@ static void iwl_mld_mlo_scan_start_wk(struct wiphy *wiphy, iwl_mld_int_mlo_scan(mld, iwl_mld_vif_to_mac80211(mld_vif)); } -IWL_MLD_ALLOC_FN(vif, vif) +static IWL_MLD_ALLOC_FN(vif, vif) /* Constructor function for struct iwl_mld_vif */ static void diff --git a/drivers/net/wireless/intel/iwlwifi/mld/link.c b/drivers/net/wireless/intel/iwlwifi/mld/link.c index b5430e8a73d6..b66e84d2365f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/link.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/link.c @@ -437,7 +437,7 @@ iwl_mld_rm_link_from_fw(struct iwl_mld *mld, struct ieee80211_bss_conf *link) iwl_mld_send_link_cmd(mld, &cmd, FW_CTXT_ACTION_REMOVE); } -IWL_MLD_ALLOC_FN(link, bss_conf) +static IWL_MLD_ALLOC_FN(link, bss_conf) /* Constructor function for struct iwl_mld_link */ static int diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mld.h b/drivers/net/wireless/intel/iwlwifi/mld/mld.h index 66c7a7d31409..ea3d1fab6f46 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mld.h +++ b/drivers/net/wireless/intel/iwlwifi/mld/mld.h @@ -530,9 +530,9 @@ void iwl_construct_mld(struct iwl_mld *mld, struct iwl_trans *trans, #define IWL_MLD_INVALID_FW_ID 0xff #define IWL_MLD_ALLOC_FN(_type, _mac80211_type) \ -static int \ +int \ iwl_mld_allocate_##_type##_fw_id(struct iwl_mld *mld, \ - u8 *fw_id, \ + u8 *fw_id, \ struct ieee80211_##_mac80211_type *mac80211_ptr) \ { \ u8 rand = IWL_MLD_DIS_RANDOM_FW_ID ? 0 : get_random_u8(); \ diff --git a/drivers/net/wireless/intel/iwlwifi/mld/sta.c b/drivers/net/wireless/intel/iwlwifi/mld/sta.c index 619f302076ad..eda2cbbb3b30 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/sta.c @@ -528,7 +528,7 @@ iwl_mld_add_modify_sta_cmd(struct iwl_mld *mld, return iwl_mld_send_sta_cmd(mld, &cmd); } -IWL_MLD_ALLOC_FN(link_sta, link_sta) +static IWL_MLD_ALLOC_FN(link_sta, link_sta) static int iwl_mld_add_link_sta(struct iwl_mld *mld, struct ieee80211_link_sta *link_sta) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/tests/utils.c b/drivers/net/wireless/intel/iwlwifi/mld/tests/utils.c index 26cf27be762d..176dbbf4c643 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/tests/utils.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/tests/utils.c @@ -68,7 +68,7 @@ int iwlmld_kunit_test_init(struct kunit *test) return 0; } -IWL_MLD_ALLOC_FN(link, bss_conf) +static IWL_MLD_ALLOC_FN(link, bss_conf) static void iwlmld_kunit_init_link(struct ieee80211_vif *vif, struct ieee80211_bss_conf *link, @@ -94,7 +94,7 @@ static void iwlmld_kunit_init_link(struct ieee80211_vif *vif, rcu_assign_pointer(vif->link_conf[link_id], link); } -IWL_MLD_ALLOC_FN(vif, vif) +static IWL_MLD_ALLOC_FN(vif, vif) /* Helper function to add and initialize a VIF for KUnit tests */ struct ieee80211_vif *iwlmld_kunit_add_vif(bool mlo, enum nl80211_iftype type) @@ -199,7 +199,7 @@ void iwlmld_kunit_assign_chanctx_to_link(struct ieee80211_vif *vif, vif->active_links |= BIT(link->link_id); } -IWL_MLD_ALLOC_FN(link_sta, link_sta) +static IWL_MLD_ALLOC_FN(link_sta, link_sta) static void iwlmld_kunit_add_link_sta(struct ieee80211_sta *sta, struct ieee80211_link_sta *link_sta, From 350d91a2ae5ae88a5eae257c8f0d2c33e077f9fc Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 21 Mar 2026 19:29:10 +0200 Subject: [PATCH 0913/1561] wifi: iwlwifi: mld: add double-include guards to nan.h This is missing, but needed when we want to add data structures to this file. Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260321192637.4e09d461db6a.If5c14c495b14a20ce7abadc72be57a40d3462bfb@changeid --- drivers/net/wireless/intel/iwlwifi/mld/nan.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/nan.h b/drivers/net/wireless/intel/iwlwifi/mld/nan.h index c9c83d1012f0..c04d77208971 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/nan.h +++ b/drivers/net/wireless/intel/iwlwifi/mld/nan.h @@ -2,7 +2,8 @@ /* * Copyright (C) 2025 Intel Corporation */ - +#ifndef __iwl_mld_nan_h__ +#define __iwl_mld_nan_h__ #include #include @@ -26,3 +27,5 @@ bool iwl_mld_cancel_nan_cluster_notif(struct iwl_mld *mld, bool iwl_mld_cancel_nan_dw_end_notif(struct iwl_mld *mld, struct iwl_rx_packet *pkt, u32 obj_id); + +#endif /* __iwl_mld_nan_h__ */ From 19a86a3ff3cc400767cd0d23932c763d6e0da53e Mon Sep 17 00:00:00 2001 From: Avinash Bhatt Date: Sat, 21 Mar 2026 19:29:11 +0200 Subject: [PATCH 0914/1561] wifi: iwlwifi: handle NULL/ERR returns from ptp_clock_register() ptp_clock_register() returns NULL when PTP support is disabled and may return an ERR_PTR() on other failures. Reduce Log severity for NULL return cases to avoid misleading errors when PTP is unavailable. Signed-off-by: Avinash Bhatt Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260321192637.adea594600e8.I0e3d3f7ce897c54fff8ace6dd0faf55b4f39832b@changeid --- drivers/net/wireless/intel/iwlwifi/mld/ptp.c | 4 +++- drivers/net/wireless/intel/iwlwifi/mvm/ptp.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/ptp.c b/drivers/net/wireless/intel/iwlwifi/mld/ptp.c index 231920425c06..c65f4b56a327 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/ptp.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/ptp.c @@ -301,10 +301,12 @@ void iwl_mld_ptp_init(struct iwl_mld *mld) mld->ptp_data.ptp_clock = ptp_clock_register(&mld->ptp_data.ptp_clock_info, mld->dev); - if (IS_ERR_OR_NULL(mld->ptp_data.ptp_clock)) { + if (IS_ERR(mld->ptp_data.ptp_clock)) { IWL_ERR(mld, "Failed to register PHC clock (%ld)\n", PTR_ERR(mld->ptp_data.ptp_clock)); mld->ptp_data.ptp_clock = NULL; + } else if (!mld->ptp_data.ptp_clock) { + IWL_DEBUG_INFO(mld, "PTP module unavailable on this kernel\n"); } else { IWL_DEBUG_INFO(mld, "Registered PHC clock: %s, with index: %d\n", mld->ptp_data.ptp_clock_info.name, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ptp.c b/drivers/net/wireless/intel/iwlwifi/mvm/ptp.c index ad156b82eaa9..f7b620136c85 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ptp.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ptp.c @@ -304,7 +304,9 @@ void iwl_mvm_ptp_init(struct iwl_mvm *mvm) IWL_ERR(mvm, "Failed to register PHC clock (%ld)\n", PTR_ERR(mvm->ptp_data.ptp_clock)); mvm->ptp_data.ptp_clock = NULL; - } else if (mvm->ptp_data.ptp_clock) { + } else if (!mvm->ptp_data.ptp_clock) { + IWL_DEBUG_INFO(mvm, "PTP module unavailable on this kernel\n"); + } else { IWL_DEBUG_INFO(mvm, "Registered PHC clock: %s, with index: %d\n", mvm->ptp_data.ptp_clock_info.name, ptp_clock_index(mvm->ptp_data.ptp_clock)); From 62e4d33c7d6945867062f45d09b99dda0dd04108 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 21 Mar 2026 19:29:12 +0200 Subject: [PATCH 0915/1561] wifi: iwlwifi: add MAC context command version 4 Due to NAN additions, this command needs to grow. In iwlmvm we just need to use the old _v3 (or v2) version, but iwlmld needs to handle the difference and send both. Do that as a first step towards adding NAN support. Signed-off-by: Johannes Berg Reviewed-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260321192637.5ab609ca1966.I860737f952865bd0b997f1c190c3891864c7c6ba@changeid --- .../wireless/intel/iwlwifi/fw/api/mac-cfg.h | 64 +++++++++++++++++-- .../net/wireless/intel/iwlwifi/fw/api/mac.h | 6 +- .../net/wireless/intel/iwlwifi/mld/iface.c | 13 +++- .../net/wireless/intel/iwlwifi/mvm/mld-mac.c | 18 +++--- 4 files changed, 81 insertions(+), 20 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h b/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h index 2e3f437686b9..180eb8227582 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h @@ -34,7 +34,8 @@ enum iwl_mac_conf_subcmd_ids { */ CANCEL_CHANNEL_SWITCH_CMD = 0x6, /** - * @MAC_CONFIG_CMD: &struct iwl_mac_config_cmd + * @MAC_CONFIG_CMD: &struct iwl_mac_config_cmd_v3 or + * &struct iwl_mac_config_cmd */ MAC_CONFIG_CMD = 0x8, /** @@ -357,7 +358,7 @@ struct iwl_mac_wifi_gen_support { } __packed; /** - * struct iwl_mac_config_cmd - command structure to configure MAC contexts in + * struct iwl_mac_config_cmd_v3 - command structure to configure MAC contexts in * MLD API for versions 2 and 3 * ( MAC_CONTEXT_CONFIG_CMD = 0x8 ) * @@ -376,7 +377,7 @@ struct iwl_mac_wifi_gen_support { * @client: client mac data * @p2p_dev: mac data for p2p device */ -struct iwl_mac_config_cmd { +struct iwl_mac_config_cmd_v3 { __le32 id_and_color; __le32 action; /* MAC_CONTEXT_TYPE_API_E */ @@ -394,7 +395,62 @@ struct iwl_mac_config_cmd { struct iwl_mac_client_data client; struct iwl_mac_p2p_dev_data p2p_dev; }; -} __packed; /* MAC_CONTEXT_CONFIG_CMD_API_S_VER_2_VER_3 */ +} __packed; /* MAC_CONTEXT_CONFIG_CMD_API_S_VER_2, _VER_3 */ + +/** + * struct iwl_mac_nan_data - NAN specific MAC data + * @ndi_addrs: extra NDI addresses being used + * @ndi_addrs_count: number of extra NDI addresses + */ +struct iwl_mac_nan_data { + struct { + u8 addr[ETH_ALEN]; + __le16 reserved; + } __packed ndi_addrs[2]; + __le32 ndi_addrs_count; +} __packed; /* MAC_CONTEXT_CONFIG_NAN_DATA_API_S_VER_1 */ + +/** + * struct iwl_mac_config_cmd - command structure to configure MAC contexts in + * MLD API for versions 4 + * ( MAC_CONTEXT_CONFIG_CMD = 0x8 ) + * + * @id_and_color: ID and color of the MAC + * @action: action to perform, see &enum iwl_ctxt_action + * @mac_type: one of &enum iwl_mac_types + * @local_mld_addr: mld address + * @reserved_for_local_mld_addr: reserved + * @filter_flags: combination of &enum iwl_mac_config_filter_flags + * @wifi_gen_v2: he/eht parameters as in cmd version 2 + * @wifi_gen: he/eht/uhr parameters as in cmd version 3 + * @nic_not_ack_enabled: mark that the NIC doesn't support receiving + * ACK-enabled AGG, (i.e. both BACK and non-BACK frames in single AGG). + * If the NIC is not ACK_ENABLED it may use the EOF-bit in first non-0 + * len delim to determine if AGG or single. + * @client: client mac data + * @p2p_dev: mac data for p2p device + * @nan: NAN specific data (NAN data interface addresses) + */ +struct iwl_mac_config_cmd { + __le32 id_and_color; + __le32 action; + /* MAC_CONTEXT_TYPE_API_E */ + __le32 mac_type; + u8 local_mld_addr[6]; + __le16 reserved_for_local_mld_addr; + __le32 filter_flags; + union { + struct iwl_mac_wifi_gen_support_v2 wifi_gen_v2; + struct iwl_mac_wifi_gen_support wifi_gen; + }; + __le32 nic_not_ack_enabled; + /* MAC_CONTEXT_CONFIG_SPECIFIC_DATA_API_U_VER_3 */ + union { + struct iwl_mac_client_data client; + struct iwl_mac_p2p_dev_data p2p_dev; + struct iwl_mac_nan_data nan; + }; +} __packed; /* MAC_CONTEXT_CONFIG_CMD_API_S_VER_4 */ /** * enum iwl_link_ctx_modify_flags - indicate to the fw what fields are being diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/mac.h b/drivers/net/wireless/intel/iwlwifi/fw/api/mac.h index 2a174c00b712..439a4530ec9f 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/mac.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/mac.h @@ -57,8 +57,7 @@ enum iwl_mac_protection_flags { * @FW_MAC_TYPE_P2P_DEVICE: P2P Device * @FW_MAC_TYPE_P2P_STA: P2P client * @FW_MAC_TYPE_GO: P2P GO - * @FW_MAC_TYPE_TEST: ? - * @FW_MAC_TYPE_MAX: highest support MAC type + * @FW_MAC_TYPE_NAN: NAN (since version 4) */ enum iwl_mac_types { FW_MAC_TYPE_FIRST = 1, @@ -70,8 +69,7 @@ enum iwl_mac_types { FW_MAC_TYPE_P2P_DEVICE, FW_MAC_TYPE_P2P_STA, FW_MAC_TYPE_GO, - FW_MAC_TYPE_TEST, - FW_MAC_TYPE_MAX = FW_MAC_TYPE_TEST + FW_MAC_TYPE_NAN, }; /* MAC_CONTEXT_TYPE_API_E_VER_1 */ /** diff --git a/drivers/net/wireless/intel/iwlwifi/mld/iface.c b/drivers/net/wireless/intel/iwlwifi/mld/iface.c index 968247977605..21348d2e2ede 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/iface.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/iface.c @@ -61,13 +61,20 @@ void iwl_mld_cleanup_vif(void *data, u8 *mac, struct ieee80211_vif *vif) static int iwl_mld_send_mac_cmd(struct iwl_mld *mld, struct iwl_mac_config_cmd *cmd) { + u16 cmd_id = WIDE_ID(MAC_CONF_GROUP, MAC_CONFIG_CMD); + int len = sizeof(*cmd); int ret; lockdep_assert_wiphy(mld->wiphy); - ret = iwl_mld_send_cmd_pdu(mld, - WIDE_ID(MAC_CONF_GROUP, MAC_CONFIG_CMD), - cmd); + if (iwl_fw_lookup_cmd_ver(mld->fw, cmd_id, 0) < 4) { + if (WARN_ON(cmd->mac_type == cpu_to_le32(FW_MAC_TYPE_NAN))) + return -EINVAL; + + len = sizeof(struct iwl_mac_config_cmd_v3); + } + + ret = iwl_mld_send_cmd_pdu(mld, cmd_id, cmd, len); if (ret) IWL_ERR(mld, "Failed to send MAC_CONFIG_CMD ret = %d\n", ret); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac.c index bf54b90a7c51..b65825747b9d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac.c @@ -6,7 +6,7 @@ static void iwl_mvm_mld_set_he_support(struct iwl_mvm *mvm, struct ieee80211_vif *vif, - struct iwl_mac_config_cmd *cmd, + struct iwl_mac_config_cmd_v3 *cmd, int cmd_ver) { if (vif->type == NL80211_IFTYPE_AP) { @@ -24,7 +24,7 @@ static void iwl_mvm_mld_set_he_support(struct iwl_mvm *mvm, static void iwl_mvm_mld_mac_ctxt_cmd_common(struct iwl_mvm *mvm, struct ieee80211_vif *vif, - struct iwl_mac_config_cmd *cmd, + struct iwl_mac_config_cmd_v3 *cmd, u32 action) { struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); @@ -83,7 +83,7 @@ static void iwl_mvm_mld_mac_ctxt_cmd_common(struct iwl_mvm *mvm, } static int iwl_mvm_mld_mac_ctxt_send_cmd(struct iwl_mvm *mvm, - struct iwl_mac_config_cmd *cmd) + struct iwl_mac_config_cmd_v3 *cmd) { int ret = iwl_mvm_send_cmd_pdu(mvm, WIDE_ID(MAC_CONF_GROUP, MAC_CONFIG_CMD), @@ -98,7 +98,7 @@ static int iwl_mvm_mld_mac_ctxt_cmd_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif, u32 action, bool force_assoc_off) { - struct iwl_mac_config_cmd cmd = {}; + struct iwl_mac_config_cmd_v3 cmd = {}; WARN_ON(vif->type != NL80211_IFTYPE_STATION); @@ -151,7 +151,7 @@ static int iwl_mvm_mld_mac_ctxt_cmd_listener(struct iwl_mvm *mvm, struct ieee80211_vif *vif, u32 action) { - struct iwl_mac_config_cmd cmd = {}; + struct iwl_mac_config_cmd_v3 cmd = {}; WARN_ON(vif->type != NL80211_IFTYPE_MONITOR); @@ -170,7 +170,7 @@ static int iwl_mvm_mld_mac_ctxt_cmd_ibss(struct iwl_mvm *mvm, struct ieee80211_vif *vif, u32 action) { - struct iwl_mac_config_cmd cmd = {}; + struct iwl_mac_config_cmd_v3 cmd = {}; WARN_ON(vif->type != NL80211_IFTYPE_ADHOC); @@ -187,7 +187,7 @@ static int iwl_mvm_mld_mac_ctxt_cmd_p2p_device(struct iwl_mvm *mvm, struct ieee80211_vif *vif, u32 action) { - struct iwl_mac_config_cmd cmd = {}; + struct iwl_mac_config_cmd_v3 cmd = {}; WARN_ON(vif->type != NL80211_IFTYPE_P2P_DEVICE); @@ -210,7 +210,7 @@ static int iwl_mvm_mld_mac_ctxt_cmd_ap_go(struct iwl_mvm *mvm, u32 action) { struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); - struct iwl_mac_config_cmd cmd = {}; + struct iwl_mac_config_cmd_v3 cmd = {}; WARN_ON(vif->type != NL80211_IFTYPE_AP); @@ -286,7 +286,7 @@ int iwl_mvm_mld_mac_ctxt_changed(struct iwl_mvm *mvm, int iwl_mvm_mld_mac_ctxt_remove(struct iwl_mvm *mvm, struct ieee80211_vif *vif) { struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); - struct iwl_mac_config_cmd cmd = { + struct iwl_mac_config_cmd_v3 cmd = { .action = cpu_to_le32(FW_CTXT_ACTION_REMOVE), .id_and_color = cpu_to_le32(mvmvif->id), }; From ab97a6c94c8919373a7a8109a5a27475bd1102bd Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Sat, 21 Mar 2026 19:29:13 +0200 Subject: [PATCH 0916/1561] wifi: iwlwifi: mld: use the dedicated helper to extract a link There is a helper, iwl_mld_fw_id_to_link_conf, that converts a fw link id into the bss_conf structure. Use it in two more places instead of retrieving the bss_conf directly from the fw-id-to-bss_conf mapping array. This required changing the loop bound in iwl_mld_process_per_link_stats() to ucode_capa.num_links, to avoid hitting a IWL_FW_CHECK for link ids > ucode_capa.num_links and < ARRAY_SIZE(fw_id_to_bss_conf), but this change makes sense anyway (there is no reason to iterate links that cannot be valid). Reviewed-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260321192637.f8da2cd2a873.I7fbd3b4a86a5695206bb5083fdac49de9acc9dca@changeid --- drivers/net/wireless/intel/iwlwifi/mld/scan.c | 4 +--- drivers/net/wireless/intel/iwlwifi/mld/stats.c | 5 ++--- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/scan.c b/drivers/net/wireless/intel/iwlwifi/mld/scan.c index 17e0b13b5ce8..7ed107fb0e8d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/scan.c @@ -2055,9 +2055,7 @@ void iwl_mld_handle_scan_complete_notif(struct iwl_mld *mld, struct ieee80211_bss_conf *link_conf = NULL; if (fw_link_id != IWL_MLD_INVALID_FW_ID) - link_conf = - wiphy_dereference(mld->wiphy, - mld->fw_id_to_bss_conf[fw_link_id]); + link_conf = iwl_mld_fw_id_to_link_conf(mld, fw_link_id); /* It is possible that by the time the scan is complete the * link was already removed and is not valid. diff --git a/drivers/net/wireless/intel/iwlwifi/mld/stats.c b/drivers/net/wireless/intel/iwlwifi/mld/stats.c index 9b3149b9d2c2..54eb0ead78ee 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/stats.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/stats.c @@ -431,14 +431,13 @@ iwl_mld_process_per_link_stats(struct iwl_mld *mld, u32 total_airtime_usec = 0; for (u32 fw_id = 0; - fw_id < ARRAY_SIZE(mld->fw_id_to_bss_conf); + fw_id < mld->fw->ucode_capa.num_links; fw_id++) { const struct iwl_stats_ntfy_per_link *link_stats; struct ieee80211_bss_conf *bss_conf; int sig; - bss_conf = wiphy_dereference(mld->wiphy, - mld->fw_id_to_bss_conf[fw_id]); + bss_conf = iwl_mld_fw_id_to_link_conf(mld, fw_id); if (!bss_conf || bss_conf->vif->type != NL80211_IFTYPE_STATION) continue; From 4f1da5cf31cf6345f145e914a0158c2e114bbe27 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Sat, 21 Mar 2026 19:29:14 +0200 Subject: [PATCH 0917/1561] wifi: iwlwifi: mld: always assign a fw id to a vif We used to have a fw id assignment in iwl_mld_init_vif since all interface types that were added to the driver was immediately added to the FW as well. Since NAN was introduced, this is no longer the case - the NAN interface is not added to the fw until a local schedule is configured. For this vif we don't assign a fw id so it is 0 by default. But later, when the vif is removed from the driver, we think that it has a valid fw id (0) and we point fw_id_to_vif[0] to NULL. fw_id_to_vif[0] might actually point to another vif with a valid fw id 0. In this case, we end up messing fw_id_to_vif. Fix this by initializing a vif with a special invalid fw id, and by exiting iwl_mld_rm_vif early for NAN interfaces. Reviewed-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260321192637.f3b5cc59098f.I3d1dbe66bd224cbb786c2b0ab3d1c9f7ec9003e4@changeid --- drivers/net/wireless/intel/iwlwifi/mld/iface.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/iface.c b/drivers/net/wireless/intel/iwlwifi/mld/iface.c index 21348d2e2ede..46c8d943fd55 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/iface.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/iface.c @@ -404,6 +404,7 @@ iwl_mld_init_vif(struct iwl_mld *mld, struct ieee80211_vif *vif) lockdep_assert_wiphy(mld->wiphy); mld_vif->mld = mld; + mld_vif->fw_id = IWL_MLD_INVALID_FW_ID; mld_vif->roc_activity = ROC_NUM_ACTIVITIES; if (!mld->fw_status.in_hw_restart) { @@ -451,6 +452,10 @@ void iwl_mld_rm_vif(struct iwl_mld *mld, struct ieee80211_vif *vif) lockdep_assert_wiphy(mld->wiphy); + /* NAN interface type is not known to FW */ + if (vif->type == NL80211_IFTYPE_NAN) + return; + iwl_mld_mac_fw_action(mld, vif, FW_CTXT_ACTION_REMOVE); if (WARN_ON(mld_vif->fw_id >= ARRAY_SIZE(mld->fw_id_to_vif))) From f2463eff4ad919d1b761eb0db4d4913423dee28d Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Sat, 21 Mar 2026 19:29:15 +0200 Subject: [PATCH 0918/1561] wifi: iwlwifi: add a macro for max FW links Currently we use IWL_FW_MAX_LINK_ID + 1 to indicate the maximum number of link that the fw supports. This is a bit confusing. Add a macro that indicates the number if maximum links that the FW supports and use it instead. Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260321192637.8da9f991526f.I72709f1db90036265c98c5d45682bcf5f36be7ba@changeid --- drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h | 1 + drivers/net/wireless/intel/iwlwifi/fw/api/stats.h | 5 ++--- drivers/net/wireless/intel/iwlwifi/iwl-drv.c | 2 +- drivers/net/wireless/intel/iwlwifi/mld/mld.h | 2 +- drivers/net/wireless/intel/iwlwifi/mld/sta.c | 2 +- drivers/net/wireless/intel/iwlwifi/mld/sta.h | 2 +- drivers/net/wireless/intel/iwlwifi/mld/tests/utils.c | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h b/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h index 180eb8227582..25c57753ff34 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h @@ -709,6 +709,7 @@ struct iwl_link_config_cmd { */ #define IWL_FW_MAX_ACTIVE_LINKS_NUM 2 #define IWL_FW_MAX_LINK_ID 3 +#define IWL_FW_MAX_LINKS IWL_FW_MAX_LINK_ID + 1 /** * enum iwl_fw_sta_type - FW station types diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/stats.h b/drivers/net/wireless/intel/iwlwifi/fw/api/stats.h index 8d9a5058d5a5..68983f6a0026 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/stats.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/stats.h @@ -598,7 +598,6 @@ struct iwl_stats_ntfy_per_sta { } __packed; /* STATISTICS_NTFY_PER_STA_API_S_VER_1 */ #define IWL_STATS_MAX_PHY_OPERATIONAL 3 -#define IWL_STATS_MAX_FW_LINKS (IWL_FW_MAX_LINK_ID + 1) /** * struct iwl_system_statistics_notif_oper - statistics notification @@ -610,7 +609,7 @@ struct iwl_stats_ntfy_per_sta { */ struct iwl_system_statistics_notif_oper { __le32 time_stamp; - struct iwl_stats_ntfy_per_link per_link[IWL_STATS_MAX_FW_LINKS]; + struct iwl_stats_ntfy_per_link per_link[IWL_FW_MAX_LINKS]; struct iwl_stats_ntfy_per_phy per_phy[IWL_STATS_MAX_PHY_OPERATIONAL]; struct iwl_stats_ntfy_per_sta per_sta[IWL_STATION_COUNT_MAX]; } __packed; /* STATISTICS_FW_NTFY_OPERATIONAL_API_S_VER_3 */ @@ -624,7 +623,7 @@ struct iwl_system_statistics_notif_oper { */ struct iwl_system_statistics_part1_notif_oper { __le32 time_stamp; - struct iwl_stats_ntfy_part1_per_link per_link[IWL_STATS_MAX_FW_LINKS]; + struct iwl_stats_ntfy_part1_per_link per_link[IWL_FW_MAX_LINKS]; __le32 per_phy_crc_error_stats[IWL_STATS_MAX_PHY_OPERATIONAL]; } __packed; /* STATISTICS_FW_NTFY_OPERATIONAL_PART1_API_S_VER_4 */ diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c index 4cdd0fe1b788..d5ded4d3a30b 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c @@ -1315,7 +1315,7 @@ static int iwl_parse_tlv_firmware(struct iwl_drv *drv, if (tlv_len != sizeof(u32)) goto invalid_tlv_len; if (le32_to_cpup((const __le32 *)tlv_data) > - IWL_FW_MAX_LINK_ID + 1) { + IWL_FW_MAX_LINKS) { IWL_ERR(drv, "%d is an invalid number of links\n", le32_to_cpup((const __le32 *)tlv_data)); diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mld.h b/drivers/net/wireless/intel/iwlwifi/mld/mld.h index ea3d1fab6f46..606cb64f8ea4 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mld.h +++ b/drivers/net/wireless/intel/iwlwifi/mld/mld.h @@ -205,7 +205,7 @@ struct iwl_mld { /* Add here fields that need clean up on restart */ struct_group(zeroed_on_hw_restart, - struct ieee80211_bss_conf __rcu *fw_id_to_bss_conf[IWL_FW_MAX_LINK_ID + 1]; + struct ieee80211_bss_conf __rcu *fw_id_to_bss_conf[IWL_FW_MAX_LINKS]; struct ieee80211_vif __rcu *fw_id_to_vif[NUM_MAC_INDEX_DRIVER]; struct ieee80211_txq __rcu *fw_id_to_txq[IWL_MAX_TVQM_QUEUES]; u8 used_phy_ids: NUM_PHY_CTX; diff --git a/drivers/net/wireless/intel/iwlwifi/mld/sta.c b/drivers/net/wireless/intel/iwlwifi/mld/sta.c index eda2cbbb3b30..4c97d12ce2d0 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/sta.c @@ -938,7 +938,7 @@ static void iwl_mld_count_mpdu(struct ieee80211_link_sta *link_sta, int queue, if (!(mld_vif->emlsr.blocked_reasons & IWL_MLD_EMLSR_BLOCKED_TPT)) goto unlock; - for (int i = 0; i <= IWL_FW_MAX_LINK_ID; i++) + for (int i = 0; i < IWL_FW_MAX_LINKS; i++) total_mpdus += tx ? queue_counter->per_link[i].tx : queue_counter->per_link[i].rx; diff --git a/drivers/net/wireless/intel/iwlwifi/mld/sta.h b/drivers/net/wireless/intel/iwlwifi/mld/sta.h index 5f6c440bf058..36288c2fb38c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/sta.h +++ b/drivers/net/wireless/intel/iwlwifi/mld/sta.h @@ -89,7 +89,7 @@ struct iwl_mld_per_link_mpdu_counter { */ struct iwl_mld_per_q_mpdu_counter { spinlock_t lock; - struct iwl_mld_per_link_mpdu_counter per_link[IWL_FW_MAX_LINK_ID + 1]; + struct iwl_mld_per_link_mpdu_counter per_link[IWL_FW_MAX_LINKS]; unsigned long window_start_time; } ____cacheline_aligned_in_smp; diff --git a/drivers/net/wireless/intel/iwlwifi/mld/tests/utils.c b/drivers/net/wireless/intel/iwlwifi/mld/tests/utils.c index 176dbbf4c643..dce747270167 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/tests/utils.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/tests/utils.c @@ -42,7 +42,7 @@ int iwlmld_kunit_test_init(struct kunit *test) iwl_construct_mld(mld, trans, cfg, fw, hw, NULL); fw->ucode_capa.num_stations = IWL_STATION_COUNT_MAX; - fw->ucode_capa.num_links = IWL_FW_MAX_LINK_ID + 1; + fw->ucode_capa.num_links = IWL_FW_MAX_LINKS; mld->fwrt.trans = trans; mld->fwrt.fw = fw; From b008f2860015c7f72b77ae3c4fa0acd828930001 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sat, 21 Mar 2026 19:29:16 +0200 Subject: [PATCH 0919/1561] wifi: iwlwifi: mld: update the TLC when we deactivate a link We hit a problem in the channel switch flow. We had link 0 using PHY 0, so the TLC object in the firmware is using PHY 0. Then we switched channel, so mac80211 / iwlmld: * deactivated link 0 * removed PHY 0 * added PHY 1 * modified link 0 to use PHY 1 * activated link 0. The TLC object was not updated and the firmware was unhappy that the TLC was still trying to use PHY 0. Fix that by letting the TLC know about the PHY context before the link activation. When we are de-activating a link, let the TLC know so that it'll send a TLC configuration command with an invalid PHY context to remove the relationship between the TLC object and the PHY that is going to be removed. That last part is not implemented yet in the firmware, so leave this as a TODO for now. Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260321192637.317c66b11a31.I591118fa376ed967c0d1a47058c13834bc94605e@changeid --- .../net/wireless/intel/iwlwifi/mld/mac80211.c | 4 ++ drivers/net/wireless/intel/iwlwifi/mld/tlc.c | 50 ++++++++++++++++++- drivers/net/wireless/intel/iwlwifi/mld/tlc.h | 3 ++ 3 files changed, 56 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c index 8ab56788a491..9dec981a2bc5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c @@ -1148,6 +1148,8 @@ int iwl_mld_assign_vif_chanctx(struct ieee80211_hw *hw, /* Now activate the link */ if (iwl_mld_can_activate_link(mld, vif, link)) { + iwl_mld_tlc_update_phy(mld, vif, link); + ret = iwl_mld_activate_link(mld, link); if (ret) goto err; @@ -1209,6 +1211,8 @@ void iwl_mld_unassign_vif_chanctx(struct ieee80211_hw *hw, RCU_INIT_POINTER(mld_link->chan_ctx, NULL); + iwl_mld_tlc_update_phy(mld, vif, link); + /* in the non-MLO case, remove/re-add the link to clean up FW state. * In MLO, it'll be done in drv_change_vif_link */ diff --git a/drivers/net/wireless/intel/iwlwifi/mld/tlc.c b/drivers/net/wireless/intel/iwlwifi/mld/tlc.c index ede385909e38..78d6162d9297 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/tlc.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/tlc.c @@ -9,6 +9,7 @@ #include "hcmd.h" #include "sta.h" #include "phy.h" +#include "iface.h" #include "fw/api/rs.h" #include "fw/api/context.h" @@ -530,6 +531,7 @@ static void iwl_mld_send_tlc_cmd(struct iwl_mld *mld, struct ieee80211_bss_conf *link) { struct iwl_mld_sta *mld_sta = iwl_mld_sta_from_mac80211(link_sta->sta); + struct iwl_mld_link *mld_link = iwl_mld_link_from_mac80211(link); enum nl80211_band band = link->chanreq.oper.chan->band; struct ieee80211_supported_band *sband = mld->hw->wiphy->bands[band]; const struct ieee80211_sta_he_cap *own_he_cap = @@ -566,7 +568,10 @@ static void iwl_mld_send_tlc_cmd(struct iwl_mld *mld, cmd.sta_mask = cpu_to_le32(BIT(fw_sta_id)); - chan_ctx = rcu_dereference_wiphy(mld->wiphy, link->chanctx_conf); + if (WARN_ON_ONCE(!mld_link)) + return; + + chan_ctx = rcu_dereference_wiphy(mld->wiphy, mld_link->chan_ctx); if (WARN_ON(!chan_ctx)) return; @@ -658,6 +663,49 @@ void iwl_mld_config_tlc_link(struct iwl_mld *mld, iwl_mld_send_tlc_cmd(mld, vif, link_sta, link_conf); } +void iwl_mld_tlc_update_phy(struct iwl_mld *mld, struct ieee80211_vif *vif, + struct ieee80211_bss_conf *link_conf) +{ + struct iwl_mld_link *mld_link = iwl_mld_link_from_mac80211(link_conf); + struct ieee80211_chanctx_conf *chan_ctx; + int link_id = link_conf->link_id; + struct ieee80211_sta *sta; + + lockdep_assert_wiphy(mld->wiphy); + + if (WARN_ON(!mld_link)) + return; + + chan_ctx = rcu_dereference_wiphy(mld->wiphy, mld_link->chan_ctx); + + for_each_station(sta, mld->hw) { + struct iwl_mld_sta *mld_sta = iwl_mld_sta_from_mac80211(sta); + struct iwl_mld_link_sta *mld_link_sta; + struct ieee80211_link_sta *link_sta; + + if (mld_sta->vif != vif) + continue; + + link_sta = link_sta_dereference_protected(sta, link_id); + if (!link_sta) + continue; + + mld_link_sta = iwl_mld_link_sta_dereference_check(mld_sta, + link_id); + + /* In recovery flow, the station may not be (yet) in the + * firmware, don't send a TLC command for a station the + * firmware does not know. + */ + if (!mld_link_sta || !mld_link_sta->in_fw) + continue; + + if (chan_ctx) + iwl_mld_config_tlc_link(mld, vif, link_conf, link_sta); + /* TODO: else, remove the TLC object in the firmware */ + } +} + void iwl_mld_config_tlc(struct iwl_mld *mld, struct ieee80211_vif *vif, struct ieee80211_sta *sta) { diff --git a/drivers/net/wireless/intel/iwlwifi/mld/tlc.h b/drivers/net/wireless/intel/iwlwifi/mld/tlc.h index c32f42e8840b..c7ff209c9ab6 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/tlc.h +++ b/drivers/net/wireless/intel/iwlwifi/mld/tlc.h @@ -20,4 +20,7 @@ void iwl_mld_handle_tlc_notif(struct iwl_mld *mld, int iwl_mld_send_tlc_dhc(struct iwl_mld *mld, u8 sta_id, u32 type, u32 data); +void iwl_mld_tlc_update_phy(struct iwl_mld *mld, struct ieee80211_vif *vif, + struct ieee80211_bss_conf *link_conf); + #endif /* __iwl_mld_tlc_h__ */ From 73a20e5d1ed8cd1328674689221ee8df2104aec5 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sat, 21 Mar 2026 19:29:17 +0200 Subject: [PATCH 0920/1561] wifi: iwlwifi: TLC_MNG_CONFIG_CMD can use several structures Depending on the firmware API version, we can use different version of the command. Mention them all in the description. Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260321192637.2c0b1adb8655.Id0cc6cb6996df53a224d29fa541d19b9ee2aa479@changeid --- drivers/net/wireless/intel/iwlwifi/fw/api/datapath.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/datapath.h b/drivers/net/wireless/intel/iwlwifi/fw/api/datapath.h index 6a6e11a57dbf..06370c161fe4 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/datapath.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/datapath.h @@ -56,7 +56,8 @@ enum iwl_data_path_subcmd_ids { RFH_QUEUE_CONFIG_CMD = 0xD, /** - * @TLC_MNG_CONFIG_CMD: &struct iwl_tlc_config_cmd_v4 + * @TLC_MNG_CONFIG_CMD: &struct iwl_tlc_config_cmd_v4 or + * &struct iwl_tlc_config_cmd_v5 or &struct iwl_tlc_config_cmd. */ TLC_MNG_CONFIG_CMD = 0xF, From 4d56037a02bda77844f98bfbb2b91e324f86bd7f Mon Sep 17 00:00:00 2001 From: Avinash Bhatt Date: Sat, 21 Mar 2026 19:29:18 +0200 Subject: [PATCH 0921/1561] wifi: iwlwifi: mld: block EMLSR during TDLS connections TDLS (Tunneled Direct Link Setup) requires single-link operation for direct peer-to-peer communication, which is incompatible with EMLSR (Enhanced Multi-Link Single Radio) mode where the radio switches between multiple links. Block EMLSR when the first TDLS peer is added and unblock when the last TDLS peer is removed. The block/unblock APIs handle exiting EMLSR and triggering link selection automatically. Signed-off-by: Avinash Bhatt Reviewed-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260321192637.c1376b0259dd.I016587eb1570f7a7a64c0c95e0636e955a640350@changeid --- .../net/wireless/intel/iwlwifi/mld/iface.h | 2 ++ .../net/wireless/intel/iwlwifi/mld/mac80211.c | 21 ++++++++++++++++--- drivers/net/wireless/intel/iwlwifi/mld/mlo.c | 3 ++- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/iface.h b/drivers/net/wireless/intel/iwlwifi/mld/iface.h index 3e106c93f0db..0857ae28be8e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/iface.h +++ b/drivers/net/wireless/intel/iwlwifi/mld/iface.h @@ -33,6 +33,7 @@ enum iwl_mld_cca_40mhz_wa_status { * there is an indication that a non-BSS interface is to be added. * @IWL_MLD_EMLSR_BLOCKED_TPT: throughput is too low to make EMLSR worthwhile * @IWL_MLD_EMLSR_BLOCKED_NAN: NAN is preventing EMLSR. + * @IWL_MLD_EMLSR_BLOCKED_TDLS: TDLS connection is preventing EMLSR. */ enum iwl_mld_emlsr_blocked { IWL_MLD_EMLSR_BLOCKED_PREVENTION = 0x1, @@ -42,6 +43,7 @@ enum iwl_mld_emlsr_blocked { IWL_MLD_EMLSR_BLOCKED_TMP_NON_BSS = 0x10, IWL_MLD_EMLSR_BLOCKED_TPT = 0x20, IWL_MLD_EMLSR_BLOCKED_NAN = 0x40, + IWL_MLD_EMLSR_BLOCKED_TDLS = 0x80, }; /** diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c index 9dec981a2bc5..e3aec814aa0d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c @@ -1759,10 +1759,19 @@ static int iwl_mld_move_sta_state_up(struct iwl_mld *mld, if (ret) return ret; - /* just added first TDLS STA, so disable PM */ - if (sta->tdls && tdls_count == 0) + /* just added first TDLS STA, so disable PM and block EMLSR */ + if (sta->tdls && tdls_count == 0) { iwl_mld_update_mac_power(mld, vif, false); + /* TDLS requires single-link operation with + * direct peer communication. + * Block and exit EMLSR when TDLS is established. + */ + iwl_mld_block_emlsr(mld, vif, + IWL_MLD_EMLSR_BLOCKED_TDLS, + iwl_mld_get_primary_link(vif)); + } + if (vif->type == NL80211_IFTYPE_STATION && !sta->tdls) mld_vif->ap_sta = sta; @@ -1898,8 +1907,14 @@ static int iwl_mld_move_sta_state_down(struct iwl_mld *mld, iwl_mld_remove_sta(mld, sta); if (sta->tdls && iwl_mld_tdls_sta_count(mld) == 0) { - /* just removed last TDLS STA, so enable PM */ + /* just removed last TDLS STA, so enable PM + * and unblock EMLSR + */ iwl_mld_update_mac_power(mld, vif, false); + + /* Unblock EMLSR when TDLS connection is torn down */ + iwl_mld_unblock_emlsr(mld, vif, + IWL_MLD_EMLSR_BLOCKED_TDLS); } } else { return -EINVAL; diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mlo.c b/drivers/net/wireless/intel/iwlwifi/mld/mlo.c index f693f92e42b4..9362e02d9e76 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mlo.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mlo.c @@ -13,7 +13,8 @@ HOW(NON_BSS) \ HOW(TMP_NON_BSS) \ HOW(TPT) \ - HOW(NAN) + HOW(NAN) \ + HOW(TDLS) static const char * iwl_mld_get_emlsr_blocked_string(enum iwl_mld_emlsr_blocked blocked) From 1a41221f314c70c73f5d1a03744ec013e6d59269 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Sat, 21 Mar 2026 19:29:19 +0200 Subject: [PATCH 0922/1561] wifi: iwlwifi: mld: introduce iwl_mld_vif_fw_id_valid Introduce a helper function that checks if a vif fw id is valid, and warns if it isn't. Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260321192637.b68d43db2ddc.I11b2b98e115da9eec8f603c5a01a0a9bcd040884@changeid --- drivers/net/wireless/intel/iwlwifi/mld/iface.h | 11 ++++++++++- .../net/wireless/intel/iwlwifi/mld/low_latency.c | 13 +++++++++---- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/iface.h b/drivers/net/wireless/intel/iwlwifi/mld/iface.h index 0857ae28be8e..8dfc79fed253 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/iface.h +++ b/drivers/net/wireless/intel/iwlwifi/mld/iface.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* - * Copyright (C) 2024-2025 Intel Corporation + * Copyright (C) 2024-2026 Intel Corporation */ #ifndef __iwl_mld_iface_h__ #define __iwl_mld_iface_h__ @@ -203,6 +203,15 @@ iwl_mld_vif_to_mac80211(struct iwl_mld_vif *mld_vif) return container_of((void *)mld_vif, struct ieee80211_vif, drv_priv); } +/* Call only for interfaces that were added to the driver! */ +static inline bool iwl_mld_vif_fw_id_valid(struct iwl_mld_vif *mld_vif) +{ + if (WARN_ON(mld_vif->fw_id >= ARRAY_SIZE(mld_vif->mld->fw_id_to_vif))) + return false; + + return true; +} + #define iwl_mld_link_dereference_check(mld_vif, link_id) \ rcu_dereference_check((mld_vif)->link[link_id], \ lockdep_is_held(&mld_vif->mld->wiphy->mtx)) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/low_latency.c b/drivers/net/wireless/intel/iwlwifi/mld/low_latency.c index d39dd36b08e3..a4ddc32e2860 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/low_latency.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/low_latency.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2024-2025 Intel Corporation + * Copyright (C) 2024-2026 Intel Corporation */ #include "mld.h" #include "iface.h" @@ -77,9 +77,12 @@ static void iwl_mld_low_latency_iter(void *_data, u8 *mac, bool prev = mld_vif->low_latency_causes & LOW_LATENCY_TRAFFIC; bool low_latency; - if (WARN_ON(mld_vif->fw_id >= ARRAY_SIZE(mld->low_latency.result))) + if (!iwl_mld_vif_fw_id_valid(mld_vif)) return; + BUILD_BUG_ON(ARRAY_SIZE(mld->fw_id_to_vif) != + ARRAY_SIZE(mld->low_latency.result)); + low_latency = mld->low_latency.result[mld_vif->fw_id]; if (prev != low_latency) @@ -272,8 +275,10 @@ void iwl_mld_low_latency_update_counters(struct iwl_mld *mld, if (WARN_ON_ONCE(!mld->low_latency.pkts_counters)) return; - if (WARN_ON_ONCE(fw_id >= ARRAY_SIZE(counters->vo_vi) || - queue >= mld->trans->info.num_rxqs)) + if (!iwl_mld_vif_fw_id_valid(mld_vif)) + return; + + if (WARN_ON_ONCE(queue >= mld->trans->info.num_rxqs)) return; if (mld->low_latency.stopped) From 1d624e0bd26c0c30bed7246dea9145ce7c5b5850 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sat, 21 Mar 2026 19:29:20 +0200 Subject: [PATCH 0923/1561] wifi: iwlwifi: fix the description of SESSION_PROTECTION_CMD The struct has been renamed to iwl_session_prot_cmd. Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260321192637.56545b097d13.If468c6a666dcf3a52601604bfc8a1c4faa9d320c@changeid --- drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h b/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h index 25c57753ff34..b398c582b867 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* - * Copyright (C) 2012-2014, 2018-2019, 2021-2025 Intel Corporation + * Copyright (C) 2012-2014, 2018-2019, 2021-2026 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2016-2017 Intel Deutschland GmbH */ @@ -26,7 +26,7 @@ enum iwl_mac_conf_subcmd_ids { */ MISSED_VAP_NOTIF = 0xFA, /** - * @SESSION_PROTECTION_CMD: &struct iwl_mvm_session_prot_cmd + * @SESSION_PROTECTION_CMD: &struct iwl_session_prot_cmd */ SESSION_PROTECTION_CMD = 0x5, /** From d35dafca94d99f9d4adc91830b78d41d3cc9f988 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sat, 21 Mar 2026 19:29:21 +0200 Subject: [PATCH 0924/1561] wifi: iwlwifi: reduce the number of prints upon firmware crash When the firmware crashes, we print data to be able to know what happened. The problem is that those prints became excessive as during the course of the years, we added more data without ever removing the prints that were no longer useful. Instead of spamming the log with data that will not help anyone, limit the prints to what is really needed. Signed-off-by: Emmanuel Grumbach Reviewed-by: Eilon Rinat Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260321192637.3bb8b142ff48.Ieacb12bf3bc930a4c28824e31d8e06eda177ba78@changeid --- drivers/net/wireless/intel/iwlwifi/fw/dump.c | 69 +------------------- 1 file changed, 1 insertion(+), 68 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dump.c b/drivers/net/wireless/intel/iwlwifi/fw/dump.c index ddd714cff2f4..c2af66899a78 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dump.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dump.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2012-2014, 2018-2025 Intel Corporation + * Copyright (C) 2012-2014, 2018-2026 Intel Corporation * Copyright (C) 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015-2017 Intel Deutschland GmbH */ @@ -128,19 +128,11 @@ static void iwl_fwrt_dump_umac_error_log(struct iwl_fw_runtime *fwrt) IWL_ERR(fwrt, "0x%08X | %s\n", table.error_id, iwl_fw_lookup_assert_desc(table.error_id)); - IWL_ERR(fwrt, "0x%08X | umac branchlink1\n", table.blink1); - IWL_ERR(fwrt, "0x%08X | umac branchlink2\n", table.blink2); - IWL_ERR(fwrt, "0x%08X | umac interruptlink1\n", table.ilink1); IWL_ERR(fwrt, "0x%08X | umac interruptlink2\n", table.ilink2); IWL_ERR(fwrt, "0x%08X | umac data1\n", table.data1); IWL_ERR(fwrt, "0x%08X | umac data2\n", table.data2); IWL_ERR(fwrt, "0x%08X | umac data3\n", table.data3); - IWL_ERR(fwrt, "0x%08X | umac major\n", table.umac_major); - IWL_ERR(fwrt, "0x%08X | umac minor\n", table.umac_minor); - IWL_ERR(fwrt, "0x%08X | frame pointer\n", table.frame_pointer); - IWL_ERR(fwrt, "0x%08X | stack pointer\n", table.stack_pointer); IWL_ERR(fwrt, "0x%08X | last host cmd\n", table.cmd_header); - IWL_ERR(fwrt, "0x%08X | isr status reg\n", table.nic_isr_pref); } static void iwl_fwrt_dump_lmac_error_log(struct iwl_fw_runtime *fwrt, u8 lmac_num) @@ -200,39 +192,10 @@ static void iwl_fwrt_dump_lmac_error_log(struct iwl_fw_runtime *fwrt, u8 lmac_nu IWL_ERR(fwrt, "0x%08X | %-28s\n", table.error_id, iwl_fw_lookup_assert_desc(table.error_id)); - IWL_ERR(fwrt, "0x%08X | trm_hw_status0\n", table.trm_hw_status0); - IWL_ERR(fwrt, "0x%08X | trm_hw_status1\n", table.trm_hw_status1); - IWL_ERR(fwrt, "0x%08X | branchlink2\n", table.blink2); - IWL_ERR(fwrt, "0x%08X | interruptlink1\n", table.ilink1); IWL_ERR(fwrt, "0x%08X | interruptlink2\n", table.ilink2); IWL_ERR(fwrt, "0x%08X | data1\n", table.data1); IWL_ERR(fwrt, "0x%08X | data2\n", table.data2); IWL_ERR(fwrt, "0x%08X | data3\n", table.data3); - IWL_ERR(fwrt, "0x%08X | beacon time\n", table.bcon_time); - IWL_ERR(fwrt, "0x%08X | tsf low\n", table.tsf_low); - IWL_ERR(fwrt, "0x%08X | tsf hi\n", table.tsf_hi); - IWL_ERR(fwrt, "0x%08X | time gp1\n", table.gp1); - IWL_ERR(fwrt, "0x%08X | time gp2\n", table.gp2); - IWL_ERR(fwrt, "0x%08X | uCode revision type\n", table.fw_rev_type); - IWL_ERR(fwrt, "0x%08X | uCode version major\n", table.major); - IWL_ERR(fwrt, "0x%08X | uCode version minor\n", table.minor); - IWL_ERR(fwrt, "0x%08X | hw version\n", table.hw_ver); - IWL_ERR(fwrt, "0x%08X | board version\n", table.brd_ver); - IWL_ERR(fwrt, "0x%08X | hcmd\n", table.hcmd); - IWL_ERR(fwrt, "0x%08X | isr0\n", table.isr0); - IWL_ERR(fwrt, "0x%08X | isr1\n", table.isr1); - IWL_ERR(fwrt, "0x%08X | isr2\n", table.isr2); - IWL_ERR(fwrt, "0x%08X | isr3\n", table.isr3); - IWL_ERR(fwrt, "0x%08X | isr4\n", table.isr4); - IWL_ERR(fwrt, "0x%08X | last cmd Id\n", table.last_cmd_id); - IWL_ERR(fwrt, "0x%08X | wait_event\n", table.wait_event); - IWL_ERR(fwrt, "0x%08X | l2p_control\n", table.l2p_control); - IWL_ERR(fwrt, "0x%08X | l2p_duration\n", table.l2p_duration); - IWL_ERR(fwrt, "0x%08X | l2p_mhvalid\n", table.l2p_mhvalid); - IWL_ERR(fwrt, "0x%08X | l2p_addr_match\n", table.l2p_addr_match); - IWL_ERR(fwrt, "0x%08X | lmpm_pmg_sel\n", table.lmpm_pmg_sel); - IWL_ERR(fwrt, "0x%08X | timestamp\n", table.u_timestamp); - IWL_ERR(fwrt, "0x%08X | flow_handler\n", table.flow_handler); } /* @@ -264,7 +227,6 @@ static void iwl_fwrt_dump_tcm_error_log(struct iwl_fw_runtime *fwrt, int idx) struct iwl_trans *trans = fwrt->trans; struct iwl_tcm_error_event_table table = {}; u32 base = fwrt->trans->dbg.tcm_error_event_table[idx]; - int i; u32 flag = idx ? IWL_ERROR_EVENT_TABLE_TCM2 : IWL_ERROR_EVENT_TABLE_TCM1; @@ -275,23 +237,10 @@ static void iwl_fwrt_dump_tcm_error_log(struct iwl_fw_runtime *fwrt, int idx) IWL_ERR(fwrt, "TCM%d status:\n", idx + 1); IWL_ERR(fwrt, "0x%08X | error ID\n", table.error_id); - IWL_ERR(fwrt, "0x%08X | tcm branchlink2\n", table.blink2); - IWL_ERR(fwrt, "0x%08X | tcm interruptlink1\n", table.ilink1); IWL_ERR(fwrt, "0x%08X | tcm interruptlink2\n", table.ilink2); IWL_ERR(fwrt, "0x%08X | tcm data1\n", table.data1); IWL_ERR(fwrt, "0x%08X | tcm data2\n", table.data2); IWL_ERR(fwrt, "0x%08X | tcm data3\n", table.data3); - IWL_ERR(fwrt, "0x%08X | tcm log PC\n", table.logpc); - IWL_ERR(fwrt, "0x%08X | tcm frame pointer\n", table.frame_pointer); - IWL_ERR(fwrt, "0x%08X | tcm stack pointer\n", table.stack_pointer); - IWL_ERR(fwrt, "0x%08X | tcm msg ID\n", table.msgid); - IWL_ERR(fwrt, "0x%08X | tcm ISR status\n", table.isr); - for (i = 0; i < ARRAY_SIZE(table.hw_status); i++) - IWL_ERR(fwrt, "0x%08X | tcm HW status[%d]\n", - table.hw_status[i], i); - for (i = 0; i < ARRAY_SIZE(table.sw_status); i++) - IWL_ERR(fwrt, "0x%08X | tcm SW status[%d]\n", - table.sw_status[i], i); } /* @@ -338,26 +287,10 @@ static void iwl_fwrt_dump_rcm_error_log(struct iwl_fw_runtime *fwrt, int idx) IWL_ERR(fwrt, "RCM%d status:\n", idx + 1); IWL_ERR(fwrt, "0x%08X | error ID\n", table.error_id); - IWL_ERR(fwrt, "0x%08X | rcm branchlink2\n", table.blink2); - IWL_ERR(fwrt, "0x%08X | rcm interruptlink1\n", table.ilink1); IWL_ERR(fwrt, "0x%08X | rcm interruptlink2\n", table.ilink2); IWL_ERR(fwrt, "0x%08X | rcm data1\n", table.data1); IWL_ERR(fwrt, "0x%08X | rcm data2\n", table.data2); IWL_ERR(fwrt, "0x%08X | rcm data3\n", table.data3); - IWL_ERR(fwrt, "0x%08X | rcm log PC\n", table.logpc); - IWL_ERR(fwrt, "0x%08X | rcm frame pointer\n", table.frame_pointer); - IWL_ERR(fwrt, "0x%08X | rcm stack pointer\n", table.stack_pointer); - IWL_ERR(fwrt, "0x%08X | rcm msg ID\n", table.msgid); - IWL_ERR(fwrt, "0x%08X | rcm ISR status\n", table.isr); - IWL_ERR(fwrt, "0x%08X | frame HW status\n", table.frame_hw_status); - IWL_ERR(fwrt, "0x%08X | LMAC-to-RCM request mbox\n", - table.mbx_lmac_to_rcm_req); - IWL_ERR(fwrt, "0x%08X | RCM-to-LMAC request mbox\n", - table.mbx_rcm_to_lmac_req); - IWL_ERR(fwrt, "0x%08X | MAC header control\n", table.mh_ctl); - IWL_ERR(fwrt, "0x%08X | MAC header addr1 low\n", table.mh_addr1_lo); - IWL_ERR(fwrt, "0x%08X | MAC header info\n", table.mh_info); - IWL_ERR(fwrt, "0x%08X | MAC header error\n", table.mh_err); } static void iwl_fwrt_dump_iml_error_log(struct iwl_fw_runtime *fwrt) From 95d12fc4ef4522757c25cba866cc4b8a5f01760f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 21 Mar 2026 19:29:22 +0200 Subject: [PATCH 0925/1561] wifi: iwlwifi: mld: set RX_FLAG_RADIOTAP_TLV_AT_END generically Instead of setting this flag in the iwl_mld_radiotap_put_tlv() users, and not even all of them, set it inside the function. Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260321192637.31eff369ccf2.If5cee8f7c767b937891abb6cccf2692068ba7758@changeid --- drivers/net/wireless/intel/iwlwifi/mld/rx.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/rx.c b/drivers/net/wireless/intel/iwlwifi/mld/rx.c index 6f40d6e47083..a2e586c6ea67 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/rx.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2024-2025 Intel Corporation + * Copyright (C) 2024-2026 Intel Corporation */ #include @@ -791,6 +791,9 @@ static void * iwl_mld_radiotap_put_tlv(struct sk_buff *skb, u16 type, u16 len) { struct ieee80211_radiotap_tlv *tlv; + struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb); + + rx_status->flag |= RX_FLAG_RADIOTAP_TLV_AT_END; tlv = skb_put(skb, sizeof(*tlv)); tlv->type = cpu_to_le16(type); @@ -1234,8 +1237,6 @@ static void iwl_mld_rx_eht(struct iwl_mld *mld, struct sk_buff *skb, eht = iwl_mld_radiotap_put_tlv(skb, IEEE80211_RADIOTAP_EHT, eht_len); - rx_status->flag |= RX_FLAG_RADIOTAP_TLV_AT_END; - switch (u32_get_bits(rate_n_flags, RATE_MCS_HE_GI_LTF_MSK)) { case 0: if (he_type == RATE_MCS_HE_TYPE_TRIG) { @@ -1329,7 +1330,6 @@ static void iwl_mld_rx_eht(struct iwl_mld *mld, struct sk_buff *skb, static void iwl_mld_add_rtap_sniffer_config(struct iwl_mld *mld, struct sk_buff *skb) { - struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb); struct ieee80211_radiotap_vendor_content *radiotap; const u16 vendor_data_len = sizeof(mld->monitor.cur_aid); @@ -1353,8 +1353,6 @@ static void iwl_mld_add_rtap_sniffer_config(struct iwl_mld *mld, /* fill the data now */ memcpy(radiotap->data, &mld->monitor.cur_aid, sizeof(mld->monitor.cur_aid)); - - rx_status->flag |= RX_FLAG_RADIOTAP_TLV_AT_END; } #endif @@ -1362,7 +1360,6 @@ static void iwl_mld_add_rtap_sniffer_phy_data(struct iwl_mld *mld, struct sk_buff *skb, struct iwl_rx_phy_air_sniffer_ntfy *ntfy) { - struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb); struct ieee80211_radiotap_vendor_content *radiotap; const u16 vendor_data_len = sizeof(*ntfy); @@ -1382,8 +1379,6 @@ static void iwl_mld_add_rtap_sniffer_phy_data(struct iwl_mld *mld, /* fill the data now */ memcpy(radiotap->data, ntfy, vendor_data_len); - - rx_status->flag |= RX_FLAG_RADIOTAP_TLV_AT_END; } static void From 506e26881751ad4c7093cb06ba46d312af13e33b Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Wed, 25 Mar 2026 15:46:09 +0200 Subject: [PATCH 0926/1561] wifi: mac80211: extract channel logic from link logic The logic that tries to reuse an existing chanctx or create a new one if such doesn't exist will be used for other types of chanctx users. Extract this logic from _ieee80211_link_use_channel. Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260325154550.9a08397a7590.Id24934d14f240f8d38a23f3b1786235bac0b3e60@changeid Signed-off-by: Johannes Berg --- net/mac80211/chan.c | 52 +++++++++++++++++++++++++++++++++------------ 1 file changed, 38 insertions(+), 14 deletions(-) diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 1bcf501cfe8e..bc396d6c64c5 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -2031,6 +2031,36 @@ void __ieee80211_link_release_channel(struct ieee80211_link_data *link, ieee80211_vif_use_reserved_switch(local); } +static struct ieee80211_chanctx * +ieee80211_find_or_create_chanctx(struct ieee80211_sub_if_data *sdata, + const struct ieee80211_chan_req *chanreq, + enum ieee80211_chanctx_mode mode, + bool assign_on_failure, + bool *reused_ctx) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_chanctx *ctx; + int radio_idx; + + lockdep_assert_wiphy(local->hw.wiphy); + + ctx = ieee80211_find_chanctx(local, chanreq, mode); + if (ctx) { + *reused_ctx = true; + return ctx; + } + + *reused_ctx = false; + + if (!ieee80211_find_available_radio(local, chanreq, + sdata->wdev.radio_mask, + &radio_idx)) + return ERR_PTR(-EBUSY); + + return ieee80211_new_chanctx(local, chanreq, mode, + assign_on_failure, radio_idx); +} + int _ieee80211_link_use_channel(struct ieee80211_link_data *link, const struct ieee80211_chan_req *chanreq, enum ieee80211_chanctx_mode mode, @@ -2040,8 +2070,7 @@ int _ieee80211_link_use_channel(struct ieee80211_link_data *link, struct ieee80211_local *local = sdata->local; struct ieee80211_chanctx *ctx; u8 radar_detect_width = 0; - bool reserved = false; - int radio_idx; + bool reused_ctx = false; int ret; lockdep_assert_wiphy(local->hw.wiphy); @@ -2069,17 +2098,8 @@ int _ieee80211_link_use_channel(struct ieee80211_link_data *link, if (!local->in_reconfig) __ieee80211_link_release_channel(link, false); - ctx = ieee80211_find_chanctx(local, chanreq, mode); - /* Note: context will_be_used flag is now set */ - if (ctx) - reserved = true; - else if (!ieee80211_find_available_radio(local, chanreq, - sdata->wdev.radio_mask, - &radio_idx)) - ctx = ERR_PTR(-EBUSY); - else - ctx = ieee80211_new_chanctx(local, chanreq, mode, - assign_on_failure, radio_idx); + ctx = ieee80211_find_or_create_chanctx(sdata, chanreq, mode, + assign_on_failure, &reused_ctx); if (IS_ERR(ctx)) { ret = PTR_ERR(ctx); goto out; @@ -2089,7 +2109,11 @@ int _ieee80211_link_use_channel(struct ieee80211_link_data *link, ret = ieee80211_assign_link_chanctx(link, ctx, assign_on_failure); - if (reserved) { + /* + * In case an existing channel context is being used, we marked it as + * will_be_used, now that it is assigned - clear this indication + */ + if (reused_ctx) { WARN_ON(!ctx->will_be_used); ctx->will_be_used = false; } From ad73dea1a4ac26c3ee95dd9c7a01ebe5ce299ac1 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Wed, 25 Mar 2026 15:48:23 +0200 Subject: [PATCH 0927/1561] wifi: mac80211: cleanup error path of ieee80211_do_open If we failed on drv_start, we currently cleanup AP_VLAN reference to bss. But this is not needed, since AP_VLAN must be tied to a pre-existing AP interface, so open_count cannot be 0, so we will never call drv_start for AP_VLAN interfaces. Remove these cleanup and return immediately instead. Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260325154742.3c532a9132c3.Idac5c38d5ad7ce97782a8c05ae72bb0c689c4fa9@changeid Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 232fc0b80e44..234de4762be5 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1361,8 +1361,6 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) break; } case NL80211_IFTYPE_AP: - sdata->bss = &sdata->u.ap; - break; case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_MONITOR: @@ -1387,8 +1385,13 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) local->reconfig_failure = false; res = drv_start(local); - if (res) - goto err_del_bss; + if (res) { + /* + * no need to worry about AP_VLAN cleanup since in that + * case we can't have open_count == 0 + */ + return res; + } ieee80211_led_radio(local, true); ieee80211_mod_tpt_led_trig(local, IEEE80211_TPT_LEDTRIG_FL_RADIO, 0); @@ -1459,6 +1462,9 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) netif_carrier_on(dev); list_add_tail_rcu(&sdata->u.mntr.list, &local->mon_list); break; + case NL80211_IFTYPE_AP: + sdata->bss = &sdata->u.ap; + fallthrough; default: if (coming_up) { ieee80211_del_virtual_monitor(local); @@ -1547,10 +1553,10 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) err_stop: if (!local->open_count) drv_stop(local, false); - err_del_bss: - sdata->bss = NULL; if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) list_del(&sdata->u.vlan.list); + /* Might not be initialized yet, but it is harmless */ + sdata->bss = NULL; return res; } From 6e78b70c9a3d2a627229801f93e3f62869922587 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Wed, 18 Mar 2026 14:39:15 +0200 Subject: [PATCH 0928/1561] wifi: cfg80211: Add an API to configure local NAN schedule Add an nl80211 API to allow user space to configure the local NAN schedule. The local schedule consists of a list of channel definitions and a schedule map, in which each element covers a time slot and indicates on what channel the device should be in that time slot. Channels can be added to schedule even without being scheduled, for reservation purposes. A schedule can be configured either immedietally or be deferred, in case there are already connected peers. When the deferred flag is set, the command is a request from the device to perform an announced schedule update: send the updated NAN Availability - as set in this command - to the peers, and do the actual switch to the new schedule on the right time (i.e. at the end of the slot after the slot in which the update was sent to the peers). In addition, a notification will be sent to indicate a deferred update completion. Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260219114327.ecca178a2de0.Ic977ab08b4ed5cf9b849e55d3a59b01ad3fbd08e@changeid Link: https://patch.msgid.link/20260318123926.206536-2-miriam.rachel.korenblit@intel.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 73 +++++++++- include/uapi/linux/nl80211.h | 76 ++++++++++ net/wireless/core.c | 54 ++++++- net/wireless/core.h | 4 + net/wireless/nl80211.c | 266 +++++++++++++++++++++++++++++++++++ net/wireless/rdev-ops.h | 16 +++ net/wireless/trace.h | 38 +++++ 7 files changed, 525 insertions(+), 2 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 8cd870ece351..539dcf65c188 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4050,6 +4050,54 @@ struct cfg80211_nan_conf { u16 vendor_elems_len; }; +#define CFG80211_NAN_SCHED_NUM_TIME_SLOTS 32 + +/** + * struct cfg80211_nan_channel - NAN channel configuration + * + * This struct defines a NAN channel configuration + * + * @chandef: the channel definition + * @channel_entry: pointer to the Channel Entry blob as defined in Wi-Fi Aware + * (TM) 4.0 specification Table 100 (Channel Entry format for the NAN + * Availability attribute). + * @rx_nss: number of spatial streams supported on this channel + */ +struct cfg80211_nan_channel { + struct cfg80211_chan_def chandef; + const u8 *channel_entry; + u8 rx_nss; +}; + +/** + * struct cfg80211_nan_local_sched - NAN local schedule + * + * This struct defines NAN local schedule parameters + * + * @schedule: a mapping of time slots to chandef indexes in %nan_channels. + * An unscheduled slot will be set to %NL80211_NAN_SCHED_NOT_AVAIL_SLOT. + * @n_channels: number of channel definitions in %nan_channels. + * @nan_avail_blob: pointer to NAN Availability attribute blob. + * See %NL80211_ATTR_NAN_AVAIL_BLOB for more details. + * @nan_avail_blob_len: length of the @nan_avail_blob in bytes. + * @deferred: if true, the command containing this schedule configuration is a + * request from the device to perform an announced schedule update. This + * means that it needs to send the updated NAN availability to the peers, + * and do the actual switch on the right time (i.e. at the end of the slot + * after the slot in which the updated NAN Availability was sent). + * See %NL80211_ATTR_NAN_SCHED_DEFERRED for more details. + * If false, the schedule is applied immediately. + * @nan_channels: array of NAN channel definitions that can be scheduled. + */ +struct cfg80211_nan_local_sched { + u8 schedule[CFG80211_NAN_SCHED_NUM_TIME_SLOTS]; + u8 n_channels; + const u8 *nan_avail_blob; + u16 nan_avail_blob_len; + bool deferred; + struct cfg80211_nan_channel nan_channels[] __counted_by(n_channels); +}; + /** * enum cfg80211_nan_conf_changes - indicates changed fields in NAN * configuration @@ -4830,6 +4878,12 @@ struct mgmt_frame_regs { * @nan_change_conf: changes NAN configuration. The changed parameters must * be specified in @changes (using &enum cfg80211_nan_conf_changes); * All other parameters must be ignored. + * @nan_set_local_sched: configure the local schedule for NAN. The schedule + * consists of an array of %cfg80211_nan_channel and the schedule itself, + * in which each entry maps each time slot to the channel on which the + * radio should operate on. If the chandef of a NAN channel is not + * changed, the channel entry must also remain unchanged. It is the + * driver's responsibility to verify this. * * @set_multicast_to_unicast: configure multicast to unicast conversion for BSS * @@ -5207,7 +5261,9 @@ struct cfg80211_ops { struct wireless_dev *wdev, struct cfg80211_nan_conf *conf, u32 changes); - + int (*nan_set_local_sched)(struct wiphy *wiphy, + struct wireless_dev *wdev, + struct cfg80211_nan_local_sched *sched); int (*set_multicast_to_unicast)(struct wiphy *wiphy, struct net_device *dev, const bool enabled); @@ -6859,6 +6915,9 @@ struct wireless_dev { } ocb; struct { u8 cluster_id[ETH_ALEN] __aligned(2); + u8 n_channels; + struct cfg80211_chan_def *chandefs; + bool sched_update_pending; } nan; } u; @@ -10016,6 +10075,18 @@ void cfg80211_nan_func_terminated(struct wireless_dev *wdev, enum nl80211_nan_func_term_reason reason, u64 cookie, gfp_t gfp); +/** + * cfg80211_nan_sched_update_done - notify deferred schedule update completion + * @wdev: the wireless device reporting the event + * @success: whether or not the schedule update was successful + * @gfp: allocation flags + * + * This function notifies user space that a deferred local NAN schedule update + * (requested with %NL80211_ATTR_NAN_SCHED_DEFERRED) has been completed. + */ +void cfg80211_nan_sched_update_done(struct wireless_dev *wdev, bool success, + gfp_t gfp); + /* ethtool helper */ void cfg80211_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info); diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 67d764023988..484094667abc 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1367,6 +1367,20 @@ * %NL80211_ATTR_INCUMBENT_SIGNAL_INTERFERENCE_BITMAP. The current channel * definition is also sent. * + * @NL80211_CMD_NAN_SET_LOCAL_SCHED: Set the local NAN schedule. NAN must be + * operational (%NL80211_CMD_START_NAN was executed). Must contain + * %NL80211_ATTR_NAN_TIME_SLOTS and %NL80211_ATTR_NAN_AVAIL_BLOB, but + * %NL80211_ATTR_NAN_CHANNEL is optional (for example in case of a channel + * removal, that channel won't be provided). + * If %NL80211_ATTR_NAN_SCHED_DEFERRED is set, the command is a request + * from the device to perform an announced schedule update. See + * %NL80211_ATTR_NAN_SCHED_DEFERRED for more details. + * If not set, the schedule should be applied immediately. + * @NL80211_CMD_NAN_SCHED_UPDATE_DONE: Event sent to user space to notify that + * a deferred local NAN schedule update (requested with + * %NL80211_CMD_NAN_SET_LOCAL_SCHED and %NL80211_ATTR_NAN_SCHED_DEFERRED) + * has been completed. The presence of %NL80211_ATTR_NAN_SCHED_UPDATE_SUCCESS + * indicates that the update was successful. * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1632,6 +1646,10 @@ enum nl80211_commands { NL80211_CMD_INCUMBENT_SIGNAL_DETECT, + NL80211_CMD_NAN_SET_LOCAL_SCHED, + + NL80211_CMD_NAN_SCHED_UPDATE_DONE, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -2991,6 +3009,54 @@ enum nl80211_commands { * @NL80211_ATTR_DISABLE_UHR: Force UHR capable interfaces to disable * this feature during association. This is a flag attribute. * Currently only supported in mac80211 drivers. + * @NL80211_ATTR_NAN_CHANNEL: This is a nested attribute. There can be multiple + * attributes of this type, each one represents a channel definition and + * consists of top-level attributes like %NL80211_ATTR_WIPHY_FREQ. Must + * contain %NL80211_ATTR_NAN_CHANNEL_ENTRY and + * %NL80211_ATTR_NAN_RX_NSS. + * This attribute is used with %NL80211_CMD_NAN_SET_LOCAL_SCHED to specify + * the channel definitions on which the radio needs to operate during + * specific time slots. All of the channel definitions should be mutually + * incompatible. The number of channels should fit the current + * configuration of channels and the possible interface combinations. + * If an existing NAN channel is changed but the chandef isn't, the + * channel entry must also remain unchanged. + * @NL80211_ATTR_NAN_CHANNEL_ENTRY: a byte array of 6 bytes. contains the + * Channel Entry as defined in Wi-Fi Aware (TM) 4.0 specification Table + * 100 (Channel Entry format for the NAN Availability attribute). + * @NL80211_ATTR_NAN_RX_NSS: (u8) RX NSS used for a NAN channel. This is + * used with %NL80211_ATTR_NAN_CHANNEL when configuring NAN channels with + * %NL80211_CMD_NAN_SET_LOCAL_SCHED. + * @NL80211_ATTR_NAN_TIME_SLOTS: an array of u8 values and 32 cells. each value + * maps a time slot to the chandef on which the radio should operate on in + * that time. %NL80211_NAN_SCHED_NOT_AVAIL_SLOT indicates unscheduled. + * The chandef is represented using its index, where the index is the + * sequential number of the %NL80211_ATTR_NAN_CHANNEL attribute within all + * the attributes of this type. + * Each slots spans over 16TUs, hence the entire schedule spans over + * 512TUs. Other slot durations and periods are currently not supported. + * @NL80211_ATTR_NAN_AVAIL_BLOB: (Binary) The NAN Availability attribute blob, + * including the attribute header, as defined in Wi-Fi Aware (TM) 4.0 + * specification Table 93 (NAN Availability attribute format). Required with + * %NL80211_CMD_NAN_SET_LOCAL_SCHED to provide the raw NAN Availability + * attribute. Used by the device to publish Schedule Update NAFs. + * @NL80211_ATTR_NAN_SCHED_DEFERRED: Flag attribute used with + * %NL80211_CMD_NAN_SET_LOCAL_SCHED. When present, the command is a + * request from the device to perform an announced schedule update. This + * means that it needs to send the updated NAN availability to the peers, + * and do the actual switch on the right time (i.e. at the end of the slot + * after the slot in which the updated NAN Availability was sent). Since + * the slots management is done in the device, the update to the peers + * needs to be sent by the device, so it knows the actual switch time. + * If the flag is not set, the schedule should be applied immediately. + * When this flag is set, the total number of NAN channels from both the + * old and new schedules must not exceed the allowed number of local NAN + * channels, because with deferred scheduling the old channels cannot be + * removed before adding the new ones to free up space. + * @NL80211_ATTR_NAN_SCHED_UPDATE_SUCCESS: flag attribute used with + * %NL80211_CMD_NAN_SCHED_UPDATE_DONE to indicate that the deferred + * schedule update completed successfully. If this flag is not present, + * the update failed. * * @NL80211_ATTR_INCUMBENT_SIGNAL_INTERFERENCE_BITMAP: u32 attribute specifying * the signal interference bitmap detected on the operating bandwidth for @@ -3582,6 +3648,14 @@ enum nl80211_attrs { NL80211_ATTR_UHR_OPERATION, + NL80211_ATTR_NAN_CHANNEL, + NL80211_ATTR_NAN_CHANNEL_ENTRY, + NL80211_ATTR_NAN_TIME_SLOTS, + NL80211_ATTR_NAN_RX_NSS, + NL80211_ATTR_NAN_AVAIL_BLOB, + NL80211_ATTR_NAN_SCHED_DEFERRED, + NL80211_ATTR_NAN_SCHED_UPDATE_SUCCESS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -8574,4 +8648,6 @@ enum nl80211_nan_capabilities { NL80211_NAN_CAPABILITIES_MAX = __NL80211_NAN_CAPABILITIES_LAST - 1, }; +#define NL80211_NAN_SCHED_NOT_AVAIL_SLOT 0xff + #endif /* __LINUX_NL80211_H */ diff --git a/net/wireless/core.c b/net/wireless/core.c index 23afc250bc10..54c89b0db352 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -5,7 +5,7 @@ * Copyright 2006-2010 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2025 Intel Corporation + * Copyright (C) 2018-2026 Intel Corporation */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -254,6 +254,8 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, void cfg80211_stop_nan(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) { + struct cfg80211_nan_local_sched empty_sched = {}; + lockdep_assert_held(&rdev->wiphy.mtx); if (WARN_ON(wdev->iftype != NL80211_IFTYPE_NAN)) @@ -262,6 +264,15 @@ void cfg80211_stop_nan(struct cfg80211_registered_device *rdev, if (!wdev_running(wdev)) return; + /* + * If there is a scheduled update pending, mark it as canceled, so the + * empty schedule will be accepted + */ + wdev->u.nan.sched_update_pending = false; + + /* Unschedule all */ + cfg80211_nan_set_local_schedule(rdev, wdev, &empty_sched); + rdev_stop_nan(rdev, wdev); wdev->is_running = false; @@ -270,6 +281,47 @@ void cfg80211_stop_nan(struct cfg80211_registered_device *rdev, rdev->opencount--; } +int cfg80211_nan_set_local_schedule(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + struct cfg80211_nan_local_sched *sched) +{ + int ret; + + lockdep_assert_held(&rdev->wiphy.mtx); + + if (wdev->iftype != NL80211_IFTYPE_NAN || !wdev_running(wdev)) + return -EINVAL; + + if (wdev->u.nan.sched_update_pending) + return -EBUSY; + + ret = rdev_nan_set_local_sched(rdev, wdev, sched); + if (ret) + return ret; + + wdev->u.nan.sched_update_pending = sched->deferred; + + kfree(wdev->u.nan.chandefs); + wdev->u.nan.chandefs = NULL; + wdev->u.nan.n_channels = 0; + + if (!sched->n_channels) + return 0; + + wdev->u.nan.chandefs = kcalloc(sched->n_channels, + sizeof(*wdev->u.nan.chandefs), + GFP_KERNEL); + if (!wdev->u.nan.chandefs) + return -ENOMEM; + + for (int i = 0; i < sched->n_channels; i++) + wdev->u.nan.chandefs[i] = sched->nan_channels[i].chandef; + + wdev->u.nan.n_channels = sched->n_channels; + + return 0; +} + void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); diff --git a/net/wireless/core.h b/net/wireless/core.h index 6cace846d7a3..c7ae1f8a9bd8 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -551,6 +551,10 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, void cfg80211_stop_nan(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); +int cfg80211_nan_set_local_schedule(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + struct cfg80211_nan_local_sched *sched); + struct cfg80211_internal_bss * cfg80211_bss_update(struct cfg80211_registered_device *rdev, struct cfg80211_internal_bss *tmp, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index e15cd26f3a79..de630e0d388b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -333,6 +333,40 @@ static int validate_nan_cluster_id(const struct nlattr *attr, return 0; } +static int validate_nan_avail_blob(const struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + const u8 *data = nla_data(attr); + unsigned int len = nla_len(attr); + u16 attr_len; + + /* Need at least: Attr ID (1) + Length (2) */ + if (len < 3) { + NL_SET_ERR_MSG_FMT(extack, + "NAN Availability: Too short (need at least 3 bytes, have %u)", + len); + return -EINVAL; + } + + if (data[0] != 0x12) { + NL_SET_ERR_MSG_FMT(extack, + "NAN Availability: Invalid Attribute ID 0x%02x (expected 0x12)", + data[0]); + return -EINVAL; + } + + attr_len = get_unaligned_le16(&data[1]); + + if (attr_len != len - 3) { + NL_SET_ERR_MSG_FMT(extack, + "NAN Availability: Length field (%u) doesn't match data length (%u)", + attr_len, len - 3); + return -EINVAL; + } + + return 0; +} + static int validate_uhr_capa(const struct nlattr *attr, struct netlink_ext_ack *extack) { @@ -962,6 +996,14 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_DISABLE_UHR] = { .type = NLA_FLAG }, [NL80211_ATTR_UHR_OPERATION] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_uhr_operation), + [NL80211_ATTR_NAN_CHANNEL] = NLA_POLICY_NESTED(nl80211_policy), + [NL80211_ATTR_NAN_CHANNEL_ENTRY] = NLA_POLICY_EXACT_LEN(6), + [NL80211_ATTR_NAN_RX_NSS] = { .type = NLA_U8 }, + [NL80211_ATTR_NAN_TIME_SLOTS] = + NLA_POLICY_EXACT_LEN(CFG80211_NAN_SCHED_NUM_TIME_SLOTS), + [NL80211_ATTR_NAN_AVAIL_BLOB] = + NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_nan_avail_blob), + [NL80211_ATTR_NAN_SCHED_DEFERRED] = { .type = NLA_FLAG }, }; /* policy for the key attributes */ @@ -16421,6 +16463,224 @@ nla_put_failure: } EXPORT_SYMBOL(cfg80211_nan_func_terminated); +void cfg80211_nan_sched_update_done(struct wireless_dev *wdev, bool success, + gfp_t gfp) +{ + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + struct sk_buff *msg; + void *hdr; + + trace_cfg80211_nan_sched_update_done(wiphy, wdev, success); + + /* Can happen if we stopped NAN */ + if (!wdev->u.nan.sched_update_pending) + return; + + wdev->u.nan.sched_update_pending = false; + + if (!wdev->owner_nlportid) + return; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_NAN_SCHED_UPDATE_DONE); + if (!hdr) + goto nla_put_failure; + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || + nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), + NL80211_ATTR_PAD) || + (success && + nla_put_flag(msg, NL80211_ATTR_NAN_SCHED_UPDATE_SUCCESS))) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + genlmsg_unicast(wiphy_net(wiphy), msg, wdev->owner_nlportid); + + return; + +nla_put_failure: + nlmsg_free(msg); +} +EXPORT_SYMBOL(cfg80211_nan_sched_update_done); + +static int nl80211_parse_nan_channel(struct cfg80211_registered_device *rdev, + struct nlattr *channel, + struct genl_info *info, + struct cfg80211_nan_local_sched *sched, + u8 index) +{ + struct nlattr **channel_parsed __free(kfree) = NULL; + struct cfg80211_chan_def chandef; + u8 n_rx_nss; + int ret; + + channel_parsed = kcalloc(NL80211_ATTR_MAX + 1, sizeof(*channel_parsed), + GFP_KERNEL); + if (!channel_parsed) + return -ENOMEM; + + ret = nla_parse_nested(channel_parsed, NL80211_ATTR_MAX, channel, NULL, + info->extack); + if (ret) + return ret; + + ret = nl80211_parse_chandef(rdev, info->extack, channel_parsed, + &chandef); + if (ret) + return ret; + + if (chandef.chan->band == NL80211_BAND_6GHZ) { + NL_SET_ERR_MSG(info->extack, + "6 GHz band is not supported"); + return -EOPNOTSUPP; + } + + if (!cfg80211_reg_can_beacon(&rdev->wiphy, &chandef, + NL80211_IFTYPE_NAN)) { + NL_SET_ERR_MSG_ATTR(info->extack, channel, + "Channel in NAN schedule is not allowed for NAN operation"); + return -EINVAL; + } + + for (int i = 0; i < index; i++) { + if (cfg80211_chandef_compatible(&sched->nan_channels[i].chandef, + &chandef)) { + NL_SET_ERR_MSG_ATTR(info->extack, channel, + "Channels in NAN schedule must be mutually incompatible"); + return -EINVAL; + } + } + + if (!channel_parsed[NL80211_ATTR_NAN_CHANNEL_ENTRY]) + return -EINVAL; + + sched->nan_channels[index].channel_entry = + nla_data(channel_parsed[NL80211_ATTR_NAN_CHANNEL_ENTRY]); + + if (!channel_parsed[NL80211_ATTR_NAN_RX_NSS]) + return -EINVAL; + + sched->nan_channels[index].rx_nss = + nla_get_u8(channel_parsed[NL80211_ATTR_NAN_RX_NSS]); + + n_rx_nss = u8_get_bits(rdev->wiphy.nan_capa.n_antennas, 0x03); + if (sched->nan_channels[index].rx_nss > n_rx_nss || + !sched->nan_channels[index].rx_nss) { + NL_SET_ERR_MSG_ATTR(info->extack, channel, + "Invalid RX NSS in NAN channel definition"); + return -EINVAL; + } + + sched->nan_channels[index].chandef = chandef; + + return 0; +} + +static bool nl80211_nan_is_sched_empty(struct cfg80211_nan_local_sched *sched) +{ + if (!sched->n_channels) + return true; + + for (int i = 0; i < ARRAY_SIZE(sched->schedule); i++) { + if (sched->schedule[i] != NL80211_NAN_SCHED_NOT_AVAIL_SLOT) + return false; + } + + return true; +} + +static int nl80211_nan_set_local_sched(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct cfg80211_nan_local_sched *sched __free(kfree) = NULL; + struct wireless_dev *wdev = info->user_ptr[1]; + int rem, i = 0, n_channels = 0; + struct nlattr *channel; + bool sched_empty; + + if (wdev->iftype != NL80211_IFTYPE_NAN) + return -EOPNOTSUPP; + + if (!wdev_running(wdev)) + return -ENOTCONN; + + if (!info->attrs[NL80211_ATTR_NAN_TIME_SLOTS]) + return -EINVAL; + + /* First count how many channel attributes we got */ + nlmsg_for_each_attr_type(channel, NL80211_ATTR_NAN_CHANNEL, + info->nlhdr, GENL_HDRLEN, rem) + n_channels++; + + sched = kzalloc(struct_size(sched, nan_channels, n_channels), + GFP_KERNEL); + if (!sched) + return -ENOMEM; + + sched->n_channels = n_channels; + + nlmsg_for_each_attr_type(channel, NL80211_ATTR_NAN_CHANNEL, + info->nlhdr, GENL_HDRLEN, rem) { + int ret = nl80211_parse_nan_channel(rdev, channel, info, sched, + i); + + if (ret) + return ret; + i++; + } + + memcpy(sched->schedule, + nla_data(info->attrs[NL80211_ATTR_NAN_TIME_SLOTS]), + nla_len(info->attrs[NL80211_ATTR_NAN_TIME_SLOTS])); + + for (int slot = 0; slot < ARRAY_SIZE(sched->schedule); slot++) { + if (sched->schedule[slot] != NL80211_NAN_SCHED_NOT_AVAIL_SLOT && + sched->schedule[slot] >= sched->n_channels) { + NL_SET_ERR_MSG(info->extack, + "Invalid time slot in NAN schedule"); + return -EINVAL; + } + } + + sched_empty = nl80211_nan_is_sched_empty(sched); + + sched->deferred = + nla_get_flag(info->attrs[NL80211_ATTR_NAN_SCHED_DEFERRED]); + + if (sched_empty) { + if (sched->deferred) { + NL_SET_ERR_MSG(info->extack, + "Schedule cannot be deferred if all time slots are unavailable"); + return -EINVAL; + } + + if (info->attrs[NL80211_ATTR_NAN_AVAIL_BLOB]) { + NL_SET_ERR_MSG(info->extack, + "NAN Availability blob must be empty if all time slots are unavailable"); + return -EINVAL; + } + } else { + if (!info->attrs[NL80211_ATTR_NAN_AVAIL_BLOB]) { + NL_SET_ERR_MSG(info->extack, + "NAN Availability blob attribute is required"); + return -EINVAL; + } + + sched->nan_avail_blob = + nla_data(info->attrs[NL80211_ATTR_NAN_AVAIL_BLOB]); + sched->nan_avail_blob_len = + nla_len(info->attrs[NL80211_ATTR_NAN_AVAIL_BLOB]); + } + + return cfg80211_nan_set_local_schedule(rdev, wdev, sched); +} + static int nl80211_get_protocol_features(struct sk_buff *skb, struct genl_info *info) { @@ -19227,6 +19487,12 @@ static const struct genl_small_ops nl80211_small_ops[] = { .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, + { + .cmd = NL80211_CMD_NAN_SET_LOCAL_SCHED, + .doit = nl80211_nan_set_local_sched, + .flags = GENL_ADMIN_PERM, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP), + }, }; static struct genl_family nl80211_fam __ro_after_init = { diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 2bad8b60b7c9..b886dedb25c6 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -1060,6 +1060,22 @@ rdev_nan_change_conf(struct cfg80211_registered_device *rdev, return ret; } +static inline int +rdev_nan_set_local_sched(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + struct cfg80211_nan_local_sched *sched) +{ + int ret; + + trace_rdev_nan_set_local_sched(&rdev->wiphy, wdev, sched); + if (rdev->ops->nan_set_local_sched) + ret = rdev->ops->nan_set_local_sched(&rdev->wiphy, wdev, sched); + else + ret = -EOPNOTSUPP; + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + static inline int rdev_set_mac_acl(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_acl_data *params) diff --git a/net/wireless/trace.h b/net/wireless/trace.h index af23f4fca90a..d32b83439363 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2410,6 +2410,27 @@ TRACE_EVENT(rdev_del_nan_func, WIPHY_PR_ARG, WDEV_PR_ARG, __entry->cookie) ); +TRACE_EVENT(rdev_nan_set_local_sched, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + struct cfg80211_nan_local_sched *sched), + TP_ARGS(wiphy, wdev, sched), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + __array(u8, schedule, CFG80211_NAN_SCHED_NUM_TIME_SLOTS) + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + memcpy(__entry->schedule, sched->schedule, + CFG80211_NAN_SCHED_NUM_TIME_SLOTS); + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", schedule: %s", + WIPHY_PR_ARG, WDEV_PR_ARG, + __print_array(__entry->schedule, + CFG80211_NAN_SCHED_NUM_TIME_SLOTS, 1)) +); + TRACE_EVENT(rdev_set_mac_acl, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_acl_data *params), @@ -4276,6 +4297,23 @@ TRACE_EVENT(cfg80211_incumbent_signal_notify, TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT ", signal_interference_bitmap=0x%x", WIPHY_PR_ARG, CHAN_DEF_PR_ARG, __entry->signal_interference_bitmap) ); + +TRACE_EVENT(cfg80211_nan_sched_update_done, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, bool success), + TP_ARGS(wiphy, wdev, success), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + __field(bool, success) + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + __entry->success = success; + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT " success=%d", + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->success) +); #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH From 763a5a580f9532d58b6c9f9e9723ceaa8332d5ca Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Wed, 18 Mar 2026 14:39:16 +0200 Subject: [PATCH 0929/1561] wifi: cfg80211: make sure NAN chandefs are valid Until now there was not handling for NAN in reg_wdev_chan_valid. Now as this wdev might use chandefs, check the validity of those. Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260108102921.51b42ffc9a42.Iacb030fc17027afb55707ca1d6dc146631d55767@changeid Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260219094725.3846371-4-miriam.rachel.korenblit@intel.com Link: https://patch.msgid.link/20260318123926.206536-3-miriam.rachel.korenblit@intel.com Signed-off-by: Johannes Berg --- net/wireless/reg.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 20bba7e491c5..4b5450aec72e 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2348,6 +2348,18 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) if (!wdev->netdev || !netif_running(wdev->netdev)) return true; + /* NAN doesn't have links, handle it separately */ + if (iftype == NL80211_IFTYPE_NAN) { + for (int i = 0; i < wdev->u.nan.n_channels; i++) { + ret = cfg80211_reg_can_beacon(wiphy, + &wdev->u.nan.chandefs[i], + NL80211_IFTYPE_NAN); + if (!ret) + return false; + } + return true; + } + for (link = 0; link < ARRAY_SIZE(wdev->links); link++) { struct ieee80211_channel *chan; @@ -2397,9 +2409,6 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) continue; chandef = wdev->u.ocb.chandef; break; - case NL80211_IFTYPE_NAN: - /* we have no info, but NAN is also pretty universal */ - continue; default: /* others not implemented for now */ WARN_ON_ONCE(1); From 0e8ec738a71ee4e8da7c56d21dd7bb54f954c38b Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Wed, 18 Mar 2026 14:39:17 +0200 Subject: [PATCH 0930/1561] wifi: cfg80211: add support for NAN data interface This new interface type represents a NAN data interface (NDI). It is used for data communication with NAN peers. Note that the existing NL80211_IFTYPE_NAN interface, which is the NAN Management Interface (NMI), is used for management communication. An NDI interface is started when a new NAN data path is about to be established, and is stopped after the NAN data path is terminated. - An NDI interface can only be started if the NMI is running, and NAN is started. - Before the NMI is stopped, the NDI interfaces will be stopped. Add the new interface type, handle add/remove operations for it, and makes sure of the conditions above. Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260219114327.0d681335c2e2.I92973483e927820ae2297853c141842fdb262747@changeid Link: https://patch.msgid.link/20260318123926.206536-4-miriam.rachel.korenblit@intel.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 21 +++++++++++ include/uapi/linux/nl80211.h | 4 ++ net/mac80211/cfg.c | 1 + net/mac80211/chan.c | 2 + net/mac80211/iface.c | 3 ++ net/mac80211/rx.c | 2 + net/mac80211/util.c | 1 + net/wireless/chan.c | 2 + net/wireless/core.c | 72 +++++++++++++++++++++++++++++++----- net/wireless/core.h | 6 +++ net/wireless/nl80211.c | 14 ++++++- net/wireless/reg.c | 12 ++++-- net/wireless/sysfs.c | 27 +++++++------- net/wireless/util.c | 21 +++++++++-- 14 files changed, 159 insertions(+), 29 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 539dcf65c188..1797ece50295 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3980,6 +3980,27 @@ struct cfg80211_qos_map { struct cfg80211_dscp_range up[8]; }; +/** + * DOC: Neighbor Awareness Networking (NAN) + * + * NAN uses two interface types: + * + * - %NL80211_IFTYPE_NAN: a non-netdev interface. This has two roles: (1) holds + * the configuration of all NAN activities (DE parameters, synchronisation + * parameters, local schedule, etc.), and (2) uses as the NAN Management + * Interface (NMI), which is used for NAN management communication. + * + * - %NL80211_IFTYPE_NAN_DATA: The NAN Data Interface (NDI), used for data + * communication with NAN peers. + * + * An NDI interface can only be started (IFF_UP) if the NMI one is running and + * NAN is started. Before NAN is stopped, all associated NDI interfaces + * must be stopped first. + * + * The local schedule specifies which channels the device is available on and + * when. Must be cancelled before NAN is stopped. + */ + /** * struct cfg80211_nan_band_config - NAN band specific configuration * diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 484094667abc..3984c176f9e7 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3749,6 +3749,9 @@ enum nl80211_attrs { * @NL80211_IFTYPE_OCB: Outside Context of a BSS * This mode corresponds to the MIB variable dot11OCBActivated=true * @NL80211_IFTYPE_NAN: NAN device interface type (not a netdev) + * @NL80211_IFTYPE_NAN_DATA: NAN data interface type (netdev); NAN data + * interfaces can only be brought up (IFF_UP) when a NAN interface + * already exists and NAN has been started (using %NL80211_CMD_START_NAN). * @NL80211_IFTYPE_MAX: highest interface type number currently defined * @NUM_NL80211_IFTYPES: number of defined interface types * @@ -3770,6 +3773,7 @@ enum nl80211_iftype { NL80211_IFTYPE_P2P_DEVICE, NL80211_IFTYPE_OCB, NL80211_IFTYPE_NAN, + NL80211_IFTYPE_NAN_DATA, /* keep last */ NUM_NL80211_IFTYPES, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 9aa4ae0621be..13132215afb4 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -718,6 +718,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct wireless_dev *wdev, case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_OCB: + case NL80211_IFTYPE_NAN_DATA: /* shouldn't happen */ WARN_ON_ONCE(1); break; diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index bc396d6c64c5..1e4bfcd25697 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -495,6 +495,7 @@ ieee80211_get_width_of_link(struct ieee80211_link_data *link) case NUM_NL80211_IFTYPES: case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_P2P_GO: + case NL80211_IFTYPE_NAN_DATA: WARN_ON_ONCE(1); break; } @@ -1458,6 +1459,7 @@ ieee80211_link_chanctx_reservation_complete(struct ieee80211_link_data *link) case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_P2P_DEVICE: case NL80211_IFTYPE_NAN: + case NL80211_IFTYPE_NAN_DATA: case NUM_NL80211_IFTYPES: WARN_ON(1); break; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 234de4762be5..125897717a4c 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1368,6 +1368,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) case NL80211_IFTYPE_P2P_DEVICE: case NL80211_IFTYPE_OCB: case NL80211_IFTYPE_NAN: + case NL80211_IFTYPE_NAN_DATA: /* no special treatment */ break; case NL80211_IFTYPE_UNSPECIFIED: @@ -1945,6 +1946,8 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, case NL80211_IFTYPE_P2P_DEVICE: sdata->vif.bss_conf.bssid = sdata->vif.addr; break; + case NL80211_IFTYPE_NAN_DATA: + break; case NL80211_IFTYPE_UNSPECIFIED: case NL80211_IFTYPE_WDS: case NUM_NL80211_IFTYPES: diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 19c33f7a8193..d9a654ef082d 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -4607,6 +4607,8 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx) (ieee80211_is_public_action(hdr, skb->len) || (ieee80211_is_auth(hdr->frame_control) && ether_addr_equal(sdata->vif.addr, hdr->addr1))); + case NL80211_IFTYPE_NAN_DATA: + return false; default: break; } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 55054de62508..8987a4504520 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2118,6 +2118,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) return res; } break; + case NL80211_IFTYPE_NAN_DATA: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_P2P_DEVICE: diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 2dcf18f5655e..8b94c0de80ad 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -816,6 +816,7 @@ int cfg80211_chandef_dfs_required(struct wiphy *wiphy, case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN_DATA: break; case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_UNSPECIFIED: @@ -939,6 +940,7 @@ bool cfg80211_beaconing_iface_active(struct wireless_dev *wdev) case NL80211_IFTYPE_P2P_DEVICE: /* Can NAN type be considered as beaconing interface? */ case NL80211_IFTYPE_NAN: + case NL80211_IFTYPE_NAN_DATA: break; case NL80211_IFTYPE_UNSPECIFIED: case NL80211_IFTYPE_WDS: diff --git a/net/wireless/core.c b/net/wireless/core.c index 54c89b0db352..200b97f912eb 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -329,16 +329,21 @@ void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy) ASSERT_RTNL(); + /* + * Some netdev interfaces need to be closed before some non-netdev + * ones, i.e. NAN_DATA interfaces need to be closed before the NAN + * interface + */ list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (wdev->netdev) { dev_close(wdev->netdev); continue; } + } - /* otherwise, check iftype */ - - guard(wiphy)(wiphy); + guard(wiphy)(wiphy); + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { switch (wdev->iftype) { case NL80211_IFTYPE_P2P_DEVICE: cfg80211_stop_p2p_device(rdev, wdev); @@ -396,6 +401,8 @@ void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev) list_for_each_entry_safe(wdev, tmp, &rdev->wiphy.wdev_list, list) { if (wdev->nl_owner_dead) { + cfg80211_close_dependents(rdev, wdev); + if (wdev->netdev) dev_close(wdev->netdev); @@ -406,6 +413,21 @@ void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev) } } +void cfg80211_close_dependents(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) +{ + ASSERT_RTNL(); + + if (wdev->iftype != NL80211_IFTYPE_NAN) + return; + + /* Close all NAN DATA interfaces */ + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { + if (wdev->iftype == NL80211_IFTYPE_NAN_DATA) + dev_close(wdev->netdev); + } +} + static void cfg80211_destroy_iface_wk(struct work_struct *work) { struct cfg80211_registered_device *rdev; @@ -1419,9 +1441,8 @@ void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, rdev->num_running_monitor_ifaces += num; } -void cfg80211_leave(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, - int link_id) +void cfg80211_leave_locked(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, int link_id) { struct net_device *dev = wdev->netdev; struct cfg80211_sched_scan_request *pos, *tmp; @@ -1472,6 +1493,7 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev, break; case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_MONITOR: + case NL80211_IFTYPE_NAN_DATA: /* nothing to do */ break; case NL80211_IFTYPE_UNSPECIFIED: @@ -1482,6 +1504,19 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev, } } +void cfg80211_leave(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, int link_id) +{ + ASSERT_RTNL(); + + /* NAN_DATA interfaces must be closed before stopping NAN */ + cfg80211_close_dependents(rdev, wdev); + + guard(wiphy)(&rdev->wiphy); + + cfg80211_leave_locked(rdev, wdev, link_id); +} + void cfg80211_stop_link(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, gfp_t gfp) { @@ -1497,6 +1532,9 @@ void cfg80211_stop_link(struct wiphy *wiphy, struct wireless_dev *wdev, trace_cfg80211_stop_link(wiphy, wdev, link_id); + if (wdev->iftype == NL80211_IFTYPE_NAN) + return; + ev = kzalloc_obj(*ev, gfp); if (!ev) return; @@ -1647,10 +1685,9 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, } break; case NETDEV_GOING_DOWN: - scoped_guard(wiphy, &rdev->wiphy) { - cfg80211_leave(rdev, wdev, -1); + cfg80211_leave(rdev, wdev, -1); + scoped_guard(wiphy, &rdev->wiphy) cfg80211_remove_links(wdev); - } /* since we just did cfg80211_leave() nothing to do there */ cancel_work_sync(&wdev->disconnect_wk); cancel_work_sync(&wdev->pmsr_free_wk); @@ -1731,6 +1768,23 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, if (rfkill_blocked(rdev->wiphy.rfkill)) return notifier_from_errno(-ERFKILL); + + /* NAN_DATA interfaces require a running NAN interface */ + if (wdev->iftype == NL80211_IFTYPE_NAN_DATA) { + struct wireless_dev *iter; + bool nan_started = false; + + list_for_each_entry(iter, &rdev->wiphy.wdev_list, list) { + if (iter->iftype == NL80211_IFTYPE_NAN && + wdev_running(iter)) { + nan_started = true; + break; + } + } + + if (!nan_started) + return notifier_from_errno(-ENOLINK); + } break; default: return NOTIFY_DONE; diff --git a/net/wireless/core.h b/net/wireless/core.h index c7ae1f8a9bd8..ae2d56d3ad90 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -318,6 +318,9 @@ void cfg80211_cqm_rssi_notify_work(struct wiphy *wiphy, void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev); +void cfg80211_close_dependents(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev); + /* free object */ void cfg80211_dev_free(struct cfg80211_registered_device *rdev); @@ -541,6 +544,9 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype, int num); +void cfg80211_leave_locked(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, int link_id); + void cfg80211_leave(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, int link_id); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index de630e0d388b..7cea8fef6ae5 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1764,6 +1764,7 @@ static int nl80211_key_allowed(struct wireless_dev *wdev) return 0; return -ENOLINK; case NL80211_IFTYPE_NAN: + case NL80211_IFTYPE_NAN_DATA: if (wiphy_ext_feature_isset(wdev->wiphy, NL80211_EXT_FEATURE_SECURE_NAN)) return 0; @@ -4921,6 +4922,8 @@ static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info) else dev_close(wdev->netdev); + cfg80211_close_dependents(rdev, wdev); + mutex_lock(&rdev->wiphy.mtx); return cfg80211_remove_virtual_intf(rdev, wdev); @@ -15964,6 +15967,10 @@ static int nl80211_stop_nan(struct sk_buff *skb, struct genl_info *info) if (wdev->iftype != NL80211_IFTYPE_NAN) return -EOPNOTSUPP; + cfg80211_close_dependents(rdev, wdev); + + guard(wiphy)(&rdev->wiphy); + cfg80211_stop_nan(rdev, wdev); return 0; @@ -18356,7 +18363,11 @@ nl80211_epcs_cfg(struct sk_buff *skb, struct genl_info *info) NL80211_FLAG_NEED_RTNL) \ SELECTOR(__sel, WIPHY_CLEAR, \ NL80211_FLAG_NEED_WIPHY | \ - NL80211_FLAG_CLEAR_SKB) + NL80211_FLAG_CLEAR_SKB) \ + SELECTOR(__sel, WDEV_UP_RTNL_NOMTX, \ + NL80211_FLAG_NEED_WDEV_UP | \ + NL80211_FLAG_NO_WIPHY_MTX | \ + NL80211_FLAG_NEED_RTNL) enum nl80211_internal_flags_selector { #define SELECTOR(_, name, value) NL80211_IFL_SEL_##name, @@ -19193,6 +19204,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { .doit = nl80211_stop_nan, .flags = GENL_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP | + NL80211_FLAG_NO_WIPHY_MTX | NL80211_FLAG_NEED_RTNL), }, { diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 4b5450aec72e..5db2121c0b57 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2409,6 +2409,9 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) continue; chandef = wdev->u.ocb.chandef; break; + case NL80211_IFTYPE_NAN_DATA: + /* NAN channels are checked in NL80211_IFTYPE_NAN interface */ + break; default: /* others not implemented for now */ WARN_ON_ONCE(1); @@ -2445,11 +2448,14 @@ static void reg_leave_invalid_chans(struct wiphy *wiphy) struct wireless_dev *wdev; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); - guard(wiphy)(wiphy); + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { + bool valid; - list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) - if (!reg_wdev_chan_valid(wiphy, wdev)) + scoped_guard(wiphy, wiphy) + valid = reg_wdev_chan_valid(wiphy, wdev); + if (!valid) cfg80211_leave(rdev, wdev, -1); + } } static void reg_check_chans_work(struct work_struct *work) diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 3385a27468f7..d45ddc457c30 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -102,25 +102,26 @@ static int wiphy_suspend(struct device *dev) if (!rdev->wiphy.registered) goto out_unlock_rtnl; - wiphy_lock(&rdev->wiphy); if (rdev->wiphy.wowlan_config) { - cfg80211_process_wiphy_works(rdev, NULL); - if (rdev->ops->suspend) - ret = rdev_suspend(rdev, rdev->wiphy.wowlan_config); - if (ret <= 0) - goto out_unlock_wiphy; + scoped_guard(wiphy, &rdev->wiphy) { + cfg80211_process_wiphy_works(rdev, NULL); + if (rdev->ops->suspend) + ret = rdev_suspend(rdev, + rdev->wiphy.wowlan_config); + if (ret <= 0) + goto out_unlock_rtnl; + } } /* Driver refused to configure wowlan (ret = 1) or no wowlan */ cfg80211_leave_all(rdev); - cfg80211_process_rdev_events(rdev); - cfg80211_process_wiphy_works(rdev, NULL); - if (rdev->ops->suspend) - ret = rdev_suspend(rdev, NULL); - -out_unlock_wiphy: - wiphy_unlock(&rdev->wiphy); + scoped_guard(wiphy, &rdev->wiphy) { + cfg80211_process_rdev_events(rdev); + cfg80211_process_wiphy_works(rdev, NULL); + if (rdev->ops->suspend) + ret = rdev_suspend(rdev, NULL); + } out_unlock_rtnl: if (ret == 0) rdev->suspended = true; diff --git a/net/wireless/util.c b/net/wireless/util.c index 1a861a6ea380..e2878d20a32d 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1144,8 +1144,15 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev) ev->ij.channel); break; case EVENT_STOPPED: - cfg80211_leave(wiphy_to_rdev(wdev->wiphy), wdev, - ev->link_id); + /* + * for NAN interfaces cfg80211_leave must be called but + * locking here doesn't allow this. + */ + if (WARN_ON(wdev->iftype == NL80211_IFTYPE_NAN)) + break; + + cfg80211_leave_locked(wiphy_to_rdev(wdev->wiphy), wdev, + ev->link_id); break; case EVENT_PORT_AUTHORIZED: __cfg80211_port_authorized(wdev, ev->pa.peer_addr, @@ -1184,6 +1191,13 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, if (otype == NL80211_IFTYPE_AP_VLAN) return -EOPNOTSUPP; + /* + * for NAN interfaces cfg80211_leave must be called for leaving, + * but locking here doesn't allow this. + */ + if (otype == NL80211_IFTYPE_NAN) + return -EOPNOTSUPP; + /* cannot change into P2P device or NAN */ if (ntype == NL80211_IFTYPE_P2P_DEVICE || ntype == NL80211_IFTYPE_NAN) @@ -1204,7 +1218,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, dev->ieee80211_ptr->use_4addr = false; rdev_set_qos_map(rdev, dev, NULL); - cfg80211_leave(rdev, dev->ieee80211_ptr, -1); + cfg80211_leave_locked(rdev, dev->ieee80211_ptr, -1); cfg80211_process_rdev_events(rdev); cfg80211_mlme_purge_registrations(dev->ieee80211_ptr); @@ -1232,6 +1246,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, case NL80211_IFTYPE_OCB: case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_NAN_DATA: dev->priv_flags |= IFF_DONT_BRIDGE; break; case NL80211_IFTYPE_P2P_GO: From bd11c96604693723297c403625c3059b33fb0618 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Wed, 18 Mar 2026 14:39:18 +0200 Subject: [PATCH 0931/1561] wifi: cfg80211: separately store HT, VHT and HE capabilities for NAN In NAN, unlike in other modes, there is only one set of (HT, VHT, HE) capabilities that is used for all channels (and bands) used in the NAN data path. This set of capabilities will have to be a special one, for example - have the minimum of (HT-for-5 GHz, HT-for-2.4 GHz), careful handling of the bits that have a different meaning for each band, etc. While we could use the exiting sband/iftype capabilities, and require identical capabilities for all bands (makes no sense since this means that we will have VHT capabilities in the 2.4 GHz slot), or require that only one of the sbands will be set, or have logic to extract the minimum and handle the conflicting bits - it seems simpler to add a dedicated set of capabilities which is special for NAN, and is band agnostic, to be populated by the driver. That way we also let the driver decide how it wants to handle the conflicting bits. Add this special set of these capabilities to wiphy:nan_capabilities, to be populated by the driver. Send it to user space. Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260219114327.4b6f3e4a81b4.I45422adc0df3ad4101d857a92e83f0de5cf241e1@changeid Link: https://patch.msgid.link/20260318123926.206536-5-miriam.rachel.korenblit@intel.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 11 ++++++ include/uapi/linux/nl80211.h | 43 ++++++++++++++++++++++++ net/wireless/core.c | 4 +++ net/wireless/nl80211.c | 65 ++++++++++++++++++++++++++++++++++++ 4 files changed, 123 insertions(+) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 1797ece50295..60cd0fbe9a46 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5913,6 +5913,12 @@ enum wiphy_nan_flags { * @max_channel_switch_time: maximum channel switch time in milliseconds. * @dev_capabilities: NAN device capabilities as defined in Wi-Fi Aware (TM) * specification Table 79 (Capabilities field). + * @phy: Band-agnostic capabilities for NAN data interfaces. Since NAN + * operates on multiple channels simultaneously, these capabilities apply + * across all bands. Valid only if NL80211_IFTYPE_NAN_DATA is supported. + * @phy.ht: HT capabilities (mandatory for NAN data) + * @phy.vht: VHT capabilities (optional) + * @phy.he: HE capabilities (optional) */ struct wiphy_nan_capa { u32 flags; @@ -5920,6 +5926,11 @@ struct wiphy_nan_capa { u8 n_antennas; u16 max_channel_switch_time; u8 dev_capabilities; + struct { + struct ieee80211_sta_ht_cap ht; + struct ieee80211_sta_vht_cap vht; + struct ieee80211_sta_he_cap he; + } phy; }; #define CFG80211_HW_TIMESTAMP_ALL_PEERS 0xffff diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 3984c176f9e7..c94e957a3467 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4462,6 +4462,46 @@ enum nl80211_band_attr { #define NL80211_BAND_ATTR_HT_CAPA NL80211_BAND_ATTR_HT_CAPA +/** + * enum nl80211_nan_phy_cap_attr - NAN PHY capabilities attributes + * @__NL80211_NAN_PHY_CAP_ATTR_INVALID: attribute number 0 is reserved + * @NL80211_NAN_PHY_CAP_ATTR_HT_MCS_SET: 16-byte attribute containing HT MCS set + * @NL80211_NAN_PHY_CAP_ATTR_HT_CAPA: HT capabilities (u16) + * @NL80211_NAN_PHY_CAP_ATTR_HT_AMPDU_FACTOR: HT A-MPDU factor (u8) + * @NL80211_NAN_PHY_CAP_ATTR_HT_AMPDU_DENSITY: HT A-MPDU density (u8) + * @NL80211_NAN_PHY_CAP_ATTR_VHT_MCS_SET: 8-byte attribute containing VHT MCS set + * @NL80211_NAN_PHY_CAP_ATTR_VHT_CAPA: VHT capabilities (u32) + * @NL80211_NAN_PHY_CAP_ATTR_HE_MAC: HE MAC capabilities + * @NL80211_NAN_PHY_CAP_ATTR_HE_PHY: HE PHY capabilities + * @NL80211_NAN_PHY_CAP_ATTR_HE_MCS_SET: HE supported NSS/MCS combinations + * @NL80211_NAN_PHY_CAP_ATTR_HE_PPE: HE PPE thresholds + * @NL80211_NAN_PHY_CAP_ATTR_MAX: highest NAN PHY cap attribute number + * @__NL80211_NAN_PHY_CAP_ATTR_AFTER_LAST: internal use + */ +enum nl80211_nan_phy_cap_attr { + __NL80211_NAN_PHY_CAP_ATTR_INVALID, + + /* HT capabilities */ + NL80211_NAN_PHY_CAP_ATTR_HT_MCS_SET, + NL80211_NAN_PHY_CAP_ATTR_HT_CAPA, + NL80211_NAN_PHY_CAP_ATTR_HT_AMPDU_FACTOR, + NL80211_NAN_PHY_CAP_ATTR_HT_AMPDU_DENSITY, + + /* VHT capabilities */ + NL80211_NAN_PHY_CAP_ATTR_VHT_MCS_SET, + NL80211_NAN_PHY_CAP_ATTR_VHT_CAPA, + + /* HE capabilities */ + NL80211_NAN_PHY_CAP_ATTR_HE_MAC, + NL80211_NAN_PHY_CAP_ATTR_HE_PHY, + NL80211_NAN_PHY_CAP_ATTR_HE_MCS_SET, + NL80211_NAN_PHY_CAP_ATTR_HE_PPE, + + /* keep last */ + __NL80211_NAN_PHY_CAP_ATTR_AFTER_LAST, + NL80211_NAN_PHY_CAP_ATTR_MAX = __NL80211_NAN_PHY_CAP_ATTR_AFTER_LAST - 1 +}; + /** * enum nl80211_wmm_rule - regulatory wmm rule * @@ -8635,6 +8675,8 @@ enum nl80211_s1g_short_beacon_attrs { * @NL80211_NAN_CAPA_CAPABILITIES: u8 attribute containing the * capabilities of the device as defined in Wi-Fi Aware (TM) * specification Table 79 (Capabilities field). + * @NL80211_NAN_CAPA_PHY: nested attribute containing band-agnostic + * capabilities for NAN data path. See &enum nl80211_nan_phy_cap_attr. * @__NL80211_NAN_CAPABILITIES_LAST: Internal * @NL80211_NAN_CAPABILITIES_MAX: Highest NAN capability attribute. */ @@ -8647,6 +8689,7 @@ enum nl80211_nan_capabilities { NL80211_NAN_CAPA_NUM_ANTENNAS, NL80211_NAN_CAPA_MAX_CHANNEL_SWITCH_TIME, NL80211_NAN_CAPA_CAPABILITIES, + NL80211_NAN_CAPA_PHY, /* keep last */ __NL80211_NAN_CAPABILITIES_LAST, NL80211_NAN_CAPABILITIES_MAX = __NL80211_NAN_CAPABILITIES_LAST - 1, diff --git a/net/wireless/core.c b/net/wireless/core.c index 200b97f912eb..6783e0672dcb 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -835,6 +835,10 @@ int wiphy_register(struct wiphy *wiphy) !(wiphy->nan_supported_bands & BIT(NL80211_BAND_2GHZ))))) return -EINVAL; + if (WARN_ON((wiphy->interface_modes & BIT(NL80211_IFTYPE_NAN_DATA)) && + !wiphy->nan_capa.phy.ht.ht_supported)) + return -EINVAL; + if (WARN_ON(wiphy->interface_modes & BIT(NL80211_IFTYPE_WDS))) return -EINVAL; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 7cea8fef6ae5..a9a829613b7b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2721,6 +2721,68 @@ fail: return -ENOBUFS; } +static int nl80211_put_nan_phy_cap(struct wiphy *wiphy, struct sk_buff *msg) +{ + struct nlattr *nl_phy_cap; + const struct ieee80211_sta_ht_cap *ht_cap; + const struct ieee80211_sta_vht_cap *vht_cap; + const struct ieee80211_sta_he_cap *he_cap; + + if (!cfg80211_iftype_allowed(wiphy, NL80211_IFTYPE_NAN_DATA, false, 0)) + return 0; + + ht_cap = &wiphy->nan_capa.phy.ht; + vht_cap = &wiphy->nan_capa.phy.vht; + he_cap = &wiphy->nan_capa.phy.he; + + /* HT is mandatory */ + if (WARN_ON(!ht_cap->ht_supported)) + return 0; + + nl_phy_cap = nla_nest_start_noflag(msg, NL80211_NAN_CAPA_PHY); + if (!nl_phy_cap) + return -ENOBUFS; + + if (nla_put(msg, NL80211_NAN_PHY_CAP_ATTR_HT_MCS_SET, + sizeof(ht_cap->mcs), &ht_cap->mcs) || + nla_put_u16(msg, NL80211_NAN_PHY_CAP_ATTR_HT_CAPA, ht_cap->cap) || + nla_put_u8(msg, NL80211_NAN_PHY_CAP_ATTR_HT_AMPDU_FACTOR, + ht_cap->ampdu_factor) || + nla_put_u8(msg, NL80211_NAN_PHY_CAP_ATTR_HT_AMPDU_DENSITY, + ht_cap->ampdu_density)) + goto fail; + + if (vht_cap->vht_supported) { + if (nla_put(msg, NL80211_NAN_PHY_CAP_ATTR_VHT_MCS_SET, + sizeof(vht_cap->vht_mcs), &vht_cap->vht_mcs) || + nla_put_u32(msg, NL80211_NAN_PHY_CAP_ATTR_VHT_CAPA, + vht_cap->cap)) + goto fail; + } + + if (he_cap->has_he) { + if (nla_put(msg, NL80211_NAN_PHY_CAP_ATTR_HE_MAC, + sizeof(he_cap->he_cap_elem.mac_cap_info), + he_cap->he_cap_elem.mac_cap_info) || + nla_put(msg, NL80211_NAN_PHY_CAP_ATTR_HE_PHY, + sizeof(he_cap->he_cap_elem.phy_cap_info), + he_cap->he_cap_elem.phy_cap_info) || + nla_put(msg, NL80211_NAN_PHY_CAP_ATTR_HE_MCS_SET, + sizeof(he_cap->he_mcs_nss_supp), + &he_cap->he_mcs_nss_supp) || + nla_put(msg, NL80211_NAN_PHY_CAP_ATTR_HE_PPE, + sizeof(he_cap->ppe_thres), he_cap->ppe_thres)) + goto fail; + } + + nla_nest_end(msg, nl_phy_cap); + return 0; + +fail: + nla_nest_cancel(msg, nl_phy_cap); + return -ENOBUFS; +} + static int nl80211_put_nan_capa(struct wiphy *wiphy, struct sk_buff *msg) { struct nlattr *nan_caps; @@ -2747,6 +2809,9 @@ static int nl80211_put_nan_capa(struct wiphy *wiphy, struct sk_buff *msg) wiphy->nan_capa.dev_capabilities)) goto fail; + if (nl80211_put_nan_phy_cap(wiphy, msg)) + goto fail; + nla_nest_end(msg, nan_caps); return 0; From 1f1101c29e55195db7b52bef47d11978442998e0 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Wed, 18 Mar 2026 14:39:19 +0200 Subject: [PATCH 0932/1561] wifi: nl80211: add support for NAN stations There are 2 types of logical links with a NAN peer: - management (NMI), which is used for Tx/Rx of NAN management frames. - data (NDI), which is used for Tx/Rx of data frames, or non-NAN management frames. The NMI station has two roles: - representation of the NAN peer - for example, the peer's schedule and the HT, VHT, HE capabilities - belong to the NMI station, and not to the NDI ones. - Tx/Rx of NAN management frames to/from the peer. The NDI station is used for Tx/Rx data frames of a specific NDP that was established with the NAN peer. Note that a peer can choose to reuse its NMI address as the NDI address. In that case, it is expected that two stations will be added even though they will have the same address. - An NDI station can only be added after the corresponding NMI station was configured with capabilities. - All the NDI stations will be removed before the NDI interface is brought down. - All NMI stations will be removed before NAN is stopped. - Before NMI sta removal, all corresponding NDI stations will be removed Add support for adding, removing, and changing NMI and NDI stations. Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260219114327.d280936ee832.I6d859eee759bb5824a9ffd2984410faf879ba00e@changeid Link: https://patch.msgid.link/20260318123926.206536-6-miriam.rachel.korenblit@intel.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 56 ++++++++++++++++ include/uapi/linux/nl80211.h | 8 ++- net/wireless/nl80211.c | 120 ++++++++++++++++++++++++++++------- 3 files changed, 161 insertions(+), 23 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 60cd0fbe9a46..654d71f60e8c 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1831,6 +1831,7 @@ struct cfg80211_ttlm_params { * @eml_cap: EML capabilities of this station * @link_sta_params: link related params. * @epp_peer: EPP peer indication + * @nmi_mac: MAC address of the NMI station of the NAN peer */ struct station_parameters { struct net_device *vlan; @@ -1858,6 +1859,7 @@ struct station_parameters { u16 eml_cap; struct link_station_parameters link_sta_params; bool epp_peer; + const u8 *nmi_mac; }; /** @@ -1897,6 +1899,8 @@ struct station_del_parameters { * entry that is operating, has been marked authorized by userspace) * @CFG80211_STA_MESH_PEER_KERNEL: peer on mesh interface (kernel managed) * @CFG80211_STA_MESH_PEER_USER: peer on mesh interface (user managed) + * @CFG80211_STA_NAN_MGMT: NAN management interface station + * @CFG80211_STA_NAN_DATA: NAN data path station */ enum cfg80211_station_type { CFG80211_STA_AP_CLIENT, @@ -1908,6 +1912,8 @@ enum cfg80211_station_type { CFG80211_STA_TDLS_PEER_ACTIVE, CFG80211_STA_MESH_PEER_KERNEL, CFG80211_STA_MESH_PEER_USER, + CFG80211_STA_NAN_MGMT, + CFG80211_STA_NAN_DATA, }; /** @@ -3999,6 +4005,56 @@ struct cfg80211_qos_map { * * The local schedule specifies which channels the device is available on and * when. Must be cancelled before NAN is stopped. + * + * NAN Stations + * ~~~~~~~~~~~~ + * + * There are two types of stations corresponding to the two interface types: + * + * - NMI station: Represents the NAN peer. Peer-specific data such as the peer's + * schedule and the HT, VHT and HE capabilities belongs to the NMI station. + * Also used for Tx/Rx of NAN management frames to/from the peer. + * Added on the %NL80211_IFTYPE_NAN interface. + * + * - NDI station: Used for Tx/Rx of data frames (and non-NAN management frames) + * for a specific NDP established with the NAN peer. Added on the + * %NL80211_IFTYPE_NAN_DATA interface. + * + * A peer may reuse its NMI address as the NDI address. In that case, two + * separate stations should be added even though they share the same MAC + * address. + * + * HT, VHT and HE capabilities should not changes after it was set. It is the + * driver's responsibility to check that. + * + * An NDI station can only be added if the corresponding NMI station has already + * been configured with HT (and possibly VHT and HE) capabilities. It is the + * driver's responsibility to check that. + * + * All NDI stations must be removed before corresponding NMI station is removed. + * Therefore, removing a NMI station implies that the associated NDI station(s) + * (if any) will be removed first. + * + * NAN Dependencies + * ~~~~~~~~~~~~~~~~ + * + * The following diagram shows the dependencies between NAN components. + * An arrow from A to B means A must be started/added before B, and B must be + * stopped/removed before A: + * + * +-------------+ + * | NMI iface |---(local schedule) + * +------+------+ + * / \ + * v v + * +-----------+ +-------------+ + * | NDI iface | | NMI sta |---(peer schedule) + * +-----+-----+ +------+------+ + * \ / + * v v + * +----------+ + * | NDI sta | + * +----------+ */ /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index c94e957a3467..1897b9a35be8 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2677,7 +2677,8 @@ enum nl80211_commands { * a flow is assigned on each round of the DRR scheduler. * @NL80211_ATTR_HE_CAPABILITY: HE Capability information element (from * association request when used with NL80211_CMD_NEW_STATION). Can be set - * only if %NL80211_STA_FLAG_WME is set. + * only if %NL80211_STA_FLAG_WME is set (except for NAN, which uses WME + * anyway). * * @NL80211_ATTR_FTM_RESPONDER: nested attribute which user-space can include * in %NL80211_CMD_START_AP or %NL80211_CMD_SET_BEACON for fine timing @@ -3057,6 +3058,9 @@ enum nl80211_commands { * %NL80211_CMD_NAN_SCHED_UPDATE_DONE to indicate that the deferred * schedule update completed successfully. If this flag is not present, * the update failed. + * @NL80211_ATTR_NAN_NMI_MAC: The address of the NMI station to which this NDI + * station belongs. Used with %NL80211_CMD_NEW_STATION when adding an NDI + * station. * * @NL80211_ATTR_INCUMBENT_SIGNAL_INTERFERENCE_BITMAP: u32 attribute specifying * the signal interference bitmap detected on the operating bandwidth for @@ -3656,6 +3660,8 @@ enum nl80211_attrs { NL80211_ATTR_NAN_SCHED_DEFERRED, NL80211_ATTR_NAN_SCHED_UPDATE_SUCCESS, + NL80211_ATTR_NAN_NMI_MAC, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a9a829613b7b..89fb61d53e2f 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1004,6 +1004,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_NAN_AVAIL_BLOB] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_nan_avail_blob), [NL80211_ATTR_NAN_SCHED_DEFERRED] = { .type = NLA_FLAG }, + [NL80211_ATTR_NAN_NMI_MAC] = NLA_POLICY_ETH_ADDR, }; /* policy for the key attributes */ @@ -7233,6 +7234,26 @@ static int parse_station_flags(struct genl_info *info, if ((params->sta_flags_mask | params->sta_flags_set) & BIT(__NL80211_STA_FLAG_INVALID)) return -EINVAL; + + if ((iftype == NL80211_IFTYPE_NAN || + iftype == NL80211_IFTYPE_NAN_DATA) && + params->sta_flags_mask & + ~(BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED) | + BIT(NL80211_STA_FLAG_AUTHORIZED) | + BIT(NL80211_STA_FLAG_MFP))) + return -EINVAL; + + /* WME is always used in NAN */ + if (iftype == NL80211_IFTYPE_NAN_DATA) { + /* but don't let userspace control it */ + if (params->sta_flags_mask & BIT(NL80211_STA_FLAG_WME)) + return -EINVAL; + + params->sta_flags_mask |= BIT(NL80211_STA_FLAG_WME); + params->sta_flags_set |= BIT(NL80211_STA_FLAG_WME); + } + return 0; } @@ -8115,7 +8136,7 @@ static int nl80211_dump_station(struct sk_buff *skb, /* nl80211_prepare_wdev_dump acquired it in the successful case */ __acquire(&rdev->wiphy.mtx); - if (!wdev->netdev) { + if (!wdev->netdev && wdev->iftype != NL80211_IFTYPE_NAN) { err = -EINVAL; goto out_err; } @@ -8302,10 +8323,12 @@ int cfg80211_check_station_change(struct wiphy *wiphy, return -EINVAL; if (params->link_sta_params.supported_rates) return -EINVAL; - if (params->ext_capab || params->link_sta_params.ht_capa || - params->link_sta_params.vht_capa || - params->link_sta_params.he_capa || - params->link_sta_params.eht_capa || + if (statype != CFG80211_STA_NAN_MGMT && + (params->link_sta_params.ht_capa || + params->link_sta_params.vht_capa || + params->link_sta_params.he_capa)) + return -EINVAL; + if (params->ext_capab || params->link_sta_params.eht_capa || params->link_sta_params.uhr_capa) return -EINVAL; if (params->sta_flags_mask & BIT(NL80211_STA_FLAG_SPP_AMSDU)) @@ -8377,6 +8400,19 @@ int cfg80211_check_station_change(struct wiphy *wiphy, params->plink_action != NL80211_PLINK_ACTION_BLOCK) return -EINVAL; break; + case CFG80211_STA_NAN_MGMT: + if (params->sta_flags_mask & + ~(BIT(NL80211_STA_FLAG_AUTHORIZED) | + BIT(NL80211_STA_FLAG_MFP))) + return -EINVAL; + break; + case CFG80211_STA_NAN_DATA: + if (params->sta_flags_mask & + ~(BIT(NL80211_STA_FLAG_AUTHORIZED) | + BIT(NL80211_STA_FLAG_MFP) | + BIT(NL80211_STA_FLAG_WME))) + return -EINVAL; + break; } /* @@ -8591,7 +8627,8 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) memset(¶ms, 0, sizeof(params)); - if (!dev) + if (!dev && wdev->iftype != NL80211_IFTYPE_NAN && + wdev->iftype != NL80211_IFTYPE_NAN_DATA) return -EINVAL; if (!rdev->ops->change_station) @@ -8734,6 +8771,8 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_MESH_POINT: + case NL80211_IFTYPE_NAN: + case NL80211_IFTYPE_NAN_DATA: break; default: err = -EOPNOTSUPP; @@ -8762,7 +8801,7 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) memset(¶ms, 0, sizeof(params)); - if (!dev) + if (!dev && wdev->iftype != NL80211_IFTYPE_NAN) return -EINVAL; if (!rdev->ops->add_station) @@ -8771,15 +8810,31 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[NL80211_ATTR_MAC]) return -EINVAL; - if (!info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]) - return -EINVAL; + if (wdev->iftype == NL80211_IFTYPE_NAN || + wdev->iftype == NL80211_IFTYPE_NAN_DATA) { + if (info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]) + return -EINVAL; + if (wdev->iftype == NL80211_IFTYPE_NAN_DATA) { + if (!info->attrs[NL80211_ATTR_NAN_NMI_MAC]) + return -EINVAL; - if (!info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]) - return -EINVAL; + /* Only NMI stations receive the HT/VHT/HE capabilities */ + if (info->attrs[NL80211_ATTR_HT_CAPABILITY] || + info->attrs[NL80211_ATTR_VHT_CAPABILITY] || + info->attrs[NL80211_ATTR_HE_CAPABILITY]) + return -EINVAL; + } + } else { + if (!info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]) + return -EINVAL; - if (!info->attrs[NL80211_ATTR_STA_AID] && - !info->attrs[NL80211_ATTR_PEER_AID]) - return -EINVAL; + if (!info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]) + return -EINVAL; + + if (!info->attrs[NL80211_ATTR_STA_AID] && + !info->attrs[NL80211_ATTR_PEER_AID]) + return -EINVAL; + } params.link_sta_params.link_id = nl80211_link_id_or_invalid(info->attrs); @@ -8795,12 +8850,16 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); } - params.link_sta_params.supported_rates = - nla_data(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]); - params.link_sta_params.supported_rates_len = - nla_len(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]); - params.listen_interval = - nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]); + if (info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]) { + params.link_sta_params.supported_rates = + nla_data(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]); + params.link_sta_params.supported_rates_len = + nla_len(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]); + } + + if (info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]) + params.listen_interval = + nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]); if (info->attrs[NL80211_ATTR_VLAN_ID]) params.vlan_id = nla_get_u16(info->attrs[NL80211_ATTR_VLAN_ID]); @@ -8819,7 +8878,7 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_PEER_AID]) params.aid = nla_get_u16(info->attrs[NL80211_ATTR_PEER_AID]); - else + else if (info->attrs[NL80211_ATTR_STA_AID]) params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]); if (info->attrs[NL80211_ATTR_STA_CAPABILITY]) { @@ -8940,6 +8999,16 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } + if (wdev->iftype == NL80211_IFTYPE_NAN || + wdev->iftype == NL80211_IFTYPE_NAN_DATA) { + if (params.sta_modify_mask & STATION_PARAM_APPLY_UAPSD) + return -EINVAL; + /* NAN NMI station must be added in associated or authorized state */ + if (!(params.sta_flags_set & (BIT(NL80211_STA_FLAG_ASSOCIATED) | + BIT(NL80211_STA_FLAG_AUTHENTICATED)))) + return -EINVAL; + } + /* Ensure that HT/VHT capabilities are not set for 6 GHz HE STA */ if (params.link_sta_params.he_6ghz_capa && (params.link_sta_params.ht_capa || params.link_sta_params.vht_capa)) @@ -9032,6 +9101,11 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) */ params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_AUTHORIZED); break; + case NL80211_IFTYPE_NAN: + break; + case NL80211_IFTYPE_NAN_DATA: + params.nmi_mac = nla_data(info->attrs[NL80211_ATTR_NAN_NMI_MAC]); + break; default: return -EOPNOTSUPP; } @@ -9073,7 +9147,7 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info) memset(¶ms, 0, sizeof(params)); - if (!dev) + if (!dev && wdev->iftype != NL80211_IFTYPE_NAN) return -EINVAL; if (info->attrs[NL80211_ATTR_MAC]) @@ -9084,6 +9158,8 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info) case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_P2P_GO: + case NL80211_IFTYPE_NAN: + case NL80211_IFTYPE_NAN_DATA: /* always accept these */ break; case NL80211_IFTYPE_ADHOC: From c4aa273ff6b5dae62f4981763bd91047ea6ffdda Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Wed, 18 Mar 2026 14:39:20 +0200 Subject: [PATCH 0933/1561] wifi: nl80211: define an API for configuring the NAN peer's schedule Add an NL80211 command to configure the NAN schedule of a NAN peer. Such a schedule contains a list of NAN channels, and a mapping from each time slots to the corresponding channel (or unscheduled). Also contains more information about the schedule, such as sequence ID and map ID. Not all of the restrictions are validated in this patch. In particular, comparison of two maps of the same peer requires storing/retrieving each map of each peer, only for validation. Therefore, it is the responsibilty of the driver to check that. Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260219114327.5b13fa5af4f6.If0e214ff5b52c9666e985fefa3f7be0ad14d93fb@changeid Link: https://patch.msgid.link/20260318123926.206536-7-miriam.rachel.korenblit@intel.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 58 +++++ include/uapi/linux/nl80211.h | 82 +++++++- net/wireless/nl80211.c | 395 ++++++++++++++++++++++++++++++++--- net/wireless/rdev-ops.h | 16 ++ net/wireless/trace.h | 28 +++ 5 files changed, 546 insertions(+), 33 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 654d71f60e8c..48ca5d3aa201 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4175,6 +4175,54 @@ struct cfg80211_nan_local_sched { struct cfg80211_nan_channel nan_channels[] __counted_by(n_channels); }; +/** + * struct cfg80211_nan_peer_map - NAN peer schedule map + * + * This struct defines a single NAN peer schedule map + * + * @map_id: map ID of this schedule map + * @schedule: a mapping of time slots to chandef indexes in the schedule's + * @nan_channels. Each slot lasts 16TUs. An unscheduled slot will be + * set to %NL80211_NAN_SCHED_NOT_AVAIL_SLOT. + */ +struct cfg80211_nan_peer_map { + u8 map_id; + u8 schedule[CFG80211_NAN_SCHED_NUM_TIME_SLOTS]; +}; + +#define CFG80211_NAN_MAX_PEER_MAPS 2 +#define CFG80211_NAN_INVALID_MAP_ID 0xff + +/** + * struct cfg80211_nan_peer_sched - NAN peer schedule + * + * This struct defines NAN peer schedule parameters for a peer. + * + * @peer_addr: MAC address of the peer (NMI address) + * @seq_id: sequence ID of the peer schedule. + * @committed_dw: committed DW as published by the peer. + * See %NL80211_ATTR_NAN_COMMITTED_DW + * @max_chan_switch: maximum channel switch time in microseconds as published + * by the peer. See %NL80211_ATTR_NAN_MAX_CHAN_SWITCH_TIME. + * @init_ulw: initial ULWs as published by the peer. + * @ulw_size: number of bytes in @init_ulw. + * @n_channels: number of channel definitions in @nan_channels. + * @nan_channels: array of NAN channel definitions for this schedule. + * @maps: array of peer schedule maps. Unused entries have + * map_id = %CFG80211_NAN_INVALID_MAP_ID. + */ +struct cfg80211_nan_peer_sched { + const u8 *peer_addr; + u8 seq_id; + u16 committed_dw; + u16 max_chan_switch; + const u8 *init_ulw; + u16 ulw_size; + u8 n_channels; + struct cfg80211_nan_channel *nan_channels; + struct cfg80211_nan_peer_map maps[CFG80211_NAN_MAX_PEER_MAPS]; +}; + /** * enum cfg80211_nan_conf_changes - indicates changed fields in NAN * configuration @@ -4961,6 +5009,13 @@ struct mgmt_frame_regs { * radio should operate on. If the chandef of a NAN channel is not * changed, the channel entry must also remain unchanged. It is the * driver's responsibility to verify this. + * @nan_set_peer_sched: configure the peer schedule for NAN. The schedule + * consists of an array of %cfg80211_nan_channel and the schedule itself, + * in which each entry maps each time slot to a channel on which the + * radio should operate on. In addition, it contains more peer's schedule + * information such as committed DW, etc. When updating an existing peer + * schedule, the full new schedule is provided - partial updates are not + * supported, and the new schedule completely replaces the previous one. * * @set_multicast_to_unicast: configure multicast to unicast conversion for BSS * @@ -5341,6 +5396,9 @@ struct cfg80211_ops { int (*nan_set_local_sched)(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_nan_local_sched *sched); + int (*nan_set_peer_sched)(struct wiphy *wiphy, + struct wireless_dev *wdev, + struct cfg80211_nan_peer_sched *sched); int (*set_multicast_to_unicast)(struct wiphy *wiphy, struct net_device *dev, const bool enabled); diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 1897b9a35be8..e7f31a34eee4 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1381,6 +1381,26 @@ * %NL80211_CMD_NAN_SET_LOCAL_SCHED and %NL80211_ATTR_NAN_SCHED_DEFERRED) * has been completed. The presence of %NL80211_ATTR_NAN_SCHED_UPDATE_SUCCESS * indicates that the update was successful. + * @NL80211_CMD_NAN_SET_PEER_SCHED: Set the peer NAN schedule. NAN + * must be operational (%NL80211_CMD_START_NAN was executed). + * Required attributes: %NL80211_ATTR_MAC (peer NMI address) and + * %NL80211_ATTR_NAN_COMMITTED_DW. + * Optionally, the full schedule can be provided by including all of: + * %NL80211_ATTR_NAN_SEQ_ID, %NL80211_ATTR_NAN_CHANNEL (one or more), and + * %NL80211_ATTR_NAN_PEER_MAPS (see &enum nl80211_nan_peer_map_attrs). + * If any of these three optional attributes is provided, all three must + * be provided. + * Each peer channel must be compatible with at least one local channel + * set by %NL80211_CMD_SET_LOCAL_NAN_SCHED. Different maps must not + * contain compatible channels. + * For single-radio devices (n_radio <= 1), different maps must not + * schedule the same time slot, as the device cannot operate on multiple + * channels simultaneously. + * When updating an existing peer schedule, the full new schedule must be + * provided - partial updates are not supported. The new schedule will + * completely replace the previous one. + * The peer schedule is automatically removed when the NMI station is + * removed. * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1650,6 +1670,8 @@ enum nl80211_commands { NL80211_CMD_NAN_SCHED_UPDATE_DONE, + NL80211_CMD_NAN_SET_PEER_SCHED, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -3018,8 +3040,12 @@ enum nl80211_commands { * This attribute is used with %NL80211_CMD_NAN_SET_LOCAL_SCHED to specify * the channel definitions on which the radio needs to operate during * specific time slots. All of the channel definitions should be mutually - * incompatible. The number of channels should fit the current - * configuration of channels and the possible interface combinations. + * incompatible. + * This is also used with %NL80211_CMD_NAN_SET_PEER_SCHED to configure the + * peer NAN channels. In that case, the channel definitions can be + * compatible to each other, or even identical just with different RX NSS. + * The number of channels should fit the current configuration of channels + * and the possible interface combinations. * If an existing NAN channel is changed but the chandef isn't, the * channel entry must also remain unchanged. * @NL80211_ATTR_NAN_CHANNEL_ENTRY: a byte array of 6 bytes. contains the @@ -3027,7 +3053,7 @@ enum nl80211_commands { * 100 (Channel Entry format for the NAN Availability attribute). * @NL80211_ATTR_NAN_RX_NSS: (u8) RX NSS used for a NAN channel. This is * used with %NL80211_ATTR_NAN_CHANNEL when configuring NAN channels with - * %NL80211_CMD_NAN_SET_LOCAL_SCHED. + * %NL80211_CMD_NAN_SET_LOCAL_SCHED or %NL80211_CMD_NAN_SET_PEER_SCHED. * @NL80211_ATTR_NAN_TIME_SLOTS: an array of u8 values and 32 cells. each value * maps a time slot to the chandef on which the radio should operate on in * that time. %NL80211_NAN_SCHED_NOT_AVAIL_SLOT indicates unscheduled. @@ -3061,6 +3087,24 @@ enum nl80211_commands { * @NL80211_ATTR_NAN_NMI_MAC: The address of the NMI station to which this NDI * station belongs. Used with %NL80211_CMD_NEW_STATION when adding an NDI * station. + * @NL80211_ATTR_NAN_ULW: (Binary) The initial ULW(s) as published by the + * peer, as defined in the Wi-Fi Aware (TM) 4.0 specification Table 109 + * (Unaligned Schedule attribute format). Used to configure the device + * with the initial ULW(s) of a peer, before the device starts tracking it. + * @NL80211_ATTR_NAN_COMMITTED_DW: (u16) The committed DW as published by the + * peer, as defined in the Wi-Fi Aware (TM) 4.0 specification Table 80 + * (Committed DW Information field format). + * @NL80211_ATTR_NAN_SEQ_ID: (u8) The sequence ID of the peer schedule that + * %NL80211_CMD_NAN_SET_PEER_SCHED defines. The device follows the + * sequence ID in the frames to identify newer schedules. Once a schedule + * with a higher sequence ID is received, the device may stop communicating + * with that peer until a new peer schedule with a matching sequence ID is + * received. + * @NL80211_ATTR_NAN_MAX_CHAN_SWITCH_TIME: (u16) The maximum channel switch + * time, in microseconds. + * @NL80211_ATTR_NAN_PEER_MAPS: Nested array of peer schedule maps. + * Used with %NL80211_CMD_NAN_SET_PEER_SCHED. Contains up to 2 entries, + * each containing nested attributes from &enum nl80211_nan_peer_map_attrs. * * @NL80211_ATTR_INCUMBENT_SIGNAL_INTERFERENCE_BITMAP: u32 attribute specifying * the signal interference bitmap detected on the operating bandwidth for @@ -3662,6 +3706,12 @@ enum nl80211_attrs { NL80211_ATTR_NAN_NMI_MAC, + NL80211_ATTR_NAN_ULW, + NL80211_ATTR_NAN_COMMITTED_DW, + NL80211_ATTR_NAN_SEQ_ID, + NL80211_ATTR_NAN_MAX_CHAN_SWITCH_TIME, + NL80211_ATTR_NAN_PEER_MAPS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -8701,6 +8751,32 @@ enum nl80211_nan_capabilities { NL80211_NAN_CAPABILITIES_MAX = __NL80211_NAN_CAPABILITIES_LAST - 1, }; +/** + * enum nl80211_nan_peer_map_attrs - NAN peer schedule map attributes + * + * Nested attributes used within %NL80211_ATTR_NAN_PEER_MAPS to define + * individual peer schedule maps. + * + * @__NL80211_NAN_PEER_MAP_ATTR_INVALID: Invalid + * @NL80211_NAN_PEER_MAP_ATTR_MAP_ID: (u8) The map ID for this schedule map. + * @NL80211_NAN_PEER_MAP_ATTR_TIME_SLOTS: An array of u8 values with 32 cells. + * Each value maps a time slot to a channel index within the schedule's + * channel list (%NL80211_ATTR_NAN_CHANNEL attributes). + * %NL80211_NAN_SCHED_NOT_AVAIL_SLOT indicates unscheduled. + * @__NL80211_NAN_PEER_MAP_ATTR_LAST: Internal + * @NL80211_NAN_PEER_MAP_ATTR_MAX: Highest peer map attribute + */ +enum nl80211_nan_peer_map_attrs { + __NL80211_NAN_PEER_MAP_ATTR_INVALID, + + NL80211_NAN_PEER_MAP_ATTR_MAP_ID, + NL80211_NAN_PEER_MAP_ATTR_TIME_SLOTS, + + /* keep last */ + __NL80211_NAN_PEER_MAP_ATTR_LAST, + NL80211_NAN_PEER_MAP_ATTR_MAX = __NL80211_NAN_PEER_MAP_ATTR_LAST - 1, +}; + #define NL80211_NAN_SCHED_NOT_AVAIL_SLOT 0xff #endif /* __LINUX_NL80211_H */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 89fb61d53e2f..8f93e3548d2a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -367,6 +367,63 @@ static int validate_nan_avail_blob(const struct nlattr *attr, return 0; } +static int validate_nan_ulw(const struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + const u8 *data = nla_data(attr); + unsigned int len = nla_len(attr); + unsigned int pos = 0; + + while (pos < len) { + u16 attr_len; + + /* Need at least: Attr ID (1) + Length (2) */ + if (pos + 3 > len) { + NL_SET_ERR_MSG_FMT(extack, + "ULW: Incomplete header (need 3 bytes, have %u)", + len - pos); + return -EINVAL; + } + + if (data[pos] != 0x17) { + NL_SET_ERR_MSG_FMT(extack, + "ULW: Invalid Attribute ID 0x%02x (expected 0x17)", + data[pos]); + return -EINVAL; + } + pos++; + + /* Length is in little-endian format */ + attr_len = get_unaligned_le16(&data[pos]); + pos += 2; + + /* + * Check if length is one of the valid values: 16 (no + * channel/band entry included), 18 (band entry included), + * 21 (channel entry included without Auxiliary channel bitmap), + * or 23 (channel entry included with Auxiliary channel bitmap). + */ + if (attr_len != 16 && attr_len != 18 && attr_len != 21 && + attr_len != 23) { + NL_SET_ERR_MSG_FMT(extack, + "ULW: Invalid length %u (must be 16, 18, 21, or 23)", + attr_len); + return -EINVAL; + } + + if (pos + attr_len > len) { + NL_SET_ERR_MSG_FMT(extack, + "ULW: Length field (%u) exceeds remaining data (%u)", + attr_len, len - pos); + return -EINVAL; + } + + pos += attr_len; + } + + return 0; +} + static int validate_uhr_capa(const struct nlattr *attr, struct netlink_ext_ack *extack) { @@ -589,6 +646,13 @@ nl80211_nan_band_conf_policy[NL80211_NAN_BAND_CONF_ATTR_MAX + 1] = { [NL80211_NAN_BAND_CONF_DISABLE_SCAN] = { .type = NLA_FLAG }, }; +static const struct nla_policy +nl80211_nan_peer_map_policy[NL80211_NAN_PEER_MAP_ATTR_MAX + 1] = { + [NL80211_NAN_PEER_MAP_ATTR_MAP_ID] = NLA_POLICY_MAX(NLA_U8, 15), + [NL80211_NAN_PEER_MAP_ATTR_TIME_SLOTS] = + NLA_POLICY_EXACT_LEN(CFG80211_NAN_SCHED_NUM_TIME_SLOTS), +}; + static const struct nla_policy nl80211_nan_conf_policy[NL80211_NAN_CONF_ATTR_MAX + 1] = { [NL80211_NAN_CONF_CLUSTER_ID] = @@ -1005,6 +1069,13 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_nan_avail_blob), [NL80211_ATTR_NAN_SCHED_DEFERRED] = { .type = NLA_FLAG }, [NL80211_ATTR_NAN_NMI_MAC] = NLA_POLICY_ETH_ADDR, + [NL80211_ATTR_NAN_ULW] = + NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_nan_ulw), + [NL80211_ATTR_NAN_COMMITTED_DW] = { .type = NLA_U16 }, + [NL80211_ATTR_NAN_SEQ_ID] = { .type = NLA_U8 }, + [NL80211_ATTR_NAN_MAX_CHAN_SWITCH_TIME] = { .type = NLA_U16 }, + [NL80211_ATTR_NAN_PEER_MAPS] = + NLA_POLICY_NESTED_ARRAY(nl80211_nan_peer_map_policy), }; /* policy for the key attributes */ @@ -16659,8 +16730,8 @@ EXPORT_SYMBOL(cfg80211_nan_sched_update_done); static int nl80211_parse_nan_channel(struct cfg80211_registered_device *rdev, struct nlattr *channel, struct genl_info *info, - struct cfg80211_nan_local_sched *sched, - u8 index) + struct cfg80211_nan_channel *nan_channels, + u8 index, bool local) { struct nlattr **channel_parsed __free(kfree) = NULL; struct cfg80211_chan_def chandef; @@ -16695,40 +16766,304 @@ static int nl80211_parse_nan_channel(struct cfg80211_registered_device *rdev, return -EINVAL; } - for (int i = 0; i < index; i++) { - if (cfg80211_chandef_compatible(&sched->nan_channels[i].chandef, - &chandef)) { - NL_SET_ERR_MSG_ATTR(info->extack, channel, - "Channels in NAN schedule must be mutually incompatible"); - return -EINVAL; + if (local) { + for (int i = 0; i < index; i++) { + if (cfg80211_chandef_compatible(&nan_channels[i].chandef, + &chandef)) { + NL_SET_ERR_MSG_ATTR(info->extack, channel, + "Channels in NAN schedule must be mutually incompatible"); + return -EINVAL; + } } } - if (!channel_parsed[NL80211_ATTR_NAN_CHANNEL_ENTRY]) + if (!channel_parsed[NL80211_ATTR_NAN_CHANNEL_ENTRY]) { + NL_SET_ERR_MSG(info->extack, + "Missing NAN channel entry attribute"); return -EINVAL; + } - sched->nan_channels[index].channel_entry = + nan_channels[index].channel_entry = nla_data(channel_parsed[NL80211_ATTR_NAN_CHANNEL_ENTRY]); - if (!channel_parsed[NL80211_ATTR_NAN_RX_NSS]) + if (!channel_parsed[NL80211_ATTR_NAN_RX_NSS]) { + NL_SET_ERR_MSG(info->extack, + "Missing NAN RX NSS attribute"); return -EINVAL; + } - sched->nan_channels[index].rx_nss = + nan_channels[index].rx_nss = nla_get_u8(channel_parsed[NL80211_ATTR_NAN_RX_NSS]); n_rx_nss = u8_get_bits(rdev->wiphy.nan_capa.n_antennas, 0x03); - if (sched->nan_channels[index].rx_nss > n_rx_nss || - !sched->nan_channels[index].rx_nss) { + if ((local && nan_channels[index].rx_nss > n_rx_nss) || + !nan_channels[index].rx_nss) { NL_SET_ERR_MSG_ATTR(info->extack, channel, "Invalid RX NSS in NAN channel definition"); return -EINVAL; } - sched->nan_channels[index].chandef = chandef; + nan_channels[index].chandef = chandef; return 0; } +static int +nl80211_parse_nan_schedule(struct genl_info *info, struct nlattr *slots_attr, + u8 schedule[CFG80211_NAN_SCHED_NUM_TIME_SLOTS], + u8 n_channels) +{ + if (WARN_ON(nla_len(slots_attr) != CFG80211_NAN_SCHED_NUM_TIME_SLOTS)) + return -EINVAL; + + memcpy(schedule, nla_data(slots_attr), nla_len(slots_attr)); + + for (int slot = 0; slot < CFG80211_NAN_SCHED_NUM_TIME_SLOTS; slot++) { + if (schedule[slot] != NL80211_NAN_SCHED_NOT_AVAIL_SLOT && + schedule[slot] >= n_channels) { + NL_SET_ERR_MSG_FMT(info->extack, + "Invalid time slot: slot %d refers to channel index %d, n_channels=%d", + slot, schedule[slot], n_channels); + return -EINVAL; + } + } + + return 0; +} + +static int +nl80211_parse_nan_peer_map(struct genl_info *info, struct nlattr *map_attr, + struct cfg80211_nan_peer_map *map, u8 n_channels) +{ + struct nlattr *tb[NL80211_NAN_PEER_MAP_ATTR_MAX + 1]; + int ret; + + ret = nla_parse_nested(tb, NL80211_NAN_PEER_MAP_ATTR_MAX, map_attr, + nl80211_nan_peer_map_policy, info->extack); + if (ret) + return ret; + + if (!tb[NL80211_NAN_PEER_MAP_ATTR_MAP_ID] || + !tb[NL80211_NAN_PEER_MAP_ATTR_TIME_SLOTS]) { + NL_SET_ERR_MSG(info->extack, + "Missing required peer map attributes"); + return -EINVAL; + } + + map->map_id = nla_get_u8(tb[NL80211_NAN_PEER_MAP_ATTR_MAP_ID]); + + /* Parse schedule */ + return nl80211_parse_nan_schedule(info, + tb[NL80211_NAN_PEER_MAP_ATTR_TIME_SLOTS], + map->schedule, n_channels); +} + +static int nl80211_nan_validate_map_pair(struct wiphy *wiphy, + struct genl_info *info, + const struct cfg80211_nan_peer_map *map1, + const struct cfg80211_nan_peer_map *map2, + struct cfg80211_nan_channel *nan_channels) +{ + /* Check for duplicate map_id */ + if (map1->map_id == map2->map_id) { + NL_SET_ERR_MSG_FMT(info->extack, "Duplicate map_id %u", + map1->map_id); + return -EINVAL; + } + + /* Check for compatible channels between maps */ + for (int i = 0; i < ARRAY_SIZE(map1->schedule); i++) { + if (map1->schedule[i] == NL80211_NAN_SCHED_NOT_AVAIL_SLOT) + continue; + + for (int j = 0; j < ARRAY_SIZE(map2->schedule); j++) { + u8 ch1 = map1->schedule[i]; + u8 ch2 = map2->schedule[j]; + + if (ch2 == NL80211_NAN_SCHED_NOT_AVAIL_SLOT) + continue; + + if (cfg80211_chandef_compatible(&nan_channels[ch1].chandef, + &nan_channels[ch2].chandef)) { + NL_SET_ERR_MSG_FMT(info->extack, + "Maps %u and %u have compatible channels %d and %d", + map1->map_id, map2->map_id, + ch1, ch2); + return -EINVAL; + } + } + } + + /* + * Check for conflicting time slots between maps. + * Only check for single-radio devices (n_radio <= 1) which cannot + * operate on multiple channels simultaneously. + */ + if (wiphy->n_radio > 1) + return 0; + + for (int i = 0; i < ARRAY_SIZE(map1->schedule); i++) { + if (map1->schedule[i] != NL80211_NAN_SCHED_NOT_AVAIL_SLOT && + map2->schedule[i] != NL80211_NAN_SCHED_NOT_AVAIL_SLOT) { + NL_SET_ERR_MSG_FMT(info->extack, + "Maps %u and %u both schedule slot %d", + map1->map_id, map2->map_id, i); + return -EINVAL; + } + } + + return 0; +} + +static int nl80211_nan_set_peer_sched(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct cfg80211_nan_channel *nan_channels __free(kfree) = NULL; + struct cfg80211_nan_peer_sched sched = {}; + struct wireless_dev *wdev = info->user_ptr[1]; + struct nlattr *map_attr, *channel; + int ret, n_maps = 0, n_channels = 0, i = 0, rem; + + if (wdev->iftype != NL80211_IFTYPE_NAN) + return -EOPNOTSUPP; + + if (!info->attrs[NL80211_ATTR_MAC] || + !info->attrs[NL80211_ATTR_NAN_COMMITTED_DW]) { + NL_SET_ERR_MSG(info->extack, + "Required NAN peer schedule attributes are missing"); + return -EINVAL; + } + + /* First count how many channel attributes we got */ + nlmsg_for_each_attr_type(channel, NL80211_ATTR_NAN_CHANNEL, + info->nlhdr, GENL_HDRLEN, rem) + n_channels++; + + if (!((info->attrs[NL80211_ATTR_NAN_SEQ_ID] && + info->attrs[NL80211_ATTR_NAN_PEER_MAPS] && n_channels) || + ((!info->attrs[NL80211_ATTR_NAN_SEQ_ID] && + !info->attrs[NL80211_ATTR_NAN_PEER_MAPS] && !n_channels)))) { + NL_SET_ERR_MSG(info->extack, + "Either provide all of: seq id, channels and maps, or none"); + return -EINVAL; + } + + /* + * Limit the number of peer channels to: + * local_channels * 4 (possible BWs) * 2 (possible NSS values) + */ + if (n_channels && n_channels > wdev->u.nan.n_channels * 4 * 2) { + NL_SET_ERR_MSG_FMT(info->extack, + "Too many peer channels: %d (max %d)", + n_channels, + wdev->u.nan.n_channels * 4 * 2); + return -EINVAL; + } + + if (n_channels) { + nan_channels = kcalloc(n_channels, sizeof(*nan_channels), + GFP_KERNEL); + if (!nan_channels) + return -ENOMEM; + } + + /* Parse peer channels */ + nlmsg_for_each_attr_type(channel, NL80211_ATTR_NAN_CHANNEL, + info->nlhdr, GENL_HDRLEN, rem) { + bool compatible = false; + + ret = nl80211_parse_nan_channel(rdev, channel, info, + nan_channels, i, false); + if (ret) + return ret; + + /* Verify channel is compatible with at least one local channel */ + for (int j = 0; j < wdev->u.nan.n_channels; j++) { + if (cfg80211_chandef_compatible(&nan_channels[i].chandef, + &wdev->u.nan.chandefs[j])) { + compatible = true; + break; + } + } + if (!compatible) { + NL_SET_ERR_MSG_FMT(info->extack, + "Channel %d not compatible with any local channel", + i); + return -EINVAL; + } + i++; + } + + sched.n_channels = n_channels; + sched.nan_channels = nan_channels; + sched.peer_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); + sched.seq_id = nla_get_u8_default(info->attrs[NL80211_ATTR_NAN_SEQ_ID], 0); + sched.committed_dw = nla_get_u16(info->attrs[NL80211_ATTR_NAN_COMMITTED_DW]); + sched.max_chan_switch = + nla_get_u16_default(info->attrs[NL80211_ATTR_NAN_MAX_CHAN_SWITCH_TIME], 0); + + if (info->attrs[NL80211_ATTR_NAN_ULW]) { + sched.ulw_size = nla_len(info->attrs[NL80211_ATTR_NAN_ULW]); + sched.init_ulw = nla_data(info->attrs[NL80211_ATTR_NAN_ULW]); + } + + /* Initialize all maps as invalid */ + for (int j = 0; j < ARRAY_SIZE(sched.maps); j++) + sched.maps[j].map_id = CFG80211_NAN_INVALID_MAP_ID; + + if (info->attrs[NL80211_ATTR_NAN_PEER_MAPS]) { + /* Parse each map */ + nla_for_each_nested(map_attr, info->attrs[NL80211_ATTR_NAN_PEER_MAPS], + rem) { + if (n_maps >= ARRAY_SIZE(sched.maps)) { + NL_SET_ERR_MSG(info->extack, "Too many peer maps"); + return -EINVAL; + } + + ret = nl80211_parse_nan_peer_map(info, map_attr, + &sched.maps[n_maps], + n_channels); + if (ret) + return ret; + + /* Validate against previous maps */ + for (int j = 0; j < n_maps; j++) { + ret = nl80211_nan_validate_map_pair(&rdev->wiphy, info, + &sched.maps[j], + &sched.maps[n_maps], + nan_channels); + if (ret) + return ret; + } + + n_maps++; + } + } + + /* Verify each channel is scheduled at least once */ + for (int ch = 0; ch < n_channels; ch++) { + bool scheduled = false; + + for (int m = 0; m < n_maps && !scheduled; m++) { + for (int s = 0; s < ARRAY_SIZE(sched.maps[m].schedule); s++) { + if (sched.maps[m].schedule[s] == ch) { + scheduled = true; + break; + } + } + } + if (!scheduled) { + NL_SET_ERR_MSG_FMT(info->extack, + "Channel %d is not scheduled in any map", + ch); + return -EINVAL; + } + } + + return rdev_nan_set_peer_sched(rdev, wdev, &sched); +} + static bool nl80211_nan_is_sched_empty(struct cfg80211_nan_local_sched *sched) { if (!sched->n_channels) @@ -16748,7 +17083,7 @@ static int nl80211_nan_set_local_sched(struct sk_buff *skb, struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct cfg80211_nan_local_sched *sched __free(kfree) = NULL; struct wireless_dev *wdev = info->user_ptr[1]; - int rem, i = 0, n_channels = 0; + int rem, i = 0, n_channels = 0, ret; struct nlattr *channel; bool sched_empty; @@ -16775,26 +17110,20 @@ static int nl80211_nan_set_local_sched(struct sk_buff *skb, nlmsg_for_each_attr_type(channel, NL80211_ATTR_NAN_CHANNEL, info->nlhdr, GENL_HDRLEN, rem) { - int ret = nl80211_parse_nan_channel(rdev, channel, info, sched, - i); + ret = nl80211_parse_nan_channel(rdev, channel, info, + sched->nan_channels, i, true); if (ret) return ret; i++; } - memcpy(sched->schedule, - nla_data(info->attrs[NL80211_ATTR_NAN_TIME_SLOTS]), - nla_len(info->attrs[NL80211_ATTR_NAN_TIME_SLOTS])); - - for (int slot = 0; slot < ARRAY_SIZE(sched->schedule); slot++) { - if (sched->schedule[slot] != NL80211_NAN_SCHED_NOT_AVAIL_SLOT && - sched->schedule[slot] >= sched->n_channels) { - NL_SET_ERR_MSG(info->extack, - "Invalid time slot in NAN schedule"); - return -EINVAL; - } - } + /* Parse and validate schedule */ + ret = nl80211_parse_nan_schedule(info, + info->attrs[NL80211_ATTR_NAN_TIME_SLOTS], + sched->schedule, sched->n_channels); + if (ret) + return ret; sched_empty = nl80211_nan_is_sched_empty(sched); @@ -19646,6 +19975,12 @@ static const struct genl_small_ops nl80211_small_ops[] = { .flags = GENL_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP), }, + { + .cmd = NL80211_CMD_NAN_SET_PEER_SCHED, + .doit = nl80211_nan_set_peer_sched, + .flags = GENL_ADMIN_PERM, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP), + }, }; static struct genl_family nl80211_fam __ro_after_init = { diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index b886dedb25c6..bba239a068f6 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -1076,6 +1076,22 @@ rdev_nan_set_local_sched(struct cfg80211_registered_device *rdev, return ret; } +static inline int +rdev_nan_set_peer_sched(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + struct cfg80211_nan_peer_sched *sched) +{ + int ret; + + trace_rdev_nan_set_peer_sched(&rdev->wiphy, wdev, sched); + if (rdev->ops->nan_set_peer_sched) + ret = rdev->ops->nan_set_peer_sched(&rdev->wiphy, wdev, sched); + else + ret = -EOPNOTSUPP; + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + static inline int rdev_set_mac_acl(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_acl_data *params) diff --git a/net/wireless/trace.h b/net/wireless/trace.h index d32b83439363..df639d97cc0c 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2431,6 +2431,34 @@ TRACE_EVENT(rdev_nan_set_local_sched, CFG80211_NAN_SCHED_NUM_TIME_SLOTS, 1)) ); +TRACE_EVENT(rdev_nan_set_peer_sched, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + struct cfg80211_nan_peer_sched *sched), + TP_ARGS(wiphy, wdev, sched), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + __array(u8, peer_addr, ETH_ALEN) + __field(u8, seq_id) + __field(u16, committed_dw) + __field(u16, max_chan_switch) + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + memcpy(__entry->peer_addr, sched->peer_addr, ETH_ALEN); + __entry->seq_id = sched->seq_id; + __entry->committed_dw = sched->committed_dw; + __entry->max_chan_switch = sched->max_chan_switch; + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT + ", peer: %pM, seq_id: %u, committed_dw: 0x%x, max_chan_switch: %u", + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->peer_addr, + __entry->seq_id, __entry->committed_dw, + __entry->max_chan_switch + ) +); + TRACE_EVENT(rdev_set_mac_acl, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_acl_data *params), From 21ade3eed33e055aa67e0b99ff33b6eafb57a5ec Mon Sep 17 00:00:00 2001 From: Daniel Gabay Date: Wed, 18 Mar 2026 14:39:21 +0200 Subject: [PATCH 0934/1561] wifi: cfg80211: allow ToDS=0/FromDS=0 data frames on NAN data interfaces According to Wi-Fi Aware (TM) specification Table 3, data frame should have 0 in the FromDS/ToDS fields. Don't drop received frames with 0 FromDS/ToDS if they are received on NAN_DATA interface. While at it, fix a double indent. Signed-off-by: Daniel Gabay Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260108102921.de5f318a790a.Id34dd69552920b579e6881ffd38fa692a491b601@changeid Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260219094725.3846371-5-miriam.rachel.korenblit@intel.com Link: https://patch.msgid.link/20260318123926.206536-8-miriam.rachel.korenblit@intel.com Signed-off-by: Johannes Berg --- net/wireless/util.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/wireless/util.c b/net/wireless/util.c index e2878d20a32d..cff5a1bd95cc 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -625,8 +625,9 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, case cpu_to_le16(0): if (iftype != NL80211_IFTYPE_ADHOC && iftype != NL80211_IFTYPE_STATION && - iftype != NL80211_IFTYPE_OCB) - return -1; + iftype != NL80211_IFTYPE_OCB && + iftype != NL80211_IFTYPE_NAN_DATA) + return -1; break; } From f826534483bac96320a3686694e3e1a033087240 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Wed, 18 Mar 2026 14:39:22 +0200 Subject: [PATCH 0935/1561] wifi: nl80211: allow reporting spurious NAN Data frames Currently we have this ability for AP and GO. But it is now needed also for NAN_DATA mode - as per Wi-Fi Aware (TM) 4.0 specification 6.2.5: "If a NAN Device receives a unicast NAN Data frame destined for it, but with A1 address and A2 address that are not assigned to the NDP, it shall discard the frame, and should send a Data Path Termination NAF to the frame transmitter" To allow this, change NL80211_CMD_UNEXPECTED_FRAME to support also NAN_DATA, so drivers can report such cases and the user space can act accordingly. Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260108102921.5cf9f1351655.I47c98ce37843730b8b9eb8bd8e9ef62ed6c17613@changeid Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260219094725.3846371-6-miriam.rachel.korenblit@intel.com Link: https://patch.msgid.link/20260318123926.206536-9-miriam.rachel.korenblit@intel.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 13 +++++++------ include/uapi/linux/nl80211.h | 5 +++-- net/wireless/mlme.c | 4 ++-- net/wireless/nl80211.c | 12 +++++++----- 4 files changed, 19 insertions(+), 15 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 48ca5d3aa201..0d19f34ea7ac 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -6938,8 +6938,8 @@ enum ieee80211_ap_reg_power { * the P2P Device. * @ps: powersave mode is enabled * @ps_timeout: dynamic powersave timeout - * @ap_unexpected_nlportid: (private) netlink port ID of application - * registered for unexpected class 3 frames (AP mode) + * @unexpected_nlportid: (private) netlink port ID of application + * registered for unexpected frames (AP mode or NAN_DATA mode) * @conn: (private) cfg80211 software SME connection state machine data * @connect_keys: (private) keys to set after connection is established * @conn_bss_type: connecting/connected BSS type @@ -7001,7 +7001,7 @@ struct wireless_dev { bool ps; int ps_timeout; - u32 ap_unexpected_nlportid; + u32 unexpected_nlportid; u32 owner_nlportid; bool nl_owner_dead; @@ -9572,9 +9572,10 @@ void cfg80211_pmksa_candidate_notify(struct net_device *dev, int index, * @addr: the transmitter address * @gfp: context flags * - * This function is used in AP mode (only!) to inform userspace that - * a spurious class 3 frame was received, to be able to deauth the - * sender. + * This function is used in AP mode to inform userspace that a spurious + * class 3 frame was received, to be able to deauth the sender. + * It is also used in NAN_DATA mode to report frames from unknown peers + * (A2 not assigned to any active NDP), per Wi-Fi Aware (TM) 4.0 specification 6.2.5. * Return: %true if the frame was passed to userspace (or this failed * for a reason other than not having a subscription.) */ diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index e7f31a34eee4..cf6f1f6b9e36 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -906,8 +906,9 @@ * @NL80211_CMD_UNEXPECTED_FRAME: Used by an application controlling an AP * (or GO) interface (i.e. hostapd) to ask for unexpected frames to * implement sending deauth to stations that send unexpected class 3 - * frames. Also used as the event sent by the kernel when such a frame - * is received. + * frames. For NAN_DATA interfaces, this is used to report frames from + * unknown peers (A2 not assigned to any active NDP). + * Also used as the event sent by the kernel when such a frame is received. * For the event, the %NL80211_ATTR_MAC attribute carries the TA and * other attributes like the interface index are present. * If used as the command it must have an interface index and you can diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 5cd86253a62e..e817ee297df0 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -782,8 +782,8 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid) rdev_crit_proto_stop(rdev, wdev); } - if (nlportid == wdev->ap_unexpected_nlportid) - wdev->ap_unexpected_nlportid = 0; + if (nlportid == wdev->unexpected_nlportid) + wdev->unexpected_nlportid = 0; } void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 8f93e3548d2a..7f47feaf4422 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -15777,13 +15777,14 @@ static int nl80211_register_unexpected_frame(struct sk_buff *skb, struct wireless_dev *wdev = dev->ieee80211_ptr; if (wdev->iftype != NL80211_IFTYPE_AP && - wdev->iftype != NL80211_IFTYPE_P2P_GO) + wdev->iftype != NL80211_IFTYPE_P2P_GO && + wdev->iftype != NL80211_IFTYPE_NAN_DATA) return -EINVAL; - if (wdev->ap_unexpected_nlportid) + if (wdev->unexpected_nlportid) return -EBUSY; - wdev->ap_unexpected_nlportid = info->snd_portid; + wdev->unexpected_nlportid = info->snd_portid; return 0; } @@ -21281,7 +21282,7 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd, struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct sk_buff *msg; void *hdr; - u32 nlportid = READ_ONCE(wdev->ap_unexpected_nlportid); + u32 nlportid = READ_ONCE(wdev->unexpected_nlportid); if (!nlportid) return false; @@ -21321,7 +21322,8 @@ bool cfg80211_rx_spurious_frame(struct net_device *dev, const u8 *addr, trace_cfg80211_rx_spurious_frame(dev, addr, link_id); if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && - wdev->iftype != NL80211_IFTYPE_P2P_GO)) { + wdev->iftype != NL80211_IFTYPE_P2P_GO && + wdev->iftype != NL80211_IFTYPE_NAN_DATA)) { trace_cfg80211_return_bool(false); return false; } From 44ea50a5bf304d3d6b55e4a2f946ce3c45a4e648 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Wed, 18 Mar 2026 14:39:23 +0200 Subject: [PATCH 0936/1561] wifi: nl80211: add NL80211_CMD_NAN_ULW_UPDATE notification Add a new notification command that allows drivers to notify user space when the device's ULW (Unaligned Schedule) blob has been updated. This enables user space to attach the updated ULW blob to frames sent to NAN peers. Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260219114327.32b715af4ebb.Ibdb6e33941afd94abf77245245f87e4338d729d3@changeid Link: https://patch.msgid.link/20260318123926.206536-10-miriam.rachel.korenblit@intel.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 14 ++++++++++++ include/uapi/linux/nl80211.h | 5 +++++ net/wireless/nl80211.c | 43 ++++++++++++++++++++++++++++++++++++ net/wireless/trace.h | 21 ++++++++++++++++++ 4 files changed, 83 insertions(+) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0d19f34ea7ac..ee173f69c417 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -10574,6 +10574,20 @@ void cfg80211_nan_cluster_joined(struct wireless_dev *wdev, const u8 *cluster_id, bool new_cluster, gfp_t gfp); +/** + * cfg80211_nan_ulw_update - Notify user space about ULW update + * @wdev: Pointer to the wireless device structure + * @ulw: Pointer to the ULW blob data + * @ulw_len: Length of the ULW blob in bytes + * @gfp: Memory allocation flags + * + * This function is used by drivers to notify user space when the device's + * ULW (Unaligned Schedule) blob has been updated. User space can use this + * blob to attach to frames sent to peers. + */ +void cfg80211_nan_ulw_update(struct wireless_dev *wdev, + const u8 *ulw, size_t ulw_len, gfp_t gfp); + #ifdef CONFIG_CFG80211_DEBUGFS /** * wiphy_locked_debugfs_read - do a locked read in debugfs diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index cf6f1f6b9e36..947ec7079484 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1402,6 +1402,10 @@ * completely replace the previous one. * The peer schedule is automatically removed when the NMI station is * removed. + * @NL80211_CMD_NAN_ULW_UPDATE: Notification from the driver to user space + * with the updated ULW blob of the device. User space can use this blob + * to attach to frames sent to peers. This notification contains + * %NL80211_ATTR_NAN_ULW with the ULW blob. * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1673,6 +1677,7 @@ enum nl80211_commands { NL80211_CMD_NAN_SET_PEER_SCHED, + NL80211_CMD_NAN_ULW_UPDATE, /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 7f47feaf4422..b5185655e687 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -22893,6 +22893,49 @@ void cfg80211_nan_cluster_joined(struct wireless_dev *wdev, } EXPORT_SYMBOL(cfg80211_nan_cluster_joined); +void cfg80211_nan_ulw_update(struct wireless_dev *wdev, + const u8 *ulw, size_t ulw_len, gfp_t gfp) +{ + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + struct sk_buff *msg; + void *hdr; + + trace_cfg80211_nan_ulw_update(wiphy, wdev, ulw, ulw_len); + + if (!wdev->owner_nlportid) + return; + + /* 32 for the wiphy idx, 64 for the wdev id, 100 for padding */ + msg = nlmsg_new(nla_total_size(sizeof(u32)) + + nla_total_size(ulw_len) + + nla_total_size(sizeof(u64)) + 100, + gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_NAN_ULW_UPDATE); + if (!hdr) + goto nla_put_failure; + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || + nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), + NL80211_ATTR_PAD) || + (ulw && ulw_len && + nla_put(msg, NL80211_ATTR_NAN_ULW, ulw_len, ulw))) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + genlmsg_unicast(wiphy_net(wiphy), msg, wdev->owner_nlportid); + + return; + + nla_put_failure: + nlmsg_free(msg); +} +EXPORT_SYMBOL(cfg80211_nan_ulw_update); + /* initialisation/exit functions */ int __init nl80211_init(void) diff --git a/net/wireless/trace.h b/net/wireless/trace.h index df639d97cc0c..061bb84f1a48 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -4342,6 +4342,27 @@ TRACE_EVENT(cfg80211_nan_sched_update_done, TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT " success=%d", WIPHY_PR_ARG, WDEV_PR_ARG, __entry->success) ); + +TRACE_EVENT(cfg80211_nan_ulw_update, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + const u8 *ulw, size_t ulw_len), + TP_ARGS(wiphy, wdev, ulw, ulw_len), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + __dynamic_array(u8, ulw, ulw_len) + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + if (ulw && ulw_len) + memcpy(__get_dynamic_array(ulw), ulw, ulw_len); + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT " ulw: %s", + WIPHY_PR_ARG, WDEV_PR_ARG, + __print_array(__get_dynamic_array(ulw), + __get_dynamic_array_len(ulw), 1)) +); #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH From 154b0296c0ecd3edb05555f824b6061438de2cd4 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Wed, 18 Mar 2026 14:39:24 +0200 Subject: [PATCH 0937/1561] wifi: nl80211: Add a notification to notify NAN channel evacuation If all available channel resources are used for NAN channels, and one of them is shared with another interface, and that interface needs to move to a different channel (for example STA interface that needs to do a channel or a link switch), then the driver can evacuate one of the NAN channels (i.e. detach it from its channel resource and announce to the peers that this channel is ULWed). In that case, the driver needs to notify user space about the channel evacuation, so the user space can adjust the local schedule accordingly. Add a notification to let userspace know about it. Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260219114327.d5bebfd5ff73.Iaaf5ef17e1ab7a38c19d60558e68fcf517e2b400@changeid Link: https://patch.msgid.link/20260318123926.206536-11-miriam.rachel.korenblit@intel.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 19 ++++++++++++++ include/uapi/linux/nl80211.h | 27 +++++++++++++++----- net/wireless/nl80211.c | 48 ++++++++++++++++++++++++++++++++++++ net/wireless/trace.h | 18 ++++++++++++++ 4 files changed, 106 insertions(+), 6 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index ee173f69c417..9d3639ff9c28 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -10588,6 +10588,25 @@ void cfg80211_nan_cluster_joined(struct wireless_dev *wdev, void cfg80211_nan_ulw_update(struct wireless_dev *wdev, const u8 *ulw, size_t ulw_len, gfp_t gfp); +/** + * cfg80211_nan_channel_evac - Notify user space about NAN channel evacuation + * @wdev: Pointer to the wireless device structure + * @chandef: Pointer to the channel definition of the NAN channel that was + * evacuated + * @gfp: Memory allocation flags + * + * This function is used by drivers to notify user space when a NAN + * channel has been evacuated (i.e. ULWed) due to channel resource conflicts + * with other interfaces. + * This can happen when another interface sharing the channel resource with NAN + * needs to move to a different channel (e.g. due to channel switch or link + * switch). User space may reconfigure the local schedule to exclude the + * evacuated channel. + */ +void cfg80211_nan_channel_evac(struct wireless_dev *wdev, + const struct cfg80211_chan_def *chandef, + gfp_t gfp); + #ifdef CONFIG_CFG80211_DEBUGFS /** * wiphy_locked_debugfs_read - do a locked read in debugfs diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 947ec7079484..3d55bf4be36f 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1406,6 +1406,15 @@ * with the updated ULW blob of the device. User space can use this blob * to attach to frames sent to peers. This notification contains * %NL80211_ATTR_NAN_ULW with the ULW blob. + * @NL80211_CMD_NAN_CHANNEL_EVAC: Notification to indicate that a NAN + * channel has been evacuated due to resource conflicts with other + * interfaces. This can happen when another interface sharing the channel + * resource with NAN needs to move to a different channel (e.g., channel + * switch or link switch on a BSS interface). + * The notification contains %NL80211_ATTR_NAN_CHANNEL attribute + * identifying the evacuated channel. + * User space may reconfigure the local schedule in response to this + * notification. * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1678,6 +1687,9 @@ enum nl80211_commands { NL80211_CMD_NAN_SET_PEER_SCHED, NL80211_CMD_NAN_ULW_UPDATE, + + NL80211_CMD_NAN_CHANNEL_EVAC, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -3040,20 +3052,23 @@ enum nl80211_commands { * Currently only supported in mac80211 drivers. * @NL80211_ATTR_NAN_CHANNEL: This is a nested attribute. There can be multiple * attributes of this type, each one represents a channel definition and - * consists of top-level attributes like %NL80211_ATTR_WIPHY_FREQ. Must - * contain %NL80211_ATTR_NAN_CHANNEL_ENTRY and - * %NL80211_ATTR_NAN_RX_NSS. - * This attribute is used with %NL80211_CMD_NAN_SET_LOCAL_SCHED to specify + * consists of top-level attributes like %NL80211_ATTR_WIPHY_FREQ. + * When used with %NL80211_CMD_NAN_SET_LOCAL_SCHED, it specifies * the channel definitions on which the radio needs to operate during * specific time slots. All of the channel definitions should be mutually - * incompatible. - * This is also used with %NL80211_CMD_NAN_SET_PEER_SCHED to configure the + * incompatible. With this command, %NL80211_ATTR_NAN_CHANNEL_ENTRY and + * %NL80211_ATTR_NAN_RX_NSS are mandatory. + * When used with %NL80211_CMD_NAN_SET_PEER_SCHED, it configures the * peer NAN channels. In that case, the channel definitions can be * compatible to each other, or even identical just with different RX NSS. + * With this command, %NL80211_ATTR_NAN_CHANNEL_ENTRY and + * %NL80211_ATTR_NAN_RX_NSS are mandatory. * The number of channels should fit the current configuration of channels * and the possible interface combinations. * If an existing NAN channel is changed but the chandef isn't, the * channel entry must also remain unchanged. + * When used with %NL80211_CMD_NAN_CHANNEL_EVAC, this identifies the + * channels that were evacuated. * @NL80211_ATTR_NAN_CHANNEL_ENTRY: a byte array of 6 bytes. contains the * Channel Entry as defined in Wi-Fi Aware (TM) 4.0 specification Table * 100 (Channel Entry format for the NAN Availability attribute). diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b5185655e687..d65ebba0970b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -22936,6 +22936,54 @@ void cfg80211_nan_ulw_update(struct wireless_dev *wdev, } EXPORT_SYMBOL(cfg80211_nan_ulw_update); +void cfg80211_nan_channel_evac(struct wireless_dev *wdev, + const struct cfg80211_chan_def *chandef, + gfp_t gfp) +{ + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + struct sk_buff *msg; + struct nlattr *chan_attr; + void *hdr; + + trace_cfg80211_nan_channel_evac(wiphy, wdev, chandef); + + if (!wdev->owner_nlportid) + return; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_NAN_CHANNEL_EVAC); + if (!hdr) + goto nla_put_failure; + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || + nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), + NL80211_ATTR_PAD)) + goto nla_put_failure; + + chan_attr = nla_nest_start(msg, NL80211_ATTR_NAN_CHANNEL); + if (!chan_attr) + goto nla_put_failure; + + if (nl80211_send_chandef(msg, chandef)) + goto nla_put_failure; + + nla_nest_end(msg, chan_attr); + + genlmsg_end(msg, hdr); + + genlmsg_unicast(wiphy_net(wiphy), msg, wdev->owner_nlportid); + + return; + + nla_put_failure: + nlmsg_free(msg); +} +EXPORT_SYMBOL(cfg80211_nan_channel_evac); + /* initialisation/exit functions */ int __init nl80211_init(void) diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 061bb84f1a48..eb5bedf9c92a 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -4363,6 +4363,24 @@ TRACE_EVENT(cfg80211_nan_ulw_update, __print_array(__get_dynamic_array(ulw), __get_dynamic_array_len(ulw), 1)) ); + +TRACE_EVENT(cfg80211_nan_channel_evac, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + const struct cfg80211_chan_def *chandef), + TP_ARGS(wiphy, wdev, chandef), + TP_STRUCT__entry( + WDEV_ENTRY + WIPHY_ENTRY + CHAN_DEF_ENTRY + ), + TP_fast_assign( + WDEV_ASSIGN; + WIPHY_ASSIGN; + CHAN_DEF_ASSIGN(chandef); + ), + TP_printk(WDEV_PR_FMT ", " WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT, + WDEV_PR_ARG, WIPHY_PR_ARG, CHAN_DEF_PR_ARG) +); #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH From 15d6dacdc97dce5ca8a0baf40f5fe8f3dcfef516 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Wed, 18 Mar 2026 14:39:25 +0200 Subject: [PATCH 0938/1561] wifi: ieee80211: Add some missing NAN definitions Add some missing NAN Device capabilities definitions. Signed-off-by: Ilan Peer Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260318143604.5f6b36d2b208.I7ef571682d5add96eabfcf87f81285893021e851@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211-nan.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/linux/ieee80211-nan.h b/include/linux/ieee80211-nan.h index d07959bf8a90..ebf28ea651f9 100644 --- a/include/linux/ieee80211-nan.h +++ b/include/linux/ieee80211-nan.h @@ -9,7 +9,7 @@ * Copyright (c) 2006, Michael Wu * Copyright (c) 2013 - 2014 Intel Mobile Communications GmbH * Copyright (c) 2016 - 2017 Intel Deutschland GmbH - * Copyright (c) 2018 - 2025 Intel Corporation + * Copyright (c) 2018 - 2026 Intel Corporation */ #ifndef LINUX_IEEE80211_NAN_H @@ -23,6 +23,11 @@ #define NAN_OP_MODE_160MHZ 0x04 #define NAN_OP_MODE_PNDL_SUPPRTED 0x08 +#define NAN_DEV_CAPA_NUM_TX_ANT_POS 0 +#define NAN_DEV_CAPA_NUM_TX_ANT_MASK 0x0f +#define NAN_DEV_CAPA_NUM_RX_ANT_POS 4 +#define NAN_DEV_CAPA_NUM_RX_ANT_MASK 0xf0 + /* NAN Device capabilities, as defined in Wi-Fi Aware (TM) specification * Table 79 */ From e465ce0a8801e37d3092b2b364be59cd7f9ad49a Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Wed, 18 Mar 2026 14:39:26 +0200 Subject: [PATCH 0939/1561] wifi: cfg80211: allow protected action frame TX for NAN Allow transmitting protected dual of public action frames on NAN device and NAN data interfaces, since NAN action frames may be protected and can be sent on both. Signed-off-by: Avraham Stern Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260318143604.73801a92180c.I16000c3e1e2bbc320457db1ac728d789bb2f36c6@changeid Signed-off-by: Johannes Berg --- net/wireless/mlme.c | 9 +++++++-- net/wireless/nl80211.c | 2 ++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index e817ee297df0..bd72317c4964 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -4,7 +4,7 @@ * * Copyright (c) 2009, Jouni Malinen * Copyright (c) 2015 Intel Deutschland GmbH - * Copyright (C) 2019-2020, 2022-2025 Intel Corporation + * Copyright (C) 2019-2020, 2022-2026 Intel Corporation */ #include @@ -933,12 +933,17 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, * cfg80211 doesn't track the stations */ break; + case NL80211_IFTYPE_NAN: + case NL80211_IFTYPE_NAN_DATA: + if (mgmt->u.action.category != + WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION) + err = -EOPNOTSUPP; + break; case NL80211_IFTYPE_P2P_DEVICE: /* * fall through, P2P device only supports * public action frames */ - case NL80211_IFTYPE_NAN: default: err = -EOPNOTSUPP; break; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d65ebba0970b..f334cdef8958 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -14207,6 +14207,7 @@ static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info) case NL80211_IFTYPE_P2P_DEVICE: break; case NL80211_IFTYPE_NAN: + case NL80211_IFTYPE_NAN_DATA: if (!wiphy_ext_feature_isset(wdev->wiphy, NL80211_EXT_FEATURE_SECURE_NAN) && !(wdev->wiphy->nan_capa.flags & @@ -14270,6 +14271,7 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) case NL80211_IFTYPE_P2P_GO: break; case NL80211_IFTYPE_NAN: + case NL80211_IFTYPE_NAN_DATA: if (!wiphy_ext_feature_isset(wdev->wiphy, NL80211_EXT_FEATURE_SECURE_NAN) && !(wdev->wiphy->nan_capa.flags & From 7dd6f81f4ef801b57f6ce7b0eee32aef5c488538 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Wed, 25 Mar 2026 21:57:39 +0200 Subject: [PATCH 0940/1561] wifi: mac80211: ignore reserved bits in reconfiguration status The Link ID Info field in the Reconfiguration Status Duple subfield of the Reconfiguration Response frame only uses the lower four bits for the link ID. The upper bits are reserved and should therefore be ignored. Signed-off-by: Benjamin Berg Reviewed-by: Ilan Peer Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260325215404.ab5ccf4bc62e.I9aef8f4fb6f1b06671bb6cf0e2bd4ec6e4c8bda4@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 7 +++++++ net/mac80211/mlme.c | 14 ++++++++------ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 52db36120314..b5d649db123f 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1194,6 +1194,13 @@ struct ieee80211_mgmt { #define IEEE80211_MIN_ACTION_SIZE(type) offsetofend(struct ieee80211_mgmt, u.action.type) +/* Link Reconfiguration Status Duple field */ +struct ieee80211_ml_reconf_status { + u8 info; + __le16 status; +} __packed; + +#define IEEE80211_ML_RECONF_LINK_ID_MASK 0xf /* Management MIC information element (IEEE 802.11w) for CMAC */ struct ieee80211_mmie { diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 173a60360a45..7fc5616cb244 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -10459,8 +10459,8 @@ void ieee80211_process_ml_reconf_resp(struct ieee80211_sub_if_data *sdata, pos = mgmt->u.action.ml_reconf_resp.variable; len -= offsetofend(typeof(*mgmt), u.action.ml_reconf_resp); - /* each status duple is 3 octets */ - if (len < mgmt->u.action.ml_reconf_resp.count * 3) { + if (len < mgmt->u.action.ml_reconf_resp.count * + sizeof(struct ieee80211_ml_reconf_status)) { sdata_info(sdata, "mlo: reconf: unexpected len=%zu, count=%u\n", len, mgmt->u.action.ml_reconf_resp.count); @@ -10469,9 +10469,11 @@ void ieee80211_process_ml_reconf_resp(struct ieee80211_sub_if_data *sdata, link_mask = sta_changed_links; for (i = 0; i < mgmt->u.action.ml_reconf_resp.count; i++) { - u16 status = get_unaligned_le16(pos + 1); + struct ieee80211_ml_reconf_status *reconf_status = (void *)pos; + u16 status = le16_to_cpu(reconf_status->status); - link_id = *pos; + link_id = u8_get_bits(reconf_status->info, + IEEE80211_ML_RECONF_LINK_ID_MASK); if (!(link_mask & BIT(link_id))) { sdata_info(sdata, @@ -10506,8 +10508,8 @@ void ieee80211_process_ml_reconf_resp(struct ieee80211_sub_if_data *sdata, sdata->u.mgd.reconf.added_links &= ~BIT(link_id); } - pos += 3; - len -= 3; + pos += sizeof(*reconf_status); + len -= sizeof(*reconf_status); } if (link_mask) { From 8f303194b241c2795bb9b79ee80bc93e7876879c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 21 Mar 2026 15:52:08 -0700 Subject: [PATCH 0941/1561] octeontx2-pf: macsec: Use AES library instead of ecb(aes) skcipher cn10k_ecb_aes_encrypt() just encrypts a single block with AES. That is much more easily and efficiently done with the AES library than crypto_skcipher. Use the AES library instead. Signed-off-by: Eric Biggers Reviewed-by: Subbaraya Sundeep Link: https://patch.msgid.link/20260321225208.64508-1-ebiggers@kernel.org Signed-off-by: Paolo Abeni --- .../net/ethernet/marvell/octeontx2/Kconfig | 1 + .../marvell/octeontx2/nic/cn10k_macsec.c | 53 +++++-------------- 2 files changed, 13 insertions(+), 41 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/Kconfig b/drivers/net/ethernet/marvell/octeontx2/Kconfig index 35c4f5f64f58..47e549c581f0 100644 --- a/drivers/net/ethernet/marvell/octeontx2/Kconfig +++ b/drivers/net/ethernet/marvell/octeontx2/Kconfig @@ -33,6 +33,7 @@ config OCTEONTX2_PF select OCTEONTX2_MBOX select NET_DEVLINK select PAGE_POOL + select CRYPTO_LIB_AES if MACSEC depends on (64BIT && COMPILE_TEST) || ARM64 select DIMLIB depends on PCI diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c index 4649996dc7da..2cc1bdfd9b2e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c @@ -4,7 +4,7 @@ * Copyright (C) 2022 Marvell. */ -#include +#include #include #include #include "otx2_common.h" @@ -46,51 +46,22 @@ #define CN10K_MAX_HASH_LEN 16 #define CN10K_MAX_SAK_LEN 32 -static int cn10k_ecb_aes_encrypt(struct otx2_nic *pfvf, u8 *sak, - u16 sak_len, u8 *hash) +static int cn10k_ecb_aes_encrypt(struct otx2_nic *pfvf, const u8 *sak, + u16 sak_len, u8 hash[CN10K_MAX_HASH_LEN]) { - u8 data[CN10K_MAX_HASH_LEN] = { 0 }; - struct skcipher_request *req = NULL; - struct scatterlist sg_src, sg_dst; - struct crypto_skcipher *tfm; - DECLARE_CRYPTO_WAIT(wait); - int err; + static const u8 zeroes[CN10K_MAX_HASH_LEN]; + struct aes_enckey aes; - tfm = crypto_alloc_skcipher("ecb(aes)", 0, 0); - if (IS_ERR(tfm)) { - dev_err(pfvf->dev, "failed to allocate transform for ecb-aes\n"); - return PTR_ERR(tfm); + if (aes_prepareenckey(&aes, sak, sak_len) != 0) { + dev_err(pfvf->dev, "invalid AES key length: %d\n", sak_len); + return -EINVAL; } - req = skcipher_request_alloc(tfm, GFP_KERNEL); - if (!req) { - dev_err(pfvf->dev, "failed to allocate request for skcipher\n"); - err = -ENOMEM; - goto free_tfm; - } + static_assert(CN10K_MAX_HASH_LEN == AES_BLOCK_SIZE); + aes_encrypt(&aes, hash, zeroes); - err = crypto_skcipher_setkey(tfm, sak, sak_len); - if (err) { - dev_err(pfvf->dev, "failed to set key for skcipher\n"); - goto free_req; - } - - /* build sg list */ - sg_init_one(&sg_src, data, CN10K_MAX_HASH_LEN); - sg_init_one(&sg_dst, hash, CN10K_MAX_HASH_LEN); - - skcipher_request_set_callback(req, 0, crypto_req_done, &wait); - skcipher_request_set_crypt(req, &sg_src, &sg_dst, - CN10K_MAX_HASH_LEN, NULL); - - err = crypto_skcipher_encrypt(req); - err = crypto_wait_req(err, &wait); - -free_req: - skcipher_request_free(req); -free_tfm: - crypto_free_skcipher(tfm); - return err; + memzero_explicit(&aes, sizeof(aes)); + return 0; } static struct cn10k_mcs_txsc *cn10k_mcs_get_txsc(struct cn10k_mcs_cfg *cfg, From fbf3fecf6ae1c9f429c2450f5a10d5640d7c5f9c Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Mon, 23 Mar 2026 13:50:37 +0200 Subject: [PATCH 0942/1561] net: dpaa2-mac: extend APIs related to statistics Extend the dpmac_counter_id enum with the newly added counters which can be interrogated through the MC firmware. Also add the dpmac_get_statistics() API which can be used to retrieve multiple MAC counters through a single firmware command. Signed-off-by: Ioana Ciornei Link: https://patch.msgid.link/20260323115039.3932600-2-ioana.ciornei@nxp.com Signed-off-by: Paolo Abeni --- .../net/ethernet/freescale/dpaa2/dpmac-cmd.h | 11 ++- drivers/net/ethernet/freescale/dpaa2/dpmac.c | 31 +++++- drivers/net/ethernet/freescale/dpaa2/dpmac.h | 94 ++++++++++++++++++- 3 files changed, 132 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpmac-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dpmac-cmd.h index e9ac2ecef3be..a864a99a0f75 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpmac-cmd.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpmac-cmd.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ /* Copyright 2013-2016 Freescale Semiconductor Inc. - * Copyright 2019 NXP + * Copyright 2019, 2024-2026 NXP */ #ifndef _FSL_DPMAC_CMD_H #define _FSL_DPMAC_CMD_H @@ -28,6 +28,8 @@ #define DPMAC_CMDID_SET_PROTOCOL DPMAC_CMD(0x0c7) +#define DPMAC_CMDID_GET_STATISTICS DPMAC_CMD(0x0c8) + /* Macros for accessing command fields smaller than 1byte */ #define DPMAC_MASK(field) \ GENMASK(DPMAC_##field##_SHIFT + DPMAC_##field##_SIZE - 1, \ @@ -82,4 +84,11 @@ struct dpmac_rsp_get_api_version { struct dpmac_cmd_set_protocol { u8 eth_if; }; + +struct dpmac_cmd_get_statistics { + __le64 iova_cnt; + __le64 iova_values; + __le32 num_cnt; +}; + #endif /* _FSL_DPMAC_CMD_H */ diff --git a/drivers/net/ethernet/freescale/dpaa2/dpmac.c b/drivers/net/ethernet/freescale/dpaa2/dpmac.c index f440a4c3b70c..efb9864d051f 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpmac.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpmac.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) /* Copyright 2013-2016 Freescale Semiconductor Inc. - * Copyright 2019 NXP + * Copyright 2019, 2024-2026 NXP */ #include #include "dpmac.h" @@ -235,3 +235,32 @@ int dpmac_set_protocol(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, return mc_send_command(mc_io, &cmd); } + +/** + * dpmac_get_statistics() - Get MAC statistics + * @mc_io: Pointer to opaque I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPMAC object + * @iova_cnt: IOVA containing the requested MAC counters formatted as an + * array of __le32 representing the dpmac_counter_id. + * @iova_values: IOVA containing the values for all the requested counters + * formatted as an array of __le64. + * @num_cnt: Number of counters requested + * + * Return: '0' on Success; Error code otherwise. + */ +int dpmac_get_statistics(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u64 iova_cnt, u64 iova_values, u32 num_cnt) +{ + struct dpmac_cmd_get_statistics *cmd_params; + struct fsl_mc_command cmd = { 0 }; + + cmd.header = mc_encode_cmd_header(DPMAC_CMDID_GET_STATISTICS, + cmd_flags, token); + cmd_params = (struct dpmac_cmd_get_statistics *)cmd.params; + cmd_params->iova_cnt = cpu_to_le64(iova_cnt); + cmd_params->iova_values = cpu_to_le64(iova_values); + cmd_params->num_cnt = cpu_to_le32(num_cnt); + + return mc_send_command(mc_io, &cmd); +} diff --git a/drivers/net/ethernet/freescale/dpaa2/dpmac.h b/drivers/net/ethernet/freescale/dpaa2/dpmac.h index 17488819ef68..b5276168b869 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpmac.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpmac.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ /* Copyright 2013-2016 Freescale Semiconductor Inc. - * Copyright 2019 NXP + * Copyright 2019, 2024-2026 NXP */ #ifndef __FSL_DPMAC_H #define __FSL_DPMAC_H @@ -170,6 +170,58 @@ int dpmac_set_link_state(struct fsl_mc_io *mc_io, * pause frames. * @DPMAC_CNT_EGR_GOOD_FRAME: counts frames transmitted without error, including * pause frames. + * @DPMAC_CNT_EGR_FRAME_64: counts transmitted 64-bytes frames, good or bad. + * @DPMAC_CNT_EGR_FRAME_127: counts transmitted 65 to 127-bytes frames, good or + * bad. + * @DPMAC_CNT_EGR_FRAME_255: counts transmitted 128 to 255-bytes frames, good + * or bad. + * @DPMAC_CNT_EGR_FRAME_511: counts transmitted 256 to 511-bytes frames, good + * or bad. + * @DPMAC_CNT_EGR_FRAME_1023: counts transmitted 512 to 1023-bytes frames, good + * or bad. + * @DPMAC_CNT_EGR_FRAME_1518: counts transmitted 1024 to 1518-bytes frames, + * good or bad. + * @DPMAC_CNT_EGR_FRAME_1519_MAX: counts transmitted 1519-bytes frames and + * larger (up to max frame length specified), + * good or bad. + * @DPMAC_CNT_ING_ALL_BYTE: counts bytes received in both good and bad packets + * @DPMAC_CNT_ING_FCS_ERR: counts frames received with a CRC-32 error but the + * frame is otherwise of correct length + * @DPMAC_CNT_ING_VLAN_FRAME: counts the received VLAN tagged frames which are + * valid. + * @DPMAC_CNT_ING_UNDERSIZED: counts received frames which were less than 64 + * bytes long and with a good CRC. + * @DPMAC_CNT_ING_CONTROL_FRAME: counts received control frames (type 0x8808) + * but not pause frames. + * @DPMAC_CNT_ING_FRAME_DISCARD_NOT_TRUNC: counts the fully dropped frames (not + * truncated) due to internal errors of + * the MAC client. Occurs when a + * receive FIFO overflows. + * @DPMAC_CNT_EGR_ALL_BYTE: counts transmitted bytes in both good and bad + * packets. + * @DPMAC_CNT_EGR_FCS_ERR: counts trasmitted frames with a CRC-32 error except + * for underflows. + * @DPMAC_CNT_EGR_VLAN_FRAME: counts the transmitted VLAN tagged frames which + * are valid. + * @DPMAC_CNT_EGR_ALL_FRAME: counts all trasmitted frames, good or bad. + * @DPMAC_CNT_EGR_CONTROL_FRAME: counts transmitted control frames (type + * 0x8808) but not pause frames. + * @DPMAC_CNT_ING_PFC0: number of PFC frames received for class 0. + * @DPMAC_CNT_ING_PFC1: number of PFC frames received for class 1. + * @DPMAC_CNT_ING_PFC2: number of PFC frames received for class 2. + * @DPMAC_CNT_ING_PFC3: number of PFC frames received for class 3. + * @DPMAC_CNT_ING_PFC4: number of PFC frames received for class 4. + * @DPMAC_CNT_ING_PFC5: number of PFC frames received for class 5. + * @DPMAC_CNT_ING_PFC6: number of PFC frames received for class 6. + * @DPMAC_CNT_ING_PFC7: number of PFC frames received for class 7. + * @DPMAC_CNT_EGR_PFC0: number of PFC frames transmitted for class 0. + * @DPMAC_CNT_EGR_PFC1: number of PFC frames transmitted for class 1. + * @DPMAC_CNT_EGR_PFC2: number of PFC frames transmitted for class 2. + * @DPMAC_CNT_EGR_PFC3: number of PFC frames transmitted for class 3. + * @DPMAC_CNT_EGR_PFC4: number of PFC frames transmitted for class 4. + * @DPMAC_CNT_EGR_PFC5: number of PFC frames transmitted for class 5. + * @DPMAC_CNT_EGR_PFC6: number of PFC frames transmitted for class 6. + * @DPMAC_CNT_EGR_PFC7: number of PFC frames transmitted for class 7. */ enum dpmac_counter_id { DPMAC_CNT_ING_FRAME_64, @@ -199,7 +251,41 @@ enum dpmac_counter_id { DPMAC_CNT_EGR_UCAST_FRAME, DPMAC_CNT_EGR_ERR_FRAME, DPMAC_CNT_ING_GOOD_FRAME, - DPMAC_CNT_EGR_GOOD_FRAME + DPMAC_CNT_EGR_GOOD_FRAME, + DPMAC_CNT_EGR_FRAME_64, + DPMAC_CNT_EGR_FRAME_127, + DPMAC_CNT_EGR_FRAME_255, + DPMAC_CNT_EGR_FRAME_511, + DPMAC_CNT_EGR_FRAME_1023, + DPMAC_CNT_EGR_FRAME_1518, + DPMAC_CNT_EGR_FRAME_1519_MAX, + DPMAC_CNT_ING_ALL_BYTE, + DPMAC_CNT_ING_FCS_ERR, + DPMAC_CNT_ING_VLAN_FRAME, + DPMAC_CNT_ING_UNDERSIZED, + DPMAC_CNT_ING_CONTROL_FRAME, + DPMAC_CNT_ING_FRAME_DISCARD_NOT_TRUNC, + DPMAC_CNT_EGR_ALL_BYTE, + DPMAC_CNT_EGR_FCS_ERR, + DPMAC_CNT_EGR_VLAN_FRAME, + DPMAC_CNT_EGR_ALL_FRAME, + DPMAC_CNT_EGR_CONTROL_FRAME, + DPMAC_CNT_ING_PFC0, + DPMAC_CNT_ING_PFC1, + DPMAC_CNT_ING_PFC2, + DPMAC_CNT_ING_PFC3, + DPMAC_CNT_ING_PFC4, + DPMAC_CNT_ING_PFC5, + DPMAC_CNT_ING_PFC6, + DPMAC_CNT_ING_PFC7, + DPMAC_CNT_EGR_PFC0, + DPMAC_CNT_EGR_PFC1, + DPMAC_CNT_EGR_PFC2, + DPMAC_CNT_EGR_PFC3, + DPMAC_CNT_EGR_PFC4, + DPMAC_CNT_EGR_PFC5, + DPMAC_CNT_EGR_PFC6, + DPMAC_CNT_EGR_PFC7, }; int dpmac_get_counter(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, @@ -210,4 +296,8 @@ int dpmac_get_api_version(struct fsl_mc_io *mc_io, u32 cmd_flags, int dpmac_set_protocol(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, enum dpmac_eth_if protocol); + +int dpmac_get_statistics(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u64 iova_cnt, u64 iova_values, u32 num_cnt); + #endif /* __FSL_DPMAC_H */ From d369154d9d7fa7530a2520b945905fbecd35b05b Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Mon, 23 Mar 2026 13:50:38 +0200 Subject: [PATCH 0943/1561] net: dpaa2-mac: retrieve MAC statistics in one firmware command The latest MC firmware version added a new command to retrieve all DPMAC counters in a single firmware call. Use this new command, when possible, in dpaa2-mac as well. In order to use the dpmac_get_statistics() API, two DMA memory areas are used: one to transmit what counters the driver is requesting and one to receive the values of those counters. These memory areas are allocated and DMA mapped at probe time so that we don't waste time at runtime. And since we are planning to add rmon, eth-ctrl and other standard statistics using the same infrastructure, make the setup and cleanup processes as generic as possibile through the dpaa2_mac_setup_stats() and dpaa2_mac_clear_stats() functions. Signed-off-by: Ioana Ciornei Link: https://patch.msgid.link/20260323115039.3932600-3-ioana.ciornei@nxp.com Signed-off-by: Paolo Abeni --- .../net/ethernet/freescale/dpaa2/dpaa2-mac.c | 201 ++++++++++++++---- .../net/ethernet/freescale/dpaa2/dpaa2-mac.h | 10 +- 2 files changed, 171 insertions(+), 40 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c index 422ce13a7c94..5a7fda6a2012 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) -/* Copyright 2019 NXP */ +/* Copyright 2019, 2024-2026 NXP */ #include #include @@ -15,7 +15,118 @@ #define DPMAC_PROTOCOL_CHANGE_VER_MAJOR 4 #define DPMAC_PROTOCOL_CHANGE_VER_MINOR 8 +#define DPMAC_STATS_BUNDLE_VER_MAJOR 4 +#define DPMAC_STATS_BUNDLE_VER_MINOR 10 + #define DPAA2_MAC_FEATURE_PROTOCOL_CHANGE BIT(0) +#define DPAA2_MAC_FEATURE_STATS_BUNDLE BIT(1) + +struct dpmac_counter { + enum dpmac_counter_id id; + const char *name; +}; + +#define DPMAC_UNSTRUCTURED_COUNTER(counter_id, counter_name) \ + { \ + .id = counter_id, \ + .name = counter_name, \ + } + +static const struct dpmac_counter dpaa2_mac_ethtool_stats[] = { + DPMAC_UNSTRUCTURED_COUNTER(DPMAC_CNT_ING_ALL_FRAME, "[mac] rx all frames"), + DPMAC_UNSTRUCTURED_COUNTER(DPMAC_CNT_ING_GOOD_FRAME, "[mac] rx frames ok"), + DPMAC_UNSTRUCTURED_COUNTER(DPMAC_CNT_ING_ERR_FRAME, "[mac] rx frame errors"), + DPMAC_UNSTRUCTURED_COUNTER(DPMAC_CNT_ING_FRAME_DISCARD, "[mac] rx frame discards"), + DPMAC_UNSTRUCTURED_COUNTER(DPMAC_CNT_ING_UCAST_FRAME, "[mac] rx u-cast"), + DPMAC_UNSTRUCTURED_COUNTER(DPMAC_CNT_ING_BCAST_FRAME, "[mac] rx b-cast"), + DPMAC_UNSTRUCTURED_COUNTER(DPMAC_CNT_ING_MCAST_FRAME, "[mac] rx m-cast"), + DPMAC_UNSTRUCTURED_COUNTER(DPMAC_CNT_ING_FRAME_64, "[mac] rx 64 bytes"), + DPMAC_UNSTRUCTURED_COUNTER(DPMAC_CNT_ING_FRAME_127, "[mac] rx 65-127 bytes"), + DPMAC_UNSTRUCTURED_COUNTER(DPMAC_CNT_ING_FRAME_255, "[mac] rx 128-255 bytes"), + DPMAC_UNSTRUCTURED_COUNTER(DPMAC_CNT_ING_FRAME_511, "[mac] rx 256-511 bytes"), + DPMAC_UNSTRUCTURED_COUNTER(DPMAC_CNT_ING_FRAME_1023, "[mac] rx 512-1023 bytes"), + DPMAC_UNSTRUCTURED_COUNTER(DPMAC_CNT_ING_FRAME_1518, "[mac] rx 1024-1518 bytes"), + DPMAC_UNSTRUCTURED_COUNTER(DPMAC_CNT_ING_FRAME_1519_MAX, "[mac] rx 1519-max bytes"), + DPMAC_UNSTRUCTURED_COUNTER(DPMAC_CNT_ING_FRAG, "[mac] rx frags"), + DPMAC_UNSTRUCTURED_COUNTER(DPMAC_CNT_ING_JABBER, "[mac] rx jabber"), + DPMAC_UNSTRUCTURED_COUNTER(DPMAC_CNT_ING_ALIGN_ERR, "[mac] rx align errors"), + DPMAC_UNSTRUCTURED_COUNTER(DPMAC_CNT_ING_OVERSIZED, "[mac] rx oversized"), + DPMAC_UNSTRUCTURED_COUNTER(DPMAC_CNT_ING_VALID_PAUSE_FRAME, "[mac] rx pause"), + DPMAC_UNSTRUCTURED_COUNTER(DPMAC_CNT_ING_BYTE, "[mac] rx bytes"), + DPMAC_UNSTRUCTURED_COUNTER(DPMAC_CNT_EGR_GOOD_FRAME, "[mac] tx frames ok"), + DPMAC_UNSTRUCTURED_COUNTER(DPMAC_CNT_EGR_UCAST_FRAME, "[mac] tx u-cast"), + DPMAC_UNSTRUCTURED_COUNTER(DPMAC_CNT_EGR_MCAST_FRAME, "[mac] tx m-cast"), + DPMAC_UNSTRUCTURED_COUNTER(DPMAC_CNT_EGR_BCAST_FRAME, "[mac] tx b-cast"), + DPMAC_UNSTRUCTURED_COUNTER(DPMAC_CNT_EGR_ERR_FRAME, "[mac] tx frame errors"), + DPMAC_UNSTRUCTURED_COUNTER(DPMAC_CNT_EGR_UNDERSIZED, "[mac] tx undersized"), + DPMAC_UNSTRUCTURED_COUNTER(DPMAC_CNT_EGR_VALID_PAUSE_FRAME, "[mac] tx b-pause"), + DPMAC_UNSTRUCTURED_COUNTER(DPMAC_CNT_EGR_BYTE, "[mac] tx bytes"), +}; + +#define DPAA2_MAC_NUM_ETHTOOL_STATS ARRAY_SIZE(dpaa2_mac_ethtool_stats) + +static void dpaa2_mac_setup_stats(struct dpaa2_mac *mac, + struct dpaa2_mac_stats *stats, + size_t num_stats, + const struct dpmac_counter *counters) +{ + struct device *dev = mac->net_dev->dev.parent; + size_t size_idx, size_values; + __le32 *cnt_idx; + + size_idx = num_stats * sizeof(u32); + stats->idx_dma_mem = dma_alloc_noncoherent(dev, size_idx, + &stats->idx_iova, + DMA_TO_DEVICE, + GFP_KERNEL); + if (!stats->idx_dma_mem) + goto out; + + size_values = num_stats * sizeof(u64); + stats->values_dma_mem = dma_alloc_noncoherent(dev, size_values, + &stats->values_iova, + DMA_FROM_DEVICE, + GFP_KERNEL); + if (!stats->values_dma_mem) + goto err_alloc_values; + + cnt_idx = stats->idx_dma_mem; + for (size_t i = 0; i < num_stats; i++) + *cnt_idx++ = cpu_to_le32((u32)(counters[i].id)); + + dma_sync_single_for_device(dev, stats->idx_iova, size_idx, + DMA_TO_DEVICE); + + return; + +err_alloc_values: + dma_free_noncoherent(dev, num_stats * sizeof(u32), stats->idx_dma_mem, + stats->idx_iova, DMA_TO_DEVICE); +out: + stats->idx_dma_mem = NULL; + stats->values_dma_mem = NULL; +} + +static void dpaa2_mac_clear_stats(struct dpaa2_mac *mac, + struct dpaa2_mac_stats *stats, + size_t num_stats) +{ + struct device *dev = mac->net_dev->dev.parent; + + if (stats->idx_dma_mem) { + dma_free_noncoherent(dev, num_stats * sizeof(u32), + stats->idx_dma_mem, + stats->idx_iova, DMA_TO_DEVICE); + stats->idx_dma_mem = NULL; + } + + if (stats->values_dma_mem) { + dma_free_noncoherent(dev, num_stats * sizeof(u64), + stats->values_dma_mem, + stats->values_iova, DMA_FROM_DEVICE); + stats->values_dma_mem = NULL; + } +} static int dpaa2_mac_cmp_ver(struct dpaa2_mac *mac, u16 ver_major, u16 ver_minor) @@ -32,6 +143,10 @@ static void dpaa2_mac_detect_features(struct dpaa2_mac *mac) if (dpaa2_mac_cmp_ver(mac, DPMAC_PROTOCOL_CHANGE_VER_MAJOR, DPMAC_PROTOCOL_CHANGE_VER_MINOR) >= 0) mac->features |= DPAA2_MAC_FEATURE_PROTOCOL_CHANGE; + + if (dpaa2_mac_cmp_ver(mac, DPMAC_STATS_BUNDLE_VER_MAJOR, + DPMAC_STATS_BUNDLE_VER_MINOR) >= 0) + mac->features |= DPAA2_MAC_FEATURE_STATS_BUNDLE; } static int phy_mode(enum dpmac_eth_if eth_if, phy_interface_t *if_mode) @@ -504,6 +619,11 @@ int dpaa2_mac_open(struct dpaa2_mac *mac) mac->fw_node = fw_node; net_dev->dev.of_node = to_of_node(mac->fw_node); + if (mac->features & DPAA2_MAC_FEATURE_STATS_BUNDLE) + dpaa2_mac_setup_stats(mac, &mac->ethtool_stats, + DPAA2_MAC_NUM_ETHTOOL_STATS, + dpaa2_mac_ethtool_stats); + return 0; err_close_dpmac: @@ -515,66 +635,69 @@ void dpaa2_mac_close(struct dpaa2_mac *mac) { struct fsl_mc_device *dpmac_dev = mac->mc_dev; + if (mac->features & DPAA2_MAC_FEATURE_STATS_BUNDLE) + dpaa2_mac_clear_stats(mac, &mac->ethtool_stats, + DPAA2_MAC_NUM_ETHTOOL_STATS); + dpmac_close(mac->mc_io, 0, dpmac_dev->mc_handle); if (mac->fw_node) fwnode_handle_put(mac->fw_node); } -static char dpaa2_mac_ethtool_stats[][ETH_GSTRING_LEN] = { - [DPMAC_CNT_ING_ALL_FRAME] = "[mac] rx all frames", - [DPMAC_CNT_ING_GOOD_FRAME] = "[mac] rx frames ok", - [DPMAC_CNT_ING_ERR_FRAME] = "[mac] rx frame errors", - [DPMAC_CNT_ING_FRAME_DISCARD] = "[mac] rx frame discards", - [DPMAC_CNT_ING_UCAST_FRAME] = "[mac] rx u-cast", - [DPMAC_CNT_ING_BCAST_FRAME] = "[mac] rx b-cast", - [DPMAC_CNT_ING_MCAST_FRAME] = "[mac] rx m-cast", - [DPMAC_CNT_ING_FRAME_64] = "[mac] rx 64 bytes", - [DPMAC_CNT_ING_FRAME_127] = "[mac] rx 65-127 bytes", - [DPMAC_CNT_ING_FRAME_255] = "[mac] rx 128-255 bytes", - [DPMAC_CNT_ING_FRAME_511] = "[mac] rx 256-511 bytes", - [DPMAC_CNT_ING_FRAME_1023] = "[mac] rx 512-1023 bytes", - [DPMAC_CNT_ING_FRAME_1518] = "[mac] rx 1024-1518 bytes", - [DPMAC_CNT_ING_FRAME_1519_MAX] = "[mac] rx 1519-max bytes", - [DPMAC_CNT_ING_FRAG] = "[mac] rx frags", - [DPMAC_CNT_ING_JABBER] = "[mac] rx jabber", - [DPMAC_CNT_ING_ALIGN_ERR] = "[mac] rx align errors", - [DPMAC_CNT_ING_OVERSIZED] = "[mac] rx oversized", - [DPMAC_CNT_ING_VALID_PAUSE_FRAME] = "[mac] rx pause", - [DPMAC_CNT_ING_BYTE] = "[mac] rx bytes", - [DPMAC_CNT_EGR_GOOD_FRAME] = "[mac] tx frames ok", - [DPMAC_CNT_EGR_UCAST_FRAME] = "[mac] tx u-cast", - [DPMAC_CNT_EGR_MCAST_FRAME] = "[mac] tx m-cast", - [DPMAC_CNT_EGR_BCAST_FRAME] = "[mac] tx b-cast", - [DPMAC_CNT_EGR_ERR_FRAME] = "[mac] tx frame errors", - [DPMAC_CNT_EGR_UNDERSIZED] = "[mac] tx undersized", - [DPMAC_CNT_EGR_VALID_PAUSE_FRAME] = "[mac] tx b-pause", - [DPMAC_CNT_EGR_BYTE] = "[mac] tx bytes", -}; - -#define DPAA2_MAC_NUM_STATS ARRAY_SIZE(dpaa2_mac_ethtool_stats) - int dpaa2_mac_get_sset_count(void) { - return DPAA2_MAC_NUM_STATS; + return DPAA2_MAC_NUM_ETHTOOL_STATS; } void dpaa2_mac_get_strings(u8 **data) { int i; - for (i = 0; i < DPAA2_MAC_NUM_STATS; i++) - ethtool_puts(data, dpaa2_mac_ethtool_stats[i]); + for (i = 0; i < DPAA2_MAC_NUM_ETHTOOL_STATS; i++) + ethtool_puts(data, dpaa2_mac_ethtool_stats[i].name); } void dpaa2_mac_get_ethtool_stats(struct dpaa2_mac *mac, u64 *data) { + size_t values_size = DPAA2_MAC_NUM_ETHTOOL_STATS * sizeof(u64); + struct device *dev = mac->net_dev->dev.parent; struct fsl_mc_device *dpmac_dev = mac->mc_dev; + __le64 *cnt_values; int i, err; u64 value; - for (i = 0; i < DPAA2_MAC_NUM_STATS; i++) { + if (!(mac->features & DPAA2_MAC_FEATURE_STATS_BUNDLE)) + goto fallback; + + if (!mac->ethtool_stats.idx_dma_mem || + !mac->ethtool_stats.values_dma_mem) + goto fallback; + + dma_sync_single_for_device(dev, mac->ethtool_stats.values_iova, + values_size, DMA_FROM_DEVICE); + + err = dpmac_get_statistics(mac->mc_io, 0, dpmac_dev->mc_handle, + mac->ethtool_stats.idx_iova, + mac->ethtool_stats.values_iova, + DPAA2_MAC_NUM_ETHTOOL_STATS); + if (err) + goto fallback; + + dma_sync_single_for_cpu(dev, mac->ethtool_stats.values_iova, + values_size, DMA_FROM_DEVICE); + + cnt_values = mac->ethtool_stats.values_dma_mem; + for (i = 0; i < DPAA2_MAC_NUM_ETHTOOL_STATS; i++) + *(data + i) = le64_to_cpu(*cnt_values++); + + return; + +fallback: + + /* Fallback and retrieve each counter one by one */ + for (i = 0; i < DPAA2_MAC_NUM_ETHTOOL_STATS; i++) { err = dpmac_get_counter(mac->mc_io, 0, dpmac_dev->mc_handle, - i, &value); + dpaa2_mac_ethtool_stats[i].id, &value); if (err) { netdev_err_once(mac->net_dev, "dpmac_get_counter error %d\n", err); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.h index 53f8d106d11e..6f4981627961 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ -/* Copyright 2019 NXP */ +/* Copyright 2019, 2024-2026 NXP */ #ifndef DPAA2_MAC_H #define DPAA2_MAC_H @@ -11,6 +11,12 @@ #include "dpmac.h" #include "dpmac-cmd.h" +struct dpaa2_mac_stats { + __le32 *idx_dma_mem; + __le64 *values_dma_mem; + dma_addr_t idx_iova, values_iova; +}; + struct dpaa2_mac { struct fsl_mc_device *mc_dev; struct dpmac_link_state state; @@ -28,6 +34,8 @@ struct dpaa2_mac { struct fwnode_handle *fw_node; struct phy *serdes_phy; + + struct dpaa2_mac_stats ethtool_stats; }; static inline bool dpaa2_mac_is_type_phy(struct dpaa2_mac *mac) From dd1d4ccb860289ad3dee33178cc2705c5ed1143b Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Mon, 23 Mar 2026 13:50:39 +0200 Subject: [PATCH 0944/1561] net: dpaa2-mac: export standard statistics Take advantage of all the newly added MAC counters available through the MC API and export them through the standard statistics structures - rmon, eth-ctrl, eth-mac and pause. A new version based feature is added into dpaa2-mac - DPAA2_MAC_FEATURE_STANDARD_STATS - and based on the two memory zones needed for gathering the MAC counters are setup for each statistics group. The dpmac_counter structure is extended with a new field - size_t offset - which is being used to instruct the dpaa2_mac_transfer_stats() function where exactly to store a counter value inside the standard statistics structure. The newly added support is used both in the dpaa2-eth driver as well as the dpaa2-switch one. Signed-off-by: Ioana Ciornei Link: https://patch.msgid.link/20260323115039.3932600-4-ioana.ciornei@nxp.com Signed-off-by: Paolo Abeni --- .../ethernet/freescale/dpaa2/dpaa2-ethtool.c | 61 ++++- .../net/ethernet/freescale/dpaa2/dpaa2-mac.c | 214 ++++++++++++++++++ .../net/ethernet/freescale/dpaa2/dpaa2-mac.h | 17 ++ .../freescale/dpaa2/dpaa2-switch-ethtool.c | 48 +++- 4 files changed, 338 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c index baab4f1c908d..59f5c778df38 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) /* Copyright 2014-2016 Freescale Semiconductor Inc. - * Copyright 2016-2022 NXP + * Copyright 2016-2022, 2024-2026 NXP */ #include @@ -938,6 +938,61 @@ static void dpaa2_eth_get_channels(struct net_device *net_dev, channels->other_count; } +static void +dpaa2_eth_get_rmon_stats(struct net_device *net_dev, + struct ethtool_rmon_stats *rmon_stats, + const struct ethtool_rmon_hist_range **ranges) +{ + struct dpaa2_eth_priv *priv = netdev_priv(net_dev); + + mutex_lock(&priv->mac_lock); + + if (dpaa2_eth_has_mac(priv)) + dpaa2_mac_get_rmon_stats(priv->mac, rmon_stats, ranges); + + mutex_unlock(&priv->mac_lock); +} + +static void dpaa2_eth_get_pause_stats(struct net_device *net_dev, + struct ethtool_pause_stats *pause_stats) +{ + struct dpaa2_eth_priv *priv = netdev_priv(net_dev); + + mutex_lock(&priv->mac_lock); + + if (dpaa2_eth_has_mac(priv)) + dpaa2_mac_get_pause_stats(priv->mac, pause_stats); + + mutex_unlock(&priv->mac_lock); +} + +static void dpaa2_eth_get_ctrl_stats(struct net_device *net_dev, + struct ethtool_eth_ctrl_stats *ctrl_stats) +{ + struct dpaa2_eth_priv *priv = netdev_priv(net_dev); + + mutex_lock(&priv->mac_lock); + + if (dpaa2_eth_has_mac(priv)) + dpaa2_mac_get_ctrl_stats(priv->mac, ctrl_stats); + + mutex_unlock(&priv->mac_lock); +} + +static void +dpaa2_eth_get_eth_mac_stats(struct net_device *net_dev, + struct ethtool_eth_mac_stats *eth_mac_stats) +{ + struct dpaa2_eth_priv *priv = netdev_priv(net_dev); + + mutex_lock(&priv->mac_lock); + + if (dpaa2_eth_has_mac(priv)) + dpaa2_mac_get_eth_mac_stats(priv->mac, eth_mac_stats); + + mutex_unlock(&priv->mac_lock); +} + const struct ethtool_ops dpaa2_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS | ETHTOOL_COALESCE_USE_ADAPTIVE_RX, @@ -962,4 +1017,8 @@ const struct ethtool_ops dpaa2_ethtool_ops = { .get_coalesce = dpaa2_eth_get_coalesce, .set_coalesce = dpaa2_eth_set_coalesce, .get_channels = dpaa2_eth_get_channels, + .get_rmon_stats = dpaa2_eth_get_rmon_stats, + .get_pause_stats = dpaa2_eth_get_pause_stats, + .get_eth_ctrl_stats = dpaa2_eth_get_ctrl_stats, + .get_eth_mac_stats = dpaa2_eth_get_eth_mac_stats, }; diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c index 5a7fda6a2012..ad812ebf3139 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c @@ -18,20 +18,43 @@ #define DPMAC_STATS_BUNDLE_VER_MAJOR 4 #define DPMAC_STATS_BUNDLE_VER_MINOR 10 +#define DPMAC_STANDARD_STATS_VER_MAJOR 4 +#define DPMAC_STANDARD_STATS_VER_MINOR 11 + #define DPAA2_MAC_FEATURE_PROTOCOL_CHANGE BIT(0) #define DPAA2_MAC_FEATURE_STATS_BUNDLE BIT(1) +#define DPAA2_MAC_FEATURE_STANDARD_STATS BIT(2) struct dpmac_counter { enum dpmac_counter_id id; + size_t offset; const char *name; }; +#define DPMAC_COUNTER(counter_id, struct_name, struct_offset) \ + { \ + .id = counter_id, \ + .offset = offsetof(struct_name, struct_offset), \ + } + #define DPMAC_UNSTRUCTURED_COUNTER(counter_id, counter_name) \ { \ .id = counter_id, \ .name = counter_name, \ } +#define DPMAC_RMON_COUNTER(counter_id, struct_offset) \ + DPMAC_COUNTER(counter_id, struct ethtool_rmon_stats, struct_offset) + +#define DPMAC_PAUSE_COUNTER(counter_id, struct_offset) \ + DPMAC_COUNTER(counter_id, struct ethtool_pause_stats, struct_offset) + +#define DPMAC_CTRL_COUNTER(counter_id, struct_offset) \ + DPMAC_COUNTER(counter_id, struct ethtool_eth_ctrl_stats, struct_offset) + +#define DPMAC_MAC_COUNTER(counter_id, struct_offset) \ + DPMAC_COUNTER(counter_id, struct ethtool_eth_mac_stats, struct_offset) + static const struct dpmac_counter dpaa2_mac_ethtool_stats[] = { DPMAC_UNSTRUCTURED_COUNTER(DPMAC_CNT_ING_ALL_FRAME, "[mac] rx all frames"), DPMAC_UNSTRUCTURED_COUNTER(DPMAC_CNT_ING_GOOD_FRAME, "[mac] rx frames ok"), @@ -65,6 +88,60 @@ static const struct dpmac_counter dpaa2_mac_ethtool_stats[] = { #define DPAA2_MAC_NUM_ETHTOOL_STATS ARRAY_SIZE(dpaa2_mac_ethtool_stats) +static const struct dpmac_counter dpaa2_mac_rmon_stats[] = { + DPMAC_RMON_COUNTER(DPMAC_CNT_ING_FRAME_64, hist[0]), + DPMAC_RMON_COUNTER(DPMAC_CNT_ING_FRAME_127, hist[1]), + DPMAC_RMON_COUNTER(DPMAC_CNT_ING_FRAME_255, hist[2]), + DPMAC_RMON_COUNTER(DPMAC_CNT_ING_FRAME_511, hist[3]), + DPMAC_RMON_COUNTER(DPMAC_CNT_ING_FRAME_1023, hist[4]), + DPMAC_RMON_COUNTER(DPMAC_CNT_ING_FRAME_1518, hist[5]), + DPMAC_RMON_COUNTER(DPMAC_CNT_ING_FRAME_1519_MAX, hist[6]), + DPMAC_RMON_COUNTER(DPMAC_CNT_EGR_FRAME_64, hist_tx[0]), + DPMAC_RMON_COUNTER(DPMAC_CNT_EGR_FRAME_127, hist_tx[1]), + DPMAC_RMON_COUNTER(DPMAC_CNT_EGR_FRAME_255, hist_tx[2]), + DPMAC_RMON_COUNTER(DPMAC_CNT_EGR_FRAME_511, hist_tx[3]), + DPMAC_RMON_COUNTER(DPMAC_CNT_EGR_FRAME_1023, hist_tx[4]), + DPMAC_RMON_COUNTER(DPMAC_CNT_EGR_FRAME_1518, hist_tx[5]), + DPMAC_RMON_COUNTER(DPMAC_CNT_EGR_FRAME_1519_MAX, hist_tx[6]), + DPMAC_RMON_COUNTER(DPMAC_CNT_ING_UNDERSIZED, undersize_pkts), + DPMAC_RMON_COUNTER(DPMAC_CNT_ING_OVERSIZED, oversize_pkts), + DPMAC_RMON_COUNTER(DPMAC_CNT_ING_FRAG, fragments), + DPMAC_RMON_COUNTER(DPMAC_CNT_ING_JABBER, jabbers), +}; + +#define DPAA2_MAC_NUM_RMON_STATS ARRAY_SIZE(dpaa2_mac_rmon_stats) + +static const struct dpmac_counter dpaa2_mac_pause_stats[] = { + DPMAC_PAUSE_COUNTER(DPMAC_CNT_ING_VALID_PAUSE_FRAME, rx_pause_frames), + DPMAC_PAUSE_COUNTER(DPMAC_CNT_EGR_VALID_PAUSE_FRAME, tx_pause_frames), +}; + +#define DPAA2_MAC_NUM_PAUSE_STATS ARRAY_SIZE(dpaa2_mac_pause_stats) + +static const struct dpmac_counter dpaa2_mac_eth_ctrl_stats[] = { + DPMAC_CTRL_COUNTER(DPMAC_CNT_ING_CONTROL_FRAME, MACControlFramesReceived), + DPMAC_CTRL_COUNTER(DPMAC_CNT_EGR_CONTROL_FRAME, MACControlFramesTransmitted), +}; + +#define DPAA2_MAC_NUM_ETH_CTRL_STATS ARRAY_SIZE(dpaa2_mac_eth_ctrl_stats) + +static const struct dpmac_counter dpaa2_mac_eth_mac_stats[] = { + DPMAC_MAC_COUNTER(DPMAC_CNT_EGR_GOOD_FRAME, FramesTransmittedOK), + DPMAC_MAC_COUNTER(DPMAC_CNT_ING_GOOD_FRAME, FramesReceivedOK), + DPMAC_MAC_COUNTER(DPMAC_CNT_ING_FCS_ERR, FrameCheckSequenceErrors), + DPMAC_MAC_COUNTER(DPMAC_CNT_ING_ALIGN_ERR, AlignmentErrors), + DPMAC_MAC_COUNTER(DPMAC_CNT_EGR_ALL_BYTE, OctetsTransmittedOK), + DPMAC_MAC_COUNTER(DPMAC_CNT_EGR_ERR_FRAME, FramesLostDueToIntMACXmitError), + DPMAC_MAC_COUNTER(DPMAC_CNT_ING_ALL_BYTE, OctetsReceivedOK), + DPMAC_MAC_COUNTER(DPMAC_CNT_ING_FRAME_DISCARD_NOT_TRUNC, FramesLostDueToIntMACRcvError), + DPMAC_MAC_COUNTER(DPMAC_CNT_EGR_MCAST_FRAME, MulticastFramesXmittedOK), + DPMAC_MAC_COUNTER(DPMAC_CNT_EGR_BCAST_FRAME, BroadcastFramesXmittedOK), + DPMAC_MAC_COUNTER(DPMAC_CNT_ING_MCAST_FRAME, MulticastFramesReceivedOK), + DPMAC_MAC_COUNTER(DPMAC_CNT_ING_BCAST_FRAME, BroadcastFramesReceivedOK), +}; + +#define DPAA2_MAC_NUM_ETH_MAC_STATS ARRAY_SIZE(dpaa2_mac_eth_mac_stats) + static void dpaa2_mac_setup_stats(struct dpaa2_mac *mac, struct dpaa2_mac_stats *stats, size_t num_stats, @@ -147,6 +224,10 @@ static void dpaa2_mac_detect_features(struct dpaa2_mac *mac) if (dpaa2_mac_cmp_ver(mac, DPMAC_STATS_BUNDLE_VER_MAJOR, DPMAC_STATS_BUNDLE_VER_MINOR) >= 0) mac->features |= DPAA2_MAC_FEATURE_STATS_BUNDLE; + + if (dpaa2_mac_cmp_ver(mac, DPMAC_STANDARD_STATS_VER_MAJOR, + DPMAC_STANDARD_STATS_VER_MINOR) >= 0) + mac->features |= DPAA2_MAC_FEATURE_STANDARD_STATS; } static int phy_mode(enum dpmac_eth_if eth_if, phy_interface_t *if_mode) @@ -624,6 +705,24 @@ int dpaa2_mac_open(struct dpaa2_mac *mac) DPAA2_MAC_NUM_ETHTOOL_STATS, dpaa2_mac_ethtool_stats); + if (mac->features & DPAA2_MAC_FEATURE_STANDARD_STATS) { + dpaa2_mac_setup_stats(mac, &mac->rmon_stats, + DPAA2_MAC_NUM_RMON_STATS, + dpaa2_mac_rmon_stats); + + dpaa2_mac_setup_stats(mac, &mac->pause_stats, + DPAA2_MAC_NUM_PAUSE_STATS, + dpaa2_mac_pause_stats); + + dpaa2_mac_setup_stats(mac, &mac->eth_ctrl_stats, + DPAA2_MAC_NUM_ETH_CTRL_STATS, + dpaa2_mac_eth_ctrl_stats); + + dpaa2_mac_setup_stats(mac, &mac->eth_mac_stats, + DPAA2_MAC_NUM_ETH_MAC_STATS, + dpaa2_mac_eth_mac_stats); + } + return 0; err_close_dpmac: @@ -639,11 +738,126 @@ void dpaa2_mac_close(struct dpaa2_mac *mac) dpaa2_mac_clear_stats(mac, &mac->ethtool_stats, DPAA2_MAC_NUM_ETHTOOL_STATS); + if (mac->features & DPAA2_MAC_FEATURE_STANDARD_STATS) { + dpaa2_mac_clear_stats(mac, &mac->rmon_stats, + DPAA2_MAC_NUM_RMON_STATS); + dpaa2_mac_clear_stats(mac, &mac->pause_stats, + DPAA2_MAC_NUM_PAUSE_STATS); + dpaa2_mac_clear_stats(mac, &mac->eth_ctrl_stats, + DPAA2_MAC_NUM_ETH_CTRL_STATS); + dpaa2_mac_clear_stats(mac, &mac->eth_mac_stats, + DPAA2_MAC_NUM_ETH_MAC_STATS); + } + dpmac_close(mac->mc_io, 0, dpmac_dev->mc_handle); if (mac->fw_node) fwnode_handle_put(mac->fw_node); } +static void dpaa2_mac_transfer_stats(const struct dpmac_counter *counters, + size_t num_counters, void *s, + __le64 *cnt_values) +{ + for (size_t i = 0; i < num_counters; i++) { + u64 *p = s + counters[i].offset; + + *p = le64_to_cpu(cnt_values[i]); + } +} + +static const struct ethtool_rmon_hist_range dpaa2_mac_rmon_ranges[] = { + { 64, 64 }, + { 65, 127 }, + { 128, 255 }, + { 256, 511 }, + { 512, 1023 }, + { 1024, 1518 }, + { 1519, DPAA2_ETH_MFL }, + {}, +}; + +static void dpaa2_mac_get_standard_stats(struct dpaa2_mac *mac, + struct dpaa2_mac_stats *stats, + size_t num_cnt, + const struct dpmac_counter *counters, + void *s) +{ + struct device *dev = mac->net_dev->dev.parent; + struct fsl_mc_device *dpmac_dev = mac->mc_dev; + size_t values_size = num_cnt * sizeof(u64); + int err; + + if (!(mac->features & DPAA2_MAC_FEATURE_STANDARD_STATS)) + return; + + if (!stats->idx_dma_mem || !stats->values_dma_mem) + return; + + dma_sync_single_for_device(dev, stats->values_iova, values_size, + DMA_FROM_DEVICE); + + err = dpmac_get_statistics(mac->mc_io, 0, dpmac_dev->mc_handle, + stats->idx_iova, stats->values_iova, + num_cnt); + if (err) { + netdev_err(mac->net_dev, "%s: dpmac_get_statistics() = %d\n", + __func__, err); + return; + } + + dma_sync_single_for_cpu(dev, stats->values_iova, values_size, + DMA_FROM_DEVICE); + + dpaa2_mac_transfer_stats(counters, num_cnt, s, stats->values_dma_mem); +} + +void dpaa2_mac_get_rmon_stats(struct dpaa2_mac *mac, + struct ethtool_rmon_stats *s, + const struct ethtool_rmon_hist_range **ranges) +{ + if (s->src != ETHTOOL_MAC_STATS_SRC_AGGREGATE) + return; + + dpaa2_mac_get_standard_stats(mac, &mac->rmon_stats, + DPAA2_MAC_NUM_RMON_STATS, + dpaa2_mac_rmon_stats, s); + + *ranges = dpaa2_mac_rmon_ranges; +} + +void dpaa2_mac_get_pause_stats(struct dpaa2_mac *mac, + struct ethtool_pause_stats *s) +{ + if (s->src != ETHTOOL_MAC_STATS_SRC_AGGREGATE) + return; + + dpaa2_mac_get_standard_stats(mac, &mac->pause_stats, + DPAA2_MAC_NUM_PAUSE_STATS, + dpaa2_mac_pause_stats, s); +} + +void dpaa2_mac_get_ctrl_stats(struct dpaa2_mac *mac, + struct ethtool_eth_ctrl_stats *s) +{ + if (s->src != ETHTOOL_MAC_STATS_SRC_AGGREGATE) + return; + + dpaa2_mac_get_standard_stats(mac, &mac->eth_ctrl_stats, + DPAA2_MAC_NUM_ETH_CTRL_STATS, + dpaa2_mac_eth_ctrl_stats, s); +} + +void dpaa2_mac_get_eth_mac_stats(struct dpaa2_mac *mac, + struct ethtool_eth_mac_stats *s) +{ + if (s->src != ETHTOOL_MAC_STATS_SRC_AGGREGATE) + return; + + dpaa2_mac_get_standard_stats(mac, &mac->eth_mac_stats, + DPAA2_MAC_NUM_ETH_MAC_STATS, + dpaa2_mac_eth_mac_stats, s); +} + int dpaa2_mac_get_sset_count(void) { return DPAA2_MAC_NUM_ETHTOOL_STATS; diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.h index 6f4981627961..98c725f609e9 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.h @@ -36,6 +36,10 @@ struct dpaa2_mac { struct phy *serdes_phy; struct dpaa2_mac_stats ethtool_stats; + struct dpaa2_mac_stats rmon_stats; + struct dpaa2_mac_stats pause_stats; + struct dpaa2_mac_stats eth_ctrl_stats; + struct dpaa2_mac_stats eth_mac_stats; }; static inline bool dpaa2_mac_is_type_phy(struct dpaa2_mac *mac) @@ -61,6 +65,19 @@ void dpaa2_mac_get_strings(u8 **data); void dpaa2_mac_get_ethtool_stats(struct dpaa2_mac *mac, u64 *data); +void dpaa2_mac_get_rmon_stats(struct dpaa2_mac *mac, + struct ethtool_rmon_stats *s, + const struct ethtool_rmon_hist_range **ranges); + +void dpaa2_mac_get_pause_stats(struct dpaa2_mac *mac, + struct ethtool_pause_stats *s); + +void dpaa2_mac_get_ctrl_stats(struct dpaa2_mac *mac, + struct ethtool_eth_ctrl_stats *s); + +void dpaa2_mac_get_eth_mac_stats(struct dpaa2_mac *mac, + struct ethtool_eth_mac_stats *s); + void dpaa2_mac_start(struct dpaa2_mac *mac); void dpaa2_mac_stop(struct dpaa2_mac *mac); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-ethtool.c index a888f6e6e9b0..f5d9321c7ef9 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-ethtool.c @@ -3,7 +3,7 @@ * DPAA2 Ethernet Switch ethtool support * * Copyright 2014-2016 Freescale Semiconductor Inc. - * Copyright 2017-2018 NXP + * Copyright 2017-2018, 2024-2026 NXP * */ @@ -210,6 +210,49 @@ static void dpaa2_switch_ethtool_get_stats(struct net_device *netdev, mutex_unlock(&port_priv->mac_lock); } +static void +dpaa2_switch_get_rmon_stats(struct net_device *netdev, + struct ethtool_rmon_stats *rmon_stats, + const struct ethtool_rmon_hist_range **ranges) +{ + struct ethsw_port_priv *port_priv = netdev_priv(netdev); + + mutex_lock(&port_priv->mac_lock); + + if (dpaa2_switch_port_has_mac(port_priv)) + dpaa2_mac_get_rmon_stats(port_priv->mac, rmon_stats, ranges); + + mutex_unlock(&port_priv->mac_lock); +} + +static void +dpaa2_switch_get_ctrl_stats(struct net_device *net_dev, + struct ethtool_eth_ctrl_stats *ctrl_stats) +{ + struct ethsw_port_priv *port_priv = netdev_priv(net_dev); + + mutex_lock(&port_priv->mac_lock); + + if (dpaa2_switch_port_has_mac(port_priv)) + dpaa2_mac_get_ctrl_stats(port_priv->mac, ctrl_stats); + + mutex_unlock(&port_priv->mac_lock); +} + +static void +dpaa2_switch_get_eth_mac_stats(struct net_device *net_dev, + struct ethtool_eth_mac_stats *eth_mac_stats) +{ + struct ethsw_port_priv *port_priv = netdev_priv(net_dev); + + mutex_lock(&port_priv->mac_lock); + + if (dpaa2_switch_port_has_mac(port_priv)) + dpaa2_mac_get_eth_mac_stats(port_priv->mac, eth_mac_stats); + + mutex_unlock(&port_priv->mac_lock); +} + const struct ethtool_ops dpaa2_switch_port_ethtool_ops = { .get_drvinfo = dpaa2_switch_get_drvinfo, .get_link = ethtool_op_get_link, @@ -218,4 +261,7 @@ const struct ethtool_ops dpaa2_switch_port_ethtool_ops = { .get_strings = dpaa2_switch_ethtool_get_strings, .get_ethtool_stats = dpaa2_switch_ethtool_get_stats, .get_sset_count = dpaa2_switch_ethtool_get_sset_count, + .get_rmon_stats = dpaa2_switch_get_rmon_stats, + .get_eth_ctrl_stats = dpaa2_switch_get_ctrl_stats, + .get_eth_mac_stats = dpaa2_switch_get_eth_mac_stats, }; From 45b2b84ac6fde39c427018d6cdf7d44258938faa Mon Sep 17 00:00:00 2001 From: Long Li Date: Mon, 23 Mar 2026 12:49:25 -0700 Subject: [PATCH 0945/1561] net: mana: Set default number of queues to 16 Set the default number of queues per vPort to MANA_DEF_NUM_QUEUES (16), as 16 queues can achieve optimal throughput for typical workloads. The actual number of queues may be lower if it exceeds the hardware reported limit. Users can increase the number of queues up to max_queues via ethtool if needed. Signed-off-by: Long Li Link: https://patch.msgid.link/20260323194925.1766385-1-longli@microsoft.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/microsoft/mana/mana_en.c | 3 ++- include/net/mana/mana.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 49c65cc1697c..b39e8b920791 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -3357,7 +3357,8 @@ static int mana_probe_port(struct mana_context *ac, int port_idx, apc->ac = ac; apc->ndev = ndev; apc->max_queues = gc->max_num_queues; - apc->num_queues = gc->max_num_queues; + /* Use MANA_DEF_NUM_QUEUES as default, still honoring the HW limit */ + apc->num_queues = min(gc->max_num_queues, MANA_DEF_NUM_QUEUES); apc->tx_queue_size = DEF_TX_BUFFERS_PER_QUEUE; apc->rx_queue_size = DEF_RX_BUFFERS_PER_QUEUE; apc->port_handle = INVALID_MANA_HANDLE; diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 3336688fed5e..96d21cbbdee2 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -1007,6 +1007,7 @@ struct mana_deregister_filter_resp { #define STATISTICS_FLAGS_TX_ERRORS_GDMA_ERROR 0x0000000004000000 #define MANA_MAX_NUM_QUEUES 64 +#define MANA_DEF_NUM_QUEUES 16 #define MANA_SHORT_VPORT_OFFSET_MAX ((1U << 8) - 1) From 8454478ef9abd4b4acb657160d9587a94cdae180 Mon Sep 17 00:00:00 2001 From: Joey Lu Date: Mon, 23 Mar 2026 18:17:54 +0800 Subject: [PATCH 0946/1561] dt-bindings: net: nuvoton: Add schema for Nuvoton MA35 family GMAC Create initial schema for Nuvoton MA35 family Gigabit MAC. Reviewed-by: Rob Herring (Arm) Signed-off-by: Joey Lu Link: https://patch.msgid.link/20260323101756.81849-2-a0987203069@gmail.com Signed-off-by: Jakub Kicinski --- .../bindings/net/nuvoton,ma35d1-dwmac.yaml | 140 ++++++++++++++++++ .../devicetree/bindings/net/snps,dwmac.yaml | 1 + 2 files changed, 141 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/nuvoton,ma35d1-dwmac.yaml diff --git a/Documentation/devicetree/bindings/net/nuvoton,ma35d1-dwmac.yaml b/Documentation/devicetree/bindings/net/nuvoton,ma35d1-dwmac.yaml new file mode 100644 index 000000000000..ab18702e53f9 --- /dev/null +++ b/Documentation/devicetree/bindings/net/nuvoton,ma35d1-dwmac.yaml @@ -0,0 +1,140 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/nuvoton,ma35d1-dwmac.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Nuvoton DWMAC glue layer controller + +maintainers: + - Joey Lu + +description: + Nuvoton 10/100/1000Mbps Gigabit Ethernet MAC Controller is based on + Synopsys DesignWare MAC (version 3.73a). + +select: + properties: + compatible: + contains: + enum: + - nuvoton,ma35d1-dwmac + required: + - compatible + +allOf: + - $ref: snps,dwmac.yaml# + +properties: + compatible: + items: + - const: nuvoton,ma35d1-dwmac + - const: snps,dwmac-3.70a + + reg: + maxItems: 1 + description: + Register range should be one of the GMAC interface. + + interrupts: + maxItems: 1 + + clocks: + items: + - description: MAC clock + - description: PTP clock + + clock-names: + items: + - const: stmmaceth + - const: ptp_ref + + nuvoton,sys: + $ref: /schemas/types.yaml#/definitions/phandle-array + items: + - items: + - description: phandle to access syscon registers. + - description: GMAC interface ID. + enum: + - 0 + - 1 + description: + A phandle to the syscon with one argument that configures system registers + for MA35D1's two GMACs. The argument specifies the GMAC interface ID. + + resets: + maxItems: 1 + + reset-names: + items: + - const: stmmaceth + + phy-mode: + enum: + - rmii + - rgmii + - rgmii-id + - rgmii-txid + - rgmii-rxid + + tx-internal-delay-ps: + default: 0 + minimum: 0 + maximum: 2000 + description: + RGMII TX path delay used only when PHY operates in RGMII mode with + internal delay (phy-mode is 'rgmii-id' or 'rgmii-txid') in pico-seconds. + Allowed values are from 0 to 2000. + + rx-internal-delay-ps: + default: 0 + minimum: 0 + maximum: 2000 + description: + RGMII RX path delay used only when PHY operates in RGMII mode with + internal delay (phy-mode is 'rgmii-id' or 'rgmii-rxid') in pico-seconds. + Allowed values are from 0 to 2000. + +required: + - clocks + - clock-names + - nuvoton,sys + - resets + - reset-names + - phy-mode + +unevaluatedProperties: false + +examples: + - | + #include + #include + #include + ethernet@40120000 { + compatible = "nuvoton,ma35d1-dwmac", "snps,dwmac-3.70a"; + reg = <0x40120000 0x10000>; + interrupts = ; + interrupt-names = "macirq"; + clocks = <&clk EMAC0_GATE>, <&clk EPLL_DIV8>; + clock-names = "stmmaceth", "ptp_ref"; + + nuvoton,sys = <&sys 0>; + resets = <&sys MA35D1_RESET_GMAC0>; + reset-names = "stmmaceth"; + snps,multicast-filter-bins = <0>; + snps,perfect-filter-entries = <8>; + rx-fifo-depth = <4096>; + tx-fifo-depth = <2048>; + + phy-mode = "rgmii-id"; + phy-handle = <ð_phy0>; + mdio { + compatible = "snps,dwmac-mdio"; + #address-cells = <1>; + #size-cells = <0>; + + eth_phy0: ethernet-phy@0 { + reg = <0>; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml index 98ebb6276bc6..c25903c74484 100644 --- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml +++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml @@ -69,6 +69,7 @@ properties: - ingenic,x2000-mac - loongson,ls2k-dwmac - loongson,ls7a-dwmac + - nuvoton,ma35d1-dwmac - nxp,s32g2-dwmac - qcom,qcs404-ethqos - qcom,sa8775p-ethqos From 4d7c557f58efda70ecf11e80cebf8fbbc9e08282 Mon Sep 17 00:00:00 2001 From: Joey Lu Date: Mon, 23 Mar 2026 18:17:56 +0800 Subject: [PATCH 0947/1561] net: stmmac: dwmac-nuvoton: Add dwmac glue for Nuvoton MA35 family Add support for Gigabit Ethernet on Nuvoton MA35 series using dwmac driver. Reviewed-by: Andrew Lunn Signed-off-by: Joey Lu Link: https://patch.msgid.link/20260323101756.81849-4-a0987203069@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/Kconfig | 12 ++ drivers/net/ethernet/stmicro/stmmac/Makefile | 1 + .../ethernet/stmicro/stmmac/dwmac-nuvoton.c | 136 ++++++++++++++++++ 3 files changed, 149 insertions(+) create mode 100644 drivers/net/ethernet/stmicro/stmmac/dwmac-nuvoton.c diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index d3a6ab7383fc..c2cb530fd0a2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -132,6 +132,18 @@ config DWMAC_MESON the stmmac device driver. This driver is used for Meson6, Meson8, Meson8b and GXBB SoCs. +config DWMAC_NUVOTON + tristate "Nuvoton MA35 dwmac support" + default ARCH_MA35 + depends on OF && (ARCH_MA35 || COMPILE_TEST) + select MFD_SYSCON + help + Support for Ethernet controller on Nuvoton MA35 series SoC. + + This selects the Nuvoton MA35 series SoC glue layer support + for the stmmac device driver. The nuvoton-dwmac driver is + used for MA35 series SoCs. + config DWMAC_QCOM_ETHQOS tristate "Qualcomm ETHQOS support" default ARCH_QCOM diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile index 945c5354eced..a1cea2f57252 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Makefile +++ b/drivers/net/ethernet/stmicro/stmmac/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_DWMAC_IPQ806X) += dwmac-ipq806x.o obj-$(CONFIG_DWMAC_LPC18XX) += dwmac-lpc18xx.o obj-$(CONFIG_DWMAC_MEDIATEK) += dwmac-mediatek.o obj-$(CONFIG_DWMAC_MESON) += dwmac-meson.o dwmac-meson8b.o +obj-$(CONFIG_DWMAC_NUVOTON) += dwmac-nuvoton.o obj-$(CONFIG_DWMAC_QCOM_ETHQOS) += dwmac-qcom-ethqos.o obj-$(CONFIG_DWMAC_RENESAS_GBETH) += dwmac-renesas-gbeth.o obj-$(CONFIG_DWMAC_ROCKCHIP) += dwmac-rk.o diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-nuvoton.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-nuvoton.c new file mode 100644 index 000000000000..e2240b68ad98 --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-nuvoton.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Nuvoton DWMAC specific glue layer + * + * Copyright (C) 2025 Nuvoton Technology Corp. + * + * Author: Joey Lu + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "stmmac_platform.h" + +#define NVT_REG_SYS_GMAC0MISCR 0x108 +#define NVT_REG_SYS_GMAC1MISCR 0x10C + +#define NVT_MISCR_RMII BIT(0) + +/* Two thousand picoseconds are evenly mapped to a 4-bit field, + * resulting in each step being 2000/15 picoseconds. + */ +#define NVT_PATH_DELAY_STEP 134 +#define NVT_TX_DELAY_MASK GENMASK(19, 16) +#define NVT_RX_DELAY_MASK GENMASK(23, 20) + +struct nvt_priv_data { + struct device *dev; + struct regmap *regmap; + u32 macid; +}; + +static int nvt_gmac_get_delay(struct device *dev, const char *property) +{ + u32 arg; + + if (of_property_read_u32(dev->of_node, property, &arg)) + return 0; + + if (arg > 2000) + return -EINVAL; + + if (arg == 2000) + return 15; + + return arg / NVT_PATH_DELAY_STEP; +} + +static int nvt_set_phy_intf_sel(void *bsp_priv, u8 phy_intf_sel) +{ + struct nvt_priv_data *priv = bsp_priv; + u32 reg, val; + int ret; + + if (phy_intf_sel == PHY_INTF_SEL_RGMII) { + ret = nvt_gmac_get_delay(priv->dev, "rx-internal-delay-ps"); + if (ret < 0) + return ret; + val = FIELD_PREP(NVT_RX_DELAY_MASK, ret); + + ret = nvt_gmac_get_delay(priv->dev, "tx-internal-delay-ps"); + if (ret < 0) + return ret; + val |= FIELD_PREP(NVT_TX_DELAY_MASK, ret); + } else if (phy_intf_sel == PHY_INTF_SEL_RMII) { + val = NVT_MISCR_RMII; + } else { + return -EINVAL; + } + + reg = (priv->macid == 0) ? NVT_REG_SYS_GMAC0MISCR : NVT_REG_SYS_GMAC1MISCR; + regmap_update_bits(priv->regmap, reg, + NVT_RX_DELAY_MASK | NVT_TX_DELAY_MASK | NVT_MISCR_RMII, val); + + return 0; +} + +static int nvt_gmac_probe(struct platform_device *pdev) +{ + struct plat_stmmacenet_data *plat_dat; + struct stmmac_resources stmmac_res; + struct device *dev = &pdev->dev; + struct nvt_priv_data *priv; + int ret; + + ret = stmmac_get_platform_resources(pdev, &stmmac_res); + if (ret) + return dev_err_probe(dev, ret, "Failed to get platform resources\n"); + + plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac); + if (IS_ERR(plat_dat)) + return dev_err_probe(dev, PTR_ERR(plat_dat), "Failed to get platform data\n"); + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return dev_err_probe(dev, -ENOMEM, "Failed to allocate private data\n"); + + priv->regmap = syscon_regmap_lookup_by_phandle_args(dev->of_node, "nuvoton,sys", + 1, &priv->macid); + if (IS_ERR(priv->regmap)) + return dev_err_probe(dev, PTR_ERR(priv->regmap), "Failed to get sys register\n"); + + if (priv->macid > 1) + return dev_err_probe(dev, -EINVAL, "Invalid sys arguments\n"); + + plat_dat->bsp_priv = priv; + plat_dat->set_phy_intf_sel = nvt_set_phy_intf_sel; + + return stmmac_pltfr_probe(pdev, plat_dat, &stmmac_res); +} + +static const struct of_device_id nvt_dwmac_match[] = { + { .compatible = "nuvoton,ma35d1-dwmac"}, + { } +}; +MODULE_DEVICE_TABLE(of, nvt_dwmac_match); + +static struct platform_driver nvt_dwmac_driver = { + .probe = nvt_gmac_probe, + .remove = stmmac_pltfr_remove, + .driver = { + .name = "nuvoton-dwmac", + .pm = &stmmac_pltfr_pm_ops, + .of_match_table = nvt_dwmac_match, + }, +}; +module_platform_driver(nvt_dwmac_driver); + +MODULE_AUTHOR("Joey Lu "); +MODULE_DESCRIPTION("Nuvoton DWMAC specific glue layer"); +MODULE_LICENSE("GPL"); From 845a0441111812a540c928e3264602ecf1c17df9 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Tue, 24 Mar 2026 10:20:56 +0100 Subject: [PATCH 0948/1561] net: stmmac: dwmac-socfpga: Move internal helpers This is preparatory work to allow reusing the SGMII configuration helper and the wrapper to get the interface in the fix_mac_speed() callback. Signed-off-by: Maxime Chevallier Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20260324092102.687082-2-maxime.chevallier@bootlin.com Signed-off-by: Jakub Kicinski --- .../ethernet/stmicro/stmmac/dwmac-socfpga.c | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index 5f89fd968ae9..42da73b92ceb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -72,6 +72,18 @@ struct socfpga_dwmac { const struct socfpga_dwmac_ops *ops; }; +static int socfpga_get_plat_phymode(struct socfpga_dwmac *dwmac) +{ + return dwmac->plat_dat->phy_interface; +} + +static void socfpga_sgmii_config(struct socfpga_dwmac *dwmac, bool enable) +{ + u16 val = enable ? SGMII_ADAPTER_ENABLE : SGMII_ADAPTER_DISABLE; + + writew(val, dwmac->sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); +} + static void socfpga_dwmac_fix_mac_speed(void *bsp_priv, phy_interface_t interface, int speed, unsigned int mode) @@ -244,18 +256,6 @@ err_node_put: return ret; } -static int socfpga_get_plat_phymode(struct socfpga_dwmac *dwmac) -{ - return dwmac->plat_dat->phy_interface; -} - -static void socfpga_sgmii_config(struct socfpga_dwmac *dwmac, bool enable) -{ - u16 val = enable ? SGMII_ADAPTER_ENABLE : SGMII_ADAPTER_DISABLE; - - writew(val, dwmac->sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); -} - static int socfpga_set_phy_mode_common(int phymode, u32 *val) { switch (phymode) { From a7be7cc12442479f44e3c17c6070cff03f6f8f02 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Tue, 24 Mar 2026 10:20:57 +0100 Subject: [PATCH 0949/1561] net: stmmac: dwmac-socfpga: Use the socfpga_sgmii_config() helper Use the existing socfpga_sgmii_config() helper in socfpga_dwmac_fix_mac_speed(), instead of re-coding the register access. Signed-off-by: Maxime Chevallier Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20260324092102.687082-3-maxime.chevallier@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index 42da73b92ceb..ed6448c0ad2a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -95,8 +95,7 @@ static void socfpga_dwmac_fix_mac_speed(void *bsp_priv, u32 val; if (sgmii_adapter_base) - writew(SGMII_ADAPTER_DISABLE, - sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); + socfpga_sgmii_config(dwmac, false); if (splitter_base) { val = readl(splitter_base + EMAC_SPLITTER_CTRL_REG); @@ -121,8 +120,7 @@ static void socfpga_dwmac_fix_mac_speed(void *bsp_priv, if ((priv->plat->phy_interface == PHY_INTERFACE_MODE_SGMII || priv->plat->phy_interface == PHY_INTERFACE_MODE_1000BASEX) && sgmii_adapter_base) - writew(SGMII_ADAPTER_ENABLE, - sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); + socfpga_sgmii_config(dwmac, true); } static int socfpga_dwmac_parse_data(struct socfpga_dwmac *dwmac, struct device *dev) From 9b04ecdfb8768a754db4bdb4a9073d0a650c0b53 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Tue, 24 Mar 2026 10:20:58 +0100 Subject: [PATCH 0950/1561] net: stmmac: dwmac-socfpga: Use the correct type for interface modes The internal helper socfpga_get_plat_phymode() returns an int where we actually return a PHY_INTERFACE_MODE_xxx, use the correct type for this. Signed-off-by: Maxime Chevallier Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20260324092102.687082-4-maxime.chevallier@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index ed6448c0ad2a..629074dbbc15 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -72,7 +72,7 @@ struct socfpga_dwmac { const struct socfpga_dwmac_ops *ops; }; -static int socfpga_get_plat_phymode(struct socfpga_dwmac *dwmac) +static phy_interface_t socfpga_get_plat_phymode(struct socfpga_dwmac *dwmac) { return dwmac->plat_dat->phy_interface; } @@ -386,7 +386,7 @@ static int smtg_crosststamp(ktime_t *device, struct system_counterval_t *system, static int socfpga_gen5_set_phy_mode(struct socfpga_dwmac *dwmac) { struct regmap *sys_mgr_base_addr = dwmac->sys_mgr_base_addr; - int phymode = socfpga_get_plat_phymode(dwmac); + phy_interface_t phymode = socfpga_get_plat_phymode(dwmac); u32 reg_offset = dwmac->reg_offset; u32 reg_shift = dwmac->reg_shift; u32 ctrl, val, module; @@ -444,7 +444,7 @@ static int socfpga_gen5_set_phy_mode(struct socfpga_dwmac *dwmac) static int socfpga_gen10_set_phy_mode(struct socfpga_dwmac *dwmac) { struct regmap *sys_mgr_base_addr = dwmac->sys_mgr_base_addr; - int phymode = socfpga_get_plat_phymode(dwmac); + phy_interface_t phymode = socfpga_get_plat_phymode(dwmac); u32 reg_offset = dwmac->reg_offset; u32 reg_shift = dwmac->reg_shift; u32 ctrl, val, module; From adf1536f79a5699b2fbece74d175af43320ec404 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Tue, 24 Mar 2026 10:20:59 +0100 Subject: [PATCH 0951/1561] net: stmmac: dwmac-socfpga: get the phy_mode with the dedicated helper We enable/disable the sgmii_adapter in the .fix_mac_speed() ops based on the phy_mode used in the plat_data. We currently get it with : socfpga_dwmac ->dev ->drv_data ->netdev ->priv ->stmmac_priv ->plat ->phy_interface where we can get it with : socfpga_dwmac ->plat_data ->phy_interface (done by socfpga_get_plat_phymode) Use that helper here. Note that we are also being passed a phy_interface_t from the .fix_mac_speed() callback, provided by phylink. We can handle that in the future when dynamic interface selection is supported. We'd need to guarantee that we have a Lynx PCS to handle it. Signed-off-by: Maxime Chevallier Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20260324092102.687082-5-maxime.chevallier@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index 629074dbbc15..ae40de2ed8eb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -89,9 +89,9 @@ static void socfpga_dwmac_fix_mac_speed(void *bsp_priv, unsigned int mode) { struct socfpga_dwmac *dwmac = (struct socfpga_dwmac *)bsp_priv; - struct stmmac_priv *priv = netdev_priv(dev_get_drvdata(dwmac->dev)); - void __iomem *splitter_base = dwmac->splitter_base; void __iomem *sgmii_adapter_base = dwmac->sgmii_adapter_base; + phy_interface_t phymode = socfpga_get_plat_phymode(dwmac); + void __iomem *splitter_base = dwmac->splitter_base; u32 val; if (sgmii_adapter_base) @@ -117,9 +117,8 @@ static void socfpga_dwmac_fix_mac_speed(void *bsp_priv, writel(val, splitter_base + EMAC_SPLITTER_CTRL_REG); } - if ((priv->plat->phy_interface == PHY_INTERFACE_MODE_SGMII || - priv->plat->phy_interface == PHY_INTERFACE_MODE_1000BASEX) && - sgmii_adapter_base) + if ((phymode == PHY_INTERFACE_MODE_SGMII || + phymode == PHY_INTERFACE_MODE_1000BASEX) && sgmii_adapter_base) socfpga_sgmii_config(dwmac, true); } From 9bd1af853750af99f7038af239bc7d8a525fb720 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Tue, 24 Mar 2026 10:21:00 +0100 Subject: [PATCH 0952/1561] net: stmmac: dwmac-sofcpga: Drop the struct device reference We keep a reference to our the struct device in the socfpga_dwmac priv structure, but now it's only ever used to produce logs in the .set_phy_mode() ops, that are specific to this driver. When we call that ops, we always have a ref to the struct device around, so let's pass it to .set_phy_mode(). We can now discard that reference from struct socfpga_dwmac. Signed-off-by: Maxime Chevallier Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20260324092102.687082-6-maxime.chevallier@bootlin.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac-socfpga.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index ae40de2ed8eb..1d7f0a57d288 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -53,14 +53,14 @@ struct socfpga_dwmac; struct socfpga_dwmac_ops { - int (*set_phy_mode)(struct socfpga_dwmac *dwmac_priv); + int (*set_phy_mode)(struct socfpga_dwmac *dwmac_priv, + struct device *dev); void (*setup_plat_dat)(struct socfpga_dwmac *dwmac_priv); }; struct socfpga_dwmac { u32 reg_offset; u32 reg_shift; - struct device *dev; struct plat_stmmacenet_data *plat_dat; struct regmap *sys_mgr_base_addr; struct reset_control *stmmac_rst; @@ -243,7 +243,6 @@ static int socfpga_dwmac_parse_data(struct socfpga_dwmac *dwmac, struct device * dwmac->reg_offset = reg_offset; dwmac->reg_shift = reg_shift; dwmac->sys_mgr_base_addr = sys_mgr_base_addr; - dwmac->dev = dev; of_node_put(np_sgmii_adapter); return 0; @@ -382,7 +381,8 @@ static int smtg_crosststamp(ktime_t *device, struct system_counterval_t *system, return 0; } -static int socfpga_gen5_set_phy_mode(struct socfpga_dwmac *dwmac) +static int socfpga_gen5_set_phy_mode(struct socfpga_dwmac *dwmac, + struct device *dev) { struct regmap *sys_mgr_base_addr = dwmac->sys_mgr_base_addr; phy_interface_t phymode = socfpga_get_plat_phymode(dwmac); @@ -391,7 +391,7 @@ static int socfpga_gen5_set_phy_mode(struct socfpga_dwmac *dwmac) u32 ctrl, val, module; if (socfpga_set_phy_mode_common(phymode, &val)) { - dev_err(dwmac->dev, "bad phy mode %d\n", phymode); + dev_err(dev, "bad phy mode %d\n", phymode); return -EINVAL; } @@ -440,7 +440,8 @@ static int socfpga_gen5_set_phy_mode(struct socfpga_dwmac *dwmac) return 0; } -static int socfpga_gen10_set_phy_mode(struct socfpga_dwmac *dwmac) +static int socfpga_gen10_set_phy_mode(struct socfpga_dwmac *dwmac, + struct device *dev) { struct regmap *sys_mgr_base_addr = dwmac->sys_mgr_base_addr; phy_interface_t phymode = socfpga_get_plat_phymode(dwmac); @@ -552,7 +553,7 @@ static int socfpga_dwmac_init(struct device *dev, void *bsp_priv) { struct socfpga_dwmac *dwmac = bsp_priv; - return dwmac->ops->set_phy_mode(dwmac); + return dwmac->ops->set_phy_mode(dwmac, dev); } static void socfpga_gen5_setup_plat_dat(struct socfpga_dwmac *dwmac) From b83c28328fee90ba8c8380f0637846b316694edb Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 24 Mar 2026 10:46:52 +0000 Subject: [PATCH 0953/1561] net: phylink: use phylink_expects_phy() in phylink_fwnode_phy_connect() The tests in phylink_expects_phy() and phylink_fwnode_phy_connect() are identical (by intention). Use phylink_expects_phy() to decide whether to ignore a call to phylink_fwnode_phy_connect(). Signed-off-by: Russell King (Oracle) Reviewed-by: Maxime Chevallier Link: https://patch.msgid.link/E1w4zHg-0000000DmC4-2oyb@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/phylink.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index f80cd13c8e32..087ac63f9193 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -2295,14 +2295,12 @@ int phylink_fwnode_phy_connect(struct phylink *pl, struct phy_device *phy_dev; int ret; - /* Fixed links and 802.3z are handled without needing a PHY */ - if (pl->cfg_link_an_mode == MLO_AN_FIXED || - (pl->cfg_link_an_mode == MLO_AN_INBAND && - phy_interface_mode_is_8023z(pl->link_interface))) + if (!phylink_expects_phy(pl)) return 0; phy_fwnode = fwnode_get_phy_node(fwnode); if (IS_ERR(phy_fwnode)) { + /* PHY mode requires a PHY to be specified. */ if (pl->cfg_link_an_mode == MLO_AN_PHY) return -ENODEV; return 0; From ed8edcd47529d9ad6558ca2c00dccf21fc0abc08 Mon Sep 17 00:00:00 2001 From: Ryohei Kinugawa Date: Tue, 24 Mar 2026 14:34:10 +0900 Subject: [PATCH 0954/1561] docs/mlx5: Fix typo subfuction Fix two typos: - 'Subfunctons' -> 'Subfunctions' - 'subfuction' -> 'subfunction' Reviewed-by: Joe Damato Signed-off-by: Ryohei Kinugawa Reviewed-by: Tariq Toukan Acked-by: Randy Dunlap Link: https://patch.msgid.link/20260324053416.70166-1-ryohei.kinugawa@gmail.com Signed-off-by: Jakub Kicinski --- .../device_drivers/ethernet/mellanox/mlx5/kconfig.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/kconfig.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/kconfig.rst index 34e911480108..b45d6871492c 100644 --- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/kconfig.rst +++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/kconfig.rst @@ -114,13 +114,13 @@ Enabling the driver and kconfig options **CONFIG_MLX5_SF=(y/n)** | Build support for subfunction. -| Subfunctons are more light weight than PCI SRIOV VFs. Choosing this option +| Subfunctions are more light weight than PCI SRIOV VFs. Choosing this option | will enable support for creating subfunction devices. **CONFIG_MLX5_SF_MANAGER=(y/n)** -| Build support for subfuction port in the NIC. A Mellanox subfunction +| Build support for subfunction port in the NIC. A Mellanox subfunction | port is managed through devlink. A subfunction supports RDMA, netdevice | and vdpa device. It is similar to a SRIOV VF but it doesn't require | SRIOV support. From 78723a62b969af404fde2468bb9782519d5f8ba7 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Tue, 24 Mar 2026 10:14:33 +0100 Subject: [PATCH 0955/1561] seg6: add per-route tunnel source address Add SEG6_IPTUNNEL_SRC in the uapi for users to configure a specific tunnel source address. Make seg6_iptunnel handle the new attribute correctly. It has priority over the configured per-netns tunnel source address, if any. Cc: David Ahern Signed-off-by: Justin Iurman Reviewed-by: Andrea Mayer Link: https://patch.msgid.link/20260324091434.359341-2-justin.iurman@6wind.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/seg6_iptunnel.h | 1 + net/ipv6/seg6_iptunnel.c | 114 +++++++++++++++++++++-------- 2 files changed, 84 insertions(+), 31 deletions(-) diff --git a/include/uapi/linux/seg6_iptunnel.h b/include/uapi/linux/seg6_iptunnel.h index ae78791372b8..485889b19900 100644 --- a/include/uapi/linux/seg6_iptunnel.h +++ b/include/uapi/linux/seg6_iptunnel.h @@ -20,6 +20,7 @@ enum { SEG6_IPTUNNEL_UNSPEC, SEG6_IPTUNNEL_SRH, + SEG6_IPTUNNEL_SRC, /* struct in6_addr */ __SEG6_IPTUNNEL_MAX, }; #define SEG6_IPTUNNEL_MAX (__SEG6_IPTUNNEL_MAX - 1) diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 3e1b9991131a..e76cc0cc481e 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -49,6 +49,7 @@ static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo) struct seg6_lwt { struct dst_cache cache; + struct in6_addr tunsrc; struct seg6_iptunnel_encap tuninfo[]; }; @@ -65,6 +66,7 @@ seg6_encap_lwtunnel(struct lwtunnel_state *lwt) static const struct nla_policy seg6_iptunnel_policy[SEG6_IPTUNNEL_MAX + 1] = { [SEG6_IPTUNNEL_SRH] = { .type = NLA_BINARY }, + [SEG6_IPTUNNEL_SRC] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)), }; static int nla_put_srh(struct sk_buff *skb, int attrtype, @@ -87,23 +89,32 @@ static int nla_put_srh(struct sk_buff *skb, int attrtype, } static void set_tun_src(struct net *net, struct net_device *dev, - struct in6_addr *daddr, struct in6_addr *saddr) + struct in6_addr *daddr, struct in6_addr *saddr, + struct in6_addr *route_tunsrc) { struct seg6_pernet_data *sdata = seg6_pernet(net); struct in6_addr *tun_src; - rcu_read_lock(); - - tun_src = rcu_dereference(sdata->tun_src); - - if (!ipv6_addr_any(tun_src)) { - memcpy(saddr, tun_src, sizeof(struct in6_addr)); + /* Priority order to select tunnel source address: + * 1. per route source address (if configured) + * 2. per network namespace source address (if configured) + * 3. dynamic resolution + */ + if (route_tunsrc && !ipv6_addr_any(route_tunsrc)) { + memcpy(saddr, route_tunsrc, sizeof(struct in6_addr)); } else { - ipv6_dev_get_saddr(net, dev, daddr, IPV6_PREFER_SRC_PUBLIC, - saddr); - } + rcu_read_lock(); + tun_src = rcu_dereference(sdata->tun_src); - rcu_read_unlock(); + if (!ipv6_addr_any(tun_src)) { + memcpy(saddr, tun_src, sizeof(struct in6_addr)); + } else { + ipv6_dev_get_saddr(net, dev, daddr, + IPV6_PREFER_SRC_PUBLIC, saddr); + } + + rcu_read_unlock(); + } } /* Compute flowlabel for outer IPv6 header */ @@ -125,7 +136,8 @@ static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb, } static int __seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, - int proto, struct dst_entry *cache_dst) + int proto, struct dst_entry *cache_dst, + struct in6_addr *route_tunsrc) { struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst_dev(dst); @@ -182,7 +194,7 @@ static int __seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, isrh->nexthdr = proto; hdr->daddr = isrh->segments[isrh->first_segment]; - set_tun_src(net, dev, &hdr->daddr, &hdr->saddr); + set_tun_src(net, dev, &hdr->daddr, &hdr->saddr, route_tunsrc); #ifdef CONFIG_IPV6_SEG6_HMAC if (sr_has_hmac(isrh)) { @@ -202,14 +214,15 @@ static int __seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, /* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) { - return __seg6_do_srh_encap(skb, osrh, proto, NULL); + return __seg6_do_srh_encap(skb, osrh, proto, NULL, NULL); } EXPORT_SYMBOL_GPL(seg6_do_srh_encap); /* encapsulate an IPv6 packet within an outer IPv6 header with reduced SRH */ static int seg6_do_srh_encap_red(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto, - struct dst_entry *cache_dst) + struct dst_entry *cache_dst, + struct in6_addr *route_tunsrc) { __u8 first_seg = osrh->first_segment; struct dst_entry *dst = skb_dst(skb); @@ -272,7 +285,7 @@ static int seg6_do_srh_encap_red(struct sk_buff *skb, if (skip_srh) { hdr->nexthdr = proto; - set_tun_src(net, dev, &hdr->daddr, &hdr->saddr); + set_tun_src(net, dev, &hdr->daddr, &hdr->saddr, route_tunsrc); goto out; } @@ -308,7 +321,7 @@ static int seg6_do_srh_encap_red(struct sk_buff *skb, srcaddr: isrh->nexthdr = proto; - set_tun_src(net, dev, &hdr->daddr, &hdr->saddr); + set_tun_src(net, dev, &hdr->daddr, &hdr->saddr, route_tunsrc); #ifdef CONFIG_IPV6_SEG6_HMAC if (unlikely(!skip_srh && sr_has_hmac(isrh))) { @@ -383,9 +396,11 @@ static int seg6_do_srh(struct sk_buff *skb, struct dst_entry *cache_dst) { struct dst_entry *dst = skb_dst(skb); struct seg6_iptunnel_encap *tinfo; + struct seg6_lwt *slwt; int proto, err = 0; - tinfo = seg6_encap_lwtunnel(dst->lwtstate); + slwt = seg6_lwt_lwtunnel(dst->lwtstate); + tinfo = slwt->tuninfo; switch (tinfo->mode) { case SEG6_IPTUN_MODE_INLINE: @@ -410,11 +425,11 @@ static int seg6_do_srh(struct sk_buff *skb, struct dst_entry *cache_dst) return -EINVAL; if (tinfo->mode == SEG6_IPTUN_MODE_ENCAP) - err = __seg6_do_srh_encap(skb, tinfo->srh, - proto, cache_dst); + err = __seg6_do_srh_encap(skb, tinfo->srh, proto, + cache_dst, &slwt->tunsrc); else - err = seg6_do_srh_encap_red(skb, tinfo->srh, - proto, cache_dst); + err = seg6_do_srh_encap_red(skb, tinfo->srh, proto, + cache_dst, &slwt->tunsrc); if (err) return err; @@ -436,12 +451,12 @@ static int seg6_do_srh(struct sk_buff *skb, struct dst_entry *cache_dst) if (tinfo->mode == SEG6_IPTUN_MODE_L2ENCAP) err = __seg6_do_srh_encap(skb, tinfo->srh, - IPPROTO_ETHERNET, - cache_dst); + IPPROTO_ETHERNET, cache_dst, + &slwt->tunsrc); else err = seg6_do_srh_encap_red(skb, tinfo->srh, - IPPROTO_ETHERNET, - cache_dst); + IPPROTO_ETHERNET, cache_dst, + &slwt->tunsrc); if (err) return err; @@ -678,6 +693,10 @@ static int seg6_build_state(struct net *net, struct nlattr *nla, if (family != AF_INET6) return -EINVAL; + if (tb[SEG6_IPTUNNEL_SRC]) { + NL_SET_ERR_MSG(extack, "incompatible mode for tunsrc"); + return -EINVAL; + } break; case SEG6_IPTUN_MODE_ENCAP: break; @@ -702,13 +721,23 @@ static int seg6_build_state(struct net *net, struct nlattr *nla, slwt = seg6_lwt_lwtunnel(newts); err = dst_cache_init(&slwt->cache, GFP_ATOMIC); - if (err) { - kfree(newts); - return err; - } + if (err) + goto free_lwt_state; memcpy(&slwt->tuninfo, tuninfo, tuninfo_len); + if (tb[SEG6_IPTUNNEL_SRC]) { + slwt->tunsrc = nla_get_in6_addr(tb[SEG6_IPTUNNEL_SRC]); + + if (ipv6_addr_any(&slwt->tunsrc) || + ipv6_addr_is_multicast(&slwt->tunsrc) || + ipv6_addr_loopback(&slwt->tunsrc)) { + NL_SET_ERR_MSG(extack, "invalid tunsrc address"); + err = -EINVAL; + goto free_dst_cache; + } + } + newts->type = LWTUNNEL_ENCAP_SEG6; newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT; @@ -720,6 +749,12 @@ static int seg6_build_state(struct net *net, struct nlattr *nla, *ts = newts; return 0; + +free_dst_cache: + dst_cache_destroy(&slwt->cache); +free_lwt_state: + kfree(newts); + return err; } static void seg6_destroy_state(struct lwtunnel_state *lwt) @@ -731,29 +766,46 @@ static int seg6_fill_encap_info(struct sk_buff *skb, struct lwtunnel_state *lwtstate) { struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate); + struct seg6_lwt *slwt = seg6_lwt_lwtunnel(lwtstate); if (nla_put_srh(skb, SEG6_IPTUNNEL_SRH, tuninfo)) return -EMSGSIZE; + if (!ipv6_addr_any(&slwt->tunsrc) && + nla_put_in6_addr(skb, SEG6_IPTUNNEL_SRC, &slwt->tunsrc)) + return -EMSGSIZE; + return 0; } static int seg6_encap_nlsize(struct lwtunnel_state *lwtstate) { struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate); + struct seg6_lwt *slwt = seg6_lwt_lwtunnel(lwtstate); + int nlsize; - return nla_total_size(SEG6_IPTUN_ENCAP_SIZE(tuninfo)); + nlsize = nla_total_size(SEG6_IPTUN_ENCAP_SIZE(tuninfo)); + + if (!ipv6_addr_any(&slwt->tunsrc)) + nlsize += nla_total_size(sizeof(slwt->tunsrc)); + + return nlsize; } static int seg6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) { struct seg6_iptunnel_encap *a_hdr = seg6_encap_lwtunnel(a); struct seg6_iptunnel_encap *b_hdr = seg6_encap_lwtunnel(b); + struct seg6_lwt *a_slwt = seg6_lwt_lwtunnel(a); + struct seg6_lwt *b_slwt = seg6_lwt_lwtunnel(b); int len = SEG6_IPTUN_ENCAP_SIZE(a_hdr); if (len != SEG6_IPTUN_ENCAP_SIZE(b_hdr)) return 1; + if (!ipv6_addr_equal(&a_slwt->tunsrc, &b_slwt->tunsrc)) + return 1; + return memcmp(a_hdr, b_hdr, len); } From 2d047673d374e1d808b345fddcbebe4f2a86d2f2 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Tue, 24 Mar 2026 10:14:34 +0100 Subject: [PATCH 0956/1561] selftests: add check for seg6 tunsrc Extend srv6_hencap_red_l3vpn_test.sh to include checks for the new "tunsrc" feature. If there is no support for tunsrc, it silently falls back to the encap config without tunsrc. Cc: Shuah Khan Signed-off-by: Justin Iurman Reviewed-by: Andrea Mayer Link: https://patch.msgid.link/20260324091434.359341-3-justin.iurman@6wind.com Signed-off-by: Jakub Kicinski --- .../net/srv6_hencap_red_l3vpn_test.sh | 109 +++++++++++++++--- 1 file changed, 96 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh index 6a68c7eff1dc..cd7d061e21f8 100755 --- a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh @@ -193,6 +193,8 @@ ret=${ksft_skip} nsuccess=0 nfail=0 +HAS_TUNSRC=false + log_test() { local rc="$1" @@ -345,6 +347,17 @@ setup_rt_networking() ip -netns "${nsname}" addr \ add "${net_prefix}::${rt}/64" dev "${devname}" nodad + # A dedicated ::dead: address (with preferred_lft 0, i.e., + # deprecated) is added when there is support for tunsrc. Because + # it is deprecated, the kernel should never auto-select it as + # source with current config. Only an explicit tunsrc can place + # it in the outer header. + if $HAS_TUNSRC; then + ip -netns "${nsname}" addr \ + add "${net_prefix}::dead:${rt}/64" \ + dev "${devname}" nodad preferred_lft 0 + fi + ip -netns "${nsname}" link set "${devname}" up done @@ -420,6 +433,7 @@ setup_rt_local_sids() # to the destination host) # $5 - encap mode (full or red) # $6 - traffic type (IPv6 or IPv4) +# $7 - force tunsrc (true or false) __setup_rt_policy() { local dst="$1" @@ -428,10 +442,46 @@ __setup_rt_policy() local dec_rt="$4" local mode="$5" local traffic="$6" + local with_tunsrc="$7" local nsname local policy='' + local tunsrc='' local n + # Verify the per-route tunnel source address ("tunsrc") feature. + # If it is not supported, fallback on encap config without tunsrc. + if $with_tunsrc && $HAS_TUNSRC; then + local net_prefix + local drule + local nxt + + eval nsname=\${$(get_rtname "${dec_rt}")} + + # Next SRv6 hop: first End router if any, or the decap router + [ -z "${end_rts}" ] && nxt="${dec_rt}" || nxt="${end_rts%% *}" + + # Use the right prefix for tunsrc depending on the next SRv6 hop + net_prefix="$(get_network_prefix "${encap_rt}" "${nxt}")" + tunsrc="tunsrc ${net_prefix}::dead:${encap_rt}" + + # To verify that the outer source address matches the one + # configured with tunsrc, the decap router discards packets + # with any other source address. + ip netns exec "${nsname}" ip6tables -t raw -I PREROUTING 1 \ + -s "${net_prefix}::dead:${encap_rt}" \ + -d "${VPN_LOCATOR_SERVICE}:${dec_rt}::${DT46_FUNC}" \ + -j ACCEPT + + drule="PREROUTING \ + -d ${VPN_LOCATOR_SERVICE}:${dec_rt}::${DT46_FUNC} \ + -j DROP" + + if ! ip netns exec "${nsname}" \ + ip6tables -t raw -C ${drule} &>/dev/null; then + ip netns exec "${nsname}" ip6tables -t raw -A ${drule} + fi + fi + eval nsname=\${$(get_rtname "${encap_rt}")} for n in ${end_rts}; do @@ -444,7 +494,7 @@ __setup_rt_policy() if [ "${traffic}" -eq 6 ]; then ip -netns "${nsname}" -6 route \ add "${IPv6_HS_NETWORK}::${dst}" vrf "${VRF_DEVNAME}" \ - encap seg6 mode "${mode}" segs "${policy}" \ + encap seg6 mode "${mode}" ${tunsrc} segs "${policy}" \ dev "${VRF_DEVNAME}" ip -netns "${nsname}" -6 neigh \ @@ -455,7 +505,7 @@ __setup_rt_policy() # received, otherwise the proxy arp does not work. ip -netns "${nsname}" -4 route \ add "${IPv4_HS_NETWORK}.${dst}" vrf "${VRF_DEVNAME}" \ - encap seg6 mode "${mode}" segs "${policy}" \ + encap seg6 mode "${mode}" ${tunsrc} segs "${policy}" \ dev "${VRF_DEVNAME}" fi } @@ -463,13 +513,13 @@ __setup_rt_policy() # see __setup_rt_policy setup_rt_policy_ipv6() { - __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 6 + __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 6 "$6" } #see __setup_rt_policy setup_rt_policy_ipv4() { - __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 4 + __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 4 "$6" } setup_hs() @@ -567,41 +617,41 @@ setup() # the network path between hs-1 and hs-2 traverses several routers # depending on the direction of traffic. # - # Direction hs-1 -> hs-2 (H.Encaps.Red) + # Direction hs-1 -> hs-2 (H.Encaps.Red + tunsrc) # - rt-3,rt-4 (SRv6 End behaviors) # - rt-2 (SRv6 End.DT46 behavior) # # Direction hs-2 -> hs-1 (H.Encaps.Red) # - rt-1 (SRv6 End.DT46 behavior) - setup_rt_policy_ipv6 2 1 "3 4" 2 encap.red - setup_rt_policy_ipv6 1 2 "" 1 encap.red + setup_rt_policy_ipv6 2 1 "3 4" 2 encap.red true + setup_rt_policy_ipv6 1 2 "" 1 encap.red false # create an IPv4 VPN between hosts hs-1 and hs-2 # the network path between hs-1 and hs-2 traverses several routers # depending on the direction of traffic. # - # Direction hs-1 -> hs-2 (H.Encaps.Red) + # Direction hs-1 -> hs-2 (H.Encaps.Red + tunsrc) # - rt-2 (SRv6 End.DT46 behavior) # # Direction hs-2 -> hs-1 (H.Encaps.Red) # - rt-4,rt-3 (SRv6 End behaviors) # - rt-1 (SRv6 End.DT46 behavior) - setup_rt_policy_ipv4 2 1 "" 2 encap.red - setup_rt_policy_ipv4 1 2 "4 3" 1 encap.red + setup_rt_policy_ipv4 2 1 "" 2 encap.red true + setup_rt_policy_ipv4 1 2 "4 3" 1 encap.red false # create an IPv6 VPN between hosts hs-3 and hs-4 # the network path between hs-3 and hs-4 traverses several routers # depending on the direction of traffic. # - # Direction hs-3 -> hs-4 (H.Encaps.Red) + # Direction hs-3 -> hs-4 (H.Encaps.Red + tunsrc) # - rt-2 (SRv6 End Behavior) # - rt-4 (SRv6 End.DT46 behavior) # # Direction hs-4 -> hs-3 (H.Encaps.Red) # - rt-1 (SRv6 End behavior) # - rt-3 (SRv6 End.DT46 behavior) - setup_rt_policy_ipv6 4 3 "2" 4 encap.red - setup_rt_policy_ipv6 3 4 "1" 3 encap.red + setup_rt_policy_ipv6 4 3 "2" 4 encap.red true + setup_rt_policy_ipv6 3 4 "1" 3 encap.red false # testing environment was set up successfully SETUP_ERR=0 @@ -809,6 +859,38 @@ test_vrf_or_ksft_skip() fi } +# Before enabling tunsrc tests, make sure tunsrc and ip6tables are supported. +check_tunsrc_support() +{ + setup_ns tunsrc_ns + + ip -netns "${tunsrc_ns}" link add veth0 type veth \ + peer name veth1 netns "${tunsrc_ns}" + + ip -netns "${tunsrc_ns}" link set veth0 up + + if ! ip -netns "${tunsrc_ns}" -6 route add fc00::dead:beef/128 \ + encap seg6 mode encap.red tunsrc fc00::1 segs fc00::2 \ + dev veth0 &>/dev/null; then + cleanup_ns "${tunsrc_ns}" + return + fi + + if ! ip -netns "${tunsrc_ns}" -6 route show | grep -q "tunsrc"; then + cleanup_ns "${tunsrc_ns}" + return + fi + + if ! ip netns exec "${tunsrc_ns}" ip6tables -t raw -A PREROUTING \ + -d fc00::dead:beef -j DROP &>/dev/null; then + cleanup_ns "${tunsrc_ns}" + return + fi + + cleanup_ns "${tunsrc_ns}" + HAS_TUNSRC=true +} + if [ "$(id -u)" -ne 0 ]; then echo "SKIP: Need root privileges" exit "${ksft_skip}" @@ -826,6 +908,7 @@ test_vrf_or_ksft_skip set -e trap cleanup EXIT +check_tunsrc_support setup set +e From e1877cf6231152ff4686bd9900a6a88ffdd90f84 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Tue, 24 Mar 2026 15:19:56 +0800 Subject: [PATCH 0957/1561] net: mctp: avoid copy in fragmentation loop for near-MTU messages Currently, we incorrectly send messages that are within 4 bytes (a struct mctp_hdr) smaller than the MTU through mctp_do_fragment_route(). This has no effect on the actual fragmentation, as we will still send as one packet, but unnecessarily copies the original skb into a new single-fragment skb. Instead of having the MTU comparisons in both mctp_local_output() and mctp_do_fragment_route(), feed all local messages through the latter, and add the single-packet optimisation there. This means we can coalesce the routing path of mctp_local_output, so our out_release path is now solely for errors, so rename the label accordingly. Include a check in the route tests for the single-packet case too. Reported-by: yuanzhaoming Closes: https://github.com/openbmc/linux/commit/269936db5eb3962fe290b1dc4dbf1859cd5a04dd#r175836230 Signed-off-by: Jeremy Kerr Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260324-dev-mtu-copy-v1-1-7af6bd7027d3@codeconstruct.com.au Signed-off-by: Jakub Kicinski --- net/mctp/route.c | 26 +++++++++++--------------- net/mctp/test/route-test.c | 4 ++++ 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/net/mctp/route.c b/net/mctp/route.c index 59ad60b88563..021e04f1ea7c 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -1037,6 +1037,13 @@ static int mctp_do_fragment_route(struct mctp_dst *dst, struct sk_buff *skb, return -EMSGSIZE; } + /* within MTU? avoid the copy, send original skb */ + if (skb->len <= mtu) { + hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | + MCTP_HDR_FLAG_EOM | tag; + return dst->output(dst, skb); + } + /* keep same headroom as the original skb */ headroom = skb_headroom(skb); @@ -1111,7 +1118,6 @@ int mctp_local_output(struct sock *sk, struct mctp_dst *dst, struct mctp_hdr *hdr; unsigned long flags; unsigned int netid; - unsigned int mtu; mctp_eid_t saddr; int rc; u8 tag; @@ -1133,7 +1139,7 @@ int mctp_local_output(struct sock *sk, struct mctp_dst *dst, netid = READ_ONCE(dst->dev->net); if (rc) - goto out_release; + goto err_free; if (req_tag & MCTP_TAG_OWNER) { if (req_tag & MCTP_TAG_PREALLOC) @@ -1145,7 +1151,7 @@ int mctp_local_output(struct sock *sk, struct mctp_dst *dst, if (IS_ERR(key)) { rc = PTR_ERR(key); - goto out_release; + goto err_free; } mctp_skb_set_flow(skb, key); /* done with the key in this scope */ @@ -1170,20 +1176,10 @@ int mctp_local_output(struct sock *sk, struct mctp_dst *dst, hdr->dest = daddr; hdr->src = saddr; - mtu = dst->mtu; - - if (skb->len + sizeof(struct mctp_hdr) <= mtu) { - hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | - MCTP_HDR_FLAG_EOM | tag; - rc = dst->output(dst, skb); - } else { - rc = mctp_do_fragment_route(dst, skb, mtu, tag); - } - /* route output functions consume the skb, even on error */ - skb = NULL; + return mctp_do_fragment_route(dst, skb, dst->mtu, tag); -out_release: +err_free: kfree_skb(skb); return rc; } diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c index 75ea96c10e49..61c989c43ec0 100644 --- a/net/mctp/test/route-test.c +++ b/net/mctp/test/route-test.c @@ -63,6 +63,10 @@ static void mctp_test_fragment(struct kunit *test) if (!skb2) break; + /* avoid copying single-skb messages */ + if (first && last) + KUNIT_EXPECT_PTR_EQ(test, skb, skb2); + hdr2 = mctp_hdr(skb2); tag_mask = MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO; From 5f70b0aa08ada1004259902dbee3b401ac2f7dcb Mon Sep 17 00:00:00 2001 From: Yohei Kojima Date: Wed, 25 Mar 2026 02:20:28 +0900 Subject: [PATCH 0958/1561] selftests: net: Remove unnecessary backslashes in fq_band_pktlimit.sh Address "grep: warning: stray \ before white space" warning from GNU grep 3.12. This warns the misplaced backslashes before whitespaces (e.g. \\' ' or '\ ') which leads to unspecified behavior [1]. We can just remove the backslashes before whitespaces as POSIX says: Enclosing characters in single-quotes ('') shall preserve the literal value of each character within the single-quotes. and bourne-compatible shells behave so. [1]: https://lists.gnu.org/r/bug-gnulib/2022-05/msg00057.html Signed-off-by: Yohei Kojima Reviewed-by: Simon Horman Link: https://patch.msgid.link/dd0bbd48cdf468da56ec34fd61cecd4d2111d7ba.1774372510.git.yk@y-koj.net Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fq_band_pktlimit.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/net/fq_band_pktlimit.sh b/tools/testing/selftests/net/fq_band_pktlimit.sh index 977070ed42b3..223f9efe4090 100755 --- a/tools/testing/selftests/net/fq_band_pktlimit.sh +++ b/tools/testing/selftests/net/fq_band_pktlimit.sh @@ -32,19 +32,19 @@ tc qdisc replace dev dummy0 root handle 1: fq quantum 1514 initial_quantum 1514 DELAY=400000 ./cmsg_sender -6 -p u -d "${DELAY}" -n 20 fdaa::2 8000 -OUT1="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')" +OUT1="$(tc -s qdisc show dev dummy0 | grep '^ Sent')" ./cmsg_sender -6 -p u -d "${DELAY}" -n 20 fdaa::2 8000 -OUT2="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')" +OUT2="$(tc -s qdisc show dev dummy0 | grep '^ Sent')" ./cmsg_sender -6 -p u -d "${DELAY}" -n 20 -P 7 fdaa::2 8000 -OUT3="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')" +OUT3="$(tc -s qdisc show dev dummy0 | grep '^ Sent')" # Initial stats will report zero sent, as all packets are still # queued in FQ. Sleep for at least the delay period and see that # twenty are now sent. sleep 0.6 -OUT4="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')" +OUT4="$(tc -s qdisc show dev dummy0 | grep '^ Sent')" # Log the output after the test echo "${OUT1}" @@ -53,7 +53,7 @@ echo "${OUT3}" echo "${OUT4}" # Test the output for expected values -echo "${OUT1}" | grep -q '0\ pkt\ (dropped\ 10' || die "unexpected drop count at 1" -echo "${OUT2}" | grep -q '0\ pkt\ (dropped\ 30' || die "unexpected drop count at 2" -echo "${OUT3}" | grep -q '0\ pkt\ (dropped\ 40' || die "unexpected drop count at 3" -echo "${OUT4}" | grep -q '20\ pkt\ (dropped\ 40' || die "unexpected accept count at 4" +echo "${OUT1}" | grep -q '0 pkt (dropped 10' || die "unexpected drop count at 1" +echo "${OUT2}" | grep -q '0 pkt (dropped 30' || die "unexpected drop count at 2" +echo "${OUT3}" | grep -q '0 pkt (dropped 40' || die "unexpected drop count at 3" +echo "${OUT4}" | grep -q '20 pkt (dropped 40' || die "unexpected accept count at 4" From e1da0e1859c612ca81d05518cb908642c00c2e93 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Tue, 24 Mar 2026 19:08:24 +0100 Subject: [PATCH 0959/1561] net: dsa: microchip: Drop unnecessary check in ksz9477 PCS setup The ksz_dev_ops .pcs_create() is called under the assumption that the switch has a PCS port : if (ksz_has_sgmii_port(dev) && dev->dev_ops->pcs_create) { ret = dev->dev_ops->pcs_create(dev); [...] } The KSZ9477 implementation of .pcs_create() does the same check on ksz_has_sgmii_port(), and protects the entire function with it. Drop it, saving a level of indentation and increasing readability. Signed-off-by: Maxime Chevallier Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20260324180826.524327-2-maxime.chevallier@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz9477.c | 49 ++++++++++++++--------------- 1 file changed, 23 insertions(+), 26 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index 5416016b33e0..30d8c0146bbb 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -310,36 +310,33 @@ static int ksz9477_pcs_write(struct mii_bus *bus, int phy, int mmd, int reg, int ksz9477_pcs_create(struct ksz_device *dev) { - /* This chip has a SGMII port. */ - if (ksz_has_sgmii_port(dev)) { - int port = ksz_get_sgmii_port(dev); - struct ksz_port *p = &dev->ports[port]; - struct phylink_pcs *pcs; - struct mii_bus *bus; - int ret; + int port = ksz_get_sgmii_port(dev); + struct ksz_port *p = &dev->ports[port]; + struct phylink_pcs *pcs; + struct mii_bus *bus; + int ret; - bus = devm_mdiobus_alloc(dev->dev); - if (!bus) - return -ENOMEM; + bus = devm_mdiobus_alloc(dev->dev); + if (!bus) + return -ENOMEM; - bus->name = "ksz_pcs_mdio_bus"; - snprintf(bus->id, MII_BUS_ID_SIZE, "%s-pcs", - dev_name(dev->dev)); - bus->read_c45 = &ksz9477_pcs_read; - bus->write_c45 = &ksz9477_pcs_write; - bus->parent = dev->dev; - bus->phy_mask = ~0; - bus->priv = dev; + bus->name = "ksz_pcs_mdio_bus"; + snprintf(bus->id, MII_BUS_ID_SIZE, "%s-pcs", + dev_name(dev->dev)); + bus->read_c45 = &ksz9477_pcs_read; + bus->write_c45 = &ksz9477_pcs_write; + bus->parent = dev->dev; + bus->phy_mask = ~0; + bus->priv = dev; - ret = devm_mdiobus_register(dev->dev, bus); - if (ret) - return ret; + ret = devm_mdiobus_register(dev->dev, bus); + if (ret) + return ret; - pcs = xpcs_create_pcs_mdiodev(bus, 0); - if (IS_ERR(pcs)) - return PTR_ERR(pcs); - p->pcs = pcs; - } + pcs = xpcs_create_pcs_mdiodev(bus, 0); + if (IS_ERR(pcs)) + return PTR_ERR(pcs); + p->pcs = pcs; return 0; } From 588cd4e7d22cf82081865a6677a468c88d93b4ba Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Tue, 24 Mar 2026 19:08:25 +0100 Subject: [PATCH 0960/1561] net: dsa: microchip: drop an outdated comment about SGMII support SGMII support has been added to ksz9477, we can drop the comment saying that it'll be added later. Signed-off-by: Maxime Chevallier Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20260324180826.524327-3-maxime.chevallier@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz9477.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index 30d8c0146bbb..d3c23dcaea8c 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -525,7 +525,7 @@ int ksz9477_r_phy(struct ksz_device *dev, u16 addr, u16 reg, u16 *data) * A fixed PHY can be setup in the device tree, but this function is * still called for that port during initialization. * For RGMII PHY there is no way to access it so the fixed PHY should - * be used. For SGMII PHY the supporting code will be added later. + * be used. */ if (!dev->info->internal_phy[addr]) { struct ksz_port *p = &dev->ports[addr]; From fe3e54253f0b04ec9e85d46e10aadbdbb31d29b2 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 24 Mar 2026 11:15:54 -0400 Subject: [PATCH 0961/1561] virtio_net: sync RX buffer before reading the header receive_buf() reads the virtio header through buf before page_pool_dma_sync_for_cpu() runs in receive_small() or receive_mergeable(). The header buffer is thus unsynchronized at the point where flags and, for mergeable buffers, num_buffers are consumed. Omar Elghoul reported that on s390x Secure Execution this showed up as greatly reduced virtio-net performance together with "bad gso" and "bad csum" messages in dmesg. This is because with SE sync actually copies data, so the header is uninitialized. Move the sync into receive_buf() so the header is synchronized before any access through buf. Tool use: Cursor with GPT-5.4 drafted the initial code move from prompt: "in drivers/net/virtio_net.c, move page_pool_dma_sync_for_cpu on receive path to before memory is accessed through buf". The result and the commit log were reviewed and edited manually. Fixes: 24fbd3967f3f ("virtio_net: add page_pool support for buffer allocation") Reported-by: Omar Elghoul Tested-by: Srikanth Aithal Tested-by: Omar Elghoul Link: https://lore.kernel.org/r/20260323150136.14452-1-oelghoul@linux.ibm.com Signed-off-by: Michael S. Tsirkin Tested-by: Vishwanath Seshagiri Acked-by: Jason Wang Link: https://patch.msgid.link/f4caa9be9e5addae7851c012cab0a733be7f0974.1774365273.git.mst@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index bce885dc234b..9cf240afe81e 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1956,13 +1956,6 @@ static struct sk_buff *receive_small(struct net_device *dev, */ buf -= VIRTNET_RX_PAD + xdp_headroom; - if (rq->use_page_pool_dma) { - int offset = buf - page_address(page) + - VIRTNET_RX_PAD + xdp_headroom; - - page_pool_dma_sync_for_cpu(rq->page_pool, page, offset, len); - } - len -= vi->hdr_len; u64_stats_add(&stats->bytes, len); @@ -2398,9 +2391,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, head_skb = NULL; - if (rq->use_page_pool_dma) - page_pool_dma_sync_for_cpu(rq->page_pool, page, offset, len); - u64_stats_add(&stats->bytes, len - vi->hdr_len); if (check_mergeable_len(dev, ctx, len)) @@ -2563,6 +2553,16 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, return; } + /* Sync the memory before touching anything through buf, + * unless virtio core did it already. + */ + if (rq->use_page_pool_dma) { + struct page *page = virt_to_head_page(buf); + int offset = buf - page_address(page); + + page_pool_dma_sync_for_cpu(rq->page_pool, page, offset, len); + } + /* About the flags below: * 1. Save the flags early, as the XDP program might overwrite them. * These flags ensure packets marked as VIRTIO_NET_HDR_F_DATA_VALID From 6a539eee855cbfe9c32507c70003b7710604fcfb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 25 Mar 2026 21:24:39 +0000 Subject: [PATCH 0962/1561] tcp: tcp_vegas: use tcp_vegas_cwnd_event_tx_start() While net/ipv4/tcp_yeah.c is correctly setting .cwnd_event_tx_start to tcp_vegas_cwnd_event_tx_start(), I forgot to do the same in tcp_vegas.c Fixes: d1e59a469737 ("tcp: add cwnd_event_tx_start to tcp_congestion_ops") Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260325212440.4146579-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_vegas.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index cf12fb6be079..950a66966059 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -319,6 +319,7 @@ static struct tcp_congestion_ops tcp_vegas __read_mostly = { .pkts_acked = tcp_vegas_pkts_acked, .set_state = tcp_vegas_state, .cwnd_event = tcp_vegas_cwnd_event, + .cwnd_event_tx_start = tcp_vegas_cwnd_event_tx_start, .get_info = tcp_vegas_get_info, .owner = THIS_MODULE, From f8844dfeeae88db074146e3f8a865b5565f61b40 Mon Sep 17 00:00:00 2001 From: Di Zhu Date: Mon, 23 Mar 2026 12:17:30 +0800 Subject: [PATCH 0963/1561] virtio-net: enable NETIF_F_GRO_HW only if GRO-related offloads are supported Negotiating VIRTIO_NET_F_CTRL_GUEST_OFFLOADS indicates the device allows control over offload support, but the offloads that can be controlled may have nothing to do with GRO (e.g., if neither GUEST_TSO4 nor GUEST_TSO6 is supported). In such a setup, reporting NETIF_F_GRO_HW as available for the device is too optimistic and misleading to the user. Improve the situation by masking off NETIF_F_GRO_HW unless the device possesses actual GRO-related offload capabilities. Out of an abundance of caution, this does not change the current behaviour for hardware with just v6 or just v4 GRO: current interfaces do not allow distinguishing between v6/v4 GRO, so we can't expose them to userspace precisely. Signed-off-by: Di Zhu Acked-by: Jason Wang Link: https://patch.msgid.link/20260323041730.986351-1-zhud@hygon.cn Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 9cf240afe81e..0fcb3f8486c3 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -6800,8 +6800,6 @@ static int virtnet_probe(struct virtio_device *vdev) if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)) dev->features |= NETIF_F_GRO_HW; - if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) - dev->hw_features |= NETIF_F_GRO_HW; dev->vlan_features = dev->features; dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | @@ -6994,6 +6992,19 @@ static int virtnet_probe(struct virtio_device *vdev) enable_rx_mode_work(vi); + for (i = 0; i < ARRAY_SIZE(guest_offloads); i++) { + unsigned int fbit; + + fbit = virtio_offload_to_feature(guest_offloads[i]); + if (virtio_has_feature(vi->vdev, fbit)) + set_bit(guest_offloads[i], &vi->guest_offloads); + } + vi->guest_offloads_capable = vi->guest_offloads; + + if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) && + (vi->guest_offloads_capable & GUEST_OFFLOAD_GRO_HW_MASK)) + dev->hw_features |= NETIF_F_GRO_HW; + /* serialize netdev register + virtio_device_ready() with ndo_open() */ rtnl_lock(); @@ -7076,15 +7087,6 @@ static int virtnet_probe(struct virtio_device *vdev) netif_carrier_on(dev); } - for (i = 0; i < ARRAY_SIZE(guest_offloads); i++) { - unsigned int fbit; - - fbit = virtio_offload_to_feature(guest_offloads[i]); - if (virtio_has_feature(vi->vdev, fbit)) - set_bit(guest_offloads[i], &vi->guest_offloads); - } - vi->guest_offloads_capable = vi->guest_offloads; - rtnl_unlock(); err = virtnet_cpu_notif_add(vi); From a800398e746f8c9010c626a71d92a05b708f7622 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 24 Mar 2026 10:05:40 +0000 Subject: [PATCH 0964/1561] net: stmmac: remove axi_kbbe, axi_mb and axi_rb members axi_kbbe, axi_mb and axi_rb are all written, but nothing ever reads their values. Remove the code that sets these and the struct members. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w4ydo-0000000Dlpb-34jd@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-motorcomm.c | 1 - drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 3 --- include/linux/stmmac.h | 3 --- 3 files changed, 7 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-motorcomm.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-motorcomm.c index d245546b90db..02c786ce5dd4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-motorcomm.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-motorcomm.c @@ -231,7 +231,6 @@ motorcomm_default_plat_data(struct pci_dev *pdev) plat->axi->axi_wr_osr_lmt = 1; plat->axi->axi_rd_osr_lmt = 1; - plat->axi->axi_mb = true; plat->axi->axi_blen_regval = DMA_AXI_BLEN4 | DMA_AXI_BLEN8 | DMA_AXI_BLEN16 | DMA_AXI_BLEN32; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 545b8a3425eb..5cae2aa72906 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -109,10 +109,7 @@ static struct stmmac_axi *stmmac_axi_setup(struct platform_device *pdev) axi->axi_lpi_en = of_property_read_bool(np, "snps,lpi_en"); axi->axi_xit_frm = of_property_read_bool(np, "snps,xit_frm"); - axi->axi_kbbe = of_property_read_bool(np, "snps,kbbe"); axi->axi_fb = of_property_read_bool(np, "snps,fb"); - axi->axi_mb = of_property_read_bool(np, "snps,mb"); - axi->axi_rb = of_property_read_bool(np, "snps,rb"); if (of_property_read_u32(np, "snps,wr_osr_lmt", &axi->axi_wr_osr_lmt)) axi->axi_wr_osr_lmt = 1; diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 5b2bece81448..eaaee329ef9d 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -133,10 +133,7 @@ struct stmmac_axi { u32 axi_blen_regval; bool axi_lpi_en; bool axi_xit_frm; - bool axi_kbbe; bool axi_fb; - bool axi_mb; - bool axi_rb; }; struct stmmac_rxq_cfg { From af0331e1ac51f48d0cc6bb09547b8c8ae467a019 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 24 Mar 2026 10:05:45 +0000 Subject: [PATCH 0965/1561] dt-bindings: remove unimplemented AXI snps,kbbe snps,mb and snps,rb Remove the AXI snps,kbbe snps,mb and snps,rb properties as they have not been used, and although the driver parses these, the code hasn't ever used the parsed result. This parsing has now been removed. These were introduced by commit afea03656add ("stmmac: rework DMA bus setting and introduce new platform AXI structure"). Signed-off-by: Russell King (Oracle) Acked-by: Conor Dooley Link: https://patch.msgid.link/E1w4ydt-0000000Dlph-3WvI@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../devicetree/bindings/net/snps,dwmac.yaml | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml index c25903c74484..2449311c6d28 100644 --- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml +++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml @@ -204,11 +204,8 @@ properties: * snps,xit_frm, unlock on WoL * snps,wr_osr_lmt, max write outstanding req. limit * snps,rd_osr_lmt, max read outstanding req. limit - * snps,kbbe, do not cross 1KiB boundary. * snps,blen, this is a vector of supported burst length. * snps,fb, fixed-burst - * snps,mb, mixed-burst - * snps,rb, rebuild INCRx Burst snps,mtl-rx-config: $ref: /schemas/types.yaml#/definitions/phandle @@ -588,11 +585,6 @@ properties: description: max read outstanding req. limit - snps,kbbe: - $ref: /schemas/types.yaml#/definitions/flag - description: - do not cross 1KiB boundary. - snps,blen: $ref: /schemas/types.yaml#/definitions/uint32-array description: @@ -605,16 +597,6 @@ properties: description: fixed-burst - snps,mb: - $ref: /schemas/types.yaml#/definitions/flag - description: - mixed-burst - - snps,rb: - $ref: /schemas/types.yaml#/definitions/flag - description: - rebuild INCRx Burst - required: - compatible - reg From 552994294fe27b42a6a735b0388029b45d776b38 Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Wed, 25 Mar 2026 15:18:54 +0800 Subject: [PATCH 0966/1561] tcp: Fix inconsistent indenting warning Suppress such warning reported by test robot: include/net/tcp.h:1449 tcp_ca_event() warn: inconsistent indenting Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202603251430.gQ3VuiKV-lkp@intel.com/ Signed-off-by: Jiayuan Chen Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260325071854.805-1-jiayuan.chen@linux.dev Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 39ff4cf3c810..565943c34b7e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1444,7 +1444,7 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) const struct inet_connection_sock *icsk = inet_csk(sk); if (event == CA_EVENT_TX_START) { - if (icsk->icsk_ca_ops->cwnd_event_tx_start) + if (icsk->icsk_ca_ops->cwnd_event_tx_start) icsk->icsk_ca_ops->cwnd_event_tx_start(sk); return; } From e4cf6087cab382c7031e6b436ec55202fa9f2d7b Mon Sep 17 00:00:00 2001 From: Alexander Wilhelm Date: Thu, 26 Mar 2026 08:17:52 +0100 Subject: [PATCH 0967/1561] net: qrtr: fix endian handling of confirm_rx field MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert confirm_rx to little endian when enqueueing and convert it back on receive. This fixes control flow on big endian hosts, little endian is unaffected. On transmit, store confirm_rx as __le32 using cpu_to_le32(). On receive, apply le32_to_cpu() before using the value. !! ensures the value is 0 or 1 in native endianness, so the conversion isn’t strictly required here, but it is kept for consistency and clarity. Reviewed-by: Manivannan Sadhasivam Signed-off-by: Alexander Wilhelm Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/qrtr/af_qrtr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/qrtr/af_qrtr.c b/net/qrtr/af_qrtr.c index 55fd2dd37588..bea1f1720e7f 100644 --- a/net/qrtr/af_qrtr.c +++ b/net/qrtr/af_qrtr.c @@ -365,7 +365,7 @@ static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb, } hdr->size = cpu_to_le32(len); - hdr->confirm_rx = !!confirm_rx; + hdr->confirm_rx = cpu_to_le32(!!confirm_rx); rc = skb_put_padto(skb, ALIGN(len, 4) + sizeof(*hdr)); @@ -466,7 +466,7 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) cb->type = le32_to_cpu(v1->type); cb->src_node = le32_to_cpu(v1->src_node_id); cb->src_port = le32_to_cpu(v1->src_port_id); - cb->confirm_rx = !!v1->confirm_rx; + cb->confirm_rx = !!le32_to_cpu(v1->confirm_rx); cb->dst_node = le32_to_cpu(v1->dst_node_id); cb->dst_port = le32_to_cpu(v1->dst_port_id); From d748047af1355df044faf8df0eedf0ef75f87de8 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 25 Mar 2026 13:36:00 -0400 Subject: [PATCH 0968/1561] ptr_ring: disable KCSAN warnings Eric Dumazet reported KCSAN warnings: BUG: KCSAN: data-race in pfifo_fast_dequeue / pfifo_fast_enqueue write to 0xffff88811d5ccc00 of 8 bytes by interrupt on cpu 0: __ptr_ring_zero_tail include/linux/ptr_ring.h:259 [inline] __ptr_ring_discard_one include/linux/ptr_ring.h:291 [inline] __ptr_ring_consume include/linux/ptr_ring.h:311 [inline] __skb_array_consume include/linux/skb_array.h:98 [inline] pfifo_fast_dequeue+0x770/0x8f0 net/sched/sch_generic.c:770 dequeue_skb net/sched/sch_generic.c:297 [inline] qdisc_restart net/sched/sch_generic.c:402 [inline] __qdisc_run+0x189/0xc80 net/sched/sch_generic.c:420 qdisc_run include/net/pkt_sched.h:120 [inline] net_tx_action+0x379/0x590 net/core/dev.c:5793 handle_softirqs+0xb9/0x280 kernel/softirq.c:622 do_softirq+0x45/0x60 kernel/softirq.c:523 __local_bh_enable_ip+0x70/0x80 kernel/softirq.c:450 local_bh_enable include/linux/bottom_half.h:33 [inline] bpf_test_run+0x2db/0x620 net/bpf/test_run.c:426 bpf_prog_test_run_skb+0x9a4/0xef0 net/bpf/test_run.c:1159 bpf_prog_test_run+0x204/0x340 kernel/bpf/syscall.c:4721 __sys_bpf+0x52e/0x7e0 kernel/bpf/syscall.c:6246 __do_sys_bpf kernel/bpf/syscall.c:6341 [inline] __se_sys_bpf kernel/bpf/syscall.c:6339 [inline] __x64_sys_bpf+0x41/0x50 kernel/bpf/syscall.c:6339 x64_sys_call+0x10cb/0x3020 arch/x86/include/generated/asm/syscalls_64.h:322 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0x12c/0x370 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f read to 0xffff88811d5ccc00 of 8 bytes by task 22632 on cpu 1: __ptr_ring_produce include/linux/ptr_ring.h:106 [inline] ptr_ring_produce include/linux/ptr_ring.h:129 [inline] skb_array_produce include/linux/skb_array.h:44 [inline] pfifo_fast_enqueue+0xd5/0x2c0 net/sched/sch_generic.c:741 dev_qdisc_enqueue net/core/dev.c:4144 [inline] __dev_xmit_skb net/core/dev.c:4188 [inline] __dev_queue_xmit+0x6a4/0x1f20 net/core/dev.c:4795 dev_queue_xmit include/linux/netdevice.h:3384 [inline] __bpf_tx_skb net/core/filter.c:2153 [inline] __bpf_redirect_common net/core/filter.c:2197 [inline] __bpf_redirect+0x862/0x990 net/core/filter.c:2204 ____bpf_clone_redirect net/core/filter.c:2487 [inline] bpf_clone_redirect+0x20c/0x290 net/core/filter.c:2450 bpf_prog_53f18857bc887b09+0x22/0x2a bpf_dispatcher_nop_func include/linux/bpf.h:1402 [inline] __bpf_prog_run include/linux/filter.h:723 [inline] bpf_prog_run include/linux/filter.h:730 [inline] bpf_test_run+0x29d/0x620 net/bpf/test_run.c:423 bpf_prog_test_run_skb+0x9a4/0xef0 net/bpf/test_run.c:1159 bpf_prog_test_run+0x204/0x340 kernel/bpf/syscall.c:4721 __sys_bpf+0x52e/0x7e0 kernel/bpf/syscall.c:6246 __do_sys_bpf kernel/bpf/syscall.c:6341 [inline] __se_sys_bpf kernel/bpf/syscall.c:6339 [inline] __x64_sys_bpf+0x41/0x50 kernel/bpf/syscall.c:6339 x64_sys_call+0x10cb/0x3020 arch/x86/include/generated/asm/syscalls_64.h:322 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0x12c/0x370 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f value changed: 0xffff888104a93a00 -> 0x0000000000000000 Reported by Kernel Concurrency Sanitizer on: CPU: 1 UID: 0 PID: 22632 Comm: syz.0.4135 Tainted: G W syzkaller #0 PREEMPT(full) Tainted: [W]=WARN Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/24/2026 There is no race on ring accesses: reading/writing a partial pointer would be fine, because the reading is done by the producer which merely cares about NULL/non NULL. Document and disable the warnings using data_race(). Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/dd3984b3bce9df3591927f927668cb31cc7ecf34.1774460059.git.mst@redhat.com Signed-off-by: Jakub Kicinski --- include/linux/ptr_ring.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index 534531807d95..d2c3629bbe45 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -48,7 +48,7 @@ struct ptr_ring { */ static inline bool __ptr_ring_full(struct ptr_ring *r) { - return r->queue[r->producer]; + return data_race(r->queue[r->producer]); } static inline bool ptr_ring_full(struct ptr_ring *r) @@ -103,7 +103,7 @@ static inline bool ptr_ring_full_bh(struct ptr_ring *r) */ static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr) { - if (unlikely(!r->size) || r->queue[r->producer]) + if (unlikely(!r->size) || data_race(r->queue[r->producer])) return -ENOSPC; /* Make sure the pointer we are storing points to a valid data. */ @@ -194,7 +194,7 @@ static inline void *__ptr_ring_peek(struct ptr_ring *r) static inline bool __ptr_ring_empty(struct ptr_ring *r) { if (likely(r->size)) - return !r->queue[READ_ONCE(r->consumer_head)]; + return !data_race(r->queue[READ_ONCE(r->consumer_head)]); return true; } @@ -256,7 +256,7 @@ static inline void __ptr_ring_zero_tail(struct ptr_ring *r, int consumer_head) * besides the first one until we write out all entries. */ while (likely(head > r->consumer_tail)) - r->queue[--head] = NULL; + data_race(r->queue[--head] = NULL); r->consumer_tail = consumer_head; } From b8f9410aefc15e55304788b958a796d22bcabb7e Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 25 Mar 2026 16:28:01 +0100 Subject: [PATCH 0969/1561] mlx5: shd: Gracefully avoid shared devlink creation when no usable SN is found On some HW, not even fall-back "SERIALNO" is found in VPD data. Unify the behavior with the case there are no VPD data at all and avoid creation of shared devlink instance. Fixes: 2a8c8a03f306 ("net/mlx5: Add a shared devlink instance for PFs on same chip") Reported-by: Adam Young Closes: https://lore.kernel.org/all/bab5b6bc-aa42-4af1-80d1-e56bcef06bc2@amperemail.onmicrosoft.com/ Reported-by: Ben Copeland Closes: https://lore.kernel.org/all/20260324151014.860376-1-ben.copeland@linaro.org/ Signed-off-by: Jiri Pirko Tested-by: Ben Copeland Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260325152801.236343-1-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.c index bc33f95302df..b925364765ac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.c @@ -33,7 +33,7 @@ int mlx5_shd_init(struct mlx5_core_dev *dev) start = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size, PCI_VPD_RO_KEYWORD_SERIALNO, &kw_len); if (start < 0) - return -ENOENT; + return 0; /* No usable serial number found, ignore. */ } sn = kstrndup(vpd_data + start, kw_len, GFP_KERNEL); if (!sn) From fb4b4a05aeeb8b0f253c5ddce21f4635dadc9550 Mon Sep 17 00:00:00 2001 From: Erni Sri Satya Vennela Date: Wed, 25 Mar 2026 11:04:17 -0700 Subject: [PATCH 0970/1561] net: mana: Use at least SZ_4K in doorbell ID range check mana_gd_ring_doorbell() accesses offsets up to DOORBELL_OFFSET_EQ (0xFF8) + 8 bytes = 4KB within each doorbell page. A db_page_size smaller than SZ_4K is fundamentally incompatible with the driver: doorbell pages would overlap and the device cannot function correctly. Validate db_page_size at the source and fail the probe early if the value is below SZ_4K. This ensures the doorbell ID range check in mana_gd_register_device() can rely on db_page_size being valid. Fixes: 89fe91c65992 ("net: mana: hardening: Validate doorbell ID from GDMA_REGISTER_DEVICE response") Signed-off-by: Erni Sri Satya Vennela Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260325180423.1923060-1-ernis@linux.microsoft.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/microsoft/mana/gdma_main.c | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index 2ba1fa3336f9..43741cd35af8 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -46,6 +47,18 @@ static int mana_gd_init_pf_regs(struct pci_dev *pdev) u64 sriov_base_off; gc->db_page_size = mana_gd_r32(gc, GDMA_PF_REG_DB_PAGE_SIZE) & 0xFFFF; + + /* mana_gd_ring_doorbell() accesses offsets up to DOORBELL_OFFSET_EQ + * (0xFF8) + 8 bytes = 4KB within each doorbell page, so the page + * size must be at least SZ_4K. + */ + if (gc->db_page_size < SZ_4K) { + dev_err(gc->dev, + "Doorbell page size %llu too small (min %u)\n", + gc->db_page_size, SZ_4K); + return -EPROTO; + } + gc->db_page_off = mana_gd_r64(gc, GDMA_PF_REG_DB_PAGE_OFF); /* Validate doorbell offset is within BAR0 */ @@ -73,6 +86,18 @@ static int mana_gd_init_vf_regs(struct pci_dev *pdev) struct gdma_context *gc = pci_get_drvdata(pdev); gc->db_page_size = mana_gd_r32(gc, GDMA_REG_DB_PAGE_SIZE) & 0xFFFF; + + /* mana_gd_ring_doorbell() accesses offsets up to DOORBELL_OFFSET_EQ + * (0xFF8) + 8 bytes = 4KB within each doorbell page, so the page + * size must be at least SZ_4K. + */ + if (gc->db_page_size < SZ_4K) { + dev_err(gc->dev, + "Doorbell page size %llu too small (min %u)\n", + gc->db_page_size, SZ_4K); + return -EPROTO; + } + gc->db_page_off = mana_gd_r64(gc, GDMA_REG_DB_PAGE_OFFSET); /* Validate doorbell offset is within BAR0 */ From 187b00a26679ae58a79f56c0024df1e3dbd7dff0 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 25 Mar 2026 23:00:02 +0200 Subject: [PATCH 0971/1561] net: stmmac: provide flag to disable EEE Some platforms have problems when EEE is enabled, and thus need a way to disable stmmac EEE support. Add a flag before the other LPI related flags which tells stmmac to avoid populating the phylink LPI capabilities, which causes phylink to call phy_disable_eee() for any PHY that is attached to the affected phylink instance. iMX8MP is an example - the lpi_intr_o signal is wired to an OR gate along with the main dwmac interrupts. Since lpi_intr_o is synchronous to the receive clock domain, and takes four clock cycles to clear, this leads to interrupt storms as the interrupt remains asserted for some time after the LPI control and status register is read. This problem becomes worse when the receive clock from the PHY stops when the receive path enters LPI state - which means that lpi_intr_o can not deassert until the clock restarts. Since the LPI state of the receive path depends on the link partner, this is out of our control. We could disable RX clock stop at the PHY, but that doesn't get around the slow-to-deassert lpi_intr_o mentioned in the above paragraph. Previously, iMX8MP worked around this by disabling gigabit EEE, but this is insufficient - the problem is also visible at 100M speeds, where the receive clock is slower. There is extensive discussion and investigation in the thread linked below, the result of which is summarised in this commit message. Reported-by: Laurent Pinchart Closes: https://lore.kernel.org/r/20251026122905.29028-1-laurent.pinchart@ideasonboard.com Signed-off-by: Russell King (Oracle) Tested-by: Ovidiu Panait Signed-off-by: Laurent Pinchart Reviewed-by: Laurent Pinchart Reviewed-by: Kieran Bingham Link: https://patch.msgid.link/20260325210003.2752013-2-laurent.pinchart@ideasonboard.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 7 ++++++- include/linux/stmmac.h | 13 +++++++------ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 9b6b49331639..ce51b9c22129 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1438,7 +1438,12 @@ static int stmmac_phylink_setup(struct stmmac_priv *priv) config->supported_interfaces, pcs->supported_interfaces); - if (priv->dma_cap.eee) { + /* Some platforms, e.g. iMX8MP, wire lpi_intr_o to the same interrupt + * used for stmmac's main interrupts, which leads to interrupt storms. + * STMMAC_FLAG_EEE_DISABLE allows EEE to be disabled on such platforms. + */ + if (priv->dma_cap.eee && + !(priv->plat->flags & STMMAC_FLAG_EEE_DISABLE)) { /* The GMAC 3.74a databook states that EEE is only supported * in MII, GMII, and RGMII interfaces. */ diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index eaaee329ef9d..4430b967abde 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -204,12 +204,13 @@ enum dwmac_core_type { #define STMMAC_FLAG_MULTI_MSI_EN BIT(7) #define STMMAC_FLAG_EXT_SNAPSHOT_EN BIT(8) #define STMMAC_FLAG_INT_SNAPSHOT_EN BIT(9) -#define STMMAC_FLAG_RX_CLK_RUNS_IN_LPI BIT(10) -#define STMMAC_FLAG_EN_TX_LPI_CLOCKGATING BIT(11) -#define STMMAC_FLAG_EN_TX_LPI_CLK_PHY_CAP BIT(12) -#define STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY BIT(13) -#define STMMAC_FLAG_KEEP_PREAMBLE_BEFORE_SFD BIT(14) -#define STMMAC_FLAG_SERDES_SUPPORTS_2500M BIT(15) +#define STMMAC_FLAG_EEE_DISABLE BIT(10) +#define STMMAC_FLAG_RX_CLK_RUNS_IN_LPI BIT(11) +#define STMMAC_FLAG_EN_TX_LPI_CLOCKGATING BIT(12) +#define STMMAC_FLAG_EN_TX_LPI_CLK_PHY_CAP BIT(13) +#define STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY BIT(14) +#define STMMAC_FLAG_KEEP_PREAMBLE_BEFORE_SFD BIT(15) +#define STMMAC_FLAG_SERDES_SUPPORTS_2500M BIT(16) struct mac_device_info; From 394863097e3603eafe819ab4085cbd0ddf371dd9 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 25 Mar 2026 23:00:03 +0200 Subject: [PATCH 0972/1561] net: stmmac: imx: Disable EEE The i.MX8MP suffers from an interrupt storm related to the stmmac and EEE. A long and tedious analysis ([1]) concluded that the SoC wires the stmmac lpi_intr_o signal to an OR gate along with the main dwmac interrupts, which causes an interrupt storm for two reasons. First, there's a race condition due to the interrupt deassertion being synchronous to the RX clock domain: - When the PHY exits LPI mode, it restarts generating the RX clock (clk_rx_i input signal to the GMAC). - The MAC detects exit from LPI, and asserts lpi_intr_o. This triggers the ENET_EQOS interrupt. - Before the CPU has time to process the interrupt, the PHY enters LPI mode again, and stops generating the RX clock. - The CPU processes the interrupt and reads the GMAC4_LPI_CTRL_STATUS registers. This does not clear lpi_intr_o as there's no clk_rx_i. An attempt was made to fixing the issue by not stopping RX_CLK in Rx LPI state ([2]). This alleviates the symptoms but doesn't fix the issue. Since lpi_intr_o takes four RX_CLK cycles to clear, an interrupt storm can still occur during that window. In 1000T mode this is harder to notice, but slower receive clocks cause hundreds to thousands of spurious interrupts. Fix the issue by disabling EEE completely on i.MX8MP. [1] https://lore.kernel.org/all/20251026122905.29028-1-laurent.pinchart@ideasonboard.com/ [2] https://lore.kernel.org/all/20251123053518.8478-1-laurent.pinchart@ideasonboard.com/ Signed-off-by: Laurent Pinchart Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20260325210003.2752013-3-laurent.pinchart@ideasonboard.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c index 9d1bd72ffb73..01260dbbb698 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c @@ -325,11 +325,7 @@ static int imx_dwmac_probe(struct platform_device *pdev) return ret; } - if (data->flags & STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY) - plat_dat->flags |= STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY; - - if (data->flags & STMMAC_FLAG_KEEP_PREAMBLE_BEFORE_SFD) - plat_dat->flags |= STMMAC_FLAG_KEEP_PREAMBLE_BEFORE_SFD; + plat_dat->flags |= data->flags; /* Default TX Q0 to use TSO and rest TXQ for TBS */ for (int i = 1; i < plat_dat->tx_queues_to_use; i++) @@ -366,7 +362,8 @@ static struct imx_dwmac_ops imx8mp_dwmac_data = { .addr_width = 34, .mac_rgmii_txclk_auto_adj = false, .set_intf_mode = imx8mp_set_intf_mode, - .flags = STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY | + .flags = STMMAC_FLAG_EEE_DISABLE | + STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY | STMMAC_FLAG_KEEP_PREAMBLE_BEFORE_SFD, }; From ae3cdfd4e0b86e5d23b46b80e8b010f5392c9635 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 26 Mar 2026 22:32:31 +0200 Subject: [PATCH 0973/1561] vrf: Remove unnecessary NULL check The VRF driver always allocates an IPv4 dst entry for a VRF device and prevents the device from being registered if the allocation fails. Therefore, there is no need to check if the entry exists when tearing down a VRF device. Remove the check. Note that the same is not true for the IPv6 dst entry. Its creation can be skipped if IPv6 is administratively disabled (i.e., 'ipv6.disable=1'). Reviewed-by: Petr Machata Reviewed-by: David Ahern Signed-off-by: Ido Schimmel Link: https://patch.msgid.link/20260326203233.1128554-2-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/vrf.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 8c009bcaa8e7..0952ab6a2571 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1007,13 +1007,11 @@ static void vrf_rtable_release(struct net_device *dev, struct net_vrf *vrf) /* move dev in dst's to loopback so this VRF device can be deleted * - based on dst_ifdown */ - if (rth) { - dst = &rth->dst; - netdev_ref_replace(dst->dev, net->loopback_dev, - &dst->dev_tracker, GFP_KERNEL); - dst->dev = net->loopback_dev; - dst_release(dst); - } + dst = &rth->dst; + netdev_ref_replace(dst->dev, net->loopback_dev, + &dst->dev_tracker, GFP_KERNEL); + dst->dev = net->loopback_dev; + dst_release(dst); } static int vrf_rtable_create(struct net_device *dev) From 50504e2579c1e0379bc0ffeaec67884e1d6c9212 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 26 Mar 2026 22:32:32 +0200 Subject: [PATCH 0974/1561] vrf: Use dst_dev_put() instead of using loopback device Use dst_dev_put() to clean up the device referenced by the dst entry instead of partially open coding it. Internally, the helper uses the blackhole device instead of the loopback device. Reviewed-by: Petr Machata Reviewed-by: David Ahern Signed-off-by: Ido Schimmel Link: https://patch.msgid.link/20260326203233.1128554-3-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/vrf.c | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 0952ab6a2571..bfc9ea91ac20 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -751,21 +751,13 @@ static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev, static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf) { struct rt6_info *rt6 = rtnl_dereference(vrf->rt6); - struct net *net = dev_net(dev); - struct dst_entry *dst; RCU_INIT_POINTER(vrf->rt6, NULL); synchronize_rcu(); - /* move dev in dst's to loopback so this VRF device can be deleted - * - based on dst_ifdown - */ if (rt6) { - dst = &rt6->dst; - netdev_ref_replace(dst->dev, net->loopback_dev, - &dst->dev_tracker, GFP_KERNEL); - dst->dev = net->loopback_dev; - dst_release(dst); + dst_dev_put(&rt6->dst); + dst_release(&rt6->dst); } } @@ -998,20 +990,12 @@ static struct sk_buff *vrf_l3_out(struct net_device *vrf_dev, static void vrf_rtable_release(struct net_device *dev, struct net_vrf *vrf) { struct rtable *rth = rtnl_dereference(vrf->rth); - struct net *net = dev_net(dev); - struct dst_entry *dst; RCU_INIT_POINTER(vrf->rth, NULL); synchronize_rcu(); - /* move dev in dst's to loopback so this VRF device can be deleted - * - based on dst_ifdown - */ - dst = &rth->dst; - netdev_ref_replace(dst->dev, net->loopback_dev, - &dst->dev_tracker, GFP_KERNEL); - dst->dev = net->loopback_dev; - dst_release(dst); + dst_dev_put(&rth->dst); + dst_release(&rth->dst); } static int vrf_rtable_create(struct net_device *dev) From 075196489a3797c2a931de68c438e06f8303dd5c Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 26 Mar 2026 22:32:33 +0200 Subject: [PATCH 0975/1561] vrf: Remove unnecessary RCU protection around dst entries During initialization of a VRF device, the VRF driver creates two dst entries (for IPv4 and IPv6). They are attached to locally generated packets that are transmitted out of the VRF ports (via the l3mdev_l3_out() hook). Their purpose is to redirect packets towards the VRF device instead of having the packets egress directly out of the VRF ports. This is useful, for example, when a queuing discipline is configured on the VRF device. In order to avoid a NULL pointer dereference, commit b0e95ccdd775 ("net: vrf: protect changes to private data with rcu") made the pointers to the dst entries RCU protected. As far as I can tell, this was needed because back then the dst entries were released (and the pointers reset to NULL) before removing the VRF ports. Later on, commit f630c38ef0d7 ("vrf: fix bug_on triggered by rx when destroying a vrf") moved the removal of the VRF ports to the VRF device's dellink() callback. As such, the tear down sequence of a VRF device looks as follows: 1. VRF ports are removed. 2. VRF device is unregistered. a. Device is closed. b. An RCU grace period passes. c. ndo_uninit() is called. i. dst entries are released. Given the above, the Tx path will always see the same fully initialized dst entries and will never race with the ndo_uninit() callback. Therefore, there is no need to make the pointers to the dst entries RCU protected. Remove it as well as the unnecessary NULL checks in the Tx path. Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Link: https://patch.msgid.link/20260326203233.1128554-4-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/vrf.c | 56 ++++++++++------------------------------------- 1 file changed, 12 insertions(+), 44 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index bfc9ea91ac20..2cf2dbd1c12f 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -112,8 +112,8 @@ struct netns_vrf { }; struct net_vrf { - struct rtable __rcu *rth; - struct rt6_info __rcu *rt6; + struct rtable *rth; + struct rt6_info *rt6; #if IS_ENABLED(CONFIG_IPV6) struct fib6_table *fib6_table; #endif @@ -648,26 +648,13 @@ static struct sk_buff *vrf_ip6_out_redirect(struct net_device *vrf_dev, struct sk_buff *skb) { struct net_vrf *vrf = netdev_priv(vrf_dev); - struct dst_entry *dst = NULL; struct rt6_info *rt6; - rcu_read_lock(); - - rt6 = rcu_dereference(vrf->rt6); - if (likely(rt6)) { - dst = &rt6->dst; - dst_hold(dst); - } - - rcu_read_unlock(); - - if (unlikely(!dst)) { - vrf_tx_error(vrf_dev, skb); - return NULL; - } + rt6 = vrf->rt6; + dst_hold(&rt6->dst); skb_dst_drop(skb); - skb_dst_set(skb, dst); + skb_dst_set(skb, &rt6->dst); return skb; } @@ -750,10 +737,7 @@ static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev, /* holding rtnl */ static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf) { - struct rt6_info *rt6 = rtnl_dereference(vrf->rt6); - - RCU_INIT_POINTER(vrf->rt6, NULL); - synchronize_rcu(); + struct rt6_info *rt6 = vrf->rt6; if (rt6) { dst_dev_put(&rt6->dst); @@ -784,7 +768,7 @@ static int vrf_rt6_create(struct net_device *dev) rt6->dst.output = vrf_output6; - rcu_assign_pointer(vrf->rt6, rt6); + vrf->rt6 = rt6; rc = 0; out: @@ -870,26 +854,13 @@ static struct sk_buff *vrf_ip_out_redirect(struct net_device *vrf_dev, struct sk_buff *skb) { struct net_vrf *vrf = netdev_priv(vrf_dev); - struct dst_entry *dst = NULL; struct rtable *rth; - rcu_read_lock(); - - rth = rcu_dereference(vrf->rth); - if (likely(rth)) { - dst = &rth->dst; - dst_hold(dst); - } - - rcu_read_unlock(); - - if (unlikely(!dst)) { - vrf_tx_error(vrf_dev, skb); - return NULL; - } + rth = vrf->rth; + dst_hold(&rth->dst); skb_dst_drop(skb); - skb_dst_set(skb, dst); + skb_dst_set(skb, &rth->dst); return skb; } @@ -989,10 +960,7 @@ static struct sk_buff *vrf_l3_out(struct net_device *vrf_dev, /* holding rtnl */ static void vrf_rtable_release(struct net_device *dev, struct net_vrf *vrf) { - struct rtable *rth = rtnl_dereference(vrf->rth); - - RCU_INIT_POINTER(vrf->rth, NULL); - synchronize_rcu(); + struct rtable *rth = vrf->rth; dst_dev_put(&rth->dst); dst_release(&rth->dst); @@ -1013,7 +981,7 @@ static int vrf_rtable_create(struct net_device *dev) rth->dst.output = vrf_output; - rcu_assign_pointer(vrf->rth, rth); + vrf->rth = rth; return 0; } From 309b905deee595619cc38719f48d63d57b8bff3d Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Wed, 25 Mar 2026 13:08:42 +0100 Subject: [PATCH 0976/1561] ipv6: convert CONFIG_IPV6 to built-in only and clean up Kconfigs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Maintaining a modular IPv6 stack offers image size savings for specific setups, this benefit is outweighed by the architectural burden it imposes on the subsystems on implementation and maintenance. Therefore, drop it. Change CONFIG_IPV6 from tristate to bool. Remove all Kconfig dependencies across the tree that explicitly checked for IPV6=m. In addition, remove MODULE_DESCRIPTION(), MODULE_ALIAS(), MODULE_AUTHOR() and MODULE_LICENSE(). This is also replacing module_init() by device_initcall(). It is not possible to use fs_initcall() as IPv4 does because that creates a race condition on IPv6 addrconf. Finally, modify the default configs from CONFIG_IPV6=m to CONFIG_IPV6=y except for m68k as according to the bloat-o-meter the image is increasing by 330KB~ and that isn't acceptable. Instead, disable IPv6 on this architecture by default. This is aligned with m68k RAM requirements and recommendations [1]. [1] http://www.linux-m68k.org/faq/ram.html Signed-off-by: Fernando Fernandez Mancera Tested-by: Ricardo B. Marlière Acked-by: Krzysztof Kozlowski # arm64 Link: https://patch.msgid.link/20260325120928.15848-2-fmancera@suse.de Signed-off-by: Jakub Kicinski --- arch/arm64/configs/defconfig | 2 +- arch/m68k/configs/amiga_defconfig | 44 +-------------------- arch/m68k/configs/apollo_defconfig | 44 +-------------------- arch/m68k/configs/atari_defconfig | 44 +-------------------- arch/m68k/configs/bvme6000_defconfig | 44 +-------------------- arch/m68k/configs/hp300_defconfig | 44 +-------------------- arch/m68k/configs/mac_defconfig | 44 +-------------------- arch/m68k/configs/multi_defconfig | 44 +-------------------- arch/m68k/configs/mvme147_defconfig | 44 +-------------------- arch/m68k/configs/mvme16x_defconfig | 44 +-------------------- arch/m68k/configs/q40_defconfig | 44 +-------------------- arch/m68k/configs/sun3_defconfig | 44 +-------------------- arch/m68k/configs/sun3x_defconfig | 44 +-------------------- drivers/infiniband/Kconfig | 1 - drivers/infiniband/hw/ocrdma/Kconfig | 2 +- drivers/infiniband/ulp/ipoib/Kconfig | 2 +- drivers/net/Kconfig | 9 ----- drivers/net/ethernet/broadcom/Kconfig | 2 +- drivers/net/ethernet/chelsio/Kconfig | 2 +- drivers/net/ethernet/mellanox/mlxsw/Kconfig | 1 - drivers/net/ethernet/netronome/Kconfig | 1 - drivers/scsi/bnx2fc/Kconfig | 1 - drivers/scsi/bnx2i/Kconfig | 1 - drivers/scsi/cxgbi/cxgb3i/Kconfig | 2 +- drivers/scsi/cxgbi/cxgb4i/Kconfig | 2 +- fs/dlm/Kconfig | 2 +- fs/gfs2/Kconfig | 2 +- net/bridge/Kconfig | 1 - net/ipv4/Kconfig | 9 ++--- net/ipv6/Kconfig | 6 +-- net/ipv6/af_inet6.c | 8 +--- net/l2tp/Kconfig | 1 - net/netfilter/Kconfig | 8 ---- net/rxrpc/Kconfig | 2 +- net/sctp/Kconfig | 1 - net/tipc/Kconfig | 1 - 36 files changed, 40 insertions(+), 557 deletions(-) diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index b67d5b1fc45b..0651a771f5c1 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -140,7 +140,7 @@ CONFIG_IP_MULTICAST=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y -CONFIG_IPV6=m +CONFIG_IPV6=y CONFIG_NETFILTER=y CONFIG_BRIDGE_NETFILTER=m CONFIG_NF_CONNTRACK=m diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 31d16cba9879..c8b936bb702f 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -72,15 +72,7 @@ CONFIG_INET_IPCOMP=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_INET_RAW_DIAG=m -CONFIG_IPV6=m -CONFIG_IPV6_ROUTER_PREF=y -CONFIG_INET6_AH=m -CONFIG_INET6_ESP=m -CONFIG_INET6_ESP_OFFLOAD=m -CONFIG_INET6_IPCOMP=m -CONFIG_IPV6_ILA=m -CONFIG_IPV6_VTI=m -CONFIG_IPV6_GRE=m +# CONFIG_IPV6 is not set CONFIG_NETFILTER=y CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m @@ -96,7 +88,6 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_INET=y CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m @@ -113,7 +104,6 @@ CONFIG_NFT_QUOTA=m CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NFT_HASH=m -CONFIG_NFT_FIB_INET=m CONFIG_NFT_XFRM=m CONFIG_NFT_SOCKET=m CONFIG_NFT_OSF=m @@ -121,8 +111,6 @@ CONFIG_NFT_TPROXY=m CONFIG_NFT_SYNPROXY=m CONFIG_NFT_DUP_NETDEV=m CONFIG_NFT_FWD_NETDEV=m -CONFIG_NFT_FIB_NETDEV=m -CONFIG_NFT_REJECT_NETDEV=m CONFIG_NF_FLOW_TABLE_INET=m CONFIG_NF_FLOW_TABLE=m CONFIG_NETFILTER_XTABLES_LEGACY=y @@ -197,6 +185,7 @@ CONFIG_IP_SET_HASH_NETNET=m CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m +CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y @@ -218,29 +207,8 @@ CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NFT_DUP_IPV6=m -CONFIG_NFT_FIB_IPV6=m -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_AH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_MH=m -CONFIG_IP6_NF_MATCH_RPFILTER=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_MATCH_SRH=m -CONFIG_IP6_NF_TARGET_HL=m -CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_TARGET_SYNPROXY=m -CONFIG_IP6_NF_RAW=m -CONFIG_IP6_NF_NAT=m -CONFIG_IP6_NF_TARGET_MASQUERADE=m -CONFIG_IP6_NF_TARGET_NPT=m CONFIG_NF_TABLES_BRIDGE=m CONFIG_NFT_BRIDGE_META=m -CONFIG_NFT_BRIDGE_REJECT=m CONFIG_NF_CONNTRACK_BRIDGE=m CONFIG_BRIDGE_NF_EBTABLES_LEGACY=m CONFIG_BRIDGE_NF_EBTABLES=m @@ -251,7 +219,6 @@ CONFIG_BRIDGE_EBT_802_3=m CONFIG_BRIDGE_EBT_AMONG=m CONFIG_BRIDGE_EBT_ARP=m CONFIG_BRIDGE_EBT_IP=m -CONFIG_BRIDGE_EBT_IP6=m CONFIG_BRIDGE_EBT_LIMIT=m CONFIG_BRIDGE_EBT_MARK=m CONFIG_BRIDGE_EBT_PKTTYPE=m @@ -270,13 +237,6 @@ CONFIG_RDS_TCP=m CONFIG_L2TP=m CONFIG_BRIDGE=m CONFIG_ATALK=m -CONFIG_6LOWPAN=m -CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m -CONFIG_6LOWPAN_GHC_UDP=m -CONFIG_6LOWPAN_GHC_ICMPV6=m -CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m -CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m -CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m CONFIG_DNS_RESOLVER=y CONFIG_BATMAN_ADV=m # CONFIG_BATMAN_ADV_BATMAN_V is not set diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index c0c419ec9a9e..fc1792495bbc 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -68,15 +68,7 @@ CONFIG_INET_IPCOMP=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_INET_RAW_DIAG=m -CONFIG_IPV6=m -CONFIG_IPV6_ROUTER_PREF=y -CONFIG_INET6_AH=m -CONFIG_INET6_ESP=m -CONFIG_INET6_ESP_OFFLOAD=m -CONFIG_INET6_IPCOMP=m -CONFIG_IPV6_ILA=m -CONFIG_IPV6_VTI=m -CONFIG_IPV6_GRE=m +# CONFIG_IPV6 is not set CONFIG_NETFILTER=y CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m @@ -92,7 +84,6 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_INET=y CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m @@ -109,7 +100,6 @@ CONFIG_NFT_QUOTA=m CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NFT_HASH=m -CONFIG_NFT_FIB_INET=m CONFIG_NFT_XFRM=m CONFIG_NFT_SOCKET=m CONFIG_NFT_OSF=m @@ -117,8 +107,6 @@ CONFIG_NFT_TPROXY=m CONFIG_NFT_SYNPROXY=m CONFIG_NFT_DUP_NETDEV=m CONFIG_NFT_FWD_NETDEV=m -CONFIG_NFT_FIB_NETDEV=m -CONFIG_NFT_REJECT_NETDEV=m CONFIG_NF_FLOW_TABLE_INET=m CONFIG_NF_FLOW_TABLE=m CONFIG_NETFILTER_XTABLES_LEGACY=y @@ -193,6 +181,7 @@ CONFIG_IP_SET_HASH_NETNET=m CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m +CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y @@ -214,29 +203,8 @@ CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NFT_DUP_IPV6=m -CONFIG_NFT_FIB_IPV6=m -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_AH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_MH=m -CONFIG_IP6_NF_MATCH_RPFILTER=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_MATCH_SRH=m -CONFIG_IP6_NF_TARGET_HL=m -CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_TARGET_SYNPROXY=m -CONFIG_IP6_NF_RAW=m -CONFIG_IP6_NF_NAT=m -CONFIG_IP6_NF_TARGET_MASQUERADE=m -CONFIG_IP6_NF_TARGET_NPT=m CONFIG_NF_TABLES_BRIDGE=m CONFIG_NFT_BRIDGE_META=m -CONFIG_NFT_BRIDGE_REJECT=m CONFIG_NF_CONNTRACK_BRIDGE=m CONFIG_BRIDGE_NF_EBTABLES_LEGACY=m CONFIG_BRIDGE_NF_EBTABLES=m @@ -247,7 +215,6 @@ CONFIG_BRIDGE_EBT_802_3=m CONFIG_BRIDGE_EBT_AMONG=m CONFIG_BRIDGE_EBT_ARP=m CONFIG_BRIDGE_EBT_IP=m -CONFIG_BRIDGE_EBT_IP6=m CONFIG_BRIDGE_EBT_LIMIT=m CONFIG_BRIDGE_EBT_MARK=m CONFIG_BRIDGE_EBT_PKTTYPE=m @@ -266,13 +233,6 @@ CONFIG_RDS_TCP=m CONFIG_L2TP=m CONFIG_BRIDGE=m CONFIG_ATALK=m -CONFIG_6LOWPAN=m -CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m -CONFIG_6LOWPAN_GHC_UDP=m -CONFIG_6LOWPAN_GHC_ICMPV6=m -CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m -CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m -CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m CONFIG_DNS_RESOLVER=y CONFIG_BATMAN_ADV=m # CONFIG_BATMAN_ADV_BATMAN_V is not set diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 2b7547ecc4c4..e440c596e60b 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -75,15 +75,7 @@ CONFIG_INET_IPCOMP=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_INET_RAW_DIAG=m -CONFIG_IPV6=m -CONFIG_IPV6_ROUTER_PREF=y -CONFIG_INET6_AH=m -CONFIG_INET6_ESP=m -CONFIG_INET6_ESP_OFFLOAD=m -CONFIG_INET6_IPCOMP=m -CONFIG_IPV6_ILA=m -CONFIG_IPV6_VTI=m -CONFIG_IPV6_GRE=m +# CONFIG_IPV6 is not set CONFIG_NETFILTER=y CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m @@ -99,7 +91,6 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_INET=y CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m @@ -116,7 +107,6 @@ CONFIG_NFT_QUOTA=m CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NFT_HASH=m -CONFIG_NFT_FIB_INET=m CONFIG_NFT_XFRM=m CONFIG_NFT_SOCKET=m CONFIG_NFT_OSF=m @@ -124,8 +114,6 @@ CONFIG_NFT_TPROXY=m CONFIG_NFT_SYNPROXY=m CONFIG_NFT_DUP_NETDEV=m CONFIG_NFT_FWD_NETDEV=m -CONFIG_NFT_FIB_NETDEV=m -CONFIG_NFT_REJECT_NETDEV=m CONFIG_NF_FLOW_TABLE_INET=m CONFIG_NF_FLOW_TABLE=m CONFIG_NETFILTER_XTABLES_LEGACY=y @@ -200,6 +188,7 @@ CONFIG_IP_SET_HASH_NETNET=m CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m +CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y @@ -221,29 +210,8 @@ CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NFT_DUP_IPV6=m -CONFIG_NFT_FIB_IPV6=m -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_AH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_MH=m -CONFIG_IP6_NF_MATCH_RPFILTER=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_MATCH_SRH=m -CONFIG_IP6_NF_TARGET_HL=m -CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_TARGET_SYNPROXY=m -CONFIG_IP6_NF_RAW=m -CONFIG_IP6_NF_NAT=m -CONFIG_IP6_NF_TARGET_MASQUERADE=m -CONFIG_IP6_NF_TARGET_NPT=m CONFIG_NF_TABLES_BRIDGE=m CONFIG_NFT_BRIDGE_META=m -CONFIG_NFT_BRIDGE_REJECT=m CONFIG_NF_CONNTRACK_BRIDGE=m CONFIG_BRIDGE_NF_EBTABLES_LEGACY=m CONFIG_BRIDGE_NF_EBTABLES=m @@ -254,7 +222,6 @@ CONFIG_BRIDGE_EBT_802_3=m CONFIG_BRIDGE_EBT_AMONG=m CONFIG_BRIDGE_EBT_ARP=m CONFIG_BRIDGE_EBT_IP=m -CONFIG_BRIDGE_EBT_IP6=m CONFIG_BRIDGE_EBT_LIMIT=m CONFIG_BRIDGE_EBT_MARK=m CONFIG_BRIDGE_EBT_PKTTYPE=m @@ -273,13 +240,6 @@ CONFIG_RDS_TCP=m CONFIG_L2TP=m CONFIG_BRIDGE=m CONFIG_ATALK=m -CONFIG_6LOWPAN=m -CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m -CONFIG_6LOWPAN_GHC_UDP=m -CONFIG_6LOWPAN_GHC_ICMPV6=m -CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m -CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m -CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m CONFIG_DNS_RESOLVER=y CONFIG_BATMAN_ADV=m # CONFIG_BATMAN_ADV_BATMAN_V is not set diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 0b63787cff0d..7aa352d14363 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -65,15 +65,7 @@ CONFIG_INET_IPCOMP=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_INET_RAW_DIAG=m -CONFIG_IPV6=m -CONFIG_IPV6_ROUTER_PREF=y -CONFIG_INET6_AH=m -CONFIG_INET6_ESP=m -CONFIG_INET6_ESP_OFFLOAD=m -CONFIG_INET6_IPCOMP=m -CONFIG_IPV6_ILA=m -CONFIG_IPV6_VTI=m -CONFIG_IPV6_GRE=m +# CONFIG_IPV6 is not set CONFIG_NETFILTER=y CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m @@ -89,7 +81,6 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_INET=y CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m @@ -106,7 +97,6 @@ CONFIG_NFT_QUOTA=m CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NFT_HASH=m -CONFIG_NFT_FIB_INET=m CONFIG_NFT_XFRM=m CONFIG_NFT_SOCKET=m CONFIG_NFT_OSF=m @@ -114,8 +104,6 @@ CONFIG_NFT_TPROXY=m CONFIG_NFT_SYNPROXY=m CONFIG_NFT_DUP_NETDEV=m CONFIG_NFT_FWD_NETDEV=m -CONFIG_NFT_FIB_NETDEV=m -CONFIG_NFT_REJECT_NETDEV=m CONFIG_NF_FLOW_TABLE_INET=m CONFIG_NF_FLOW_TABLE=m CONFIG_NETFILTER_XTABLES_LEGACY=y @@ -190,6 +178,7 @@ CONFIG_IP_SET_HASH_NETNET=m CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m +CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y @@ -211,29 +200,8 @@ CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NFT_DUP_IPV6=m -CONFIG_NFT_FIB_IPV6=m -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_AH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_MH=m -CONFIG_IP6_NF_MATCH_RPFILTER=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_MATCH_SRH=m -CONFIG_IP6_NF_TARGET_HL=m -CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_TARGET_SYNPROXY=m -CONFIG_IP6_NF_RAW=m -CONFIG_IP6_NF_NAT=m -CONFIG_IP6_NF_TARGET_MASQUERADE=m -CONFIG_IP6_NF_TARGET_NPT=m CONFIG_NF_TABLES_BRIDGE=m CONFIG_NFT_BRIDGE_META=m -CONFIG_NFT_BRIDGE_REJECT=m CONFIG_NF_CONNTRACK_BRIDGE=m CONFIG_BRIDGE_NF_EBTABLES_LEGACY=m CONFIG_BRIDGE_NF_EBTABLES=m @@ -244,7 +212,6 @@ CONFIG_BRIDGE_EBT_802_3=m CONFIG_BRIDGE_EBT_AMONG=m CONFIG_BRIDGE_EBT_ARP=m CONFIG_BRIDGE_EBT_IP=m -CONFIG_BRIDGE_EBT_IP6=m CONFIG_BRIDGE_EBT_LIMIT=m CONFIG_BRIDGE_EBT_MARK=m CONFIG_BRIDGE_EBT_PKTTYPE=m @@ -263,13 +230,6 @@ CONFIG_RDS_TCP=m CONFIG_L2TP=m CONFIG_BRIDGE=m CONFIG_ATALK=m -CONFIG_6LOWPAN=m -CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m -CONFIG_6LOWPAN_GHC_UDP=m -CONFIG_6LOWPAN_GHC_ICMPV6=m -CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m -CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m -CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m CONFIG_DNS_RESOLVER=y CONFIG_BATMAN_ADV=m # CONFIG_BATMAN_ADV_BATMAN_V is not set diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index 308836b60bba..0baaf2a82c61 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -67,15 +67,7 @@ CONFIG_INET_IPCOMP=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_INET_RAW_DIAG=m -CONFIG_IPV6=m -CONFIG_IPV6_ROUTER_PREF=y -CONFIG_INET6_AH=m -CONFIG_INET6_ESP=m -CONFIG_INET6_ESP_OFFLOAD=m -CONFIG_INET6_IPCOMP=m -CONFIG_IPV6_ILA=m -CONFIG_IPV6_VTI=m -CONFIG_IPV6_GRE=m +# CONFIG_IPV6 is not set CONFIG_NETFILTER=y CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m @@ -91,7 +83,6 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_INET=y CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m @@ -108,7 +99,6 @@ CONFIG_NFT_QUOTA=m CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NFT_HASH=m -CONFIG_NFT_FIB_INET=m CONFIG_NFT_XFRM=m CONFIG_NFT_SOCKET=m CONFIG_NFT_OSF=m @@ -116,8 +106,6 @@ CONFIG_NFT_TPROXY=m CONFIG_NFT_SYNPROXY=m CONFIG_NFT_DUP_NETDEV=m CONFIG_NFT_FWD_NETDEV=m -CONFIG_NFT_FIB_NETDEV=m -CONFIG_NFT_REJECT_NETDEV=m CONFIG_NF_FLOW_TABLE_INET=m CONFIG_NF_FLOW_TABLE=m CONFIG_NETFILTER_XTABLES_LEGACY=y @@ -192,6 +180,7 @@ CONFIG_IP_SET_HASH_NETNET=m CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m +CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y @@ -213,29 +202,8 @@ CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NFT_DUP_IPV6=m -CONFIG_NFT_FIB_IPV6=m -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_AH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_MH=m -CONFIG_IP6_NF_MATCH_RPFILTER=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_MATCH_SRH=m -CONFIG_IP6_NF_TARGET_HL=m -CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_TARGET_SYNPROXY=m -CONFIG_IP6_NF_RAW=m -CONFIG_IP6_NF_NAT=m -CONFIG_IP6_NF_TARGET_MASQUERADE=m -CONFIG_IP6_NF_TARGET_NPT=m CONFIG_NF_TABLES_BRIDGE=m CONFIG_NFT_BRIDGE_META=m -CONFIG_NFT_BRIDGE_REJECT=m CONFIG_NF_CONNTRACK_BRIDGE=m CONFIG_BRIDGE_NF_EBTABLES_LEGACY=m CONFIG_BRIDGE_NF_EBTABLES=m @@ -246,7 +214,6 @@ CONFIG_BRIDGE_EBT_802_3=m CONFIG_BRIDGE_EBT_AMONG=m CONFIG_BRIDGE_EBT_ARP=m CONFIG_BRIDGE_EBT_IP=m -CONFIG_BRIDGE_EBT_IP6=m CONFIG_BRIDGE_EBT_LIMIT=m CONFIG_BRIDGE_EBT_MARK=m CONFIG_BRIDGE_EBT_PKTTYPE=m @@ -265,13 +232,6 @@ CONFIG_RDS_TCP=m CONFIG_L2TP=m CONFIG_BRIDGE=m CONFIG_ATALK=m -CONFIG_6LOWPAN=m -CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m -CONFIG_6LOWPAN_GHC_UDP=m -CONFIG_6LOWPAN_GHC_ICMPV6=m -CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m -CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m -CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m CONFIG_DNS_RESOLVER=y CONFIG_BATMAN_ADV=m # CONFIG_BATMAN_ADV_BATMAN_V is not set diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 97e108c0d24f..0cbbfe5aeaec 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -66,15 +66,7 @@ CONFIG_INET_IPCOMP=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_INET_RAW_DIAG=m -CONFIG_IPV6=m -CONFIG_IPV6_ROUTER_PREF=y -CONFIG_INET6_AH=m -CONFIG_INET6_ESP=m -CONFIG_INET6_ESP_OFFLOAD=m -CONFIG_INET6_IPCOMP=m -CONFIG_IPV6_ILA=m -CONFIG_IPV6_VTI=m -CONFIG_IPV6_GRE=m +# CONFIG_IPV6 is not set CONFIG_NETFILTER=y CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m @@ -90,7 +82,6 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_INET=y CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m @@ -107,7 +98,6 @@ CONFIG_NFT_QUOTA=m CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NFT_HASH=m -CONFIG_NFT_FIB_INET=m CONFIG_NFT_XFRM=m CONFIG_NFT_SOCKET=m CONFIG_NFT_OSF=m @@ -115,8 +105,6 @@ CONFIG_NFT_TPROXY=m CONFIG_NFT_SYNPROXY=m CONFIG_NFT_DUP_NETDEV=m CONFIG_NFT_FWD_NETDEV=m -CONFIG_NFT_FIB_NETDEV=m -CONFIG_NFT_REJECT_NETDEV=m CONFIG_NF_FLOW_TABLE_INET=m CONFIG_NF_FLOW_TABLE=m CONFIG_NETFILTER_XTABLES_LEGACY=y @@ -191,6 +179,7 @@ CONFIG_IP_SET_HASH_NETNET=m CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m +CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y @@ -212,29 +201,8 @@ CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NFT_DUP_IPV6=m -CONFIG_NFT_FIB_IPV6=m -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_AH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_MH=m -CONFIG_IP6_NF_MATCH_RPFILTER=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_MATCH_SRH=m -CONFIG_IP6_NF_TARGET_HL=m -CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_TARGET_SYNPROXY=m -CONFIG_IP6_NF_RAW=m -CONFIG_IP6_NF_NAT=m -CONFIG_IP6_NF_TARGET_MASQUERADE=m -CONFIG_IP6_NF_TARGET_NPT=m CONFIG_NF_TABLES_BRIDGE=m CONFIG_NFT_BRIDGE_META=m -CONFIG_NFT_BRIDGE_REJECT=m CONFIG_NF_CONNTRACK_BRIDGE=m CONFIG_BRIDGE_NF_EBTABLES_LEGACY=m CONFIG_BRIDGE_NF_EBTABLES=m @@ -245,7 +213,6 @@ CONFIG_BRIDGE_EBT_802_3=m CONFIG_BRIDGE_EBT_AMONG=m CONFIG_BRIDGE_EBT_ARP=m CONFIG_BRIDGE_EBT_IP=m -CONFIG_BRIDGE_EBT_IP6=m CONFIG_BRIDGE_EBT_LIMIT=m CONFIG_BRIDGE_EBT_MARK=m CONFIG_BRIDGE_EBT_PKTTYPE=m @@ -264,13 +231,6 @@ CONFIG_RDS_TCP=m CONFIG_L2TP=m CONFIG_BRIDGE=m CONFIG_ATALK=m -CONFIG_6LOWPAN=m -CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m -CONFIG_6LOWPAN_GHC_UDP=m -CONFIG_6LOWPAN_GHC_ICMPV6=m -CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m -CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m -CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m CONFIG_DNS_RESOLVER=y CONFIG_BATMAN_ADV=m # CONFIG_BATMAN_ADV_BATMAN_V is not set diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 7e9f83af9af4..2b96f90e1a4d 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -86,15 +86,7 @@ CONFIG_INET_IPCOMP=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_INET_RAW_DIAG=m -CONFIG_IPV6=m -CONFIG_IPV6_ROUTER_PREF=y -CONFIG_INET6_AH=m -CONFIG_INET6_ESP=m -CONFIG_INET6_ESP_OFFLOAD=m -CONFIG_INET6_IPCOMP=m -CONFIG_IPV6_ILA=m -CONFIG_IPV6_VTI=m -CONFIG_IPV6_GRE=m +# CONFIG_IPV6 is not set CONFIG_NETFILTER=y CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m @@ -110,7 +102,6 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_INET=y CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m @@ -127,7 +118,6 @@ CONFIG_NFT_QUOTA=m CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NFT_HASH=m -CONFIG_NFT_FIB_INET=m CONFIG_NFT_XFRM=m CONFIG_NFT_SOCKET=m CONFIG_NFT_OSF=m @@ -135,8 +125,6 @@ CONFIG_NFT_TPROXY=m CONFIG_NFT_SYNPROXY=m CONFIG_NFT_DUP_NETDEV=m CONFIG_NFT_FWD_NETDEV=m -CONFIG_NFT_FIB_NETDEV=m -CONFIG_NFT_REJECT_NETDEV=m CONFIG_NF_FLOW_TABLE_INET=m CONFIG_NF_FLOW_TABLE=m CONFIG_NETFILTER_XTABLES_LEGACY=y @@ -211,6 +199,7 @@ CONFIG_IP_SET_HASH_NETNET=m CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m +CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y @@ -232,29 +221,8 @@ CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NFT_DUP_IPV6=m -CONFIG_NFT_FIB_IPV6=m -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_AH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_MH=m -CONFIG_IP6_NF_MATCH_RPFILTER=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_MATCH_SRH=m -CONFIG_IP6_NF_TARGET_HL=m -CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_TARGET_SYNPROXY=m -CONFIG_IP6_NF_RAW=m -CONFIG_IP6_NF_NAT=m -CONFIG_IP6_NF_TARGET_MASQUERADE=m -CONFIG_IP6_NF_TARGET_NPT=m CONFIG_NF_TABLES_BRIDGE=m CONFIG_NFT_BRIDGE_META=m -CONFIG_NFT_BRIDGE_REJECT=m CONFIG_NF_CONNTRACK_BRIDGE=m CONFIG_BRIDGE_NF_EBTABLES_LEGACY=m CONFIG_BRIDGE_NF_EBTABLES=m @@ -265,7 +233,6 @@ CONFIG_BRIDGE_EBT_802_3=m CONFIG_BRIDGE_EBT_AMONG=m CONFIG_BRIDGE_EBT_ARP=m CONFIG_BRIDGE_EBT_IP=m -CONFIG_BRIDGE_EBT_IP6=m CONFIG_BRIDGE_EBT_LIMIT=m CONFIG_BRIDGE_EBT_MARK=m CONFIG_BRIDGE_EBT_PKTTYPE=m @@ -284,13 +251,6 @@ CONFIG_RDS_TCP=m CONFIG_L2TP=m CONFIG_BRIDGE=m CONFIG_ATALK=m -CONFIG_6LOWPAN=m -CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m -CONFIG_6LOWPAN_GHC_UDP=m -CONFIG_6LOWPAN_GHC_ICMPV6=m -CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m -CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m -CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m CONFIG_DNS_RESOLVER=y CONFIG_BATMAN_ADV=m # CONFIG_BATMAN_ADV_BATMAN_V is not set diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 2fe33271d249..b49264cec911 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -64,15 +64,7 @@ CONFIG_INET_IPCOMP=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_INET_RAW_DIAG=m -CONFIG_IPV6=m -CONFIG_IPV6_ROUTER_PREF=y -CONFIG_INET6_AH=m -CONFIG_INET6_ESP=m -CONFIG_INET6_ESP_OFFLOAD=m -CONFIG_INET6_IPCOMP=m -CONFIG_IPV6_ILA=m -CONFIG_IPV6_VTI=m -CONFIG_IPV6_GRE=m +# CONFIG_IPV6 is not set CONFIG_NETFILTER=y CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m @@ -88,7 +80,6 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_INET=y CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m @@ -105,7 +96,6 @@ CONFIG_NFT_QUOTA=m CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NFT_HASH=m -CONFIG_NFT_FIB_INET=m CONFIG_NFT_XFRM=m CONFIG_NFT_SOCKET=m CONFIG_NFT_OSF=m @@ -113,8 +103,6 @@ CONFIG_NFT_TPROXY=m CONFIG_NFT_SYNPROXY=m CONFIG_NFT_DUP_NETDEV=m CONFIG_NFT_FWD_NETDEV=m -CONFIG_NFT_FIB_NETDEV=m -CONFIG_NFT_REJECT_NETDEV=m CONFIG_NF_FLOW_TABLE_INET=m CONFIG_NF_FLOW_TABLE=m CONFIG_NETFILTER_XTABLES_LEGACY=y @@ -189,6 +177,7 @@ CONFIG_IP_SET_HASH_NETNET=m CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m +CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y @@ -210,29 +199,8 @@ CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NFT_DUP_IPV6=m -CONFIG_NFT_FIB_IPV6=m -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_AH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_MH=m -CONFIG_IP6_NF_MATCH_RPFILTER=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_MATCH_SRH=m -CONFIG_IP6_NF_TARGET_HL=m -CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_TARGET_SYNPROXY=m -CONFIG_IP6_NF_RAW=m -CONFIG_IP6_NF_NAT=m -CONFIG_IP6_NF_TARGET_MASQUERADE=m -CONFIG_IP6_NF_TARGET_NPT=m CONFIG_NF_TABLES_BRIDGE=m CONFIG_NFT_BRIDGE_META=m -CONFIG_NFT_BRIDGE_REJECT=m CONFIG_NF_CONNTRACK_BRIDGE=m CONFIG_BRIDGE_NF_EBTABLES_LEGACY=m CONFIG_BRIDGE_NF_EBTABLES=m @@ -243,7 +211,6 @@ CONFIG_BRIDGE_EBT_802_3=m CONFIG_BRIDGE_EBT_AMONG=m CONFIG_BRIDGE_EBT_ARP=m CONFIG_BRIDGE_EBT_IP=m -CONFIG_BRIDGE_EBT_IP6=m CONFIG_BRIDGE_EBT_LIMIT=m CONFIG_BRIDGE_EBT_MARK=m CONFIG_BRIDGE_EBT_PKTTYPE=m @@ -262,13 +229,6 @@ CONFIG_RDS_TCP=m CONFIG_L2TP=m CONFIG_BRIDGE=m CONFIG_ATALK=m -CONFIG_6LOWPAN=m -CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m -CONFIG_6LOWPAN_GHC_UDP=m -CONFIG_6LOWPAN_GHC_ICMPV6=m -CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m -CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m -CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m CONFIG_DNS_RESOLVER=y CONFIG_BATMAN_ADV=m # CONFIG_BATMAN_ADV_BATMAN_V is not set diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index 4308daaa7f74..96a974b0a766 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -65,15 +65,7 @@ CONFIG_INET_IPCOMP=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_INET_RAW_DIAG=m -CONFIG_IPV6=m -CONFIG_IPV6_ROUTER_PREF=y -CONFIG_INET6_AH=m -CONFIG_INET6_ESP=m -CONFIG_INET6_ESP_OFFLOAD=m -CONFIG_INET6_IPCOMP=m -CONFIG_IPV6_ILA=m -CONFIG_IPV6_VTI=m -CONFIG_IPV6_GRE=m +# CONFIG_IPV6 is not set CONFIG_NETFILTER=y CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m @@ -89,7 +81,6 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_INET=y CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m @@ -106,7 +97,6 @@ CONFIG_NFT_QUOTA=m CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NFT_HASH=m -CONFIG_NFT_FIB_INET=m CONFIG_NFT_XFRM=m CONFIG_NFT_SOCKET=m CONFIG_NFT_OSF=m @@ -114,8 +104,6 @@ CONFIG_NFT_TPROXY=m CONFIG_NFT_SYNPROXY=m CONFIG_NFT_DUP_NETDEV=m CONFIG_NFT_FWD_NETDEV=m -CONFIG_NFT_FIB_NETDEV=m -CONFIG_NFT_REJECT_NETDEV=m CONFIG_NF_FLOW_TABLE_INET=m CONFIG_NF_FLOW_TABLE=m CONFIG_NETFILTER_XTABLES_LEGACY=y @@ -190,6 +178,7 @@ CONFIG_IP_SET_HASH_NETNET=m CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m +CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y @@ -211,29 +200,8 @@ CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NFT_DUP_IPV6=m -CONFIG_NFT_FIB_IPV6=m -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_AH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_MH=m -CONFIG_IP6_NF_MATCH_RPFILTER=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_MATCH_SRH=m -CONFIG_IP6_NF_TARGET_HL=m -CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_TARGET_SYNPROXY=m -CONFIG_IP6_NF_RAW=m -CONFIG_IP6_NF_NAT=m -CONFIG_IP6_NF_TARGET_MASQUERADE=m -CONFIG_IP6_NF_TARGET_NPT=m CONFIG_NF_TABLES_BRIDGE=m CONFIG_NFT_BRIDGE_META=m -CONFIG_NFT_BRIDGE_REJECT=m CONFIG_NF_CONNTRACK_BRIDGE=m CONFIG_BRIDGE_NF_EBTABLES_LEGACY=m CONFIG_BRIDGE_NF_EBTABLES=m @@ -244,7 +212,6 @@ CONFIG_BRIDGE_EBT_802_3=m CONFIG_BRIDGE_EBT_AMONG=m CONFIG_BRIDGE_EBT_ARP=m CONFIG_BRIDGE_EBT_IP=m -CONFIG_BRIDGE_EBT_IP6=m CONFIG_BRIDGE_EBT_LIMIT=m CONFIG_BRIDGE_EBT_MARK=m CONFIG_BRIDGE_EBT_PKTTYPE=m @@ -263,13 +230,6 @@ CONFIG_RDS_TCP=m CONFIG_L2TP=m CONFIG_BRIDGE=m CONFIG_ATALK=m -CONFIG_6LOWPAN=m -CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m -CONFIG_6LOWPAN_GHC_UDP=m -CONFIG_6LOWPAN_GHC_ICMPV6=m -CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m -CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m -CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m CONFIG_DNS_RESOLVER=y CONFIG_BATMAN_ADV=m # CONFIG_BATMAN_ADV_BATMAN_V is not set diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 36eb29ec54ee..e53361584393 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -66,15 +66,7 @@ CONFIG_INET_IPCOMP=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_INET_RAW_DIAG=m -CONFIG_IPV6=m -CONFIG_IPV6_ROUTER_PREF=y -CONFIG_INET6_AH=m -CONFIG_INET6_ESP=m -CONFIG_INET6_ESP_OFFLOAD=m -CONFIG_INET6_IPCOMP=m -CONFIG_IPV6_ILA=m -CONFIG_IPV6_VTI=m -CONFIG_IPV6_GRE=m +# CONFIG_IPV6 is not set CONFIG_NETFILTER=y CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m @@ -90,7 +82,6 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_INET=y CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m @@ -107,7 +98,6 @@ CONFIG_NFT_QUOTA=m CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NFT_HASH=m -CONFIG_NFT_FIB_INET=m CONFIG_NFT_XFRM=m CONFIG_NFT_SOCKET=m CONFIG_NFT_OSF=m @@ -115,8 +105,6 @@ CONFIG_NFT_TPROXY=m CONFIG_NFT_SYNPROXY=m CONFIG_NFT_DUP_NETDEV=m CONFIG_NFT_FWD_NETDEV=m -CONFIG_NFT_FIB_NETDEV=m -CONFIG_NFT_REJECT_NETDEV=m CONFIG_NF_FLOW_TABLE_INET=m CONFIG_NF_FLOW_TABLE=m CONFIG_NETFILTER_XTABLES_LEGACY=y @@ -191,6 +179,7 @@ CONFIG_IP_SET_HASH_NETNET=m CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m +CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y @@ -212,29 +201,8 @@ CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NFT_DUP_IPV6=m -CONFIG_NFT_FIB_IPV6=m -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_AH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_MH=m -CONFIG_IP6_NF_MATCH_RPFILTER=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_MATCH_SRH=m -CONFIG_IP6_NF_TARGET_HL=m -CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_TARGET_SYNPROXY=m -CONFIG_IP6_NF_RAW=m -CONFIG_IP6_NF_NAT=m -CONFIG_IP6_NF_TARGET_MASQUERADE=m -CONFIG_IP6_NF_TARGET_NPT=m CONFIG_NF_TABLES_BRIDGE=m CONFIG_NFT_BRIDGE_META=m -CONFIG_NFT_BRIDGE_REJECT=m CONFIG_NF_CONNTRACK_BRIDGE=m CONFIG_BRIDGE_NF_EBTABLES_LEGACY=m CONFIG_BRIDGE_NF_EBTABLES=m @@ -245,7 +213,6 @@ CONFIG_BRIDGE_EBT_802_3=m CONFIG_BRIDGE_EBT_AMONG=m CONFIG_BRIDGE_EBT_ARP=m CONFIG_BRIDGE_EBT_IP=m -CONFIG_BRIDGE_EBT_IP6=m CONFIG_BRIDGE_EBT_LIMIT=m CONFIG_BRIDGE_EBT_MARK=m CONFIG_BRIDGE_EBT_PKTTYPE=m @@ -264,13 +231,6 @@ CONFIG_RDS_TCP=m CONFIG_L2TP=m CONFIG_BRIDGE=m CONFIG_ATALK=m -CONFIG_6LOWPAN=m -CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m -CONFIG_6LOWPAN_GHC_UDP=m -CONFIG_6LOWPAN_GHC_ICMPV6=m -CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m -CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m -CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m CONFIG_DNS_RESOLVER=y CONFIG_BATMAN_ADV=m # CONFIG_BATMAN_ADV_BATMAN_V is not set diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 524a89fa6953..af89287c1093 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -61,15 +61,7 @@ CONFIG_INET_IPCOMP=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_INET_RAW_DIAG=m -CONFIG_IPV6=m -CONFIG_IPV6_ROUTER_PREF=y -CONFIG_INET6_AH=m -CONFIG_INET6_ESP=m -CONFIG_INET6_ESP_OFFLOAD=m -CONFIG_INET6_IPCOMP=m -CONFIG_IPV6_ILA=m -CONFIG_IPV6_VTI=m -CONFIG_IPV6_GRE=m +# CONFIG_IPV6 is not set CONFIG_NETFILTER=y CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m @@ -85,7 +77,6 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_INET=y CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m @@ -102,7 +93,6 @@ CONFIG_NFT_QUOTA=m CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NFT_HASH=m -CONFIG_NFT_FIB_INET=m CONFIG_NFT_XFRM=m CONFIG_NFT_SOCKET=m CONFIG_NFT_OSF=m @@ -110,8 +100,6 @@ CONFIG_NFT_TPROXY=m CONFIG_NFT_SYNPROXY=m CONFIG_NFT_DUP_NETDEV=m CONFIG_NFT_FWD_NETDEV=m -CONFIG_NFT_FIB_NETDEV=m -CONFIG_NFT_REJECT_NETDEV=m CONFIG_NF_FLOW_TABLE_INET=m CONFIG_NF_FLOW_TABLE=m CONFIG_NETFILTER_XTABLES_LEGACY=y @@ -186,6 +174,7 @@ CONFIG_IP_SET_HASH_NETNET=m CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m +CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y @@ -207,29 +196,8 @@ CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NFT_DUP_IPV6=m -CONFIG_NFT_FIB_IPV6=m -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_AH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_MH=m -CONFIG_IP6_NF_MATCH_RPFILTER=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_MATCH_SRH=m -CONFIG_IP6_NF_TARGET_HL=m -CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_TARGET_SYNPROXY=m -CONFIG_IP6_NF_RAW=m -CONFIG_IP6_NF_NAT=m -CONFIG_IP6_NF_TARGET_MASQUERADE=m -CONFIG_IP6_NF_TARGET_NPT=m CONFIG_NF_TABLES_BRIDGE=m CONFIG_NFT_BRIDGE_META=m -CONFIG_NFT_BRIDGE_REJECT=m CONFIG_NF_CONNTRACK_BRIDGE=m CONFIG_BRIDGE_NF_EBTABLES_LEGACY=m CONFIG_BRIDGE_NF_EBTABLES=m @@ -240,7 +208,6 @@ CONFIG_BRIDGE_EBT_802_3=m CONFIG_BRIDGE_EBT_AMONG=m CONFIG_BRIDGE_EBT_ARP=m CONFIG_BRIDGE_EBT_IP=m -CONFIG_BRIDGE_EBT_IP6=m CONFIG_BRIDGE_EBT_LIMIT=m CONFIG_BRIDGE_EBT_MARK=m CONFIG_BRIDGE_EBT_PKTTYPE=m @@ -259,13 +226,6 @@ CONFIG_RDS_TCP=m CONFIG_L2TP=m CONFIG_BRIDGE=m CONFIG_ATALK=m -CONFIG_6LOWPAN=m -CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m -CONFIG_6LOWPAN_GHC_UDP=m -CONFIG_6LOWPAN_GHC_ICMPV6=m -CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m -CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m -CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m CONFIG_DNS_RESOLVER=y CONFIG_BATMAN_ADV=m # CONFIG_BATMAN_ADV_BATMAN_V is not set diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index f4fbc65c52d9..af210e8b77f9 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -62,15 +62,7 @@ CONFIG_INET_IPCOMP=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_INET_RAW_DIAG=m -CONFIG_IPV6=m -CONFIG_IPV6_ROUTER_PREF=y -CONFIG_INET6_AH=m -CONFIG_INET6_ESP=m -CONFIG_INET6_ESP_OFFLOAD=m -CONFIG_INET6_IPCOMP=m -CONFIG_IPV6_ILA=m -CONFIG_IPV6_VTI=m -CONFIG_IPV6_GRE=m +# CONFIG_IPV6 is not set CONFIG_NETFILTER=y CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m @@ -86,7 +78,6 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_INET=y CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m @@ -103,7 +94,6 @@ CONFIG_NFT_QUOTA=m CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NFT_HASH=m -CONFIG_NFT_FIB_INET=m CONFIG_NFT_XFRM=m CONFIG_NFT_SOCKET=m CONFIG_NFT_OSF=m @@ -111,8 +101,6 @@ CONFIG_NFT_TPROXY=m CONFIG_NFT_SYNPROXY=m CONFIG_NFT_DUP_NETDEV=m CONFIG_NFT_FWD_NETDEV=m -CONFIG_NFT_FIB_NETDEV=m -CONFIG_NFT_REJECT_NETDEV=m CONFIG_NF_FLOW_TABLE_INET=m CONFIG_NF_FLOW_TABLE=m CONFIG_NETFILTER_XTABLES_LEGACY=y @@ -187,6 +175,7 @@ CONFIG_IP_SET_HASH_NETNET=m CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m +CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y @@ -208,29 +197,8 @@ CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NFT_DUP_IPV6=m -CONFIG_NFT_FIB_IPV6=m -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_AH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_MH=m -CONFIG_IP6_NF_MATCH_RPFILTER=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_MATCH_SRH=m -CONFIG_IP6_NF_TARGET_HL=m -CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_TARGET_SYNPROXY=m -CONFIG_IP6_NF_RAW=m -CONFIG_IP6_NF_NAT=m -CONFIG_IP6_NF_TARGET_MASQUERADE=m -CONFIG_IP6_NF_TARGET_NPT=m CONFIG_NF_TABLES_BRIDGE=m CONFIG_NFT_BRIDGE_META=m -CONFIG_NFT_BRIDGE_REJECT=m CONFIG_NF_CONNTRACK_BRIDGE=m CONFIG_BRIDGE_NF_EBTABLES_LEGACY=m CONFIG_BRIDGE_NF_EBTABLES=m @@ -241,7 +209,6 @@ CONFIG_BRIDGE_EBT_802_3=m CONFIG_BRIDGE_EBT_AMONG=m CONFIG_BRIDGE_EBT_ARP=m CONFIG_BRIDGE_EBT_IP=m -CONFIG_BRIDGE_EBT_IP6=m CONFIG_BRIDGE_EBT_LIMIT=m CONFIG_BRIDGE_EBT_MARK=m CONFIG_BRIDGE_EBT_PKTTYPE=m @@ -260,13 +227,6 @@ CONFIG_RDS_TCP=m CONFIG_L2TP=m CONFIG_BRIDGE=m CONFIG_ATALK=m -CONFIG_6LOWPAN=m -CONFIG_6LOWPAN_GHC_EXT_HDR_HOP=m -CONFIG_6LOWPAN_GHC_UDP=m -CONFIG_6LOWPAN_GHC_ICMPV6=m -CONFIG_6LOWPAN_GHC_EXT_HDR_DEST=m -CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG=m -CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m CONFIG_DNS_RESOLVER=y CONFIG_BATMAN_ADV=m # CONFIG_BATMAN_ADV_BATMAN_V is not set diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 78ac2ff5befd..23f4245f7d7d 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -4,7 +4,6 @@ menuconfig INFINIBAND depends on HAS_IOMEM && HAS_DMA depends on NET depends on INET - depends on m || IPV6 != m depends on !ALPHA select DMA_SHARED_BUFFER select IRQ_POLL diff --git a/drivers/infiniband/hw/ocrdma/Kconfig b/drivers/infiniband/hw/ocrdma/Kconfig index 54bd70bc4d1a..b50c5f507e7c 100644 --- a/drivers/infiniband/hw/ocrdma/Kconfig +++ b/drivers/infiniband/hw/ocrdma/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config INFINIBAND_OCRDMA tristate "Emulex One Connect HCA support" - depends on ETHERNET && NETDEVICES && PCI && INET && (IPV6 || IPV6=n) + depends on ETHERNET && NETDEVICES && PCI && INET select NET_VENDOR_EMULEX select BE2NET help diff --git a/drivers/infiniband/ulp/ipoib/Kconfig b/drivers/infiniband/ulp/ipoib/Kconfig index 254e31a90a66..b5253a231bdd 100644 --- a/drivers/infiniband/ulp/ipoib/Kconfig +++ b/drivers/infiniband/ulp/ipoib/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config INFINIBAND_IPOIB tristate "IP-over-InfiniBand" - depends on NETDEVICES && INET && (IPV6 || IPV6=n) + depends on NETDEVICES && INET help Support for the IP-over-InfiniBand protocol (IPoIB). This transports IP packets over InfiniBand so you can use your IB diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index b2fd90466bab..edaab759dc50 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -41,7 +41,6 @@ if NET_CORE config BONDING tristate "Bonding driver support" depends on INET - depends on IPV6 || IPV6=n depends on TLS || TLS_DEVICE=n help Say 'Y' or 'M' if you wish to be able to 'bond' multiple Ethernet @@ -75,7 +74,6 @@ config DUMMY config WIREGUARD tristate "WireGuard secure network tunnel" depends on NET && INET - depends on IPV6 || !IPV6 select NET_UDP_TUNNEL select DST_CACHE select CRYPTO_LIB_CURVE25519 @@ -105,7 +103,6 @@ config WIREGUARD_DEBUG config OVPN tristate "OpenVPN data channel offload" depends on NET && INET - depends on IPV6 || !IPV6 select DST_CACHE select NET_UDP_TUNNEL select CRYPTO @@ -202,7 +199,6 @@ config IPVLAN_L3S config IPVLAN tristate "IP-VLAN support" depends on INET - depends on IPV6 || !IPV6 help This allows one to create virtual devices off of a main interface and packets will be delivered based on the dest L3 (IPv6/IPv4 addr) @@ -249,7 +245,6 @@ config VXLAN config GENEVE tristate "Generic Network Virtualization Encapsulation" depends on INET - depends on IPV6 || !IPV6 select NET_UDP_TUNNEL select GRO_CELLS help @@ -265,7 +260,6 @@ config GENEVE config BAREUDP tristate "Bare UDP Encapsulation" depends on INET - depends on IPV6 || !IPV6 select NET_UDP_TUNNEL select GRO_CELLS help @@ -308,7 +302,6 @@ config PFCP config AMT tristate "Automatic Multicast Tunneling (AMT)" depends on INET && IP_MULTICAST - depends on IPV6 || !IPV6 select NET_UDP_TUNNEL help This allows one to create AMT(Automatic Multicast Tunneling) @@ -480,7 +473,6 @@ config NET_VRF tristate "Virtual Routing and Forwarding (Lite)" depends on IP_MULTIPLE_TABLES depends on NET_L3_MASTER_DEV - depends on IPV6 || IPV6=n depends on IPV6_MULTIPLE_TABLES || IPV6=n help This option enables the support for mapping interfaces into VRF's. The @@ -615,7 +607,6 @@ config NETDEVSIM tristate "Simulated networking device" depends on DEBUG_FS depends on INET - depends on IPV6 || IPV6=n depends on PSAMPLE || PSAMPLE=n depends on PTP_1588_CLOCK_MOCK || PTP_1588_CLOCK_MOCK=n select NET_DEVLINK diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index 9787c1857e13..dd164acafd01 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -96,7 +96,7 @@ config BNX2 config CNIC tristate "QLogic CNIC support" - depends on PCI && (IPV6 || IPV6=n) + depends on PCI select BNX2 select UIO help diff --git a/drivers/net/ethernet/chelsio/Kconfig b/drivers/net/ethernet/chelsio/Kconfig index c931ec8cac40..96d7779cd2f0 100644 --- a/drivers/net/ethernet/chelsio/Kconfig +++ b/drivers/net/ethernet/chelsio/Kconfig @@ -68,7 +68,7 @@ config CHELSIO_T3 config CHELSIO_T4 tristate "Chelsio Communications T4/T5/T6 Ethernet support" - depends on PCI && (IPV6 || IPV6=n) && (TLS || TLS=n) + depends on PCI && (TLS || TLS=n) depends on PTP_1588_CLOCK_OPTIONAL select FW_LOADER select MDIO diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig index 74f7e27b490f..2229a2694aa5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig +++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig @@ -56,7 +56,6 @@ config MLXSW_SPECTRUM depends on MLXSW_CORE && MLXSW_PCI && NET_SWITCHDEV && VLAN_8021Q depends on PSAMPLE || PSAMPLE=n depends on BRIDGE || BRIDGE=n - depends on IPV6 || IPV6=n depends on NET_IPGRE || NET_IPGRE=n depends on IPV6_GRE || IPV6_GRE=n depends on VXLAN || VXLAN=n diff --git a/drivers/net/ethernet/netronome/Kconfig b/drivers/net/ethernet/netronome/Kconfig index d03d6e96f730..d115d16d4649 100644 --- a/drivers/net/ethernet/netronome/Kconfig +++ b/drivers/net/ethernet/netronome/Kconfig @@ -33,7 +33,6 @@ config NFP_APP_FLOWER bool "NFP4000/NFP6000 TC Flower offload support" depends on NFP depends on NET_SWITCHDEV - depends on IPV6!=m || NFP=m default y help Enable driver support for TC Flower offload on NFP4000 and NFP6000. diff --git a/drivers/scsi/bnx2fc/Kconfig b/drivers/scsi/bnx2fc/Kconfig index 3cf7e08df809..d12eeb13384a 100644 --- a/drivers/scsi/bnx2fc/Kconfig +++ b/drivers/scsi/bnx2fc/Kconfig @@ -2,7 +2,6 @@ config SCSI_BNX2X_FCOE tristate "QLogic FCoE offload support" depends on PCI - depends on (IPV6 || IPV6=n) depends on LIBFC depends on LIBFCOE select NETDEVICES diff --git a/drivers/scsi/bnx2i/Kconfig b/drivers/scsi/bnx2i/Kconfig index 75ace2302fed..e649a04fab1d 100644 --- a/drivers/scsi/bnx2i/Kconfig +++ b/drivers/scsi/bnx2i/Kconfig @@ -3,7 +3,6 @@ config SCSI_BNX2_ISCSI tristate "QLogic NetXtreme II iSCSI support" depends on NET depends on PCI - depends on (IPV6 || IPV6=n) select SCSI_ISCSI_ATTRS select NETDEVICES select ETHERNET diff --git a/drivers/scsi/cxgbi/cxgb3i/Kconfig b/drivers/scsi/cxgbi/cxgb3i/Kconfig index e20e6f3bfe64..143e881ec77e 100644 --- a/drivers/scsi/cxgbi/cxgb3i/Kconfig +++ b/drivers/scsi/cxgbi/cxgb3i/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config SCSI_CXGB3_ISCSI tristate "Chelsio T3 iSCSI support" - depends on PCI && INET && (IPV6 || IPV6=n) + depends on PCI && INET select NETDEVICES select ETHERNET select NET_VENDOR_CHELSIO diff --git a/drivers/scsi/cxgbi/cxgb4i/Kconfig b/drivers/scsi/cxgbi/cxgb4i/Kconfig index 63c8a0f3cd0c..dd1c8ff36b00 100644 --- a/drivers/scsi/cxgbi/cxgb4i/Kconfig +++ b/drivers/scsi/cxgbi/cxgb4i/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config SCSI_CXGB4_ISCSI tristate "Chelsio T4 iSCSI support" - depends on PCI && INET && (IPV6 || IPV6=n) + depends on PCI && INET depends on PTP_1588_CLOCK_OPTIONAL depends on THERMAL || !THERMAL depends on ETHERNET diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig index b46165df5a91..fb6ba9f5a634 100644 --- a/fs/dlm/Kconfig +++ b/fs/dlm/Kconfig @@ -2,7 +2,7 @@ menuconfig DLM tristate "Distributed Lock Manager (DLM)" depends on INET - depends on SYSFS && CONFIGFS_FS && (IPV6 || IPV6=n) + depends on SYSFS && CONFIGFS_FS help A general purpose distributed lock manager for kernel or userspace applications. diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig index 7bd231d16d4a..8beee571b6af 100644 --- a/fs/gfs2/Kconfig +++ b/fs/gfs2/Kconfig @@ -26,7 +26,7 @@ config GFS2_FS config GFS2_FS_LOCKING_DLM bool "GFS2 DLM locking" - depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && \ + depends on (GFS2_FS!=n) && NET && INET && \ CONFIGFS_FS && SYSFS && (DLM=y || DLM=GFS2_FS) help Multiple node locking module for GFS2 diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig index 3c8ded7d3e84..318715c8fc9b 100644 --- a/net/bridge/Kconfig +++ b/net/bridge/Kconfig @@ -7,7 +7,6 @@ config BRIDGE tristate "802.1d Ethernet Bridging" select LLC select STP - depends on IPV6 || IPV6=n help If you say Y here, then your Linux box will be able to act as an Ethernet bridge, which means that the different Ethernet segments it diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index df922f9f5289..21e5164e30db 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -191,7 +191,7 @@ config NET_IP_TUNNEL config NET_IPGRE tristate "IP: GRE tunnels over IP" - depends on (IPV6 || IPV6=n) && NET_IPGRE_DEMUX + depends on NET_IPGRE_DEMUX select NET_IP_TUNNEL help Tunneling means encapsulating data of one protocol type within @@ -303,7 +303,6 @@ config SYN_COOKIES config NET_IPVTI tristate "Virtual (secure) IP: tunneling" - depends on IPV6 || IPV6=n select INET_TUNNEL select NET_IP_TUNNEL select XFRM @@ -439,7 +438,7 @@ config INET_TCP_DIAG config INET_UDP_DIAG tristate "UDP: socket monitoring interface" - depends on INET_DIAG && (IPV6 || IPV6=n) + depends on INET_DIAG default n help Support for UDP socket monitoring interface used by the ss tool. @@ -447,7 +446,7 @@ config INET_UDP_DIAG config INET_RAW_DIAG tristate "RAW: socket monitoring interface" - depends on INET_DIAG && (IPV6 || IPV6=n) + depends on INET_DIAG default n help Support for RAW socket monitoring interface used by the ss tool. @@ -750,7 +749,7 @@ config TCP_AO select CRYPTO select CRYPTO_LIB_UTILS select TCP_SIGPOOL - depends on 64BIT && IPV6 != m # seq-number extension needs WRITE_ONCE(u64) + depends on 64BIT # seq-number extension needs WRITE_ONCE(u64) help TCP-AO specifies the use of stronger Message Authentication Codes (MACs), protects against replays for long-lived TCP connections, and diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index b8f9a8c0302e..c024aa77f25b 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -3,9 +3,8 @@ # IPv6 configuration # -# IPv6 as module will cause a CRASH if you try to unload it menuconfig IPV6 - tristate "The IPv6 protocol" + bool "The IPv6 protocol" default y select CRYPTO_LIB_SHA1 help @@ -17,9 +16,6 @@ menuconfig IPV6 Documentation/networking/ipv6.rst and read the HOWTO at - To compile this protocol support as a module, choose M here: the - module will be called ipv6. - if IPV6 config IPV6_ROUTER_PREF diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index eb9fff86baa1..bb29b29fdcfb 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -70,10 +70,6 @@ #include "ip6_offload.h" -MODULE_AUTHOR("Cast of dozens"); -MODULE_DESCRIPTION("IPv6 protocol stack for Linux"); -MODULE_LICENSE("GPL"); - /* The inetsw6 table contains everything that inet6_create needs to * build a new socket. */ @@ -1278,6 +1274,4 @@ out_unregister_tcp_proto: proto_unregister(&tcpv6_prot); goto out; } -module_init(inet6_init); - -MODULE_ALIAS_NETPROTO(PF_INET6); +device_initcall(inet6_init); diff --git a/net/l2tp/Kconfig b/net/l2tp/Kconfig index b7856748e960..0de178d5baba 100644 --- a/net/l2tp/Kconfig +++ b/net/l2tp/Kconfig @@ -5,7 +5,6 @@ menuconfig L2TP tristate "Layer Two Tunneling Protocol (L2TP)" - depends on (IPV6 || IPV6=n) depends on INET select NET_UDP_TUNNEL help diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 6cdc994fdc8a..f3ea0cb26f36 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -249,7 +249,6 @@ config NF_CONNTRACK_FTP config NF_CONNTRACK_H323 tristate "H.323 protocol support" - depends on IPV6 || IPV6=n depends on NETFILTER_ADVANCED help H.323 is a VoIP signalling protocol from ITU-T. As one of the most @@ -589,7 +588,6 @@ config NFT_QUOTA config NFT_REJECT default m if NETFILTER_ADVANCED=n tristate "Netfilter nf_tables reject support" - depends on !NF_TABLES_INET || (IPV6!=m || m) help This option adds the "reject" expression that you can use to explicitly deny and notify via TCP reset/ICMP informational errors @@ -636,7 +634,6 @@ config NFT_XFRM config NFT_SOCKET tristate "Netfilter nf_tables socket match support" - depends on IPV6 || IPV6=n select NF_SOCKET_IPV4 select NF_SOCKET_IPV6 if NF_TABLES_IPV6 help @@ -652,7 +649,6 @@ config NFT_OSF config NFT_TPROXY tristate "Netfilter nf_tables tproxy support" - depends on IPV6 || IPV6=n select NF_DEFRAG_IPV4 select NF_DEFRAG_IPV6 if NF_TABLES_IPV6 select NF_TPROXY_IPV4 @@ -1071,7 +1067,6 @@ config NETFILTER_XT_TARGET_MASQUERADE config NETFILTER_XT_TARGET_TEE tristate '"TEE" - packet cloning to alternate destination' depends on NETFILTER_ADVANCED - depends on IPV6 || IPV6=n depends on !NF_CONNTRACK || NF_CONNTRACK depends on IP6_NF_IPTABLES || !IP6_NF_IPTABLES select NF_DUP_IPV4 @@ -1084,7 +1079,6 @@ config NETFILTER_XT_TARGET_TPROXY tristate '"TPROXY" target transparent proxying support' depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED - depends on IPV6 || IPV6=n depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n depends on IP_NF_MANGLE || NFT_COMPAT select NF_DEFRAG_IPV4 @@ -1126,7 +1120,6 @@ config NETFILTER_XT_TARGET_SECMARK config NETFILTER_XT_TARGET_TCPMSS tristate '"TCPMSS" target support' - depends on IPV6 || IPV6=n default m if NETFILTER_ADVANCED=n help This option adds a `TCPMSS' target, which allows you to alter the @@ -1581,7 +1574,6 @@ config NETFILTER_XT_MATCH_SOCKET tristate '"socket" match support' depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED - depends on IPV6 || IPV6=n depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n select NF_SOCKET_IPV4 select NF_SOCKET_IPV6 if IP6_NF_IPTABLES diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig index f60b81c66078..43416b3026fb 100644 --- a/net/rxrpc/Kconfig +++ b/net/rxrpc/Kconfig @@ -25,7 +25,7 @@ if AF_RXRPC config AF_RXRPC_IPV6 bool "IPv6 support for RxRPC" - depends on (IPV6 = m && AF_RXRPC = m) || (IPV6 = y && AF_RXRPC) + depends on IPV6 help Say Y here to allow AF_RXRPC to use IPV6 UDP as well as IPV4 UDP as its network transport. diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig index e947646a380c..fc989a3791b3 100644 --- a/net/sctp/Kconfig +++ b/net/sctp/Kconfig @@ -6,7 +6,6 @@ menuconfig IP_SCTP tristate "The SCTP Protocol" depends on INET - depends on IPV6 || IPV6=n select CRYPTO_LIB_SHA1 select CRYPTO_LIB_SHA256 select CRYPTO_LIB_UTILS diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig index bb0d71eb02a6..18f62135e47b 100644 --- a/net/tipc/Kconfig +++ b/net/tipc/Kconfig @@ -6,7 +6,6 @@ menuconfig TIPC tristate "The TIPC Protocol" depends on INET - depends on IPV6 || IPV6=n help The Transparent Inter Process Communication (TIPC) protocol is specially designed for intra cluster communication. This protocol From 0557a34487b10f3e600a3a20b98ea164f36a3ad2 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Wed, 25 Mar 2026 13:08:43 +0100 Subject: [PATCH 0977/1561] net: remove EXPORT_IPV6_MOD() and EXPORT_IPV6_MOD_GPL() macros As IPv6 is built-in only, the macro is always evaluating to an empty one. Remove it completely from the code. Signed-off-by: Fernando Fernandez Mancera Link: https://patch.msgid.link/20260325120928.15848-3-fmancera@suse.de Signed-off-by: Jakub Kicinski --- include/net/ip.h | 8 -------- net/core/dev.c | 3 --- net/core/hotdata.c | 1 - net/core/neighbour.c | 1 - net/ipv4/inet_connection_sock.c | 2 -- net/ipv4/inet_hashtables.c | 5 ----- net/ipv4/inetpeer.c | 4 ---- net/ipv4/metrics.c | 1 - net/ipv4/ping.c | 16 ---------------- net/ipv4/syncookies.c | 4 ---- net/ipv4/tcp.c | 23 ----------------------- net/ipv4/tcp_fastopen.c | 1 - net/ipv4/tcp_input.c | 7 ------- net/ipv4/tcp_ipv4.c | 21 --------------------- net/ipv4/tcp_minisocks.c | 4 ---- net/ipv4/tcp_output.c | 6 ------ net/ipv4/tcp_timer.c | 1 - net/ipv4/udp.c | 29 ----------------------------- net/psp/psp_sock.c | 1 - 19 files changed, 138 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index f39a3787fedd..7f2fe1a8401b 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -692,14 +692,6 @@ static __inline__ void inet_reset_saddr(struct sock *sk) #endif -#if IS_MODULE(CONFIG_IPV6) -#define EXPORT_IPV6_MOD(X) EXPORT_SYMBOL(X) -#define EXPORT_IPV6_MOD_GPL(X) EXPORT_SYMBOL_GPL(X) -#else -#define EXPORT_IPV6_MOD(X) -#define EXPORT_IPV6_MOD_GPL(X) -#endif - static inline unsigned int ipv4_addr_hash(__be32 ip) { return (__force unsigned int) ip; diff --git a/net/core/dev.c b/net/core/dev.c index ae8fe706c486..90d041390e29 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1182,7 +1182,6 @@ void netdev_copy_name(struct net_device *dev, char *name) strscpy(name, dev->name, IFNAMSIZ); } while (read_seqretry(&netdev_rename_lock, seq)); } -EXPORT_IPV6_MOD_GPL(netdev_copy_name); /** * netdev_get_name - get a netdevice name, knowing its ifindex. @@ -1312,7 +1311,6 @@ struct net_device *netdev_get_by_flags_rcu(struct net *net, netdevice_tracker *t return NULL; } -EXPORT_IPV6_MOD(netdev_get_by_flags_rcu); /** * dev_valid_name - check if name is okay for network device @@ -1838,7 +1836,6 @@ void netif_disable_lro(struct net_device *dev) netdev_unlock_ops(lower_dev); } } -EXPORT_IPV6_MOD(netif_disable_lro); /** * dev_disable_gro_hw - disable HW Generic Receive Offload on a device diff --git a/net/core/hotdata.c b/net/core/hotdata.c index a6db36580817..b0f51a726a3d 100644 --- a/net/core/hotdata.c +++ b/net/core/hotdata.c @@ -27,4 +27,3 @@ struct net_hotdata net_hotdata __cacheline_aligned = { EXPORT_SYMBOL(net_hotdata); struct net_aligned_data net_aligned_data; -EXPORT_IPV6_MOD(net_aligned_data); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index c56a4e7bf790..9e12524b67fa 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -778,7 +778,6 @@ struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl, return NULL; } -EXPORT_IPV6_MOD(pneigh_lookup); int pneigh_create(struct neigh_table *tbl, struct net *net, const void *pkey, struct net_device *dev, diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index bfc98283b244..4ac3ae1bc1af 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -709,7 +709,6 @@ out_err: arg->err = error; return NULL; } -EXPORT_IPV6_MOD(inet_csk_accept); /* * Using different timers for retransmit, delayed acks and probes @@ -1021,7 +1020,6 @@ void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req inet_csk_reqsk_queue_drop(sk, req); reqsk_put(req); } -EXPORT_IPV6_MOD(inet_csk_reqsk_queue_drop_and_put); static void reqsk_timer_handler(struct timer_list *t) { diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 13310c72b0bf..3e795547b40c 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -758,7 +758,6 @@ bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, bool *found_dup_sk) } return ok; } -EXPORT_IPV6_MOD(inet_ehash_nolisten); static int inet_reuseport_add_sock(struct sock *sk, struct inet_listen_hashbucket *ilb) @@ -826,7 +825,6 @@ unlock: return err; } -EXPORT_IPV6_MOD(inet_hash); void inet_unhash(struct sock *sk) { @@ -859,7 +857,6 @@ void inet_unhash(struct sock *sk) spin_unlock_bh(lock); } } -EXPORT_IPV6_MOD(inet_unhash); static bool inet_bind2_bucket_match(const struct inet_bind2_bucket *tb, const struct net *net, unsigned short port, @@ -1022,14 +1019,12 @@ int inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family) { return __inet_bhash2_update_saddr(sk, saddr, family, false); } -EXPORT_IPV6_MOD(inet_bhash2_update_saddr); void inet_bhash2_reset_saddr(struct sock *sk) { if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) __inet_bhash2_update_saddr(sk, NULL, 0, true); } -EXPORT_IPV6_MOD(inet_bhash2_reset_saddr); /* RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm * Note that we use 32bit integers (vs RFC 'short integers') diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 9fa396d5f09f..d8083b9033c2 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -59,7 +59,6 @@ void inet_peer_base_init(struct inet_peer_base *bp) seqlock_init(&bp->lock); bp->total = 0; } -EXPORT_IPV6_MOD_GPL(inet_peer_base_init); #define PEER_MAX_GC 32 @@ -217,7 +216,6 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base, return p; } -EXPORT_IPV6_MOD_GPL(inet_getpeer); void inet_putpeer(struct inet_peer *p) { @@ -268,7 +266,6 @@ bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout) WRITE_ONCE(peer->rate_tokens, token); return rc; } -EXPORT_IPV6_MOD(inet_peer_xrlim_allow); void inetpeer_invalidate_tree(struct inet_peer_base *base) { @@ -285,4 +282,3 @@ void inetpeer_invalidate_tree(struct inet_peer_base *base) base->total = 0; } -EXPORT_IPV6_MOD(inetpeer_invalidate_tree); diff --git a/net/ipv4/metrics.c b/net/ipv4/metrics.c index c1463add48c4..ad40762a8b38 100644 --- a/net/ipv4/metrics.c +++ b/net/ipv4/metrics.c @@ -88,4 +88,3 @@ struct dst_metrics *ip_fib_metrics_init(struct nlattr *fc_mx, return fib_metrics; } -EXPORT_IPV6_MOD_GPL(ip_fib_metrics_init); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 92ab0e0f6f71..bda245c80893 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -56,7 +56,6 @@ struct ping_table { static struct ping_table ping_table; struct pingv6_ops pingv6_ops; -EXPORT_IPV6_MOD_GPL(pingv6_ops); static inline u32 ping_hashfn(const struct net *net, u32 num, u32 mask) { @@ -139,7 +138,6 @@ fail: spin_unlock(&ping_table.lock); return -EADDRINUSE; } -EXPORT_IPV6_MOD_GPL(ping_get_port); void ping_unhash(struct sock *sk) { @@ -154,7 +152,6 @@ void ping_unhash(struct sock *sk) } spin_unlock(&ping_table.lock); } -EXPORT_IPV6_MOD_GPL(ping_unhash); /* Called under rcu_read_lock() */ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) @@ -278,7 +275,6 @@ out_release_group: put_group_info(group_info); return ret; } -EXPORT_IPV6_MOD_GPL(ping_init_sock); void ping_close(struct sock *sk, long timeout) { @@ -288,7 +284,6 @@ void ping_close(struct sock *sk, long timeout) sk_common_release(sk); } -EXPORT_IPV6_MOD_GPL(ping_close); static int ping_pre_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len) @@ -468,7 +463,6 @@ out: pr_debug("ping_v4_bind -> %d\n", err); return err; } -EXPORT_IPV6_MOD_GPL(ping_bind); /* * Is this a supported type of ICMP message? @@ -601,7 +595,6 @@ void ping_err(struct sk_buff *skb, int offset, u32 info) out: return; } -EXPORT_IPV6_MOD_GPL(ping_err); /* * Copy and checksum an ICMP Echo packet from user space into a buffer @@ -631,7 +624,6 @@ int ping_getfrag(void *from, char *to, return 0; } -EXPORT_IPV6_MOD_GPL(ping_getfrag); static int ping_v4_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh, struct flowi4 *fl4) @@ -692,7 +684,6 @@ int ping_common_sendmsg(int family, struct msghdr *msg, size_t len, return 0; } -EXPORT_IPV6_MOD_GPL(ping_common_sendmsg); static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { @@ -936,7 +927,6 @@ out: pr_debug("ping_recvmsg -> %d\n", err); return err; } -EXPORT_IPV6_MOD_GPL(ping_recvmsg); static enum skb_drop_reason __ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) @@ -957,7 +947,6 @@ int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { return __ping_queue_rcv_skb(sk, skb) ? -1 : 0; } -EXPORT_IPV6_MOD_GPL(ping_queue_rcv_skb); /* @@ -985,7 +974,6 @@ enum skb_drop_reason ping_rcv(struct sk_buff *skb) kfree_skb_reason(skb, SKB_DROP_REASON_NO_SOCKET); return SKB_DROP_REASON_NO_SOCKET; } -EXPORT_IPV6_MOD_GPL(ping_rcv); struct proto ping_prot = { .name = "PING", @@ -1007,7 +995,6 @@ struct proto ping_prot = { .put_port = ping_unhash, .obj_size = sizeof(struct inet_sock), }; -EXPORT_IPV6_MOD(ping_prot); #ifdef CONFIG_PROC_FS @@ -1072,7 +1059,6 @@ void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family) return *pos ? ping_get_idx(seq, *pos-1) : SEQ_START_TOKEN; } -EXPORT_IPV6_MOD_GPL(ping_seq_start); static void *ping_v4_seq_start(struct seq_file *seq, loff_t *pos) { @@ -1091,14 +1077,12 @@ void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos) ++*pos; return sk; } -EXPORT_IPV6_MOD_GPL(ping_seq_next); void ping_seq_stop(struct seq_file *seq, void *v) __releases(ping_table.lock) { spin_unlock(&ping_table.lock); } -EXPORT_IPV6_MOD_GPL(ping_seq_stop); static void ping_v4_format_sock(struct sock *sp, struct seq_file *f, int bucket) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index fc3affd9c801..f1474598d2c8 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -223,7 +223,6 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, return NULL; } -EXPORT_IPV6_MOD(tcp_get_cookie_sock); /* * when syncookies are in effect and tcp timestamps are enabled we stored @@ -260,7 +259,6 @@ bool cookie_timestamp_decode(const struct net *net, return READ_ONCE(net->ipv4.sysctl_tcp_window_scaling) != 0; } -EXPORT_IPV6_MOD(cookie_timestamp_decode); static int cookie_tcp_reqsk_init(struct sock *sk, struct sk_buff *skb, struct request_sock *req) @@ -312,7 +310,6 @@ struct request_sock *cookie_bpf_check(struct sock *sk, struct sk_buff *skb) return req; } -EXPORT_IPV6_MOD_GPL(cookie_bpf_check); #endif struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, @@ -353,7 +350,6 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, return req; } -EXPORT_IPV6_MOD_GPL(cookie_tcp_reqsk_alloc); static struct request_sock *cookie_tcp_check(struct net *net, struct sock *sk, struct sk_buff *skb) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 516087c622ad..bd2c3c4587e1 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -303,7 +303,6 @@ DEFINE_PER_CPU(u32, tcp_tw_isn); EXPORT_PER_CPU_SYMBOL_GPL(tcp_tw_isn); long sysctl_tcp_mem[3] __read_mostly; -EXPORT_IPV6_MOD(sysctl_tcp_mem); DEFINE_PER_CPU(int, tcp_memory_per_cpu_fw_alloc); EXPORT_PER_CPU_SYMBOL_GPL(tcp_memory_per_cpu_fw_alloc); @@ -317,7 +316,6 @@ EXPORT_SYMBOL(tcp_have_smc); * Current number of TCP sockets. */ struct percpu_counter tcp_sockets_allocated ____cacheline_aligned_in_smp; -EXPORT_IPV6_MOD(tcp_sockets_allocated); /* * Pressure flag: try to collapse. @@ -341,7 +339,6 @@ void tcp_enter_memory_pressure(struct sock *sk) if (!cmpxchg(&tcp_memory_pressure, 0, val)) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURES); } -EXPORT_IPV6_MOD_GPL(tcp_enter_memory_pressure); void tcp_leave_memory_pressure(struct sock *sk) { @@ -354,7 +351,6 @@ void tcp_leave_memory_pressure(struct sock *sk) NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURESCHRONO, jiffies_to_msecs(jiffies - val)); } -EXPORT_IPV6_MOD_GPL(tcp_leave_memory_pressure); /* Convert seconds to retransmits based on initial and max timeout */ static u8 secs_to_retrans(int seconds, int timeout, int rto_max) @@ -418,7 +414,6 @@ void tcp_md5_destruct_sock(struct sock *sk) static_branch_slow_dec_deferred(&tcp_md5_needed); } } -EXPORT_IPV6_MOD_GPL(tcp_md5_destruct_sock); #endif /* Address-family independent initialization for a tcp_sock. @@ -486,7 +481,6 @@ void tcp_init_sock(struct sock *sk) sk_sockets_allocated_inc(sk); xa_init_flags(&sk->sk_user_frags, XA_FLAGS_ALLOC1); } -EXPORT_IPV6_MOD(tcp_init_sock); static void tcp_tx_timestamp(struct sock *sk, struct sockcm_cookie *sockc) { @@ -691,7 +685,6 @@ int tcp_ioctl(struct sock *sk, int cmd, int *karg) *karg = answ; return 0; } -EXPORT_IPV6_MOD(tcp_ioctl); void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) { @@ -908,7 +901,6 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, return ret; } -EXPORT_IPV6_MOD(tcp_splice_read); /* We allow to exceed memory limits for FIN packets to expedite * connection tear down and (memory) recovery. @@ -1483,7 +1475,6 @@ void tcp_splice_eof(struct socket *sock) tcp_push(sk, 0, mss_now, tp->nonagle, size_goal); release_sock(sk); } -EXPORT_IPV6_MOD_GPL(tcp_splice_eof); /* * Handle reading urgent data. BSD has very simple semantics for @@ -1795,7 +1786,6 @@ int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor) } return copied; } -EXPORT_IPV6_MOD(tcp_read_skb); void tcp_read_done(struct sock *sk, size_t len) { @@ -1840,7 +1830,6 @@ int tcp_peek_len(struct socket *sock) { return tcp_inq(sock->sk); } -EXPORT_IPV6_MOD(tcp_peek_len); /* Make sure sk_rcvbuf is big enough to satisfy SO_RCVLOWAT hint */ int tcp_set_rcvlowat(struct sock *sk, int val) @@ -1870,7 +1859,6 @@ int tcp_set_rcvlowat(struct sock *sk, int val) } return 0; } -EXPORT_IPV6_MOD(tcp_set_rcvlowat); #ifdef CONFIG_MMU static const struct vm_operations_struct tcp_vm_ops = { @@ -1889,7 +1877,6 @@ int tcp_mmap(struct file *file, struct socket *sock, vma->vm_ops = &tcp_vm_ops; return 0; } -EXPORT_IPV6_MOD(tcp_mmap); static skb_frag_t *skb_advance_to_frag(struct sk_buff *skb, u32 offset_skb, u32 *offset_frag) @@ -2974,7 +2961,6 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags) } return ret; } -EXPORT_IPV6_MOD(tcp_recvmsg); void tcp_set_state(struct sock *sk, int state) { @@ -3104,7 +3090,6 @@ void tcp_shutdown(struct sock *sk, int how) tcp_send_fin(sk); } } -EXPORT_IPV6_MOD(tcp_shutdown); int tcp_orphan_count_sum(void) { @@ -3617,7 +3602,6 @@ static int tcp_repair_options_est(struct sock *sk, sockptr_t optbuf, } DEFINE_STATIC_KEY_FALSE(tcp_tx_delay_enabled); -EXPORT_IPV6_MOD(tcp_tx_delay_enabled); static void tcp_enable_tx_delay(struct sock *sk, int val) { @@ -4202,7 +4186,6 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, optval, optlen); return do_tcp_setsockopt(sk, level, optname, optval, optlen); } -EXPORT_IPV6_MOD(tcp_setsockopt); static void tcp_get_info_chrono_stats(const struct tcp_sock *tp, struct tcp_info *info) @@ -4865,7 +4848,6 @@ bool tcp_bpf_bypass_getsockopt(int level, int optname) return false; } -EXPORT_IPV6_MOD(tcp_bpf_bypass_getsockopt); int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) @@ -4879,7 +4861,6 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, return do_tcp_getsockopt(sk, level, optname, USER_SOCKPTR(optval), USER_SOCKPTR(optlen)); } -EXPORT_IPV6_MOD(tcp_getsockopt); #ifdef CONFIG_TCP_MD5SIG void tcp_md5_hash_skb_data(struct md5_ctx *ctx, const struct sk_buff *skb, @@ -4910,7 +4891,6 @@ void tcp_md5_hash_skb_data(struct md5_ctx *ctx, const struct sk_buff *skb, skb_walk_frags(skb, frag_iter) tcp_md5_hash_skb_data(ctx, frag_iter, 0); } -EXPORT_IPV6_MOD(tcp_md5_hash_skb_data); void tcp_md5_hash_key(struct md5_ctx *ctx, const struct tcp_md5sig_key *key) @@ -4922,7 +4902,6 @@ void tcp_md5_hash_key(struct md5_ctx *ctx, */ data_race(({ md5_update(ctx, key->key, keylen), 0; })); } -EXPORT_IPV6_MOD(tcp_md5_hash_key); /* Called with rcu_read_lock() */ static enum skb_drop_reason @@ -5023,7 +5002,6 @@ int tcp_do_parse_auth_options(const struct tcphdr *th, } return 0; } -EXPORT_IPV6_MOD(tcp_do_parse_auth_options); #endif /* Called with rcu_read_lock() */ @@ -5092,7 +5070,6 @@ tcp_inbound_hash(struct sock *sk, const struct request_sock *req, return tcp_inbound_md5_hash(sk, skb, saddr, daddr, family, l3index, md5_location); } -EXPORT_IPV6_MOD_GPL(tcp_inbound_hash); void tcp_done(struct sock *sk) { diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 4e389d609f91..471c78be5513 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -559,7 +559,6 @@ bool tcp_fastopen_defer_connect(struct sock *sk, int *err) } return false; } -EXPORT_IPV6_MOD(tcp_fastopen_defer_connect); /* * The following code block is to deal with middle box issues with TFO: diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e6b2f4be7723..7171442c3ed7 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -814,7 +814,6 @@ void tcp_initialize_rcv_mss(struct sock *sk) inet_csk(sk)->icsk_ack.rcv_mss = hint; } -EXPORT_IPV6_MOD(tcp_initialize_rcv_mss); /* Receiver "autotuning" code. * @@ -3172,7 +3171,6 @@ void tcp_simple_retransmit(struct sock *sk) */ tcp_non_congestion_loss_retransmit(sk); } -EXPORT_IPV6_MOD(tcp_simple_retransmit); void tcp_enter_recovery(struct sock *sk, bool ece_ack) { @@ -4843,7 +4841,6 @@ void tcp_done_with_error(struct sock *sk, int err) if (!sock_flag(sk, SOCK_DEAD)) sk_error_report(sk); } -EXPORT_IPV6_MOD(tcp_done_with_error); /* When we get a reset we do this. */ void tcp_reset(struct sock *sk, struct sk_buff *skb) @@ -6665,7 +6662,6 @@ csum_error: discard: tcp_drop_reason(sk, skb, reason); } -EXPORT_IPV6_MOD(tcp_rcv_established); void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb) { @@ -7384,7 +7380,6 @@ consume: __kfree_skb(skb); return 0; } -EXPORT_IPV6_MOD(tcp_rcv_state_process); static inline void pr_drop_req(struct request_sock *req, __u16 port, int family) { @@ -7582,7 +7577,6 @@ u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops, return mss; } -EXPORT_IPV6_MOD_GPL(tcp_get_syncookie_mss); int tcp_conn_request(struct request_sock_ops *rsk_ops, const struct tcp_request_sock_ops *af_ops, @@ -7762,4 +7756,3 @@ drop: tcp_listendrop(sk); return 0; } -EXPORT_IPV6_MOD(tcp_conn_request); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2ea8253b737a..69ab236072e7 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -201,7 +201,6 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) return 0; } -EXPORT_IPV6_MOD_GPL(tcp_twsk_unique); static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len) @@ -362,7 +361,6 @@ failure: inet->inet_dport = 0; return err; } -EXPORT_IPV6_MOD(tcp_v4_connect); /* * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191. @@ -402,7 +400,6 @@ void tcp_v4_mtu_reduced(struct sock *sk) tcp_simple_retransmit(sk); } /* else let the usual retransmit timer handle it */ } -EXPORT_IPV6_MOD(tcp_v4_mtu_reduced); static void do_redirect(struct sk_buff *skb, struct sock *sk) { @@ -436,7 +433,6 @@ void tcp_req_err(struct sock *sk, u32 seq, bool abort) } reqsk_put(req); } -EXPORT_IPV6_MOD(tcp_req_err); /* TCP-LD (RFC 6069) logic */ void tcp_ld_RTO_revert(struct sock *sk, u32 seq) @@ -475,7 +471,6 @@ void tcp_ld_RTO_revert(struct sock *sk, u32 seq) tcp_retransmit_timer(sk); } } -EXPORT_IPV6_MOD(tcp_ld_RTO_revert); /* * This routine is called by the ICMP module when it gets some @@ -1222,7 +1217,6 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req) */ DEFINE_STATIC_KEY_DEFERRED_FALSE(tcp_md5_needed, HZ); -EXPORT_IPV6_MOD(tcp_md5_needed); static bool better_md5_match(struct tcp_md5sig_key *old, struct tcp_md5sig_key *new) { @@ -1281,7 +1275,6 @@ struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index, } return best_match; } -EXPORT_IPV6_MOD(__tcp_md5_do_lookup); static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk, const union tcp_md5_addr *addr, @@ -1328,7 +1321,6 @@ struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk, addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr; return tcp_md5_do_lookup(sk, l3index, addr, AF_INET); } -EXPORT_IPV6_MOD(tcp_v4_md5_lookup); static int tcp_md5sig_info_add(struct sock *sk, gfp_t gfp) { @@ -1423,7 +1415,6 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, return __tcp_md5_do_add(sk, addr, family, prefixlen, l3index, flags, newkey, newkeylen, GFP_KERNEL); } -EXPORT_IPV6_MOD(tcp_md5_do_add); int tcp_md5_key_copy(struct sock *sk, const union tcp_md5_addr *addr, int family, u8 prefixlen, int l3index, @@ -1451,7 +1442,6 @@ int tcp_md5_key_copy(struct sock *sk, const union tcp_md5_addr *addr, key->flags, key->key, key->keylen, sk_gfp_mask(sk, GFP_ATOMIC)); } -EXPORT_IPV6_MOD(tcp_md5_key_copy); int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family, u8 prefixlen, int l3index, u8 flags) @@ -1466,7 +1456,6 @@ int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family, kfree_rcu(key, rcu); return 0; } -EXPORT_IPV6_MOD(tcp_md5_do_del); void tcp_clear_md5_list(struct sock *sk) { @@ -1604,7 +1593,6 @@ tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, tcp_md5_hash_key(&ctx, key); md5_final(&ctx, md5_hash); } -EXPORT_IPV6_MOD(tcp_v4_md5_hash_skb); #endif @@ -1674,7 +1662,6 @@ drop: tcp_listendrop(sk); return 0; } -EXPORT_IPV6_MOD(tcp_v4_conn_request); /* @@ -1800,7 +1787,6 @@ put_and_exit: tcp_done(newsk); goto exit; } -EXPORT_IPV6_MOD(tcp_v4_syn_recv_sock); static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb) { @@ -2055,7 +2041,6 @@ no_coalesce: } return false; } -EXPORT_IPV6_MOD(tcp_add_backlog); static void tcp_v4_restore_cb(struct sk_buff *skb) { @@ -2367,7 +2352,6 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) sk->sk_rx_dst_ifindex = skb->skb_iif; } } -EXPORT_IPV6_MOD(inet_sk_rx_dst_set); const struct inet_connection_sock_af_ops ipv4_specific = { .queue_xmit = ip_queue_xmit, @@ -2380,7 +2364,6 @@ const struct inet_connection_sock_af_ops ipv4_specific = { .getsockopt = ip_getsockopt, .mtu_reduced = tcp_v4_mtu_reduced, }; -EXPORT_IPV6_MOD(ipv4_specific); #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = { @@ -2473,7 +2456,6 @@ void tcp_v4_destroy_sock(struct sock *sk) sk_sockets_allocated_dec(sk); } -EXPORT_IPV6_MOD(tcp_v4_destroy_sock); #ifdef CONFIG_PROC_FS /* Proc filesystem TCP sock list dumping. */ @@ -2709,7 +2691,6 @@ out: st->last_pos = *pos; return rc; } -EXPORT_IPV6_MOD(tcp_seq_start); void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) { @@ -2740,7 +2721,6 @@ out: st->last_pos = *pos; return rc; } -EXPORT_IPV6_MOD(tcp_seq_next); void tcp_seq_stop(struct seq_file *seq, void *v) { @@ -2758,7 +2738,6 @@ void tcp_seq_stop(struct seq_file *seq, void *v) break; } } -EXPORT_IPV6_MOD(tcp_seq_stop); static void get_openreq4(const struct request_sock *req, struct seq_file *f, int i) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index d350d794a959..199f0b579e89 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -287,7 +287,6 @@ out_put: inet_twsk_put(tw); return TCP_TW_SUCCESS; } -EXPORT_IPV6_MOD(tcp_timewait_state_process); static void tcp_time_wait_init(struct sock *sk, struct tcp_timewait_sock *tcptw) { @@ -523,7 +522,6 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst) tcp_set_ca_state(sk, TCP_CA_Open); } -EXPORT_IPV6_MOD_GPL(tcp_ca_openreq_child); static void smc_check_reset_syn_req(const struct tcp_sock *oldtp, struct request_sock *req, @@ -976,7 +974,6 @@ embryonic_reset: } return NULL; } -EXPORT_IPV6_MOD(tcp_check_req); /* * Queue segment on the new socket if the new socket is active, @@ -1018,4 +1015,3 @@ enum skb_drop_reason tcp_child_process(struct sock *parent, struct sock *child, sock_put(child); return reason; } -EXPORT_IPV6_MOD(tcp_child_process); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 35c3b0ab5a0c..8e99687526a6 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -272,7 +272,6 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss, WRITE_ONCE(*__window_clamp, min_t(__u32, U16_MAX << (*rcv_wscale), window_clamp)); } -EXPORT_IPV6_MOD(tcp_select_initial_window); /* Chose a new window to advertise, update state in tcp_sock for the * socket, and return result with RFC1323 scaling applied. The return @@ -1361,7 +1360,6 @@ void tcp_release_cb(struct sock *sk) if ((flags & TCPF_ACK_DEFERRED) && inet_csk_ack_scheduled(sk)) tcp_send_ack(sk); } -EXPORT_IPV6_MOD(tcp_release_cb); void __init tcp_tsq_work_init(void) { @@ -2023,7 +2021,6 @@ int tcp_mtu_to_mss(struct sock *sk, int pmtu) return __tcp_mtu_to_mss(sk, pmtu) - (tcp_sk(sk)->tcp_header_len - sizeof(struct tcphdr)); } -EXPORT_IPV6_MOD(tcp_mtu_to_mss); /* Inverse of above */ int tcp_mss_to_mtu(struct sock *sk, int mss) @@ -2096,7 +2093,6 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) return mss_now; } -EXPORT_IPV6_MOD(tcp_sync_mss); /* Compute the current effective MSS, taking SACKs and IP options, * and even PMTU discovery events into account. @@ -4077,7 +4073,6 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, return skb; } -EXPORT_IPV6_MOD(tcp_make_synack); static void tcp_ca_dst_init(struct sock *sk, const struct dst_entry *dst) { @@ -4658,4 +4653,3 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) } return res; } -EXPORT_IPV6_MOD(tcp_rtx_synack); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 5a14a53a3c9e..ea99988795e7 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -774,7 +774,6 @@ void tcp_set_keepalive(struct sock *sk, int val) else if (!val) tcp_delete_keepalive_timer(sk); } -EXPORT_IPV6_MOD_GPL(tcp_set_keepalive); static void tcp_keepalive_timer(struct timer_list *t) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 256388668263..e7b93b874f34 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -126,7 +126,6 @@ struct udp_table udp_table __read_mostly; long sysctl_udp_mem[3] __read_mostly; -EXPORT_IPV6_MOD(sysctl_udp_mem); DEFINE_PER_CPU(int, udp_memory_per_cpu_fw_alloc); EXPORT_PER_CPU_SYMBOL_GPL(udp_memory_per_cpu_fw_alloc); @@ -349,7 +348,6 @@ fail_unlock: fail: return error; } -EXPORT_IPV6_MOD(udp_lib_get_port); static int udp_v4_get_port(struct sock *sk, unsigned short snum) { @@ -415,7 +413,6 @@ u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport, return __inet_ehashfn(laddr, lport, faddr, fport, udp_ehash_secret + net_hash_mix(net)); } -EXPORT_IPV6_MOD(udp_ehashfn); /** * udp4_lib_lookup1() - Simplified lookup using primary hash (destination port) @@ -650,7 +647,6 @@ void udp_lib_hash4(struct sock *sk, u16 hash) spin_unlock_bh(&hslot->lock); } -EXPORT_IPV6_MOD(udp_lib_hash4); /* call with sock lock */ void udp4_hash4(struct sock *sk) @@ -666,7 +662,6 @@ void udp4_hash4(struct sock *sk) udp_lib_hash4(sk, hash); } -EXPORT_IPV6_MOD(udp4_hash4); #endif /* CONFIG_BASE_SMALL */ /* UDP is nearly always wildcards out the wazoo, it makes no sense to try @@ -802,11 +797,9 @@ static inline bool __udp_is_mcast_sock(struct net *net, const struct sock *sk, } DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key); -EXPORT_IPV6_MOD(udp_encap_needed_key); #if IS_ENABLED(CONFIG_IPV6) DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key); -EXPORT_IPV6_MOD(udpv6_encap_needed_key); #endif void udp_encap_enable(void) @@ -1026,7 +1019,6 @@ void udp_flush_pending_frames(struct sock *sk) ip_flush_pending_frames(sk); } } -EXPORT_IPV6_MOD(udp_flush_pending_frames); /** * udp4_hwcsum - handle outgoing HW checksumming @@ -1204,7 +1196,6 @@ out: WRITE_ONCE(up->pending, 0); return err; } -EXPORT_IPV6_MOD(udp_push_pending_frames); static int __udp_cmsg_send(struct cmsghdr *cmsg, u16 *gso_size) { @@ -1241,7 +1232,6 @@ int udp_cmsg_send(struct sock *sk, struct msghdr *msg, u16 *gso_size) return need_ip; } -EXPORT_IPV6_MOD_GPL(udp_cmsg_send); int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { @@ -1532,7 +1522,6 @@ void udp_splice_eof(struct socket *sock) udp_push_pending_frames(sk); release_sock(sk); } -EXPORT_IPV6_MOD_GPL(udp_splice_eof); #define UDP_SKB_IS_STATELESS 0x80000000 @@ -1647,7 +1636,6 @@ void udp_skb_destructor(struct sock *sk, struct sk_buff *skb) prefetch(&skb->data); udp_rmem_release(sk, udp_skb_truesize(skb), 1, false); } -EXPORT_IPV6_MOD(udp_skb_destructor); /* as above, but the caller held the rx queue lock, too */ static void udp_skb_dtor_locked(struct sock *sk, struct sk_buff *skb) @@ -1800,7 +1788,6 @@ drop: udp_drops_inc(sk); return err; } -EXPORT_IPV6_MOD_GPL(__udp_enqueue_schedule_skb); void udp_destruct_common(struct sock *sk) { @@ -1817,7 +1804,6 @@ void udp_destruct_common(struct sock *sk) udp_rmem_release(sk, total, 0, true); kfree(up->udp_prod_queue); } -EXPORT_IPV6_MOD_GPL(udp_destruct_common); static void udp_destruct_sock(struct sock *sk) { @@ -1855,7 +1841,6 @@ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len) skb_release_head_state(skb); __consume_stateless_skb(skb); } -EXPORT_IPV6_MOD_GPL(skb_consume_udp); static struct sk_buff *__first_packet_length(struct sock *sk, struct sk_buff_head *rcvq, @@ -1937,7 +1922,6 @@ int udp_ioctl(struct sock *sk, int cmd, int *karg) return 0; } -EXPORT_IPV6_MOD(udp_ioctl); struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, int *off, int *err) @@ -2032,7 +2016,6 @@ try_again: WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk)); return recv_actor(sk, skb); } -EXPORT_IPV6_MOD(udp_read_skb); /* * This should be easy, if there is something there we @@ -2154,7 +2137,6 @@ int udp_pre_connect(struct sock *sk, struct sockaddr_unsized *uaddr, return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, &addr_len); } -EXPORT_IPV6_MOD(udp_pre_connect); static int udp_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len) @@ -2204,7 +2186,6 @@ int udp_disconnect(struct sock *sk, int flags) release_sock(sk); return 0; } -EXPORT_IPV6_MOD(udp_disconnect); void udp_lib_unhash(struct sock *sk) { @@ -2236,7 +2217,6 @@ void udp_lib_unhash(struct sock *sk) spin_unlock_bh(&hslot->lock); } } -EXPORT_IPV6_MOD(udp_lib_unhash); /* * inet_rcv_saddr was changed, we must rehash secondary hash @@ -2306,7 +2286,6 @@ void udp_lib_rehash(struct sock *sk, u16 newhash, u16 newhash4) udp_sk(sk)->udp_portaddr_hash = newhash; } } -EXPORT_IPV6_MOD(udp_lib_rehash); static void udp_v4_rehash(struct sock *sk) { @@ -2470,7 +2449,6 @@ bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) } return false; } -EXPORT_IPV6_MOD(udp_sk_rx_dst_set); /* * Multicasts and broadcasts go to each listener. @@ -2999,7 +2977,6 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, return err; } -EXPORT_IPV6_MOD(udp_lib_setsockopt); static int udp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) @@ -3060,7 +3037,6 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, return -EFAULT; return 0; } -EXPORT_IPV6_MOD(udp_lib_getsockopt); static int udp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) @@ -3102,7 +3078,6 @@ __poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait) return mask; } -EXPORT_IPV6_MOD(udp_poll); int udp_abort(struct sock *sk, int err) { @@ -3125,7 +3100,6 @@ out: return 0; } -EXPORT_IPV6_MOD_GPL(udp_abort); struct proto udp_prot = { .name = "UDP", @@ -3245,7 +3219,6 @@ void *udp_seq_start(struct seq_file *seq, loff_t *pos) return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN; } -EXPORT_IPV6_MOD(udp_seq_start); void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos) { @@ -3259,7 +3232,6 @@ void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos) ++*pos; return sk; } -EXPORT_IPV6_MOD(udp_seq_next); void udp_seq_stop(struct seq_file *seq, void *v) { @@ -3271,7 +3243,6 @@ void udp_seq_stop(struct seq_file *seq, void *v) if (state->bucket <= udptable->mask) spin_unlock_bh(&udptable->hash[state->bucket].lock); } -EXPORT_IPV6_MOD(udp_seq_stop); /* ------------------------------------------------------------------------ */ static void udp4_format_sock(struct sock *sp, struct seq_file *f, diff --git a/net/psp/psp_sock.c b/net/psp/psp_sock.c index a85b0ed88842..07dc4cf741f3 100644 --- a/net/psp/psp_sock.c +++ b/net/psp/psp_sock.c @@ -291,4 +291,3 @@ void psp_reply_set_decrypted(const struct sock *sk, struct sk_buff *skb) skb->decrypted = 1; rcu_read_unlock(); } -EXPORT_IPV6_MOD_GPL(psp_reply_set_decrypted); From fde39f7df10b3dc150abb87c4718efba93cbc755 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Wed, 25 Mar 2026 13:08:44 +0100 Subject: [PATCH 0978/1561] ipv6: replace IS_BUILTIN(CONFIG_IPV6) with IS_ENABLED(CONFIG_IPV6) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As IPv6 is built-in only, it does not make sense to continue using IS_BUILTIN(CONFIG_IPV6). Therefore, replace it with IS_ENABLED() when necessary and drop it if it isn't valid anymore. Notice that there is still one instance related to ICMPv6, as it requires more changes it will be handle separately. Signed-off-by: Fernando Fernandez Mancera Tested-by: Ricardo B. Marlière Acked-by: Martin KaFai Lau Link: https://patch.msgid.link/20260325120928.15848-4-fmancera@suse.de Signed-off-by: Jakub Kicinski --- include/linux/indirect_call_wrapper.h | 2 +- include/net/ip6_fib.h | 2 +- net/core/filter.c | 14 +++++++------- net/ipv6/ip6_fib.c | 2 +- net/ipv6/ip6_offload.c | 4 ++-- net/ipv6/route.c | 6 ------ 6 files changed, 12 insertions(+), 18 deletions(-) diff --git a/include/linux/indirect_call_wrapper.h b/include/linux/indirect_call_wrapper.h index dc272b514a01..0e4340ecd857 100644 --- a/include/linux/indirect_call_wrapper.h +++ b/include/linux/indirect_call_wrapper.h @@ -57,7 +57,7 @@ * builtin, this macro simplify dealing with indirect calls with only ipv4/ipv6 * alternatives */ -#if IS_BUILTIN(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IPV6) #define INDIRECT_CALL_INET(f, f2, f1, ...) \ INDIRECT_CALL_2(f, f2, f1, __VA_ARGS__) #elif IS_ENABLED(CONFIG_INET) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 9f8b6814a96a..10f30d158340 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -599,7 +599,7 @@ static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric) void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i, bool offload, bool trap, bool offload_failed); -#if IS_BUILTIN(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL) +#if IS_ENABLED(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL) struct bpf_iter__ipv6_route { __bpf_md_ptr(struct bpf_iter_meta *, meta); __bpf_md_ptr(struct fib6_info *, rt); diff --git a/net/core/filter.c b/net/core/filter.c index 2f023999f046..c56821afaa0f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7590,7 +7590,7 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len ret = __cookie_v4_check((struct iphdr *)iph, th); break; -#if IS_BUILTIN(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IPV6) case 6: if (unlikely(iph_len < sizeof(struct ipv6hdr))) return -EINVAL; @@ -7660,7 +7660,7 @@ BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len, mss = tcp_v4_get_syncookie(sk, iph, th, &cookie); break; -#if IS_BUILTIN(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IPV6) case 6: if (unlikely(iph_len < sizeof(struct ipv6hdr))) return -EINVAL; @@ -8026,7 +8026,7 @@ static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv4_proto = { BPF_CALL_3(bpf_tcp_raw_gen_syncookie_ipv6, struct ipv6hdr *, iph, struct tcphdr *, th, u32, th_len) { -#if IS_BUILTIN(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IPV6) const u16 mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); u32 cookie; @@ -8078,7 +8078,7 @@ static const struct bpf_func_proto bpf_tcp_raw_check_syncookie_ipv4_proto = { BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv6, struct ipv6hdr *, iph, struct tcphdr *, th) { -#if IS_BUILTIN(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IPV6) if (__cookie_v6_check(iph, th) > 0) return 0; @@ -11964,7 +11964,7 @@ BPF_CALL_1(bpf_skc_to_tcp_timewait_sock, struct sock *, sk) return (unsigned long)sk; #endif -#if IS_BUILTIN(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IPV6) if (sk && sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_TIME_WAIT) return (unsigned long)sk; #endif @@ -11987,7 +11987,7 @@ BPF_CALL_1(bpf_skc_to_tcp_request_sock, struct sock *, sk) return (unsigned long)sk; #endif -#if IS_BUILTIN(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IPV6) if (sk && sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_NEW_SYN_RECV) return (unsigned long)sk; #endif @@ -12250,7 +12250,7 @@ __bpf_kfunc int bpf_sk_assign_tcp_reqsk(struct __sk_buff *s, struct sock *sk, ops = &tcp_request_sock_ops; min_mss = 536; break; -#if IS_BUILTIN(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): ops = &tcp6_request_sock_ops; min_mss = IPV6_MIN_MTU - 60; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 388ac88d741a..17f757e9c54a 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -2774,7 +2774,7 @@ static void ipv6_route_native_seq_stop(struct seq_file *seq, void *v) rcu_read_unlock(); } -#if IS_BUILTIN(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL) +#if defined(CONFIG_BPF_SYSCALL) static int ipv6_route_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta, void *v) diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index bd7f780e37a5..d8072ad6b8c4 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -286,7 +286,7 @@ not_same_flow: if (likely(proto == IPPROTO_TCP)) pp = tcp6_gro_receive(head, skb); -#if IS_BUILTIN(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IPV6) else if (likely(proto == IPPROTO_UDP)) pp = udp6_gro_receive(head, skb); #endif @@ -346,7 +346,7 @@ INDIRECT_CALLABLE_SCOPE int ipv6_gro_complete(struct sk_buff *skb, int nhoff) if (likely(ops == &net_hotdata.tcpv6_offload)) return tcp6_gro_complete(skb, nhoff); -#if IS_BUILTIN(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IPV6) if (ops == &net_hotdata.udpv6_offload) return udp6_gro_complete(skb, nhoff); #endif diff --git a/net/ipv6/route.c b/net/ipv6/route.c index cb521700cee7..08deb18dcc85 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -6826,7 +6826,6 @@ void __init ip6_route_init_special_entries(void) #endif } -#if IS_BUILTIN(CONFIG_IPV6) #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) DEFINE_BPF_ITER_FUNC(ipv6_route, struct bpf_iter_meta *meta, struct fib6_info *rt) @@ -6860,7 +6859,6 @@ static void bpf_iter_unregister(void) bpf_iter_unreg_target(&ipv6_route_reg_info); } #endif -#endif static const struct rtnl_msg_handler ip6_route_rtnl_msg_handlers[] __initconst_or_module = { {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_NEWROUTE, @@ -6921,12 +6919,10 @@ int __init ip6_route_init(void) if (ret) goto out_register_late_subsys; -#if IS_BUILTIN(CONFIG_IPV6) #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) ret = bpf_iter_register(); if (ret) goto out_register_late_subsys; -#endif #endif for_each_possible_cpu(cpu) { @@ -6961,10 +6957,8 @@ out_kmem_cache: void ip6_route_cleanup(void) { -#if IS_BUILTIN(CONFIG_IPV6) #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) bpf_iter_unregister(); -#endif #endif unregister_netdevice_notifier(&ip6_route_dev_notifier); unregister_pernet_subsys(&ip6_route_net_late_ops); From d2042d35f413b7131cc571655bbcb2c049489fe7 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Wed, 25 Mar 2026 13:08:45 +0100 Subject: [PATCH 0979/1561] ipv6: remove dynamic ICMPv6 sender registration infrastructure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As IPv6 is built-in only, there is no need to maintain the sender registration infrastructure used to allow built-in subsystems to send ICMPv6 messages when IPv6 was compiled as a module. Drop the registration mechanism and the __icmpv6_send() sender implementation. While icmpv6_send() users could be converted to icmp6_send() that doesn't seems necessary as none of them are using the force_saddr parameter. Signed-off-by: Fernando Fernandez Mancera Tested-by: Ricardo B. Marlière Link: https://patch.msgid.link/20260325120928.15848-5-fmancera@suse.de Signed-off-by: Jakub Kicinski --- include/linux/icmpv6.h | 29 ++------------------------ net/ipv6/icmp.c | 6 ------ net/ipv6/ip6_icmp.c | 46 +++--------------------------------------- 3 files changed, 5 insertions(+), 76 deletions(-) diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index e3b3b0fa2a8f..2bd9f2157e6c 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -15,38 +15,13 @@ static inline struct icmp6hdr *icmp6_hdr(const struct sk_buff *skb) #if IS_ENABLED(CONFIG_IPV6) -typedef void ip6_icmp_send_t(struct sk_buff *skb, u8 type, u8 code, __u32 info, - const struct in6_addr *force_saddr, - const struct inet6_skb_parm *parm); void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, const struct in6_addr *force_saddr, const struct inet6_skb_parm *parm); -#if IS_BUILTIN(CONFIG_IPV6) -static inline void __icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, - const struct inet6_skb_parm *parm) -{ - icmp6_send(skb, type, code, info, NULL, parm); -} -static inline int inet6_register_icmp_sender(ip6_icmp_send_t *fn) -{ - BUILD_BUG_ON(fn != icmp6_send); - return 0; -} -static inline int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn) -{ - BUILD_BUG_ON(fn != icmp6_send); - return 0; -} -#else -extern void __icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, - const struct inet6_skb_parm *parm); -extern int inet6_register_icmp_sender(ip6_icmp_send_t *fn); -extern int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn); -#endif static inline void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) { - __icmpv6_send(skb, type, code, info, IP6CB(skb)); + icmp6_send(skb, type, code, info, NULL, IP6CB(skb)); } int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type, @@ -58,7 +33,7 @@ void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info); static inline void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info) { struct inet6_skb_parm parm = { 0 }; - __icmpv6_send(skb_in, type, code, info, &parm); + icmp6_send(skb_in, type, code, info, NULL, &parm); } #endif diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 813d2e9edb8b..8e8d7bd84a4c 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -1288,13 +1288,8 @@ int __init icmpv6_init(void) if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0) goto fail; - err = inet6_register_icmp_sender(icmp6_send); - if (err) - goto sender_reg_err; return 0; -sender_reg_err: - inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6); fail: pr_err("Failed to register ICMP6 protocol\n"); return err; @@ -1302,7 +1297,6 @@ fail: void icmpv6_cleanup(void) { - inet6_unregister_icmp_sender(icmp6_send); inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6); } diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c index 233914b63bdb..e43ea9492332 100644 --- a/net/ipv6/ip6_icmp.c +++ b/net/ipv6/ip6_icmp.c @@ -7,47 +7,8 @@ #include -#if IS_ENABLED(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IPV6) && IS_ENABLED(CONFIG_NF_NAT) -#if !IS_BUILTIN(CONFIG_IPV6) - -static ip6_icmp_send_t __rcu *ip6_icmp_send; - -int inet6_register_icmp_sender(ip6_icmp_send_t *fn) -{ - return (cmpxchg((ip6_icmp_send_t **)&ip6_icmp_send, NULL, fn) == NULL) ? - 0 : -EBUSY; -} -EXPORT_SYMBOL(inet6_register_icmp_sender); - -int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn) -{ - int ret; - - ret = (cmpxchg((ip6_icmp_send_t **)&ip6_icmp_send, fn, NULL) == fn) ? - 0 : -EINVAL; - - synchronize_net(); - - return ret; -} -EXPORT_SYMBOL(inet6_unregister_icmp_sender); - -void __icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, - const struct inet6_skb_parm *parm) -{ - ip6_icmp_send_t *send; - - rcu_read_lock(); - send = rcu_dereference(ip6_icmp_send); - if (send) - send(skb, type, code, info, NULL, parm); - rcu_read_unlock(); -} -EXPORT_SYMBOL(__icmpv6_send); -#endif - -#if IS_ENABLED(CONFIG_NF_NAT) #include void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info) { @@ -60,7 +21,7 @@ void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info) ct = nf_ct_get(skb_in, &ctinfo); if (!ct || !(READ_ONCE(ct->status) & IPS_NAT_MASK)) { - __icmpv6_send(skb_in, type, code, info, &parm); + icmp6_send(skb_in, type, code, info, NULL, &parm); return; } @@ -76,11 +37,10 @@ void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info) orig_ip = ipv6_hdr(skb_in)->saddr; dir = CTINFO2DIR(ctinfo); ipv6_hdr(skb_in)->saddr = ct->tuplehash[dir].tuple.src.u3.in6; - __icmpv6_send(skb_in, type, code, info, &parm); + icmp6_send(skb_in, type, code, info, NULL, &parm); ipv6_hdr(skb_in)->saddr = orig_ip; out: consume_skb(cloned_skb); } EXPORT_SYMBOL(icmpv6_ndo_send); #endif -#endif From 4b70b20215049472f200ed563a7a0d44a7188fd3 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Wed, 25 Mar 2026 13:08:46 +0100 Subject: [PATCH 0980/1561] ipv6: prepare headers for ipv6_stub removal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In preparation for dropping ipv6_stub and converting its users to direct function calls, introduce static inline dummy functions and fallback macros in the IPv6 networking headers. In addition, introduce checks on fib6_nh_init(), ip6_dst_lookup_flow() and ip6_fragment() to avoid a crash due to ipv6.disable=1 set during booting. The other functions are safe as they cannot be called with ipv6.disable=1 set. These fallbacks ensure that when CONFIG_IPV6 is completely disabled, there are no compiling or linking errors due to code paths not guarded by preprocessor macro IS_ENABLED(CONFIG_IPV6). In addition, export ndisc_send_na(), ip6_route_input() and ip6_fragment(). Signed-off-by: Fernando Fernandez Mancera Tested-by: Ricardo B. Marlière Link: https://patch.msgid.link/20260325120928.15848-6-fmancera@suse.de Signed-off-by: Jakub Kicinski --- include/net/ip6_fib.h | 32 ++++++++++++++++++++++++++++++++ include/net/ip6_route.h | 26 ++++++++++++++++++++++++++ include/net/ipv6.h | 10 ++++++++++ include/net/ndisc.h | 6 +++++- net/ipv6/ip6_output.c | 8 ++++++++ net/ipv6/ndisc.c | 1 + net/ipv6/route.c | 6 ++++++ 7 files changed, 88 insertions(+), 1 deletion(-) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 10f30d158340..20e1231262d6 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -486,11 +486,30 @@ void rt6_get_prefsrc(const struct rt6_info *rt, struct in6_addr *addr) rcu_read_unlock(); } +#if IS_ENABLED(CONFIG_IPV6) int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack); void fib6_nh_release(struct fib6_nh *fib6_nh); void fib6_nh_release_dsts(struct fib6_nh *fib6_nh); +#else +static inline int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, + struct fib6_config *cfg, gfp_t gfp_flags, + struct netlink_ext_ack *extack) +{ + NL_SET_ERR_MSG(extack, "IPv6 support not enabled in kernel"); + return -EAFNOSUPPORT; +} + +static inline void fib6_nh_release(struct fib6_nh *fib6_nh) +{ +} + +static inline void fib6_nh_release_dsts(struct fib6_nh *fib6_nh) +{ +} +#endif + int call_fib6_entry_notifiers(struct net *net, enum fib_event_type event_type, @@ -502,8 +521,15 @@ int call_fib6_multipath_entry_notifiers(struct net *net, unsigned int nsiblings, struct netlink_ext_ack *extack); int call_fib6_entry_notifiers_replace(struct net *net, struct fib6_info *rt); +#if IS_ENABLED(CONFIG_IPV6) void fib6_rt_update(struct net *net, struct fib6_info *rt, struct nl_info *info); +#else +static inline void fib6_rt_update(struct net *net, struct fib6_info *rt, + struct nl_info *info) +{ +} +#endif void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, unsigned int flags); @@ -588,7 +614,13 @@ int fib6_tables_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack); void fib6_update_sernum(struct net *net, struct fib6_info *rt); +#if IS_ENABLED(CONFIG_IPV6) void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt); +#else +static inline void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt) +{ +} +#endif void fib6_update_sernum_stub(struct net *net, struct fib6_info *f6i); void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val); diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 0c8eeb6abe7a..09ffe0f13ce7 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -77,7 +77,14 @@ static inline bool rt6_qualify_for_ecmp(const struct fib6_info *f6i) f6i->fib6_nh->fib_nh_gw_family; } +#if IS_ENABLED(CONFIG_IPV6) void ip6_route_input(struct sk_buff *skb); +#else +static inline void ip6_route_input(struct sk_buff *skb) +{ +} +#endif + struct dst_entry *ip6_route_input_lookup(struct net *net, struct net_device *dev, struct flowi6 *fl6, @@ -119,7 +126,15 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack); int ip6_ins_rt(struct net *net, struct fib6_info *f6i); +#if IS_ENABLED(CONFIG_IPV6) int ip6_del_rt(struct net *net, struct fib6_info *f6i, bool skip_notify); +#else +static inline int ip6_del_rt(struct net *net, struct fib6_info *f6i, + bool skip_notify) +{ + return -EAFNOSUPPORT; +} +#endif void rt6_flush_exceptions(struct fib6_info *f6i); void rt6_age_exceptions(struct fib6_info *f6i, struct fib6_gc_args *gc_args, @@ -270,8 +285,19 @@ static inline bool ipv6_anycast_destination(const struct dst_entry *dst, return __ipv6_anycast_destination(&rt->rt6i_dst, rt->rt6i_flags, daddr); } +#if IS_ENABLED(CONFIG_IPV6) int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int (*output)(struct net *, struct sock *, struct sk_buff *)); +#else +static inline int ip6_fragment(struct net *net, struct sock *sk, + struct sk_buff *skb, + int (*output)(struct net *, struct sock *, + struct sk_buff *)) +{ + kfree_skb(skb); + return -EAFNOSUPPORT; +} +#endif /* Variant of dst_mtu() for IPv6 users */ static inline u32 dst6_mtu(const struct dst_entry *dst) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 0958cc5c6ec3..f99f273341f0 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1044,8 +1044,18 @@ static inline struct sk_buff *ip6_finish_skb(struct sock *sk) int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6); +#if IS_ENABLED(CONFIG_IPV6) struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst); +#else +static inline struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, + struct flowi6 *fl6, + const struct in6_addr *final_dst) +{ + return ERR_PTR(-EAFNOSUPPORT); +} +#endif + struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst, bool connected); diff --git a/include/net/ndisc.h b/include/net/ndisc.h index d38783a2ce57..19e2a177bd29 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -406,13 +406,17 @@ static inline void __ipv6_confirm_neigh_stub(struct net_device *dev, static inline struct neighbour *ip_neigh_gw6(struct net_device *dev, const void *addr) { +#if IS_ENABLED(CONFIG_IPV6) struct neighbour *neigh; neigh = __ipv6_neigh_lookup_noref_stub(dev, addr); if (unlikely(!neigh)) - neigh = __neigh_create(ipv6_stub->nd_tbl, addr, dev, false); + neigh = __neigh_create(&nd_tbl, addr, dev, false); return neigh; +#else + return ERR_PTR(-EAFNOSUPPORT); +#endif } int ndisc_init(void); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 8e2a6b28cea7..63abc4fddaee 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -873,6 +873,11 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, __be32 frag_id; u8 *prevhdr, nexthdr = 0; + if (!ipv6_mod_enabled()) { + kfree_skb(skb); + return -EAFNOSUPPORT; + } + err = ip6_find_1stfragopt(skb, &prevhdr); if (err < 0) goto fail; @@ -1045,6 +1050,7 @@ fail: kfree_skb(skb); return err; } +EXPORT_SYMBOL_GPL(ip6_fragment); static inline int ip6_rt_check(const struct rt6key *rt_key, const struct in6_addr *fl_addr, @@ -1256,6 +1262,8 @@ struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, st struct dst_entry *dst = NULL; int err; + if (!ipv6_mod_enabled()) + return ERR_PTR(-EAFNOSUPPORT); err = ip6_dst_lookup_tail(net, sk, &dst, fl6); if (err) return ERR_PTR(err); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index f6a5d8c73af9..f76fb8a85452 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -576,6 +576,7 @@ void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr, ndisc_send_skb(skb, daddr, src_addr); } +EXPORT_SYMBOL_GPL(ndisc_send_na); static void ndisc_send_unsol_na(struct net_device *dev) { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 08deb18dcc85..19eb6b702227 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2655,6 +2655,7 @@ void ip6_route_input(struct sk_buff *skb) skb_dst_set_noref(skb, ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags)); } +EXPORT_SYMBOL_GPL(ip6_route_input); INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table, @@ -3585,6 +3586,11 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, struct inet6_dev *idev = NULL; int err; + if (!ipv6_mod_enabled()) { + NL_SET_ERR_MSG(extack, "IPv6 support not enabled in kernel"); + return -EAFNOSUPPORT; + } + fib6_nh->fib_nh_family = AF_INET6; #ifdef CONFIG_IPV6_ROUTER_PREF fib6_nh->last_probe = jiffies; From 29ae61b2fe7eeafc921a1481230aac4431520418 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Wed, 25 Mar 2026 13:08:47 +0100 Subject: [PATCH 0981/1561] drivers: net: drop ipv6_stub usage and use direct function calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As IPv6 is built-in only, the ipv6_stub infrastructure is no longer necessary. Convert all drivers currently utilizing ipv6_stub to make direct function calls. The fallback functions introduced previously will prevent linkage errors when CONFIG_IPV6 is disabled. Signed-off-by: Fernando Fernandez Mancera Tested-by: Ricardo B. Marlière Reviewed-by: Jason A. Donenfeld Reviewed-by: Antonio Quartulli Reviewed-by: Edward Cree Link: https://patch.msgid.link/20260325120928.15848-7-fmancera@suse.de Signed-off-by: Jakub Kicinski --- drivers/infiniband/core/addr.c | 3 +-- drivers/infiniband/sw/rxe/rxe_net.c | 6 +++--- .../ethernet/mellanox/mlx5/core/en/rep/neigh.c | 9 +++++---- .../net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 3 +-- .../mellanox/mlx5/core/en/tc_tun_encap.c | 2 +- .../mellanox/mlx5/core/en_accel/ipsec.c | 1 - .../net/ethernet/mellanox/mlx5/core/en_rep.c | 1 - drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 1 - .../net/ethernet/netronome/nfp/flower/action.c | 2 +- .../ethernet/netronome/nfp/flower/tunnel_conf.c | 7 +++---- drivers/net/ethernet/sfc/tc_counters.c | 2 +- drivers/net/ethernet/sfc/tc_encap_actions.c | 5 ++--- drivers/net/geneve.c | 1 - drivers/net/gtp.c | 2 +- drivers/net/ovpn/peer.c | 3 +-- drivers/net/ovpn/udp.c | 3 +-- drivers/net/usb/cdc_mbim.c | 17 +++++++++-------- drivers/net/vxlan/vxlan_core.c | 11 +++++------ drivers/net/vxlan/vxlan_multicast.c | 6 ++---- drivers/net/wireguard/socket.c | 3 +-- drivers/net/wireless/intel/ipw2x00/ipw2100.c | 2 +- net/bridge/br_arp_nd_proxy.c | 3 +-- 22 files changed, 40 insertions(+), 53 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 866746695712..48d4b06384ec 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -41,7 +41,6 @@ #include #include #include -#include #include #include #include @@ -411,7 +410,7 @@ static int addr6_resolve(struct sockaddr *src_sock, fl6.saddr = src_in->sin6_addr; fl6.flowi6_oif = addr->bound_dev_if; - dst = ipv6_stub->ipv6_dst_lookup_flow(addr->net, NULL, &fl6, NULL); + dst = ip6_dst_lookup_flow(addr->net, NULL, &fl6, NULL); if (IS_ERR(dst)) return PTR_ERR(dst); diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 0bd0902b11f7..cbc646a30003 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -138,9 +138,9 @@ static struct dst_entry *rxe_find_route6(struct rxe_qp *qp, memcpy(&fl6.daddr, daddr, sizeof(*daddr)); fl6.flowi6_proto = IPPROTO_UDP; - ndst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(recv_sockets.sk6->sk), - recv_sockets.sk6->sk, &fl6, - NULL); + ndst = ip6_dst_lookup_flow(sock_net(recv_sockets.sk6->sk), + recv_sockets.sk6->sk, &fl6, + NULL); if (IS_ERR(ndst)) { rxe_dbg_qp(qp, "no route to %pI6\n", daddr); return NULL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c index d220b045b331..648f4521c096 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "neigh.h" #include "tc.h" #include "en_rep.h" @@ -18,8 +19,8 @@ static unsigned long mlx5e_rep_ipv6_interval(void) { - if (IS_ENABLED(CONFIG_IPV6) && ipv6_stub->nd_tbl) - return NEIGH_VAR(&ipv6_stub->nd_tbl->parms, DELAY_PROBE_TIME); + if (IS_ENABLED(CONFIG_IPV6) && ipv6_mod_enabled()) + return NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME); return ~0UL; } @@ -217,7 +218,7 @@ static int mlx5e_rep_netevent_event(struct notifier_block *nb, case NETEVENT_NEIGH_UPDATE: n = ptr; #if IS_ENABLED(CONFIG_IPV6) - if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl) + if (n->tbl != &nd_tbl && n->tbl != &arp_tbl) #else if (n->tbl != &arp_tbl) #endif @@ -238,7 +239,7 @@ static int mlx5e_rep_netevent_event(struct notifier_block *nb, * done per device delay prob time parameter. */ #if IS_ENABLED(CONFIG_IPV6) - if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl)) + if (!p->dev || (p->tbl != &nd_tbl && p->tbl != &arp_tbl)) #else if (!p->dev || p->tbl != &arp_tbl) #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index a14f216048cd..de74dbfe7b20 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -453,8 +453,7 @@ static int mlx5e_route_lookup_ipv6_get(struct mlx5e_priv *priv, if (tunnel && tunnel->get_remote_ifindex) attr->fl.fl6.flowi6_oif = tunnel->get_remote_ifindex(dev); - dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(dev), NULL, &attr->fl.fl6, - NULL); + dst = ip6_dst_lookup_flow(dev_net(dev), NULL, &attr->fl.fl6, NULL); if (IS_ERR(dst)) return PTR_ERR(dst); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index bfd401bee9e8..8b827201935e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -402,7 +402,7 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) tbl = &arp_tbl; #if IS_ENABLED(CONFIG_IPV6) else if (m_neigh->family == AF_INET6) - tbl = ipv6_stub->nd_tbl; + tbl = &nd_tbl; #endif else return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 64e13747084e..a52e12c3c95a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -36,7 +36,6 @@ #include #include #include -#include #include "en.h" #include "eswitch.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 8992f0f7a870..ba6c0f38cc73 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -38,7 +38,6 @@ #include #include #include -#include #include "eswitch.h" #include "en.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 397a93584fd6..a9001d1c902f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -41,7 +41,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index aca2a7417af3..ae2f8b31adfb 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -470,7 +470,7 @@ nfp_fl_set_tun(struct nfp_app *app, struct nfp_fl_set_tun *set_tun, flow.daddr = ip_tun->key.u.ipv6.dst; flow.flowi4_proto = IPPROTO_UDP; - dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &flow, NULL); + dst = ip6_dst_lookup_flow(net, NULL, &flow, NULL); if (!IS_ERR(dst)) { set_tun->ttl = ip6_dst_hoplimit(dst); dst_release(dst); diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index 0cef0e2b85d0..ca30702f8878 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -650,7 +650,7 @@ static void nfp_tun_neigh_update(struct work_struct *work) flow6.daddr = *(struct in6_addr *)n->primary_key; if (!neigh_invalid) { struct dst_entry *dst; - /* Use ipv6_dst_lookup_flow to populate flow6->saddr + /* Use ip6_dst_lookup_flow to populate flow6->saddr * and other fields. This information is only needed * for new entries, lookup can be skipped when an entry * gets invalidated - as only the daddr is needed for @@ -730,7 +730,7 @@ nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, return NOTIFY_DONE; } #if IS_ENABLED(CONFIG_IPV6) - if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl) + if (n->tbl != &nd_tbl && n->tbl != &arp_tbl) #else if (n->tbl != &arp_tbl) #endif @@ -815,8 +815,7 @@ void nfp_tunnel_request_route_v6(struct nfp_app *app, struct sk_buff *skb) flow.flowi6_proto = IPPROTO_UDP; #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) - dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(netdev), NULL, &flow, - NULL); + dst = ip6_dst_lookup_flow(dev_net(netdev), NULL, &flow, NULL); if (IS_ERR(dst)) goto fail_rcu_unlock; #else diff --git a/drivers/net/ethernet/sfc/tc_counters.c b/drivers/net/ethernet/sfc/tc_counters.c index d168282f30bf..b84235e93ffe 100644 --- a/drivers/net/ethernet/sfc/tc_counters.c +++ b/drivers/net/ethernet/sfc/tc_counters.c @@ -112,7 +112,7 @@ static void efx_tc_counter_work(struct work_struct *work) encap->neigh->egdev); else #if IS_ENABLED(CONFIG_IPV6) - n = neigh_lookup(ipv6_stub->nd_tbl, + n = neigh_lookup(&nd_tbl, &encap->neigh->dst_ip6, encap->neigh->egdev); #else diff --git a/drivers/net/ethernet/sfc/tc_encap_actions.c b/drivers/net/ethernet/sfc/tc_encap_actions.c index da35705cc5e1..db222abef53b 100644 --- a/drivers/net/ethernet/sfc/tc_encap_actions.c +++ b/drivers/net/ethernet/sfc/tc_encap_actions.c @@ -149,8 +149,7 @@ static int efx_bind_neigh(struct efx_nic *efx, #if IS_ENABLED(CONFIG_IPV6) struct dst_entry *dst; - dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &flow6, - NULL); + dst = ip6_dst_lookup_flow(net, NULL, &flow6, NULL); rc = PTR_ERR_OR_ZERO(dst); if (rc) { NL_SET_ERR_MSG_MOD(extack, "Failed to lookup route for IPv6 encap"); @@ -531,7 +530,7 @@ static int efx_neigh_event(struct efx_nic *efx, struct neighbour *n) if (n->tbl == &arp_tbl) { keysize = sizeof(keys.dst_ip); #if IS_ENABLED(CONFIG_IPV6) - } else if (n->tbl == ipv6_stub->nd_tbl) { + } else if (n->tbl == &nd_tbl) { ipv6 = true; keysize = sizeof(keys.dst_ip6); #endif diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 01cdd06102e0..c6563367d382 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index e8949f556209..70b9e58b9b78 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -374,7 +374,7 @@ static struct rt6_info *ip6_route_output_gtp(struct net *net, fl6->saddr = *saddr; fl6->flowi6_proto = sk->sk_protocol; - dst = ipv6_stub->ipv6_dst_lookup_flow(net, sk, fl6, NULL); + dst = ip6_dst_lookup_flow(net, sk, fl6, NULL); if (IS_ERR(dst)) return ERR_PTR(-ENETUNREACH); diff --git a/drivers/net/ovpn/peer.c b/drivers/net/ovpn/peer.c index 26b55d813f0e..c02dfab51a6e 100644 --- a/drivers/net/ovpn/peer.c +++ b/drivers/net/ovpn/peer.c @@ -827,8 +827,7 @@ static struct in6_addr ovpn_nexthop_from_rt6(struct ovpn_priv *ovpn, .daddr = dest, }; - entry = ipv6_stub->ipv6_dst_lookup_flow(dev_net(ovpn->dev), NULL, &fl, - NULL); + entry = ip6_dst_lookup_flow(dev_net(ovpn->dev), NULL, &fl, NULL); if (IS_ERR(entry)) { net_dbg_ratelimited("%s: no route to host %pI6c\n", netdev_name(ovpn->dev), &dest); diff --git a/drivers/net/ovpn/udp.c b/drivers/net/ovpn/udp.c index 272b535ecaad..059e896b4a2f 100644 --- a/drivers/net/ovpn/udp.c +++ b/drivers/net/ovpn/udp.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -251,7 +250,7 @@ static int ovpn_udp6_output(struct ovpn_peer *peer, struct ovpn_bind *bind, dst_cache_reset(cache); } - dst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(sk), sk, &fl, NULL); + dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl, NULL); if (IS_ERR(dst)) { ret = PTR_ERR(dst); net_dbg_ratelimited("%s: no route to host %pISpc: %d\n", diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index dbf01210b0e7..877fb0ed7d3d 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -20,7 +20,6 @@ #include #include #include -#include #include /* alternative VLAN for IP session 0 if not untagged */ @@ -302,6 +301,7 @@ error: return NULL; } +#if IS_ENABLED(CONFIG_IPV6) /* Some devices are known to send Neighbor Solicitation messages and * require Neighbor Advertisement replies. The IPv6 core will not * respond since IFF_NOARP is set, so we must handle them ourselves. @@ -342,12 +342,11 @@ static void do_neigh_solicit(struct usbnet *dev, u8 *buf, u16 tci) is_router = !!READ_ONCE(in6_dev->cnf.forwarding); in6_dev_put(in6_dev); - /* ipv6_stub != NULL if in6_dev_get returned an inet6_dev */ - ipv6_stub->ndisc_send_na(netdev, &iph->saddr, &msg->target, - is_router /* router */, - true /* solicited */, - false /* override */, - true /* inc_opt */); + ndisc_send_na(netdev, &iph->saddr, &msg->target, + is_router /* router */, + true /* solicited */, + false /* override */, + true /* inc_opt */); out: dev_put(netdev); } @@ -362,7 +361,7 @@ static bool is_neigh_solicit(u8 *buf, size_t len) msg->icmph.icmp6_code == 0 && msg->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION); } - +#endif /* IPV6 */ static struct sk_buff *cdc_mbim_process_dgram(struct usbnet *dev, u8 *buf, size_t len, u16 tci) { @@ -378,8 +377,10 @@ static struct sk_buff *cdc_mbim_process_dgram(struct usbnet *dev, u8 *buf, size_ proto = htons(ETH_P_IP); break; case 0x60: +#if IS_ENABLED(CONFIG_IPV6) if (is_neigh_solicit(buf, len)) do_neigh_solicit(dev, buf, tci); +#endif proto = htons(ETH_P_IPV6); break; default: diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 17c941aac32d..b5fbd03418b6 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -2045,7 +2044,7 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni) ipv6_addr_is_multicast(&msg->target)) goto out; - n = neigh_lookup(ipv6_stub->nd_tbl, &msg->target, dev); + n = neigh_lookup(&nd_tbl, &msg->target, dev); if (n) { struct vxlan_rdst *rdst = NULL; @@ -2130,15 +2129,15 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb) { struct ipv6hdr *pip6; - /* check if nd_tbl is not initiliazed due to - * ipv6.disable=1 set during boot + /* check if ipv6.disable=1 set during boot was set + * during booting so nd_tbl is not initialized */ - if (!ipv6_stub->nd_tbl) + if (!ipv6_mod_enabled()) return false; if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) return false; pip6 = ipv6_hdr(skb); - n = neigh_lookup(ipv6_stub->nd_tbl, &pip6->daddr, dev); + n = neigh_lookup(&nd_tbl, &pip6->daddr, dev); if (!n && (vxlan->cfg.flags & VXLAN_F_L3MISS)) { union vxlan_addr ipa = { .sin6.sin6_addr = pip6->daddr, diff --git a/drivers/net/vxlan/vxlan_multicast.c b/drivers/net/vxlan/vxlan_multicast.c index a7f2d67dc61b..b0e80bca855c 100644 --- a/drivers/net/vxlan/vxlan_multicast.c +++ b/drivers/net/vxlan/vxlan_multicast.c @@ -39,8 +39,7 @@ int vxlan_igmp_join(struct vxlan_dev *vxlan, union vxlan_addr *rip, sk = sock6->sock->sk; lock_sock(sk); - ret = ipv6_stub->ipv6_sock_mc_join(sk, ifindex, - &ip->sin6.sin6_addr); + ret = ipv6_sock_mc_join(sk, ifindex, &ip->sin6.sin6_addr); release_sock(sk); #endif } @@ -73,8 +72,7 @@ int vxlan_igmp_leave(struct vxlan_dev *vxlan, union vxlan_addr *rip, sk = sock6->sock->sk; lock_sock(sk); - ret = ipv6_stub->ipv6_sock_mc_drop(sk, ifindex, - &ip->sin6.sin6_addr); + ret = ipv6_sock_mc_drop(sk, ifindex, &ip->sin6.sin6_addr); release_sock(sk); #endif } diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c index 253488f8c00f..c362c78d908e 100644 --- a/drivers/net/wireguard/socket.c +++ b/drivers/net/wireguard/socket.c @@ -136,8 +136,7 @@ static int send6(struct wg_device *wg, struct sk_buff *skb, if (cache) dst_cache_reset(cache); } - dst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(sock), sock, &fl, - NULL); + dst = ip6_dst_lookup_flow(sock_net(sock), sock, &fl, NULL); if (IS_ERR(dst)) { ret = PTR_ERR(dst); net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n", diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2100.c b/drivers/net/wireless/intel/ipw2x00/ipw2100.c index 248a051da52d..c11428485dcc 100644 --- a/drivers/net/wireless/intel/ipw2x00/ipw2100.c +++ b/drivers/net/wireless/intel/ipw2x00/ipw2100.c @@ -4838,7 +4838,7 @@ static int ipw2100_system_config(struct ipw2100_priv *priv, int batch_mode) /* If IPv6 is configured in the kernel then we don't want to filter out all * of the multicast packets as IPv6 needs some. */ -#if !defined(CONFIG_IPV6) && !defined(CONFIG_IPV6_MODULE) +#if !defined(CONFIG_IPV6) cmd.host_command = ADD_MULTICAST; cmd.host_command_sequence = 0; cmd.host_command_length = 0; diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c index 1e2b51769eec..c06386eda47f 100644 --- a/net/bridge/br_arp_nd_proxy.c +++ b/net/bridge/br_arp_nd_proxy.c @@ -17,7 +17,6 @@ #include #include #include -#include #if IS_ENABLED(CONFIG_IPV6) #include #endif @@ -455,7 +454,7 @@ void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br, return; } - n = neigh_lookup(ipv6_stub->nd_tbl, &msg->target, vlandev); + n = neigh_lookup(&nd_tbl, &msg->target, vlandev); if (n) { struct net_bridge_fdb_entry *f; From d98adfbdd5c014f73d5f5e6b43735cc1b2d9450a Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Wed, 25 Mar 2026 13:08:48 +0100 Subject: [PATCH 0982/1561] ipv4: drop ipv6_stub usage and use direct function calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As IPv6 is built-in only, the ipv6_stub infrastructure is no longer necessary. The IPv4 stack interacts with IPv6 mainly to support IPv4 routes with IPv6 next-hops (RFC 8950). Convert all these cross-family calls from ipv6_stub to direct function calls. The fallback functions introduced previously will prevent linkage errors when CONFIG_IPV6 is disabled. Signed-off-by: Fernando Fernandez Mancera Tested-by: Ricardo B. Marlière Link: https://patch.msgid.link/20260325120928.15848-8-fmancera@suse.de Signed-off-by: Jakub Kicinski --- net/ipv4/fib_semantics.c | 16 ++++++++-------- net/ipv4/icmp.c | 2 +- net/ipv4/nexthop.c | 28 +++++++++++++++------------- net/ipv4/route.c | 4 ++-- net/ipv4/udp.c | 7 ++----- 5 files changed, 28 insertions(+), 29 deletions(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 01cb587866d8..3e8fadc28798 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -585,9 +585,8 @@ static int fib_detect_death(struct fib_info *fi, int order, if (likely(nhc->nhc_gw_family == AF_INET)) n = neigh_lookup(&arp_tbl, &nhc->nhc_gw.ipv4, nhc->nhc_dev); - else if (nhc->nhc_gw_family == AF_INET6) - n = neigh_lookup(ipv6_stub->nd_tbl, &nhc->nhc_gw.ipv6, - nhc->nhc_dev); + else if (IS_ENABLED(CONFIG_IPV6) && nhc->nhc_gw_family == AF_INET6) + n = neigh_lookup(&nd_tbl, &nhc->nhc_gw.ipv6, nhc->nhc_dev); else n = NULL; @@ -1083,7 +1082,7 @@ static int fib_check_nh_v6_gw(struct net *net, struct fib_nh *nh, struct fib6_nh fib6_nh = {}; int err; - err = ipv6_stub->fib6_nh_init(net, &fib6_nh, &cfg, GFP_KERNEL, extack); + err = fib6_nh_init(net, &fib6_nh, &cfg, GFP_KERNEL, extack); if (!err) { nh->fib_nh_dev = fib6_nh.fib_nh_dev; netdev_hold(nh->fib_nh_dev, &nh->fib_nh_dev_tracker, @@ -1091,7 +1090,7 @@ static int fib_check_nh_v6_gw(struct net *net, struct fib_nh *nh, nh->fib_nh_oif = nh->fib_nh_dev->ifindex; nh->fib_nh_scope = RT_SCOPE_LINK; - ipv6_stub->fib6_nh_release(&fib6_nh); + fib6_nh_release(&fib6_nh); } return err; @@ -2147,9 +2146,10 @@ static bool fib_good_nh(const struct fib_nh *nh) if (likely(nh->fib_nh_gw_family == AF_INET)) n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev, (__force u32)nh->fib_nh_gw4); - else if (nh->fib_nh_gw_family == AF_INET6) - n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev, - &nh->fib_nh_gw6); + else if (IS_ENABLED(CONFIG_IPV6) && + nh->fib_nh_gw_family == AF_INET6) + n = __ipv6_neigh_lookup_noref(nh->fib_nh_dev, + &nh->fib_nh_gw6); else n = NULL; if (n) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 223e5efa5ac7..fb397fbb28fc 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1344,7 +1344,7 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr) case ICMP_AFI_IP6: if (iio->ident.addr.ctype3_hdr.addrlen != sizeof(struct in6_addr)) goto send_mal_query; - dev = ipv6_stub->ipv6_dev_find(net, &iio->ident.addr.ip_addr.ipv6_addr, dev); + dev = ipv6_dev_find(net, &iio->ident.addr.ip_addr.ipv6_addr, dev); dev_hold(dev); break; #endif diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index c942f1282236..9703e67d552a 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include @@ -510,7 +510,7 @@ static void nexthop_free_single(struct nexthop *nh) fib_nh_release(nh->net, &nhi->fib_nh); break; case AF_INET6: - ipv6_stub->fib6_nh_release(&nhi->fib6_nh); + fib6_nh_release(&nhi->fib6_nh); break; } kfree(nhi); @@ -1367,7 +1367,7 @@ static bool ipv6_good_nh(const struct fib6_nh *nh) rcu_read_lock(); - n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev, &nh->fib_nh_gw6); + n = __ipv6_neigh_lookup_noref(nh->fib_nh_dev, &nh->fib_nh_gw6); if (n) state = READ_ONCE(n->nud_state); @@ -1401,7 +1401,7 @@ static bool nexthop_is_good_nh(const struct nexthop *nh) case AF_INET: return ipv4_good_nh(&nhi->fib_nh); case AF_INET6: - return ipv6_good_nh(&nhi->fib6_nh); + return IS_ENABLED(CONFIG_IPV6) && ipv6_good_nh(&nhi->fib6_nh); } return false; @@ -2151,8 +2151,8 @@ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh) fib6_info_hold(f6i); spin_unlock_bh(&nh->lock); - ipv6_stub->ip6_del_rt(net, f6i, - !READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode)); + ip6_del_rt(net, f6i, + !READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode)); spin_lock_bh(&nh->lock); } @@ -2208,8 +2208,11 @@ static void nh_rt_cache_flush(struct net *net, struct nexthop *nh, if (!list_empty(&nh->fi_list)) rt_cache_flush(net); - list_for_each_entry(f6i, &nh->f6i_list, nh_list) - ipv6_stub->fib6_update_sernum(net, f6i); + list_for_each_entry(f6i, &nh->f6i_list, nh_list) { + spin_lock_bh(&f6i->fib6_table->tb6_lock); + fib6_update_sernum_upto_root(net, f6i); + spin_unlock_bh(&f6i->fib6_table->tb6_lock); + } /* if an IPv6 group was replaced, we have to release all old * dsts to make sure all refcounts are released @@ -2223,7 +2226,7 @@ static void nh_rt_cache_flush(struct net *net, struct nexthop *nh, struct nh_info *nhi = rtnl_dereference(nhge->nh->nh_info); if (nhi->family == AF_INET6) - ipv6_stub->fib6_nh_release_dsts(&nhi->fib6_nh); + fib6_nh_release_dsts(&nhi->fib6_nh); } } @@ -2504,7 +2507,7 @@ static void __nexthop_replace_notify(struct net *net, struct nexthop *nh, } list_for_each_entry(f6i, &nh->f6i_list, nh_list) - ipv6_stub->fib6_rt_update(net, f6i, info); + fib6_rt_update(net, f6i, info); } /* send RTM_NEWROUTE with REPLACE flag set for all FIB entries @@ -2877,13 +2880,12 @@ static int nh_create_ipv6(struct net *net, struct nexthop *nh, fib6_cfg.fc_flags |= RTF_GATEWAY; /* sets nh_dev if successful */ - err = ipv6_stub->fib6_nh_init(net, fib6_nh, &fib6_cfg, GFP_KERNEL, - extack); + err = fib6_nh_init(net, fib6_nh, &fib6_cfg, GFP_KERNEL, extack); if (err) { /* IPv6 is not enabled, don't call fib6_nh_release */ if (err == -EAFNOSUPPORT) goto out; - ipv6_stub->fib6_nh_release(fib6_nh); + fib6_nh_release(fib6_nh); } else { nh->nh_flags = fib6_nh->fib_nh_flags; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 463236e0dc2d..bc1296f0ea69 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -446,8 +446,8 @@ static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr) if (rt->rt_gw_family == AF_INET) { pkey = (const __be32 *)&rt->rt_gw4; - } else if (rt->rt_gw_family == AF_INET6) { - return __ipv6_confirm_neigh_stub(dev, &rt->rt_gw6); + } else if (IS_ENABLED(CONFIG_IPV6) && rt->rt_gw_family == AF_INET6) { + return __ipv6_confirm_neigh(dev, &rt->rt_gw6); } else if (!daddr || (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL))) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index e7b93b874f34..ab415de32443 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -118,9 +118,6 @@ #include #include #include -#if IS_ENABLED(CONFIG_IPV6) -#include -#endif #include struct udp_table udp_table __read_mostly; @@ -2855,7 +2852,7 @@ static void set_xfrm_gro_udp_encap_rcv(__u16 encap_type, unsigned short family, if (udp_test_bit(GRO_ENABLED, sk) && encap_type == UDP_ENCAP_ESPINUDP) { if (IS_ENABLED(CONFIG_IPV6) && family == AF_INET6) - new_gro_receive = ipv6_stub->xfrm6_gro_udp_encap_rcv; + new_gro_receive = xfrm6_gro_udp_encap_rcv; else new_gro_receive = xfrm4_gro_udp_encap_rcv; @@ -2927,7 +2924,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) WRITE_ONCE(up->encap_rcv, - ipv6_stub->xfrm6_udp_encap_rcv); + xfrm6_udp_encap_rcv); else #endif WRITE_ONCE(up->encap_rcv, From d76f6b170a10414a9c674b6db4bd0473b1b30050 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Wed, 25 Mar 2026 13:08:49 +0100 Subject: [PATCH 0983/1561] net: convert remaining ipv6_stub users to direct function calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As IPv6 is built-in only, the ipv6_stub infrastructure is no longer necessary. Convert remaining ipv6_stub users to make direct function calls. The fallback functions introduced previously will prevent linkage errors when CONFIG_IPV6 is disabled. Signed-off-by: Fernando Fernandez Mancera Tested-by: Ricardo B. Marlière Link: https://patch.msgid.link/20260325120928.15848-9-fmancera@suse.de Signed-off-by: Jakub Kicinski --- include/net/udp_tunnel.h | 2 +- net/mpls/af_mpls.c | 3 +-- net/openvswitch/actions.c | 3 ++- net/sched/sch_frag.c | 4 ++-- net/tipc/udp_media.c | 9 +++------ net/xfrm/espintcp.c | 5 +---- net/xfrm/xfrm_nat_keepalive.c | 4 ++-- net/xfrm/xfrm_output.c | 3 +-- 8 files changed, 13 insertions(+), 20 deletions(-) diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index fc1fc43345b5..40089fc4c1ba 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -230,7 +230,7 @@ static inline void udp_tunnel_encap_enable(struct sock *sk) #if IS_ENABLED(CONFIG_IPV6) if (READ_ONCE(sk->sk_family) == PF_INET6) - ipv6_stub->udpv6_encap_enable(); + udpv6_encap_enable(); #endif udp_encap_enable(); } diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index b32311f5cbf7..58a5bd69ee61 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -24,7 +24,6 @@ #if IS_ENABLED(CONFIG_IPV6) #include #endif -#include #include #include "internal.h" @@ -642,7 +641,7 @@ static struct net_device *inet6_fib_lookup_dev(struct net *net, memset(&fl6, 0, sizeof(fl6)); memcpy(&fl6.daddr, addr, sizeof(struct in6_addr)); - dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL); + dst = ip6_dst_lookup_flow(net, NULL, &fl6, NULL); if (IS_ERR(dst)) return ERR_CAST(dst); diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 792ca44a461d..140388a18ae0 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -810,7 +811,7 @@ static void ovs_fragment(struct net *net, struct vport *vport, skb_dst_set_noref(skb, &ovs_rt.dst); IP6CB(skb)->frag_max_size = mru; - ipv6_stub->ipv6_fragment(net, skb->sk, skb, ovs_vport_output); + ip6_fragment(net, skb->sk, skb, ovs_vport_output); refdst_drop(orig_dst); } else { WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.", diff --git a/net/sched/sch_frag.c b/net/sched/sch_frag.c index d1d87dce7f3f..75ee52750919 100644 --- a/net/sched/sch_frag.c +++ b/net/sched/sch_frag.c @@ -6,6 +6,7 @@ #include #include #include +#include struct sch_frag_data { unsigned long dst; @@ -127,8 +128,7 @@ static int sch_fragment(struct net *net, struct sk_buff *skb, skb_dst_set_noref(skb, &sch_frag_rt.dst); IP6CB(skb)->frag_max_size = mru; - ret = ipv6_stub->ipv6_fragment(net, skb->sk, skb, - sch_frag_xmit); + ret = ip6_fragment(net, skb->sk, skb, sch_frag_xmit); local_unlock_nested_bh(&sch_frag_data_storage.bh_lock); refdst_drop(orig_dst); } else { diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 2b8e385d1e51..2c66b356025a 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -44,7 +44,6 @@ #include #include #include -#include #include #include "core.h" #include "addr.h" @@ -207,9 +206,8 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb, .saddr = src->ipv6, .flowi6_proto = IPPROTO_UDP }; - ndst = ipv6_stub->ipv6_dst_lookup_flow(net, - ub->ubsock->sk, - &fl6, NULL); + ndst = ip6_dst_lookup_flow(net, ub->ubsock->sk, + &fl6, NULL); if (IS_ERR(ndst)) { err = PTR_ERR(ndst); goto tx_error; @@ -418,8 +416,7 @@ static int enable_mcast(struct udp_bearer *ub, struct udp_media_addr *remote) #if IS_ENABLED(CONFIG_IPV6) } else { lock_sock(sk); - err = ipv6_stub->ipv6_sock_mc_join(sk, ub->ifindex, - &remote->ipv6); + err = ipv6_sock_mc_join(sk, ub->ifindex, &remote->ipv6); release_sock(sk); #endif } diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c index 998832419097..a2756186e13a 100644 --- a/net/xfrm/espintcp.c +++ b/net/xfrm/espintcp.c @@ -7,9 +7,6 @@ #include #include #include -#if IS_ENABLED(CONFIG_IPV6) -#include -#endif #include static void handle_nonesp(struct espintcp_ctx *ctx, struct sk_buff *skb, @@ -43,7 +40,7 @@ static void handle_esp(struct sk_buff *skb, struct sock *sk) local_bh_disable(); #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) - ipv6_stub->xfrm6_rcv_encap(skb, IPPROTO_ESP, 0, TCP_ENCAP_ESPINTCP); + xfrm6_rcv_encap(skb, IPPROTO_ESP, 0, TCP_ENCAP_ESPINTCP); else #endif xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, TCP_ENCAP_ESPINTCP); diff --git a/net/xfrm/xfrm_nat_keepalive.c b/net/xfrm/xfrm_nat_keepalive.c index 1856beee0149..458931062a04 100644 --- a/net/xfrm/xfrm_nat_keepalive.c +++ b/net/xfrm/xfrm_nat_keepalive.c @@ -98,14 +98,14 @@ static int nat_keepalive_send_ipv6(struct sk_buff *skb, local_lock_nested_bh(&nat_keepalive_sk_ipv6.bh_lock); sk = this_cpu_read(nat_keepalive_sk_ipv6.sock); sock_net_set(sk, net); - dst = ipv6_stub->ipv6_dst_lookup_flow(net, sk, &fl6, NULL); + dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); if (IS_ERR(dst)) { local_unlock_nested_bh(&nat_keepalive_sk_ipv6.bh_lock); return PTR_ERR(dst); } skb_dst_set(skb, dst); - err = ipv6_stub->ip6_xmit(sk, skb, &fl6, skb->mark, NULL, 0, 0); + err = ip6_xmit(sk, skb, &fl6, skb->mark, NULL, 0, 0); sock_net_set(sk, &init_net); local_unlock_nested_bh(&nat_keepalive_sk_ipv6.bh_lock); return err; diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 54222fcbd7fd..a9652b422f51 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -20,7 +20,6 @@ #if IS_ENABLED(CONFIG_IPV6) #include -#include #endif #include "xfrm_inout.h" @@ -900,7 +899,7 @@ int xfrm6_tunnel_check_size(struct sk_buff *skb) skb->protocol = htons(ETH_P_IPV6); if (xfrm6_local_dontfrag(sk)) - ipv6_stub->xfrm6_local_rxpmtu(skb, mtu); + xfrm6_local_rxpmtu(skb, mtu); else if (sk) xfrm_local_error(skb, mtu); else From ad84b1eefe28cee96c572b84bfa4f0fbfd425b68 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Wed, 25 Mar 2026 13:08:50 +0100 Subject: [PATCH 0984/1561] bpf: remove ipv6_bpf_stub completely and use direct function calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As IPv6 is built-in only, the ipv6_bpf_stub can be removed completely. Convert all ipv6_bpf_stub usage to direct function calls instead. The fallback functions introduced previously will prevent linkage errors when CONFIG_IPV6 is disabled. Signed-off-by: Fernando Fernandez Mancera Tested-by: Ricardo B. Marlière Acked-by: Daniel Borkmann Reviewed-by: Martin KaFai Lau Link: https://patch.msgid.link/20260325120928.15848-10-fmancera@suse.de Signed-off-by: Jakub Kicinski --- include/net/ipv6.h | 2 ++ include/net/ipv6_stubs.h | 21 --------------- net/core/filter.c | 58 +++++++++++++++++----------------------- net/core/lwt_bpf.c | 10 ++++--- net/ipv6/af_inet6.c | 13 ++------- 5 files changed, 35 insertions(+), 69 deletions(-) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index f99f273341f0..d042afe7a245 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1149,6 +1149,8 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu); void inet6_cleanup_sock(struct sock *sk); void inet6_sock_destruct(struct sock *sk); int inet6_release(struct socket *sock); +int __inet6_bind(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len, + u32 flags); int inet6_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len); int inet6_bind_sk(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len); int inet6_getname(struct socket *sock, struct sockaddr *uaddr, diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index 907681cecde8..dc708d9eca7a 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -77,25 +77,4 @@ struct ipv6_stub { __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority); }; extern const struct ipv6_stub *ipv6_stub __read_mostly; - -/* A stub used by bpf helpers. Similarly ugly as ipv6_stub */ -struct ipv6_bpf_stub { - int (*inet6_bind)(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len, - u32 flags); - struct sock *(*udp6_lib_lookup)(const struct net *net, - const struct in6_addr *saddr, __be16 sport, - const struct in6_addr *daddr, __be16 dport, - int dif, int sdif, struct sk_buff *skb); - int (*ipv6_setsockopt)(struct sock *sk, int level, int optname, - sockptr_t optval, unsigned int optlen); - int (*ipv6_getsockopt)(struct sock *sk, int level, int optname, - sockptr_t optval, sockptr_t optlen); - int (*ipv6_dev_get_saddr)(struct net *net, - const struct net_device *dst_dev, - const struct in6_addr *daddr, - unsigned int prefs, - struct in6_addr *saddr); -}; -extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; - #endif diff --git a/net/core/filter.c b/net/core/filter.c index c56821afaa0f..d55525cc5540 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -73,7 +73,6 @@ #include #include #include -#include #include #include #include @@ -2279,7 +2278,7 @@ static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev, .saddr = ip6h->saddr, }; - dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL); + dst = ip6_dst_lookup_flow(net, NULL, &fl6, NULL); if (IS_ERR(dst)) goto out_drop; @@ -5577,12 +5576,12 @@ static int sol_ipv6_sockopt(struct sock *sk, int optname, } if (getopt) - return ipv6_bpf_stub->ipv6_getsockopt(sk, SOL_IPV6, optname, - KERNEL_SOCKPTR(optval), - KERNEL_SOCKPTR(optlen)); + return do_ipv6_getsockopt(sk, SOL_IPV6, optname, + KERNEL_SOCKPTR(optval), + KERNEL_SOCKPTR(optlen)); - return ipv6_bpf_stub->ipv6_setsockopt(sk, SOL_IPV6, optname, - KERNEL_SOCKPTR(optval), *optlen); + return do_ipv6_setsockopt(sk, SOL_IPV6, optname, + KERNEL_SOCKPTR(optval), *optlen); } static int __bpf_setsockopt(struct sock *sk, int level, int optname, @@ -5981,9 +5980,6 @@ static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = { .arg2_type = ARG_ANYTHING, }; -const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; -EXPORT_SYMBOL_GPL(ipv6_bpf_stub); - BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr, int, addr_len) { @@ -6007,11 +6003,9 @@ BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr, return err; if (((struct sockaddr_in6 *)addr)->sin6_port == htons(0)) flags |= BIND_FORCE_ADDRESS_NO_PORT; - /* ipv6_bpf_stub cannot be NULL, since it's called from - * bpf_cgroup_inet6_connect hook and ipv6 is already loaded - */ - return ipv6_bpf_stub->inet6_bind(sk, (struct sockaddr_unsized *)addr, - addr_len, flags); + + return __inet6_bind(sk, (struct sockaddr_unsized *)addr, + addr_len, flags); #endif /* CONFIG_IPV6 */ } #endif /* CONFIG_INET */ @@ -6099,9 +6093,9 @@ static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params, u32 mtu) static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, u32 flags, bool check_mtu) { + struct neighbour *neigh = NULL; struct fib_nh_common *nhc; struct in_device *in_dev; - struct neighbour *neigh; struct net_device *dev; struct fib_result res; struct flowi4 fl4; @@ -6221,8 +6215,8 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, if (likely(nhc->nhc_gw_family != AF_INET6)) neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)params->ipv4_dst); - else - neigh = __ipv6_neigh_lookup_noref_stub(dev, params->ipv6_dst); + else if (IS_ENABLED(CONFIG_IPV6)) + neigh = __ipv6_neigh_lookup_noref(dev, params->ipv6_dst); if (!neigh || !(READ_ONCE(neigh->nud_state) & NUD_VALID)) return BPF_FIB_LKUP_RET_NO_NEIGH; @@ -6290,12 +6284,11 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, params->tbid = 0; } - tb = ipv6_stub->fib6_get_table(net, tbid); + tb = fib6_get_table(net, tbid); if (unlikely(!tb)) return BPF_FIB_LKUP_RET_NOT_FWDED; - err = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, &res, - strict); + err = fib6_table_lookup(net, tb, oif, &fl6, &res, strict); } else { if (flags & BPF_FIB_LOOKUP_MARK) fl6.flowi6_mark = params->mark; @@ -6305,7 +6298,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, fl6.flowi6_tun_key.tun_id = 0; fl6.flowi6_uid = sock_net_uid(net, NULL); - err = ipv6_stub->fib6_lookup(net, oif, &fl6, &res, strict); + err = fib6_lookup(net, oif, &fl6, &res, strict); } if (unlikely(err || IS_ERR_OR_NULL(res.f6i) || @@ -6326,11 +6319,11 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, return BPF_FIB_LKUP_RET_NOT_FWDED; } - ipv6_stub->fib6_select_path(net, &res, &fl6, fl6.flowi6_oif, - fl6.flowi6_oif != 0, NULL, strict); + fib6_select_path(net, &res, &fl6, fl6.flowi6_oif, + fl6.flowi6_oif != 0, NULL, strict); if (check_mtu) { - mtu = ipv6_stub->ip6_mtu_from_fib6(&res, dst, src); + mtu = ip6_mtu_from_fib6(&res, dst, src); if (params->tot_len > mtu) { params->mtu_result = mtu; /* union with tot_len */ return BPF_FIB_LKUP_RET_FRAG_NEEDED; @@ -6351,9 +6344,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, if (res.f6i->fib6_prefsrc.plen) { *src = res.f6i->fib6_prefsrc.addr; } else { - err = ipv6_bpf_stub->ipv6_dev_get_saddr(net, dev, - &fl6.daddr, 0, - src); + err = ipv6_dev_get_saddr(net, dev, &fl6.daddr, 0, src); if (err) return BPF_FIB_LKUP_RET_NO_SRC_ADDR; } @@ -6365,7 +6356,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, /* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is * not needed here. */ - neigh = __ipv6_neigh_lookup_noref_stub(dev, dst); + neigh = __ipv6_neigh_lookup_noref(dev, dst); if (!neigh || !(READ_ONCE(neigh->nud_state) & NUD_VALID)) return BPF_FIB_LKUP_RET_NO_NEIGH; memcpy(params->dmac, neigh->ha, ETH_ALEN); @@ -6900,11 +6891,10 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, src6, tuple->ipv6.sport, dst6, ntohs(tuple->ipv6.dport), dif, sdif, &refcounted); - else if (likely(ipv6_bpf_stub)) - sk = ipv6_bpf_stub->udp6_lib_lookup(net, - src6, tuple->ipv6.sport, - dst6, tuple->ipv6.dport, - dif, sdif, NULL); + else if (likely(ipv6_mod_enabled())) + sk = __udp6_lib_lookup(net, src6, tuple->ipv6.sport, + dst6, tuple->ipv6.dport, + dif, sdif, NULL); #endif } diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index 9f40be0c3e71..f71ef82a5f3d 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -13,7 +13,6 @@ #include #include #include -#include struct bpf_lwt_prog { struct bpf_prog *prog; @@ -103,7 +102,12 @@ static int bpf_lwt_input_reroute(struct sk_buff *skb) dev_put(dev); } else if (skb->protocol == htons(ETH_P_IPV6)) { skb_dst_drop(skb); - err = ipv6_stub->ipv6_route_input(skb); + if (IS_ENABLED(CONFIG_IPV6)) { + ip6_route_input(skb); + err = skb_dst(skb)->error; + } else { + err = -EAFNOSUPPORT; + } } else { err = -EAFNOSUPPORT; } @@ -233,7 +237,7 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb) fl6.daddr = iph6->daddr; fl6.saddr = iph6->saddr; - dst = ipv6_stub->ipv6_dst_lookup_flow(net, skb->sk, &fl6, NULL); + dst = ip6_dst_lookup_flow(net, skb->sk, &fl6, NULL); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto err; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index bb29b29fdcfb..07ae6ea7743a 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -264,8 +264,8 @@ out_sk_release: goto out; } -static int __inet6_bind(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len, - u32 flags) +int __inet6_bind(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len, + u32 flags) { struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr; struct inet_sock *inet = inet_sk(sk); @@ -1032,14 +1032,6 @@ static const struct ipv6_stub ipv6_stub_impl = { .ip6_xmit = ip6_xmit, }; -static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = { - .inet6_bind = __inet6_bind, - .udp6_lib_lookup = __udp6_lib_lookup, - .ipv6_setsockopt = do_ipv6_setsockopt, - .ipv6_getsockopt = do_ipv6_getsockopt, - .ipv6_dev_get_saddr = ipv6_dev_get_saddr, -}; - static int __init inet6_init(void) { struct list_head *r; @@ -1199,7 +1191,6 @@ static int __init inet6_init(void) /* ensure that ipv6 stubs are visible only after ipv6 is ready */ wmb(); ipv6_stub = &ipv6_stub_impl; - ipv6_bpf_stub = &ipv6_bpf_stub_impl; out: return err; From 964870b4b9012bd42f29336f32b5c8be8ef3c00d Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Wed, 25 Mar 2026 13:08:51 +0100 Subject: [PATCH 0985/1561] ipv6: remove ipv6_stub infrastructure completely MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As IPv6 is built-in only and there are no more users of ipv6_stub, the ipv6_stub is now entirely obsolete. Remove all the code related to the definition, initialization and usage. Signed-off-by: Fernando Fernandez Mancera Tested-by: Ricardo B. Marlière Link: https://patch.msgid.link/20260325120928.15848-11-fmancera@suse.de Signed-off-by: Jakub Kicinski --- include/net/ip6_fib.h | 1 - include/net/ipv6_stubs.h | 80 ---------------------------------- include/net/ndisc.h | 25 +---------- include/net/udp_tunnel.h | 1 - net/ipv6/addrconf_core.c | 91 --------------------------------------- net/ipv6/af_inet6.c | 40 ----------------- net/ipv6/ip6_fib.c | 8 ---- net/ipv6/ip6_udp_tunnel.c | 3 +- 8 files changed, 3 insertions(+), 246 deletions(-) delete mode 100644 include/net/ipv6_stubs.h diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 20e1231262d6..9cd27e1b9b69 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -621,7 +621,6 @@ static inline void fib6_update_sernum_upto_root(struct net *net, struct fib6_inf { } #endif -void fib6_update_sernum_stub(struct net *net, struct fib6_info *f6i); void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val); static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric) diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h deleted file mode 100644 index dc708d9eca7a..000000000000 --- a/include/net/ipv6_stubs.h +++ /dev/null @@ -1,80 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _IPV6_STUBS_H -#define _IPV6_STUBS_H - -#include -#include -#include -#include -#include -#include -#include -#include - -/* structs from net/ip6_fib.h */ -struct fib6_info; -struct fib6_nh; -struct fib6_config; -struct fib6_result; - -/* This is ugly, ideally these symbols should be built - * into the core kernel. - */ -struct ipv6_stub { - int (*ipv6_sock_mc_join)(struct sock *sk, int ifindex, - const struct in6_addr *addr); - int (*ipv6_sock_mc_drop)(struct sock *sk, int ifindex, - const struct in6_addr *addr); - struct dst_entry *(*ipv6_dst_lookup_flow)(struct net *net, - const struct sock *sk, - struct flowi6 *fl6, - const struct in6_addr *final_dst); - int (*ipv6_route_input)(struct sk_buff *skb); - - struct fib6_table *(*fib6_get_table)(struct net *net, u32 id); - int (*fib6_lookup)(struct net *net, int oif, struct flowi6 *fl6, - struct fib6_result *res, int flags); - int (*fib6_table_lookup)(struct net *net, struct fib6_table *table, - int oif, struct flowi6 *fl6, - struct fib6_result *res, int flags); - void (*fib6_select_path)(const struct net *net, struct fib6_result *res, - struct flowi6 *fl6, int oif, bool oif_match, - const struct sk_buff *skb, int strict); - u32 (*ip6_mtu_from_fib6)(const struct fib6_result *res, - const struct in6_addr *daddr, - const struct in6_addr *saddr); - - int (*fib6_nh_init)(struct net *net, struct fib6_nh *fib6_nh, - struct fib6_config *cfg, gfp_t gfp_flags, - struct netlink_ext_ack *extack); - void (*fib6_nh_release)(struct fib6_nh *fib6_nh); - void (*fib6_nh_release_dsts)(struct fib6_nh *fib6_nh); - void (*fib6_update_sernum)(struct net *net, struct fib6_info *rt); - int (*ip6_del_rt)(struct net *net, struct fib6_info *rt, bool skip_notify); - void (*fib6_rt_update)(struct net *net, struct fib6_info *rt, - struct nl_info *info); - - void (*udpv6_encap_enable)(void); - void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr, - const struct in6_addr *solicited_addr, - bool router, bool solicited, bool override, bool inc_opt); -#if IS_ENABLED(CONFIG_XFRM) - void (*xfrm6_local_rxpmtu)(struct sk_buff *skb, u32 mtu); - int (*xfrm6_udp_encap_rcv)(struct sock *sk, struct sk_buff *skb); - struct sk_buff *(*xfrm6_gro_udp_encap_rcv)(struct sock *sk, - struct list_head *head, - struct sk_buff *skb); - int (*xfrm6_rcv_encap)(struct sk_buff *skb, int nexthdr, __be32 spi, - int encap_type); -#endif - struct neigh_table *nd_tbl; - - int (*ipv6_fragment)(struct net *net, struct sock *sk, struct sk_buff *skb, - int (*output)(struct net *, struct sock *, struct sk_buff *)); - struct net_device *(*ipv6_dev_find)(struct net *net, const struct in6_addr *addr, - struct net_device *dev); - int (*ip6_xmit)(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, - __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority); -}; -extern const struct ipv6_stub *ipv6_stub __read_mostly; -#endif diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 19e2a177bd29..3da1a6f8d3f9 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -2,8 +2,6 @@ #ifndef _NDISC_H #define _NDISC_H -#include - /* * ICMP codes for neighbour discovery messages */ @@ -359,14 +357,6 @@ static inline struct neighbour *__ipv6_neigh_lookup_noref(struct net_device *dev return ___neigh_lookup_noref(&nd_tbl, neigh_key_eq128, ndisc_hashfn, pkey, dev); } -static inline -struct neighbour *__ipv6_neigh_lookup_noref_stub(struct net_device *dev, - const void *pkey) -{ - return ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128, - ndisc_hashfn, pkey, dev); -} - static inline struct neighbour *__ipv6_neigh_lookup(struct net_device *dev, const void *pkey) { struct neighbour *n; @@ -391,25 +381,13 @@ static inline void __ipv6_confirm_neigh(struct net_device *dev, rcu_read_unlock(); } -static inline void __ipv6_confirm_neigh_stub(struct net_device *dev, - const void *pkey) -{ - struct neighbour *n; - - rcu_read_lock(); - n = __ipv6_neigh_lookup_noref_stub(dev, pkey); - neigh_confirm(n); - rcu_read_unlock(); -} - -/* uses ipv6_stub and is meant for use outside of IPv6 core */ static inline struct neighbour *ip_neigh_gw6(struct net_device *dev, const void *addr) { #if IS_ENABLED(CONFIG_IPV6) struct neighbour *neigh; - neigh = __ipv6_neigh_lookup_noref_stub(dev, addr); + neigh = __ipv6_neigh_lookup_noref(dev, addr); if (unlikely(!neigh)) neigh = __neigh_create(&nd_tbl, addr, dev, false); @@ -438,6 +416,7 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr, void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr, const struct in6_addr *daddr); + void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr, const struct in6_addr *solicited_addr, bool router, bool solicited, bool override, bool inc_opt); diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 40089fc4c1ba..47c23d4a1740 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -7,7 +7,6 @@ #if IS_ENABLED(CONFIG_IPV6) #include -#include #endif #define UDP_TUNNEL_PARTIAL_FEATURES NETIF_F_GSO_ENCAP_ALL diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index c008d21925d7..fa27a90ab3cd 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -6,7 +6,6 @@ #include #include -#include #include #include @@ -129,96 +128,6 @@ int inet6addr_validator_notifier_call_chain(unsigned long val, void *v) } EXPORT_SYMBOL(inet6addr_validator_notifier_call_chain); -static struct dst_entry *eafnosupport_ipv6_dst_lookup_flow(struct net *net, - const struct sock *sk, - struct flowi6 *fl6, - const struct in6_addr *final_dst) -{ - return ERR_PTR(-EAFNOSUPPORT); -} - -static int eafnosupport_ipv6_route_input(struct sk_buff *skb) -{ - return -EAFNOSUPPORT; -} - -static struct fib6_table *eafnosupport_fib6_get_table(struct net *net, u32 id) -{ - return NULL; -} - -static int -eafnosupport_fib6_table_lookup(struct net *net, struct fib6_table *table, - int oif, struct flowi6 *fl6, - struct fib6_result *res, int flags) -{ - return -EAFNOSUPPORT; -} - -static int -eafnosupport_fib6_lookup(struct net *net, int oif, struct flowi6 *fl6, - struct fib6_result *res, int flags) -{ - return -EAFNOSUPPORT; -} - -static void -eafnosupport_fib6_select_path(const struct net *net, struct fib6_result *res, - struct flowi6 *fl6, int oif, bool have_oif_match, - const struct sk_buff *skb, int strict) -{ -} - -static u32 -eafnosupport_ip6_mtu_from_fib6(const struct fib6_result *res, - const struct in6_addr *daddr, - const struct in6_addr *saddr) -{ - return 0; -} - -static int eafnosupport_fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, - struct fib6_config *cfg, gfp_t gfp_flags, - struct netlink_ext_ack *extack) -{ - NL_SET_ERR_MSG(extack, "IPv6 support not enabled in kernel"); - return -EAFNOSUPPORT; -} - -static int eafnosupport_ip6_del_rt(struct net *net, struct fib6_info *rt, - bool skip_notify) -{ - return -EAFNOSUPPORT; -} - -static int eafnosupport_ipv6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, - int (*output)(struct net *, struct sock *, struct sk_buff *)) -{ - kfree_skb(skb); - return -EAFNOSUPPORT; -} - -static struct net_device *eafnosupport_ipv6_dev_find(struct net *net, const struct in6_addr *addr, - struct net_device *dev) -{ - return ERR_PTR(-EAFNOSUPPORT); -} - -const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) { - .ipv6_dst_lookup_flow = eafnosupport_ipv6_dst_lookup_flow, - .ipv6_route_input = eafnosupport_ipv6_route_input, - .fib6_get_table = eafnosupport_fib6_get_table, - .fib6_table_lookup = eafnosupport_fib6_table_lookup, - .fib6_lookup = eafnosupport_fib6_lookup, - .fib6_select_path = eafnosupport_fib6_select_path, - .ip6_mtu_from_fib6 = eafnosupport_ip6_mtu_from_fib6, - .fib6_nh_init = eafnosupport_fib6_nh_init, - .ip6_del_rt = eafnosupport_ip6_del_rt, - .ipv6_fragment = eafnosupport_ipv6_fragment, - .ipv6_dev_find = eafnosupport_ipv6_dev_find, -}; -EXPORT_SYMBOL_GPL(ipv6_stub); - /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */ const struct in6_addr in6addr_loopback __aligned(BITS_PER_LONG/8) = IN6ADDR_LOOPBACK_INIT; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 07ae6ea7743a..33abd8d8cd7d 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -51,7 +51,6 @@ #include #include #include -#include #include #ifdef CONFIG_IPV6_TUNNEL #include @@ -996,42 +995,6 @@ static struct pernet_operations inet6_net_ops = { .exit = inet6_net_exit, }; -static int ipv6_route_input(struct sk_buff *skb) -{ - ip6_route_input(skb); - return skb_dst(skb)->error; -} - -static const struct ipv6_stub ipv6_stub_impl = { - .ipv6_sock_mc_join = ipv6_sock_mc_join, - .ipv6_sock_mc_drop = ipv6_sock_mc_drop, - .ipv6_dst_lookup_flow = ip6_dst_lookup_flow, - .ipv6_route_input = ipv6_route_input, - .fib6_get_table = fib6_get_table, - .fib6_table_lookup = fib6_table_lookup, - .fib6_lookup = fib6_lookup, - .fib6_select_path = fib6_select_path, - .ip6_mtu_from_fib6 = ip6_mtu_from_fib6, - .fib6_nh_init = fib6_nh_init, - .fib6_nh_release = fib6_nh_release, - .fib6_nh_release_dsts = fib6_nh_release_dsts, - .fib6_update_sernum = fib6_update_sernum_stub, - .fib6_rt_update = fib6_rt_update, - .ip6_del_rt = ip6_del_rt, - .udpv6_encap_enable = udpv6_encap_enable, - .ndisc_send_na = ndisc_send_na, -#if IS_ENABLED(CONFIG_XFRM) - .xfrm6_local_rxpmtu = xfrm6_local_rxpmtu, - .xfrm6_udp_encap_rcv = xfrm6_udp_encap_rcv, - .xfrm6_gro_udp_encap_rcv = xfrm6_gro_udp_encap_rcv, - .xfrm6_rcv_encap = xfrm6_rcv_encap, -#endif - .nd_tbl = &nd_tbl, - .ipv6_fragment = ip6_fragment, - .ipv6_dev_find = ipv6_dev_find, - .ip6_xmit = ip6_xmit, -}; - static int __init inet6_init(void) { struct list_head *r; @@ -1188,9 +1151,6 @@ static int __init inet6_init(void) goto sysctl_fail; #endif - /* ensure that ipv6 stubs are visible only after ipv6 is ready */ - wmb(); - ipv6_stub = &ipv6_stub_impl; out: return err; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 17f757e9c54a..0691f561bc26 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1408,14 +1408,6 @@ void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt) __fib6_update_sernum_upto_root(rt, fib6_new_sernum(net)); } -/* allow ipv4 to update sernum via ipv6_stub */ -void fib6_update_sernum_stub(struct net *net, struct fib6_info *f6i) -{ - spin_lock_bh(&f6i->fib6_table->tb6_lock); - fib6_update_sernum_upto_root(net, f6i); - spin_unlock_bh(&f6i->fib6_table->tb6_lock); -} - /* * Add routing information to the routing tree. * / diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index cef3e0210744..405ef1cb8864 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -162,8 +162,7 @@ struct dst_entry *udp_tunnel6_dst_lookup(struct sk_buff *skb, fl6.fl6_dport = dport; fl6.flowlabel = ip6_make_flowinfo(dsfield, key->label); - dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6, - NULL); + dst = ip6_dst_lookup_flow(net, sock->sk, &fl6, NULL); if (IS_ERR(dst)) { netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr); return ERR_PTR(-ENETUNREACH); From b2c981e7c4653e3c276d5f3a0e012711d3596418 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Wed, 25 Mar 2026 13:08:52 +0100 Subject: [PATCH 0986/1561] netfilter: remove nf_ipv6_ops and use direct function calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As IPv6 is built-in only, nf_ipv6_ops can be removed completely as it is not longer necessary. Convert all nf_ipv6_ops usage to direct function calls instead. In addition, remove the ipv6_netfilter_init/fini() functions as they are not necessary any longer. Signed-off-by: Fernando Fernandez Mancera Tested-by: Ricardo B. Marlière Link: https://patch.msgid.link/20260325120928.15848-12-fmancera@suse.de Signed-off-by: Jakub Kicinski --- include/linux/netfilter_ipv6.h | 102 ++---------------------------- net/bridge/br_netfilter_hooks.c | 12 +--- net/bridge/br_netfilter_ipv6.c | 7 +- net/ipv6/af_inet6.c | 6 -- net/ipv6/netfilter.c | 48 -------------- net/netfilter/core.c | 3 - net/netfilter/nf_nat_masquerade.c | 21 +----- net/netfilter/nfnetlink_queue.c | 22 +++++-- net/netfilter/utils.c | 1 - 9 files changed, 32 insertions(+), 190 deletions(-) diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 61aa48f46dd7..5ce45b6d890f 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -34,59 +34,13 @@ struct ip6_rt_info { struct nf_queue_entry; struct nf_bridge_frag_data; -/* - * Hook functions for ipv6 to allow xt_* modules to be built-in even - * if IPv6 is a module. - */ -struct nf_ipv6_ops { -#if IS_MODULE(CONFIG_IPV6) - int (*chk_addr)(struct net *net, const struct in6_addr *addr, - const struct net_device *dev, int strict); - int (*route_me_harder)(struct net *net, struct sock *sk, struct sk_buff *skb); - int (*dev_get_saddr)(struct net *net, const struct net_device *dev, - const struct in6_addr *daddr, unsigned int srcprefs, - struct in6_addr *saddr); - int (*route)(struct net *net, struct dst_entry **dst, struct flowi *fl, - bool strict); - u32 (*cookie_init_sequence)(const struct ipv6hdr *iph, - const struct tcphdr *th, u16 *mssp); - int (*cookie_v6_check)(const struct ipv6hdr *iph, - const struct tcphdr *th); -#endif - void (*route_input)(struct sk_buff *skb); - int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb, - int (*output)(struct net *, struct sock *, struct sk_buff *)); - int (*reroute)(struct sk_buff *skb, const struct nf_queue_entry *entry); -#if IS_MODULE(CONFIG_IPV6) - int (*br_fragment)(struct net *net, struct sock *sk, - struct sk_buff *skb, - struct nf_bridge_frag_data *data, - int (*output)(struct net *, struct sock *sk, - const struct nf_bridge_frag_data *data, - struct sk_buff *)); -#endif -}; - #ifdef CONFIG_NETFILTER #include -extern const struct nf_ipv6_ops __rcu *nf_ipv6_ops; -static inline const struct nf_ipv6_ops *nf_get_ipv6_ops(void) -{ - return rcu_dereference(nf_ipv6_ops); -} - static inline int nf_ipv6_chk_addr(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict) { -#if IS_MODULE(CONFIG_IPV6) - const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); - - if (!v6_ops) - return 1; - - return v6_ops->chk_addr(net, addr, dev, strict); -#elif IS_BUILTIN(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IPV6) return ipv6_chk_addr(net, addr, dev, strict); #else return 1; @@ -99,15 +53,7 @@ int __nf_ip6_route(struct net *net, struct dst_entry **dst, static inline int nf_ip6_route(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict) { -#if IS_MODULE(CONFIG_IPV6) - const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); - - if (v6ops) - return v6ops->route(net, dst, fl, strict); - - return -EHOSTUNREACH; -#endif -#if IS_BUILTIN(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IPV6) return __nf_ip6_route(net, dst, fl, strict); #else return -EHOSTUNREACH; @@ -129,14 +75,7 @@ static inline int nf_br_ip6_fragment(struct net *net, struct sock *sk, const struct nf_bridge_frag_data *data, struct sk_buff *)) { -#if IS_MODULE(CONFIG_IPV6) - const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); - - if (!v6_ops) - return 1; - - return v6_ops->br_fragment(net, sk, skb, data, output); -#elif IS_BUILTIN(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IPV6) return br_ip6_fragment(net, sk, skb, data, output); #else return 1; @@ -147,14 +86,7 @@ int ip6_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb); static inline int nf_ip6_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb) { -#if IS_MODULE(CONFIG_IPV6) - const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); - - if (!v6_ops) - return -EHOSTUNREACH; - - return v6_ops->route_me_harder(net, sk, skb); -#elif IS_BUILTIN(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IPV6) return ip6_route_me_harder(net, sk, skb); #else return -EHOSTUNREACH; @@ -165,15 +97,8 @@ static inline u32 nf_ipv6_cookie_init_sequence(const struct ipv6hdr *iph, const struct tcphdr *th, u16 *mssp) { -#if IS_ENABLED(CONFIG_SYN_COOKIES) -#if IS_MODULE(CONFIG_IPV6) - const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); - - if (v6_ops) - return v6_ops->cookie_init_sequence(iph, th, mssp); -#elif IS_BUILTIN(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IPV6) && IS_ENABLED(CONFIG_SYN_COOKIES) return __cookie_v6_init_sequence(iph, th, mssp); -#endif #endif return 0; } @@ -181,15 +106,8 @@ static inline u32 nf_ipv6_cookie_init_sequence(const struct ipv6hdr *iph, static inline int nf_cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th) { -#if IS_ENABLED(CONFIG_SYN_COOKIES) -#if IS_MODULE(CONFIG_IPV6) - const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); - - if (v6_ops) - return v6_ops->cookie_v6_check(iph, th); -#elif IS_BUILTIN(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IPV6) && IS_ENABLED(CONFIG_SYN_COOKIES) return __cookie_v6_check(iph, th); -#endif #endif return 0; } @@ -198,14 +116,6 @@ __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol); int nf_ip6_check_hbh_len(struct sk_buff *skb, u32 *plen); - -int ipv6_netfilter_init(void); -void ipv6_netfilter_fini(void); - -#else /* CONFIG_NETFILTER */ -static inline int ipv6_netfilter_init(void) { return 0; } -static inline void ipv6_netfilter_fini(void) { return; } -static inline const struct nf_ipv6_ops *nf_get_ipv6_ops(void) { return NULL; } #endif /* CONFIG_NETFILTER */ #endif /*__LINUX_IP6_NETFILTER_H*/ diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 083e2fe96441..0ab1c94db4b9 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -32,6 +32,7 @@ #include #include +#include #include #include #include @@ -890,7 +891,6 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff } if (IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) && skb->protocol == htons(ETH_P_IPV6)) { - const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); struct brnf_frag_data *data; if (br_validate_ipv6(net, skb)) @@ -906,15 +906,9 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff skb_copy_from_linear_data_offset(skb, -data->size, data->mac, data->size); - if (v6ops) { - ret = v6ops->fragment(net, sk, skb, br_nf_push_frag_xmit); - local_unlock_nested_bh(&brnf_frag_data_storage.bh_lock); - return ret; - } + ret = ip6_fragment(net, sk, skb, br_nf_push_frag_xmit); local_unlock_nested_bh(&brnf_frag_data_storage.bh_lock); - - kfree_skb(skb); - return -EMSGSIZE; + return ret; } nf_bridge_info_free(skb); return br_dev_queue_push_xmit(net, sk, skb); diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index 76ce70b4e7f3..d8548428929e 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -30,6 +30,7 @@ #include #include +#include #include #include #include @@ -95,15 +96,13 @@ br_nf_ipv6_daddr_was_changed(const struct sk_buff *skb, /* PF_BRIDGE/PRE_ROUTING: Undo the changes made for ip6tables * PREROUTING and continue the bridge PRE_ROUTING hook. See comment - * for br_nf_pre_routing_finish(), same logic is used here but - * equivalent IPv6 function ip6_route_input() called indirectly. + * for br_nf_pre_routing_finish(), same logic is used here. */ static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct rtable *rt; struct net_device *dev = skb->dev, *br_indev; - const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); br_indev = nf_bridge_get_physindev(skb, net); if (!br_indev) { @@ -120,7 +119,7 @@ static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struc nf_bridge->in_prerouting = 0; if (br_nf_ipv6_daddr_was_changed(skb, nf_bridge)) { skb_dst_drop(skb); - v6ops->route_input(skb); + ip6_route_input(skb); if (skb_dst(skb)->error) { kfree_skb(skb); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 33abd8d8cd7d..ee341a8254bf 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include @@ -1066,9 +1065,6 @@ static int __init inet6_init(void) if (err) goto igmp_fail; - err = ipv6_netfilter_init(); - if (err) - goto netfilter_fail; /* Create /proc/foo6 entries. */ #ifdef CONFIG_PROC_FS err = -ENOMEM; @@ -1199,8 +1195,6 @@ proc_misc6_fail: raw6_proc_exit(); proc_raw6_fail: #endif - ipv6_netfilter_fini(); -netfilter_fail: igmp6_cleanup(); igmp_fail: ndisc_cleanup(); diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index c3dc90dfab80..6d80f85e55fa 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -86,21 +86,6 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff } EXPORT_SYMBOL(ip6_route_me_harder); -static int nf_ip6_reroute(struct sk_buff *skb, - const struct nf_queue_entry *entry) -{ - struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry); - - if (entry->state.hook == NF_INET_LOCAL_OUT) { - const struct ipv6hdr *iph = ipv6_hdr(skb); - if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) || - !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) || - skb->mark != rt_info->mark) - return ip6_route_me_harder(entry->state.net, entry->state.sk, skb); - } - return 0; -} - int __nf_ip6_route(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict) { @@ -243,36 +228,3 @@ blackhole: return 0; } EXPORT_SYMBOL_GPL(br_ip6_fragment); - -static const struct nf_ipv6_ops ipv6ops = { -#if IS_MODULE(CONFIG_IPV6) - .chk_addr = ipv6_chk_addr, - .route_me_harder = ip6_route_me_harder, - .dev_get_saddr = ipv6_dev_get_saddr, - .route = __nf_ip6_route, -#if IS_ENABLED(CONFIG_SYN_COOKIES) - .cookie_init_sequence = __cookie_v6_init_sequence, - .cookie_v6_check = __cookie_v6_check, -#endif -#endif - .route_input = ip6_route_input, - .fragment = ip6_fragment, - .reroute = nf_ip6_reroute, -#if IS_MODULE(CONFIG_IPV6) - .br_fragment = br_ip6_fragment, -#endif -}; - -int __init ipv6_netfilter_init(void) -{ - RCU_INIT_POINTER(nf_ipv6_ops, &ipv6ops); - return 0; -} - -/* This can be called from inet6_init() on errors, so it cannot - * be marked __exit. -DaveM - */ -void ipv6_netfilter_fini(void) -{ - RCU_INIT_POINTER(nf_ipv6_ops, NULL); -} diff --git a/net/netfilter/core.c b/net/netfilter/core.c index d5df44ea9e7b..675a1034b340 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -27,9 +27,6 @@ #include "nf_internals.h" -const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly; -EXPORT_SYMBOL_GPL(nf_ipv6_ops); - #ifdef CONFIG_JUMP_LABEL struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; EXPORT_SYMBOL(nf_hooks_needed); diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c index a5a23c03fda9..4de6e0a51701 100644 --- a/net/netfilter/nf_nat_masquerade.c +++ b/net/netfilter/nf_nat_masquerade.c @@ -220,23 +220,6 @@ static struct notifier_block masq_inet_notifier = { }; #if IS_ENABLED(CONFIG_IPV6) -static int -nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev, - const struct in6_addr *daddr, unsigned int srcprefs, - struct in6_addr *saddr) -{ -#ifdef CONFIG_IPV6_MODULE - const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); - - if (!v6_ops) - return -EHOSTUNREACH; - - return v6_ops->dev_get_saddr(net, dev, daddr, srcprefs, saddr); -#else - return ipv6_dev_get_saddr(net, dev, daddr, srcprefs, saddr); -#endif -} - unsigned int nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, const struct net_device *out) @@ -251,8 +234,8 @@ nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY))); - if (nat_ipv6_dev_get_saddr(nf_ct_net(ct), out, - &ipv6_hdr(skb)->daddr, 0, &src) < 0) + if (ipv6_dev_get_saddr(nf_ct_net(ct), out, + &ipv6_hdr(skb)->daddr, 0, &src) < 0) return NF_DROP; nat = nf_ct_nat_ext_add(ct); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index a91ae07db059..2aa2380d976a 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -356,9 +356,25 @@ static int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry return 0; } +static int nf_ip6_reroute(struct sk_buff *skb, + const struct nf_queue_entry *entry) +{ + struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry); + + if (entry->state.hook == NF_INET_LOCAL_OUT) { + const struct ipv6hdr *iph = ipv6_hdr(skb); + + if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) || + !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) || + skb->mark != rt_info->mark) + return nf_ip6_route_me_harder(entry->state.net, + entry->state.sk, skb); + } + return 0; +} + static int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry) { - const struct nf_ipv6_ops *v6ops; int ret = 0; switch (entry->state.pf) { @@ -366,9 +382,7 @@ static int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry) ret = nf_ip_reroute(skb, entry); break; case AF_INET6: - v6ops = rcu_dereference(nf_ipv6_ops); - if (v6ops) - ret = v6ops->reroute(skb, entry); + ret = nf_ip6_reroute(skb, entry); break; } return ret; diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c index 008419db815a..29c4dcc362c7 100644 --- a/net/netfilter/utils.c +++ b/net/netfilter/utils.c @@ -163,7 +163,6 @@ EXPORT_SYMBOL_GPL(nf_checksum_partial); int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict, unsigned short family) { - const struct nf_ipv6_ops *v6ops __maybe_unused; int ret = 0; switch (family) { From 30fcf28d83ee30494454b3f62f89a86e9e67bc25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=B0=A2=E8=87=B4=E9=82=A6=20=28XIE=20Zhibang=29?= Date: Sat, 28 Mar 2026 13:43:31 +0000 Subject: [PATCH 0987/1561] net: stmmac: dwmac-rk: Fix typo in comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Correct the typo "rk3520" to "rk3528" in comment. Signed-off-by: 谢致邦 (XIE Zhibang) Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/tencent_833D2AD6577F21CF38ED1C3FE8814EB4B308@qq.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index b0441a368cb1..8d7042e68926 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -214,7 +214,7 @@ static int rk_configure_io_clksel(struct rk_priv_data *bsp_priv) cru = !io; /* The io_clksel configuration can be either: - * 0=CRU, 1=IO (rk3506, rk3520, rk3576) or + * 0=CRU, 1=IO (rk3506, rk3528, rk3576) or * 0=IO, 1=CRU (rk3588) * where CRU means the transmit clock comes from the CRU and IO * means the transmit clock comes from IO. From 02d0e59e36e06fb728eb4dea8479f502c67b9fbc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 Mar 2026 04:06:46 +0000 Subject: [PATCH 0988/1561] tcp: use __jhash_final() in inet6_ehashfn() I misread jhash2() implementation. Last round should use __jhash_final() instead of __jhash_mix(). Using __jhash_mix() here leaves entropy distributed across a, b, and c, which might lead to incomplete diffusion of the faddr and fport bits into the bucket index. Replacing this last __jhash_mix() with __jhash_final() provides the correct avalanche properties for the returned value in c. $ scripts/bloat-o-meter -t vmlinux.0 vmlinux add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-4 (-4) Function old new delta inet6_ehashfn 306 302 -4 Total: Before=25155089, After=25155085, chg -0.00% Fixes: 854587e69ef3 ("tcp: improve inet6_ehashfn() entropy") Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260327040646.3849503-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/inet6_hashtables.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 109fbf620ad7..b111b51d69fc 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -42,7 +42,7 @@ u32 inet6_ehashfn(const struct net *net, * Hash laddr + faddr + lport/fport + net_hash_mix. * Notes: * We combine laddr[0] (high order 32 bits of local address) - * with net_hash_mix() to avoid using __jhash_final(a, b, c). + * with net_hash_mix() to hash a multiple of 3 words. * * We do not include JHASH_INITVAL + 36 contribution * to initial values of a, b, c. @@ -63,7 +63,7 @@ u32 inet6_ehashfn(const struct net *net, a += (__force u32)faddr->s6_addr32[2]; b += (__force u32)faddr->s6_addr32[3]; c += (__force u32)fport; - __jhash_mix(a, b, c); + __jhash_final(a, b, c); /* Note: We need to add @lport instead of fully hashing it. * See commits 9544d60a2605 ("inet: change lport contribution From d4383c7c78635ed96b89646eeb000d66022f06f4 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 27 Mar 2026 13:23:45 +0100 Subject: [PATCH 0989/1561] net: dsa: qca8k: Use the right GPIO header The driver header for qca8k includes the legacy GPIO header but does not use any symbols from it and actually wants so fix this up. Signed-off-by: Linus Walleij Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20260327-net-dsa-qca8k-v1-1-94e613a5c369@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/qca/qca8k.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/qca/qca8k.h b/drivers/net/dsa/qca/qca8k.h index d046679265fa..1a00e2f62fef 100644 --- a/drivers/net/dsa/qca/qca8k.h +++ b/drivers/net/dsa/qca/qca8k.h @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include From 542d3ec4508f02c9d5d9cdf92f7f3ae6958ca67c Mon Sep 17 00:00:00 2001 From: Chris J Arges Date: Wed, 25 Mar 2026 15:09:47 -0500 Subject: [PATCH 0990/1561] bnxt_en: use bnxt_xdp_buff for xdp context This adds bnxt_xdp_buff which embeds the xdp_buff struct and stores pointers to hardware RX completion descriptors (rx_cmp and rx_cmp_ext) along with the completion type. Signed-off-by: Chris J Arges Link: https://patch.msgid.link/20260325201139.2501937-2-carges@cloudflare.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 29 ++++++++++++------- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h | 7 +++++ 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 84eb53b4172b..e70fd04fe5f3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2112,13 +2112,13 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, u32 tmp_raw_cons = *raw_cons; u16 cons, prod, cp_cons = RING_CMP(tmp_raw_cons); struct skb_shared_info *sinfo; + struct bnxt_xdp_buff bnxt_xdp; struct bnxt_sw_rx_bd *rx_buf; unsigned int len; u8 *data_ptr, agg_bufs, cmp_type; bool xdp_active = false; dma_addr_t dma_addr; struct sk_buff *skb; - struct xdp_buff xdp; u32 flags, misc; u32 cmpl_ts; void *data; @@ -2231,9 +2231,14 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, dma_addr = rx_buf->mapping; if (bnxt_xdp_attached(bp, rxr)) { - bnxt_xdp_buff_init(bp, rxr, cons, data_ptr, len, &xdp); + bnxt_xdp.rxcmp = rxcmp; + bnxt_xdp.rxcmp1 = rxcmp1; + bnxt_xdp.cmp_type = cmp_type; + + bnxt_xdp_buff_init(bp, rxr, cons, data_ptr, len, &bnxt_xdp.xdp); if (agg_bufs) { - u32 frag_len = bnxt_rx_agg_netmems_xdp(bp, cpr, &xdp, + u32 frag_len = bnxt_rx_agg_netmems_xdp(bp, cpr, + &bnxt_xdp.xdp, cp_cons, agg_bufs, false); @@ -2245,12 +2250,13 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, } if (xdp_active) { - if (bnxt_rx_xdp(bp, rxr, cons, &xdp, data, &data_ptr, &len, event)) { + if (bnxt_rx_xdp(bp, rxr, cons, &bnxt_xdp.xdp, data, &data_ptr, + &len, event)) { rc = 1; goto next_rx; } - if (xdp_buff_has_frags(&xdp)) { - sinfo = xdp_get_shared_info_from_buff(&xdp); + if (xdp_buff_has_frags(&bnxt_xdp.xdp)) { + sinfo = xdp_get_shared_info_from_buff(&bnxt_xdp.xdp); agg_bufs = sinfo->nr_frags; } else { agg_bufs = 0; @@ -2261,7 +2267,8 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, if (!xdp_active) skb = bnxt_copy_skb(bnapi, data_ptr, len, dma_addr); else - skb = bnxt_copy_xdp(bnapi, &xdp, len, dma_addr); + skb = bnxt_copy_xdp(bnapi, &bnxt_xdp.xdp, len, + dma_addr); bnxt_reuse_rx_data(rxr, cons, data); if (!skb) { if (agg_bufs) { @@ -2269,7 +2276,8 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, bnxt_reuse_rx_agg_bufs(cpr, cp_cons, 0, agg_bufs, false); else - bnxt_xdp_buff_frags_free(rxr, &xdp); + bnxt_xdp_buff_frags_free(rxr, + &bnxt_xdp.xdp); } goto oom_next_rx; } @@ -2293,10 +2301,11 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, if (!skb) goto oom_next_rx; } else { - skb = bnxt_xdp_build_skb(bp, skb, agg_bufs, rxr, &xdp); + skb = bnxt_xdp_build_skb(bp, skb, agg_bufs, + rxr, &bnxt_xdp.xdp); if (!skb) { /* we should be able to free the old skb here */ - bnxt_xdp_buff_frags_free(rxr, &xdp); + bnxt_xdp_buff_frags_free(rxr, &bnxt_xdp.xdp); goto oom_next_rx; } } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h index 8933a0dec09a..8c66698bde11 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h @@ -12,6 +12,13 @@ DECLARE_STATIC_KEY_FALSE(bnxt_xdp_locking_key); +struct bnxt_xdp_buff { + struct xdp_buff xdp; + struct rx_cmp *rxcmp; + struct rx_cmp_ext *rxcmp1; + u8 cmp_type; +}; + struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp, struct bnxt_tx_ring_info *txr, dma_addr_t mapping, u32 len, From e5648b08cbfc9e7921138d9ace830b48824608f2 Mon Sep 17 00:00:00 2001 From: Chris J Arges Date: Wed, 25 Mar 2026 15:09:48 -0500 Subject: [PATCH 0991/1561] bnxt_en: Implement XDP RSS hash metadata extraction Add support for extracting RSS hash values and hash types from hardware completion descriptors in XDP programs for bnxt_en. Add IP_TYPE definition for determining if completion is ipv4 or ipv6. In addition add ITYPE_ICMP flag for identifying ICMP completions. Signed-off-by: Chris J Arges Reviewed-by: Joe Damato Reviewed-by: Andy Gospodarek Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 ++ drivers/net/ethernet/broadcom/bnxt/bnxt.h | 2 + drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 47 +++++++++++++++++++ drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h | 3 ++ 4 files changed, 57 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index e70fd04fe5f3..d15548dfd4a3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -15911,6 +15911,10 @@ static const struct net_device_ops bnxt_netdev_ops = { .ndo_hwtstamp_set = bnxt_hwtstamp_set, }; +static const struct xdp_metadata_ops bnxt_xdp_metadata_ops = { + .xmo_rx_hash = bnxt_xdp_rx_hash, +}; + static void bnxt_get_queue_stats_rx(struct net_device *dev, int i, struct netdev_queue_stats_rx *stats) { @@ -16795,6 +16799,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) goto init_err_free; dev->netdev_ops = &bnxt_netdev_ops; + dev->xdp_metadata_ops = &bnxt_xdp_metadata_ops; dev->stat_ops = &bnxt_stat_ops; dev->watchdog_timeo = BNXT_TX_TIMEOUT; dev->ethtool_ops = &bnxt_ethtool_ops; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index dd0f6743acf5..99f45686ed09 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -232,6 +232,7 @@ struct rx_cmp { #define RX_CMP_FLAGS_ITYPE_UDP (3 << 12) #define RX_CMP_FLAGS_ITYPE_FCOE (4 << 12) #define RX_CMP_FLAGS_ITYPE_ROCE (5 << 12) + #define RX_CMP_FLAGS_ITYPE_ICMP (7 << 12) #define RX_CMP_FLAGS_ITYPE_PTP_WO_TS (8 << 12) #define RX_CMP_FLAGS_ITYPE_PTP_W_TS (9 << 12) #define RX_CMP_LEN (0xffff << 16) @@ -311,6 +312,7 @@ struct rx_cmp_ext { #define RX_CMP_FLAGS2_T_IP_CS_CALC (0x1 << 2) #define RX_CMP_FLAGS2_T_L4_CS_CALC (0x1 << 3) #define RX_CMP_FLAGS2_META_FORMAT_VLAN (0x1 << 4) + #define RX_CMP_FLAGS2_IP_TYPE (0x1 << 8) __le32 rx_cmp_meta_data; #define RX_CMP_FLAGS2_METADATA_TCI_MASK 0xffff #define RX_CMP_FLAGS2_METADATA_VID_MASK 0xfff diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 85cbeb35681c..1f920464b426 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -472,3 +472,50 @@ bnxt_xdp_build_skb(struct bnxt *bp, struct sk_buff *skb, u8 num_frags, xdp_buff_get_skb_flags(xdp)); return skb; } + +int bnxt_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash, + enum xdp_rss_hash_type *rss_type) +{ + const struct bnxt_xdp_buff *xdp = (void *)ctx; + const struct rx_cmp_ext *rxcmp1 = xdp->rxcmp1; + const struct rx_cmp *rxcmp = xdp->rxcmp; + enum xdp_rss_hash_type hash_type = 0; + u32 itypes; + + if (!rxcmp || !RX_CMP_HASH_VALID(rxcmp)) + return -ENODATA; + + *hash = le32_to_cpu(rxcmp->rx_cmp_rss_hash); + + if (!rxcmp1) { + *rss_type = XDP_RSS_TYPE_L2; + return 0; + } + + if (xdp->cmp_type == CMP_TYPE_RX_L2_CMP) { + itypes = RX_CMP_ITYPES(rxcmp); + if (rxcmp1->rx_cmp_flags2 & + cpu_to_le32(RX_CMP_FLAGS2_IP_TYPE)) { + hash_type |= XDP_RSS_TYPE_L3_IPV6; + } else { + hash_type |= XDP_RSS_TYPE_L3_IPV4; + } + + switch (itypes) { + case RX_CMP_FLAGS_ITYPE_TCP: + hash_type |= XDP_RSS_L4 | XDP_RSS_L4_TCP; + break; + case RX_CMP_FLAGS_ITYPE_UDP: + hash_type |= XDP_RSS_L4 | XDP_RSS_L4_UDP; + break; + case RX_CMP_FLAGS_ITYPE_ICMP: + hash_type |= XDP_RSS_L4 | XDP_RSS_L4_ICMP; + break; + default: + break; + } + } + + *rss_type = hash_type; + return 0; +} diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h index 8c66698bde11..fb4f9143929f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h @@ -41,4 +41,7 @@ void bnxt_xdp_buff_frags_free(struct bnxt_rx_ring_info *rxr, struct sk_buff *bnxt_xdp_build_skb(struct bnxt *bp, struct sk_buff *skb, u8 num_frags, struct bnxt_rx_ring_info *rxr, struct xdp_buff *xdp); +int bnxt_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash, + enum xdp_rss_hash_type *rss_type); + #endif From 4a037aeb6bc8e1fbf4d131d4d06544a9857ad604 Mon Sep 17 00:00:00 2001 From: Chris J Arges Date: Wed, 25 Mar 2026 15:09:49 -0500 Subject: [PATCH 0992/1561] bnxt_en: Move bnxt_rss_ext_op into header This allows bnxt_rss_ext_op to be used by other functions. In addition this modifies the rxcmp argument to be const since the function only reads from this structure. Reviewed-by: Joe Damato Signed-off-by: Chris J Arges Link: https://patch.msgid.link/20260325201139.2501937-4-carges@cloudflare.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 17 ----------------- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 17 +++++++++++++++++ 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index d15548dfd4a3..8ec611bc01ee 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2077,23 +2077,6 @@ vlan_err: return NULL; } -static enum pkt_hash_types bnxt_rss_ext_op(struct bnxt *bp, - struct rx_cmp *rxcmp) -{ - u8 ext_op; - - ext_op = RX_CMP_V3_HASH_TYPE(bp, rxcmp); - switch (ext_op) { - case EXT_OP_INNER_4: - case EXT_OP_OUTER_4: - case EXT_OP_INNFL_3: - case EXT_OP_OUTFL_3: - return PKT_HASH_TYPE_L4; - default: - return PKT_HASH_TYPE_L3; - } -} - /* returns the following: * 1 - 1 packet successfully received * 0 - successful TPA_START, packet not completed yet diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 99f45686ed09..30efcfbb4791 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2902,6 +2902,23 @@ static inline bool bnxt_sriov_cfg(struct bnxt *bp) #endif } +static inline enum pkt_hash_types bnxt_rss_ext_op(struct bnxt *bp, + const struct rx_cmp *rxcmp) +{ + u8 ext_op; + + ext_op = RX_CMP_V3_HASH_TYPE(bp, rxcmp); + switch (ext_op) { + case EXT_OP_INNER_4: + case EXT_OP_OUTER_4: + case EXT_OP_INNFL_3: + case EXT_OP_OUTFL_3: + return PKT_HASH_TYPE_L4; + default: + return PKT_HASH_TYPE_L3; + } +} + extern const u16 bnxt_bstore_to_trace[]; extern const u16 bnxt_lhint_arr[]; From 5ec22d5f2afcf2f9d7d631dcc2f848dda64a4933 Mon Sep 17 00:00:00 2001 From: Chris J Arges Date: Wed, 25 Mar 2026 15:09:50 -0500 Subject: [PATCH 0993/1561] bnxt_en: Implement XDP RSS hash metadata extraction for V3_CMP This adds another conditional when cmp_type is CMP_TYPE_RX_L2_V3_CMP for drivers that support this completion format. This re-uses bnxt_rss_ext_op to provide similar functionality. One limitation is for L4 hash-types, protocol-specific bits can't be determined. Reviewed-by: Joe Damato Signed-off-by: Chris J Arges Link: https://patch.msgid.link/20260325201139.2501937-5-carges@cloudflare.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 1f920464b426..babcd463e50e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -514,6 +514,16 @@ int bnxt_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash, default: break; } + } else if (xdp->cmp_type == CMP_TYPE_RX_L2_V3_CMP) { + struct bnxt *bp = netdev_priv(xdp->xdp.rxq->dev); + + if (rxcmp1->rx_cmp_flags2 & cpu_to_le32(RX_CMP_FLAGS2_IP_TYPE)) + hash_type |= XDP_RSS_TYPE_L3_IPV6; + else + hash_type |= XDP_RSS_TYPE_L3_IPV4; + + if (bnxt_rss_ext_op(bp, rxcmp) == PKT_HASH_TYPE_L4) + hash_type |= XDP_RSS_L4; } *rss_type = hash_type; From 4ce06406958b67fdddcc2e6948237dd6ff6ba112 Mon Sep 17 00:00:00 2001 From: Chris J Arges Date: Wed, 25 Mar 2026 15:09:51 -0500 Subject: [PATCH 0994/1561] selftests: net: move common xdp.py functions into lib This moves a few functions which can be useful to other python programs that manipulate XDP programs. This also refactors xdp.py to use the refactored functions. Signed-off-by: Chris J Arges Link: https://patch.msgid.link/20260325201139.2501937-6-carges@cloudflare.com Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/lib/py/__init__.py | 2 + tools/testing/selftests/drivers/net/xdp.py | 96 +++++-------------- .../testing/selftests/net/lib/py/__init__.py | 2 + tools/testing/selftests/net/lib/py/bpf.py | 68 +++++++++++++ 4 files changed, 94 insertions(+), 74 deletions(-) create mode 100644 tools/testing/selftests/net/lib/py/bpf.py diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py index 374d4f08dd05..2b5ec0505672 100644 --- a/tools/testing/selftests/drivers/net/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py @@ -24,6 +24,7 @@ try: from net.lib.py import CmdExitFailure from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \ fd_read_timeout, ip, rand_port, rand_ports, wait_port_listen, wait_file + from net.lib.py import bpf_map_set, bpf_map_dump, bpf_prog_map_ids from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \ ksft_setup, ksft_variants, KsftNamedVariant @@ -37,6 +38,7 @@ try: "bkg", "cmd", "bpftool", "bpftrace", "defer", "ethtool", "fd_read_timeout", "ip", "rand_port", "rand_ports", "wait_port_listen", "wait_file", + "bpf_map_set", "bpf_map_dump", "bpf_prog_map_ids", "KsftSkipEx", "KsftFailEx", "KsftXfailEx", "ksft_disruptive", "ksft_exit", "ksft_pr", "ksft_run", "ksft_setup", "ksft_variants", "KsftNamedVariant", diff --git a/tools/testing/selftests/drivers/net/xdp.py b/tools/testing/selftests/drivers/net/xdp.py index e54df158dfe9..d86446569f89 100755 --- a/tools/testing/selftests/drivers/net/xdp.py +++ b/tools/testing/selftests/drivers/net/xdp.py @@ -16,7 +16,8 @@ from lib.py import KsftNamedVariant, ksft_variants from lib.py import KsftFailEx, NetDrvEpEnv from lib.py import EthtoolFamily, NetdevFamily, NlError from lib.py import bkg, cmd, rand_port, wait_port_listen -from lib.py import ip, bpftool, defer +from lib.py import ip, defer +from lib.py import bpf_map_set, bpf_map_dump, bpf_prog_map_ids class TestConfig(Enum): @@ -122,47 +123,11 @@ def _load_xdp_prog(cfg, bpf_info): xdp_info = ip(f"-d link show dev {cfg.ifname}", json=True)[0] prog_info["id"] = xdp_info["xdp"]["prog"]["id"] prog_info["name"] = xdp_info["xdp"]["prog"]["name"] - prog_id = prog_info["id"] - - map_ids = bpftool(f"prog show id {prog_id}", json=True)["map_ids"] - prog_info["maps"] = {} - for map_id in map_ids: - name = bpftool(f"map show id {map_id}", json=True)["name"] - prog_info["maps"][name] = map_id + prog_info["maps"] = bpf_prog_map_ids(prog_info["id"]) return prog_info -def format_hex_bytes(value): - """ - Helper function that converts an integer into a formatted hexadecimal byte string. - - Args: - value: An integer representing the number to be converted. - - Returns: - A string representing hexadecimal equivalent of value, with bytes separated by spaces. - """ - hex_str = value.to_bytes(4, byteorder='little', signed=True) - return ' '.join(f'{byte:02x}' for byte in hex_str) - - -def _set_xdp_map(map_name, key, value): - """ - Updates an XDP map with a given key-value pair using bpftool. - - Args: - map_name: The name of the XDP map to update. - key: The key to update in the map, formatted as a hexadecimal string. - value: The value to associate with the key, formatted as a hexadecimal string. - """ - key_formatted = format_hex_bytes(key) - value_formatted = format_hex_bytes(value) - bpftool( - f"map update name {map_name} key hex {key_formatted} value hex {value_formatted}" - ) - - def _get_stats(xdp_map_id): """ Retrieves and formats statistics from an XDP map. @@ -177,25 +142,11 @@ def _get_stats(xdp_map_id): Raises: KsftFailEx: If the stats retrieval fails. """ - stats_dump = bpftool(f"map dump id {xdp_map_id}", json=True) - if not stats_dump: + stats = bpf_map_dump(xdp_map_id) + if not stats: raise KsftFailEx(f"Failed to get stats for map {xdp_map_id}") - stats_formatted = {} - for key in range(0, 5): - val = stats_dump[key]["formatted"]["value"] - if stats_dump[key]["formatted"]["key"] == XDPStats.RX.value: - stats_formatted[XDPStats.RX.value] = val - elif stats_dump[key]["formatted"]["key"] == XDPStats.PASS.value: - stats_formatted[XDPStats.PASS.value] = val - elif stats_dump[key]["formatted"]["key"] == XDPStats.DROP.value: - stats_formatted[XDPStats.DROP.value] = val - elif stats_dump[key]["formatted"]["key"] == XDPStats.TX.value: - stats_formatted[XDPStats.TX.value] = val - elif stats_dump[key]["formatted"]["key"] == XDPStats.ABORT.value: - stats_formatted[XDPStats.ABORT.value] = val - - return stats_formatted + return stats def _test_pass(cfg, bpf_info, msg_sz): @@ -211,8 +162,8 @@ def _test_pass(cfg, bpf_info, msg_sz): prog_info = _load_xdp_prog(cfg, bpf_info) port = rand_port() - _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.PASS.value) - _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port) + bpf_map_set("map_xdp_setup", TestConfig.MODE.value, XDPAction.PASS.value) + bpf_map_set("map_xdp_setup", TestConfig.PORT.value, port) ksft_eq(_test_udp(cfg, port, msg_sz), True, "UDP packet exchange failed") stats = _get_stats(prog_info["maps"]["map_xdp_stats"]) @@ -258,8 +209,8 @@ def _test_drop(cfg, bpf_info, msg_sz): prog_info = _load_xdp_prog(cfg, bpf_info) port = rand_port() - _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.DROP.value) - _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port) + bpf_map_set("map_xdp_setup", TestConfig.MODE.value, XDPAction.DROP.value) + bpf_map_set("map_xdp_setup", TestConfig.PORT.value, port) ksft_eq(_test_udp(cfg, port, msg_sz), False, "UDP packet exchange should fail") stats = _get_stats(prog_info["maps"]["map_xdp_stats"]) @@ -305,8 +256,8 @@ def _test_xdp_native_tx(cfg, bpf_info, payload_lens): prog_info = _load_xdp_prog(cfg, bpf_info) port = rand_port() - _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.TX.value) - _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port) + bpf_map_set("map_xdp_setup", TestConfig.MODE.value, XDPAction.TX.value) + bpf_map_set("map_xdp_setup", TestConfig.PORT.value, port) expected_pkts = 0 for payload_len in payload_lens: @@ -454,15 +405,15 @@ def _test_xdp_native_tail_adjst(cfg, pkt_sz_lst, offset_lst): prog_info = _load_xdp_prog(cfg, bpf_info) # Configure the XDP map for tail adjustment - _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.TAIL_ADJST.value) - _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port) + bpf_map_set("map_xdp_setup", TestConfig.MODE.value, XDPAction.TAIL_ADJST.value) + bpf_map_set("map_xdp_setup", TestConfig.PORT.value, port) for offset in offset_lst: tag = format(random.randint(65, 90), "02x") - _set_xdp_map("map_xdp_setup", TestConfig.ADJST_OFFSET.value, offset) + bpf_map_set("map_xdp_setup", TestConfig.ADJST_OFFSET.value, offset) if offset > 0: - _set_xdp_map("map_xdp_setup", TestConfig.ADJST_TAG.value, int(tag, 16)) + bpf_map_set("map_xdp_setup", TestConfig.ADJST_TAG.value, int(tag, 16)) for pkt_sz in pkt_sz_lst: test_str = "".join(random.choice(string.ascii_lowercase) for _ in range(pkt_sz)) @@ -574,8 +525,8 @@ def _test_xdp_native_head_adjst(cfg, prog, pkt_sz_lst, offset_lst): prog_info = _load_xdp_prog(cfg, BPFProgInfo(prog, "xdp_native.bpf.o", "xdp.frags", 9000)) port = rand_port() - _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.HEAD_ADJST.value) - _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port) + bpf_map_set("map_xdp_setup", TestConfig.MODE.value, XDPAction.HEAD_ADJST.value) + bpf_map_set("map_xdp_setup", TestConfig.PORT.value, port) hds_thresh = get_hds_thresh(cfg) for offset in offset_lst: @@ -595,11 +546,8 @@ def _test_xdp_native_head_adjst(cfg, prog, pkt_sz_lst, offset_lst): test_str = ''.join(random.choice(string.ascii_lowercase) for _ in range(pkt_sz)) tag = format(random.randint(65, 90), '02x') - _set_xdp_map("map_xdp_setup", - TestConfig.ADJST_OFFSET.value, - offset) - _set_xdp_map("map_xdp_setup", TestConfig.ADJST_TAG.value, int(tag, 16)) - _set_xdp_map("map_xdp_setup", TestConfig.ADJST_OFFSET.value, offset) + bpf_map_set("map_xdp_setup", TestConfig.ADJST_OFFSET.value, offset) + bpf_map_set("map_xdp_setup", TestConfig.ADJST_TAG.value, int(tag, 16)) recvd_str = _exchg_udp(cfg, port, test_str) @@ -691,8 +639,8 @@ def test_xdp_native_qstats(cfg, act): prog_info = _load_xdp_prog(cfg, bpf_info) port = rand_port() - _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, act.value) - _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port) + bpf_map_set("map_xdp_setup", TestConfig.MODE.value, act.value) + bpf_map_set("map_xdp_setup", TestConfig.PORT.value, port) # Discard the input, but we need a listener to avoid ICMP errors rx_udp = f"socat -{cfg.addr_ipver} -T 2 -u UDP-RECV:{port},reuseport " + \ diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py index e0c920041a58..7c81d86a7e97 100644 --- a/tools/testing/selftests/net/lib/py/__init__.py +++ b/tools/testing/selftests/net/lib/py/__init__.py @@ -15,6 +15,7 @@ from .nsim import NetdevSim, NetdevSimDev from .utils import CmdExitFailure, fd_read_timeout, cmd, bkg, defer, \ bpftool, ip, ethtool, bpftrace, rand_port, rand_ports, wait_port_listen, \ wait_file, tool +from .bpf import bpf_map_set, bpf_map_dump, bpf_prog_map_ids from .ynl import NlError, NlctrlFamily, YnlFamily, \ EthtoolFamily, NetdevFamily, RtnlFamily, RtnlAddrFamily from .ynl import NetshaperFamily, DevlinkFamily, PSPFamily, Netlink @@ -29,6 +30,7 @@ __all__ = ["KSRC", "CmdExitFailure", "fd_read_timeout", "cmd", "bkg", "defer", "bpftool", "ip", "ethtool", "bpftrace", "rand_port", "rand_ports", "wait_port_listen", "wait_file", "tool", + "bpf_map_set", "bpf_map_dump", "bpf_prog_map_ids", "NetdevSim", "NetdevSimDev", "NetshaperFamily", "DevlinkFamily", "PSPFamily", "NlError", "YnlFamily", "EthtoolFamily", "NetdevFamily", "RtnlFamily", diff --git a/tools/testing/selftests/net/lib/py/bpf.py b/tools/testing/selftests/net/lib/py/bpf.py new file mode 100644 index 000000000000..beb6bf2896a8 --- /dev/null +++ b/tools/testing/selftests/net/lib/py/bpf.py @@ -0,0 +1,68 @@ +# SPDX-License-Identifier: GPL-2.0 + +""" +BPF helper utilities for kernel selftests. + +Provides common operations for interacting with BPF maps and programs +via bpftool, used by XDP and other BPF-based test files. +""" + +from .utils import bpftool + +def _format_hex_bytes(value): + """ + Helper function that converts an integer into a formatted hexadecimal byte string. + + Args: + value: An integer representing the number to be converted. + + Returns: + A string representing hexadecimal equivalent of value, with bytes separated by spaces. + """ + hex_str = value.to_bytes(4, byteorder='little', signed=True) + return ' '.join(f'{byte:02x}' for byte in hex_str) + + +def bpf_map_set(map_name, key, value): + """ + Updates an XDP map with a given key-value pair using bpftool. + + Args: + map_name: The name of the XDP map to update. + key: The key to update in the map, formatted as a hexadecimal string. + value: The value to associate with the key, formatted as a hexadecimal string. + """ + key_formatted = _format_hex_bytes(key) + value_formatted = _format_hex_bytes(value) + bpftool( + f"map update name {map_name} key hex {key_formatted} value hex {value_formatted}" + ) + +def bpf_map_dump(map_id): + """Dump all entries of a BPF array map. + + Args: + map_id: Numeric map ID (as returned by bpftool prog show). + + Returns: + A dict mapping formatted key (int) to formatted value (int). + """ + raw = bpftool(f"map dump id {map_id}", json=True) + return {e["formatted"]["key"]: e["formatted"]["value"] for e in raw} + + +def bpf_prog_map_ids(prog_id): + """Get the map name-to-ID mapping for a loaded BPF program. + + Args: + prog_id: Numeric program ID. + + Returns: + A dict mapping map name (str) to map ID (int). + """ + map_ids = bpftool(f"prog show id {prog_id}", json=True)["map_ids"] + maps = {} + for mid in map_ids: + name = bpftool(f"map show id {mid}", json=True)["name"] + maps[name] = mid + return maps From e3cdf6cf5fc6db0643723083e2c70fffe098e249 Mon Sep 17 00:00:00 2001 From: Chris J Arges Date: Wed, 25 Mar 2026 15:09:52 -0500 Subject: [PATCH 0995/1561] selftests: drv-net: xdp: Add rss_hash metadata tests This test loads xdp_metadata.bpf which calls bpf_xdp_metadata_rx_hash() on incoming packets. The metadata from that packet is then sent to a BPF map for validation. It borrows structure from xdp.py, reusing common functions. The test checks the device's xdp-rx-metadata-features via netlink before running and skips on devices that do not advertise hash support. This can be run on veth devices as well as real hardware. The test is fairly simple and just verifies that a TCP or UDP packet can be identified as an L4 flow. This minimal test also passes if run on a veth device. Signed-off-by: Chris J Arges Link: https://patch.msgid.link/20260325201139.2501937-7-carges@cloudflare.com Signed-off-by: Jakub Kicinski --- .../testing/selftests/drivers/net/hw/Makefile | 1 + .../drivers/net/hw/lib/py/__init__.py | 2 + .../selftests/drivers/net/hw/xdp_metadata.py | 146 ++++++++++++++++ .../selftests/net/lib/xdp_metadata.bpf.c | 163 ++++++++++++++++++ 4 files changed, 312 insertions(+) create mode 100644 tools/testing/selftests/drivers/net/hw/xdp_metadata.py create mode 100644 tools/testing/selftests/net/lib/xdp_metadata.bpf.c diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile index 3c97dac9baaa..884cc77daeaa 100644 --- a/tools/testing/selftests/drivers/net/hw/Makefile +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -42,6 +42,7 @@ TEST_PROGS = \ rss_input_xfrm.py \ toeplitz.py \ tso.py \ + xdp_metadata.py \ xsk_reconfig.py \ # diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py index b8d9ae282390..df4da5078c48 100644 --- a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py @@ -25,6 +25,7 @@ try: from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \ fd_read_timeout, ip, rand_port, rand_ports, wait_port_listen, \ wait_file, tool + from net.lib.py import bpf_map_set, bpf_map_dump, bpf_prog_map_ids from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \ ksft_setup, ksft_variants, KsftNamedVariant @@ -40,6 +41,7 @@ try: "bkg", "cmd", "bpftool", "bpftrace", "defer", "ethtool", "fd_read_timeout", "ip", "rand_port", "rand_ports", "wait_port_listen", "wait_file", "tool", + "bpf_map_set", "bpf_map_dump", "bpf_prog_map_ids", "KsftSkipEx", "KsftFailEx", "KsftXfailEx", "ksft_disruptive", "ksft_exit", "ksft_pr", "ksft_run", "ksft_setup", "ksft_variants", "KsftNamedVariant", diff --git a/tools/testing/selftests/drivers/net/hw/xdp_metadata.py b/tools/testing/selftests/drivers/net/hw/xdp_metadata.py new file mode 100644 index 000000000000..33a1985356d9 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/xdp_metadata.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Tests for XDP metadata kfuncs (e.g. bpf_xdp_metadata_rx_hash). + +These tests load device-bound XDP programs from xdp_metadata.bpf.o +that call metadata kfuncs, send traffic, and verify the extracted +metadata via BPF maps. +""" +from lib.py import ksft_run, ksft_eq, ksft_exit, ksft_ge, ksft_ne, ksft_pr +from lib.py import KsftNamedVariant, ksft_variants +from lib.py import CmdExitFailure, KsftSkipEx, NetDrvEpEnv +from lib.py import NetdevFamily +from lib.py import bkg, cmd, rand_port, wait_port_listen +from lib.py import ip, bpftool, defer +from lib.py import bpf_map_set, bpf_map_dump, bpf_prog_map_ids + + +def _load_xdp_metadata_prog(cfg, prog_name, bpf_file="xdp_metadata.bpf.o"): + """Load a device-bound XDP metadata program and return prog/map info. + + Returns: + dict with 'id', 'name', and 'maps' (name -> map_id). + """ + abs_path = cfg.net_lib_dir / bpf_file + pin_dir = "/sys/fs/bpf/xdp_metadata_test" + + cmd(f"rm -rf {pin_dir}", shell=True, fail=False) + cmd(f"mkdir -p {pin_dir}", shell=True) + + try: + bpftool(f"prog loadall {abs_path} {pin_dir} type xdp " + f"xdpmeta_dev {cfg.ifname}") + except CmdExitFailure as e: + cmd(f"rm -rf {pin_dir}", shell=True, fail=False) + raise KsftSkipEx( + f"Failed to load device-bound XDP program '{prog_name}'" + ) from e + defer(cmd, f"rm -rf {pin_dir}", shell=True, fail=False) + + pin_path = f"{pin_dir}/{prog_name}" + ip(f"link set dev {cfg.ifname} xdpdrv pinned {pin_path}") + defer(ip, f"link set dev {cfg.ifname} xdpdrv off") + + xdp_info = ip(f"-d link show dev {cfg.ifname}", json=True)[0] + prog_id = xdp_info["xdp"]["prog"]["id"] + + return {"id": prog_id, + "name": xdp_info["xdp"]["prog"]["name"], + "maps": bpf_prog_map_ids(prog_id)} + + +def _send_probe(cfg, port, proto="tcp"): + """Send a single payload from the remote end using socat. + + Args: + cfg: Configuration object containing network settings. + port: Port number for the exchange. + proto: Protocol to use, either "tcp" or "udp". + """ + cfg.require_cmd("socat", remote=True) + + if proto == "tcp": + rx_cmd = f"socat -{cfg.addr_ipver} -T 2 TCP-LISTEN:{port},reuseport STDOUT" + tx_cmd = f"echo -n rss_hash_test | socat -t 2 -u STDIN TCP:{cfg.baddr}:{port}" + else: + rx_cmd = f"socat -{cfg.addr_ipver} -T 2 -u UDP-RECV:{port},reuseport STDOUT" + tx_cmd = f"echo -n rss_hash_test | socat -t 2 -u STDIN UDP:{cfg.baddr}:{port}" + + with bkg(rx_cmd, exit_wait=True): + wait_port_listen(port, proto=proto) + cmd(tx_cmd, host=cfg.remote, shell=True) + + +# BPF map keys matching the enums in xdp_metadata.bpf.c +_SETUP_KEY_PORT = 1 + +_RSS_KEY_HASH = 0 +_RSS_KEY_TYPE = 1 +_RSS_KEY_PKT_CNT = 2 +_RSS_KEY_ERR_CNT = 3 + +XDP_RSS_L4 = 0x8 # BIT(3) from enum xdp_rss_hash_type + + +@ksft_variants([ + KsftNamedVariant("tcp", "tcp"), + KsftNamedVariant("udp", "udp"), +]) +def test_xdp_rss_hash(cfg, proto): + """Test RSS hash metadata extraction via bpf_xdp_metadata_rx_hash(). + + This test will only run on devices that support xdp-rx-metadata-features. + + Loads the xdp_rss_hash program from xdp_metadata, sends a packet using + the specified protocol, and verifies that the program extracted a non-zero + hash with an L4 hash type. + """ + dev_info = cfg.netnl.dev_get({"ifindex": cfg.ifindex}) + rx_meta = dev_info.get("xdp-rx-metadata-features", []) + if "hash" not in rx_meta: + raise KsftSkipEx("device does not support XDP rx hash metadata") + + prog_info = _load_xdp_metadata_prog(cfg, "xdp_rss_hash") + + port = rand_port() + bpf_map_set("map_xdp_setup", _SETUP_KEY_PORT, port) + + rss_map_id = prog_info["maps"]["map_rss"] + + _send_probe(cfg, port, proto=proto) + + rss = bpf_map_dump(rss_map_id) + + pkt_cnt = rss.get(_RSS_KEY_PKT_CNT, 0) + err_cnt = rss.get(_RSS_KEY_ERR_CNT, 0) + hash_val = rss.get(_RSS_KEY_HASH, 0) + hash_type = rss.get(_RSS_KEY_TYPE, 0) + + ksft_ge(pkt_cnt, 1, comment="should have received at least one packet") + ksft_eq(err_cnt, 0, comment=f"RSS hash error count: {err_cnt}") + + ksft_ne(hash_val, 0, + f"RSS hash should be non-zero for {proto.upper()} traffic") + ksft_pr(f" RSS hash: {hash_val:#010x}") + + ksft_pr(f" RSS hash type: {hash_type:#06x}") + ksft_ne(hash_type & XDP_RSS_L4, 0, + f"RSS hash type should include L4 for {proto.upper()} traffic") + + +def main(): + """Run XDP metadata kfunc tests against a real device.""" + with NetDrvEpEnv(__file__) as cfg: + cfg.netnl = NetdevFamily() + ksft_run( + [ + test_xdp_rss_hash, + ], + args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/net/lib/xdp_metadata.bpf.c b/tools/testing/selftests/net/lib/xdp_metadata.bpf.c new file mode 100644 index 000000000000..f71f59215239 --- /dev/null +++ b/tools/testing/selftests/net/lib/xdp_metadata.bpf.c @@ -0,0 +1,163 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +enum { + XDP_PORT = 1, + XDP_PROTO = 4, +} xdp_map_setup_keys; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 5); + __type(key, __u32); + __type(value, __s32); +} map_xdp_setup SEC(".maps"); + +/* RSS hash results: key 0 = hash, key 1 = hash type, + * key 2 = packet count, key 3 = error count. + */ +enum { + RSS_KEY_HASH = 0, + RSS_KEY_TYPE = 1, + RSS_KEY_PKT_CNT = 2, + RSS_KEY_ERR_CNT = 3, +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, 4); +} map_rss SEC(".maps"); + +/* Mirror of enum xdp_rss_hash_type from include/net/xdp.h. + * Needed because the enum is not part of UAPI headers. + */ +enum xdp_rss_hash_type { + XDP_RSS_L3_IPV4 = 1U << 0, + XDP_RSS_L3_IPV6 = 1U << 1, + XDP_RSS_L3_DYNHDR = 1U << 2, + XDP_RSS_L4 = 1U << 3, + XDP_RSS_L4_TCP = 1U << 4, + XDP_RSS_L4_UDP = 1U << 5, + XDP_RSS_L4_SCTP = 1U << 6, + XDP_RSS_L4_IPSEC = 1U << 7, + XDP_RSS_L4_ICMP = 1U << 8, +}; + +extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash, + enum xdp_rss_hash_type *rss_type) __ksym; + +static __always_inline __u16 get_dest_port(void *l4, void *data_end, + __u8 protocol) +{ + if (protocol == IPPROTO_UDP) { + struct udphdr *udp = l4; + + if ((void *)(udp + 1) > data_end) + return 0; + return udp->dest; + } else if (protocol == IPPROTO_TCP) { + struct tcphdr *tcp = l4; + + if ((void *)(tcp + 1) > data_end) + return 0; + return tcp->dest; + } + + return 0; +} + +SEC("xdp") +int xdp_rss_hash(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + enum xdp_rss_hash_type rss_type = 0; + struct ethhdr *eth = data; + __u8 l4_proto = 0; + __u32 hash = 0; + __u32 key, val; + void *l4 = NULL; + __u32 *cnt; + int ret; + + if ((void *)(eth + 1) > data_end) + return XDP_PASS; + + if (eth->h_proto == bpf_htons(ETH_P_IP)) { + struct iphdr *iph = (void *)(eth + 1); + + if ((void *)(iph + 1) > data_end) + return XDP_PASS; + l4_proto = iph->protocol; + l4 = (void *)(iph + 1); + } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { + struct ipv6hdr *ip6h = (void *)(eth + 1); + + if ((void *)(ip6h + 1) > data_end) + return XDP_PASS; + l4_proto = ip6h->nexthdr; + l4 = (void *)(ip6h + 1); + } + + if (!l4) + return XDP_PASS; + + /* Filter on the configured protocol (map_xdp_setup key XDP_PROTO). + * When set, only process packets matching the requested L4 protocol. + */ + key = XDP_PROTO; + __s32 *proto_cfg = bpf_map_lookup_elem(&map_xdp_setup, &key); + + if (proto_cfg && *proto_cfg != 0 && l4_proto != (__u8)*proto_cfg) + return XDP_PASS; + + /* Filter on the configured port (map_xdp_setup key XDP_PORT). + * Only applies to protocols with ports (UDP, TCP). + */ + key = XDP_PORT; + __s32 *port_cfg = bpf_map_lookup_elem(&map_xdp_setup, &key); + + if (port_cfg && *port_cfg != 0) { + __u16 dest = get_dest_port(l4, data_end, l4_proto); + + if (!dest || bpf_ntohs(dest) != (__u16)*port_cfg) + return XDP_PASS; + } + + ret = bpf_xdp_metadata_rx_hash(ctx, &hash, &rss_type); + if (ret < 0) { + key = RSS_KEY_ERR_CNT; + cnt = bpf_map_lookup_elem(&map_rss, &key); + if (cnt) + __sync_fetch_and_add(cnt, 1); + return XDP_PASS; + } + + key = RSS_KEY_HASH; + bpf_map_update_elem(&map_rss, &key, &hash, BPF_ANY); + + key = RSS_KEY_TYPE; + val = (__u32)rss_type; + bpf_map_update_elem(&map_rss, &key, &val, BPF_ANY); + + key = RSS_KEY_PKT_CNT; + cnt = bpf_map_lookup_elem(&map_rss, &key); + if (cnt) + __sync_fetch_and_add(cnt, 1); + + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; From 57ec1622b62ae8dcd88a0084c49c5b72b1850e68 Mon Sep 17 00:00:00 2001 From: Tomas Alvarez Vanoli Date: Thu, 26 Mar 2026 11:02:32 +0100 Subject: [PATCH 0996/1561] net: wan/fsl_ucc_hdlc: cleanup ucc_hdlc_poll Immediately after setting to 0 we are adding to it, and subtracting 0 from budget. Replace with just assignment and no subtraction. Signed-off-by: Tomas Alvarez Vanoli Reviewed-by: Christophe Leroy (CS GROUP) Link: https://patch.msgid.link/20260326100232.904289-1-tomas.alvarez-vanoli@hitachienergy.com Signed-off-by: Jakub Kicinski --- drivers/net/wan/fsl_ucc_hdlc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c index b72ab9951875..3bd57527b1be 100644 --- a/drivers/net/wan/fsl_ucc_hdlc.c +++ b/drivers/net/wan/fsl_ucc_hdlc.c @@ -628,8 +628,7 @@ static int ucc_hdlc_poll(struct napi_struct *napi, int budget) hdlc_tx_done(priv); spin_unlock(&priv->lock); - howmany = 0; - howmany += hdlc_rx_done(priv, budget - howmany); + howmany = hdlc_rx_done(priv, budget); if (howmany < budget) { napi_complete_done(napi, howmany); From 8ccf062c6770618cb19f72ec0202463e01e2c7b8 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 25 Mar 2026 16:28:05 +0000 Subject: [PATCH 0997/1561] Revert "net: macb: Clean up the .usrio settings in macb_config instances" Commit 0ae998c4efd69 ("net: macb: Clean up the .usrio settings in macb_config instances") was a misguided attempt to clean up the driver that actually just propagated problematic code. The default for usrio is actually no usrio, and already there are issues with people using the problematically named "macb_default_usrio" on platforms where the usrio does not have this so-called default behaviour. usrio is platform specific and using the default at91 usrio settings should be opt-in only. Revert the "cleanup" patch. Signed-off-by: Conor Dooley Link: https://patch.msgid.link/20260325-rundown-unrushed-dd82c9f5c56c@spud Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index c8182559edf6..6623b91b5efa 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -5513,31 +5513,37 @@ static const struct macb_config fu540_c000_config = { .clk_init = fu540_c000_clk_init, .init = fu540_c000_init, .jumbo_max_len = 10240, + .usrio = &macb_default_usrio, }; static const struct macb_config at91sam9260_config = { .caps = MACB_CAPS_USRIO_HAS_CLKEN | MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII, + .usrio = &macb_default_usrio, }; static const struct macb_config sama5d3macb_config = { .caps = MACB_CAPS_SG_DISABLED | MACB_CAPS_USRIO_HAS_CLKEN | MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII, + .usrio = &macb_default_usrio, }; static const struct macb_config pc302gem_config = { .caps = MACB_CAPS_SG_DISABLED | MACB_CAPS_GIGABIT_MODE_AVAILABLE, .dma_burst_length = 16, + .usrio = &macb_default_usrio, }; static const struct macb_config sama5d2_config = { .caps = MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII | MACB_CAPS_JUMBO, .dma_burst_length = 16, .jumbo_max_len = 10240, + .usrio = &macb_default_usrio, }; static const struct macb_config sama5d29_config = { .caps = MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII | MACB_CAPS_GEM_HAS_PTP, .dma_burst_length = 16, + .usrio = &macb_default_usrio, }; static const struct macb_config sama5d3_config = { @@ -5545,21 +5551,25 @@ static const struct macb_config sama5d3_config = { MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII | MACB_CAPS_JUMBO, .dma_burst_length = 16, .jumbo_max_len = 10240, + .usrio = &macb_default_usrio, }; static const struct macb_config sama5d4_config = { .caps = MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII, .dma_burst_length = 4, + .usrio = &macb_default_usrio, }; static const struct macb_config emac_config = { .caps = MACB_CAPS_NEEDS_RSTONUBR | MACB_CAPS_MACB_IS_EMAC, .clk_init = at91ether_clk_init, .init = at91ether_init, + .usrio = &macb_default_usrio, }; static const struct macb_config np4_config = { .caps = MACB_CAPS_USRIO_DISABLED, + .usrio = &macb_default_usrio, }; static const struct macb_config zynqmp_config = { @@ -5569,12 +5579,14 @@ static const struct macb_config zynqmp_config = { .dma_burst_length = 16, .init = init_reset_optional, .jumbo_max_len = 10240, + .usrio = &macb_default_usrio, }; static const struct macb_config zynq_config = { .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_NO_GIGABIT_HALF | MACB_CAPS_NEEDS_RSTONUBR, .dma_burst_length = 16, + .usrio = &macb_default_usrio, }; static const struct macb_config mpfs_config = { @@ -5583,6 +5595,7 @@ static const struct macb_config mpfs_config = { MACB_CAPS_GEM_HAS_PTP, .dma_burst_length = 16, .init = init_reset_optional, + .usrio = &macb_default_usrio, .max_tx_length = 4040, /* Cadence Erratum 1686 */ .jumbo_max_len = 4040, }; @@ -5611,6 +5624,7 @@ static const struct macb_config versal_config = { .dma_burst_length = 16, .init = init_reset_optional, .jumbo_max_len = 10240, + .usrio = &macb_default_usrio, }; static const struct macb_config eyeq5_config = { @@ -5620,6 +5634,7 @@ static const struct macb_config eyeq5_config = { .dma_burst_length = 16, .init = eyeq5_init, .jumbo_max_len = 10240, + .usrio = &macb_default_usrio, }; static const struct macb_config raspberrypi_rp1_config = { @@ -5628,6 +5643,7 @@ static const struct macb_config raspberrypi_rp1_config = { MACB_CAPS_GEM_HAS_PTP | MACB_CAPS_EEE, .dma_burst_length = 16, + .usrio = &macb_default_usrio, .jumbo_max_len = 10240, }; @@ -5675,6 +5691,7 @@ static const struct macb_config default_gem_config = { MACB_CAPS_JUMBO | MACB_CAPS_GEM_HAS_PTP, .dma_burst_length = 16, + .usrio = &macb_default_usrio, .jumbo_max_len = 10240, }; @@ -5761,7 +5778,7 @@ static int macb_probe(struct platform_device *pdev) bp->wol = 0; device_set_wakeup_capable(&pdev->dev, 1); - bp->usrio = macb_config->usrio ? : &macb_default_usrio; + bp->usrio = macb_config->usrio; /* By default we set to partial store and forward mode for zynqmp. * Disable if not set in devicetree. From a17871778ee28e4df054521e966e9f37c61f541b Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 25 Mar 2026 16:28:06 +0000 Subject: [PATCH 0998/1561] net: macb: rename macb_default_usrio to at91_default_usrio as not all platforms have mii mode control in usrio Calling this structure macb_default_usrio is misleading, I believe, as it implies that it should be used if your platform has nothing special to do in usrio. Since usrio is platform dependent, the default here is probably for each usrio to do nothing, with the macb documentation I have access to prescribing no standard behaviour here. We noticed that this was problematic because on mpfs, a bit that macb_default_usrio sets to deal with the MII mode actually changes the source for the tsu_clk to something with how the majority of mpfs devices are actually configured! Rename it to at91_default_usrio, since that's where the values actually come from for these. I have no idea if any of the other platforms that use the default actually copied at91's usrio configuration or if they have usrio configurations where what the driver does has no impact. Gate touching these bits behind a capability, like the clken refclock usrio knob, so that platforms without the MII mode stuff can avoid running this code. Signed-off-by: Conor Dooley Link: https://patch.msgid.link/20260325-landowner-preformed-2922ce736337@spud Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb.h | 1 + drivers/net/ethernet/cadence/macb_main.c | 108 +++++++++++++---------- 2 files changed, 63 insertions(+), 46 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index c69828b27dae..32217de5e073 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -786,6 +786,7 @@ #define MACB_CAPS_RSC BIT(23) #define MACB_CAPS_NO_LSO BIT(24) #define MACB_CAPS_EEE BIT(25) +#define MACB_CAPS_USRIO_HAS_MII BIT(26) /* LSO settings */ #define MACB_LSO_UFO_ENABLE 0x01 diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 6623b91b5efa..a96d2cf5355d 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -4884,13 +4884,15 @@ static int macb_init_dflt(struct platform_device *pdev) if (!(bp->caps & MACB_CAPS_USRIO_DISABLED)) { val = 0; - if (phy_interface_mode_is_rgmii(bp->phy_interface)) - val = bp->usrio->rgmii; - else if (bp->phy_interface == PHY_INTERFACE_MODE_RMII && - (bp->caps & MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII)) - val = bp->usrio->rmii; - else if (!(bp->caps & MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII)) - val = bp->usrio->mii; + if (bp->caps & MACB_CAPS_USRIO_HAS_MII) { + if (phy_interface_mode_is_rgmii(bp->phy_interface)) + val = bp->usrio->rgmii; + else if (bp->phy_interface == PHY_INTERFACE_MODE_RMII && + (bp->caps & MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII)) + val = bp->usrio->rmii; + else if (!(bp->caps & MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII)) + val = bp->usrio->mii; + } if (bp->caps & MACB_CAPS_USRIO_HAS_CLKEN) val |= bp->usrio->refclk; @@ -4917,13 +4919,6 @@ static int macb_init(struct platform_device *pdev, return macb_init_dflt(pdev); } -static const struct macb_usrio_config macb_default_usrio = { - .mii = MACB_BIT(MII), - .rmii = MACB_BIT(RMII), - .rgmii = GEM_BIT(RGMII), - .refclk = MACB_BIT(CLKEN), -}; - #if defined(CONFIG_OF) /* 1518 rounded up */ #define AT91ETHER_MAX_RBUFF_SZ 0x600 @@ -5498,6 +5493,13 @@ static int eyeq5_init(struct platform_device *pdev) return ret; } +static const struct macb_usrio_config at91_default_usrio = { + .mii = MACB_BIT(MII), + .rmii = MACB_BIT(RMII), + .rgmii = GEM_BIT(RGMII), + .refclk = MACB_BIT(CLKEN), +}; + static const struct macb_usrio_config sama7g5_usrio = { .mii = 0, .rmii = 1, @@ -5508,85 +5510,95 @@ static const struct macb_usrio_config sama7g5_usrio = { static const struct macb_config fu540_c000_config = { .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_JUMBO | - MACB_CAPS_GEM_HAS_PTP, + MACB_CAPS_GEM_HAS_PTP | MACB_CAPS_USRIO_HAS_MII, .dma_burst_length = 16, .clk_init = fu540_c000_clk_init, .init = fu540_c000_init, .jumbo_max_len = 10240, - .usrio = &macb_default_usrio, + .usrio = &at91_default_usrio, }; static const struct macb_config at91sam9260_config = { - .caps = MACB_CAPS_USRIO_HAS_CLKEN | MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII, - .usrio = &macb_default_usrio, + .caps = MACB_CAPS_USRIO_HAS_CLKEN | MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII | + MACB_CAPS_USRIO_HAS_MII, + .usrio = &at91_default_usrio, }; static const struct macb_config sama5d3macb_config = { .caps = MACB_CAPS_SG_DISABLED | - MACB_CAPS_USRIO_HAS_CLKEN | MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII, - .usrio = &macb_default_usrio, + MACB_CAPS_USRIO_HAS_CLKEN | MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII | + MACB_CAPS_USRIO_HAS_MII, + .usrio = &at91_default_usrio, }; static const struct macb_config pc302gem_config = { - .caps = MACB_CAPS_SG_DISABLED | MACB_CAPS_GIGABIT_MODE_AVAILABLE, + .caps = MACB_CAPS_SG_DISABLED | MACB_CAPS_GIGABIT_MODE_AVAILABLE | + MACB_CAPS_USRIO_HAS_MII, .dma_burst_length = 16, - .usrio = &macb_default_usrio, + .usrio = &at91_default_usrio, }; static const struct macb_config sama5d2_config = { - .caps = MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII | MACB_CAPS_JUMBO, + .caps = MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII | MACB_CAPS_JUMBO | + MACB_CAPS_USRIO_HAS_MII, .dma_burst_length = 16, .jumbo_max_len = 10240, - .usrio = &macb_default_usrio, + .usrio = &at91_default_usrio, }; static const struct macb_config sama5d29_config = { - .caps = MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII | MACB_CAPS_GEM_HAS_PTP, + .caps = MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII | MACB_CAPS_GEM_HAS_PTP | + MACB_CAPS_USRIO_HAS_MII, .dma_burst_length = 16, - .usrio = &macb_default_usrio, + .usrio = &at91_default_usrio, }; static const struct macb_config sama5d3_config = { .caps = MACB_CAPS_SG_DISABLED | MACB_CAPS_GIGABIT_MODE_AVAILABLE | - MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII | MACB_CAPS_JUMBO, + MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII | MACB_CAPS_JUMBO | + MACB_CAPS_USRIO_HAS_MII, .dma_burst_length = 16, .jumbo_max_len = 10240, - .usrio = &macb_default_usrio, + .usrio = &at91_default_usrio, }; static const struct macb_config sama5d4_config = { - .caps = MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII, + .caps = MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII | + MACB_CAPS_USRIO_HAS_MII, .dma_burst_length = 4, - .usrio = &macb_default_usrio, + .usrio = &at91_default_usrio, }; static const struct macb_config emac_config = { - .caps = MACB_CAPS_NEEDS_RSTONUBR | MACB_CAPS_MACB_IS_EMAC, + .caps = MACB_CAPS_NEEDS_RSTONUBR | MACB_CAPS_MACB_IS_EMAC | + MACB_CAPS_USRIO_HAS_MII, .clk_init = at91ether_clk_init, .init = at91ether_init, - .usrio = &macb_default_usrio, + .usrio = &at91_default_usrio, }; static const struct macb_config np4_config = { .caps = MACB_CAPS_USRIO_DISABLED, - .usrio = &macb_default_usrio, + .usrio = &at91_default_usrio, }; static const struct macb_config zynqmp_config = { .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_JUMBO | - MACB_CAPS_GEM_HAS_PTP | MACB_CAPS_BD_RD_PREFETCH, + MACB_CAPS_GEM_HAS_PTP | MACB_CAPS_BD_RD_PREFETCH | + MACB_CAPS_USRIO_HAS_MII, .dma_burst_length = 16, .init = init_reset_optional, .jumbo_max_len = 10240, - .usrio = &macb_default_usrio, + .usrio = &at91_default_usrio, }; static const struct macb_config zynq_config = { .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_NO_GIGABIT_HALF | - MACB_CAPS_NEEDS_RSTONUBR, + MACB_CAPS_NEEDS_RSTONUBR | + MACB_CAPS_USRIO_HAS_MII, .dma_burst_length = 16, - .usrio = &macb_default_usrio, + .usrio = &at91_default_usrio, }; static const struct macb_config mpfs_config = { @@ -5595,7 +5607,7 @@ static const struct macb_config mpfs_config = { MACB_CAPS_GEM_HAS_PTP, .dma_burst_length = 16, .init = init_reset_optional, - .usrio = &macb_default_usrio, + .usrio = &at91_default_usrio, .max_tx_length = 4040, /* Cadence Erratum 1686 */ .jumbo_max_len = 4040, }; @@ -5603,7 +5615,8 @@ static const struct macb_config mpfs_config = { static const struct macb_config sama7g5_gem_config = { .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_CLK_HW_CHG | MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII | - MACB_CAPS_MIIONRGMII | MACB_CAPS_GEM_HAS_PTP, + MACB_CAPS_MIIONRGMII | MACB_CAPS_GEM_HAS_PTP | + MACB_CAPS_USRIO_HAS_MII, .dma_burst_length = 16, .usrio = &sama7g5_usrio, }; @@ -5611,7 +5624,8 @@ static const struct macb_config sama7g5_gem_config = { static const struct macb_config sama7g5_emac_config = { .caps = MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII | MACB_CAPS_USRIO_HAS_CLKEN | MACB_CAPS_MIIONRGMII | - MACB_CAPS_GEM_HAS_PTP, + MACB_CAPS_GEM_HAS_PTP | + MACB_CAPS_USRIO_HAS_MII, .dma_burst_length = 16, .usrio = &sama7g5_usrio, }; @@ -5620,11 +5634,12 @@ static const struct macb_config versal_config = { .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_JUMBO | MACB_CAPS_GEM_HAS_PTP | MACB_CAPS_BD_RD_PREFETCH | MACB_CAPS_NEED_TSUCLK | MACB_CAPS_QUEUE_DISABLE | - MACB_CAPS_QBV, + MACB_CAPS_QBV | + MACB_CAPS_USRIO_HAS_MII, .dma_burst_length = 16, .init = init_reset_optional, .jumbo_max_len = 10240, - .usrio = &macb_default_usrio, + .usrio = &at91_default_usrio, }; static const struct macb_config eyeq5_config = { @@ -5634,16 +5649,17 @@ static const struct macb_config eyeq5_config = { .dma_burst_length = 16, .init = eyeq5_init, .jumbo_max_len = 10240, - .usrio = &macb_default_usrio, + .usrio = &at91_default_usrio, }; static const struct macb_config raspberrypi_rp1_config = { .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_CLK_HW_CHG | MACB_CAPS_JUMBO | MACB_CAPS_GEM_HAS_PTP | - MACB_CAPS_EEE, + MACB_CAPS_EEE | + MACB_CAPS_USRIO_HAS_MII, .dma_burst_length = 16, - .usrio = &macb_default_usrio, + .usrio = &at91_default_usrio, .jumbo_max_len = 10240, }; @@ -5691,7 +5707,7 @@ static const struct macb_config default_gem_config = { MACB_CAPS_JUMBO | MACB_CAPS_GEM_HAS_PTP, .dma_burst_length = 16, - .usrio = &macb_default_usrio, + .usrio = &at91_default_usrio, .jumbo_max_len = 10240, }; From 039f185a00602fe4e760216ffb50dda6d1ed863a Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 25 Mar 2026 16:28:07 +0000 Subject: [PATCH 0999/1561] net: macb: split USRIO_HAS_CLKEN capability in two While trying to rework the internal/external refclk selection on sama7g5, Ryan and I noticed that the sama7g5 was "overloading" the meaning of MACB_CAPS_USRIO_HAS_CLKEN, using it differently to how it was originally intended. Originally, on the macb hardware on sam9620 et al, MACB_CAPS_USRIO_HAS_CLKEN represented the hardware having a bit that needed to be set to turn on the input clock to the transceivers. The sama7g5 doesn't have this bit, so for some reason the decision was made to reuse this capability flag to control selection of internal/external references. Split the caps in two, so that capabilities do what they say on the tin, and allow reworking the refclk selection handling without impacting the older devices that use MACB_CAPS_USRIO_CLKEN for its original purpose. Signed-off-by: Conor Dooley Link: https://patch.msgid.link/20260325-gradient-grading-b23b9e6ef9ff@spud Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb.h | 2 ++ drivers/net/ethernet/cadence/macb_main.c | 11 ++++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 32217de5e073..35b7129d7c1e 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -787,6 +787,7 @@ #define MACB_CAPS_NO_LSO BIT(24) #define MACB_CAPS_EEE BIT(25) #define MACB_CAPS_USRIO_HAS_MII BIT(26) +#define MACB_CAPS_USRIO_HAS_REFCLK_SOURCE BIT(27) /* LSO settings */ #define MACB_LSO_UFO_ENABLE 0x01 @@ -1226,6 +1227,7 @@ struct macb_usrio_config { u32 rmii; u32 rgmii; u32 refclk; + u32 clken; u32 hdfctlen; }; diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index a96d2cf5355d..545d9222ea1f 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -4615,7 +4615,7 @@ static void macb_configure_caps(struct macb *bp, } if (refclk_ext) - bp->caps |= MACB_CAPS_USRIO_HAS_CLKEN; + bp->caps |= MACB_CAPS_USRIO_HAS_REFCLK_SOURCE; dev_dbg(&bp->pdev->dev, "Cadence caps 0x%08x\n", bp->caps); } @@ -4895,6 +4895,9 @@ static int macb_init_dflt(struct platform_device *pdev) } if (bp->caps & MACB_CAPS_USRIO_HAS_CLKEN) + val |= bp->usrio->clken; + + if (bp->caps & MACB_CAPS_USRIO_HAS_REFCLK_SOURCE) val |= bp->usrio->refclk; macb_or_gem_writel(bp, USRIO, val); @@ -5497,7 +5500,7 @@ static const struct macb_usrio_config at91_default_usrio = { .mii = MACB_BIT(MII), .rmii = MACB_BIT(RMII), .rgmii = GEM_BIT(RGMII), - .refclk = MACB_BIT(CLKEN), + .clken = MACB_BIT(CLKEN), }; static const struct macb_usrio_config sama7g5_usrio = { @@ -5615,6 +5618,7 @@ static const struct macb_config mpfs_config = { static const struct macb_config sama7g5_gem_config = { .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_CLK_HW_CHG | MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII | + MACB_CAPS_USRIO_HAS_REFCLK_SOURCE | MACB_CAPS_MIIONRGMII | MACB_CAPS_GEM_HAS_PTP | MACB_CAPS_USRIO_HAS_MII, .dma_burst_length = 16, @@ -5623,7 +5627,8 @@ static const struct macb_config sama7g5_gem_config = { static const struct macb_config sama7g5_emac_config = { .caps = MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII | - MACB_CAPS_USRIO_HAS_CLKEN | MACB_CAPS_MIIONRGMII | + MACB_CAPS_MIIONRGMII | + MACB_CAPS_USRIO_HAS_REFCLK_SOURCE | MACB_CAPS_GEM_HAS_PTP | MACB_CAPS_USRIO_HAS_MII, .dma_burst_length = 16, From dfa36d7e860c2f008d92d91903f2d41ed0c88f3d Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 25 Mar 2026 16:28:08 +0000 Subject: [PATCH 1000/1561] dt-bindings: net: cdns,macb: replace cdns,refclk-ext with cdns,refclk-source Ryan added cdns,refclk-ext with the intent of decoupling the source of the reference clock on sama7g5 (and related platforms) from the compatible. Unfortunately, the default for sama7g5-emac is an external reference clock, so this property had no effect there, so that compatibility with older devicetrees is preserved. Replace cdns,refclk-ext with one that supports both default states and therefore is usable for sama7g5-emac. For now, limit it to only the platforms that have USRIO controlled reference clock selection, but this could be generalised in the future. The existing property only works on devices that are compatible with sama7g5-gem, so mark it deprecated, and limit its use to that specific scenario. Signed-off-by: Conor Dooley Link: https://patch.msgid.link/20260325-savior-untainted-03057ee0a917@spud Signed-off-by: Jakub Kicinski --- .../devicetree/bindings/net/cdns,macb.yaml | 56 ++++++++++++++++++- 1 file changed, 53 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/net/cdns,macb.yaml b/Documentation/devicetree/bindings/net/cdns,macb.yaml index feb168385837..50490acbb6fc 100644 --- a/Documentation/devicetree/bindings/net/cdns,macb.yaml +++ b/Documentation/devicetree/bindings/net/cdns,macb.yaml @@ -130,10 +130,23 @@ properties: cdns,refclk-ext: type: boolean + deprecated: true + description: | + This selects if the REFCLK for RMII is provided by an external source. + For RGMII mode this selects if the 125MHz REF clock is provided by an external + source. + + This property has been replaced by cdns,refclk-source, as it only works + for devices that use an internal reference clock by default. + + cdns,refclk-source: + $ref: /schemas/types.yaml#/definitions/string + enum: + - internal + - external description: - This selects if the REFCLK for RMII is provided by an external source. - For RGMII mode this selects if the 125MHz REF clock is provided by an external - source. + Select whether or not the refclk for RGMII or RMII is provided by an + internal or external source. The default is device specific. cdns,rx-watermark: $ref: /schemas/types.yaml#/definitions/uint32 @@ -215,6 +228,43 @@ allOf: properties: mdio: false + - if: + not: + properties: + compatible: + contains: + enum: + - microchip,sama7g5-gem + - microchip,sama7g5-emac + then: + properties: + cdns,refclk-source: false + + - if: + not: + properties: + compatible: + contains: + const: microchip,sama7g5-gem + then: + properties: + cdns,refclk-ext: false + + - if: + properties: + compatible: + contains: + enum: + - microchip,sama7g5-emac + then: + properties: + cdns,refclk-source: + default: external + else: + properties: + cdns,refclk-source: + default: internal + unevaluatedProperties: false examples: From 6c5b565d7d41561709f3fe969e4eca1ba9e7f6f7 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 25 Mar 2026 16:28:09 +0000 Subject: [PATCH 1001/1561] net: macb: rework usrio refclk selection code The USRIO based refclk selection code abuses a capability flag to set the refclk to an external source based on match data/compatible on sama7g5-emac and use an internal source for the gmac. Ryan previously added a property in an attempt to decouple the refclk source from the compatible, because this is not fixed by compatible and there's variance based on the choices made by board designers. Originally when Ryan added it, he removed the capability flag entirely from match data, but this changed the default for the sama7g5-emac and the removal had to be reverted for these devices. Because these devices default to an external refclk, and the current property is only capable of communicating external refclks, there's no way to make the sama7g5-emac use an internal refclk. Additionally, this property has no limiting based on compatible, and if used on a platform with an external refclk that is not controlled by USRIO the capability would be erroneously set. Because of the reuse of the at91_default_usrio struct by non-at91 devices, this could cause the refclk bit to be set in error, on a system where the refclk is externally provided without usrio settings being required. Change the new capability flag so that it actually represents the hardware being capable of controlling the refclk source via USRIO, and move the selection of default behaviour into the macb_usrio_config struct provided as part of match data. Modify the devicetree code to support a new property, "cdns,refclk-source" which will support devices with either default, retaining support for "cdns,refclk-external" for compatibility reasons. Signed-off-by: Conor Dooley Link: https://patch.msgid.link/20260325-tarantula-bullring-6ac44b39dd52@spud Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb.h | 1 + drivers/net/ethernet/cadence/macb_main.c | 55 ++++++++++++++++++------ 2 files changed, 44 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 35b7129d7c1e..1bdbe66b0559 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -1229,6 +1229,7 @@ struct macb_usrio_config { u32 refclk; u32 clken; u32 hdfctlen; + bool refclk_default_external; }; struct macb_config { diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 545d9222ea1f..ab584a173202 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -4576,12 +4576,8 @@ static const struct net_device_ops macb_netdev_ops = { static void macb_configure_caps(struct macb *bp, const struct macb_config *dt_conf) { - struct device_node *np = bp->pdev->dev.of_node; - bool refclk_ext; u32 dcfg; - refclk_ext = of_property_read_bool(np, "cdns,refclk-ext"); - if (dt_conf) bp->caps = dt_conf->caps; @@ -4614,9 +4610,6 @@ static void macb_configure_caps(struct macb *bp, } } - if (refclk_ext) - bp->caps |= MACB_CAPS_USRIO_HAS_REFCLK_SOURCE; - dev_dbg(&bp->pdev->dev, "Cadence caps 0x%08x\n", bp->caps); } @@ -4897,8 +4890,36 @@ static int macb_init_dflt(struct platform_device *pdev) if (bp->caps & MACB_CAPS_USRIO_HAS_CLKEN) val |= bp->usrio->clken; - if (bp->caps & MACB_CAPS_USRIO_HAS_REFCLK_SOURCE) - val |= bp->usrio->refclk; + if (bp->caps & MACB_CAPS_USRIO_HAS_REFCLK_SOURCE) { + const char *prop; + bool refclk_ext; + int ret; + + /* Default to whatever was set in the match data for + * this device. There's two properties for refclk + * control, but the boolean one is deprecated so is + * a lower priority to check, no device should have + * both. + */ + refclk_ext = bp->usrio->refclk_default_external; + + ret = of_property_read_string(pdev->dev.of_node, + "cdns,refclk-source", &prop); + if (!ret) { + if (!strcmp(prop, "external")) + refclk_ext = true; + else + refclk_ext = false; + } else { + ret = of_property_read_bool(pdev->dev.of_node, + "cdns,refclk-ext"); + if (ret) + refclk_ext = true; + } + + if (refclk_ext) + val |= bp->usrio->refclk; + } macb_or_gem_writel(bp, USRIO, val); } @@ -5503,11 +5524,21 @@ static const struct macb_usrio_config at91_default_usrio = { .clken = MACB_BIT(CLKEN), }; -static const struct macb_usrio_config sama7g5_usrio = { +static const struct macb_usrio_config sama7g5_gem_usrio = { .mii = 0, .rmii = 1, .rgmii = 2, .refclk = BIT(2), + .refclk_default_external = false, + .hdfctlen = BIT(6), +}; + +static const struct macb_usrio_config sama7g5_emac_usrio = { + .mii = 0, + .rmii = 1, + .rgmii = 2, + .refclk = BIT(2), + .refclk_default_external = true, .hdfctlen = BIT(6), }; @@ -5622,7 +5653,7 @@ static const struct macb_config sama7g5_gem_config = { MACB_CAPS_MIIONRGMII | MACB_CAPS_GEM_HAS_PTP | MACB_CAPS_USRIO_HAS_MII, .dma_burst_length = 16, - .usrio = &sama7g5_usrio, + .usrio = &sama7g5_gem_usrio, }; static const struct macb_config sama7g5_emac_config = { @@ -5632,7 +5663,7 @@ static const struct macb_config sama7g5_emac_config = { MACB_CAPS_GEM_HAS_PTP | MACB_CAPS_USRIO_HAS_MII, .dma_burst_length = 16, - .usrio = &sama7g5_usrio, + .usrio = &sama7g5_emac_usrio, }; static const struct macb_config versal_config = { From 826cbe636e10581ea6a78541d756bce6049e5087 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 25 Mar 2026 16:28:10 +0000 Subject: [PATCH 1002/1561] net: macb: np4 doesn't need a usrio pointer USRIO is disabled on this platform, having a pointer to a usrio config structure doesn't actually do anything other than look weird. Signed-off-by: Conor Dooley Link: https://patch.msgid.link/20260325-passover-rimless-73c19c67d94b@spud Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index ab584a173202..464b9e5bcdcf 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -5613,7 +5613,6 @@ static const struct macb_config emac_config = { static const struct macb_config np4_config = { .caps = MACB_CAPS_USRIO_DISABLED, - .usrio = &at91_default_usrio, }; static const struct macb_config zynqmp_config = { From c711311d6ba359ece75c77cca973f4c3b78e25b8 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 25 Mar 2026 16:28:11 +0000 Subject: [PATCH 1003/1561] net: macb: add mpfs specific usrio configuration On mpfs the driver needs to make sure the tsu clock source is not the fabric, as this requires that the hardware is in Timer Adjust mode, which is not compatible with the linux driver trying to control the hardware. It is unlikely that this will be set, as the peripheral is reset during probe, but if the resets are not provided in devicetree it's probable that this bit is set incorrectly, as U-Boot's macb driver has the same issue with using usrio settings for at91 platforms as the default. Fixes: 8aad66aa59be5 ("net: macb: add polarfire soc reset support") Signed-off-by: Conor Dooley Link: https://patch.msgid.link/20260325-excavate-jester-798e7cfe02b5@spud Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb.h | 2 ++ drivers/net/ethernet/cadence/macb_main.c | 12 ++++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 1bdbe66b0559..8e5305f9a754 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -788,6 +788,7 @@ #define MACB_CAPS_EEE BIT(25) #define MACB_CAPS_USRIO_HAS_MII BIT(26) #define MACB_CAPS_USRIO_HAS_REFCLK_SOURCE BIT(27) +#define MACB_CAPS_USRIO_HAS_TSUCLK_SOURCE BIT(28) /* LSO settings */ #define MACB_LSO_UFO_ENABLE 0x01 @@ -1229,6 +1230,7 @@ struct macb_usrio_config { u32 refclk; u32 clken; u32 hdfctlen; + u32 tsu_source; bool refclk_default_external; }; diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 464b9e5bcdcf..2587279d50a0 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -4921,6 +4921,9 @@ static int macb_init_dflt(struct platform_device *pdev) val |= bp->usrio->refclk; } + if (bp->caps & MACB_CAPS_USRIO_HAS_TSUCLK_SOURCE) + val |= bp->usrio->tsu_source; + macb_or_gem_writel(bp, USRIO, val); } @@ -5524,6 +5527,10 @@ static const struct macb_usrio_config at91_default_usrio = { .clken = MACB_BIT(CLKEN), }; +static const struct macb_usrio_config mpfs_usrio = { + .tsu_source = 0, +}; + static const struct macb_usrio_config sama7g5_gem_usrio = { .mii = 0, .rmii = 1, @@ -5637,10 +5644,11 @@ static const struct macb_config zynq_config = { static const struct macb_config mpfs_config = { .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_JUMBO | - MACB_CAPS_GEM_HAS_PTP, + MACB_CAPS_GEM_HAS_PTP | + MACB_CAPS_USRIO_HAS_TSUCLK_SOURCE, .dma_burst_length = 16, .init = init_reset_optional, - .usrio = &at91_default_usrio, + .usrio = &mpfs_usrio, .max_tx_length = 4040, /* Cadence Erratum 1686 */ .jumbo_max_len = 4040, }; From 3fe13d858f8391e38c75698719198d15a6a962dc Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 25 Mar 2026 16:28:12 +0000 Subject: [PATCH 1004/1561] net: macb: warn on pclk use as a tsu_clk fallback The Candence GEM IP has a configuration parameter which determines the source of the clock used for the timestamp unit (if it is enabled), switching it between using the pclk and a dedicated input. When ptp support was added to the macb driver, a new tsu_clk was added to represent the dedicated input. While this is understandable, I think it is bug prone and that the tsu_clk should represent whatever clock is used for the timestamper and not just that specific input. >From a driver point of view, the benefit of taking the conceptual approach is avoiding misconfiguring the driver when the hardware supports ptp (and it is set as a capability in the relevant per-device structure) but no tsu_clk is provided in devicetree. At the moment, the timestamper will be registered and programmed with an increment that reflects the pclk in these cases, but will malfunction if the pclk and tsu_clk frequencies do not match. Obviously, this means the devicetree incorrectly represents the hardware, but this change in approach would make the driver more resilient without meaningfully impacting correctly described users. Out of the devices that claim MACB_CAPS_GEM_HAS_PTP the fu540, mpfs, sama5d2 and sama7g5-emac (but not sama7g5-gem) are at risk of having this problem with the in-kernel devicetrees. mpfs and sama7g5-emac have been confirmed to be incorrect, and sama5d2 is correct. It may be that the other platforms actually do use the pclk for the timestamper (either by supplying pclk to the tsu_clk input of the IP, or by having the IP block configured to use pclk instead of the tsu_clk input), but at least two are wrong, as they do not use pclk for the tsu_clk, so the driver is registering the ptp clock incorrectly. Add a warning if no tsu_clk is provided on a platform that uses the timerstamper, to encourage people to specifically provide a tsu_clk and avoid silently registering the timerstamper with the wrong clock. If the pclk is actually used, it can be provided as a tsu_clk for improved clarity in devicetrees. While this changes the meaning of the devicetree property, it is backwards compatible as there's no functional change for platforms that didn't provide a tsu_clk and the changed meaning of providing a tsu_clk in the devicetree does not impact platforms that already provided one as the decision about the tsu clock source is at IP instantiation time rather than at runtime, so there's no driver behaviour that needs to change based on the input to the IP used for the timestamping unit. Signed-off-by: Conor Dooley Link: https://patch.msgid.link/20260325-dust-revision-368053e82d0e@spud Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 2587279d50a0..8600b3505efb 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -3772,6 +3772,7 @@ static unsigned int gem_get_tsu_rate(struct macb *bp) else if (!IS_ERR(bp->pclk)) { tsu_clk = bp->pclk; tsu_rate = clk_get_rate(tsu_clk); + dev_warn(&bp->pdev->dev, "devicetree missing tsu_clk, using pclk as fallback\n"); } else return -ENOTSUPP; return tsu_rate; From b698a1e397ab9371911a130605908d26510e83c6 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 25 Mar 2026 16:28:13 +0000 Subject: [PATCH 1005/1561] net: macb: clean up tsu clk rate acquisition tsu_clk is grabbed during probe, so doesn't need to be re-grabbed here. pclk is mandatory, probe will fail if it is err/NULL, so there's no need to check it here or have a !pclk 3rd arm. Simplify gem_get_tsu_rate() to account for these facts. Signed-off-by: Conor Dooley Link: https://patch.msgid.link/20260325-hazing-penniless-14ba803efbb6@spud Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 8600b3505efb..7deae61a61d4 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -3765,16 +3765,14 @@ static unsigned int gem_get_tsu_rate(struct macb *bp) struct clk *tsu_clk; unsigned int tsu_rate; - tsu_clk = devm_clk_get(&bp->pdev->dev, "tsu_clk"); - if (!IS_ERR(tsu_clk)) - tsu_rate = clk_get_rate(tsu_clk); - /* try pclk instead */ - else if (!IS_ERR(bp->pclk)) { + if (!IS_ERR_OR_NULL(bp->tsu_clk)) { + tsu_rate = clk_get_rate(bp->tsu_clk); + } else { tsu_clk = bp->pclk; tsu_rate = clk_get_rate(tsu_clk); dev_warn(&bp->pdev->dev, "devicetree missing tsu_clk, using pclk as fallback\n"); - } else - return -ENOTSUPP; + } + return tsu_rate; } From 09a6164a4f1d1b5364069bd290dba1d808718016 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 25 Mar 2026 16:28:14 +0000 Subject: [PATCH 1006/1561] dt-bindings: net: macb: add property indicating timer adjust mode The GEM IP has two methods for modifying the ptp timer. The first of these, named "increment mode", relies on software controlling the timer by setting tsu_timer_incr and tsu_timer_incr_sub_nsec and performing once-off adjustments via the tsu_timer_adjust register. This is what the macb driver uses. The second mechanism, "timer adjust mode" uses the gem_tsu_inc_ctrl and gem_tsu_ms signals to control the timer. These modes are not intended to be used in parallel, but both can be possible on the same device and which mode is used cannot be determined from the compatible on all devices, because some users of the GEM IP are SoC FPGAs that permit configuring how the IP is wired up. Add a property to indicate that gem_tsu_inc_ctrl and gem_tsu_ms are wired up for timer adjust mode. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Conor Dooley Link: https://patch.msgid.link/20260325-daily-entitle-3640f7254da4@spud Signed-off-by: Jakub Kicinski --- .../devicetree/bindings/net/cdns,macb.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Documentation/devicetree/bindings/net/cdns,macb.yaml b/Documentation/devicetree/bindings/net/cdns,macb.yaml index 50490acbb6fc..2c8c080a3d88 100644 --- a/Documentation/devicetree/bindings/net/cdns,macb.yaml +++ b/Documentation/devicetree/bindings/net/cdns,macb.yaml @@ -158,6 +158,12 @@ properties: that need to be filled, before the forwarding process is activated. Width of the SRAM is platform dependent, and can be 4, 8 or 16 bytes. + cdns,timer-adjust: + type: boolean + description: + Set when the hardware is operating in timer-adjust mode, where the timer + is controlled by the gem_tsu_inc_ctrl and gem_tsu_ms inputs. + '#address-cells': const: 1 @@ -207,6 +213,15 @@ allOf: properties: reg: maxItems: 1 + - if: + not: + properties: + compatible: + contains: + const: microchip,mpfs-macb + then: + properties: + cdns,timer-adjust: false - if: properties: From 41adda8764fd4d5e0f4e40cdcf47d60344dc4c4d Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 25 Mar 2026 16:28:15 +0000 Subject: [PATCH 1007/1561] net: macb: timer adjust mode is not supported The ptp portion of this driver controls the tsu's timer using the controls for "increment mode", which is not compatible with the hardware trying to control it via the gem_tsu_inc_ctrl and gem_tsu_ms inputs in "timer adjust mode". Abort probe if the property signalling that the relevant signals have been wired up is present. Signed-off-by: Conor Dooley Link: https://patch.msgid.link/20260325-facebook-chop-cf792c53f1da@spud Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 7deae61a61d4..11074938c6bb 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -5838,6 +5838,13 @@ static int macb_probe(struct platform_device *pdev) bp->usrio = macb_config->usrio; + if (of_property_read_bool(bp->pdev->dev.of_node, "cdns,timer-adjust") && + IS_ENABLED(CONFIG_MACB_USE_HWSTAMP)) { + dev_err(&pdev->dev, "Timer adjust mode is not supported\n"); + err = -EINVAL; + goto err_out_free_netdev; + } + /* By default we set to partial store and forward mode for zynqmp. * Disable if not set in devicetree. */ From 47c86c463612edc3107eb7c0fd25b463f1756adf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Wed, 25 Mar 2026 16:28:16 +0000 Subject: [PATCH 1008/1561] net: macb: runtime detect MACB_CAPS_USRIO_DISABLED MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DCFG1 (design config 1 register) carries a bit indicating whether User I/O feature has been enabled or not. The MACB/GEM driver has a cap flag indicating that HW has the feature disabled (default is enabled). Add the missing connection between DCFG1 bit and MACB_CAPS_USRIO_DISABLED. Indirect impact: avoid useless writel() on USERIO register; this is not an important fix because USERIO is anyway read-only when feature is disabled. If for some reason a compatible sets USRIO_DISABLED but DCFG1 indicates it is enabled, we still keep the disabled capability flag. This ensures we don't break "cdns,np4-macb" that sets the flag from compatible match data. Signed-off-by: Théo Lebrun Signed-off-by: Conor Dooley Link: https://patch.msgid.link/20260325-compactly-glue-f426a2e68904@spud Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb.h | 2 ++ drivers/net/ethernet/cadence/macb_main.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 8e5305f9a754..16527dbab875 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -524,6 +524,8 @@ #define GEM_IRQCOR_SIZE 1 #define GEM_DBWDEF_OFFSET 25 #define GEM_DBWDEF_SIZE 3 +#define GEM_USERIO_OFFSET 9 +#define GEM_USERIO_SIZE 1 #define GEM_NO_PCS_OFFSET 0 #define GEM_NO_PCS_SIZE 1 diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 11074938c6bb..281ab0d7b7c0 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -4588,6 +4588,8 @@ static void macb_configure_caps(struct macb *bp, bp->caps |= MACB_CAPS_ISR_CLEAR_ON_WRITE; if (GEM_BFEXT(NO_PCS, dcfg) == 0) bp->caps |= MACB_CAPS_PCS; + if (!(dcfg & GEM_BIT(USERIO))) + bp->caps |= MACB_CAPS_USRIO_DISABLED; dcfg = gem_readl(bp, DCFG12); if (GEM_BFEXT(HIGH_SPEED, dcfg) == 1) bp->caps |= MACB_CAPS_HIGH_SPEED; From 32fc6a9f6e7585b2b39999a22635c06aa810d1bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Wed, 25 Mar 2026 16:28:17 +0000 Subject: [PATCH 1009/1561] net: macb: set MACB_CAPS_USRIO_DISABLED if no usrio config is provided MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bp->usrio is copied directly from dt_conf->usrio in macb_probe(). If dt_conf->usrio is NULL, we do not want to land in USRIO write codepaths which dereference bp->usrio. Inherit automatically MACB_CAPS_USRIO_DISABLED to avoid those. This means a macb_config that wants to disable usrio can simply drop its .usrio field, rather than add the disabled capability explicitly. Nit: drop the dt_conf NULL check because the pointer is always valid. Signed-off-by: Théo Lebrun Signed-off-by: Conor Dooley Link: https://patch.msgid.link/20260325-husband-cape-ec4945b9184c@spud Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 281ab0d7b7c0..4dae84ceb545 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -4577,8 +4577,10 @@ static void macb_configure_caps(struct macb *bp, { u32 dcfg; - if (dt_conf) - bp->caps = dt_conf->caps; + bp->caps = dt_conf->caps; + + if (!dt_conf->usrio) + bp->caps |= MACB_CAPS_USRIO_DISABLED; if (hw_is_gem(bp->regs, bp->native_io)) { bp->caps |= MACB_CAPS_MACB_IS_GEM; From cd1082a96f9a1ffe053e48ac0a1b4fffb189c30e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Wed, 25 Mar 2026 16:28:18 +0000 Subject: [PATCH 1010/1561] net: macb: drop usrio pointer on EyeQ5 config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit USRIO is disabled on this platform, drop its inherited usrio config. We will end up with MACB_CAPS_USRIO_DISABLED on this platform: - We have no config->usrio so macb_configure_caps() deduces that the feature is disabled. - Anecdotally, we would also land in the runtime detection codepath that reads DCFG1. Signed-off-by: Théo Lebrun Signed-off-by: Conor Dooley Link: https://patch.msgid.link/20260325-stillness-undertake-d83054057b8d@spud Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 4dae84ceb545..7a48ebe0741f 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -5695,7 +5695,6 @@ static const struct macb_config eyeq5_config = { .dma_burst_length = 16, .init = eyeq5_init, .jumbo_max_len = 10240, - .usrio = &at91_default_usrio, }; static const struct macb_config raspberrypi_rp1_config = { From 3e09b370f830d6f00336390a5f622f7202dcd7cc Mon Sep 17 00:00:00 2001 From: Luka Gejak Date: Thu, 26 Mar 2026 18:45:59 +0100 Subject: [PATCH 1011/1561] net: hsr: constify hsr_ops and prp_ops protocol operation structures The hsr_ops and prp_ops structures are assigned to hsr->proto_ops during device initialization and are never modified at runtime. Declaring them as const allows the compiler to place these structures in read-only memory, which improves security by preventing accidental or malicious modification of the function pointers they contain. The proto_ops field in struct hsr_priv is also updated to a const pointer to maintain type consistency. Signed-off-by: Luka Gejak Reviewed-by: Felix Maurer Link: https://patch.msgid.link/20260326174600.136232-2-luka.gejak@linux.dev Signed-off-by: Jakub Kicinski --- net/hsr/hsr_device.c | 4 ++-- net/hsr/hsr_main.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 5c3eca2235ce..90236028817d 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -616,7 +616,7 @@ static const struct device_type hsr_type = { .name = "hsr", }; -static struct hsr_proto_ops hsr_ops = { +static const struct hsr_proto_ops hsr_ops = { .send_sv_frame = send_hsr_supervision_frame, .create_tagged_frame = hsr_create_tagged_frame, .get_untagged_frame = hsr_get_untagged_frame, @@ -626,7 +626,7 @@ static struct hsr_proto_ops hsr_ops = { .register_frame_out = hsr_register_frame_out, }; -static struct hsr_proto_ops prp_ops = { +static const struct hsr_proto_ops prp_ops = { .send_sv_frame = send_prp_supervision_frame, .create_tagged_frame = prp_create_tagged_frame, .get_untagged_frame = prp_get_untagged_frame, diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index 33b0d2460c9b..134e4f3fff60 100644 --- a/net/hsr/hsr_main.h +++ b/net/hsr/hsr_main.h @@ -202,7 +202,7 @@ struct hsr_priv { enum hsr_version prot_version; /* Indicate if HSRv0, HSRv1 or PRPv1 */ spinlock_t seqnr_lock; /* locking for sequence_nr */ spinlock_t list_lock; /* locking for node list */ - struct hsr_proto_ops *proto_ops; + const struct hsr_proto_ops *proto_ops; #define PRP_LAN_ID 0x5 /* 0x1010 for A and 0x1011 for B. Bit 0 is set * based on SLAVE_A or SLAVE_B */ From 137ac69c15fd2a9acf5d4d95456fe75f23c9ffc5 Mon Sep 17 00:00:00 2001 From: Luka Gejak Date: Thu, 26 Mar 2026 18:46:00 +0100 Subject: [PATCH 1012/1561] net: hsr: use __func__ instead of hardcoded function name Replace the hardcoded string "hsr_get_untagged_frame" with the standard __func__ macro in netdev_warn_once() call to make the code more robust to refactoring. Signed-off-by: Luka Gejak Reviewed-by: Felix Maurer Link: https://patch.msgid.link/20260326174600.136232-3-luka.gejak@linux.dev Signed-off-by: Jakub Kicinski --- net/hsr/hsr_forward.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index aefc9b6936ba..0aca859c88cb 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -184,7 +184,7 @@ struct sk_buff *hsr_get_untagged_frame(struct hsr_frame_info *frame, create_stripped_skb_hsr(frame->skb_hsr, frame); else netdev_warn_once(port->dev, - "Unexpected frame received in hsr_get_untagged_frame()\n"); + "Unexpected frame received in %s()\n", __func__); if (!frame->skb_std) return NULL; From b2bf9d61e14af4129362aeb9c10034229a6d8f08 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Mon, 16 Mar 2026 11:56:35 +0800 Subject: [PATCH 1013/1561] wifi: rtw88: add quirks to disable PCI ASPM and deep LPS for HP P3S95EA#ACB On an HP laptop (P3S95EA#ACB) equipped with a Realtek RTL8821CE 802.11ac PCIe adapter (PCI ID: 10ec:c821), the system experiences a hard lockup (complete freeze of the UI and kernel, sysrq doesn't work, requires holding the power button) when the WiFi adapter enters the power saving state. Disable PCI ASPM to avoid system freeze. In addition, driver throws messages periodically. Though this doesn't always cause unstable connection, missing H2C commands might cause unpredictable results. Disable deep LPS to avoid this as well. rtw88_8821ce 0000:13:00.0: firmware failed to leave lps state rtw88_8821ce 0000:13:00.0: failed to send h2c command rtw88_8821ce 0000:13:00.0: failed to send h2c command Tested on HP Notebook P3S95EA#ACB (kernel 6.19.7-1-cachyos): - No hard freeze observed during idle or active usage. - Zero h2c or lps errors in dmesg across idle (10 min), load stress (100MB download), and suspend/resume cycle. - Both quirk flags confirmed active via sysfs without any manual modprobe parameters. Reported-by: Oleksandr Havrylov Closes: https://lore.kernel.org/linux-wireless/CALdGYqSQ1Ko2TTBhUizMu_FvLMUAuQfFrVwS10n_C-LSQJQQkQ@mail.gmail.com/ Signed-off-by: Ping-Ke Shih Tested-by: Oleksandr Havrylov Link: https://patch.msgid.link/20260316035635.16550-1-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw88/main.h | 5 ++++ drivers/net/wireless/realtek/rtw88/pci.c | 31 +++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h index 1179231a672d..9c0b746540b0 100644 --- a/drivers/net/wireless/realtek/rtw88/main.h +++ b/drivers/net/wireless/realtek/rtw88/main.h @@ -432,6 +432,11 @@ enum rtw_wow_flags { RTW_WOW_FLAG_MAX, }; +enum rtw_quirk_dis_caps { + QUIRK_DIS_CAP_PCI_ASPM, + QUIRK_DIS_CAP_LPS_DEEP, +}; + /* the power index is represented by differences, which cck-1s & ht40-1s are * the base values, so for 1s's differences, there are only ht20 & ofdm */ diff --git a/drivers/net/wireless/realtek/rtw88/pci.c b/drivers/net/wireless/realtek/rtw88/pci.c index ec0a45bfb670..bba370ad510c 100644 --- a/drivers/net/wireless/realtek/rtw88/pci.c +++ b/drivers/net/wireless/realtek/rtw88/pci.c @@ -2,6 +2,7 @@ /* Copyright(c) 2018-2019 Realtek Corporation */ +#include #include #include #include "main.h" @@ -1744,6 +1745,34 @@ const struct pci_error_handlers rtw_pci_err_handler = { }; EXPORT_SYMBOL(rtw_pci_err_handler); +static int rtw_pci_disable_caps(const struct dmi_system_id *dmi) +{ + uintptr_t dis_caps = (uintptr_t)dmi->driver_data; + + if (dis_caps & BIT(QUIRK_DIS_CAP_PCI_ASPM)) + rtw_pci_disable_aspm = true; + + if (dis_caps & BIT(QUIRK_DIS_CAP_LPS_DEEP)) + rtw_disable_lps_deep_mode = true; + + return 1; +} + +static const struct dmi_system_id rtw_pci_quirks[] = { + { + .callback = rtw_pci_disable_caps, + .ident = "HP Notebook - P3S95EA#ACB", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Notebook"), + DMI_MATCH(DMI_PRODUCT_SKU, "P3S95EA#ACB"), + }, + .driver_data = (void *)(BIT(QUIRK_DIS_CAP_PCI_ASPM) | + BIT(QUIRK_DIS_CAP_LPS_DEEP)), + }, + {} +}; + int rtw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { @@ -1771,6 +1800,8 @@ int rtw_pci_probe(struct pci_dev *pdev, rtwpci = (struct rtw_pci *)rtwdev->priv; atomic_set(&rtwpci->link_usage, 1); + dmi_check_system(rtw_pci_quirks); + ret = rtw_core_init(rtwdev); if (ret) goto err_release_hw; From 737e980e12983bb7420a2c00b981a1e607079a84 Mon Sep 17 00:00:00 2001 From: Bitterblue Smith Date: Wed, 18 Mar 2026 19:45:13 +0200 Subject: [PATCH 1014/1561] wifi: rtw88: TX QOS Null data the same way as Null data When filling out the TX descriptor, Null data frames are treated like management frames, but QOS Null data frames are treated like normal data frames. Somehow this causes a problem for the firmware. When connected to a network in the 2.4 GHz band, wpa_supplicant (or NetworkManager?) triggers a scan every five minutes. During these scans mac80211 transmits many QOS Null frames in quick succession. Because these frames are marked with IEEE80211_TX_CTL_REQ_TX_STATUS, rtw88 asks the firmware to report the TX ACK status for each of these frames. Sometimes the firmware can't process the TX status requests quickly enough, they add up, it only processes some of them, and then marks every subsequent TX status report with the wrong number. The symptom is that after a while the warning "failed to get tx report from firmware" appears every five minutes. This problem apparently happens only with the older RTL8723D, RTL8821A, RTL8812A, and probably RTL8703B chips. Treat QOS Null data frames the same way as Null data frames. This seems to avoid the problem. Tested with RTL8821AU, RTL8723DU, RTL8811CU, and RTL8812BU. Signed-off-by: Bitterblue Smith Acked-by: Ping-Ke Shih Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/2b53fb0d-b1ed-47b6-8caa-2bb9ae2acb80@gmail.com --- drivers/net/wireless/realtek/rtw88/tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtw88/tx.c b/drivers/net/wireless/realtek/rtw88/tx.c index 2ab440cb2d67..3106edb84fb4 100644 --- a/drivers/net/wireless/realtek/rtw88/tx.c +++ b/drivers/net/wireless/realtek/rtw88/tx.c @@ -421,7 +421,7 @@ void rtw_tx_pkt_info_update(struct rtw_dev *rtwdev, pkt_info->mac_id = rtwvif->mac_id; } - if (ieee80211_is_mgmt(fc) || ieee80211_is_nullfunc(fc)) + if (ieee80211_is_mgmt(fc) || ieee80211_is_any_nullfunc(fc)) rtw_tx_mgmt_pkt_info_update(rtwdev, pkt_info, sta, skb); else if (ieee80211_is_data(fc)) rtw_tx_data_pkt_info_update(rtwdev, pkt_info, sta, skb); From d92f6ad6483e6d430c8273eeb7be97ce85244bd5 Mon Sep 17 00:00:00 2001 From: Christian Hewitt Date: Tue, 17 Mar 2026 11:21:55 +0000 Subject: [PATCH 1015/1561] wifi: rtw89: retry efuse physical map dump on transient failure On Radxa Rock 5B with a RTL8852BE combo WiFi/BT card, the efuse physical map dump intermittently fails with -EBUSY during probe. The failure occurs in rtw89_dump_physical_efuse_map_ddv() where read_poll_timeout_atomic() times out waiting for the B_AX_EF_RDY bit after 1 second. The root cause is a timing race during boot: the WiFi driver's chip initialization (firmware download via PCIe) overlaps with Bluetooth firmware download to the same combo chip via USB. This can leave the efuse controller temporarily unavailable when the WiFi driver attempts to read the efuse map. The firmware download path retries up to 5 times, but the efuse read that follows has no similar logic. Address this by adding retry loop logic (also up to 5 attempts) around physical efuse map dump. Signed-off-by: Christian Hewitt Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260317112155.1939569-1-christianshewitt@gmail.com --- drivers/net/wireless/realtek/rtw89/efuse.c | 23 ++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/efuse.c b/drivers/net/wireless/realtek/rtw89/efuse.c index a2757a88d55d..89d4b1b865f8 100644 --- a/drivers/net/wireless/realtek/rtw89/efuse.c +++ b/drivers/net/wireless/realtek/rtw89/efuse.c @@ -185,8 +185,8 @@ static int rtw89_dump_physical_efuse_map_dav(struct rtw89_dev *rtwdev, u8 *map, return 0; } -static int rtw89_dump_physical_efuse_map(struct rtw89_dev *rtwdev, u8 *map, - u32 dump_addr, u32 dump_size, bool dav) +static int __rtw89_dump_physical_efuse_map(struct rtw89_dev *rtwdev, u8 *map, + u32 dump_addr, u32 dump_size, bool dav) { int ret; @@ -208,6 +208,25 @@ static int rtw89_dump_physical_efuse_map(struct rtw89_dev *rtwdev, u8 *map, return 0; } +static int rtw89_dump_physical_efuse_map(struct rtw89_dev *rtwdev, u8 *map, + u32 dump_addr, u32 dump_size, bool dav) +{ + int retry; + int ret; + + for (retry = 0; retry < 5; retry++) { + ret = __rtw89_dump_physical_efuse_map(rtwdev, map, dump_addr, + dump_size, dav); + if (!ret) + return 0; + + rtw89_warn(rtwdev, "efuse dump (dav=%d) failed, retrying (%d)\n", + dav, retry); + } + + return ret; +} + #define invalid_efuse_header(hdr1, hdr2) \ ((hdr1) == 0xff || (hdr2) == 0xff) #define invalid_efuse_content(word_en, i) \ From 6678828eb78f3ae0bc6db90436068d5fd0387703 Mon Sep 17 00:00:00 2001 From: Zenm Chen Date: Fri, 20 Mar 2026 17:31:22 +0800 Subject: [PATCH 1016/1561] wifi: rtw89: Add support for TP-Link Archer TX50U Add the ID 37ad:0103 to the table to support an additional RTL8832CU adapter: TP-Link Archer TX50U. Link: https://github.com/morrownr/rtl8852cu-20251113/issues/2 Signed-off-by: Zenm Chen Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260320093122.6754-1-zenmchen@gmail.com --- drivers/net/wireless/realtek/rtw89/rtw8852cu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852cu.c b/drivers/net/wireless/realtek/rtw89/rtw8852cu.c index 0c5aebaed873..4755c483e1e6 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852cu.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852cu.c @@ -55,6 +55,8 @@ static const struct usb_device_id rtw_8852cu_id_table[] = { .driver_info = (kernel_ulong_t)&rtw89_8852cu_info }, { USB_DEVICE_AND_INTERFACE_INFO(0x35bc, 0x0102, 0xff, 0xff, 0xff), .driver_info = (kernel_ulong_t)&rtw89_8852cu_info }, + { USB_DEVICE_AND_INTERFACE_INFO(0x37ad, 0x0103, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&rtw89_8852cu_info }, {}, }; MODULE_DEVICE_TABLE(usb, rtw_8852cu_id_table); From aefb20749074731c4f35444761e730991f1b8c77 Mon Sep 17 00:00:00 2001 From: Zenm Chen Date: Fri, 20 Mar 2026 23:41:36 +0800 Subject: [PATCH 1017/1561] wifi: rtw89: Add support for Buffalo WI-U3-2400XE2 Add the ID 0411:03a6 to the table to support an additional RTL8832CU adapter: Buffalo WI-U3-2400XE2. Link: https://github.com/morrownr/rtw89/commit/506d193b8cb7d6394509aebcf8de1531629f6100 Signed-off-by: Zenm Chen Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260320154136.5750-1-zenmchen@gmail.com --- drivers/net/wireless/realtek/rtw89/rtw8852cu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852cu.c b/drivers/net/wireless/realtek/rtw89/rtw8852cu.c index 4755c483e1e6..de3f8358bbe7 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852cu.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852cu.c @@ -39,6 +39,8 @@ static const struct rtw89_driver_info rtw89_8852cu_info = { }; static const struct usb_device_id rtw_8852cu_id_table[] = { + { USB_DEVICE_AND_INTERFACE_INFO(0x0411, 0x03a6, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&rtw89_8852cu_info }, { USB_DEVICE_AND_INTERFACE_INFO(0x0bda, 0xc832, 0xff, 0xff, 0xff), .driver_info = (kernel_ulong_t)&rtw89_8852cu_info }, { USB_DEVICE_AND_INTERFACE_INFO(0x0bda, 0xc85a, 0xff, 0xff, 0xff), From 047cddf88c611e616d49a00311d4722e46286234 Mon Sep 17 00:00:00 2001 From: Alexey Velichayshiy Date: Mon, 23 Mar 2026 17:05:53 +0300 Subject: [PATCH 1018/1561] wifi: rtw89: phy: fix uninitialized variable access in rtw89_phy_cfo_set_crystal_cap() In the rtw89_phy_cfo_set_crystal_cap() function, for chips other than RTL8852A/RTL8851B, the values read by rtw89_mac_read_xtal_si() are stored into the local variables sc_xi_val and sc_xo_val. If either read fails, these variables remain uninitialized, they are later used to update cfo->crystal_cap and in debug print statements. This can lead to undefined behavior. Fix the issue by initializing sc_xi_val and sc_xo_val to zero, like is implemented in vendor driver. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 8379fa611536 ("rtw89: 8852c: add write/read crystal function in CFO tracking") Signed-off-by: Alexey Velichayshiy Acked-by: Ping-Ke Shih Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260323140613.1615574-1-a.velichayshiy@ispras.ru --- drivers/net/wireless/realtek/rtw89/phy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtw89/phy.c b/drivers/net/wireless/realtek/rtw89/phy.c index 3a241738ac06..d205561f1f5c 100644 --- a/drivers/net/wireless/realtek/rtw89/phy.c +++ b/drivers/net/wireless/realtek/rtw89/phy.c @@ -4898,7 +4898,7 @@ static void rtw89_phy_cfo_set_crystal_cap(struct rtw89_dev *rtwdev, { struct rtw89_cfo_tracking_info *cfo = &rtwdev->cfo_tracking; const struct rtw89_chip_info *chip = rtwdev->chip; - u8 sc_xi_val, sc_xo_val; + u8 sc_xi_val = 0, sc_xo_val = 0; if (!force && cfo->crystal_cap == crystal_cap) return; From bf14367719fa86f7c6922c64d37a2df347954c66 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Tue, 24 Mar 2026 09:10:01 +0800 Subject: [PATCH 1019/1561] wifi: rtw88: validate RX rate to prevent out-of-bound The reported RX rate might be unexpected, causing kernel warns: Rate marked as a VHT rate but data is invalid: MCS: 0, NSS: 0 WARNING: net/mac80211/rx.c:5491 at ieee80211_rx_list+0x183/0x1020 [mac80211] As the RX rate can be index of an array under certain conditions, validate it to prevent accessing array out-of-bound potentially. Tested on HP Notebook P3S95EA#ACB (kernel 6.19.9-1-cachyos): - No WARNING: net/mac80211/rx.c:5491 observed after the v2 patch. The unexpected `NSS: 0, MCS: 0` VHT rate warnings are successfully mitigated. - The system remains fully stable through prolonged idle periods, high network load, active Bluetooth A2DP usage, and multiple deep suspend/resume cycles. - Zero h2c timeouts or firmware lps state errors observed in dmesg. Reported-by: Oleksandr Havrylov Closes: https://lore.kernel.org/linux-wireless/CALdGYqSMUPnPfW-_q1RgYr0_SjoXUejAaJJr-o+jpwCk1S7ndQ@mail.gmail.com/ Tested-by: Oleksandr Havrylov Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260324011001.5742-1-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw88/rx.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/wireless/realtek/rtw88/rx.c b/drivers/net/wireless/realtek/rtw88/rx.c index 8b0afaaffaa0..d9e11343d498 100644 --- a/drivers/net/wireless/realtek/rtw88/rx.c +++ b/drivers/net/wireless/realtek/rtw88/rx.c @@ -295,6 +295,14 @@ void rtw_rx_query_rx_desc(struct rtw_dev *rtwdev, void *rx_desc8, pkt_stat->tsf_low = le32_get_bits(rx_desc->w5, RTW_RX_DESC_W5_TSFL); + if (unlikely(pkt_stat->rate >= DESC_RATE_MAX)) { + rtw_dbg(rtwdev, RTW_DBG_UNEXP, + "unexpected RX rate=0x%x\n", pkt_stat->rate); + + pkt_stat->rate = DESC_RATE1M; + pkt_stat->bw = RTW_CHANNEL_WIDTH_20; + } + /* drv_info_sz is in unit of 8-bytes */ pkt_stat->drv_info_sz *= 8; From e51df91b894a583f03e35168d2082786f6dff1bd Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Tue, 24 Mar 2026 14:20:43 +0800 Subject: [PATCH 1020/1561] wifi: rtw89: 8922d: add definition of quota, registers and efuse block The quota is used to configure memory size for TX/RX, and the definition of registers includes H2C command, C2H event, WoWLAN reason, IMR of CMAC and DMAC, ACK rate selector, RF kill GPIO, and BB functions of dynamic initial gain and EDCCA. The layout of efuse block is to define logic map of efuse, such as MAC address and RF calibration values. Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260324062049.52266-2-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/reg.h | 30 +++ drivers/net/wireless/realtek/rtw89/rtw8922d.c | 238 ++++++++++++++++++ drivers/net/wireless/realtek/rtw89/rtw8922d.h | 10 + 3 files changed, 278 insertions(+) create mode 100644 drivers/net/wireless/realtek/rtw89/rtw8922d.c create mode 100644 drivers/net/wireless/realtek/rtw89/rtw8922d.h diff --git a/drivers/net/wireless/realtek/rtw89/reg.h b/drivers/net/wireless/realtek/rtw89/reg.h index 9b605617c3f0..2cb35458a822 100644 --- a/drivers/net/wireless/realtek/rtw89/reg.h +++ b/drivers/net/wireless/realtek/rtw89/reg.h @@ -8737,6 +8737,7 @@ #define R_PHY_STS_BITMAP_EHT 0x0788 #define R_PHY_STS_BITMAP_EHT_BE4 0x20788 #define R_EDCCA_RPTREG_SEL_BE 0x078C +#define R_EDCCA_RPTREG_SEL_BE4 0x2078C #define B_EDCCA_RPTREG_SEL_BE_MSK GENMASK(22, 20) #define R_PMAC_GNT 0x0980 #define B_PMAC_GNT_TXEN BIT(0) @@ -8850,6 +8851,7 @@ #define R_UDP_COEEF 0x0CBC #define B_UDP_COEEF BIT(19) #define R_TX_COLLISION_T2R_ST_BE 0x0CC8 +#define R_TX_COLLISION_T2R_ST_BE4 0x20CC8 #define B_TX_COLLISION_T2R_ST_BE_M GENMASK(13, 8) #define R_RXHT_MCS_LIMIT 0x0D18 #define B_RXHT_MCS_LIMIT GENMASK(9, 8) @@ -9078,7 +9080,11 @@ #define R_P1_EN_SOUND_WO_NDP 0x2D7C #define B_P1_EN_SOUND_WO_NDP BIT(1) #define R_EDCCA_RPT_A_BE 0x2E38 +#define R_EDCCA_RPT_A_BE4 0x2EE30 +#define R_EDCCA_RPT_A_BE4_C1 0x2FE30 #define R_EDCCA_RPT_B_BE 0x2E3C +#define R_EDCCA_RPT_B_BE4 0x2EE34 +#define R_EDCCA_RPT_B_BE4_C1 0x2FE34 #define R_EDCCA_RPT_P1_A_BE 0x2E40 #define R_EDCCA_RPT_P1_B_BE 0x2E44 #define R_S1_HW_SI_DIS 0x3200 @@ -9262,11 +9268,13 @@ #define R_PATH0_P20_FOLLOW_BY_PAGCUGC_V1 0x4C24 #define R_PATH0_P20_FOLLOW_BY_PAGCUGC_V2 0x46E8 #define R_PATH0_P20_FOLLOW_BY_PAGCUGC_V3 0x41C8 +#define R_PATH0_P20_FOLLOW_BY_PAGCUGC_BE4 0x241C8 #define B_PATH0_P20_FOLLOW_BY_PAGCUGC_EN_MSK BIT(5) #define R_PATH0_S20_FOLLOW_BY_PAGCUGC 0x46A4 #define R_PATH0_S20_FOLLOW_BY_PAGCUGC_V1 0x4C28 #define R_PATH0_S20_FOLLOW_BY_PAGCUGC_V2 0x46EC #define R_PATH0_S20_FOLLOW_BY_PAGCUGC_V3 0x41CC +#define R_PATH0_S20_FOLLOW_BY_PAGCUGC_BE4 0x241CC #define B_PATH0_S20_FOLLOW_BY_PAGCUGC_EN_MSK BIT(5) #define R_PATH0_RXB_INIT_V1 0x46A8 #define B_PATH0_RXB_INIT_IDX_MSK_V1 GENMASK(14, 10) @@ -9313,11 +9321,13 @@ #define R_PATH1_P20_FOLLOW_BY_PAGCUGC_V1 0x4CE8 #define R_PATH1_P20_FOLLOW_BY_PAGCUGC_V2 0x47A8 #define R_PATH1_P20_FOLLOW_BY_PAGCUGC_V3 0x45C8 +#define R_PATH1_P20_FOLLOW_BY_PAGCUGC_BE4 0x245C8 #define B_PATH1_P20_FOLLOW_BY_PAGCUGC_EN_MSK BIT(5) #define R_PATH1_S20_FOLLOW_BY_PAGCUGC 0x4778 #define R_PATH1_S20_FOLLOW_BY_PAGCUGC_V1 0x4CEC #define R_PATH1_S20_FOLLOW_BY_PAGCUGC_V2 0x47AC #define R_PATH1_S20_FOLLOW_BY_PAGCUGC_V3 0x45CC +#define R_PATH1_S20_FOLLOW_BY_PAGCUGC_BE4 0x245CC #define B_PATH1_S20_FOLLOW_BY_PAGCUGC_EN_MSK BIT(5) #define R_PATH1_G_TIA0_LNA6_OP1DB_V1 0x4778 #define B_PATH1_G_TIA0_LNA6_OP1DB_V1 GENMASK(7, 0) @@ -9338,6 +9348,7 @@ #define R_SEG0R_PD 0x481C #define R_SEG0R_PD_V1 0x4860 #define R_SEG0R_PD_V2 0x6A74 +#define R_SEG0R_PD_BE4 0x26210 #define R_SEG0R_EDCCA_LVL 0x4840 #define R_SEG0R_EDCCA_LVL_V1 0x4884 #define B_EDCCA_LVL_MSK3 GENMASK(31, 24) @@ -9476,9 +9487,11 @@ #define B_DCFO_COMP_S0_V1_MSK GENMASK(13, 0) #define R_BMODE_PDTH_V1 0x4B64 #define R_BMODE_PDTH_V2 0x6708 +#define R_BMODE_PDTH_BE4 0x26040 #define B_BMODE_PDTH_LOWER_BOUND_MSK_V1 GENMASK(31, 24) #define R_BMODE_PDTH_EN_V1 0x4B74 #define R_BMODE_PDTH_EN_V2 0x6718 +#define R_BMODE_PDTH_EN_BE4 0x26050 #define B_BMODE_PDTH_LIMIT_EN_MSK_V1 BIT(30) #define R_BSS_CLR_VLD_V2 0x4EBC #define B_BSS_CLR_VLD0_V2 BIT(2) @@ -9653,7 +9666,9 @@ #define R_CCK_FC0INV 0x675c #define B_CCK_FC0INV GENMASK(18, 0) #define R_SEG0R_EDCCA_LVL_BE 0x69EC +#define R_SEG0R_EDCCA_LVL_BE4 0x2623C #define R_SEG0R_PPDU_LVL_BE 0x69F0 +#define R_SEG0R_PPDU_LVL_BE4 0x26240 #define R_SEGSND 0x6A14 #define B_SEGSND_EN BIT(31) #define R_DBCC 0x6B48 @@ -10380,6 +10395,9 @@ #define B_SW_SI_DATA_DAT_BE4 GENMASK(19, 0) #define R_SW_SI_READ_ADDR_BE4 0x20378 #define B_SW_SI_READ_ADDR_BE4 GENMASK(10, 0) +#define R_EDCCA_RPT_SEL_BE4 0x20780 +#define R_EDCCA_RPT_SEL_BE4_C1 0x21780 +#define B_EDCCA_RPT_SEL_BE4_MSK 0xE0000 #define R_IFS_T1_AVG_BE4 0x20EDC #define B_IFS_T1_AVG_BE4 GENMASK(15, 0) #define B_IFS_T2_AVG_BE4 GENMASK(31, 16) @@ -10402,6 +10420,18 @@ #define B_IFS_T3_HIS_BE4 GENMASK(15, 0) #define B_IFS_T4_HIS_BE4 GENMASK(31, 16) +#define R_PATH0_RXIDX_INIT_BE4 0x24108 +#define B_PATH0_RXIDX_INIT_BE4 GENMASK(29, 25) +#define R_PATH0_LNA_INIT_BE4 0x24158 +#define B_PATH0_LNA_INIT_IDX_BE4 GENMASK(14, 12) +#define R_PATH0_TIA_INIT_BE4 0x24168 +#define B_PATH0_TIA_INIT_IDX_BE4 BIT(18) +#define R_PATH1_RXIDX_INIT_BE4 0x24508 +#define B_PATH1_RXIDX_INIT_BE4 GENMASK(29, 25) +#define R_PATH1_LNA_INIT_BE4 0x24558 +#define B_PATH1_LNA_INIT_IDX_BE4 GENMASK(14, 12) +#define R_PATH1_TIA_INIT_BE4 0x24568 +#define B_PATH1_TIA_INIT_IDX_BE4 BIT(18) #define R_TX_CFR_MANUAL_EN_BE4 0x2483C #define B_TX_CFR_MANUAL_EN_BE4_M BIT(30) diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922d.c b/drivers/net/wireless/realtek/rtw89/rtw8922d.c new file mode 100644 index 000000000000..6a90ded1b33e --- /dev/null +++ b/drivers/net/wireless/realtek/rtw89/rtw8922d.c @@ -0,0 +1,238 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +/* Copyright(c) 2026 Realtek Corporation + */ + +#include "efuse.h" +#include "mac.h" +#include "reg.h" +#include "rtw8922d.h" + +#define RTW8922D_FW_FORMAT_MAX 0 +#define RTW8922D_FW_BASENAME "rtw89/rtw8922d_fw" +#define RTW8922D_MODULE_FIRMWARE \ + RTW89_GEN_MODULE_FWNAME(RTW8922D_FW_BASENAME, RTW8922D_FW_FORMAT_MAX) + +#define RTW8922DS_FW_FORMAT_MAX 0 +#define RTW8922DS_FW_BASENAME "rtw89/rtw8922ds_fw" +#define RTW8922DS_MODULE_FIRMWARE \ + RTW89_GEN_MODULE_FWNAME(RTW8922DS_FW_BASENAME, RTW8922DS_FW_FORMAT_MAX) + +static const struct rtw89_hfc_ch_cfg rtw8922d_hfc_chcfg_pcie[] = { + {2, 603, 0}, /* ACH 0 */ + {0, 601, 0}, /* ACH 1 */ + {2, 603, 0}, /* ACH 2 */ + {0, 601, 0}, /* ACH 3 */ + {2, 603, 0}, /* ACH 4 */ + {0, 601, 0}, /* ACH 5 */ + {2, 603, 0}, /* ACH 6 */ + {0, 601, 0}, /* ACH 7 */ + {2, 603, 0}, /* B0MGQ */ + {0, 601, 0}, /* B0HIQ */ + {2, 603, 0}, /* B1MGQ */ + {0, 601, 0}, /* B1HIQ */ + {0, 0, 0}, /* FWCMDQ */ + {0, 0, 0}, /* BMC */ + {0, 0, 0}, /* H2D */ +}; + +static const struct rtw89_hfc_pub_cfg rtw8922d_hfc_pubcfg_pcie = { + 613, /* Group 0 */ + 0, /* Group 1 */ + 613, /* Public Max */ + 0, /* WP threshold */ +}; + +static const struct rtw89_hfc_param_ini rtw8922d_hfc_param_ini_pcie[] = { + [RTW89_QTA_SCC] = {rtw8922d_hfc_chcfg_pcie, &rtw8922d_hfc_pubcfg_pcie, + &rtw89_mac_size.hfc_prec_cfg_c0, RTW89_HCIFC_POH}, + [RTW89_QTA_DBCC] = {rtw8922d_hfc_chcfg_pcie, &rtw8922d_hfc_pubcfg_pcie, + &rtw89_mac_size.hfc_prec_cfg_c0, RTW89_HCIFC_POH}, + [RTW89_QTA_DLFW] = {NULL, NULL, &rtw89_mac_size.hfc_prec_cfg_c2, + RTW89_HCIFC_POH}, + [RTW89_QTA_INVALID] = {NULL}, +}; + +static const struct rtw89_dle_mem rtw8922d_dle_mem_pcie[] = { + [RTW89_QTA_SCC] = {RTW89_QTA_SCC, &rtw89_mac_size.wde_size16_v1, + &rtw89_mac_size.ple_size20_v1, &rtw89_mac_size.wde_qt19_v1, + &rtw89_mac_size.wde_qt19_v1, &rtw89_mac_size.ple_qt42_v2, + &rtw89_mac_size.ple_qt43_v2, &rtw89_mac_size.ple_rsvd_qt9, + &rtw89_mac_size.rsvd0_size6, &rtw89_mac_size.rsvd1_size2, + &rtw89_mac_size.dle_input18}, + [RTW89_QTA_DBCC] = {RTW89_QTA_DBCC, &rtw89_mac_size.wde_size16_v1, + &rtw89_mac_size.ple_size20_v1, &rtw89_mac_size.wde_qt19_v1, + &rtw89_mac_size.wde_qt19_v1, &rtw89_mac_size.ple_qt42_v2, + &rtw89_mac_size.ple_qt43_v2, &rtw89_mac_size.ple_rsvd_qt9, + &rtw89_mac_size.rsvd0_size6, &rtw89_mac_size.rsvd1_size2, + &rtw89_mac_size.dle_input18}, + [RTW89_QTA_DLFW] = {RTW89_QTA_DLFW, &rtw89_mac_size.wde_size18_v1, + &rtw89_mac_size.ple_size22_v1, &rtw89_mac_size.wde_qt3, + &rtw89_mac_size.wde_qt3, &rtw89_mac_size.ple_qt5_v2, + &rtw89_mac_size.ple_qt5_v2, &rtw89_mac_size.ple_rsvd_qt1, + &rtw89_mac_size.rsvd0_size6, &rtw89_mac_size.rsvd1_size2, + &rtw89_mac_size.dle_input3}, + [RTW89_QTA_INVALID] = {RTW89_QTA_INVALID, NULL, NULL, NULL, NULL, NULL, + NULL}, +}; + +static const u32 rtw8922d_h2c_regs[RTW89_H2CREG_MAX] = { + R_BE_H2CREG_DATA0, R_BE_H2CREG_DATA1, R_BE_H2CREG_DATA2, + R_BE_H2CREG_DATA3 +}; + +static const u32 rtw8922d_c2h_regs[RTW89_H2CREG_MAX] = { + R_BE_C2HREG_DATA0, R_BE_C2HREG_DATA1, R_BE_C2HREG_DATA2, + R_BE_C2HREG_DATA3 +}; + +static const u32 rtw8922d_wow_wakeup_regs[RTW89_WOW_REASON_NUM] = { + R_BE_DBG_WOW, R_BE_DBG_WOW, +}; + +static const struct rtw89_page_regs rtw8922d_page_regs = { + .hci_fc_ctrl = R_BE_HCI_FC_CTRL, + .ch_page_ctrl = R_BE_CH_PAGE_CTRL, + .ach_page_ctrl = R_BE_CH0_PAGE_CTRL, + .ach_page_info = R_BE_CH0_PAGE_INFO, + .pub_page_info3 = R_BE_PUB_PAGE_INFO3, + .pub_page_ctrl1 = R_BE_PUB_PAGE_CTRL1, + .pub_page_ctrl2 = R_BE_PUB_PAGE_CTRL2, + .pub_page_info1 = R_BE_PUB_PAGE_INFO1, + .pub_page_info2 = R_BE_PUB_PAGE_INFO2, + .wp_page_ctrl1 = R_BE_WP_PAGE_CTRL1, + .wp_page_ctrl2 = R_BE_WP_PAGE_CTRL2, + .wp_page_info1 = R_BE_WP_PAGE_INFO1, +}; + +static const struct rtw89_reg_imr rtw8922d_imr_dmac_regs[] = { + {R_BE_HCI_BUF_IMR, B_BE_HCI_BUF_IMR_CLR, B_BE_HCI_BUF_IMR_SET}, + {R_BE_DISP_HOST_IMR, B_BE_DISP_HOST_IMR_CLR_V1, B_BE_DISP_HOST_IMR_SET_V1}, + {R_BE_DISP_CPU_IMR, B_BE_DISP_CPU_IMR_CLR_V1, B_BE_DISP_CPU_IMR_SET_V1}, + {R_BE_DISP_OTHER_IMR, B_BE_DISP_OTHER_IMR_CLR_V1, B_BE_DISP_OTHER_IMR_SET_V1}, + {R_BE_PKTIN_ERR_IMR, B_BE_PKTIN_ERR_IMR_CLR, B_BE_PKTIN_ERR_IMR_SET}, + {R_BE_MLO_ERR_IDCT_IMR, B_BE_MLO_ERR_IDCT_IMR_CLR, B_BE_MLO_ERR_IDCT_IMR_SET}, + {R_BE_MPDU_TX_ERR_IMR, B_BE_MPDU_TX_ERR_IMR_CLR, B_BE_MPDU_TX_ERR_IMR_SET}, + {R_BE_MPDU_RX_ERR_IMR, B_BE_MPDU_RX_ERR_IMR_CLR, B_BE_MPDU_RX_ERR_IMR_SET}, + {R_BE_SEC_ERROR_IMR, B_BE_SEC_ERROR_IMR_CLR, B_BE_SEC_ERROR_IMR_SET}, + {R_BE_CPUIO_ERR_IMR, B_BE_CPUIO_ERR_IMR_CLR, B_BE_CPUIO_ERR_IMR_SET}, + {R_BE_WDE_ERR_IMR, B_BE_WDE_ERR_IMR_CLR, B_BE_WDE_ERR_IMR_SET}, + {R_BE_PLE_ERR_IMR, B_BE_PLE_ERR_IMR_CLR, B_BE_PLE_ERR_IMR_SET}, + {R_BE_WDRLS_ERR_IMR, B_BE_WDRLS_ERR_IMR_CLR, B_BE_WDRLS_ERR_IMR_SET}, + {R_BE_TXPKTCTL_B0_ERRFLAG_IMR, B_BE_TXPKTCTL_B0_ERRFLAG_IMR_CLR, + B_BE_TXPKTCTL_B0_ERRFLAG_IMR_SET}, + {R_BE_TXPKTCTL_B1_ERRFLAG_IMR, B_BE_TXPKTCTL_B1_ERRFLAG_IMR_CLR, + B_BE_TXPKTCTL_B1_ERRFLAG_IMR_SET}, + {R_BE_BBRPT_COM_ERR_IMR, B_BE_BBRPT_COM_ERR_IMR_CLR, B_BE_BBRPT_COM_ERR_IMR_SET}, + {R_BE_BBRPT_CHINFO_ERR_IMR, B_BE_BBRPT_CHINFO_ERR_IMR_CLR, + B_BE_BBRPT_CHINFO_ERR_IMR_SET}, + {R_BE_BBRPT_DFS_ERR_IMR, B_BE_BBRPT_DFS_ERR_IMR_CLR, B_BE_BBRPT_DFS_ERR_IMR_SET}, + {R_BE_LA_ERRFLAG_IMR, B_BE_LA_ERRFLAG_IMR_CLR, B_BE_LA_ERRFLAG_IMR_SET}, + {R_BE_CH_INFO_DBGFLAG_IMR, B_BE_CH_INFO_DBGFLAG_IMR_CLR, B_BE_CH_INFO_DBGFLAG_IMR_SET}, + {R_BE_PLRLS_ERR_IMR_V1, B_BE_PLRLS_ERR_IMR_V1_CLR, B_BE_PLRLS_ERR_IMR_V1_SET}, + {R_BE_HAXI_IDCT_MSK, B_BE_HAXI_IDCT_MSK_CLR, B_BE_HAXI_IDCT_MSK_SET}, +}; + +static const struct rtw89_imr_table rtw8922d_imr_dmac_table = { + .regs = rtw8922d_imr_dmac_regs, + .n_regs = ARRAY_SIZE(rtw8922d_imr_dmac_regs), +}; + +static const struct rtw89_reg_imr rtw8922d_imr_cmac_regs[] = { + {R_BE_RESP_IMR, B_BE_RESP_IMR_CLR_V1, B_BE_RESP_IMR_SET_V1}, + {R_BE_RESP_IMR1, B_BE_RESP_IMR1_CLR, B_BE_RESP_IMR1_SET}, + {R_BE_RX_ERROR_FLAG_IMR, B_BE_RX_ERROR_FLAG_IMR_CLR_V1, B_BE_RX_ERROR_FLAG_IMR_SET_V1}, + {R_BE_TX_ERROR_FLAG_IMR, B_BE_TX_ERROR_FLAG_IMR_CLR, B_BE_TX_ERROR_FLAG_IMR_SET}, + {R_BE_RX_ERROR_FLAG_IMR_1, B_BE_TX_ERROR_FLAG_IMR_1_CLR, B_BE_TX_ERROR_FLAG_IMR_1_SET}, + {R_BE_PTCL_IMR1, B_BE_PTCL_IMR1_CLR, B_BE_PTCL_IMR1_SET}, + {R_BE_PTCL_IMR0, B_BE_PTCL_IMR0_CLR, B_BE_PTCL_IMR0_SET}, + {R_BE_PTCL_IMR_2, B_BE_PTCL_IMR_2_CLR, B_BE_PTCL_IMR_2_SET}, + {R_BE_SCHEDULE_ERR_IMR, B_BE_SCHEDULE_ERR_IMR_CLR, B_BE_SCHEDULE_ERR_IMR_SET}, + {R_BE_C0_TXPWR_IMR, B_BE_C0_TXPWR_IMR_CLR, B_BE_C0_TXPWR_IMR_SET}, + {R_BE_TRXPTCL_ERROR_INDICA_MASK, B_BE_TRXPTCL_ERROR_INDICA_MASK_CLR, + B_BE_TRXPTCL_ERROR_INDICA_MASK_SET}, + {R_BE_RX_ERR_IMR, B_BE_RX_ERR_IMR_CLR, B_BE_RX_ERR_IMR_SET}, + {R_BE_PHYINFO_ERR_IMR_V1, B_BE_PHYINFO_ERR_IMR_V1_CLR, B_BE_PHYINFO_ERR_IMR_V1_SET}, +}; + +static const struct rtw89_imr_table rtw8922d_imr_cmac_table = { + .regs = rtw8922d_imr_cmac_regs, + .n_regs = ARRAY_SIZE(rtw8922d_imr_cmac_regs), +}; + +static const struct rtw89_rrsr_cfgs rtw8922d_rrsr_cfgs = { + .ref_rate = {R_BE_TRXPTCL_RESP_1, B_BE_WMAC_RESP_REF_RATE_SEL, 0}, + .rsc = {R_BE_PTCL_RRSR1, B_BE_RSC_MASK, 2}, +}; + +static const struct rtw89_rfkill_regs rtw8922d_rfkill_regs = { + .pinmux = {R_BE_GPIO8_15_FUNC_SEL, + B_BE_PINMUX_GPIO9_FUNC_SEL_MASK, + 0xf}, + .mode = {R_BE_GPIO_EXT_CTRL + 2, + (B_BE_GPIO_MOD_9 | B_BE_GPIO_IO_SEL_9) >> 16, + 0x0}, +}; + +static const struct rtw89_dig_regs rtw8922d_dig_regs = { + .seg0_pd_reg = R_SEG0R_PD_BE4, + .pd_lower_bound_mask = B_SEG0R_PD_LOWER_BOUND_MSK, + .pd_spatial_reuse_en = B_SEG0R_PD_SPATIAL_REUSE_EN_MSK_V1, + .bmode_pd_reg = R_BMODE_PDTH_EN_BE4, + .bmode_cca_rssi_limit_en = B_BMODE_PDTH_LIMIT_EN_MSK_V1, + .bmode_pd_lower_bound_reg = R_BMODE_PDTH_BE4, + .bmode_rssi_nocca_low_th_mask = B_BMODE_PDTH_LOWER_BOUND_MSK_V1, + .p0_lna_init = {R_PATH0_LNA_INIT_BE4, B_PATH0_LNA_INIT_IDX_BE4}, + .p1_lna_init = {R_PATH1_LNA_INIT_BE4, B_PATH1_LNA_INIT_IDX_BE4}, + .p0_tia_init = {R_PATH0_TIA_INIT_BE4, B_PATH0_TIA_INIT_IDX_BE4}, + .p1_tia_init = {R_PATH1_TIA_INIT_BE4, B_PATH1_TIA_INIT_IDX_BE4}, + .p0_rxb_init = {R_PATH0_RXIDX_INIT_BE4, B_PATH0_RXIDX_INIT_BE4}, + .p1_rxb_init = {R_PATH1_RXIDX_INIT_BE4, B_PATH1_RXIDX_INIT_BE4}, + .p0_p20_pagcugc_en = {R_PATH0_P20_FOLLOW_BY_PAGCUGC_BE4, + B_PATH0_P20_FOLLOW_BY_PAGCUGC_EN_MSK}, + .p0_s20_pagcugc_en = {R_PATH0_S20_FOLLOW_BY_PAGCUGC_BE4, + B_PATH0_S20_FOLLOW_BY_PAGCUGC_EN_MSK}, + .p1_p20_pagcugc_en = {R_PATH1_P20_FOLLOW_BY_PAGCUGC_BE4, + B_PATH1_P20_FOLLOW_BY_PAGCUGC_EN_MSK}, + .p1_s20_pagcugc_en = {R_PATH1_S20_FOLLOW_BY_PAGCUGC_BE4, + B_PATH1_S20_FOLLOW_BY_PAGCUGC_EN_MSK}, +}; + +static const struct rtw89_edcca_regs rtw8922d_edcca_regs = { + .edcca_level = R_SEG0R_EDCCA_LVL_BE4, + .edcca_mask = B_EDCCA_LVL_MSK0, + .edcca_p_mask = B_EDCCA_LVL_MSK1, + .ppdu_level = R_SEG0R_PPDU_LVL_BE4, + .ppdu_mask = B_EDCCA_LVL_MSK1, + .p = {{ + .rpt_a = R_EDCCA_RPT_A_BE4, + .rpt_b = R_EDCCA_RPT_B_BE4, + .rpt_sel = R_EDCCA_RPT_SEL_BE4, + .rpt_sel_mask = B_EDCCA_RPT_SEL_BE4_MSK, + }, { + .rpt_a = R_EDCCA_RPT_A_BE4_C1, + .rpt_b = R_EDCCA_RPT_A_BE4_C1, + .rpt_sel = R_EDCCA_RPT_SEL_BE4_C1, + .rpt_sel_mask = B_EDCCA_RPT_SEL_BE4_MSK, + }}, + .rpt_sel_be = R_EDCCA_RPTREG_SEL_BE4, + .rpt_sel_be_mask = B_EDCCA_RPTREG_SEL_BE_MSK, + .tx_collision_t2r_st = R_TX_COLLISION_T2R_ST_BE4, + .tx_collision_t2r_st_mask = B_TX_COLLISION_T2R_ST_BE_M, +}; + +static const struct rtw89_efuse_block_cfg rtw8922d_efuse_blocks[] = { + [RTW89_EFUSE_BLOCK_SYS] = {.offset = 0x00000, .size = 0x310}, + [RTW89_EFUSE_BLOCK_RF] = {.offset = 0x10000, .size = 0x240}, + [RTW89_EFUSE_BLOCK_HCI_DIG_PCIE_SDIO] = {.offset = 0x20000, .size = 0x4800}, + [RTW89_EFUSE_BLOCK_HCI_DIG_USB] = {.offset = 0x30000, .size = 0x890}, + [RTW89_EFUSE_BLOCK_HCI_PHY_PCIE] = {.offset = 0x40000, .size = 0x400}, + [RTW89_EFUSE_BLOCK_HCI_PHY_USB3] = {.offset = 0x50000, .size = 0x80}, + [RTW89_EFUSE_BLOCK_HCI_PHY_USB2] = {.offset = 0x60000, .size = 0x50}, + [RTW89_EFUSE_BLOCK_ADIE] = {.offset = 0x70000, .size = 0x10}, +}; + +MODULE_FIRMWARE(RTW8922D_MODULE_FIRMWARE); +MODULE_FIRMWARE(RTW8922DS_MODULE_FIRMWARE); +MODULE_AUTHOR("Realtek Corporation"); +MODULE_DESCRIPTION("Realtek 802.11be wireless 8922D driver"); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922d.h b/drivers/net/wireless/realtek/rtw89/rtw8922d.h new file mode 100644 index 000000000000..7ef3f263274e --- /dev/null +++ b/drivers/net/wireless/realtek/rtw89/rtw8922d.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ +/* Copyright(c) 2026 Realtek Corporation + */ + +#ifndef __RTW89_8922D_H__ +#define __RTW89_8922D_H__ + +#include "core.h" + +#endif From bbbde3d0fbe1005cffa45a7803c17589e2011107 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Tue, 24 Mar 2026 14:20:44 +0800 Subject: [PATCH 1021/1561] wifi: rtw89: 8922d: add power on/off functions The power on function is the first entry to power on hardware including all MAC/BB/RF circuits, and then it becomes possible to do high level operations, such as WiFi scan, connection. If connection becomes unavailable, device stays into idle mode, calling power off function to cut power. Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260324062049.52266-3-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/reg.h | 63 +++++ drivers/net/wireless/realtek/rtw89/rtw8922d.c | 237 ++++++++++++++++++ 2 files changed, 300 insertions(+) diff --git a/drivers/net/wireless/realtek/rtw89/reg.h b/drivers/net/wireless/realtek/rtw89/reg.h index 2cb35458a822..b6fd7b434de9 100644 --- a/drivers/net/wireless/realtek/rtw89/reg.h +++ b/drivers/net/wireless/realtek/rtw89/reg.h @@ -4403,6 +4403,31 @@ #define B_BE_FS_GPIO17_INT_EN BIT(1) #define B_BE_FS_GPIO16_INT_EN BIT(0) +#define R_BE_FWS1ISR 0x019C +#define B_BE_FS_WL_HW_RADIO_OFF_INT BIT(28) +#define B_BE_SWRD_BOD_INT BIT(27) +#define B_BE_HCIDBG_INT BIT(25) +#define B_BE_FS_RPWM_INT_V1 BIT(24) +#define B_BE_PCIE_HOTRST BIT(22) +#define B_BE_PCIE_SER_TIMEOUT_INDIC BIT(21) +#define B_BE_PCIE_RXI300_SLVTOUT_INDIC BIT(20) +#define B_BE_AON_PCIE_FLR_INT BIT(19) +#define B_BE_PCIE_ERR_INDIC BIT(18) +#define B_BE_SDIO_ERR_INDIC BIT(17) +#define B_BE_USB_ERR_INDIC BIT(16) +#define B_BE_FS_GPIO27_INT BIT(11) +#define B_BE_FS_GPIO26_INT BIT(10) +#define B_BE_FS_GPIO25_INT BIT(9) +#define B_BE_FS_GPIO24_INT BIT(8) +#define B_BE_FS_GPIO23_INT BIT(7) +#define B_BE_FS_GPIO22_INT BIT(6) +#define B_BE_FS_GPIO21_INT BIT(5) +#define B_BE_FS_GPIO20_INT BIT(4) +#define B_BE_FS_GPIO19_INT BIT(3) +#define B_BE_FS_GPIO18_INT BIT(2) +#define B_BE_FS_GPIO17_INT BIT(1) +#define B_BE_FS_GPIO16_INT BIT(0) + #define R_BE_HIMR0 0x01A0 #define B_BE_WDT_DATACPU_TIMEOUT_INT_EN BIT(25) #define B_BE_HALT_D2H_INT_EN BIT(24) @@ -4503,6 +4528,44 @@ #define R_BE_UDM2 0x01F8 #define B_BE_UDM2_EPC_RA_MASK GENMASK(31, 0) +#define R_BE_SPS_DIG_ON_CTRL1 0x0204 +#define B_BE_SN_N_L_MASK GENMASK(31, 28) +#define B_BE_SP_N_L_MASK GENMASK(27, 24) +#define B_BE_SN_P_L_MASK GENMASK(23, 20) +#define B_BE_SP_P_L_MASK GENMASK(19, 16) +#define B_BE_VO_DISCHG_PWM_H BIT(15) +#define B_BE_REG_MODE_PREDRIVER BIT(14) +#define B_BE_VREFOCP_MASK GENMASK(13, 10) +#define B_BE_POWOCP_L1 BIT(9) +#define B_BE_PWM_FORCE BIT(8) +#define B_BE_PFM_PD_RST BIT(7) +#define B_BE_VC_PFM_RSTB BIT(6) +#define B_BE_PFM_IN_SEL BIT(5) +#define B_BE_VC_RSTB BIT(4) +#define B_BE_FPWMDELAY BIT(3) +#define B_BE_ENFPWMDELAY_H BIT(2) +#define B_BE_REG_MOS_HALF_L BIT(1) +#define B_BE_CURRENT_SENSE_MOS BIT(0) + +#define R_BE_SPS_ANA_ON_CTRL1 0x0224 +#define B_BE_SN_N_L_ANA_MASK GENMASK(31, 28) +#define B_BE_SP_N_L_ANA_MASK GENMASK(27, 24) +#define B_BE_SN_P_L_ANA_MASK GENMASK(23, 20) +#define B_BE_SP_P_L_ANA_MASK GENMASK(19, 16) +#define B_BE_VO_DISCHG_PWM_H_ANA BIT(15) +#define B_BE_REG_MODE_PREDRIVER_ANA BIT(14) +#define B_BE_VREFOCP_ANA_MASK GENMASK(13, 10) +#define B_BE_POWOCP_L1_ANA BIT(9) +#define B_BE_PWM_FORCE_ANA BIT(8) +#define B_BE_PFM_PD_RST_ANA BIT(7) +#define B_BE_VC_PFM_RSTB_ANA BIT(6) +#define B_BE_PFM_IN_SEL_ANA BIT(5) +#define B_BE_VC_RSTB_ANA BIT(4) +#define B_BE_FPWMDELAY_ANA BIT(3) +#define B_BE_ENFPWMDELAY_H_ANA BIT(2) +#define B_BE_REG_MOS_HALF_L_ANA BIT(1) +#define B_BE_CURRENT_SENSE_MOS_ANA BIT(0) + #define R_BE_AFE_ON_CTRL0 0x0240 #define B_BE_REG_LPF_R3_3_0_MASK GENMASK(31, 29) #define B_BE_REG_LPF_R2_MASK GENMASK(28, 24) diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922d.c b/drivers/net/wireless/realtek/rtw89/rtw8922d.c index 6a90ded1b33e..194e2901232b 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8922d.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8922d.c @@ -231,6 +231,243 @@ static const struct rtw89_efuse_block_cfg rtw8922d_efuse_blocks[] = { [RTW89_EFUSE_BLOCK_ADIE] = {.offset = 0x70000, .size = 0x10}, }; +static int rtw8922d_pwr_on_func(struct rtw89_dev *rtwdev) +{ + struct rtw89_hal *hal = &rtwdev->hal; + u32 val32; + int ret; + + if (hal->cid != RTL8922D_CID7025) + goto begin; + + switch (hal->cv) { + case CHIP_CAV: + case CHIP_CBV: + rtw89_write32_set(rtwdev, R_BE_SPS_DIG_ON_CTRL1, B_BE_PWM_FORCE); + rtw89_write32_set(rtwdev, R_BE_SPS_ANA_ON_CTRL1, B_BE_PWM_FORCE_ANA); + break; + default: + break; + } + +begin: + rtw89_write32_clr(rtwdev, R_BE_SYS_PW_CTRL, B_BE_AFSM_WLSUS_EN | + B_BE_AFSM_PCIE_SUS_EN); + rtw89_write32_set(rtwdev, R_BE_SYS_PW_CTRL, B_BE_DIS_WLBT_PDNSUSEN_SOPC); + rtw89_write32_set(rtwdev, R_BE_WLLPS_CTRL, B_BE_DIS_WLBT_LPSEN_LOPC); + if (hal->cid != RTL8922D_CID7090) + rtw89_write32_clr(rtwdev, R_BE_SYS_PW_CTRL, B_BE_APDM_HPDN); + rtw89_write32_clr(rtwdev, R_BE_FWS1ISR, B_BE_FS_WL_HW_RADIO_OFF_INT); + rtw89_write32_clr(rtwdev, R_BE_SYS_PW_CTRL, B_BE_APFM_SWLPS); + + ret = read_poll_timeout(rtw89_read32, val32, val32 & B_BE_RDY_SYSPWR, + 1000, 3000000, false, rtwdev, R_BE_SYS_PW_CTRL); + if (ret) + return ret; + + rtw89_write32_set(rtwdev, R_BE_SYS_PW_CTRL, B_BE_EN_WLON); + rtw89_write32_set(rtwdev, R_BE_WLRESUME_CTRL, B_BE_LPSROP_CMAC0 | + B_BE_LPSROP_CMAC1); + rtw89_write32_set(rtwdev, R_BE_SYS_PW_CTRL, B_BE_APFN_ONMAC); + + ret = read_poll_timeout(rtw89_read32, val32, !(val32 & B_BE_APFN_ONMAC), + 1000, 3000000, false, rtwdev, R_BE_SYS_PW_CTRL); + if (ret) + return ret; + + rtw89_write8_set(rtwdev, R_BE_PLATFORM_ENABLE, B_BE_PLATFORM_EN); + rtw89_write32_set(rtwdev, R_BE_HCI_OPT_CTRL, B_BE_HAXIDMA_IO_EN); + + ret = read_poll_timeout(rtw89_read32, val32, val32 & B_BE_HAXIDMA_IO_ST, + 1000, 3000000, false, rtwdev, R_BE_HCI_OPT_CTRL); + if (ret) + return ret; + + ret = read_poll_timeout(rtw89_read32, val32, !(val32 & B_BE_HAXIDMA_BACKUP_RESTORE_ST), + 1000, 3000000, false, rtwdev, R_BE_HCI_OPT_CTRL); + if (ret) + return ret; + + rtw89_write32_set(rtwdev, R_BE_HCI_OPT_CTRL, B_BE_HCI_WLAN_IO_EN); + + ret = read_poll_timeout(rtw89_read32, val32, val32 & B_BE_HCI_WLAN_IO_ST, + 1000, 3000000, false, rtwdev, R_BE_HCI_OPT_CTRL); + if (ret) + return ret; + + rtw89_write32_clr(rtwdev, R_BE_SYS_SDIO_CTRL, B_BE_PCIE_FORCE_IBX_EN); + + ret = rtw89_mac_write_xtal_si(rtwdev, XTAL_SI_NORMAL_WRITE, 0x10, 0x10); + if (ret) + return ret; + + rtw89_write32_set(rtwdev, R_BE_SYS_ADIE_PAD_PWR_CTRL, B_BE_SYM_PADPDN_WL_RFC1_1P3); + + ret = rtw89_mac_write_xtal_si(rtwdev, XTAL_SI_ANAPAR_WL, 0x40, 0x40); + if (ret) + return ret; + + rtw89_write32_set(rtwdev, R_BE_SYS_ADIE_PAD_PWR_CTRL, B_BE_SYM_PADPDN_WL_RFC0_1P3); + + ret = rtw89_mac_write_xtal_si(rtwdev, XTAL_SI_ANAPAR_WL, 0x20, 0x20); + if (ret) + return ret; + ret = rtw89_mac_write_xtal_si(rtwdev, XTAL_SI_ANAPAR_WL, 0x04, 0x04); + if (ret) + return ret; + ret = rtw89_mac_write_xtal_si(rtwdev, XTAL_SI_ANAPAR_WL, 0x08, 0x08); + if (ret) + return ret; + ret = rtw89_mac_write_xtal_si(rtwdev, XTAL_SI_ANAPAR_WL, 0, 0x10); + if (ret) + return ret; + ret = rtw89_mac_write_xtal_si(rtwdev, XTAL_SI_WL_RFC_S0, 0xEB, 0xFF); + if (ret) + return ret; + ret = rtw89_mac_write_xtal_si(rtwdev, XTAL_SI_WL_RFC_S1, 0xEB, 0xFF); + if (ret) + return ret; + ret = rtw89_mac_write_xtal_si(rtwdev, XTAL_SI_ANAPAR_WL, 0x01, 0x01); + if (ret) + return ret; + ret = rtw89_mac_write_xtal_si(rtwdev, XTAL_SI_ANAPAR_WL, 0x02, 0x02); + if (ret) + return ret; + ret = rtw89_mac_write_xtal_si(rtwdev, XTAL_SI_ANAPAR_WL, 0, 0x80); + if (ret) + return ret; + ret = rtw89_mac_write_xtal_si(rtwdev, XTAL_SI_XTAL_XMD_2, 0, 0x70); + if (ret) + return ret; + ret = rtw89_mac_write_xtal_si(rtwdev, XTAL_SI_SRAM_CTRL, 0, 0x02); + if (ret) + return ret; + + rtw89_write32_set(rtwdev, R_BE_PMC_DBG_CTRL2, B_BE_SYSON_DIS_PMCR_BE_WRMSK); + rtw89_write32_set(rtwdev, R_BE_SYS_ISO_CTRL, B_BE_ISO_EB2CORE); + rtw89_write32_clr(rtwdev, R_BE_SYS_ISO_CTRL, B_BE_PWC_EV2EF_B); + + mdelay(1); + + rtw89_write32_clr(rtwdev, R_BE_SYS_ISO_CTRL, B_BE_PWC_EV2EF_S); + rtw89_write32_clr(rtwdev, R_BE_PMC_DBG_CTRL2, B_BE_SYSON_DIS_PMCR_BE_WRMSK); + + rtw89_write32_set(rtwdev, R_BE_DMAC_FUNC_EN, + B_BE_MAC_FUNC_EN | B_BE_DMAC_FUNC_EN | + B_BE_MPDU_PROC_EN | B_BE_WD_RLS_EN | + B_BE_DLE_WDE_EN | B_BE_TXPKT_CTRL_EN | + B_BE_STA_SCH_EN | B_BE_DLE_PLE_EN | + B_BE_PKT_BUF_EN | B_BE_DMAC_TBL_EN | + B_BE_PKT_IN_EN | B_BE_DLE_CPUIO_EN | + B_BE_DISPATCHER_EN | B_BE_BBRPT_EN | + B_BE_MAC_SEC_EN | B_BE_H_AXIDMA_EN | + B_BE_DMAC_MLO_EN | B_BE_PLRLS_EN | + B_BE_P_AXIDMA_EN | B_BE_DLE_DATACPUIO_EN | + B_BE_LTR_CTL_EN); + + set_bit(RTW89_FLAG_DMAC_FUNC, rtwdev->flags); + + rtw89_write32_set(rtwdev, R_BE_CMAC_SHARE_FUNC_EN, + B_BE_CMAC_SHARE_EN | B_BE_RESPBA_EN | + B_BE_ADDRSRCH_EN | B_BE_BTCOEX_EN); + + rtw89_write32_set(rtwdev, R_BE_CMAC_FUNC_EN, + B_BE_CMAC_EN | B_BE_CMAC_TXEN | + B_BE_CMAC_RXEN | B_BE_SIGB_EN | + B_BE_PHYINTF_EN | B_BE_CMAC_DMA_EN | + B_BE_PTCLTOP_EN | B_BE_SCHEDULER_EN | + B_BE_TMAC_EN | B_BE_RMAC_EN | + B_BE_TXTIME_EN | B_BE_RESP_PKTCTL_EN); + + set_bit(RTW89_FLAG_CMAC0_FUNC, rtwdev->flags); + + rtw89_write32_set(rtwdev, R_BE_FEN_RST_ENABLE, + B_BE_FEN_BB_IP_RSTN | B_BE_FEN_BBPLAT_RSTB); + + return 0; +} + +static int rtw8922d_pwr_off_func(struct rtw89_dev *rtwdev) +{ + u32 val32; + int ret; + + ret = rtw89_mac_write_xtal_si(rtwdev, XTAL_SI_ANAPAR_WL, 0x10, 0x10); + if (ret) + return ret; + ret = rtw89_mac_write_xtal_si(rtwdev, XTAL_SI_ANAPAR_WL, 0, 0x08); + if (ret) + return ret; + ret = rtw89_mac_write_xtal_si(rtwdev, XTAL_SI_ANAPAR_WL, 0, 0x04); + if (ret) + return ret; + ret = rtw89_mac_write_xtal_si(rtwdev, XTAL_SI_WL_RFC_S0, 0, 0x01); + if (ret) + return ret; + ret = rtw89_mac_write_xtal_si(rtwdev, XTAL_SI_WL_RFC_S1, 0, 0x01); + if (ret) + return ret; + ret = rtw89_mac_write_xtal_si(rtwdev, XTAL_SI_ANAPAR_WL, 0x80, 0x80); + if (ret) + return ret; + ret = rtw89_mac_write_xtal_si(rtwdev, XTAL_SI_ANAPAR_WL, 0, 0x02); + if (ret) + return ret; + ret = rtw89_mac_write_xtal_si(rtwdev, XTAL_SI_ANAPAR_WL, 0, 0x01); + if (ret) + return ret; + + rtw89_write32_set(rtwdev, R_BE_SYS_PW_CTRL, B_BE_EN_WLON); + rtw89_write8_clr(rtwdev, R_BE_FEN_RST_ENABLE, B_BE_FEN_BB_IP_RSTN | + B_BE_FEN_BBPLAT_RSTB); + rtw89_write32_clr(rtwdev, R_BE_SYS_ADIE_PAD_PWR_CTRL, + B_BE_SYM_PADPDN_WL_RFC0_1P3); + + ret = rtw89_mac_write_xtal_si(rtwdev, XTAL_SI_ANAPAR_WL, 0, 0x20); + if (ret) + return ret; + + rtw89_write32_clr(rtwdev, R_BE_SYS_ADIE_PAD_PWR_CTRL, + B_BE_SYM_PADPDN_WL_RFC1_1P3); + + ret = rtw89_mac_write_xtal_si(rtwdev, XTAL_SI_ANAPAR_WL, 0, 0x40); + if (ret) + return ret; + + rtw89_write32_clr(rtwdev, R_BE_HCI_OPT_CTRL, B_BE_HAXIDMA_IO_EN); + + ret = read_poll_timeout(rtw89_read32, val32, !(val32 & B_BE_HAXIDMA_IO_ST), + 1000, 3000000, false, rtwdev, R_BE_HCI_OPT_CTRL); + if (ret) + return ret; + ret = read_poll_timeout(rtw89_read32, val32, + !(val32 & B_BE_HAXIDMA_BACKUP_RESTORE_ST), + 1000, 3000000, false, rtwdev, R_BE_HCI_OPT_CTRL); + if (ret) + return ret; + + rtw89_write32_clr(rtwdev, R_BE_HCI_OPT_CTRL, B_BE_HCI_WLAN_IO_EN); + + ret = read_poll_timeout(rtw89_read32, val32, !(val32 & B_BE_HCI_WLAN_IO_ST), + 1000, 3000000, false, rtwdev, R_BE_HCI_OPT_CTRL); + if (ret) + return ret; + + rtw89_write32_set(rtwdev, R_BE_SYS_PW_CTRL, B_BE_APFM_OFFMAC); + + ret = read_poll_timeout(rtw89_read32, val32, !(val32 & B_BE_APFM_OFFMAC), + 1000, 3000000, false, rtwdev, R_BE_SYS_PW_CTRL); + if (ret) + return ret; + + rtw89_write32(rtwdev, R_BE_WLLPS_CTRL, 0x00015002); + rtw89_write32_clr(rtwdev, R_BE_SYS_PW_CTRL, B_BE_XTAL_OFF_A_DIE); + rtw89_write32_set(rtwdev, R_BE_SYS_PW_CTRL, B_BE_APFM_SWLPS); + rtw89_write32(rtwdev, R_BE_UDM1, 0); + + return 0; +} + MODULE_FIRMWARE(RTW8922D_MODULE_FIRMWARE); MODULE_FIRMWARE(RTW8922DS_MODULE_FIRMWARE); MODULE_AUTHOR("Realtek Corporation"); From 199e4235d33f01d3205d40a056e506ac4964da28 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Tue, 24 Mar 2026 14:20:45 +0800 Subject: [PATCH 1022/1561] wifi: rtw89: 8922d: define efuse map and read necessary fields Define specific efuse map for RTL8922D, including TSSI, RX gain, MAC address, RFE type and etc. The additional fields comparing to existing chips are BT setting (define BT switch GPIO, antenna number and etc) and gain offset2 (define more fields like existing RX gain offset). Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260324062049.52266-4-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/core.h | 3 + drivers/net/wireless/realtek/rtw89/rtw8922d.c | 162 ++++++++++++++++++ drivers/net/wireless/realtek/rtw89/rtw8922d.h | 70 ++++++++ 3 files changed, 235 insertions(+) diff --git a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h index 94e4faf70e12..d0ae3e15253b 100644 --- a/drivers/net/wireless/realtek/rtw89/core.h +++ b/drivers/net/wireless/realtek/rtw89/core.h @@ -3562,6 +3562,8 @@ struct rtw89_efuse { u8 rfe_type; char country_code[2]; u8 adc_td; + u8 bt_setting_2; + u8 bt_setting_3; }; struct rtw89_phy_rate_pattern { @@ -5926,6 +5928,7 @@ struct rtw89_phy_efuse_gain { bool offset_valid; bool comp_valid; s8 offset[RF_PATH_MAX][RTW89_GAIN_OFFSET_NR]; /* S(8, 0) */ + s8 offset2[RF_PATH_MAX][RTW89_GAIN_OFFSET_NR]; /* S(8, 0) */ s8 offset_base[RTW89_PHY_NUM]; /* S(8, 4) */ s8 rssi_base[RTW89_PHY_NUM]; /* S(8, 4) */ s8 comp[RF_PATH_MAX][RTW89_SUBBAND_NR]; /* S(8, 0) */ diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922d.c b/drivers/net/wireless/realtek/rtw89/rtw8922d.c index 194e2901232b..0ae34a4f8d79 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8922d.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8922d.c @@ -2,6 +2,7 @@ /* Copyright(c) 2026 Realtek Corporation */ +#include "debug.h" #include "efuse.h" #include "mac.h" #include "reg.h" @@ -468,6 +469,167 @@ static int rtw8922d_pwr_off_func(struct rtw89_dev *rtwdev) return 0; } +static void rtw8922d_efuse_parsing_tssi(struct rtw89_dev *rtwdev, + struct rtw8922d_efuse *map) +{ + const struct rtw8922d_tssi_offset_6g * const ofst_6g[] = { + &map->path_a_tssi_6g, + &map->path_b_tssi_6g, + }; + const struct rtw8922d_tssi_offset * const ofst[] = { + &map->path_a_tssi, + &map->path_b_tssi, + }; + struct rtw89_tssi_info *tssi = &rtwdev->tssi; + u8 i, j; + + tssi->thermal[RF_PATH_A] = map->path_a_therm; + tssi->thermal[RF_PATH_B] = map->path_b_therm; + + for (i = 0; i < RF_PATH_NUM_8922D; i++) { + memcpy(tssi->tssi_cck[i], ofst[i]->cck_tssi, TSSI_CCK_CH_GROUP_NUM); + + for (j = 0; j < TSSI_CCK_CH_GROUP_NUM; j++) + rtw89_debug(rtwdev, RTW89_DBG_TSSI, + "[TSSI][EFUSE] path=%d cck[%d]=0x%x\n", + i, j, tssi->tssi_cck[i][j]); + + memcpy(tssi->tssi_mcs[i], ofst[i]->bw40_tssi, + TSSI_MCS_2G_CH_GROUP_NUM); + memcpy(tssi->tssi_mcs[i] + TSSI_MCS_2G_CH_GROUP_NUM, + ofst[i]->bw40_1s_tssi_5g, TSSI_MCS_5G_CH_GROUP_NUM); + memcpy(tssi->tssi_6g_mcs[i], ofst_6g[i]->bw40_1s_tssi_6g, + TSSI_MCS_6G_CH_GROUP_NUM); + + for (j = 0; j < TSSI_MCS_CH_GROUP_NUM; j++) + rtw89_debug(rtwdev, RTW89_DBG_TSSI, + "[TSSI][EFUSE] path=%d mcs[%d]=0x%x\n", + i, j, tssi->tssi_mcs[i][j]); + + for (j = 0; j < TSSI_MCS_6G_CH_GROUP_NUM; j++) + rtw89_debug(rtwdev, RTW89_DBG_TSSI, + "[TSSI][EFUSE] path=%d mcs_6g[%d]=0x%x\n", + i, j, tssi->tssi_6g_mcs[i][j]); + } +} + +static void +__rtw8922d_efuse_parsing_gain_offset(struct rtw89_dev *rtwdev, + s8 offset[RTW89_GAIN_OFFSET_NR], + const s8 *offset_default, + const struct rtw8922d_rx_gain *rx_gain, + const struct rtw8922d_rx_gain_6g *rx_gain_6g) +{ + int i; + u8 t; + + offset[RTW89_GAIN_OFFSET_2G_CCK] = rx_gain->_2g_cck; + offset[RTW89_GAIN_OFFSET_2G_OFDM] = rx_gain->_2g_ofdm; + offset[RTW89_GAIN_OFFSET_5G_LOW] = rx_gain->_5g_low; + offset[RTW89_GAIN_OFFSET_5G_MID] = rx_gain->_5g_mid; + offset[RTW89_GAIN_OFFSET_5G_HIGH] = rx_gain->_5g_high; + offset[RTW89_GAIN_OFFSET_6G_L0] = rx_gain_6g->_6g_l0; + offset[RTW89_GAIN_OFFSET_6G_L1] = rx_gain_6g->_6g_l1; + offset[RTW89_GAIN_OFFSET_6G_M0] = rx_gain_6g->_6g_m0; + offset[RTW89_GAIN_OFFSET_6G_M1] = rx_gain_6g->_6g_m1; + offset[RTW89_GAIN_OFFSET_6G_H0] = rx_gain_6g->_6g_h0; + offset[RTW89_GAIN_OFFSET_6G_H1] = rx_gain_6g->_6g_h1; + offset[RTW89_GAIN_OFFSET_6G_UH0] = rx_gain_6g->_6g_uh0; + offset[RTW89_GAIN_OFFSET_6G_UH1] = rx_gain_6g->_6g_uh1; + + for (i = 0; i < RTW89_GAIN_OFFSET_NR; i++) { + t = offset[i]; + if (t == 0xff) { + if (offset_default) { + offset[i] = offset_default[i]; + continue; + } + t = 0; + } + + /* transform: sign-bit + U(7,2) to S(8,2) */ + if (t & 0x80) + offset[i] = (t ^ 0x7f) + 1; + else + offset[i] = t; + } +} + +static void rtw8922d_efuse_parsing_gain_offset(struct rtw89_dev *rtwdev, + struct rtw8922d_efuse *map) +{ + struct rtw89_phy_efuse_gain *gain = &rtwdev->efuse_gain; + + __rtw8922d_efuse_parsing_gain_offset(rtwdev, gain->offset[RF_PATH_A], + NULL, + &map->rx_gain_a, &map->rx_gain_6g_a); + __rtw8922d_efuse_parsing_gain_offset(rtwdev, gain->offset[RF_PATH_B], + NULL, + &map->rx_gain_b, &map->rx_gain_6g_b); + + __rtw8922d_efuse_parsing_gain_offset(rtwdev, gain->offset2[RF_PATH_A], + gain->offset[RF_PATH_A], + &map->rx_gain_a_2, &map->rx_gain_6g_a_2); + __rtw8922d_efuse_parsing_gain_offset(rtwdev, gain->offset2[RF_PATH_B], + gain->offset[RF_PATH_B], + &map->rx_gain_b_2, &map->rx_gain_6g_b_2); + + gain->offset_valid = true; +} + +static int rtw8922d_read_efuse_pci_sdio(struct rtw89_dev *rtwdev, u8 *log_map) +{ + struct rtw89_efuse *efuse = &rtwdev->efuse; + + if (rtwdev->hci.type == RTW89_HCI_TYPE_PCIE) + ether_addr_copy(efuse->addr, log_map + 0x4104); + else + ether_addr_copy(efuse->addr, log_map + 0x001A); + + return 0; +} + +static int rtw8922d_read_efuse_usb(struct rtw89_dev *rtwdev, u8 *log_map) +{ + struct rtw89_efuse *efuse = &rtwdev->efuse; + + ether_addr_copy(efuse->addr, log_map + 0x0078); + + return 0; +} + +static int rtw8922d_read_efuse_rf(struct rtw89_dev *rtwdev, u8 *log_map) +{ + struct rtw8922d_efuse *map = (struct rtw8922d_efuse *)log_map; + struct rtw89_efuse *efuse = &rtwdev->efuse; + + efuse->rfe_type = map->rfe_type; + efuse->xtal_cap = map->xtal_k; + efuse->country_code[0] = map->country_code[0]; + efuse->country_code[1] = map->country_code[1]; + efuse->bt_setting_2 = map->bt_setting_2; + efuse->bt_setting_3 = map->bt_setting_3; + rtw8922d_efuse_parsing_tssi(rtwdev, map); + rtw8922d_efuse_parsing_gain_offset(rtwdev, map); + + return 0; +} + +static int rtw8922d_read_efuse(struct rtw89_dev *rtwdev, u8 *log_map, + enum rtw89_efuse_block block) +{ + switch (block) { + case RTW89_EFUSE_BLOCK_HCI_DIG_PCIE_SDIO: + return rtw8922d_read_efuse_pci_sdio(rtwdev, log_map); + case RTW89_EFUSE_BLOCK_HCI_DIG_USB: + return rtw8922d_read_efuse_usb(rtwdev, log_map); + case RTW89_EFUSE_BLOCK_RF: + return rtw8922d_read_efuse_rf(rtwdev, log_map); + default: + return 0; + } +} + MODULE_FIRMWARE(RTW8922D_MODULE_FIRMWARE); MODULE_FIRMWARE(RTW8922DS_MODULE_FIRMWARE); MODULE_AUTHOR("Realtek Corporation"); diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922d.h b/drivers/net/wireless/realtek/rtw89/rtw8922d.h index 7ef3f263274e..a3b98ad6636c 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8922d.h +++ b/drivers/net/wireless/realtek/rtw89/rtw8922d.h @@ -7,4 +7,74 @@ #include "core.h" +#define RF_PATH_NUM_8922D 2 +#define BB_PATH_NUM_8922D 2 + +struct rtw8922d_tssi_offset { + u8 cck_tssi[TSSI_CCK_CH_GROUP_NUM]; + u8 bw40_tssi[TSSI_MCS_2G_CH_GROUP_NUM]; + u8 rsvd[7]; + u8 bw40_1s_tssi_5g[TSSI_MCS_5G_CH_GROUP_NUM]; + u8 bw_diff_5g[10]; +} __packed; + +struct rtw8922d_tssi_offset_6g { + u8 bw40_1s_tssi_6g[TSSI_MCS_6G_CH_GROUP_NUM]; + u8 rsvd[0xa]; +} __packed; + +struct rtw8922d_rx_gain { + u8 _2g_ofdm; + u8 _2g_cck; + u8 _5g_low; + u8 _5g_mid; + u8 _5g_high; +} __packed; + +struct rtw8922d_rx_gain_6g { + u8 _6g_l0; + u8 _6g_l1; + u8 _6g_m0; + u8 _6g_m1; + u8 _6g_h0; + u8 _6g_h1; + u8 _6g_uh0; + u8 _6g_uh1; +} __packed; + +struct rtw8922d_efuse { + u8 country_code[2]; + u8 rsvd[0xe]; + struct rtw8922d_tssi_offset path_a_tssi; + struct rtw8922d_tssi_offset path_b_tssi; + u8 rsvd1[0x54]; + u8 channel_plan; + u8 xtal_k; + u8 rsvd2[0x7]; + u8 board_info; + u8 rsvd3[0x8]; + u8 rfe_type; + u8 rsvd4[2]; + u8 bt_setting_2; + u8 bt_setting_3; + u8 rsvd4_2; + u8 path_a_therm; + u8 path_b_therm; + u8 rsvd5[0x2]; + struct rtw8922d_rx_gain rx_gain_a; + struct rtw8922d_rx_gain rx_gain_b; + u8 rsvd6[0x18]; + struct rtw8922d_rx_gain rx_gain_a_2; + struct rtw8922d_rx_gain rx_gain_b_2; + struct rtw8922d_tssi_offset_6g path_a_tssi_6g; + struct rtw8922d_tssi_offset_6g path_b_tssi_6g; + struct rtw8922d_tssi_offset_6g path_c_tssi_6g; + struct rtw8922d_tssi_offset_6g path_d_tssi_6g; + struct rtw8922d_rx_gain_6g rx_gain_6g_a; + struct rtw8922d_rx_gain_6g rx_gain_6g_b; + u8 rsvd7[0x5a]; + struct rtw8922d_rx_gain_6g rx_gain_6g_a_2; + struct rtw8922d_rx_gain_6g rx_gain_6g_b_2; +} __packed; + #endif From 5b04b8049828d27b23ba5798f35bef835acc7fb1 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Tue, 24 Mar 2026 14:20:46 +0800 Subject: [PATCH 1023/1561] wifi: rtw89: 8922d: read and configure RF by calibration data from efuse physical map The calibration data is from physical map, including 1) thermal trim to align output thermal value across chips, and 2) PA bias to transmit expected power by controller. Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260324062049.52266-5-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/core.h | 2 + drivers/net/wireless/realtek/rtw89/reg.h | 2 + drivers/net/wireless/realtek/rtw89/rtw8922d.c | 205 ++++++++++++++++++ 3 files changed, 209 insertions(+) diff --git a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h index d0ae3e15253b..cde46ed21d32 100644 --- a/drivers/net/wireless/realtek/rtw89/core.h +++ b/drivers/net/wireless/realtek/rtw89/core.h @@ -5595,9 +5595,11 @@ struct rtw89_tssi_info { struct rtw89_power_trim_info { bool pg_thermal_trim; bool pg_pa_bias_trim; + bool pg_vco_trim; u8 thermal_trim[RF_PATH_MAX]; u8 pa_bias_trim[RF_PATH_MAX]; u8 pad_bias_trim[RF_PATH_MAX]; + u8 vco_trim[RF_PATH_MAX]; }; enum rtw89_regd_func { diff --git a/drivers/net/wireless/realtek/rtw89/reg.h b/drivers/net/wireless/realtek/rtw89/reg.h index b6fd7b434de9..179006c8e499 100644 --- a/drivers/net/wireless/realtek/rtw89/reg.h +++ b/drivers/net/wireless/realtek/rtw89/reg.h @@ -8517,6 +8517,7 @@ #define RR_LUTWD0_LB GENMASK(5, 0) #define RR_TM 0x42 #define RR_TM_TRI BIT(19) +#define RR_TM_TRM GENMASK(17, 11) #define RR_TM_VAL_V1 GENMASK(7, 0) #define RR_TM_VAL GENMASK(6, 1) #define RR_TM2 0x43 @@ -8649,6 +8650,7 @@ #define RR_LDO 0xb1 #define RR_LDO_SEL GENMASK(8, 6) #define RR_VCO 0xb2 +#define RR_VCO_VAL GENMASK(18, 14) #define RR_VCO_SEL GENMASK(9, 8) #define RR_VCI 0xb3 #define RR_VCI_ON BIT(7) diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922d.c b/drivers/net/wireless/realtek/rtw89/rtw8922d.c index 0ae34a4f8d79..cbe8e067ae55 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8922d.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8922d.c @@ -630,6 +630,211 @@ static int rtw8922d_read_efuse(struct rtw89_dev *rtwdev, u8 *log_map, } } +static void rtw8922d_phycap_parsing_vco_trim(struct rtw89_dev *rtwdev, + u8 *phycap_map) +{ + static const u32 vco_trim_addr[RF_PATH_NUM_8922D] = {0x175E, 0x175F}; + struct rtw89_power_trim_info *info = &rtwdev->pwr_trim; + u32 addr = rtwdev->chip->phycap_addr; + const u32 vco_check_addr = 0x1700; + u8 val; + + val = phycap_map[vco_check_addr - addr]; + if (val & BIT(1)) + return; + + info->pg_vco_trim = true; + + info->vco_trim[0] = u8_get_bits(phycap_map[vco_trim_addr[0] - addr], GENMASK(4, 0)); + info->vco_trim[1] = u8_get_bits(phycap_map[vco_trim_addr[1] - addr], GENMASK(4, 0)); +} + +static void rtw8922d_vco_trim(struct rtw89_dev *rtwdev) +{ + struct rtw89_power_trim_info *info = &rtwdev->pwr_trim; + + if (!info->pg_vco_trim) + return; + + rtw89_write_rf(rtwdev, RF_PATH_A, RR_VCO, RR_VCO_VAL, info->vco_trim[0]); + rtw89_write_rf(rtwdev, RF_PATH_B, RR_VCO, RR_VCO_VAL, info->vco_trim[1]); +} + +#define THM_TRIM_POSITIVE_MASK BIT(6) +#define THM_TRIM_MAGNITUDE_MASK GENMASK(5, 0) +#define THM_TRIM_MAX (15) +#define THM_TRIM_MIN (-15) + +static void rtw8922d_phycap_parsing_thermal_trim(struct rtw89_dev *rtwdev, + u8 *phycap_map) +{ + static const u32 thm_trim_addr[RF_PATH_NUM_8922D] = {0x1706, 0x1732}; + struct rtw89_power_trim_info *info = &rtwdev->pwr_trim; + u32 addr = rtwdev->chip->phycap_addr; + bool pg = true; + u8 pg_th; + s8 val; + u8 i; + + for (i = 0; i < RF_PATH_NUM_8922D; i++) { + pg_th = phycap_map[thm_trim_addr[i] - addr]; + if (pg_th == 0xff) { + memset(info->thermal_trim, 0, sizeof(info->thermal_trim)); + pg = false; + goto out; + } + + val = u8_get_bits(pg_th, THM_TRIM_MAGNITUDE_MASK); + + if (!(pg_th & THM_TRIM_POSITIVE_MASK)) + val *= -1; + + if (val <= THM_TRIM_MIN || val >= THM_TRIM_MAX) { + val = 0; + info->thermal_trim[i] = 0; + } else { + info->thermal_trim[i] = pg_th; + } + + rtw89_debug(rtwdev, RTW89_DBG_RFK, + "[THERMAL][TRIM] path=%d thermal_trim=0x%x (%d)\n", + i, pg_th, val); + } + +out: + info->pg_thermal_trim = pg; +} + +static void rtw8922d_thermal_trim(struct rtw89_dev *rtwdev) +{ + struct rtw89_power_trim_info *info = &rtwdev->pwr_trim; + u8 thermal; + int i; + + for (i = 0; i < RF_PATH_NUM_8922D; i++) { + thermal = info->pg_thermal_trim ? info->thermal_trim[i] : 0; + rtw89_write_rf(rtwdev, i, RR_TM, RR_TM_TRM, thermal & 0x7f); + } +} + +static void rtw8922d_phycap_parsing_pa_bias_trim(struct rtw89_dev *rtwdev, + u8 *phycap_map) +{ + static const u32 pabias_trim_addr[RF_PATH_NUM_8922D] = {0x1707, 0x1733}; + static const u32 check_pa_pad_trim_addr = 0x1700; + struct rtw89_power_trim_info *info = &rtwdev->pwr_trim; + u32 addr = rtwdev->chip->phycap_addr; + bool pg = true; + u8 val; + u8 i; + + val = phycap_map[check_pa_pad_trim_addr - addr]; + if (val == 0xff) { + pg = false; + goto out; + } + + for (i = 0; i < RF_PATH_NUM_8922D; i++) { + info->pa_bias_trim[i] = phycap_map[pabias_trim_addr[i] - addr]; + + rtw89_debug(rtwdev, RTW89_DBG_RFK, + "[PA_BIAS][TRIM] path=%d pa_bias_trim=0x%x\n", + i, info->pa_bias_trim[i]); + } + +out: + info->pg_pa_bias_trim = pg; +} + +static void rtw8922d_pa_bias_trim(struct rtw89_dev *rtwdev) +{ + struct rtw89_power_trim_info *info = &rtwdev->pwr_trim; + u8 pabias_2g, pabias_5g; + u8 i; + + if (!info->pg_pa_bias_trim) { + rtw89_debug(rtwdev, RTW89_DBG_RFK, + "[PA_BIAS][TRIM] no PG, do nothing\n"); + + return; + } + + for (i = 0; i < RF_PATH_NUM_8922D; i++) { + pabias_2g = FIELD_GET(GENMASK(3, 0), info->pa_bias_trim[i]); + pabias_5g = FIELD_GET(GENMASK(7, 4), info->pa_bias_trim[i]); + + rtw89_debug(rtwdev, RTW89_DBG_RFK, + "[PA_BIAS][TRIM] path=%d 2G=0x%x 5G=0x%x\n", + i, pabias_2g, pabias_5g); + + rtw89_write_rf(rtwdev, i, RR_BIASA, RR_BIASA_TXG_V1, pabias_2g); + rtw89_write_rf(rtwdev, i, RR_BIASA, RR_BIASA_TXA_V1, pabias_5g); + } +} + +static void rtw8922d_phycap_parsing_pad_bias_trim(struct rtw89_dev *rtwdev, + u8 *phycap_map) +{ + static const u32 pad_bias_trim_addr[RF_PATH_NUM_8922D] = {0x1708, 0x1734}; + struct rtw89_power_trim_info *info = &rtwdev->pwr_trim; + u32 addr = rtwdev->chip->phycap_addr; + u8 i; + + if (!info->pg_pa_bias_trim) + return; + + for (i = 0; i < RF_PATH_NUM_8922D; i++) { + info->pad_bias_trim[i] = phycap_map[pad_bias_trim_addr[i] - addr]; + + rtw89_debug(rtwdev, RTW89_DBG_RFK, + "[PAD_BIAS][TRIM] path=%d pad_bias_trim=0x%x\n", + i, info->pad_bias_trim[i]); + } +} + +static void rtw8922d_pad_bias_trim(struct rtw89_dev *rtwdev) +{ + struct rtw89_power_trim_info *info = &rtwdev->pwr_trim; + u8 pad_bias_2g, pad_bias_5g; + u8 i; + + if (!info->pg_pa_bias_trim) { + rtw89_debug(rtwdev, RTW89_DBG_RFK, + "[PAD_BIAS][TRIM] no PG, do nothing\n"); + return; + } + + for (i = 0; i < RF_PATH_NUM_8922D; i++) { + pad_bias_2g = u8_get_bits(info->pad_bias_trim[i], GENMASK(3, 0)); + pad_bias_5g = u8_get_bits(info->pad_bias_trim[i], GENMASK(7, 4)); + + rtw89_debug(rtwdev, RTW89_DBG_RFK, + "[PAD_BIAS][TRIM] path=%d 2G=0x%x 5G=0x%x\n", + i, pad_bias_2g, pad_bias_5g); + + rtw89_write_rf(rtwdev, i, RR_BIASA, RR_BIASD_TXG_V1, pad_bias_2g); + rtw89_write_rf(rtwdev, i, RR_BIASA, RR_BIASD_TXA_V1, pad_bias_5g); + } +} + +static int rtw8922d_read_phycap(struct rtw89_dev *rtwdev, u8 *phycap_map) +{ + rtw8922d_phycap_parsing_vco_trim(rtwdev, phycap_map); + rtw8922d_phycap_parsing_thermal_trim(rtwdev, phycap_map); + rtw8922d_phycap_parsing_pa_bias_trim(rtwdev, phycap_map); + rtw8922d_phycap_parsing_pad_bias_trim(rtwdev, phycap_map); + + return 0; +} + +static void rtw8922d_power_trim(struct rtw89_dev *rtwdev) +{ + rtw8922d_vco_trim(rtwdev); + rtw8922d_thermal_trim(rtwdev); + rtw8922d_pa_bias_trim(rtwdev); + rtw8922d_pad_bias_trim(rtwdev); +} + MODULE_FIRMWARE(RTW8922D_MODULE_FIRMWARE); MODULE_FIRMWARE(RTW8922DS_MODULE_FIRMWARE); MODULE_AUTHOR("Realtek Corporation"); From 56f48df95e7a0d2325ef1ead73228d413c713100 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Tue, 24 Mar 2026 14:20:47 +0800 Subject: [PATCH 1024/1561] wifi: rtw89: 8922d: add set channel of MAC part The set channel is a key function to switch to specific operating channel. For MAC part, configure hardware according to channel bandwidth, and enable CCK rate for 2GHz band only. Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260324062049.52266-6-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/reg.h | 1 + drivers/net/wireless/realtek/rtw89/rtw8922d.c | 92 +++++++++++++++++++ 2 files changed, 93 insertions(+) diff --git a/drivers/net/wireless/realtek/rtw89/reg.h b/drivers/net/wireless/realtek/rtw89/reg.h index 179006c8e499..2195f576facc 100644 --- a/drivers/net/wireless/realtek/rtw89/reg.h +++ b/drivers/net/wireless/realtek/rtw89/reg.h @@ -6801,6 +6801,7 @@ #define R_BE_MUEDCA_EN 0x10370 #define R_BE_MUEDCA_EN_C1 0x14370 #define B_BE_SIFS_TIMEOUT_TB_T2_MASK GENMASK(30, 24) +#define B_BE_SIFS_MACTXEN_TB_T1_DOT05US_MASK GENMASK(23, 16) #define B_BE_SIFS_MACTXEN_TB_T1_MASK GENMASK(22, 16) #define B_BE_MUEDCA_WMM_SEL BIT(8) #define B_BE_SET_MUEDCATIMER_TF_MASK GENMASK(5, 4) diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922d.c b/drivers/net/wireless/realtek/rtw89/rtw8922d.c index cbe8e067ae55..a341734ef54d 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8922d.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8922d.c @@ -5,6 +5,7 @@ #include "debug.h" #include "efuse.h" #include "mac.h" +#include "phy.h" #include "reg.h" #include "rtw8922d.h" @@ -835,6 +836,97 @@ static void rtw8922d_power_trim(struct rtw89_dev *rtwdev) rtw8922d_pad_bias_trim(rtwdev); } +static void rtw8922d_set_channel_mac(struct rtw89_dev *rtwdev, + const struct rtw89_chan *chan, + u8 mac_idx) +{ + u32 sub_carr = rtw89_mac_reg_by_idx(rtwdev, R_BE_TX_SUB_BAND_VALUE, mac_idx); + u32 chk_rate = rtw89_mac_reg_by_idx(rtwdev, R_BE_TXRATE_CHK, mac_idx); + u32 rf_mod = rtw89_mac_reg_by_idx(rtwdev, R_BE_WMAC_RFMOD, mac_idx); + u8 txsb20 = 0, txsb40 = 0, txsb80 = 0; + u8 rf_mod_val, chk_rate_mask, sifs; + u32 txsb; + u32 reg; + + switch (chan->band_width) { + case RTW89_CHANNEL_WIDTH_160: + txsb80 = rtw89_phy_get_txsb(rtwdev, chan, RTW89_CHANNEL_WIDTH_80); + fallthrough; + case RTW89_CHANNEL_WIDTH_80: + txsb40 = rtw89_phy_get_txsb(rtwdev, chan, RTW89_CHANNEL_WIDTH_40); + fallthrough; + case RTW89_CHANNEL_WIDTH_40: + txsb20 = rtw89_phy_get_txsb(rtwdev, chan, RTW89_CHANNEL_WIDTH_20); + break; + default: + break; + } + + switch (chan->band_width) { + case RTW89_CHANNEL_WIDTH_160: + rf_mod_val = BE_WMAC_RFMOD_160M; + txsb = u32_encode_bits(txsb20, B_BE_TXSB_20M_MASK) | + u32_encode_bits(txsb40, B_BE_TXSB_40M_MASK) | + u32_encode_bits(txsb80, B_BE_TXSB_80M_MASK); + break; + case RTW89_CHANNEL_WIDTH_80: + rf_mod_val = BE_WMAC_RFMOD_80M; + txsb = u32_encode_bits(txsb20, B_BE_TXSB_20M_MASK) | + u32_encode_bits(txsb40, B_BE_TXSB_40M_MASK); + break; + case RTW89_CHANNEL_WIDTH_40: + rf_mod_val = BE_WMAC_RFMOD_40M; + txsb = u32_encode_bits(txsb20, B_BE_TXSB_20M_MASK); + break; + case RTW89_CHANNEL_WIDTH_20: + default: + rf_mod_val = BE_WMAC_RFMOD_20M; + txsb = 0; + break; + } + + if (txsb20 <= BE_PRI20_BITMAP_MAX) + txsb |= u32_encode_bits(BIT(txsb20), B_BE_PRI20_BITMAP_MASK); + + rtw89_write8_mask(rtwdev, rf_mod, B_BE_WMAC_RFMOD_MASK, rf_mod_val); + rtw89_write32(rtwdev, sub_carr, txsb); + + switch (chan->band_type) { + case RTW89_BAND_2G: + chk_rate_mask = B_BE_BAND_MODE; + break; + case RTW89_BAND_5G: + case RTW89_BAND_6G: + chk_rate_mask = B_BE_CHECK_CCK_EN | B_BE_RTS_LIMIT_IN_OFDM6; + break; + default: + rtw89_warn(rtwdev, "Invalid band_type:%d\n", chan->band_type); + return; + } + + rtw89_write8_clr(rtwdev, chk_rate, B_BE_BAND_MODE | B_BE_CHECK_CCK_EN | + B_BE_RTS_LIMIT_IN_OFDM6); + rtw89_write8_set(rtwdev, chk_rate, chk_rate_mask); + + switch (chan->band_width) { + case RTW89_CHANNEL_WIDTH_160: + sifs = 0x8C; + break; + case RTW89_CHANNEL_WIDTH_80: + sifs = 0x8A; + break; + case RTW89_CHANNEL_WIDTH_40: + sifs = 0x84; + break; + case RTW89_CHANNEL_WIDTH_20: + default: + sifs = 0x82; + } + + reg = rtw89_mac_reg_by_idx(rtwdev, R_BE_MUEDCA_EN, mac_idx); + rtw89_write32_mask(rtwdev, reg, B_BE_SIFS_MACTXEN_TB_T1_DOT05US_MASK, sifs); +} + MODULE_FIRMWARE(RTW8922D_MODULE_FIRMWARE); MODULE_FIRMWARE(RTW8922DS_MODULE_FIRMWARE); MODULE_AUTHOR("Realtek Corporation"); From 521f3a653dbe52b290607b5e0adf4730a3fcd9d6 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Tue, 24 Mar 2026 14:20:48 +0800 Subject: [PATCH 1025/1561] wifi: rtw89: 8922d: add set channel of BB part The set channel of BB part is the main part, which according to channel and bandwidth to configure CCK SCO, RX gain of LNA and TIA programmed in efuse for CCK and OFDM rate, and spur elimination of CSI and NBI tones. Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260324062049.52266-7-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/core.h | 2 + drivers/net/wireless/realtek/rtw89/reg.h | 62 ++ drivers/net/wireless/realtek/rtw89/rtw8922d.c | 820 ++++++++++++++++++ 3 files changed, 884 insertions(+) diff --git a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h index cde46ed21d32..696a1d92d62b 100644 --- a/drivers/net/wireless/realtek/rtw89/core.h +++ b/drivers/net/wireless/realtek/rtw89/core.h @@ -5933,6 +5933,8 @@ struct rtw89_phy_efuse_gain { s8 offset2[RF_PATH_MAX][RTW89_GAIN_OFFSET_NR]; /* S(8, 0) */ s8 offset_base[RTW89_PHY_NUM]; /* S(8, 4) */ s8 rssi_base[RTW89_PHY_NUM]; /* S(8, 4) */ + s8 ref_gain_base[RTW89_PHY_NUM]; /* S(8, 2) */ + s8 cck_rpl_base[RTW89_PHY_NUM]; /* S(8, 0) */ s8 comp[RF_PATH_MAX][RTW89_SUBBAND_NR]; /* S(8, 0) */ }; diff --git a/drivers/net/wireless/realtek/rtw89/reg.h b/drivers/net/wireless/realtek/rtw89/reg.h index 2195f576facc..5d284f310069 100644 --- a/drivers/net/wireless/realtek/rtw89/reg.h +++ b/drivers/net/wireless/realtek/rtw89/reg.h @@ -8775,6 +8775,7 @@ #define B_P0_HW_ANTSW_DIS_BY_GNT_BT BIT(12) #define B_P0_TRSW_TX_EXTEND GENMASK(3, 0) #define R_MAC_PIN_SEL 0x0734 +#define R_MAC_PIN_SEL_BE4 0x20734 #define B_CH_IDX_SEG0 GENMASK(23, 16) #define R_PLCP_HISTOGRAM 0x0738 #define R_PLCP_HISTOGRAM_BE_V1 0x20738 @@ -10461,6 +10462,13 @@ #define B_SW_SI_DATA_DAT_BE4 GENMASK(19, 0) #define R_SW_SI_READ_ADDR_BE4 0x20378 #define B_SW_SI_READ_ADDR_BE4 GENMASK(10, 0) +#define R_RXBW67_BE4 0x2040C +#define B_RXBW6_BE4 GENMASK(22, 20) +#define B_RXBW7_BE4 GENMASK(25, 23) +#define R_RXBW_BE4 0x20410 +#define B_RXBW_BE4 GENMASK(29, 27) +#define R_ENABLE_CCK0_BE4 0x20700 +#define B_ENABLE_CCK0_BE4 BIT(5) #define R_EDCCA_RPT_SEL_BE4 0x20780 #define R_EDCCA_RPT_SEL_BE4_C1 0x21780 #define B_EDCCA_RPT_SEL_BE4_MSK 0xE0000 @@ -10486,21 +10494,75 @@ #define B_IFS_T3_HIS_BE4 GENMASK(15, 0) #define B_IFS_T4_HIS_BE4 GENMASK(31, 16) +#define R_TXPWR_RSTB0_BE4 0x2250C +#define B_TXPWR_RSTB0_BE4 BIT(16) +#define R_TXPWR_RSTB1_BE4 0x2260C +#define B_TXPWR_RSTB1_BE4 BIT(16) + +#define R_OFDM_OFST_P0_BE4 0x240C8 +#define B_OFDM_OFST_P0_BE4 GENMASK(31, 24) #define R_PATH0_RXIDX_INIT_BE4 0x24108 #define B_PATH0_RXIDX_INIT_BE4 GENMASK(29, 25) #define R_PATH0_LNA_INIT_BE4 0x24158 #define B_PATH0_LNA_INIT_IDX_BE4 GENMASK(14, 12) +#define R_BAND_SEL0_BE4 0x24160 +#define B_BAND_SEL0_BE4 BIT(26) #define R_PATH0_TIA_INIT_BE4 0x24168 #define B_PATH0_TIA_INIT_IDX_BE4 BIT(18) +#define R_OFDM_RPL_BIAS_P0_BE4 0x2420C +#define B_OFDM_RPL_BIAS_P0_BE4 GENMASK(11, 2) +#define R_OFDM_OFST_P1_BE4 0x244C8 +#define B_OFDM_OFST_P1_BE4 GENMASK(31, 24) #define R_PATH1_RXIDX_INIT_BE4 0x24508 #define B_PATH1_RXIDX_INIT_BE4 GENMASK(29, 25) #define R_PATH1_LNA_INIT_BE4 0x24558 #define B_PATH1_LNA_INIT_IDX_BE4 GENMASK(14, 12) +#define R_BAND_SEL1_BE4 0x24560 +#define B_BAND_SEL1_BE4 BIT(26) #define R_PATH1_TIA_INIT_BE4 0x24568 #define B_PATH1_TIA_INIT_IDX_BE4 BIT(18) +#define R_OFDM_RPL_BIAS_P1_BE4 0x2460C +#define B_OFDM_RPL_BIAS_P1_BE4 GENMASK(11, 2) #define R_TX_CFR_MANUAL_EN_BE4 0x2483C #define B_TX_CFR_MANUAL_EN_BE4_M BIT(30) +#define R_PCOEFF0_BE4 0x24880 +#define B_PCOEFF01_BE4 GENMASK(23, 0) +#define R_PCOEFF2_BE4 0x24884 +#define B_PCOEFF23_BE4 GENMASK(23, 0) +#define R_PCOEFF4_BE4 0x24888 +#define B_PCOEFF45_BE4 GENMASK(23, 0) +#define R_PCOEFF6_BE4 0x2488C +#define B_PCOEFF67_BE4 GENMASK(23, 0) +#define R_PCOEFF8_BE4 0x24890 +#define B_PCOEFF89_BE4 GENMASK(23, 0) +#define R_PCOEFF10_BE4 0x24894 +#define B_PCOEFF10_BE4 GENMASK(23, 0) +#define R_PCOEFF12_BE4 0x24898 +#define B_PCOEFF12_BE4 GENMASK(23, 0) +#define R_PCOEFF14_BE4 0x2489C +#define B_PCOEFF14_BE4 GENMASK(23, 0) +#define R_BW_BE4 0x24EE4 +#define B_BW_BE4 GENMASK(6, 4) +#define B_PRISB_BE4 GENMASK(3, 0) +#define R_FC0_BE4 0x24EE8 +#define B_FC0_BE4 GENMASK(12, 0) +#define R_FC0_INV_BE4 0x24EF4 +#define B_FC0_INV_BE4 GENMASK(15, 0) +#define R_CCK_RPL_OFST_BE4 0x26084 +#define B_CCK_RPL_OFST_BE4 GENMASK(7, 0) +#define R_BK_FC0_INV_BE4 0x2608C +#define B_BK_FC0_INV_BE4 GENMASK(18, 0) +#define R_CCK_FC0_INV_BE4 0x26090 +#define B_CCK_FC0_INV_BE4 GENMASK(18, 0) +#define R_GAIN_BIAS_BE4 0x260A0 +#define B_GAIN_BIAS_BW20_BE4 GENMASK(11, 6) +#define B_GAIN_BIAS_BW40_BE4 GENMASK(17, 12) +#define R_AWGN_DET_BE4 0x2668C +#define B_AWGN_DET_BE4 GENMASK(17, 9) +#define R_CSI_WGT_BE4 0x26770 +#define B_CSI_WGT_EN_BE4 BIT(0) +#define B_CSI_WGT_IDX_BE4 GENMASK(31, 20) #define R_CHINFO_OPT_BE4 0x267C8 #define B_CHINFO_OPT_BE4 GENMASK(14, 13) #define R_CHINFO_NX_BE4 0x267D0 diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922d.c b/drivers/net/wireless/realtek/rtw89/rtw8922d.c index a341734ef54d..cae92e2abd85 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8922d.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8922d.c @@ -8,6 +8,7 @@ #include "phy.h" #include "reg.h" #include "rtw8922d.h" +#include "util.h" #define RTW8922D_FW_FORMAT_MAX 0 #define RTW8922D_FW_BASENAME "rtw89/rtw8922d_fw" @@ -927,6 +928,825 @@ static void rtw8922d_set_channel_mac(struct rtw89_dev *rtwdev, rtw89_write32_mask(rtwdev, reg, B_BE_SIFS_MACTXEN_TB_T1_DOT05US_MASK, sifs); } +static const u32 rtw8922d_sco_barker_threshold[14] = { + 0x1fe4f, 0x1ff5e, 0x2006c, 0x2017b, 0x2028a, 0x20399, 0x204a8, 0x205b6, + 0x206c5, 0x207d4, 0x208e3, 0x209f2, 0x20b00, 0x20d8a +}; + +static const u32 rtw8922d_sco_cck_threshold[14] = { + 0x2bdac, 0x2bf21, 0x2c095, 0x2c209, 0x2c37e, 0x2c4f2, 0x2c666, 0x2c7db, + 0x2c94f, 0x2cac3, 0x2cc38, 0x2cdac, 0x2cf21, 0x2d29e +}; + +static int rtw8922d_ctrl_sco_cck(struct rtw89_dev *rtwdev, + u8 primary_ch, enum rtw89_bandwidth bw, + enum rtw89_phy_idx phy_idx) +{ + u8 ch_element; + + if (primary_ch >= 14) + return -EINVAL; + + ch_element = primary_ch - 1; + + rtw89_phy_write32_idx(rtwdev, R_BK_FC0_INV_BE4, B_BK_FC0_INV_BE4, + rtw8922d_sco_barker_threshold[ch_element], + phy_idx); + rtw89_phy_write32_idx(rtwdev, R_CCK_FC0_INV_BE4, B_CCK_FC0_INV_BE4, + rtw8922d_sco_cck_threshold[ch_element], + phy_idx); + + return 0; +} + +static void rtw8922d_ctrl_ch_core(struct rtw89_dev *rtwdev, + const struct rtw89_chan *chan, + enum rtw89_phy_idx phy_idx) +{ + u16 central_freq = chan->freq; + u16 sco; + + if (chan->band_type == RTW89_BAND_2G) { + rtw89_phy_write32_idx(rtwdev, R_BAND_SEL0_BE4, B_BAND_SEL0_BE4, + 1, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_BAND_SEL1_BE4, B_BAND_SEL1_BE4, + 1, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_ENABLE_CCK0_BE4, B_ENABLE_CCK0_BE4, + 1, phy_idx); + } else { + rtw89_phy_write32_idx(rtwdev, R_BAND_SEL0_BE4, B_BAND_SEL0_BE4, + 0, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_BAND_SEL1_BE4, B_BAND_SEL1_BE4, + 0, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_ENABLE_CCK0_BE4, B_ENABLE_CCK0_BE4, + 0, phy_idx); + } + + rtw89_phy_write32_idx(rtwdev, R_FC0_BE4, B_FC0_BE4, central_freq, phy_idx); + + sco = phy_div((BIT(0) << 27) + (central_freq / 2), central_freq); + rtw89_phy_write32_idx(rtwdev, R_FC0_INV_BE4, B_FC0_INV_BE4, sco, phy_idx); +} + +struct rtw8922d_bb_gain { + u32 gain_g[BB_PATH_NUM_8922D]; + u32 gain_a[BB_PATH_NUM_8922D]; + u32 gain_g_mask; + u32 gain_a_mask; +}; + +static const struct rtw89_reg_def rpl_comp_bw160[RTW89_BW20_SC_160M] = { + { .addr = 0x241E8, .mask = 0xFF00}, + { .addr = 0x241E8, .mask = 0xFF0000}, + { .addr = 0x241E8, .mask = 0xFF000000}, + { .addr = 0x241EC, .mask = 0xFF}, + { .addr = 0x241EC, .mask = 0xFF00}, + { .addr = 0x241EC, .mask = 0xFF0000}, + { .addr = 0x241EC, .mask = 0xFF000000}, + { .addr = 0x241F0, .mask = 0xFF} +}; + +static const struct rtw89_reg_def rpl_comp_bw80[RTW89_BW20_SC_80M] = { + { .addr = 0x241F4, .mask = 0xFF}, + { .addr = 0x241F4, .mask = 0xFF00}, + { .addr = 0x241F4, .mask = 0xFF0000}, + { .addr = 0x241F4, .mask = 0xFF000000} +}; + +static const struct rtw89_reg_def rpl_comp_bw40[RTW89_BW20_SC_40M] = { + { .addr = 0x241F0, .mask = 0xFF0000}, + { .addr = 0x241F0, .mask = 0xFF000000} +}; + +static const struct rtw89_reg_def rpl_comp_bw20[RTW89_BW20_SC_20M] = { + { .addr = 0x241F0, .mask = 0xFF00} +}; + +static const struct rtw8922d_bb_gain bb_gain_lna[LNA_GAIN_NUM] = { + { .gain_g = {0x2409C, 0x2449C}, .gain_a = {0x2406C, 0x2446C}, + .gain_g_mask = 0xFF00, .gain_a_mask = 0xFF}, + { .gain_g = {0x2409C, 0x2449C}, .gain_a = {0x2406C, 0x2446C}, + .gain_g_mask = 0xFF000000, .gain_a_mask = 0xFF0000}, + { .gain_g = {0x240A0, 0x244A0}, .gain_a = {0x24070, 0x24470}, + .gain_g_mask = 0xFF00, .gain_a_mask = 0xFF}, + { .gain_g = {0x240A0, 0x244A0}, .gain_a = {0x24070, 0x24470}, + .gain_g_mask = 0xFF000000, .gain_a_mask = 0xFF0000}, + { .gain_g = {0x240A4, 0x244A4}, .gain_a = {0x24074, 0x24474}, + .gain_g_mask = 0xFF00, .gain_a_mask = 0xFF}, + { .gain_g = {0x240A4, 0x244A4}, .gain_a = {0x24074, 0x24474}, + .gain_g_mask = 0xFF000000, .gain_a_mask = 0xFF0000}, + { .gain_g = {0x240A8, 0x244A8}, .gain_a = {0x24078, 0x24478}, + .gain_g_mask = 0xFF00, .gain_a_mask = 0xFF}, +}; + +static const struct rtw8922d_bb_gain bb_gain_tia[TIA_GAIN_NUM] = { + { .gain_g = {0x24054, 0x24454}, .gain_a = {0x24054, 0x24454}, + .gain_g_mask = 0x7FC0000, .gain_a_mask = 0x1FF}, + { .gain_g = {0x24058, 0x24458}, .gain_a = {0x24054, 0x24454}, + .gain_g_mask = 0x1FF, .gain_a_mask = 0x3FE00 }, +}; + +static const struct rtw8922d_bb_gain bb_op1db_lna[LNA_GAIN_NUM] = { + { .gain_g = {0x240AC, 0x244AC}, .gain_a = {0x24078, 0x24478}, + .gain_g_mask = 0xFF00, .gain_a_mask = 0xFF000000}, + { .gain_g = {0x240AC, 0x244AC}, .gain_a = {0x2407C, 0x2447C}, + .gain_g_mask = 0xFF0000, .gain_a_mask = 0xFF}, + { .gain_g = {0x240AC, 0x244AC}, .gain_a = {0x2407C, 0x2447C}, + .gain_g_mask = 0xFF000000, .gain_a_mask = 0xFF00}, + { .gain_g = {0x240B0, 0x244B0}, .gain_a = {0x2407C, 0x2447C}, + .gain_g_mask = 0xFF, .gain_a_mask = 0xFF0000}, + { .gain_g = {0x240B0, 0x244B0}, .gain_a = {0x2407C, 0x2447C}, + .gain_g_mask = 0xFF00, .gain_a_mask = 0xFF000000}, + { .gain_g = {0x240B0, 0x244B0}, .gain_a = {0x24080, 0x24480}, + .gain_g_mask = 0xFF0000, .gain_a_mask = 0xFF}, + { .gain_g = {0x240B0, 0x244B0}, .gain_a = {0x24080, 0x24480}, + .gain_g_mask = 0xFF000000, .gain_a_mask = 0xFF00}, +}; + +static const struct rtw8922d_bb_gain bb_op1db_tia_lna[TIA_LNA_OP1DB_NUM] = { + { .gain_g = {0x240B4, 0x244B4}, .gain_a = {0x24080, 0x24480}, + .gain_g_mask = 0xFF0000, .gain_a_mask = 0xFF000000}, + { .gain_g = {0x240B4, 0x244B4}, .gain_a = {0x24084, 0x24484}, + .gain_g_mask = 0xFF000000, .gain_a_mask = 0xFF}, + { .gain_g = {0x240B8, 0x244B8}, .gain_a = {0x24084, 0x24484}, + .gain_g_mask = 0xFF, .gain_a_mask = 0xFF00}, + { .gain_g = {0x240B8, 0x244B8}, .gain_a = {0x24084, 0x24484}, + .gain_g_mask = 0xFF00, .gain_a_mask = 0xFF0000}, + { .gain_g = {0x240B8, 0x244B8}, .gain_a = {0x24084, 0x24484}, + .gain_g_mask = 0xFF0000, .gain_a_mask = 0xFF000000}, + { .gain_g = {0x240B8, 0x244B8}, .gain_a = {0x24088, 0x24488}, + .gain_g_mask = 0xFF000000, .gain_a_mask = 0xFF}, + { .gain_g = {0x240BC, 0x244BC}, .gain_a = {0x24088, 0x24488}, + .gain_g_mask = 0xFF, .gain_a_mask = 0xFF00}, + { .gain_g = {0x240BC, 0x244BC}, .gain_a = {0x24088, 0x24488}, + .gain_g_mask = 0xFF00, .gain_a_mask = 0xFF0000}, +}; + +static void rtw8922d_set_rpl_gain(struct rtw89_dev *rtwdev, + const struct rtw89_chan *chan, + enum rtw89_rf_path path, + enum rtw89_phy_idx phy_idx) +{ + const struct rtw89_phy_bb_gain_info_be *gain = &rtwdev->bb_gain.be; + u8 gain_band = rtw89_subband_to_gain_band_be(chan->subband_type); + u32 reg_path_ofst = 0; + u32 mask; + s32 val; + u32 reg; + int i; + + if (path == RF_PATH_B) + reg_path_ofst = 0x400; + + for (i = 0; i < RTW89_BW20_SC_160M; i++) { + reg = rpl_comp_bw160[i].addr | reg_path_ofst; + mask = rpl_comp_bw160[i].mask; + val = gain->rpl_ofst_160[gain_band][path][i]; + rtw89_phy_write32_idx(rtwdev, reg, mask, val, phy_idx); + } + + for (i = 0; i < RTW89_BW20_SC_80M; i++) { + reg = rpl_comp_bw80[i].addr | reg_path_ofst; + mask = rpl_comp_bw80[i].mask; + val = gain->rpl_ofst_80[gain_band][path][i]; + rtw89_phy_write32_idx(rtwdev, reg, mask, val, phy_idx); + } + + for (i = 0; i < RTW89_BW20_SC_40M; i++) { + reg = rpl_comp_bw40[i].addr | reg_path_ofst; + mask = rpl_comp_bw40[i].mask; + val = gain->rpl_ofst_40[gain_band][path][i]; + rtw89_phy_write32_idx(rtwdev, reg, mask, val, phy_idx); + } + + for (i = 0; i < RTW89_BW20_SC_20M; i++) { + reg = rpl_comp_bw20[i].addr | reg_path_ofst; + mask = rpl_comp_bw20[i].mask; + val = gain->rpl_ofst_20[gain_band][path][i]; + rtw89_phy_write32_idx(rtwdev, reg, mask, val, phy_idx); + } +} + +static void rtw8922d_set_lna_tia_gain(struct rtw89_dev *rtwdev, + const struct rtw89_chan *chan, + enum rtw89_rf_path path, + enum rtw89_phy_idx phy_idx) +{ + const struct rtw89_phy_bb_gain_info_be *gain = &rtwdev->bb_gain.be; + u8 gain_band = rtw89_subband_to_gain_band_be(chan->subband_type); + enum rtw89_phy_bb_bw_be bw_type; + u32 mask; + s32 val; + u32 reg; + int i; + + bw_type = chan->band_width <= RTW89_CHANNEL_WIDTH_40 ? + RTW89_BB_BW_20_40 : RTW89_BB_BW_80_160_320; + + for (i = 0; i < LNA_GAIN_NUM; i++) { + if (chan->band_type == RTW89_BAND_2G) { + reg = bb_gain_lna[i].gain_g[path]; + mask = bb_gain_lna[i].gain_g_mask; + } else { + reg = bb_gain_lna[i].gain_a[path]; + mask = bb_gain_lna[i].gain_a_mask; + } + val = gain->lna_gain[gain_band][bw_type][path][i]; + rtw89_phy_write32_idx(rtwdev, reg, mask, val, phy_idx); + } + + for (i = 0; i < TIA_GAIN_NUM; i++) { + if (chan->band_type == RTW89_BAND_2G) { + reg = bb_gain_tia[i].gain_g[path]; + mask = bb_gain_tia[i].gain_g_mask; + } else { + reg = bb_gain_tia[i].gain_a[path]; + mask = bb_gain_tia[i].gain_a_mask; + } + val = gain->tia_gain[gain_band][bw_type][path][i]; + rtw89_phy_write32_idx(rtwdev, reg, mask, val, phy_idx); + } +} + +static void rtw8922d_set_op1db(struct rtw89_dev *rtwdev, + const struct rtw89_chan *chan, + enum rtw89_rf_path path, + enum rtw89_phy_idx phy_idx) +{ + const struct rtw89_phy_bb_gain_info_be *gain = &rtwdev->bb_gain.be; + u8 gain_band = rtw89_subband_to_gain_band_be(chan->subband_type); + enum rtw89_phy_bb_bw_be bw_type; + u32 mask; + s32 val; + u32 reg; + int i; + + bw_type = chan->band_width <= RTW89_CHANNEL_WIDTH_40 ? + RTW89_BB_BW_20_40 : RTW89_BB_BW_80_160_320; + + for (i = 0; i < LNA_GAIN_NUM; i++) { + if (chan->band_type == RTW89_BAND_2G) { + reg = bb_op1db_lna[i].gain_g[path]; + mask = bb_op1db_lna[i].gain_g_mask; + } else { + reg = bb_op1db_lna[i].gain_a[path]; + mask = bb_op1db_lna[i].gain_a_mask; + } + val = gain->lna_op1db[gain_band][bw_type][path][i]; + rtw89_phy_write32_idx(rtwdev, reg, mask, val, phy_idx); + } + + for (i = 0; i < TIA_LNA_OP1DB_NUM; i++) { + if (chan->band_type == RTW89_BAND_2G) { + reg = bb_op1db_tia_lna[i].gain_g[path]; + mask = bb_op1db_tia_lna[i].gain_g_mask; + } else { + reg = bb_op1db_tia_lna[i].gain_a[path]; + mask = bb_op1db_tia_lna[i].gain_a_mask; + } + val = gain->tia_lna_op1db[gain_band][bw_type][path][i]; + rtw89_phy_write32_idx(rtwdev, reg, mask, val, phy_idx); + } +} + +static void rtw8922d_set_gain(struct rtw89_dev *rtwdev, + const struct rtw89_chan *chan, + enum rtw89_rf_path path, + enum rtw89_phy_idx phy_idx) +{ + rtw8922d_set_rpl_gain(rtwdev, chan, path, phy_idx); + rtw8922d_set_lna_tia_gain(rtwdev, chan, path, phy_idx); + rtw8922d_set_op1db(rtwdev, chan, path, phy_idx); +} + +static s8 rtw8922d_get_rx_gain_by_chan(struct rtw89_dev *rtwdev, + const struct rtw89_chan *chan, + enum rtw89_rf_path path, bool is_cck) +{ + struct rtw89_phy_efuse_gain *gain = &rtwdev->efuse_gain; + enum rtw89_gain_offset band; + u8 fc_ch = chan->channel; + s8 normal_efuse = 0; + + if (path > RF_PATH_B) + return 0; + + if (is_cck) { + if (fc_ch >= 1 && fc_ch <= 7) + return gain->offset[path][RTW89_GAIN_OFFSET_2G_CCK]; + else if (fc_ch >= 8 && fc_ch <= 14) + return gain->offset2[path][RTW89_GAIN_OFFSET_2G_CCK]; + + return 0; + } + + band = rtw89_subband_to_gain_offset_band_of_ofdm(chan->subband_type); + + if (band == RTW89_GAIN_OFFSET_2G_OFDM) { + if (fc_ch >= 1 && fc_ch <= 7) + normal_efuse = gain->offset[path][band]; + else if (fc_ch >= 8 && fc_ch <= 14) + normal_efuse = gain->offset2[path][band]; + } else if (band == RTW89_GAIN_OFFSET_5G_LOW) { + if (fc_ch == 50) + normal_efuse = (gain->offset[path][band] + gain->offset2[path][band]) >> 1; + else if (fc_ch >= 36 && fc_ch <= 48) + normal_efuse = gain->offset[path][band]; + else if (fc_ch >= 52 && fc_ch <= 64) + normal_efuse = gain->offset2[path][band]; + + } else if (band == RTW89_GAIN_OFFSET_5G_MID) { + if (fc_ch == 122) + normal_efuse = (gain->offset[path][band] + gain->offset2[path][band]) >> 1; + else if (fc_ch >= 100 && fc_ch <= 120) + normal_efuse = gain->offset[path][band]; + else if (fc_ch >= 124 && fc_ch <= 144) + normal_efuse = gain->offset2[path][band]; + } else if (band == RTW89_GAIN_OFFSET_5G_HIGH) { + if (fc_ch == 163) + normal_efuse = (gain->offset[path][band] + gain->offset2[path][band]) >> 1; + else if (fc_ch >= 149 && fc_ch <= 161) + normal_efuse = gain->offset[path][band]; + else if (fc_ch >= 165 && fc_ch <= 177) + normal_efuse = gain->offset2[path][band]; + } else if (band == RTW89_GAIN_OFFSET_6G_L0) { + if (fc_ch == 15) + normal_efuse = (gain->offset[path][band] + gain->offset2[path][band]) >> 1; + else if (fc_ch >= 1 && fc_ch <= 13) + normal_efuse = gain->offset[path][band]; + else if (fc_ch >= 17 && fc_ch <= 29) + normal_efuse = gain->offset2[path][band]; + } else if (band == RTW89_GAIN_OFFSET_6G_L1) { + if (fc_ch == 47) + normal_efuse = (gain->offset[path][band] + gain->offset2[path][band]) >> 1; + else if (fc_ch >= 33 && fc_ch <= 45) + normal_efuse = gain->offset[path][band]; + else if (fc_ch >= 49 && fc_ch <= 61) + normal_efuse = gain->offset2[path][band]; + } else if (band == RTW89_GAIN_OFFSET_6G_M0) { + if (fc_ch == 79) + normal_efuse = (gain->offset[path][band] + gain->offset2[path][band]) >> 1; + else if (fc_ch >= 65 && fc_ch <= 77) + normal_efuse = gain->offset[path][band]; + else if (fc_ch >= 81 && fc_ch <= 93) + normal_efuse = gain->offset2[path][band]; + } else if (band == RTW89_GAIN_OFFSET_6G_M1) { + if (fc_ch == 111) + normal_efuse = (gain->offset[path][band] + gain->offset2[path][band]) >> 1; + else if (fc_ch >= 97 && fc_ch <= 109) + normal_efuse = gain->offset[path][band]; + else if (fc_ch >= 113 && fc_ch <= 125) + normal_efuse = gain->offset2[path][band]; + } else if (band == RTW89_GAIN_OFFSET_6G_H0) { + if (fc_ch == 143) + normal_efuse = (gain->offset[path][band] + gain->offset2[path][band]) >> 1; + else if (fc_ch >= 129 && fc_ch <= 141) + normal_efuse = gain->offset[path][band]; + else if (fc_ch >= 145 && fc_ch <= 157) + normal_efuse = gain->offset2[path][band]; + } else if (band == RTW89_GAIN_OFFSET_6G_H1) { + if (fc_ch == 175) + normal_efuse = (gain->offset[path][band] + gain->offset2[path][band]) >> 1; + else if (fc_ch >= 161 && fc_ch <= 173) + normal_efuse = gain->offset[path][band]; + else if (fc_ch >= 177 && fc_ch <= 189) + normal_efuse = gain->offset2[path][band]; + } else if (band == RTW89_GAIN_OFFSET_6G_UH0) { + if (fc_ch == 207) + normal_efuse = (gain->offset[path][band] + gain->offset2[path][band]) >> 1; + else if (fc_ch >= 193 && fc_ch <= 205) + normal_efuse = gain->offset[path][band]; + else if (fc_ch >= 209 && fc_ch <= 221) + normal_efuse = gain->offset2[path][band]; + } else if (band == RTW89_GAIN_OFFSET_6G_UH1) { + if (fc_ch == 239) + normal_efuse = (gain->offset[path][band] + gain->offset2[path][band]) >> 1; + else if (fc_ch >= 225 && fc_ch <= 237) + normal_efuse = gain->offset[path][band]; + else if (fc_ch >= 241 && fc_ch <= 253) + normal_efuse = gain->offset2[path][band]; + } else { + normal_efuse = gain->offset[path][band]; + } + + return normal_efuse; +} + +static void rtw8922d_calc_rx_gain_normal_cck(struct rtw89_dev *rtwdev, + const struct rtw89_chan *chan, + enum rtw89_rf_path path, + enum rtw89_phy_idx phy_idx, + struct rtw89_phy_calc_efuse_gain *calc) +{ + struct rtw89_phy_efuse_gain *gain = &rtwdev->efuse_gain; + s8 rx_gain_offset; + + rx_gain_offset = -rtw8922d_get_rx_gain_by_chan(rtwdev, chan, path, true); + + if (chan->band_width == RTW89_CHANNEL_WIDTH_40) + rx_gain_offset += (3 << 2); /* compensate RPL loss of 3dB */ + + calc->cck_mean_gain_bias = (rx_gain_offset & 0x3) << 1; + calc->cck_rpl_ofst = (rx_gain_offset >> 2) + gain->cck_rpl_base[phy_idx]; +} + +static void rtw8922d_set_rx_gain_normal_cck(struct rtw89_dev *rtwdev, + const struct rtw89_chan *chan, + enum rtw89_rf_path path, + enum rtw89_phy_idx phy_idx) +{ + struct rtw89_phy_calc_efuse_gain calc = {}; + + rtw8922d_calc_rx_gain_normal_cck(rtwdev, chan, path, phy_idx, &calc); + + rtw89_phy_write32_idx(rtwdev, R_GAIN_BIAS_BE4, B_GAIN_BIAS_BW20_BE4, + calc.cck_mean_gain_bias, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_GAIN_BIAS_BE4, B_GAIN_BIAS_BW40_BE4, + calc.cck_mean_gain_bias, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_CCK_RPL_OFST_BE4, B_CCK_RPL_OFST_BE4, + calc.cck_rpl_ofst, phy_idx); +} + +static void rtw8922d_calc_rx_gain_normal_ofdm(struct rtw89_dev *rtwdev, + const struct rtw89_chan *chan, + enum rtw89_rf_path path, + enum rtw89_phy_idx phy_idx, + struct rtw89_phy_calc_efuse_gain *calc) +{ + struct rtw89_phy_efuse_gain *gain = &rtwdev->efuse_gain; + s8 rx_gain_offset; + + rx_gain_offset = rtw8922d_get_rx_gain_by_chan(rtwdev, chan, path, false); + calc->rssi_ofst = (rx_gain_offset + gain->ref_gain_base[phy_idx]) & 0xff; +} + +static void rtw8922d_set_rx_gain_normal_ofdm(struct rtw89_dev *rtwdev, + const struct rtw89_chan *chan, + enum rtw89_rf_path path, + enum rtw89_phy_idx phy_idx) +{ + static const u32 rssi_ofst_addr[2] = {R_OFDM_OFST_P0_BE4, R_OFDM_OFST_P1_BE4}; + static const u32 rssi_ofst_addr_m[2] = {B_OFDM_OFST_P0_BE4, B_OFDM_OFST_P1_BE4}; + static const u32 rpl_bias_comp[2] = {R_OFDM_RPL_BIAS_P0_BE4, R_OFDM_RPL_BIAS_P1_BE4}; + static const u32 rpl_bias_comp_m[2] = {B_OFDM_RPL_BIAS_P0_BE4, B_OFDM_RPL_BIAS_P1_BE4}; + struct rtw89_phy_calc_efuse_gain calc = {}; + + rtw8922d_calc_rx_gain_normal_ofdm(rtwdev, chan, path, phy_idx, &calc); + + rtw89_phy_write32_idx(rtwdev, rssi_ofst_addr[path], rssi_ofst_addr_m[path], + calc.rssi_ofst, phy_idx); + rtw89_phy_write32_idx(rtwdev, rpl_bias_comp[path], rpl_bias_comp_m[path], 0, phy_idx); +} + +static void rtw8922d_set_rx_gain_normal(struct rtw89_dev *rtwdev, + const struct rtw89_chan *chan, + enum rtw89_rf_path path, + enum rtw89_phy_idx phy_idx) +{ + struct rtw89_phy_efuse_gain *gain = &rtwdev->efuse_gain; + + if (!gain->offset_valid) + return; + + if (chan->band_type == RTW89_BAND_2G) + rtw8922d_set_rx_gain_normal_cck(rtwdev, chan, path, phy_idx); + + rtw8922d_set_rx_gain_normal_ofdm(rtwdev, chan, path, phy_idx); +} + +static void rtw8922d_set_cck_parameters(struct rtw89_dev *rtwdev, + const struct rtw89_chan *chan, + enum rtw89_phy_idx phy_idx) +{ + u8 regd = rtw89_regd_get(rtwdev, chan->band_type); + u8 central_ch = chan->channel; + + if (central_ch == 14) { + rtw89_phy_write32_idx(rtwdev, R_PCOEFF0_BE4, B_PCOEFF01_BE4, 0x3b13ff, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_PCOEFF2_BE4, B_PCOEFF23_BE4, 0x1c42de, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_PCOEFF4_BE4, B_PCOEFF45_BE4, 0xfdb0ad, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_PCOEFF6_BE4, B_PCOEFF67_BE4, 0xf60f6e, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_PCOEFF8_BE4, B_PCOEFF89_BE4, 0xfd8f92, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_PCOEFF10_BE4, B_PCOEFF10_BE4, 0x2d011, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_PCOEFF12_BE4, B_PCOEFF12_BE4, 0x1c02c, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_PCOEFF14_BE4, B_PCOEFF14_BE4, 0xfff00a, phy_idx); + + return; + } + + if (regd == RTW89_FCC) { + rtw89_phy_write32_idx(rtwdev, R_PCOEFF0_BE4, B_PCOEFF01_BE4, 0x39A3BC, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_PCOEFF2_BE4, B_PCOEFF23_BE4, 0x2AA339, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_PCOEFF4_BE4, B_PCOEFF45_BE4, 0x15B202, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_PCOEFF6_BE4, B_PCOEFF67_BE4, 0x0550C7, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_PCOEFF8_BE4, B_PCOEFF89_BE4, 0xfe0009, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_PCOEFF10_BE4, B_PCOEFF10_BE4, 0xfd7fd3, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_PCOEFF12_BE4, B_PCOEFF12_BE4, 0xfeffe2, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_PCOEFF14_BE4, B_PCOEFF14_BE4, 0xffeff8, phy_idx); + } else { + rtw89_phy_write32_idx(rtwdev, R_PCOEFF0_BE4, B_PCOEFF01_BE4, 0x3d23ff, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_PCOEFF2_BE4, B_PCOEFF23_BE4, 0x29b354, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_PCOEFF4_BE4, B_PCOEFF45_BE4, 0xfc1c8, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_PCOEFF6_BE4, B_PCOEFF67_BE4, 0xfdb053, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_PCOEFF8_BE4, B_PCOEFF89_BE4, 0xf86f9a, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_PCOEFF10_BE4, B_PCOEFF10_BE4, 0xfaef92, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_PCOEFF12_BE4, B_PCOEFF12_BE4, 0xfe5fcc, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_PCOEFF14_BE4, B_PCOEFF14_BE4, 0xffdff5, phy_idx); + } +} + +static void rtw8922d_ctrl_ch(struct rtw89_dev *rtwdev, + const struct rtw89_chan *chan, + enum rtw89_phy_idx phy_idx) +{ + u16 central_freq = chan->freq; + u8 band = chan->band_type; + u8 chan_idx; + + if (!central_freq) { + rtw89_warn(rtwdev, "Invalid central_freq\n"); + return; + } + + rtw8922d_ctrl_ch_core(rtwdev, chan, phy_idx); + + chan_idx = rtw89_encode_chan_idx(rtwdev, chan->primary_channel, band); + rtw89_phy_write32_idx(rtwdev, R_MAC_PIN_SEL_BE4, B_CH_IDX_SEG0, chan_idx, phy_idx); + + rtw8922d_set_gain(rtwdev, chan, RF_PATH_A, phy_idx); + rtw8922d_set_gain(rtwdev, chan, RF_PATH_B, phy_idx); + + rtw8922d_set_rx_gain_normal(rtwdev, chan, RF_PATH_A, phy_idx); + rtw8922d_set_rx_gain_normal(rtwdev, chan, RF_PATH_B, phy_idx); + + if (band == RTW89_BAND_2G) + rtw8922d_set_cck_parameters(rtwdev, chan, phy_idx); +} + +static void rtw8922d_ctrl_bw(struct rtw89_dev *rtwdev, u8 pri_sb, u8 bw, + enum rtw89_phy_idx phy_idx) +{ + switch (bw) { + default: + case RTW89_CHANNEL_WIDTH_20: + rtw89_phy_write32_idx(rtwdev, R_BW_BE4, B_BW_BE4, 0x0, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_BW_BE4, B_PRISB_BE4, 0x0, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RXBW_BE4, B_RXBW_BE4, 0x2, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RXBW67_BE4, B_RXBW6_BE4, 0x2, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RXBW67_BE4, B_RXBW7_BE4, 0x2, phy_idx); + break; + case RTW89_CHANNEL_WIDTH_40: + rtw89_phy_write32_idx(rtwdev, R_BW_BE4, B_BW_BE4, 0x1, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_BW_BE4, B_PRISB_BE4, pri_sb, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RXBW_BE4, B_RXBW_BE4, 0x3, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RXBW67_BE4, B_RXBW6_BE4, 0x3, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RXBW67_BE4, B_RXBW7_BE4, 0x3, phy_idx); + break; + case RTW89_CHANNEL_WIDTH_80: + rtw89_phy_write32_idx(rtwdev, R_BW_BE4, B_BW_BE4, 0x2, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_BW_BE4, B_PRISB_BE4, pri_sb, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RXBW_BE4, B_RXBW_BE4, 0x4, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RXBW67_BE4, B_RXBW6_BE4, 0x4, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RXBW67_BE4, B_RXBW7_BE4, 0x4, phy_idx); + break; + case RTW89_CHANNEL_WIDTH_160: + rtw89_phy_write32_idx(rtwdev, R_BW_BE4, B_BW_BE4, 0x3, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_BW_BE4, B_PRISB_BE4, pri_sb, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RXBW_BE4, B_RXBW_BE4, 0x5, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RXBW67_BE4, B_RXBW6_BE4, 0x5, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RXBW67_BE4, B_RXBW7_BE4, 0x5, phy_idx); + break; + } +} + +static const u16 spur_nbi_a[] = {6400}; +static const u16 spur_csi[] = {6400}; + +static u32 rtw8922d_spur_freq(struct rtw89_dev *rtwdev, const struct rtw89_chan *chan, + bool nbi_or_csi, enum rtw89_rf_path path) +{ + static const u16 cbw[RTW89_CHANNEL_WIDTH_ORDINARY_NUM] = { + 20, 40, 80, 160, 320, + }; + u16 freq_lower, freq_upper, freq; + const u16 *spur_freq; + int spur_freq_nr, i; + + if (rtwdev->hal.aid != RTL8922D_AID7060) + return 0; + + if (nbi_or_csi && path == RF_PATH_A) { + spur_freq = spur_nbi_a; + spur_freq_nr = ARRAY_SIZE(spur_nbi_a); + } else if (!nbi_or_csi) { + spur_freq = spur_csi; + spur_freq_nr = ARRAY_SIZE(spur_csi); + } else { + return 0; + } + + if (chan->band_width >= RTW89_CHANNEL_WIDTH_ORDINARY_NUM) + return 0; + + freq_lower = chan->freq - cbw[chan->band_width] / 2; + freq_upper = chan->freq + cbw[chan->band_width] / 2; + + for (i = 0; i < spur_freq_nr; i++) { + freq = spur_freq[i]; + + if (freq >= freq_lower && freq <= freq_upper) + return freq; + } + + return 0; +} + +#define CARRIER_SPACING_312_5 312500 /* 312.5 kHz */ +#define CARRIER_SPACING_78_125 78125 /* 78.125 kHz */ +#define MAX_TONE_NUM 2048 + +static void rtw8922d_set_csi_tone_idx(struct rtw89_dev *rtwdev, + const struct rtw89_chan *chan, + enum rtw89_phy_idx phy_idx) +{ + s32 freq_diff, csi_idx, csi_tone_idx; + u32 spur_freq; + + spur_freq = rtw8922d_spur_freq(rtwdev, chan, false, RF_PATH_AB); + if (spur_freq == 0) { + rtw89_phy_write32_idx(rtwdev, R_CSI_WGT_BE4, B_CSI_WGT_EN_BE4, + 0, phy_idx); + return; + } + + freq_diff = (spur_freq - chan->freq) * 1000000; + csi_idx = s32_div_u32_round_closest(freq_diff, CARRIER_SPACING_78_125); + s32_div_u32_round_down(csi_idx, MAX_TONE_NUM, &csi_tone_idx); + + rtw89_phy_write32_idx(rtwdev, R_CSI_WGT_BE4, B_CSI_WGT_IDX_BE4, + csi_tone_idx, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_CSI_WGT_BE4, B_CSI_WGT_EN_BE4, 1, phy_idx); +} + +static const struct rtw89_nbi_reg_def rtw8922d_nbi_reg_def[] = { + [RF_PATH_A] = { + .notch1_idx = {0x241A0, 0xFF}, + .notch1_frac_idx = {0x241A0, 0xC00}, + .notch1_en = {0x241A0, 0x1000}, + .notch2_idx = {0x241AC, 0xFF}, + .notch2_frac_idx = {0x241AC, 0xC00}, + .notch2_en = {0x241AC, 0x1000}, + }, + [RF_PATH_B] = { + .notch1_idx = {0x245A0, 0xFF}, + .notch1_frac_idx = {0x245A0, 0xC00}, + .notch1_en = {0x245A0, 0x1000}, + .notch2_idx = {0x245AC, 0xFF}, + .notch2_frac_idx = {0x245AC, 0xC00}, + .notch2_en = {0x245AC, 0x1000}, + }, +}; + +static void rtw8922d_set_nbi_tone_idx(struct rtw89_dev *rtwdev, + const struct rtw89_chan *chan, + enum rtw89_rf_path path, + enum rtw89_phy_idx phy_idx) +{ + const struct rtw89_nbi_reg_def *nbi = &rtw8922d_nbi_reg_def[path]; + s32 nbi_frac_idx, nbi_frac_tone_idx; + s32 nbi_idx, nbi_tone_idx; + bool notch2_chk = false; + u32 spur_freq, fc; + s32 freq_diff; + + spur_freq = rtw8922d_spur_freq(rtwdev, chan, true, path); + if (spur_freq == 0) { + rtw89_phy_write32_idx(rtwdev, nbi->notch1_en.addr, + nbi->notch1_en.mask, 0, phy_idx); + rtw89_phy_write32_idx(rtwdev, nbi->notch2_en.addr, + nbi->notch2_en.mask, 0, phy_idx); + return; + } + + fc = chan->freq; + if (chan->band_width == RTW89_CHANNEL_WIDTH_160) { + fc = (spur_freq > fc) ? fc + 40 : fc - 40; + if ((fc > spur_freq && + chan->channel < chan->primary_channel) || + (fc < spur_freq && + chan->channel > chan->primary_channel)) + notch2_chk = true; + } + + freq_diff = (spur_freq - fc) * 1000000; + nbi_idx = s32_div_u32_round_down(freq_diff, CARRIER_SPACING_312_5, + &nbi_frac_idx); + + if (chan->band_width == RTW89_CHANNEL_WIDTH_20) { + s32_div_u32_round_down(nbi_idx + 32, 64, &nbi_tone_idx); + } else { + u16 tone_para = (chan->band_width == RTW89_CHANNEL_WIDTH_40) ? + 128 : 256; + + s32_div_u32_round_down(nbi_idx, tone_para, &nbi_tone_idx); + } + nbi_frac_tone_idx = + s32_div_u32_round_closest(nbi_frac_idx, CARRIER_SPACING_78_125); + + if (chan->band_width == RTW89_CHANNEL_WIDTH_160 && notch2_chk) { + rtw89_phy_write32_idx(rtwdev, nbi->notch2_idx.addr, + nbi->notch2_idx.mask, nbi_tone_idx, phy_idx); + rtw89_phy_write32_idx(rtwdev, nbi->notch2_frac_idx.addr, + nbi->notch2_frac_idx.mask, nbi_frac_tone_idx, + phy_idx); + rtw89_phy_write32_idx(rtwdev, nbi->notch2_en.addr, + nbi->notch2_en.mask, 0, phy_idx); + rtw89_phy_write32_idx(rtwdev, nbi->notch2_en.addr, + nbi->notch2_en.mask, 1, phy_idx); + rtw89_phy_write32_idx(rtwdev, nbi->notch1_en.addr, + nbi->notch1_en.mask, 0, phy_idx); + } else { + rtw89_phy_write32_idx(rtwdev, nbi->notch1_idx.addr, + nbi->notch1_idx.mask, nbi_tone_idx, phy_idx); + rtw89_phy_write32_idx(rtwdev, nbi->notch1_frac_idx.addr, + nbi->notch1_frac_idx.mask, nbi_frac_tone_idx, + phy_idx); + rtw89_phy_write32_idx(rtwdev, nbi->notch1_en.addr, + nbi->notch1_en.mask, 0, phy_idx); + rtw89_phy_write32_idx(rtwdev, nbi->notch1_en.addr, + nbi->notch1_en.mask, 1, phy_idx); + rtw89_phy_write32_idx(rtwdev, nbi->notch2_en.addr, + nbi->notch2_en.mask, 0, phy_idx); + } +} + +static void rtw8922d_spur_elimination(struct rtw89_dev *rtwdev, + const struct rtw89_chan *chan, + enum rtw89_phy_idx phy_idx) +{ + rtw8922d_set_csi_tone_idx(rtwdev, chan, phy_idx); + rtw8922d_set_nbi_tone_idx(rtwdev, chan, RF_PATH_A, phy_idx); + rtw8922d_set_nbi_tone_idx(rtwdev, chan, RF_PATH_B, phy_idx); +} + +static void rtw8922d_tssi_reset(struct rtw89_dev *rtwdev, + enum rtw89_rf_path path, + enum rtw89_phy_idx phy_idx) +{ + if (rtwdev->mlo_dbcc_mode == MLO_1_PLUS_1_1RF) { + if (phy_idx == RTW89_PHY_0) { + rtw89_phy_write32_mask(rtwdev, R_TXPWR_RSTB0_BE4, + B_TXPWR_RSTB0_BE4, 0x0); + rtw89_phy_write32_mask(rtwdev, R_TXPWR_RSTB0_BE4, + B_TXPWR_RSTB0_BE4, 0x1); + } else { + rtw89_phy_write32_mask(rtwdev, R_TXPWR_RSTB1_BE4, + B_TXPWR_RSTB1_BE4, 0x0); + rtw89_phy_write32_mask(rtwdev, R_TXPWR_RSTB1_BE4, + B_TXPWR_RSTB1_BE4, 0x1); + } + } else { + rtw89_phy_write32_mask(rtwdev, R_TXPWR_RSTB0_BE4, B_TXPWR_RSTB0_BE4, 0x0); + rtw89_phy_write32_mask(rtwdev, R_TXPWR_RSTB0_BE4, B_TXPWR_RSTB0_BE4, 0x1); + rtw89_phy_write32_mask(rtwdev, R_TXPWR_RSTB1_BE4, B_TXPWR_RSTB1_BE4, 0x0); + rtw89_phy_write32_mask(rtwdev, R_TXPWR_RSTB1_BE4, B_TXPWR_RSTB1_BE4, 0x1); + } +} + +static void rtw8922d_set_channel_bb(struct rtw89_dev *rtwdev, + const struct rtw89_chan *chan, + enum rtw89_phy_idx phy_idx) +{ + struct rtw89_hal *hal = &rtwdev->hal; + bool cck_en = chan->band_type == RTW89_BAND_2G; + u8 pri_sb = chan->pri_sb_idx; + u32 val; + + rtw89_phy_bb_wrap_set_rfsi_ct_opt(rtwdev, phy_idx); + rtw8922d_ctrl_ch(rtwdev, chan, phy_idx); + rtw8922d_ctrl_bw(rtwdev, pri_sb, chan->band_width, phy_idx); + rtw89_phy_bb_wrap_set_rfsi_bandedge_ch(rtwdev, chan, phy_idx); + + if (cck_en) + rtw8922d_ctrl_sco_cck(rtwdev, chan->primary_channel, + chan->band_width, phy_idx); + + rtw8922d_spur_elimination(rtwdev, chan, phy_idx); + + if (hal->cid == RTL8922D_CID7025) { + if (chan->band_width == RTW89_CHANNEL_WIDTH_160) + val = 0x1f9; + else if (chan->band_width == RTW89_CHANNEL_WIDTH_80) + val = 0x1f5; + else + val = 0x1e2; + + rtw89_phy_write32_idx(rtwdev, R_AWGN_DET_BE4, B_AWGN_DET_BE4, val, phy_idx); + } + + rtw8922d_tssi_reset(rtwdev, RF_PATH_AB, phy_idx); +} + MODULE_FIRMWARE(RTW8922D_MODULE_FIRMWARE); MODULE_FIRMWARE(RTW8922DS_MODULE_FIRMWARE); MODULE_AUTHOR("Realtek Corporation"); From 1b622535a577ab24e07de8bb68bd5dc20b50ccf0 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Tue, 24 Mar 2026 14:20:49 +0800 Subject: [PATCH 1026/1561] wifi: rtw89: 8922d: add set channel of RF part The set channel of RF part is to configure channel and bandwidth on a register. The function to encode channel and bandwidth into register value will be implemented by coming patch. Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260324062049.52266-8-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/rtw8922d.c | 1 + .../net/wireless/realtek/rtw89/rtw8922d_rfk.c | 33 +++++++++++++++++++ .../net/wireless/realtek/rtw89/rtw8922d_rfk.h | 14 ++++++++ 3 files changed, 48 insertions(+) create mode 100644 drivers/net/wireless/realtek/rtw89/rtw8922d_rfk.c create mode 100644 drivers/net/wireless/realtek/rtw89/rtw8922d_rfk.h diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922d.c b/drivers/net/wireless/realtek/rtw89/rtw8922d.c index cae92e2abd85..1b5fc6c9ea85 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8922d.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8922d.c @@ -8,6 +8,7 @@ #include "phy.h" #include "reg.h" #include "rtw8922d.h" +#include "rtw8922d_rfk.h" #include "util.h" #define RTW8922D_FW_FORMAT_MAX 0 diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922d_rfk.c b/drivers/net/wireless/realtek/rtw89/rtw8922d_rfk.c new file mode 100644 index 000000000000..6b35d196cb81 --- /dev/null +++ b/drivers/net/wireless/realtek/rtw89/rtw8922d_rfk.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +/* Copyright(c) 2026 Realtek Corporation + */ + +#include "phy.h" +#include "reg.h" +#include "rtw8922d.h" +#include "rtw8922d_rfk.h" + +static +void rtw8922d_ctl_band_ch_bw(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy, + const struct rtw89_chan *chan) +{ + u8 synpath; + u32 rf18; + + synpath = rtw89_phy_get_syn_sel(rtwdev, phy); + rf18 = rtw89_chip_chan_to_rf18_val(rtwdev, chan); + + rtw89_write_rf(rtwdev, synpath, RR_RSV1, RFREG_MASK, 0x0); + rtw89_write_rf(rtwdev, synpath, RR_MOD, RFREG_MASK, 0x30000); + rtw89_write_rf(rtwdev, synpath, RR_CFGCH, RFREG_MASK, rf18); + fsleep(400); + rtw89_write_rf(rtwdev, synpath, RR_RSV1, RFREG_MASK, 0x1); + rtw89_write_rf(rtwdev, synpath, RR_CFGCH_V1, RFREG_MASK, rf18); +} + +void rtw8922d_set_channel_rf(struct rtw89_dev *rtwdev, + const struct rtw89_chan *chan, + enum rtw89_phy_idx phy_idx) +{ + rtw8922d_ctl_band_ch_bw(rtwdev, phy_idx, chan); +} diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922d_rfk.h b/drivers/net/wireless/realtek/rtw89/rtw8922d_rfk.h new file mode 100644 index 000000000000..03af1f0497ac --- /dev/null +++ b/drivers/net/wireless/realtek/rtw89/rtw8922d_rfk.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ +/* Copyright(c) 2026 Realtek Corporation + */ + +#ifndef __RTW89_8922D_RFK_H__ +#define __RTW89_8922D_RFK_H__ + +#include "core.h" + +void rtw8922d_set_channel_rf(struct rtw89_dev *rtwdev, + const struct rtw89_chan *chan, + enum rtw89_phy_idx phy_idx); + +#endif From 86a4c63c01e4c0ca4f47952e371c7219c09bfc9e Mon Sep 17 00:00:00 2001 From: Chin-Yen Lee Date: Wed, 25 Mar 2026 15:21:23 +0800 Subject: [PATCH 1027/1561] wifi: rtw89: wow: use struct style to fill WOW wakeup control H2C command The WOW wakeup control command is used to tell firmware the content of wakeup feature. Use struct instead of macros to fill the data. Signed-off-by: Chin-Yen Lee Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260325072130.41751-2-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/fw.c | 21 ++++++---- drivers/net/wireless/realtek/rtw89/fw.h | 56 ++++++------------------- 2 files changed, 26 insertions(+), 51 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/fw.c b/drivers/net/wireless/realtek/rtw89/fw.c index 45d8c5e70084..701cea9a771e 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.c +++ b/drivers/net/wireless/realtek/rtw89/fw.c @@ -9705,38 +9705,43 @@ fail: return ret; } -#define H2C_WAKEUP_CTRL_LEN 4 int rtw89_fw_h2c_wow_wakeup_ctrl(struct rtw89_dev *rtwdev, struct rtw89_vif_link *rtwvif_link, bool enable) { struct rtw89_wow_param *rtw_wow = &rtwdev->wow; + struct rtw89_h2c_wow_wakeup_ctrl *h2c; struct sk_buff *skb; + u32 len = sizeof(*h2c); u8 macid = rtwvif_link->mac_id; int ret; - skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, H2C_WAKEUP_CTRL_LEN); + skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, len); if (!skb) { rtw89_err(rtwdev, "failed to alloc skb for wakeup ctrl\n"); return -ENOMEM; } - skb_put(skb, H2C_WAKEUP_CTRL_LEN); + skb_put(skb, len); + h2c = (struct rtw89_h2c_wow_wakeup_ctrl *)skb->data; if (rtw_wow->pattern_cnt) - RTW89_SET_WOW_WAKEUP_CTRL_PATTERN_MATCH_ENABLE(skb->data, enable); + h2c->w0 |= le32_encode_bits(enable, + RTW89_H2C_WOW_WAKEUP_CTRL_W0_PATTERN_MATCH_ENABLE); if (test_bit(RTW89_WOW_FLAG_EN_MAGIC_PKT, rtw_wow->flags)) - RTW89_SET_WOW_WAKEUP_CTRL_MAGIC_ENABLE(skb->data, enable); + h2c->w0 |= le32_encode_bits(enable, + RTW89_H2C_WOW_WAKEUP_CTRL_W0_MAGIC_ENABLE); if (test_bit(RTW89_WOW_FLAG_EN_DISCONNECT, rtw_wow->flags)) - RTW89_SET_WOW_WAKEUP_CTRL_DEAUTH_ENABLE(skb->data, enable); + h2c->w0 |= le32_encode_bits(enable, + RTW89_H2C_WOW_WAKEUP_CTRL_W0_DEAUTH_ENABLE); - RTW89_SET_WOW_WAKEUP_CTRL_MAC_ID(skb->data, macid); + h2c->w0 |= le32_encode_bits(macid, RTW89_H2C_WOW_WAKEUP_CTRL_W0_MAC_ID); rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C, H2C_CAT_MAC, H2C_CL_MAC_WOW, H2C_FUNC_WAKEUP_CTRL, 0, 1, - H2C_WAKEUP_CTRL_LEN); + len); ret = rtw89_h2c_tx(rtwdev, skb, false); if (ret) { diff --git a/drivers/net/wireless/realtek/rtw89/fw.h b/drivers/net/wireless/realtek/rtw89/fw.h index 57e3b464c0a2..cf86fade84a2 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.h +++ b/drivers/net/wireless/realtek/rtw89/fw.h @@ -2219,50 +2219,20 @@ struct rtw89_h2c_cfg_nlo { #define RTW89_H2C_NLO_W0_IGNORE_CIPHER BIT(2) #define RTW89_H2C_NLO_W0_MACID GENMASK(31, 24) -static inline void RTW89_SET_WOW_WAKEUP_CTRL_PATTERN_MATCH_ENABLE(void *h2c, u32 val) -{ - le32p_replace_bits((__le32 *)h2c, val, BIT(0)); -} +struct rtw89_h2c_wow_wakeup_ctrl { + __le32 w0; +} __packed; -static inline void RTW89_SET_WOW_WAKEUP_CTRL_MAGIC_ENABLE(void *h2c, u32 val) -{ - le32p_replace_bits((__le32 *)h2c, val, BIT(1)); -} - -static inline void RTW89_SET_WOW_WAKEUP_CTRL_HW_UNICAST_ENABLE(void *h2c, u32 val) -{ - le32p_replace_bits((__le32 *)h2c, val, BIT(2)); -} - -static inline void RTW89_SET_WOW_WAKEUP_CTRL_FW_UNICAST_ENABLE(void *h2c, u32 val) -{ - le32p_replace_bits((__le32 *)h2c, val, BIT(3)); -} - -static inline void RTW89_SET_WOW_WAKEUP_CTRL_DEAUTH_ENABLE(void *h2c, u32 val) -{ - le32p_replace_bits((__le32 *)h2c, val, BIT(4)); -} - -static inline void RTW89_SET_WOW_WAKEUP_CTRL_REKEYP_ENABLE(void *h2c, u32 val) -{ - le32p_replace_bits((__le32 *)h2c, val, BIT(5)); -} - -static inline void RTW89_SET_WOW_WAKEUP_CTRL_EAP_ENABLE(void *h2c, u32 val) -{ - le32p_replace_bits((__le32 *)h2c, val, BIT(6)); -} - -static inline void RTW89_SET_WOW_WAKEUP_CTRL_ALL_DATA_ENABLE(void *h2c, u32 val) -{ - le32p_replace_bits((__le32 *)h2c, val, BIT(7)); -} - -static inline void RTW89_SET_WOW_WAKEUP_CTRL_MAC_ID(void *h2c, u32 val) -{ - le32p_replace_bits((__le32 *)h2c, val, GENMASK(31, 24)); -} +#define RTW89_H2C_WOW_WAKEUP_CTRL_W0_PATTERN_MATCH_ENABLE BIT(0) +#define RTW89_H2C_WOW_WAKEUP_CTRL_W0_MAGIC_ENABLE BIT(1) +#define RTW89_H2C_WOW_WAKEUP_CTRL_W0_HW_UNICAST_ENABLE BIT(2) +#define RTW89_H2C_WOW_WAKEUP_CTRL_W0_FW_UNICAST_ENABLE BIT(3) +#define RTW89_H2C_WOW_WAKEUP_CTRL_W0_DEAUTH_ENABLE BIT(4) +#define RTW89_H2C_WOW_WAKEUP_CTRL_W0_REKEYP_ENABLE BIT(5) +#define RTW89_H2C_WOW_WAKEUP_CTRL_W0_EAP_ENABLE BIT(6) +#define RTW89_H2C_WOW_WAKEUP_CTRL_W0_ALL_DATA_ENABLE BIT(7) +#define RTW89_H2C_WOW_WAKEUP_CTRL_W0_MAC_ID_EXT GENMASK(23, 16) +#define RTW89_H2C_WOW_WAKEUP_CTRL_W0_MAC_ID GENMASK(31, 24) struct rtw89_h2c_wow_cam_update { __le32 w0; From 4e7a7f57718eaeb07be7d7fcec800e88338fb1ad Mon Sep 17 00:00:00 2001 From: Chin-Yen Lee Date: Wed, 25 Mar 2026 15:21:24 +0800 Subject: [PATCH 1028/1561] wifi: rtw89: wow: enable MLD address for Magic packet wakeup Under MLO connections, the original Magic Packet configuration only supported Link Addresses for wakeup. Update the setting to support both MLD Address and Link Addresses for wakeup process. Signed-off-by: Chin-Yen Lee Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260325072130.41751-3-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/fw.c | 8 +++++++- drivers/net/wireless/realtek/rtw89/fw.h | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtw89/fw.c b/drivers/net/wireless/realtek/rtw89/fw.c index 701cea9a771e..13391ec9627c 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.c +++ b/drivers/net/wireless/realtek/rtw89/fw.c @@ -9709,6 +9709,7 @@ int rtw89_fw_h2c_wow_wakeup_ctrl(struct rtw89_dev *rtwdev, struct rtw89_vif_link *rtwvif_link, bool enable) { + struct ieee80211_vif *vif = rtwvif_link_to_vif(rtwvif_link); struct rtw89_wow_param *rtw_wow = &rtwdev->wow; struct rtw89_h2c_wow_wakeup_ctrl *h2c; struct sk_buff *skb; @@ -9728,9 +9729,14 @@ int rtw89_fw_h2c_wow_wakeup_ctrl(struct rtw89_dev *rtwdev, if (rtw_wow->pattern_cnt) h2c->w0 |= le32_encode_bits(enable, RTW89_H2C_WOW_WAKEUP_CTRL_W0_PATTERN_MATCH_ENABLE); - if (test_bit(RTW89_WOW_FLAG_EN_MAGIC_PKT, rtw_wow->flags)) + if (test_bit(RTW89_WOW_FLAG_EN_MAGIC_PKT, rtw_wow->flags)) { h2c->w0 |= le32_encode_bits(enable, RTW89_H2C_WOW_WAKEUP_CTRL_W0_MAGIC_ENABLE); + if (ieee80211_vif_is_mld(vif)) + h2c->w0 |= le32_encode_bits(enable, + RTW89_H2C_WOW_WAKEUP_CTRL_W0_MAGIC_MLD_ENABLE); + } + if (test_bit(RTW89_WOW_FLAG_EN_DISCONNECT, rtw_wow->flags)) h2c->w0 |= le32_encode_bits(enable, RTW89_H2C_WOW_WAKEUP_CTRL_W0_DEAUTH_ENABLE); diff --git a/drivers/net/wireless/realtek/rtw89/fw.h b/drivers/net/wireless/realtek/rtw89/fw.h index cf86fade84a2..4574a281d352 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.h +++ b/drivers/net/wireless/realtek/rtw89/fw.h @@ -2231,6 +2231,7 @@ struct rtw89_h2c_wow_wakeup_ctrl { #define RTW89_H2C_WOW_WAKEUP_CTRL_W0_REKEYP_ENABLE BIT(5) #define RTW89_H2C_WOW_WAKEUP_CTRL_W0_EAP_ENABLE BIT(6) #define RTW89_H2C_WOW_WAKEUP_CTRL_W0_ALL_DATA_ENABLE BIT(7) +#define RTW89_H2C_WOW_WAKEUP_CTRL_W0_MAGIC_MLD_ENABLE BIT(8) #define RTW89_H2C_WOW_WAKEUP_CTRL_W0_MAC_ID_EXT GENMASK(23, 16) #define RTW89_H2C_WOW_WAKEUP_CTRL_W0_MAC_ID GENMASK(31, 24) From bdc607a67edfb35a671eb3405ce5a750a5ef28b2 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Wed, 25 Mar 2026 15:21:25 +0800 Subject: [PATCH 1029/1561] wifi: rtw89: pci: clear SER ISR when initial and leaving WoWLAN for WiFi 7 chips The PCIE SER is to diagnose PCIE becomes abnormal, relying on IMR settings to trigger interrupt when status is weird. Update settings to disable PHY error flag 9, and clear ISR when initial and leaving WoWLAN. Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260325072130.41751-4-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/pci.h | 3 ++ drivers/net/wireless/realtek/rtw89/pci_be.c | 52 +++++++++++++++++---- 2 files changed, 47 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/pci.h b/drivers/net/wireless/realtek/rtw89/pci.h index dc488d9a8884..c7cd34e99349 100644 --- a/drivers/net/wireless/realtek/rtw89/pci.h +++ b/drivers/net/wireless/realtek/rtw89/pci.h @@ -55,6 +55,8 @@ #define B_AX_CALIB_EN BIT(13) #define B_AX_DIV GENMASK(15, 14) #define RAC_SET_PPR_V1 0x31 +#define RAC_ANA40 0x40 +#define PHY_ERR_IMR_DIS (BIT(9) | BIT(0)) #define RAC_ANA41 0x41 #define PHY_ERR_FLAG_EN BIT(6) @@ -1029,6 +1031,7 @@ #define B_BE_SER_PMU_IMR BIT(0) #define R_BE_REG_PL1_ISR 0x34B4 +#define B_PCIE_SER_ALL_ISR 0x7F #define R_BE_RX_APPEND_MODE 0x8920 #define B_BE_APPEND_OFFSET_MASK GENMASK(23, 16) diff --git a/drivers/net/wireless/realtek/rtw89/pci_be.c b/drivers/net/wireless/realtek/rtw89/pci_be.c index dea01d9e57fd..dfffec1ff3c7 100644 --- a/drivers/net/wireless/realtek/rtw89/pci_be.c +++ b/drivers/net/wireless/realtek/rtw89/pci_be.c @@ -351,14 +351,41 @@ static void rtw89_pci_ser_setting_be(struct rtw89_dev *rtwdev) return; rtw89_write32(rtwdev, R_BE_PL1_DBG_INFO, 0x0); - rtw89_write32_set(rtwdev, R_BE_FWS1IMR, B_BE_PCIE_SER_TIMEOUT_INDIC_EN); - rtw89_write32_set(rtwdev, R_BE_SER_PL1_CTRL, B_BE_PL1_SER_PL1_EN); - rtw89_write32_mask(rtwdev, R_BE_SER_PL1_CTRL, B_BE_PL1_TIMER_UNIT_MASK, 1); - val32 = rtw89_read32(rtwdev, R_BE_REG_PL1_MASK); - val32 |= B_BE_SER_PMU_IMR | B_BE_SER_L1SUB_IMR | B_BE_SER_PM_MASTER_IMR | - B_BE_SER_LTSSM_IMR | B_BE_SER_PM_CLK_MASK | B_BE_SER_PCLKREQ_ACK_MASK; - rtw89_write32(rtwdev, R_BE_REG_PL1_MASK, val32); + switch (hal->cv) { + case CHIP_CAV: + case CHIP_CBV: + rtw89_write32_clr(rtwdev, R_BE_SER_PL1_CTRL, B_BE_PL1_SER_PL1_EN); + rtw89_write32_mask(rtwdev, R_BE_SER_PL1_CTRL, + B_BE_PL1_TIMER_UNIT_MASK, PCIE_SER_TIMER_UNIT); + + val32 = rtw89_read32(rtwdev, R_BE_REG_PL1_MASK); + val32 &= ~(B_BE_SER_PMU_IMR | B_BE_SER_L1SUB_IMR | + B_BE_SER_PM_MASTER_IMR | B_BE_SER_LTSSM_IMR | + B_BE_SER_PM_CLK_MASK | B_BE_SER_PCLKREQ_ACK_MASK); + rtw89_write32(rtwdev, R_BE_REG_PL1_MASK, val32); + break; + case CHIP_CCV: + default: + rtw89_write32_clr(rtwdev, R_BE_SER_PL1_CTRL, B_BE_PL1_SER_PL1_EN); + + ret = read_poll_timeout_atomic(rtw89_read32, val32, !val32, + 1, 1000, false, rtwdev, R_BE_REG_PL1_ISR); + if (ret) + rtw89_warn(rtwdev, "[ERR] PCIE SER clear poll fail\n"); + + rtw89_write32_mask(rtwdev, R_BE_SER_PL1_CTRL, + B_BE_PL1_TIMER_UNIT_MASK, PCIE_SER_TIMER_UNIT); + rtw89_write32_set(rtwdev, R_BE_SER_PL1_CTRL, B_BE_PL1_SER_PL1_EN); + + val32 = rtw89_read32(rtwdev, R_BE_REG_PL1_MASK); + val32 |= (B_BE_SER_PMU_IMR | B_BE_SER_PM_MASTER_IMR | + B_BE_SER_LTSSM_IMR | B_BE_SER_PM_CLK_MASK | + B_BE_SER_PCLKREQ_ACK_MASK); + val32 &= ~B_BE_SER_L1SUB_IMR; + rtw89_write32(rtwdev, R_BE_REG_PL1_MASK, val32); + break; + } return; @@ -366,6 +393,11 @@ be2_chips: rtw89_write32_clr(rtwdev, R_BE_PCIE_SER_DBG, B_BE_PCIE_SER_FLUSH_RSTB); rtw89_write32_set(rtwdev, R_BE_PCIE_SER_DBG, B_BE_PCIE_SER_FLUSH_RSTB); + rtw89_write16_clr(rtwdev, RAC_DIRECT_OFFESET_L0_G1 + + RAC_ANA40 * RAC_MULT, PHY_ERR_IMR_DIS); + rtw89_write16_clr(rtwdev, RAC_DIRECT_OFFESET_L0_G2 + + RAC_ANA40 * RAC_MULT, PHY_ERR_IMR_DIS); + rtw89_write16_clr(rtwdev, RAC_DIRECT_OFFESET_L0_G1 + RAC_ANA41 * RAC_MULT, PHY_ERR_FLAG_EN); rtw89_write16_clr(rtwdev, RAC_DIRECT_OFFESET_L0_G2 + @@ -378,6 +410,7 @@ be2_chips: val32 = rtw89_read32(rtwdev, R_BE_SER_PL1_CTRL); val32 &= ~B_BE_PL1_SER_PL1_EN; rtw89_write32(rtwdev, R_BE_SER_PL1_CTRL, val32); + rtw89_write32(rtwdev, R_BE_REG_PL1_ISR, B_PCIE_SER_ALL_ISR); ret = read_poll_timeout_atomic(rtw89_read32, val32, !val32, 1, 1000, false, rtwdev, R_BE_REG_PL1_ISR); @@ -385,9 +418,10 @@ be2_chips: rtw89_warn(rtwdev, "[ERR] PCIE SER clear poll fail\n"); val32 = rtw89_read32(rtwdev, R_BE_REG_PL1_MASK); - val32 |= B_BE_SER_PMU_IMR | B_BE_SER_L1SUB_IMR | B_BE_SER_PM_MASTER_IMR | + val32 |= B_BE_SER_PMU_IMR | B_BE_SER_PM_MASTER_IMR | B_BE_SER_LTSSM_IMR | B_BE_SER_PM_CLK_MASK | B_BE_SER_PCLKREQ_ACK_MASK | B_BE_SER_LTSSM_UNSTABLE_MASK; + val32 &= ~B_BE_SER_L1SUB_IMR; rtw89_write32(rtwdev, R_BE_REG_PL1_MASK, val32); rtw89_write32_mask(rtwdev, R_BE_SER_PL1_CTRL, B_BE_PL1_TIMER_UNIT_MASK, @@ -761,6 +795,8 @@ static int __maybe_unused rtw89_pci_resume_be(struct device *dev) goto clear_phy_isr; rtw89_write32_clr(rtwdev, R_BE_SER_PL1_CTRL, B_BE_PL1_SER_PL1_EN); + if (rtwdev->chip->chip_id == RTL8922D) + rtw89_write32(rtwdev, R_BE_REG_PL1_ISR, B_PCIE_SER_ALL_ISR); ret = read_poll_timeout_atomic(rtw89_read32, polling, !polling, 1, 1000, false, rtwdev, R_BE_REG_PL1_ISR); From ba42bb2813aa9574d67ba0f4612ff0acba972d67 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Wed, 25 Mar 2026 15:21:26 +0800 Subject: [PATCH 1030/1561] wifi: rtw89: mac: add specific case to dump mac memory for RTL8922D The RTL8922D can reuse most mac memory addresses, but only RTW89_MAC_MEM_SECURITY_CAM is different from existing one. Add a function to return the specific memory address for RTL8922D. Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260325072130.41751-5-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/debug.c | 2 +- drivers/net/wireless/realtek/rtw89/mac.c | 4 ++-- drivers/net/wireless/realtek/rtw89/mac.h | 13 +++++++++++++ drivers/net/wireless/realtek/rtw89/ser.c | 2 +- 4 files changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/debug.c b/drivers/net/wireless/realtek/rtw89/debug.c index 82849d109cc3..7d8d22311018 100644 --- a/drivers/net/wireless/realtek/rtw89/debug.c +++ b/drivers/net/wireless/realtek/rtw89/debug.c @@ -1129,7 +1129,7 @@ static int rtw89_debug_dump_mac_mem(struct rtw89_dev *rtwdev, pages = len / mem_page_size + 1; start_page = start_addr / mem_page_size; residue = start_addr % mem_page_size; - base_addr = mac->mem_base_addrs[sel]; + base_addr = rtw89_mac_mem_base_addrs(rtwdev, sel); base_addr += start_page * mem_page_size; for (pp = 0; pp < pages; pp++) { diff --git a/drivers/net/wireless/realtek/rtw89/mac.c b/drivers/net/wireless/realtek/rtw89/mac.c index 35fd18fe6470..54aad37485d6 100644 --- a/drivers/net/wireless/realtek/rtw89/mac.c +++ b/drivers/net/wireless/realtek/rtw89/mac.c @@ -43,7 +43,7 @@ static void rtw89_mac_mem_write(struct rtw89_dev *rtwdev, u32 offset, u32 val, enum rtw89_mac_mem_sel sel) { const struct rtw89_mac_gen_def *mac = rtwdev->chip->mac_def; - u32 addr = mac->mem_base_addrs[sel] + offset; + u32 addr = rtw89_mac_mem_base_addrs(rtwdev, sel) + offset; rtw89_write32(rtwdev, mac->filter_model_addr, addr); rtw89_write32(rtwdev, mac->indir_access_addr, val); @@ -53,7 +53,7 @@ static u32 rtw89_mac_mem_read(struct rtw89_dev *rtwdev, u32 offset, enum rtw89_mac_mem_sel sel) { const struct rtw89_mac_gen_def *mac = rtwdev->chip->mac_def; - u32 addr = mac->mem_base_addrs[sel] + offset; + u32 addr = rtw89_mac_mem_base_addrs(rtwdev, sel) + offset; rtw89_write32(rtwdev, mac->filter_model_addr, addr); return rtw89_read32(rtwdev, mac->indir_access_addr); diff --git a/drivers/net/wireless/realtek/rtw89/mac.h b/drivers/net/wireless/realtek/rtw89/mac.h index 88a877556cb3..93bedf056f17 100644 --- a/drivers/net/wireless/realtek/rtw89/mac.h +++ b/drivers/net/wireless/realtek/rtw89/mac.h @@ -334,6 +334,7 @@ enum rtw89_mac_dbg_port_sel { #define NAT25_CAM_BASE_ADDR_BE 0x18820000 #define RXPLD_FLTR_CAM_BASE_ADDR_BE 0x18823000 #define SEC_CAM_BASE_ADDR_BE 0x18824000 +#define SEC_CAM_BASE_ADDR_BE_8922D 0x1882C000 #define WOW_CAM_BASE_ADDR_BE 0x18828000 #define MLD_TBL_BASE_ADDR_BE 0x18829000 #define RX_CLSF_CAM_BASE_ADDR_BE 0x1882A000 @@ -1131,6 +1132,18 @@ struct rtw89_mac_gen_def { extern const struct rtw89_mac_gen_def rtw89_mac_gen_ax; extern const struct rtw89_mac_gen_def rtw89_mac_gen_be; +static inline +u32 rtw89_mac_mem_base_addrs(struct rtw89_dev *rtwdev, u8 sel) +{ + const struct rtw89_mac_gen_def *mac = rtwdev->chip->mac_def; + + if (rtwdev->chip->chip_id == RTL8922D && + sel == RTW89_MAC_MEM_SECURITY_CAM) + return SEC_CAM_BASE_ADDR_BE_8922D; + + return mac->mem_base_addrs[sel]; +} + static inline u32 rtw89_mac_reg_by_idx(struct rtw89_dev *rtwdev, u32 reg_base, u8 band) { diff --git a/drivers/net/wireless/realtek/rtw89/ser.c b/drivers/net/wireless/realtek/rtw89/ser.c index 75220042a9a7..a507ce1fcd63 100644 --- a/drivers/net/wireless/realtek/rtw89/ser.c +++ b/drivers/net/wireless/realtek/rtw89/ser.c @@ -589,7 +589,7 @@ static void ser_mac_mem_dump(struct rtw89_dev *rtwdev, u8 *buf, start_page = start_addr / mem_page_size; residue = start_addr % mem_page_size; - base_addr = mac->mem_base_addrs[sel]; + base_addr = rtw89_mac_mem_base_addrs(rtwdev, sel); base_addr += start_page * mem_page_size; while (cnt < len) { From ce945fb377ce8620272536d734c99064de91dfd0 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Wed, 25 Mar 2026 15:21:27 +0800 Subject: [PATCH 1031/1561] wifi: rtw89: mac: disable pre-load function for RTL8922DE The pre-load function is a MAC function to pre-load TX packets into WiFi device's memory, so it can enhance performance. However, RTL8922DE has not fully verified and fine tune this function, temporarily disable this function. Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260325072130.41751-6-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/mac.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/mac.h b/drivers/net/wireless/realtek/rtw89/mac.h index 93bedf056f17..9db9ae219cd6 100644 --- a/drivers/net/wireless/realtek/rtw89/mac.h +++ b/drivers/net/wireless/realtek/rtw89/mac.h @@ -1828,8 +1828,7 @@ static inline bool rtw89_mac_chk_preload_allow(struct rtw89_dev *rtwdev) if (rtwdev->hci.type != RTW89_HCI_TYPE_PCIE) return false; - if (rtwdev->chip->chip_id == RTL8922D && rtwdev->hal.cid == RTL8922D_CID7090) - return true; + /* The RTL8922DE will re-enable pre-load function after verification. */ return false; } From e0b88e052e4c6ca5789c61cef00a90caf08cfc08 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Wed, 25 Mar 2026 15:21:28 +0800 Subject: [PATCH 1032/1561] wifi: rtw89: phy: expand PHY page for RTL8922D PHY page range is to define offset from PHY0 to PHY1, and RTL8922D needs to expand page to 0x2E0. Signed-off-by: Eric Huang Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260325072130.41751-7-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/phy_be.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtw89/phy_be.c b/drivers/net/wireless/realtek/rtw89/phy_be.c index 08fd24a55d85..929fac1b10d2 100644 --- a/drivers/net/wireless/realtek/rtw89/phy_be.c +++ b/drivers/net/wireless/realtek/rtw89/phy_be.c @@ -199,7 +199,7 @@ static u32 rtw89_phy0_phy1_offset_be_v1(struct rtw89_dev *rtwdev, u32 addr) (phy_page >= 0x240 && phy_page <= 0x24f) || (phy_page >= 0x260 && phy_page <= 0x26f) || (phy_page >= 0x2C0 && phy_page <= 0x2C9) || - (phy_page >= 0x2E4 && phy_page <= 0x2E8) || + (phy_page >= 0x2E0 && phy_page <= 0x2E8) || phy_page == 0x2EE) ofst = 0x1000; else From 9c52ad439e6c50dc54175f180d177f9be73f3e98 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Wed, 25 Mar 2026 15:21:29 +0800 Subject: [PATCH 1033/1561] wifi: rtw89: phy: load RF parameters relying on ACV for RTL8922D RF parameters are conditional formats with RFE type and CV as arguments, but RTL8922D has many variants and use ACV as argument instead of CV. Add to select proper register values. Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260325072130.41751-8-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/phy.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/phy.c b/drivers/net/wireless/realtek/rtw89/phy.c index d205561f1f5c..e70d0e283987 100644 --- a/drivers/net/wireless/realtek/rtw89/phy.c +++ b/drivers/net/wireless/realtek/rtw89/phy.c @@ -1808,7 +1808,7 @@ static int rtw89_phy_sel_headline(struct rtw89_dev *rtwdev, } static void rtw89_phy_init_reg(struct rtw89_dev *rtwdev, - const struct rtw89_phy_table *table, + const struct rtw89_phy_table *table, bool by_acv, void (*config)(struct rtw89_dev *rtwdev, const struct rtw89_reg2_def *reg, enum rtw89_rf_path rf_path, @@ -1817,8 +1817,8 @@ static void rtw89_phy_init_reg(struct rtw89_dev *rtwdev, { const struct rtw89_reg2_def *reg; enum rtw89_rf_path rf_path = table->rf_path; + u8 cv = by_acv ? rtwdev->hal.acv : rtwdev->hal.cv; u8 rfe = rtwdev->efuse.rfe_type; - u8 cv = rtwdev->hal.cv; u32 i; u32 headline_size = 0, headline_idx = 0; u32 target = 0, cfg_target; @@ -1885,16 +1885,16 @@ void rtw89_phy_init_bb_reg(struct rtw89_dev *rtwdev) const struct rtw89_phy_table *bb_gain_table; bb_table = elm_info->bb_tbl ? elm_info->bb_tbl : chip->bb_table; - rtw89_phy_init_reg(rtwdev, bb_table, rtw89_phy_config_bb_reg, NULL); + rtw89_phy_init_reg(rtwdev, bb_table, false, rtw89_phy_config_bb_reg, NULL); if (rtwdev->dbcc_en) - rtw89_phy_init_reg(rtwdev, bb_table, rtw89_phy_config_bb_reg, + rtw89_phy_init_reg(rtwdev, bb_table, false, rtw89_phy_config_bb_reg, (void *)RTW89_PHY_1); rtw89_chip_init_txpwr_unit(rtwdev); bb_gain_table = elm_info->bb_gain ? elm_info->bb_gain : chip->bb_gain_table; if (bb_gain_table) - rtw89_phy_init_reg(rtwdev, bb_gain_table, + rtw89_phy_init_reg(rtwdev, bb_gain_table, false, chip->phy_def->config_bb_gain, NULL); rtw89_phy_bb_reset(rtwdev); @@ -2000,6 +2000,7 @@ void rtw89_phy_init_rf_reg(struct rtw89_dev *rtwdev, bool noio) const struct rtw89_chip_info *chip = rtwdev->chip; const struct rtw89_phy_table *rf_table; struct rtw89_fw_h2c_rf_reg_info *rf_reg_info; + bool by_acv = chip->chip_id == RTL8922D; u8 path; rf_reg_info = kzalloc_obj(*rf_reg_info); @@ -2015,7 +2016,7 @@ void rtw89_phy_init_rf_reg(struct rtw89_dev *rtwdev, bool noio) else config = rf_table->config ? rf_table->config : rtw89_phy_config_rf_reg; - rtw89_phy_init_reg(rtwdev, rf_table, config, (void *)rf_reg_info); + rtw89_phy_init_reg(rtwdev, rf_table, by_acv, config, (void *)rf_reg_info); if (rtw89_phy_config_rf_reg_fw(rtwdev, rf_reg_info)) rtw89_warn(rtwdev, "rf path %d reg h2c config failed\n", rf_reg_info->rf_path); @@ -2056,7 +2057,7 @@ static void rtw89_phy_init_rf_nctl(struct rtw89_dev *rtwdev) rtw89_phy_preinit_rf_nctl(rtwdev); nctl_table = elm_info->rf_nctl ? elm_info->rf_nctl : chip->nctl_table; - rtw89_phy_init_reg(rtwdev, nctl_table, rtw89_phy_config_bb_reg, NULL); + rtw89_phy_init_reg(rtwdev, nctl_table, false, rtw89_phy_config_bb_reg, NULL); if (chip->nctl_post_table) rtw89_rfk_parser(rtwdev, chip->nctl_post_table); From e0da9859cfbc9a8cf9a879b00931b89f590de0c1 Mon Sep 17 00:00:00 2001 From: Zong-Zhe Yang Date: Wed, 25 Mar 2026 15:21:30 +0800 Subject: [PATCH 1034/1561] wifi: rtw89: fw: load TX power elements according to AID For different A-die, there will be different TX power parameters. In FW element header, the corresponding A-die ID will be described. So, compare runtime AID with that to load the target TX power parameters. Signed-off-by: Zong-Zhe Yang Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260325072130.41751-9-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/fw.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/realtek/rtw89/fw.c b/drivers/net/wireless/realtek/rtw89/fw.c index 13391ec9627c..52db41c67265 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.c +++ b/drivers/net/wireless/realtek/rtw89/fw.c @@ -1153,8 +1153,13 @@ int rtw89_fw_recognize_txpwr_from_elm(struct rtw89_dev *rtwdev, const struct __rtw89_fw_txpwr_element *txpwr_elm = &elm->u.txpwr; const unsigned long offset = arg.offset; struct rtw89_efuse *efuse = &rtwdev->efuse; + struct rtw89_hal *hal = &rtwdev->hal; + u16 aid = le16_to_cpu(elm->aid); struct rtw89_txpwr_conf *conf; + if (aid && aid != hal->aid) + return 1; + if (!rtwdev->rfe_data) { rtwdev->rfe_data = kzalloc_obj(*rtwdev->rfe_data); if (!rtwdev->rfe_data) From 658e3c836969e1624a7572c75684f54ec503c2ed Mon Sep 17 00:00:00 2001 From: Bitterblue Smith Date: Thu, 26 Mar 2026 00:27:15 +0200 Subject: [PATCH 1035/1561] wifi: rtw88: coex: Ignore BT info byte 5 from RTL8821A Sometimes while watching a Youtube video with Bluetooth headphones the audio has a lot of interruptions, because the 5th byte of the BT info sent by RTL8821AU has strange values, which result in coex_stat->bt_hid_pair_num being 2 or 3. When this happens rtw_coex_freerun_check() returns true, which causes rtw_coex_action_wl_connected() to call rtw_coex_action_freerun() instead of rtw_coex_action_bt_a2dp(). The RTL8821AU vendor driver doesn't do anything with the 5th byte of the BT info, so ignore it here as well. Signed-off-by: Bitterblue Smith Acked-by: Ping-Ke Shih Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/bbf06c83-d2ee-4205-8fbb-829e2347586f@gmail.com --- drivers/net/wireless/realtek/rtw88/coex.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/realtek/rtw88/coex.c b/drivers/net/wireless/realtek/rtw88/coex.c index ee4007fe6c18..37c336def419 100644 --- a/drivers/net/wireless/realtek/rtw88/coex.c +++ b/drivers/net/wireless/realtek/rtw88/coex.c @@ -3109,6 +3109,9 @@ void rtw_coex_bt_info_notify(struct rtw_dev *rtwdev, u8 *buf, u8 length) for (i = 0; i < COEX_BTINFO_LENGTH; i++) coex_stat->bt_info_c2h[rsp_source][i] = buf[i]; + if (rtwdev->chip->id == RTW_CHIP_TYPE_8821A) + coex_stat->bt_info_c2h[rsp_source][5] = 0; + /* get the same info from bt, skip it */ if (coex_stat->bt_info_c2h[rsp_source][1] == coex_stat->bt_info_lb2 && coex_stat->bt_info_c2h[rsp_source][2] == coex_stat->bt_info_lb3 && From 699f47e616fed11d5074e7bbee2f4f920028c4a2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 29 Mar 2026 11:04:27 -0700 Subject: [PATCH 1036/1561] net: Clear the dst when performing encap / decap Commit ba9db6f907ac ("net: clear the dst when changing skb protocol") added dst clearing when a BPF program changes the skb protocol (e.g. IPv4 to IPv6). Since that was a fix we only cleared the dst when the L3 protocol actually changes to keep it minimal. As suggested during the discussion (see Link) encap or decap operation which wraps or unwraps a same-protocol header may also render the existing dst incorrect - even if that doesn't result in a crash, just the wrong route for the now-outermost IP dst. Make dropping dst unconditional for bpf_skb_change_proto() and all L3 encap / decap ops. Signed-off-by: Jakub Kicinski Signed-off-by: Martin KaFai Lau Reviewed-by: Willem de Bruijn Acked-by: Daniel Borkmann Link: https://lore.kernel.org/CANP3RGfRaYwve_xgxH6Tp2zenzKn2-DjZ9tg023WVzfdJF3p_w@mail.gmail.com Link: https://patch.msgid.link/20260329180428.2657785-1-kuba@kernel.org --- net/core/filter.c | 50 +++++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index d55525cc5540..cf2113af4bc9 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3256,13 +3256,6 @@ static const struct bpf_func_proto bpf_skb_vlan_pop_proto = { .arg1_type = ARG_PTR_TO_CTX, }; -static void bpf_skb_change_protocol(struct sk_buff *skb, u16 proto) -{ - skb->protocol = htons(proto); - if (skb_valid_dst(skb)) - skb_dst_drop(skb); -} - static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len) { /* Caller already did skb_cow() with meta_len+len as headroom, @@ -3361,7 +3354,7 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb) shinfo->gso_type |= SKB_GSO_DODGY; } - bpf_skb_change_protocol(skb, ETH_P_IPV6); + skb->protocol = htons(ETH_P_IPV6); skb_clear_hash(skb); return 0; @@ -3392,7 +3385,7 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb) shinfo->gso_type |= SKB_GSO_DODGY; } - bpf_skb_change_protocol(skb, ETH_P_IP); + skb->protocol = htons(ETH_P_IP); skb_clear_hash(skb); return 0; @@ -3440,7 +3433,13 @@ BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto, */ ret = bpf_skb_proto_xlat(skb, proto); bpf_compute_data_pointers(skb); - return ret; + if (ret) + return ret; + + if (skb_valid_dst(skb)) + skb_dst_drop(skb); + + return 0; } static const struct bpf_func_proto bpf_skb_change_proto_proto = { @@ -3582,12 +3581,13 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff, } /* Match skb->protocol to new outer l3 protocol */ - if (skb->protocol == htons(ETH_P_IP) && - flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6) - bpf_skb_change_protocol(skb, ETH_P_IPV6); - else if (skb->protocol == htons(ETH_P_IPV6) && - flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4) - bpf_skb_change_protocol(skb, ETH_P_IP); + if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6) + skb->protocol = htons(ETH_P_IPV6); + else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4) + skb->protocol = htons(ETH_P_IP); + + if (skb_valid_dst(skb)) + skb_dst_drop(skb); } if (skb_is_gso(skb)) { @@ -3615,6 +3615,7 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff, static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff, u64 flags) { + bool decap = flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK; int ret; if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO | @@ -3637,13 +3638,16 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff, if (unlikely(ret < 0)) return ret; - /* Match skb->protocol to new outer l3 protocol */ - if (skb->protocol == htons(ETH_P_IP) && - flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6) - bpf_skb_change_protocol(skb, ETH_P_IPV6); - else if (skb->protocol == htons(ETH_P_IPV6) && - flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4) - bpf_skb_change_protocol(skb, ETH_P_IP); + if (decap) { + /* Match skb->protocol to new outer l3 protocol */ + if (flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6) + skb->protocol = htons(ETH_P_IPV6); + else if (flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4) + skb->protocol = htons(ETH_P_IP); + + if (skb_valid_dst(skb)) + skb_dst_drop(skb); + } if (skb_is_gso(skb)) { struct skb_shared_info *shinfo = skb_shinfo(skb); From 64c535db769e9ab917343d61045802e832d621ed Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 29 Mar 2026 11:04:28 -0700 Subject: [PATCH 1037/1561] selftests/bpf: Test that dst is cleared on same-protocol encap Verify that bpf_skb_adjust_room() clears the routing dst even when the encap L3 protocol matches the original packet (e.g. IPIP). The dst selected for the inner packet is not valid for the encapsulated result; a stale dst could lead to misrouting. Signed-off-by: Jakub Kicinski Signed-off-by: Martin KaFai Lau Acked-by: Daniel Borkmann Link: https://patch.msgid.link/20260329180428.2657785-2-kuba@kernel.org --- .../selftests/bpf/prog_tests/test_dst_clear.c | 55 ++++++++++++++++++ .../selftests/bpf/progs/test_dst_clear.c | 57 +++++++++++++++++++ 2 files changed, 112 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/test_dst_clear.c create mode 100644 tools/testing/selftests/bpf/progs/test_dst_clear.c diff --git a/tools/testing/selftests/bpf/prog_tests/test_dst_clear.c b/tools/testing/selftests/bpf/prog_tests/test_dst_clear.c new file mode 100644 index 000000000000..7c35ca6f4539 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_dst_clear.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ + +#include +#include +#include + +#include "test_progs.h" +#include "network_helpers.h" +#include "test_dst_clear.skel.h" + +#define IPV4_IFACE_ADDR "1.0.0.1" +#define UDP_TEST_PORT 7777 + +void test_ns_dst_clear(void) +{ + LIBBPF_OPTS(bpf_tcx_opts, tcx_opts); + struct test_dst_clear *skel; + struct sockaddr_in addr; + struct bpf_link *link; + socklen_t addrlen; + char buf[128] = {}; + int sockfd, err; + + skel = test_dst_clear__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel open_and_load")) + return; + + SYS(fail, "ip addr add %s/8 dev lo", IPV4_IFACE_ADDR); + + link = bpf_program__attach_tcx(skel->progs.dst_clear, + if_nametoindex("lo"), &tcx_opts); + if (!ASSERT_OK_PTR(link, "attach_tcx")) + goto fail; + skel->links.dst_clear = link; + + addrlen = sizeof(addr); + err = make_sockaddr(AF_INET, IPV4_IFACE_ADDR, UDP_TEST_PORT, + (void *)&addr, &addrlen); + if (!ASSERT_OK(err, "make_sockaddr")) + goto fail; + sockfd = socket(AF_INET, SOCK_DGRAM, 0); + if (!ASSERT_NEQ(sockfd, -1, "socket")) + goto fail; + err = sendto(sockfd, buf, sizeof(buf), 0, (void *)&addr, addrlen); + close(sockfd); + if (!ASSERT_EQ(err, sizeof(buf), "send")) + goto fail; + + ASSERT_TRUE(skel->bss->had_dst, "had_dst"); + ASSERT_TRUE(skel->bss->dst_cleared, "dst_cleared"); + +fail: + test_dst_clear__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_dst_clear.c b/tools/testing/selftests/bpf/progs/test_dst_clear.c new file mode 100644 index 000000000000..c22a6eeb4798 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_dst_clear.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include "bpf_tracing_net.h" +#include +#include + +#define UDP_TEST_PORT 7777 + +void *bpf_cast_to_kern_ctx(void *) __ksym; + +bool had_dst = false; +bool dst_cleared = false; + +SEC("tc/egress") +int dst_clear(struct __sk_buff *skb) +{ + struct sk_buff *kskb; + struct iphdr iph; + struct udphdr udph; + int err; + + if (skb->protocol != __bpf_constant_htons(ETH_P_IP)) + return TC_ACT_OK; + + if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph, sizeof(iph))) + return TC_ACT_OK; + + if (iph.protocol != IPPROTO_UDP) + return TC_ACT_OK; + + if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph), &udph, sizeof(udph))) + return TC_ACT_OK; + + if (udph.dest != __bpf_constant_htons(UDP_TEST_PORT)) + return TC_ACT_OK; + + kskb = bpf_cast_to_kern_ctx(skb); + had_dst = (kskb->_skb_refdst != 0); + + /* Same-protocol encap (IPIP): protocol stays IPv4, but the dst + * from the original routing is no longer valid for the outer hdr. + */ + err = bpf_skb_adjust_room(skb, (s32)sizeof(struct iphdr), + BPF_ADJ_ROOM_MAC, + BPF_F_ADJ_ROOM_FIXED_GSO | + BPF_F_ADJ_ROOM_ENCAP_L3_IPV4); + if (err) + return TC_ACT_SHOT; + + dst_cleared = (kskb->_skb_refdst == 0); + + return TC_ACT_SHOT; +} + +char __license[] SEC("license") = "GPL"; From 911e2c050963ccf239faec6ae9dee0f5e8f1cc5c Mon Sep 17 00:00:00 2001 From: Jan Hoffmann Date: Sun, 29 Mar 2026 21:11:11 +0200 Subject: [PATCH 1038/1561] net: sfp: add quirk for ZOERAX SFP-2.5G-T This is a 2.5G copper module which appears to be based on a Motorcomm YT8821 PHY. There doesn't seem to be a usable way to to access the PHY (I2C address 0x56 provides only read-only C22 access, and Rollball is also not working). The module does not report the correct extended compliance code for 2.5GBase-T, and instead claims to support SONET OC-48 and Fibre Channel: Identifier : 0x03 (SFP) Extended identifier : 0x04 (GBIC/SFP defined by 2-wire interface ID) Connector : 0x07 (LC) Transceiver codes : 0x00 0x01 0x00 0x00 0x40 0x40 0x04 0x00 0x00 Transceiver type : FC: Multimode, 50um (M5) Encoding : 0x05 (SONET Scrambled) BR Nominal : 2500MBd Despite this, the kernel still enables the correct 2500Base-X interface mode. However, for the module to actually work, it is also necessary to disable inband auto-negotiation. Enable the existing "sfp_quirk_oem_2_5g" for this module, which handles that and also sets the bit for 2500Base-T link mode. Signed-off-by: Jan Hoffmann Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20260329191304.720160-1-jan@3e8.eu Signed-off-by: Jakub Kicinski --- drivers/net/phy/sfp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 5db841377199..0a455a2daccb 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -567,6 +567,8 @@ static const struct sfp_quirk sfp_quirks[] = { SFP_QUIRK_F("Turris", "RTSFP-2.5G", sfp_fixup_rollball), SFP_QUIRK_F("Turris", "RTSFP-10", sfp_fixup_rollball), SFP_QUIRK_F("Turris", "RTSFP-10G", sfp_fixup_rollball), + + SFP_QUIRK_S("ZOERAX", "SFP-2.5G-T", sfp_quirk_oem_2_5g), }; static size_t sfp_strlen(const char *str, size_t maxlen) From c3dd3b1e76e0ff20ce97e99bc8d1f3152eb177b9 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 27 Mar 2026 08:43:28 +0000 Subject: [PATCH 1039/1561] net: stmmac: qcom-ethqos: remove ethqos_configure() ethqos_configure() does nothing more than indirect via ethqos->configure_func, and is only called from ethqos_fix_mac_speed() just below. Move the indirect call into ethqos_fix_mac_speed(). Signed-off-by: Russell King (Oracle) Tested-by: Mohd Ayaan Anwar Link: https://patch.msgid.link/E1w62mu-0000000E3Bq-15wa@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 3ccf20fdf52a..502f2b184a87 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -623,18 +623,12 @@ static void ethqos_configure_sgmii(struct qcom_ethqos *ethqos, ethqos_pcs_set_inband(priv, interface == PHY_INTERFACE_MODE_SGMII); } -static void ethqos_configure(struct qcom_ethqos *ethqos, - phy_interface_t interface, int speed) -{ - return ethqos->configure_func(ethqos, interface, speed); -} - static void ethqos_fix_mac_speed(void *priv, phy_interface_t interface, int speed, unsigned int mode) { struct qcom_ethqos *ethqos = priv; - ethqos_configure(ethqos, interface, speed); + ethqos->configure_func(ethqos, interface, speed); } static int qcom_ethqos_serdes_powerup(struct net_device *ndev, void *priv) From 673416fb5b410b536f45e738ac34e482efd94e81 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 27 Mar 2026 08:43:33 +0000 Subject: [PATCH 1040/1561] net: stmmac: qcom-ethqos: pass ethqos to ethqos_pcs_set_inband() Rather than getting the stmmac_priv pointer in ethqos_configure_sgmii(), move it into ethqos_pcs_set_inband() and pass the struct qcom_ethqos pointer instead. Signed-off-by: Russell King (Oracle) Tested-by: Mohd Ayaan Anwar Link: https://patch.msgid.link/E1w62mz-0000000E3Bx-1Xd8@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 502f2b184a87..b9cfcf32cebc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -590,8 +590,11 @@ static void ethqos_configure_rgmii(struct qcom_ethqos *ethqos, ethqos_rgmii_macro_init(ethqos, speed); } -static void ethqos_pcs_set_inband(struct stmmac_priv *priv, bool enable) +static void ethqos_pcs_set_inband(struct qcom_ethqos *ethqos, bool enable) { + struct net_device *dev = platform_get_drvdata(ethqos->pdev); + struct stmmac_priv *priv = netdev_priv(dev); + stmmac_pcs_ctrl_ane(priv, enable, 0); } @@ -601,9 +604,6 @@ static void ethqos_pcs_set_inband(struct stmmac_priv *priv, bool enable) static void ethqos_configure_sgmii(struct qcom_ethqos *ethqos, phy_interface_t interface, int speed) { - struct net_device *dev = platform_get_drvdata(ethqos->pdev); - struct stmmac_priv *priv = netdev_priv(dev); - switch (speed) { case SPEED_2500: case SPEED_1000: @@ -620,7 +620,7 @@ static void ethqos_configure_sgmii(struct qcom_ethqos *ethqos, break; } - ethqos_pcs_set_inband(priv, interface == PHY_INTERFACE_MODE_SGMII); + ethqos_pcs_set_inband(ethqos, interface == PHY_INTERFACE_MODE_SGMII); } static void ethqos_fix_mac_speed(void *priv, phy_interface_t interface, From e9ed46a0b129d743f6ca008101b055fdd2e5ea2e Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 27 Mar 2026 08:43:38 +0000 Subject: [PATCH 1041/1561] net: stmmac: qcom-ethqos: eliminate configure_func Since ethqos_fix_mac_speed() is called via a function pointer, and only indirects via the configure_func function pointer, eliminate this unnecessary indirection. Signed-off-by: Russell King (Oracle) Tested-by: Mohd Ayaan Anwar Link: https://patch.msgid.link/E1w62n4-0000000E3C3-251S@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../stmicro/stmmac/dwmac-qcom-ethqos.c | 35 ++++++++----------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index b9cfcf32cebc..f7e3a2b8803f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -100,9 +100,6 @@ struct ethqos_emac_driver_data { struct qcom_ethqos { struct platform_device *pdev; void __iomem *rgmii_base; - void (*configure_func)(struct qcom_ethqos *ethqos, - phy_interface_t interface, int speed); - struct clk *link_clk; struct phy *serdes_phy; phy_interface_t phy_mode; @@ -521,13 +518,17 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) return 0; } -static void ethqos_configure_rgmii(struct qcom_ethqos *ethqos, - phy_interface_t interface, int speed) +static void ethqos_fix_mac_speed_rgmii(void *bsp_priv, + phy_interface_t interface, int speed, + unsigned int mode) { - struct device *dev = ðqos->pdev->dev; + struct qcom_ethqos *ethqos = bsp_priv; + struct device *dev; unsigned int i; u32 val; + dev = ðqos->pdev->dev; + /* Reset to POR values and enable clk */ for (i = 0; i < ethqos->num_rgmii_por; i++) rgmii_writel(ethqos, ethqos->rgmii_por[i].value, @@ -601,9 +602,12 @@ static void ethqos_pcs_set_inband(struct qcom_ethqos *ethqos, bool enable) /* On interface toggle MAC registers gets reset. * Configure MAC block for SGMII on ethernet phy link up */ -static void ethqos_configure_sgmii(struct qcom_ethqos *ethqos, - phy_interface_t interface, int speed) +static void ethqos_fix_mac_speed_sgmii(void *bsp_priv, + phy_interface_t interface, int speed, + unsigned int mode) { + struct qcom_ethqos *ethqos = bsp_priv; + switch (speed) { case SPEED_2500: case SPEED_1000: @@ -623,14 +627,6 @@ static void ethqos_configure_sgmii(struct qcom_ethqos *ethqos, ethqos_pcs_set_inband(ethqos, interface == PHY_INTERFACE_MODE_SGMII); } -static void ethqos_fix_mac_speed(void *priv, phy_interface_t interface, - int speed, unsigned int mode) -{ - struct qcom_ethqos *ethqos = priv; - - ethqos->configure_func(ethqos, interface, speed); -} - static int qcom_ethqos_serdes_powerup(struct net_device *ndev, void *priv) { struct qcom_ethqos *ethqos = priv; @@ -687,7 +683,7 @@ static int ethqos_clks_config(void *priv, bool enabled) /* Enable functional clock to prevent DMA reset to timeout due * to lacking PHY clock after the hardware block has been power * cycled. The actual configuration will be adjusted once - * ethqos_fix_mac_speed() is invoked. + * ethqos' fix_mac_speed() method is invoked. */ qcom_ethqos_set_sgmii_loopback(ethqos, true); ethqos_set_func_clk_en(ethqos); @@ -753,11 +749,11 @@ static int qcom_ethqos_probe(struct platform_device *pdev) case PHY_INTERFACE_MODE_RGMII_ID: case PHY_INTERFACE_MODE_RGMII_RXID: case PHY_INTERFACE_MODE_RGMII_TXID: - ethqos->configure_func = ethqos_configure_rgmii; + plat_dat->fix_mac_speed = ethqos_fix_mac_speed_rgmii; break; case PHY_INTERFACE_MODE_2500BASEX: case PHY_INTERFACE_MODE_SGMII: - ethqos->configure_func = ethqos_configure_sgmii; + plat_dat->fix_mac_speed = ethqos_fix_mac_speed_sgmii; plat_dat->mac_finish = ethqos_mac_finish_serdes; break; default: @@ -805,7 +801,6 @@ static int qcom_ethqos_probe(struct platform_device *pdev) plat_dat->bsp_priv = ethqos; plat_dat->set_clk_tx_rate = ethqos_set_clk_tx_rate; - plat_dat->fix_mac_speed = ethqos_fix_mac_speed; plat_dat->dump_debug_regs = rgmii_dump; plat_dat->ptp_clk_freq_config = ethqos_ptp_clk_freq_config; plat_dat->core_type = DWMAC_CORE_GMAC4; From 426ce4677e8101996d1ff230464ea696eba2d853 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 27 Mar 2026 08:43:43 +0000 Subject: [PATCH 1042/1561] net: stmmac: qcom-ethqos: move detection of invalid RGMII speed Move detection of invalid RGMII speeds (which will never be triggered) before the switch() to allow register modifications that are common to all speeds to be moved out of the switch. Signed-off-by: Russell King (Oracle) Tested-by: Mohd Ayaan Anwar Link: https://patch.msgid.link/E1w62n9-0000000E3C9-2Zkr@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index f7e3a2b8803f..d19331067459 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -397,6 +397,11 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) /* Select RGMII, write 0 to interface select */ rgmii_clrmask(ethqos, RGMII_CONFIG_INTF_SEL, RGMII_IO_MACRO_CONFIG); + if (speed != SPEED_1000 && speed != SPEED_100 && speed != SPEED_10) { + dev_err(dev, "Invalid speed %d\n", speed); + return -EINVAL; + } + switch (speed) { case SPEED_1000: rgmii_setmask(ethqos, RGMII_CONFIG_DDR_MODE, @@ -510,9 +515,6 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) rgmii_updatel(ethqos, RGMII_CONFIG_LOOPBACK_EN, loopback, RGMII_IO_MACRO_CONFIG); break; - default: - dev_err(dev, "Invalid speed %d\n", speed); - return -EINVAL; } return 0; From 6be23c4c636acc08c691e2b7371f341b13de23f7 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 27 Mar 2026 08:43:48 +0000 Subject: [PATCH 1043/1561] net: stmmac: qcom-ethqos: move RGMII_CONFIG_DDR_MODE RGMII_CONFIG_DDR_MODE is always set irrespective of the speed. Move this out of the switch. Signed-off-by: Russell King (Oracle) Tested-by: Mohd Ayaan Anwar Link: https://patch.msgid.link/E1w62nE-0000000E3CF-331r@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index d19331067459..34d9a6b2d023 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -402,10 +402,10 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) return -EINVAL; } + rgmii_setmask(ethqos, RGMII_CONFIG_DDR_MODE, RGMII_IO_MACRO_CONFIG); + switch (speed) { case SPEED_1000: - rgmii_setmask(ethqos, RGMII_CONFIG_DDR_MODE, - RGMII_IO_MACRO_CONFIG); rgmii_clrmask(ethqos, RGMII_CONFIG_BYPASS_TX_ID_EN, RGMII_IO_MACRO_CONFIG); rgmii_setmask(ethqos, RGMII_CONFIG_POS_NEG_DATA_SEL, @@ -443,8 +443,6 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) break; case SPEED_100: - rgmii_setmask(ethqos, RGMII_CONFIG_DDR_MODE, - RGMII_IO_MACRO_CONFIG); rgmii_setmask(ethqos, RGMII_CONFIG_BYPASS_TX_ID_EN, RGMII_IO_MACRO_CONFIG); rgmii_clrmask(ethqos, RGMII_CONFIG_POS_NEG_DATA_SEL, @@ -481,8 +479,6 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) break; case SPEED_10: - rgmii_setmask(ethqos, RGMII_CONFIG_DDR_MODE, - RGMII_IO_MACRO_CONFIG); rgmii_setmask(ethqos, RGMII_CONFIG_BYPASS_TX_ID_EN, RGMII_IO_MACRO_CONFIG); rgmii_clrmask(ethqos, RGMII_CONFIG_POS_NEG_DATA_SEL, From 82d5fdc82a33bea93baec4795e835b016d2dc87e Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 27 Mar 2026 08:43:53 +0000 Subject: [PATCH 1044/1561] net: stmmac: qcom-ethqos: move 1G vs 100M/10M RGMII settings Move RGMII_CONFIG_BYPASS_TX_ID_EN, RGMII_CONFIG_POS_NEG_DATA_SEL and RGMII_CONFIG_PROG_SWAP. There are two states for these: one group for 1G, and the logical inversion for 100M and 10M. Move this out of the switch into an if-else clause. Signed-off-by: Russell King (Oracle) Tested-by: Mohd Ayaan Anwar Link: https://patch.msgid.link/E1w62nJ-0000000E3CL-3YSr@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../stmicro/stmmac/dwmac-qcom-ethqos.c | 26 +++++++++---------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 34d9a6b2d023..e9d8c8a7146a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -404,14 +404,24 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) rgmii_setmask(ethqos, RGMII_CONFIG_DDR_MODE, RGMII_IO_MACRO_CONFIG); - switch (speed) { - case SPEED_1000: + if (speed == SPEED_1000) { rgmii_clrmask(ethqos, RGMII_CONFIG_BYPASS_TX_ID_EN, RGMII_IO_MACRO_CONFIG); rgmii_setmask(ethqos, RGMII_CONFIG_POS_NEG_DATA_SEL, RGMII_IO_MACRO_CONFIG); rgmii_setmask(ethqos, RGMII_CONFIG_PROG_SWAP, RGMII_IO_MACRO_CONFIG); + } else { + rgmii_setmask(ethqos, RGMII_CONFIG_BYPASS_TX_ID_EN, + RGMII_IO_MACRO_CONFIG); + rgmii_clrmask(ethqos, RGMII_CONFIG_POS_NEG_DATA_SEL, + RGMII_IO_MACRO_CONFIG); + rgmii_clrmask(ethqos, RGMII_CONFIG_PROG_SWAP, + RGMII_IO_MACRO_CONFIG); + } + + switch (speed) { + case SPEED_1000: rgmii_clrmask(ethqos, RGMII_CONFIG2_DATA_DIVIDE_CLK_SEL, RGMII_IO_MACRO_CONFIG2); @@ -443,12 +453,6 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) break; case SPEED_100: - rgmii_setmask(ethqos, RGMII_CONFIG_BYPASS_TX_ID_EN, - RGMII_IO_MACRO_CONFIG); - rgmii_clrmask(ethqos, RGMII_CONFIG_POS_NEG_DATA_SEL, - RGMII_IO_MACRO_CONFIG); - rgmii_clrmask(ethqos, RGMII_CONFIG_PROG_SWAP, - RGMII_IO_MACRO_CONFIG); rgmii_clrmask(ethqos, RGMII_CONFIG2_DATA_DIVIDE_CLK_SEL, RGMII_IO_MACRO_CONFIG2); rgmii_updatel(ethqos, RGMII_CONFIG2_TX_CLK_PHASE_SHIFT_EN, @@ -479,12 +483,6 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) break; case SPEED_10: - rgmii_setmask(ethqos, RGMII_CONFIG_BYPASS_TX_ID_EN, - RGMII_IO_MACRO_CONFIG); - rgmii_clrmask(ethqos, RGMII_CONFIG_POS_NEG_DATA_SEL, - RGMII_IO_MACRO_CONFIG); - rgmii_clrmask(ethqos, RGMII_CONFIG_PROG_SWAP, - RGMII_IO_MACRO_CONFIG); rgmii_clrmask(ethqos, RGMII_CONFIG2_DATA_DIVIDE_CLK_SEL, RGMII_IO_MACRO_CONFIG2); rgmii_updatel(ethqos, RGMII_CONFIG2_TX_CLK_PHASE_SHIFT_EN, From dd07f2f9149a885ea2d8d19f95439227ec558106 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 27 Mar 2026 08:43:58 +0000 Subject: [PATCH 1045/1561] net: stmmac: qcom-ethqos: move two more RGMII_IO_MACRO_CONFIG2 out RGMII_CONFIG2_DATA_DIVIDE_CLK_SEL is always cleared, and RGMII_CONFIG2_TX_CLK_PHASE_SHIFT_EN is always updated with the phase shift in each path through the switch, so these are independent of the speed. Move them out. Signed-off-by: Russell King (Oracle) Tested-by: Mohd Ayaan Anwar Link: https://patch.msgid.link/E1w62nO-0000000E3CR-445p@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../stmicro/stmmac/dwmac-qcom-ethqos.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index e9d8c8a7146a..ab6554f58214 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -420,13 +420,13 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) RGMII_IO_MACRO_CONFIG); } + rgmii_clrmask(ethqos, RGMII_CONFIG2_DATA_DIVIDE_CLK_SEL, + RGMII_IO_MACRO_CONFIG2); + rgmii_updatel(ethqos, RGMII_CONFIG2_TX_CLK_PHASE_SHIFT_EN, phase_shift, + RGMII_IO_MACRO_CONFIG2); + switch (speed) { case SPEED_1000: - rgmii_clrmask(ethqos, RGMII_CONFIG2_DATA_DIVIDE_CLK_SEL, - RGMII_IO_MACRO_CONFIG2); - - rgmii_updatel(ethqos, RGMII_CONFIG2_TX_CLK_PHASE_SHIFT_EN, - phase_shift, RGMII_IO_MACRO_CONFIG2); rgmii_clrmask(ethqos, RGMII_CONFIG2_RSVD_CONFIG15, RGMII_IO_MACRO_CONFIG2); rgmii_setmask(ethqos, RGMII_CONFIG2_RX_PROG_SWAP, @@ -453,10 +453,6 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) break; case SPEED_100: - rgmii_clrmask(ethqos, RGMII_CONFIG2_DATA_DIVIDE_CLK_SEL, - RGMII_IO_MACRO_CONFIG2); - rgmii_updatel(ethqos, RGMII_CONFIG2_TX_CLK_PHASE_SHIFT_EN, - phase_shift, RGMII_IO_MACRO_CONFIG2); rgmii_updatel(ethqos, RGMII_CONFIG_MAX_SPD_PRG_2, FIELD_PREP(RGMII_CONFIG_MAX_SPD_PRG_2, 1), RGMII_IO_MACRO_CONFIG); @@ -483,10 +479,6 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) break; case SPEED_10: - rgmii_clrmask(ethqos, RGMII_CONFIG2_DATA_DIVIDE_CLK_SEL, - RGMII_IO_MACRO_CONFIG2); - rgmii_updatel(ethqos, RGMII_CONFIG2_TX_CLK_PHASE_SHIFT_EN, - phase_shift, RGMII_IO_MACRO_CONFIG2); rgmii_updatel(ethqos, RGMII_CONFIG_MAX_SPD_PRG_9, FIELD_PREP(RGMII_CONFIG_MAX_SPD_PRG_9, 19), RGMII_IO_MACRO_CONFIG); From 8b19a91844203db70584a2e56ad97d117e2c0277 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 27 Mar 2026 08:44:04 +0000 Subject: [PATCH 1046/1561] net: stmmac: qcom-ethqos: move 100M/10M speed programming Move the speed programming for 100M and 10M out of the switch. There is no programming done for 1G speed. It looks like there are two fields, 7:6 which are programemd to '1' to select a /2 divisor for 100M, and bits 16:8 which are programmed to '19' to select a /20 divisor. Signed-off-by: Russell King (Oracle) Tested-by: Mohd Ayaan Anwar Link: https://patch.msgid.link/E1w62nU-0000000E3CX-0KF9@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index ab6554f58214..2751def922df 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -425,6 +425,15 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) rgmii_updatel(ethqos, RGMII_CONFIG2_TX_CLK_PHASE_SHIFT_EN, phase_shift, RGMII_IO_MACRO_CONFIG2); + if (speed == SPEED_100) + rgmii_updatel(ethqos, RGMII_CONFIG_MAX_SPD_PRG_2, + FIELD_PREP(RGMII_CONFIG_MAX_SPD_PRG_2, 1), + RGMII_IO_MACRO_CONFIG); + else if (speed == SPEED_10) + rgmii_updatel(ethqos, RGMII_CONFIG_MAX_SPD_PRG_9, + FIELD_PREP(RGMII_CONFIG_MAX_SPD_PRG_9, 19), + RGMII_IO_MACRO_CONFIG); + switch (speed) { case SPEED_1000: rgmii_clrmask(ethqos, RGMII_CONFIG2_RSVD_CONFIG15, @@ -453,9 +462,6 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) break; case SPEED_100: - rgmii_updatel(ethqos, RGMII_CONFIG_MAX_SPD_PRG_2, - FIELD_PREP(RGMII_CONFIG_MAX_SPD_PRG_2, 1), - RGMII_IO_MACRO_CONFIG); rgmii_clrmask(ethqos, RGMII_CONFIG2_RSVD_CONFIG15, RGMII_IO_MACRO_CONFIG2); @@ -479,9 +485,6 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) break; case SPEED_10: - rgmii_updatel(ethqos, RGMII_CONFIG_MAX_SPD_PRG_9, - FIELD_PREP(RGMII_CONFIG_MAX_SPD_PRG_9, 19), - RGMII_IO_MACRO_CONFIG); rgmii_clrmask(ethqos, RGMII_CONFIG2_RSVD_CONFIG15, RGMII_IO_MACRO_CONFIG2); if (ethqos->has_emac_ge_3) From dae1de3df3e17b98fe78c4b1ba26322b03cd9f46 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 27 Mar 2026 08:44:09 +0000 Subject: [PATCH 1047/1561] net: stmmac: qcom-ethqos: move RGMII_CONFIG2_RSVD_CONFIG15 out All paths through the switch clear the RGMII_CONFIG2_RSVD_CONFIG15 field. move it out of the switch statement. Signed-off-by: Russell King (Oracle) Tested-by: Mohd Ayaan Anwar Link: https://patch.msgid.link/E1w62nZ-0000000E3Ce-0lyP@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 2751def922df..13f4df0a90d7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -434,10 +434,11 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) FIELD_PREP(RGMII_CONFIG_MAX_SPD_PRG_9, 19), RGMII_IO_MACRO_CONFIG); + rgmii_clrmask(ethqos, RGMII_CONFIG2_RSVD_CONFIG15, + RGMII_IO_MACRO_CONFIG2); + switch (speed) { case SPEED_1000: - rgmii_clrmask(ethqos, RGMII_CONFIG2_RSVD_CONFIG15, - RGMII_IO_MACRO_CONFIG2); rgmii_setmask(ethqos, RGMII_CONFIG2_RX_PROG_SWAP, RGMII_IO_MACRO_CONFIG2); @@ -462,9 +463,6 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) break; case SPEED_100: - rgmii_clrmask(ethqos, RGMII_CONFIG2_RSVD_CONFIG15, - RGMII_IO_MACRO_CONFIG2); - if (ethqos->has_emac_ge_3) rgmii_setmask(ethqos, RGMII_CONFIG2_RX_PROG_SWAP, RGMII_IO_MACRO_CONFIG2); @@ -485,8 +483,6 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) break; case SPEED_10: - rgmii_clrmask(ethqos, RGMII_CONFIG2_RSVD_CONFIG15, - RGMII_IO_MACRO_CONFIG2); if (ethqos->has_emac_ge_3) rgmii_setmask(ethqos, RGMII_CONFIG2_RX_PROG_SWAP, RGMII_IO_MACRO_CONFIG2); From 432c8a9f5528e362cc42de46901f25373a3a9928 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 27 Mar 2026 08:44:14 +0000 Subject: [PATCH 1048/1561] net: stmmac: qcom-ethqos: move RGMII_CONFIG2_RX_PROG_SWAP Move RGMII_CONFIG2_RX_PROG_SWAP out of the switch. 1G speed always sets this field. 100M and 10M sets it for has_emac_ge_3 devices, otherwise it is cleared. Signed-off-by: Russell King (Oracle) Tested-by: Mohd Ayaan Anwar Link: https://patch.msgid.link/E1w62ne-0000000E3Ck-1Haf@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../stmicro/stmmac/dwmac-qcom-ethqos.c | 21 ++++++------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 13f4df0a90d7..b4c61cb24e1d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -437,11 +437,15 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) rgmii_clrmask(ethqos, RGMII_CONFIG2_RSVD_CONFIG15, RGMII_IO_MACRO_CONFIG2); - switch (speed) { - case SPEED_1000: + if (speed == SPEED_1000 || ethqos->has_emac_ge_3) rgmii_setmask(ethqos, RGMII_CONFIG2_RX_PROG_SWAP, RGMII_IO_MACRO_CONFIG2); + else + rgmii_clrmask(ethqos, RGMII_CONFIG2_RX_PROG_SWAP, + RGMII_IO_MACRO_CONFIG2); + switch (speed) { + case SPEED_1000: /* PRG_RCLK_DLY = TCXO period * TCXO_CYCLES_CNT / 2 * RX delay ns, * in practice this becomes PRG_RCLK_DLY = 52 * 4 / 2 * RX delay ns */ @@ -463,13 +467,6 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) break; case SPEED_100: - if (ethqos->has_emac_ge_3) - rgmii_setmask(ethqos, RGMII_CONFIG2_RX_PROG_SWAP, - RGMII_IO_MACRO_CONFIG2); - else - rgmii_clrmask(ethqos, RGMII_CONFIG2_RX_PROG_SWAP, - RGMII_IO_MACRO_CONFIG2); - /* Write 0x5 to PRG_RCLK_DLY_CODE */ rgmii_updatel(ethqos, SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_CODE, FIELD_PREP(SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_CODE, @@ -483,12 +480,6 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) break; case SPEED_10: - if (ethqos->has_emac_ge_3) - rgmii_setmask(ethqos, RGMII_CONFIG2_RX_PROG_SWAP, - RGMII_IO_MACRO_CONFIG2); - else - rgmii_clrmask(ethqos, RGMII_CONFIG2_RX_PROG_SWAP, - RGMII_IO_MACRO_CONFIG2); /* Write 0x5 to PRG_RCLK_DLY_CODE */ rgmii_updatel(ethqos, SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_CODE, FIELD_PREP(SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_CODE, From 439a27f21ecc1c7c59d6e00c7c6b9be380f66a64 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 27 Mar 2026 08:44:19 +0000 Subject: [PATCH 1049/1561] net: stmmac: qcom-ethqos: finally eliminate the switch Move the RCLK delay configuration out of the switch, which just leaves the RGMII_CONFIG_LOOPBACK_EN setting in all three paths. This makes it trivial to eliminate the switch. Signed-off-by: Russell King (Oracle) Tested-by: Mohd Ayaan Anwar Link: https://patch.msgid.link/E1w62nj-0000000E3Cq-1lPL@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../stmicro/stmmac/dwmac-qcom-ethqos.c | 47 +++++++------------ 1 file changed, 16 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index b4c61cb24e1d..7690ae0bb008 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -444,8 +444,18 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) rgmii_clrmask(ethqos, RGMII_CONFIG2_RX_PROG_SWAP, RGMII_IO_MACRO_CONFIG2); - switch (speed) { - case SPEED_1000: + if (speed != SPEED_1000) { + /* Write 0x5 to PRG_RCLK_DLY_CODE */ + rgmii_updatel(ethqos, SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_CODE, + FIELD_PREP(SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_CODE, + 5), SDCC_HC_REG_DDR_CONFIG); + + rgmii_setmask(ethqos, SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY, + SDCC_HC_REG_DDR_CONFIG); + + rgmii_setmask(ethqos, SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_EN, + SDCC_HC_REG_DDR_CONFIG); + } else { /* PRG_RCLK_DLY = TCXO period * TCXO_CYCLES_CNT / 2 * RX delay ns, * in practice this becomes PRG_RCLK_DLY = 52 * 4 / 2 * RX delay ns */ @@ -460,39 +470,14 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) FIELD_PREP(SDCC_DDR_CONFIG_PRG_RCLK_DLY, 57), SDCC_HC_REG_DDR_CONFIG); } + rgmii_setmask(ethqos, SDCC_DDR_CONFIG_PRG_DLY_EN, SDCC_HC_REG_DDR_CONFIG); - rgmii_updatel(ethqos, RGMII_CONFIG_LOOPBACK_EN, - loopback, RGMII_IO_MACRO_CONFIG); - break; - - case SPEED_100: - /* Write 0x5 to PRG_RCLK_DLY_CODE */ - rgmii_updatel(ethqos, SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_CODE, - FIELD_PREP(SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_CODE, - 5), SDCC_HC_REG_DDR_CONFIG); - rgmii_setmask(ethqos, SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY, - SDCC_HC_REG_DDR_CONFIG); - rgmii_setmask(ethqos, SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_EN, - SDCC_HC_REG_DDR_CONFIG); - rgmii_updatel(ethqos, RGMII_CONFIG_LOOPBACK_EN, - loopback, RGMII_IO_MACRO_CONFIG); - break; - - case SPEED_10: - /* Write 0x5 to PRG_RCLK_DLY_CODE */ - rgmii_updatel(ethqos, SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_CODE, - FIELD_PREP(SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_CODE, - 5), SDCC_HC_REG_DDR_CONFIG); - rgmii_setmask(ethqos, SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY, - SDCC_HC_REG_DDR_CONFIG); - rgmii_setmask(ethqos, SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_EN, - SDCC_HC_REG_DDR_CONFIG); - rgmii_updatel(ethqos, RGMII_CONFIG_LOOPBACK_EN, - loopback, RGMII_IO_MACRO_CONFIG); - break; } + rgmii_updatel(ethqos, RGMII_CONFIG_LOOPBACK_EN, loopback, + RGMII_IO_MACRO_CONFIG); + return 0; } From 3df0e86f8f8d708a820f7844f1a5830839f78efd Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 27 Mar 2026 08:44:24 +0000 Subject: [PATCH 1050/1561] net: stmmac: qcom-ethqos: simplify prg_rclk_dly programming Rather than coding the entire register update twice with different values, use a local variable to specify the value and have one register update statement that uses this local variable. This results in neater code. Signed-off-by: Russell King (Oracle) Tested-by: Mohd Ayaan Anwar Link: https://patch.msgid.link/E1w62no-0000000E3Cw-2EmH@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 7690ae0bb008..580deec1dc30 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -374,6 +374,7 @@ static int ethqos_dll_configure(struct qcom_ethqos *ethqos) static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) { struct device *dev = ðqos->pdev->dev; + unsigned int prg_rclk_dly; int phase_shift; int loopback; @@ -461,16 +462,16 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) */ if (ethqos->has_emac_ge_3) { /* 0.9 ns */ - rgmii_updatel(ethqos, SDCC_DDR_CONFIG_PRG_RCLK_DLY, - FIELD_PREP(SDCC_DDR_CONFIG_PRG_RCLK_DLY, - 115), SDCC_HC_REG_DDR_CONFIG); + prg_rclk_dly = 115; } else { /* 1.8 ns */ - rgmii_updatel(ethqos, SDCC_DDR_CONFIG_PRG_RCLK_DLY, - FIELD_PREP(SDCC_DDR_CONFIG_PRG_RCLK_DLY, - 57), SDCC_HC_REG_DDR_CONFIG); + prg_rclk_dly = 57; } + rgmii_updatel(ethqos, SDCC_DDR_CONFIG_PRG_RCLK_DLY, + FIELD_PREP(SDCC_DDR_CONFIG_PRG_RCLK_DLY, + prg_rclk_dly), SDCC_HC_REG_DDR_CONFIG); + rgmii_setmask(ethqos, SDCC_DDR_CONFIG_PRG_DLY_EN, SDCC_HC_REG_DDR_CONFIG); } From 67343aa24e595ac2522d9f7e2daba7ec29f32926 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 27 Mar 2026 08:44:29 +0000 Subject: [PATCH 1051/1561] net: stmmac: qcom-ethqos: move loopback decision next to reg update Move the loopback decision next to the register update, and make the local variable unsigned. As a result, there is now no need for the comment referring to the programming being later. Signed-off-by: Russell King (Oracle) Tested-by: Mohd Ayaan Anwar Link: https://patch.msgid.link/E1w62nt-0000000E3D2-2fWk@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 580deec1dc30..9dae40c27903 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -374,9 +374,8 @@ static int ethqos_dll_configure(struct qcom_ethqos *ethqos) static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) { struct device *dev = ðqos->pdev->dev; - unsigned int prg_rclk_dly; + unsigned int prg_rclk_dly, loopback; int phase_shift; - int loopback; /* Determine if the PHY adds a 2 ns TX delay or the MAC handles it */ if (ethqos->phy_mode == PHY_INTERFACE_MODE_RGMII_ID || @@ -389,12 +388,6 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) rgmii_clrmask(ethqos, RGMII_CONFIG2_TX_TO_RX_LOOPBACK_EN, RGMII_IO_MACRO_CONFIG2); - /* Determine if this platform wants loopback enabled after programming */ - if (ethqos->rgmii_config_loopback_en) - loopback = RGMII_CONFIG_LOOPBACK_EN; - else - loopback = 0; - /* Select RGMII, write 0 to interface select */ rgmii_clrmask(ethqos, RGMII_CONFIG_INTF_SEL, RGMII_IO_MACRO_CONFIG); @@ -476,6 +469,11 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) SDCC_HC_REG_DDR_CONFIG); } + if (ethqos->rgmii_config_loopback_en) + loopback = RGMII_CONFIG_LOOPBACK_EN; + else + loopback = 0; + rgmii_updatel(ethqos, RGMII_CONFIG_LOOPBACK_EN, loopback, RGMII_IO_MACRO_CONFIG); From 2d350a892aad3b3dfdcfc170e7681f4095808a0c Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 27 Mar 2026 08:44:34 +0000 Subject: [PATCH 1052/1561] net: stmmac: qcom-ethqos: correct prg_rclk_dly comment The comment for calculating the prg_rclk_dly value is incorrect as it omits the brackets around the divisor. Add the brackets to allow the reader to correctly evaluate the value. Validated with the values given in the driver. Signed-off-by: Russell King (Oracle) Tested-by: Mohd Ayaan Anwar Link: https://patch.msgid.link/E1w62ny-0000000E3D8-38Yp@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 9dae40c27903..667676c29a95 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -450,8 +450,10 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) rgmii_setmask(ethqos, SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_EN, SDCC_HC_REG_DDR_CONFIG); } else { - /* PRG_RCLK_DLY = TCXO period * TCXO_CYCLES_CNT / 2 * RX delay ns, - * in practice this becomes PRG_RCLK_DLY = 52 * 4 / 2 * RX delay ns + /* PRG_RCLK_DLY = TCXO period * TCXO_CYCLES_CNT / + * (2 * RX delay ns), + * in practice this becomes PRG_RCLK_DLY = 52 * 4 / + * (2 * RX delay ns) */ if (ethqos->has_emac_ge_3) { /* 0.9 ns */ From 7f9f301660051353282e579affcd40e7075ab223 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 27 Mar 2026 08:44:39 +0000 Subject: [PATCH 1053/1561] net: stmmac: qcom-ethqos: move phase_shift to register update site Move the determination of the phase shift enable alongside the register update, and make "phase_shift" unsigned. Signed-off-by: Russell King (Oracle) Tested-by: Mohd Ayaan Anwar Link: https://patch.msgid.link/E1w62o3-0000000E3DE-3Vf8@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 667676c29a95..ad3a983d2a08 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -375,14 +375,7 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) { struct device *dev = ðqos->pdev->dev; unsigned int prg_rclk_dly, loopback; - int phase_shift; - - /* Determine if the PHY adds a 2 ns TX delay or the MAC handles it */ - if (ethqos->phy_mode == PHY_INTERFACE_MODE_RGMII_ID || - ethqos->phy_mode == PHY_INTERFACE_MODE_RGMII_TXID) - phase_shift = 0; - else - phase_shift = RGMII_CONFIG2_TX_CLK_PHASE_SHIFT_EN; + unsigned int phase_shift; /* Disable loopback mode */ rgmii_clrmask(ethqos, RGMII_CONFIG2_TX_TO_RX_LOOPBACK_EN, @@ -416,6 +409,14 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) rgmii_clrmask(ethqos, RGMII_CONFIG2_DATA_DIVIDE_CLK_SEL, RGMII_IO_MACRO_CONFIG2); + + /* Determine if the PHY adds a 2 ns TX delay or the MAC handles it */ + if (ethqos->phy_mode == PHY_INTERFACE_MODE_RGMII_ID || + ethqos->phy_mode == PHY_INTERFACE_MODE_RGMII_TXID) + phase_shift = 0; + else + phase_shift = RGMII_CONFIG2_TX_CLK_PHASE_SHIFT_EN; + rgmii_updatel(ethqos, RGMII_CONFIG2_TX_CLK_PHASE_SHIFT_EN, phase_shift, RGMII_IO_MACRO_CONFIG2); From f4ef5b1c13316fa518b7a41e6bff82c33239823c Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Sun, 29 Mar 2026 13:32:34 +0100 Subject: [PATCH 1054/1561] FDDI: defza: Rate-limit memory allocation errors Prevent the system from becoming unstable or unusable due to a flood of memory allocation error messages under memory pressure. Signed-off-by: Maciej W. Rozycki Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/alpine.DEB.2.21.2603291252380.60268@angie.orcam.me.uk Signed-off-by: Jakub Kicinski --- drivers/net/fddi/defza.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/fddi/defza.c b/drivers/net/fddi/defza.c index 2098c3068d34..064fa484f797 100644 --- a/drivers/net/fddi/defza.c +++ b/drivers/net/fddi/defza.c @@ -767,8 +767,9 @@ static void fza_rx(struct net_device *dev) fp->rx_dma[i] = dma; } else { fp->stats.rx_dropped++; - pr_notice("%s: memory squeeze, dropping packet\n", - fp->name); + pr_notice_ratelimited( + "%s: memory squeeze, dropping packet\n", + fp->name); } err_rx: From 7fae6616704a17c64438ad4b73a6effa6c03ffda Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Sun, 29 Mar 2026 13:32:25 +0100 Subject: [PATCH 1055/1561] FDDI: defxx: Rate-limit memory allocation errors Prevent the system from becoming unstable or unusable due to a flood of memory allocation error messages under memory pressure, e.g.: [...] fddi0: Could not allocate receive buffer. Dropping packet. fddi0: Could not allocate receive buffer. Dropping packet. fddi0: Could not allocate receive buffer. Dropping packet. fddi0: Could not allocate receive buffer. Dropping packet. rcu: INFO: rcu_sched self-detected stall on CPU rcu: 0-...!: (332 ticks this GP) idle=255c/1/0x40000000 softirq=16420123/16420123 fqs=0 rcu: (t=2103 jiffies g=35680089 q=4 ncpus=1) rcu: rcu_sched kthread timer wakeup didn't happen for 2102 jiffies! g35680089 f0x0 RCU_GP_WAIT_FQS(5) ->state=0x402 rcu: Possible timer handling issue on cpu=0 timer-softirq=12779658 rcu: rcu_sched kthread starved for 2103 jiffies! g35680089 f0x0 RCU_GP_WAIT_FQS(5) ->state=0x402 ->cpu=0 rcu: Unless rcu_sched kthread gets sufficient CPU time, OOM is now expected behavior. rcu: RCU grace-period kthread stack dump: task:rcu_sched state:I stack:0 pid:14 tgid:14 ppid:2 flags:0x00004000 Call Trace: __schedule+0x258/0x580 schedule+0x19/0xa0 schedule_timeout+0x4a/0xb0 ? hrtimers_cpu_dying+0x1b0/0x1b0 rcu_gp_fqs_loop+0xb1/0x450 rcu_gp_kthread+0x9d/0x130 kthread+0xb2/0xe0 ? rcu_gp_init+0x4a0/0x4a0 ? kthread_park+0x90/0x90 ret_from_fork+0x2d/0x50 ? kthread_park+0x90/0x90 ret_from_fork_asm+0x12/0x20 entry_INT80_32+0x10d/0x10d CPU: 0 UID: 500 PID: 21895 Comm: 31370.exe Not tainted 6.13.0-dirty #2 (here running the libstdc++-v3 testsuite). Signed-off-by: Maciej W. Rozycki Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/alpine.DEB.2.21.2603291236590.60268@angie.orcam.me.uk Signed-off-by: Jakub Kicinski --- drivers/net/fddi/defxx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/fddi/defxx.c b/drivers/net/fddi/defxx.c index 0fbbb7286008..6b8cfbee3b9d 100644 --- a/drivers/net/fddi/defxx.c +++ b/drivers/net/fddi/defxx.c @@ -3182,7 +3182,7 @@ static void dfx_rcv_queue_process( pkt_len + 3); if (skb == NULL) { - printk("%s: Could not allocate receive buffer. Dropping packet.\n", bp->dev->name); + printk_ratelimited("%s: Could not allocate receive buffer. Dropping packet.\n", bp->dev->name); bp->rcv_discards++; break; } From 48afd5124fd6129c46fd12cb06155384b1c4a0c4 Mon Sep 17 00:00:00 2001 From: Chih Kai Hsu Date: Thu, 26 Mar 2026 15:39:23 +0800 Subject: [PATCH 1056/1561] r8152: fix incorrect register write to USB_UPHY_XTAL The old code used ocp_write_byte() to clear the OOBS_POLLING bit (BIT(8)) in the USB_UPHY_XTAL register, but this doesn't correctly clear a bit in the upper byte of the 16-bit register. Fix this by using ocp_write_word() instead. Fixes: 195aae321c82 ("r8152: support new chips") Signed-off-by: Chih Kai Hsu Reviewed-by: Hayes Wang Link: https://patch.msgid.link/20260326073925.32976-454-nic_swsd@realtek.com Signed-off-by: Paolo Abeni --- drivers/net/usb/r8152.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 3b6d4252d34c..bef0611e7ef0 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3892,7 +3892,7 @@ static void r8156_ups_en(struct r8152 *tp, bool enable) case RTL_VER_15: ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_UPHY_XTAL); ocp_data &= ~OOBS_POLLING; - ocp_write_byte(tp, MCU_TYPE_USB, USB_UPHY_XTAL, ocp_data); + ocp_write_word(tp, MCU_TYPE_USB, USB_UPHY_XTAL, ocp_data); break; default: break; From d16133f177fea620d6b413990683b6a8b2641be5 Mon Sep 17 00:00:00 2001 From: Chih Kai Hsu Date: Thu, 26 Mar 2026 15:39:24 +0800 Subject: [PATCH 1057/1561] r8152: add helper functions for PLA/USB OCP registers Add the following bitwise operation functions for PLA/USB OCP registers to simplify the code. - ocp_dword_w0w1() - ocp_word_w0w1() - ocp_byte_w0w1() - ocp_dword_clr_bits() - ocp_dword_set_bits() - ocp_word_clr_bits() - ocp_word_set_bits() - ocp_word_test_and_clr_bits() - ocp_byte_clr_bits() - ocp_byte_set_bits() Signed-off-by: Chih Kai Hsu Reviewed-by: Hayes Wang Link: https://patch.msgid.link/20260326073925.32976-455-nic_swsd@realtek.com Signed-off-by: Paolo Abeni --- drivers/net/usb/r8152.c | 1101 +++++++++++++++------------------------ 1 file changed, 407 insertions(+), 694 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index bef0611e7ef0..32a4e8d42311 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1654,6 +1654,78 @@ void write_mii_word(struct net_device *netdev, int phy_id, int reg, int val) r8152_mdio_write(tp, reg, val); } +static void +ocp_dword_w0w1(struct r8152 *tp, u16 type, u16 index, u32 clear, u32 set) +{ + u32 ocp_data; + + ocp_data = ocp_read_dword(tp, type, index); + ocp_data = (ocp_data & ~clear) | set; + ocp_write_dword(tp, type, index, ocp_data); +} + +static void +ocp_word_w0w1(struct r8152 *tp, u16 type, u16 index, u16 clear, u16 set) +{ + u16 ocp_data; + + ocp_data = ocp_read_word(tp, type, index); + ocp_data = (ocp_data & ~clear) | set; + ocp_write_word(tp, type, index, ocp_data); +} + +static void +ocp_byte_w0w1(struct r8152 *tp, u16 type, u16 index, u8 clear, u8 set) +{ + u8 ocp_data; + + ocp_data = ocp_read_byte(tp, type, index); + ocp_data = (ocp_data & ~clear) | set; + ocp_write_byte(tp, type, index, ocp_data); +} + +static void ocp_dword_clr_bits(struct r8152 *tp, u16 type, u16 index, u32 clear) +{ + ocp_dword_w0w1(tp, type, index, clear, 0); +} + +static void ocp_dword_set_bits(struct r8152 *tp, u16 type, u16 index, u32 set) +{ + ocp_dword_w0w1(tp, type, index, 0, set); +} + +static void ocp_word_clr_bits(struct r8152 *tp, u16 type, u16 index, u16 clear) +{ + ocp_word_w0w1(tp, type, index, clear, 0); +} + +static void ocp_word_set_bits(struct r8152 *tp, u16 type, u16 index, u16 set) +{ + ocp_word_w0w1(tp, type, index, 0, set); +} + +static int +ocp_word_test_and_clr_bits(struct r8152 *tp, u16 type, u16 index, u16 clear) +{ + u16 ocp_data; + + ocp_data = ocp_read_word(tp, type, index); + if (ocp_data & clear) + ocp_write_word(tp, type, index, ocp_data & ~clear); + + return ocp_data & clear; +} + +static void ocp_byte_clr_bits(struct r8152 *tp, u16 type, u16 index, u8 clear) +{ + ocp_byte_w0w1(tp, type, index, clear, 0); +} + +static void ocp_byte_set_bits(struct r8152 *tp, u16 type, u16 index, u8 set) +{ + ocp_byte_w0w1(tp, type, index, 0, set); +} + static int r8152_submit_rx(struct r8152 *tp, struct rx_agg *agg, gfp_t mem_flags); @@ -2956,47 +3028,31 @@ static netdev_tx_t rtl8152_start_xmit(struct sk_buff *skb, static void r8152b_reset_packet_filter(struct r8152 *tp) { - u32 ocp_data; - - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_FMC); - ocp_data &= ~FMC_FCR_MCU_EN; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_FMC, ocp_data); - ocp_data |= FMC_FCR_MCU_EN; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_FMC, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_FMC, FMC_FCR_MCU_EN); + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_FMC, FMC_FCR_MCU_EN); } static void rtl8152_nic_reset(struct r8152 *tp) { - u32 ocp_data; int i; switch (tp->version) { case RTL_TEST_01: case RTL_VER_10: case RTL_VER_11: - ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_CR); - ocp_data &= ~CR_TE; - ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CR, ocp_data); + ocp_byte_clr_bits(tp, MCU_TYPE_PLA, PLA_CR, CR_TE); - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_BMU_RESET); - ocp_data &= ~BMU_RESET_EP_IN; - ocp_write_word(tp, MCU_TYPE_USB, USB_BMU_RESET, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_BMU_RESET, + BMU_RESET_EP_IN); - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_USB_CTRL); - ocp_data |= CDC_ECM_EN; - ocp_write_word(tp, MCU_TYPE_USB, USB_USB_CTRL, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_USB, USB_USB_CTRL, CDC_ECM_EN); - ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_CR); - ocp_data &= ~CR_RE; - ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CR, ocp_data); + ocp_byte_clr_bits(tp, MCU_TYPE_PLA, PLA_CR, CR_RE); - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_BMU_RESET); - ocp_data |= BMU_RESET_EP_IN; - ocp_write_word(tp, MCU_TYPE_USB, USB_BMU_RESET, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_USB, USB_BMU_RESET, + BMU_RESET_EP_IN); - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_USB_CTRL); - ocp_data &= ~CDC_ECM_EN; - ocp_write_word(tp, MCU_TYPE_USB, USB_USB_CTRL, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_USB_CTRL, CDC_ECM_EN); break; default: @@ -3025,14 +3081,12 @@ static inline u16 rtl8152_get_speed(struct r8152 *tp) static void rtl_eee_plus_en(struct r8152 *tp, bool enable) { - u32 ocp_data; - - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEEP_CR); if (enable) - ocp_data |= EEEP_CR_EEEP_TX; + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_EEEP_CR, + EEEP_CR_EEEP_TX); else - ocp_data &= ~EEEP_CR_EEEP_TX; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEEP_CR, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_EEEP_CR, + EEEP_CR_EEEP_TX); } static void rtl_set_eee_plus(struct r8152 *tp) @@ -3045,14 +3099,10 @@ static void rtl_set_eee_plus(struct r8152 *tp) static void rxdy_gated_en(struct r8152 *tp, bool enable) { - u32 ocp_data; - - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_MISC_1); if (enable) - ocp_data |= RXDY_GATED_EN; + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_MISC_1, RXDY_GATED_EN); else - ocp_data &= ~RXDY_GATED_EN; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MISC_1, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_MISC_1, RXDY_GATED_EN); } static int rtl_start_rx(struct r8152 *tp) @@ -3140,24 +3190,16 @@ static int rtl_stop_rx(struct r8152 *tp) static void rtl_set_ifg(struct r8152 *tp, u16 speed) { - u32 ocp_data; - - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_TCR1); - ocp_data &= ~IFG_MASK; if ((speed & (_10bps | _100bps)) && !(speed & FULL_DUP)) { - ocp_data |= IFG_144NS; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_TCR1, ocp_data); + ocp_word_w0w1(tp, MCU_TYPE_PLA, PLA_TCR1, IFG_MASK, IFG_144NS); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4); - ocp_data &= ~TX10MIDLE_EN; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, + TX10MIDLE_EN); } else { - ocp_data |= IFG_96NS; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_TCR1, ocp_data); + ocp_word_w0w1(tp, MCU_TYPE_PLA, PLA_TCR1, IFG_MASK, IFG_96NS); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4); - ocp_data |= TX10MIDLE_EN; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, + TX10MIDLE_EN); } } @@ -3169,13 +3211,9 @@ static inline void r8153b_rx_agg_chg_indicate(struct r8152 *tp) static int rtl_enable(struct r8152 *tp) { - u32 ocp_data; - r8152b_reset_packet_filter(tp); - ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_CR); - ocp_data |= CR_RE | CR_TE; - ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CR, ocp_data); + ocp_byte_set_bits(tp, MCU_TYPE_PLA, PLA_CR, CR_RE | CR_TE); switch (tp->version) { case RTL_VER_01: @@ -3283,8 +3321,6 @@ static void r8153_set_rx_early_size(struct r8152 *tp) static int rtl8153_enable(struct r8152 *tp) { - u32 ocp_data; - if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return -ENODEV; @@ -3298,12 +3334,9 @@ static int rtl8153_enable(struct r8152 *tp) switch (tp->version) { case RTL_VER_09: case RTL_VER_14: - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_FW_TASK); - ocp_data &= ~FC_PATCH_TASK; - ocp_write_word(tp, MCU_TYPE_USB, USB_FW_TASK, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_FW_TASK, FC_PATCH_TASK); usleep_range(1000, 2000); - ocp_data |= FC_PATCH_TASK; - ocp_write_word(tp, MCU_TYPE_USB, USB_FW_TASK, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_USB, USB_FW_TASK, FC_PATCH_TASK); break; default: break; @@ -3322,9 +3355,7 @@ static void rtl_disable(struct r8152 *tp) return; } - ocp_data = ocp_read_dword(tp, MCU_TYPE_PLA, PLA_RCR); - ocp_data &= ~RCR_ACPT_ALL; - ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data); + ocp_dword_clr_bits(tp, MCU_TYPE_PLA, PLA_RCR, RCR_ACPT_ALL); rtl_drop_queued_tx(tp); @@ -3357,24 +3388,17 @@ static void rtl_disable(struct r8152 *tp) static void r8152_power_cut_en(struct r8152 *tp, bool enable) { - u32 ocp_data; - - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_UPS_CTRL); if (enable) - ocp_data |= POWER_CUT; + ocp_word_set_bits(tp, MCU_TYPE_USB, USB_UPS_CTRL, POWER_CUT); else - ocp_data &= ~POWER_CUT; - ocp_write_word(tp, MCU_TYPE_USB, USB_UPS_CTRL, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_UPS_CTRL, POWER_CUT); - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_PM_CTRL_STATUS); - ocp_data &= ~RESUME_INDICATE; - ocp_write_word(tp, MCU_TYPE_USB, USB_PM_CTRL_STATUS, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_PM_CTRL_STATUS, + RESUME_INDICATE); } static void rtl_rx_vlan_en(struct r8152 *tp, bool enable) { - u32 ocp_data; - switch (tp->version) { case RTL_VER_01: case RTL_VER_02: @@ -3386,12 +3410,12 @@ static void rtl_rx_vlan_en(struct r8152 *tp, bool enable) case RTL_VER_08: case RTL_VER_09: case RTL_VER_14: - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_CPCR); if (enable) - ocp_data |= CPCR_RX_VLAN; + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_CPCR, + CPCR_RX_VLAN); else - ocp_data &= ~CPCR_RX_VLAN; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_CPCR, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_CPCR, + CPCR_RX_VLAN); break; case RTL_TEST_01: @@ -3401,12 +3425,12 @@ static void rtl_rx_vlan_en(struct r8152 *tp, bool enable) case RTL_VER_13: case RTL_VER_15: default: - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_RCR1); if (enable) - ocp_data |= OUTER_VLAN | INNER_VLAN; + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_RCR1, + OUTER_VLAN | INNER_VLAN); else - ocp_data &= ~(OUTER_VLAN | INNER_VLAN); - ocp_write_word(tp, MCU_TYPE_PLA, PLA_RCR1, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_RCR1, + OUTER_VLAN | INNER_VLAN); break; } } @@ -3467,33 +3491,33 @@ static u32 __rtl_get_wol(struct r8152 *tp) static void __rtl_set_wol(struct r8152 *tp, u32 wolopts) { - u32 ocp_data; + u16 ocp_data; ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_CONFIG); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_CONFIG34); - ocp_data &= ~LINK_ON_WAKE_EN; if (wolopts & WAKE_PHY) - ocp_data |= LINK_ON_WAKE_EN; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_CONFIG34, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_CONFIG34, + LINK_ON_WAKE_EN); + else + ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_CONFIG34, + LINK_ON_WAKE_EN); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_CONFIG5); - ocp_data &= ~(UWF_EN | BWF_EN | MWF_EN); + ocp_data = 0; if (wolopts & WAKE_UCAST) ocp_data |= UWF_EN; if (wolopts & WAKE_BCAST) ocp_data |= BWF_EN; if (wolopts & WAKE_MCAST) ocp_data |= MWF_EN; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_CONFIG5, ocp_data); + ocp_word_w0w1(tp, MCU_TYPE_PLA, PLA_CONFIG5, UWF_EN | BWF_EN | MWF_EN, + ocp_data); ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_NORAML); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_CFG_WOL); - ocp_data &= ~MAGIC_EN; if (wolopts & WAKE_MAGIC) - ocp_data |= MAGIC_EN; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_CFG_WOL, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_CFG_WOL, MAGIC_EN); + else + ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_CFG_WOL, MAGIC_EN); if (wolopts & WAKE_ANY) device_set_wakeup_enable(&tp->udev->dev, true); @@ -3503,35 +3527,27 @@ static void __rtl_set_wol(struct r8152 *tp, u32 wolopts) static void r8153_mac_clk_speed_down(struct r8152 *tp, bool enable) { - u32 ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2); - /* MAC clock speed down */ if (enable) - ocp_data |= MAC_CLK_SPDWN_EN; + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2, + MAC_CLK_SPDWN_EN); else - ocp_data &= ~MAC_CLK_SPDWN_EN; - - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2, + MAC_CLK_SPDWN_EN); } static void r8156_mac_clk_spd(struct r8152 *tp, bool enable) { - u32 ocp_data; - /* MAC clock speed down */ if (enable) { /* aldps_spdwn_ratio, tp10_spdwn_ratio */ - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL, - 0x0403); + ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL, 0x0403); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2); - ocp_data &= ~EEE_SPDWN_RATIO_MASK; - ocp_data |= MAC_CLK_SPDWN_EN | 0x03; /* eee_spdwn_ratio */ - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2, ocp_data); + ocp_word_w0w1(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2, + EEE_SPDWN_RATIO_MASK, MAC_CLK_SPDWN_EN | 0x03); } else { - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2); - ocp_data &= ~MAC_CLK_SPDWN_EN; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2, + MAC_CLK_SPDWN_EN); } } @@ -3549,27 +3565,20 @@ static void r8153_u1u2en(struct r8152 *tp, bool enable) static void r8153b_u1u2en(struct r8152 *tp, bool enable) { - u32 ocp_data; - - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_LPM_CONFIG); if (enable) - ocp_data |= LPM_U1U2_EN; + ocp_word_set_bits(tp, MCU_TYPE_USB, USB_LPM_CONFIG, + LPM_U1U2_EN); else - ocp_data &= ~LPM_U1U2_EN; - - ocp_write_word(tp, MCU_TYPE_USB, USB_LPM_CONFIG, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_LPM_CONFIG, + LPM_U1U2_EN); } static void r8153_u2p3en(struct r8152 *tp, bool enable) { - u32 ocp_data; - - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_U2P3_CTRL); if (enable) - ocp_data |= U2P3_ENABLE; + ocp_word_set_bits(tp, MCU_TYPE_USB, USB_U2P3_CTRL, U2P3_ENABLE); else - ocp_data &= ~U2P3_ENABLE; - ocp_write_word(tp, MCU_TYPE_USB, USB_U2P3_CTRL, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_U2P3_CTRL, U2P3_ENABLE); } static void r8153b_ups_flags(struct r8152 *tp) @@ -3784,24 +3793,20 @@ static u16 r8153_phy_status(struct r8152 *tp, u16 desired) static void r8153b_ups_en(struct r8152 *tp, bool enable) { - u32 ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_POWER_CUT); - if (enable) { r8153b_ups_flags(tp); - ocp_data |= UPS_EN | USP_PREWAKE | PHASE2_EN; - ocp_write_byte(tp, MCU_TYPE_USB, USB_POWER_CUT, ocp_data); + ocp_byte_set_bits(tp, MCU_TYPE_USB, USB_POWER_CUT, + UPS_EN | USP_PREWAKE | PHASE2_EN); - ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_MISC_2); - ocp_data |= UPS_FORCE_PWR_DOWN; - ocp_write_byte(tp, MCU_TYPE_USB, USB_MISC_2, ocp_data); + ocp_byte_set_bits(tp, MCU_TYPE_USB, USB_MISC_2, + UPS_FORCE_PWR_DOWN); } else { - ocp_data &= ~(UPS_EN | USP_PREWAKE); - ocp_write_byte(tp, MCU_TYPE_USB, USB_POWER_CUT, ocp_data); + ocp_byte_clr_bits(tp, MCU_TYPE_USB, USB_POWER_CUT, + UPS_EN | USP_PREWAKE); - ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_MISC_2); - ocp_data &= ~UPS_FORCE_PWR_DOWN; - ocp_write_byte(tp, MCU_TYPE_USB, USB_MISC_2, ocp_data); + ocp_byte_clr_bits(tp, MCU_TYPE_USB, USB_MISC_2, + UPS_FORCE_PWR_DOWN); if (ocp_read_word(tp, MCU_TYPE_USB, USB_MISC_0) & PCUT_STATUS) { int i; @@ -3825,25 +3830,20 @@ static void r8153b_ups_en(struct r8152 *tp, bool enable) static void r8153c_ups_en(struct r8152 *tp, bool enable) { - u32 ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_POWER_CUT); - if (enable) { r8153b_ups_flags(tp); - ocp_data |= UPS_EN | USP_PREWAKE | PHASE2_EN; - ocp_write_byte(tp, MCU_TYPE_USB, USB_POWER_CUT, ocp_data); + ocp_byte_set_bits(tp, MCU_TYPE_USB, USB_POWER_CUT, + UPS_EN | USP_PREWAKE | PHASE2_EN); - ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_MISC_2); - ocp_data |= UPS_FORCE_PWR_DOWN; - ocp_data &= ~BIT(7); - ocp_write_byte(tp, MCU_TYPE_USB, USB_MISC_2, ocp_data); + ocp_byte_w0w1(tp, MCU_TYPE_USB, USB_MISC_2, BIT(7), + UPS_FORCE_PWR_DOWN); } else { - ocp_data &= ~(UPS_EN | USP_PREWAKE); - ocp_write_byte(tp, MCU_TYPE_USB, USB_POWER_CUT, ocp_data); + ocp_byte_clr_bits(tp, MCU_TYPE_USB, USB_POWER_CUT, + UPS_EN | USP_PREWAKE); - ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_MISC_2); - ocp_data &= ~UPS_FORCE_PWR_DOWN; - ocp_write_byte(tp, MCU_TYPE_USB, USB_MISC_2, ocp_data); + ocp_byte_clr_bits(tp, MCU_TYPE_USB, USB_MISC_2, + UPS_FORCE_PWR_DOWN); if (ocp_read_word(tp, MCU_TYPE_USB, USB_MISC_0) & PCUT_STATUS) { int i; @@ -3865,9 +3865,7 @@ static void r8153c_ups_en(struct r8152 *tp, bool enable) ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_CONFIG); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_CONFIG34); - ocp_data |= BIT(8); - ocp_write_word(tp, MCU_TYPE_PLA, PLA_CONFIG34, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_CONFIG34, BIT(8)); ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_NORAML); } @@ -3875,35 +3873,30 @@ static void r8153c_ups_en(struct r8152 *tp, bool enable) static void r8156_ups_en(struct r8152 *tp, bool enable) { - u32 ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_POWER_CUT); - if (enable) { r8156_ups_flags(tp); - ocp_data |= UPS_EN | USP_PREWAKE | PHASE2_EN; - ocp_write_byte(tp, MCU_TYPE_USB, USB_POWER_CUT, ocp_data); + ocp_byte_set_bits(tp, MCU_TYPE_USB, USB_POWER_CUT, + UPS_EN | USP_PREWAKE | PHASE2_EN); - ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_MISC_2); - ocp_data |= UPS_FORCE_PWR_DOWN; - ocp_write_byte(tp, MCU_TYPE_USB, USB_MISC_2, ocp_data); + ocp_byte_set_bits(tp, MCU_TYPE_USB, USB_MISC_2, + UPS_FORCE_PWR_DOWN); switch (tp->version) { case RTL_VER_13: case RTL_VER_15: - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_UPHY_XTAL); - ocp_data &= ~OOBS_POLLING; - ocp_write_word(tp, MCU_TYPE_USB, USB_UPHY_XTAL, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_UPHY_XTAL, + OOBS_POLLING); break; default: break; } } else { - ocp_data &= ~(UPS_EN | USP_PREWAKE); - ocp_write_byte(tp, MCU_TYPE_USB, USB_POWER_CUT, ocp_data); + ocp_byte_clr_bits(tp, MCU_TYPE_USB, USB_POWER_CUT, + UPS_EN | USP_PREWAKE); - ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_MISC_2); - ocp_data &= ~UPS_FORCE_PWR_DOWN; - ocp_write_byte(tp, MCU_TYPE_USB, USB_MISC_2, ocp_data); + ocp_byte_clr_bits(tp, MCU_TYPE_USB, USB_MISC_2, + UPS_FORCE_PWR_DOWN); if (ocp_read_word(tp, MCU_TYPE_USB, USB_MISC_0) & PCUT_STATUS) { tp->rtl_ops.hw_phy_cfg(tp); @@ -3916,54 +3909,38 @@ static void r8156_ups_en(struct r8152 *tp, bool enable) static void r8153_power_cut_en(struct r8152 *tp, bool enable) { - u32 ocp_data; - - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_POWER_CUT); if (enable) - ocp_data |= PWR_EN | PHASE2_EN; + ocp_word_set_bits(tp, MCU_TYPE_USB, USB_POWER_CUT, + PWR_EN | PHASE2_EN); else - ocp_data &= ~(PWR_EN | PHASE2_EN); - ocp_write_word(tp, MCU_TYPE_USB, USB_POWER_CUT, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_POWER_CUT, + PWR_EN | PHASE2_EN); - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_MISC_0); - ocp_data &= ~PCUT_STATUS; - ocp_write_word(tp, MCU_TYPE_USB, USB_MISC_0, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_MISC_0, PCUT_STATUS); } static void r8153b_power_cut_en(struct r8152 *tp, bool enable) { - u32 ocp_data; - - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_POWER_CUT); if (enable) - ocp_data |= PWR_EN | PHASE2_EN; + ocp_word_set_bits(tp, MCU_TYPE_USB, USB_POWER_CUT, + PWR_EN | PHASE2_EN); else - ocp_data &= ~PWR_EN; - ocp_write_word(tp, MCU_TYPE_USB, USB_POWER_CUT, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_POWER_CUT, PWR_EN); - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_MISC_0); - ocp_data &= ~PCUT_STATUS; - ocp_write_word(tp, MCU_TYPE_USB, USB_MISC_0, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_MISC_0, PCUT_STATUS); } static void r8153_queue_wake(struct r8152 *tp, bool enable) { - u32 ocp_data; - - ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_INDICATE_FALG); if (enable) - ocp_data |= UPCOMING_RUNTIME_D3; + ocp_byte_set_bits(tp, MCU_TYPE_PLA, PLA_INDICATE_FALG, + UPCOMING_RUNTIME_D3); else - ocp_data &= ~UPCOMING_RUNTIME_D3; - ocp_write_byte(tp, MCU_TYPE_PLA, PLA_INDICATE_FALG, ocp_data); + ocp_byte_clr_bits(tp, MCU_TYPE_PLA, PLA_INDICATE_FALG, + UPCOMING_RUNTIME_D3); - ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_SUSPEND_FLAG); - ocp_data &= ~LINK_CHG_EVENT; - ocp_write_byte(tp, MCU_TYPE_PLA, PLA_SUSPEND_FLAG, ocp_data); - - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS); - ocp_data &= ~LINK_CHANGE_FLAG; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS, ocp_data); + ocp_byte_clr_bits(tp, MCU_TYPE_PLA, PLA_SUSPEND_FLAG, LINK_CHG_EVENT); + ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS, LINK_CHANGE_FLAG); } static bool rtl_can_wakeup(struct r8152 *tp) @@ -3976,27 +3953,21 @@ static bool rtl_can_wakeup(struct r8152 *tp) static void rtl_runtime_suspend_enable(struct r8152 *tp, bool enable) { if (enable) { - u32 ocp_data; - __rtl_set_wol(tp, WAKE_ANY); ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_CONFIG); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_CONFIG34); - ocp_data |= LINK_OFF_WAKE_EN; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_CONFIG34, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_CONFIG34, + LINK_OFF_WAKE_EN); ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_NORAML); } else { - u32 ocp_data; - __rtl_set_wol(tp, tp->saved_wolopts); ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_CONFIG); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_CONFIG34); - ocp_data &= ~LINK_OFF_WAKE_EN; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_CONFIG34, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_CONFIG34, + LINK_OFF_WAKE_EN); ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_NORAML); } @@ -4077,8 +4048,6 @@ static void rtl8156_runtime_enable(struct r8152 *tp, bool enable) static void r8153_teredo_off(struct r8152 *tp) { - u32 ocp_data; - switch (tp->version) { case RTL_VER_01: case RTL_VER_02: @@ -4087,10 +4056,9 @@ static void r8153_teredo_off(struct r8152 *tp) case RTL_VER_05: case RTL_VER_06: case RTL_VER_07: - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_TEREDO_CFG); - ocp_data &= ~(TEREDO_SEL | TEREDO_RS_EVENT_MASK | - OOB_TEREDO_EN); - ocp_write_word(tp, MCU_TYPE_PLA, PLA_TEREDO_CFG, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_TEREDO_CFG, + TEREDO_SEL | TEREDO_RS_EVENT_MASK | + OOB_TEREDO_EN); break; case RTL_VER_08: @@ -4117,13 +4085,10 @@ static void r8153_teredo_off(struct r8152 *tp) static void rtl_reset_bmu(struct r8152 *tp) { - u32 ocp_data; - - ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_BMU_RESET); - ocp_data &= ~(BMU_RESET_EP_IN | BMU_RESET_EP_OUT); - ocp_write_byte(tp, MCU_TYPE_USB, USB_BMU_RESET, ocp_data); - ocp_data |= BMU_RESET_EP_IN | BMU_RESET_EP_OUT; - ocp_write_byte(tp, MCU_TYPE_USB, USB_BMU_RESET, ocp_data); + ocp_byte_clr_bits(tp, MCU_TYPE_USB, USB_BMU_RESET, + BMU_RESET_EP_IN | BMU_RESET_EP_OUT); + ocp_byte_set_bits(tp, MCU_TYPE_USB, USB_BMU_RESET, + BMU_RESET_EP_IN | BMU_RESET_EP_OUT); } /* Clear the bp to stop the firmware before loading a new one */ @@ -4944,7 +4909,7 @@ static void rtl_ram_code_speed_up(struct r8152 *tp, struct fw_phy_speed_up *phy, return; while (len) { - u32 ocp_data, size; + u32 size; int i; if (len < 2048) @@ -4952,18 +4917,16 @@ static void rtl_ram_code_speed_up(struct r8152 *tp, struct fw_phy_speed_up *phy, else size = 2048; - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_GPHY_CTRL); - ocp_data |= GPHY_PATCH_DONE | BACKUP_RESTRORE; - ocp_write_word(tp, MCU_TYPE_USB, USB_GPHY_CTRL, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_USB, USB_GPHY_CTRL, + GPHY_PATCH_DONE | BACKUP_RESTRORE); generic_ocp_write(tp, __le16_to_cpu(phy->fw_reg), 0xff, size, data, MCU_TYPE_USB); data += size; len -= size; - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_POL_GPIO_CTRL); - ocp_data |= POL_GPHY_PATCH; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_POL_GPIO_CTRL, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_POL_GPIO_CTRL, + POL_GPHY_PATCH); for (i = 0; i < 1000; i++) { if (!(ocp_read_word(tp, MCU_TYPE_PLA, PLA_POL_GPIO_CTRL) & POL_GPHY_PATCH)) @@ -5337,21 +5300,21 @@ static void r8152_mmd_write(struct r8152 *tp, u16 dev, u16 reg, u16 data) static void r8152_eee_en(struct r8152 *tp, bool enable) { u16 config1, config2, config3; - u32 ocp_data; - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR); config1 = ocp_reg_read(tp, OCP_EEE_CONFIG1) & ~sd_rise_time_mask; config2 = ocp_reg_read(tp, OCP_EEE_CONFIG2); config3 = ocp_reg_read(tp, OCP_EEE_CONFIG3) & ~fast_snr_mask; if (enable) { - ocp_data |= EEE_RX_EN | EEE_TX_EN; + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_EEE_CR, + EEE_RX_EN | EEE_TX_EN); config1 |= EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN | RX_QUIET_EN; config1 |= sd_rise_time(1); config2 |= RG_DACQUIET_EN | RG_LDVQUIET_EN; config3 |= fast_snr(42); } else { - ocp_data &= ~(EEE_RX_EN | EEE_TX_EN); + ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_EEE_CR, + EEE_RX_EN | EEE_TX_EN); config1 &= ~(EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN | RX_QUIET_EN); config1 |= sd_rise_time(7); @@ -5359,7 +5322,6 @@ static void r8152_eee_en(struct r8152 *tp, bool enable) config3 |= fast_snr(511); } - ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data); ocp_reg_write(tp, OCP_EEE_CONFIG1, config1); ocp_reg_write(tp, OCP_EEE_CONFIG2, config2); ocp_reg_write(tp, OCP_EEE_CONFIG3, config3); @@ -5367,21 +5329,20 @@ static void r8152_eee_en(struct r8152 *tp, bool enable) static void r8153_eee_en(struct r8152 *tp, bool enable) { - u32 ocp_data; u16 config; - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR); config = ocp_reg_read(tp, OCP_EEE_CFG); if (enable) { - ocp_data |= EEE_RX_EN | EEE_TX_EN; + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_EEE_CR, + EEE_RX_EN | EEE_TX_EN); config |= EEE10_EN; } else { - ocp_data &= ~(EEE_RX_EN | EEE_TX_EN); + ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_EEE_CR, + EEE_RX_EN | EEE_TX_EN); config &= ~EEE10_EN; } - ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data); ocp_reg_write(tp, OCP_EEE_CFG, config); tp->ups_info.eee = enable; @@ -5512,30 +5473,20 @@ static void r8156b_wait_loading_flash(struct r8152 *tp) static void r8152b_exit_oob(struct r8152 *tp) { - u32 ocp_data; - - ocp_data = ocp_read_dword(tp, MCU_TYPE_PLA, PLA_RCR); - ocp_data &= ~RCR_ACPT_ALL; - ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data); + ocp_dword_clr_bits(tp, MCU_TYPE_PLA, PLA_RCR, RCR_ACPT_ALL); rxdy_gated_en(tp, true); r8153_teredo_off(tp); ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_NORAML); ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CR, 0x00); - ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL); - ocp_data &= ~NOW_IS_OOB; - ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data); + ocp_byte_clr_bits(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, NOW_IS_OOB); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7); - ocp_data &= ~MCU_BORW_EN; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, MCU_BORW_EN); wait_oob_link_list_ready(tp); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7); - ocp_data |= RE_INIT_LL; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, RE_INIT_LL); wait_oob_link_list_ready(tp); @@ -5571,18 +5522,12 @@ static void r8152b_exit_oob(struct r8152 *tp) ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, RTL8152_RMS); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_TCR0); - ocp_data |= TCR0_AUTO_FIFO; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_TCR0, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_TCR0, TCR0_AUTO_FIFO); } static void r8152b_enter_oob(struct r8152 *tp) { - u32 ocp_data; - - ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL); - ocp_data &= ~NOW_IS_OOB; - ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data); + ocp_byte_clr_bits(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, NOW_IS_OOB); ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RXFIFO_CTRL0, RXFIFO_THR1_OOB); ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RXFIFO_CTRL1, RXFIFO_THR2_OOB); @@ -5592,9 +5537,7 @@ static void r8152b_enter_oob(struct r8152 *tp) wait_oob_link_list_ready(tp); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7); - ocp_data |= RE_INIT_LL; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, RE_INIT_LL); wait_oob_link_list_ready(tp); @@ -5602,19 +5545,15 @@ static void r8152b_enter_oob(struct r8152 *tp) rtl_rx_vlan_en(tp, true); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_BDC_CR); - ocp_data |= ALDPS_PROXY_MODE; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_BDC_CR, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_BDC_CR, ALDPS_PROXY_MODE); - ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL); - ocp_data |= NOW_IS_OOB | DIS_MCU_CLROOB; - ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data); + ocp_byte_set_bits(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, + NOW_IS_OOB | DIS_MCU_CLROOB); rxdy_gated_en(tp, false); - ocp_data = ocp_read_dword(tp, MCU_TYPE_PLA, PLA_RCR); - ocp_data |= RCR_APM | RCR_AM | RCR_AB; - ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data); + ocp_dword_set_bits(tp, MCU_TYPE_PLA, PLA_RCR, + RCR_APM | RCR_AM | RCR_AB); } static int r8153_pre_firmware_1(struct r8152 *tp) @@ -5649,27 +5588,18 @@ static int r8153_post_firmware_1(struct r8152 *tp) static int r8153_pre_firmware_2(struct r8152 *tp) { - u32 ocp_data; - r8153_pre_firmware_1(tp); - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_FW_FIX_EN0); - ocp_data &= ~FW_FIX_SUSPEND; - ocp_write_word(tp, MCU_TYPE_USB, USB_FW_FIX_EN0, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_FW_FIX_EN0, FW_FIX_SUSPEND); return 0; } static int r8153_post_firmware_2(struct r8152 *tp) { - u32 ocp_data; - /* enable bp0 if support USB_SPEED_SUPER only */ - if (ocp_read_byte(tp, MCU_TYPE_USB, USB_CSTMR) & FORCE_SUPER) { - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_BP_EN); - ocp_data |= BIT(0); - ocp_write_word(tp, MCU_TYPE_PLA, PLA_BP_EN, ocp_data); - } + if (ocp_read_byte(tp, MCU_TYPE_USB, USB_CSTMR) & FORCE_SUPER) + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_BP_EN, BIT(0)); /* reset UPHY timer to 36 ms */ ocp_write_word(tp, MCU_TYPE_PLA, PLA_UPHY_TIMER, 36000 / 16); @@ -5677,28 +5607,20 @@ static int r8153_post_firmware_2(struct r8152 *tp) /* enable U3P3 check, set the counter to 4 */ ocp_write_word(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS, U3P3_CHECK_EN | 4); - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_FW_FIX_EN0); - ocp_data |= FW_FIX_SUSPEND; - ocp_write_word(tp, MCU_TYPE_USB, USB_FW_FIX_EN0, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_USB, USB_FW_FIX_EN0, FW_FIX_SUSPEND); - ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_USB2PHY); - ocp_data |= USB2PHY_L1 | USB2PHY_SUSPEND; - ocp_write_byte(tp, MCU_TYPE_USB, USB_USB2PHY, ocp_data); + ocp_byte_set_bits(tp, MCU_TYPE_USB, USB_USB2PHY, + USB2PHY_L1 | USB2PHY_SUSPEND); return 0; } static int r8153_post_firmware_3(struct r8152 *tp) { - u32 ocp_data; + ocp_byte_set_bits(tp, MCU_TYPE_USB, USB_USB2PHY, + USB2PHY_L1 | USB2PHY_SUSPEND); - ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_USB2PHY); - ocp_data |= USB2PHY_L1 | USB2PHY_SUSPEND; - ocp_write_byte(tp, MCU_TYPE_USB, USB_USB2PHY, ocp_data); - - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_FW_FIX_EN1); - ocp_data |= FW_IP_RESET_EN; - ocp_write_word(tp, MCU_TYPE_USB, USB_FW_FIX_EN1, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_USB, USB_FW_FIX_EN1, FW_IP_RESET_EN); return 0; } @@ -5718,49 +5640,30 @@ static int r8153b_post_firmware_1(struct r8152 *tp) /* enable bp0 for RTL8153-BND */ ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_MISC_1); - if (ocp_data & BND_MASK) { - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_BP_EN); - ocp_data |= BIT(0); - ocp_write_word(tp, MCU_TYPE_PLA, PLA_BP_EN, ocp_data); - } + if (ocp_data & BND_MASK) + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_BP_EN, BIT(0)); - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_FW_CTRL); - ocp_data |= FLOW_CTRL_PATCH_OPT; - ocp_write_word(tp, MCU_TYPE_USB, USB_FW_CTRL, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_USB, USB_FW_CTRL, FLOW_CTRL_PATCH_OPT); - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_FW_TASK); - ocp_data |= FC_PATCH_TASK; - ocp_write_word(tp, MCU_TYPE_USB, USB_FW_TASK, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_USB, USB_FW_TASK, FC_PATCH_TASK); - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_FW_FIX_EN1); - ocp_data |= FW_IP_RESET_EN; - ocp_write_word(tp, MCU_TYPE_USB, USB_FW_FIX_EN1, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_USB, USB_FW_FIX_EN1, FW_IP_RESET_EN); return 0; } static int r8153c_post_firmware_1(struct r8152 *tp) { - u32 ocp_data; + ocp_word_set_bits(tp, MCU_TYPE_USB, USB_FW_CTRL, FLOW_CTRL_PATCH_2); - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_FW_CTRL); - ocp_data |= FLOW_CTRL_PATCH_2; - ocp_write_word(tp, MCU_TYPE_USB, USB_FW_CTRL, ocp_data); - - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_FW_TASK); - ocp_data |= FC_PATCH_TASK; - ocp_write_word(tp, MCU_TYPE_USB, USB_FW_TASK, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_USB, USB_FW_TASK, FC_PATCH_TASK); return 0; } static int r8156a_post_firmware_1(struct r8152 *tp) { - u32 ocp_data; - - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_FW_FIX_EN1); - ocp_data |= FW_IP_RESET_EN; - ocp_write_word(tp, MCU_TYPE_USB, USB_FW_FIX_EN1, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_USB, USB_FW_FIX_EN1, FW_IP_RESET_EN); /* Modify U3PHY parameter for compatibility issue */ ocp_write_dword(tp, MCU_TYPE_USB, USB_UPHY3_MDCMDIO, 0x4026840e); @@ -5796,7 +5699,6 @@ static void r8153_aldps_en(struct r8152 *tp, bool enable) static void r8153_hw_phy_cfg(struct r8152 *tp) { - u32 ocp_data; u16 data; /* disable ALDPS before updating the PHY parameters */ @@ -5823,11 +5725,10 @@ static void r8153_hw_phy_cfg(struct r8152 *tp) data = ocp_reg_read(tp, OCP_POWER_CFG); data |= EN_10M_PLLOFF; ocp_reg_write(tp, OCP_POWER_CFG, data); + sram_write(tp, SRAM_IMPEDANCE, 0x0b13); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_PHY_PWR); - ocp_data |= PFM_PWM_SWITCH; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_PHY_PWR, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_PHY_PWR, PFM_PWM_SWITCH); /* Enable LPF corner auto tune */ sram_write(tp, SRAM_LPF_CFG, 0xf70f); @@ -5873,11 +5774,7 @@ static void r8153b_hw_phy_cfg(struct r8152 *tp) u32 ocp_data; u16 data; - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_MISC_0); - if (ocp_data & PCUT_STATUS) { - ocp_data &= ~PCUT_STATUS; - ocp_write_word(tp, MCU_TYPE_USB, USB_MISC_0, ocp_data); - } + ocp_word_test_and_clr_bits(tp, MCU_TYPE_USB, USB_MISC_0, PCUT_STATUS); /* disable ALDPS before updating the PHY parameters */ r8153_aldps_en(tp, false); @@ -5934,14 +5831,11 @@ static void r8153b_hw_phy_cfg(struct r8152 *tp) u32 swr_cnt_1ms_ini; swr_cnt_1ms_ini = (16000000 / ocp_data) & SAW_CNT_1MS_MASK; - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_UPS_CFG); - ocp_data = (ocp_data & ~SAW_CNT_1MS_MASK) | swr_cnt_1ms_ini; - ocp_write_word(tp, MCU_TYPE_USB, USB_UPS_CFG, ocp_data); + ocp_word_w0w1(tp, MCU_TYPE_USB, USB_UPS_CFG, SAW_CNT_1MS_MASK, + swr_cnt_1ms_ini); } - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_PHY_PWR); - ocp_data |= PFM_PWM_SWITCH; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_PHY_PWR, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_PHY_PWR, PFM_PWM_SWITCH); /* Advnace EEE */ if (!rtl_phy_patch_request(tp, true, true)) { @@ -5988,31 +5882,21 @@ static void rtl8153_change_mtu(struct r8152 *tp) static void r8153_first_init(struct r8152 *tp) { - u32 ocp_data; - rxdy_gated_en(tp, true); r8153_teredo_off(tp); - ocp_data = ocp_read_dword(tp, MCU_TYPE_PLA, PLA_RCR); - ocp_data &= ~RCR_ACPT_ALL; - ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data); + ocp_dword_clr_bits(tp, MCU_TYPE_PLA, PLA_RCR, RCR_ACPT_ALL); rtl8152_nic_reset(tp); rtl_reset_bmu(tp); - ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL); - ocp_data &= ~NOW_IS_OOB; - ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data); + ocp_byte_clr_bits(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, NOW_IS_OOB); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7); - ocp_data &= ~MCU_BORW_EN; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, MCU_BORW_EN); wait_oob_link_list_ready(tp); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7); - ocp_data |= RE_INIT_LL; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, RE_INIT_LL); wait_oob_link_list_ready(tp); @@ -6020,9 +5904,7 @@ static void r8153_first_init(struct r8152 *tp) rtl8153_change_mtu(tp); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_TCR0); - ocp_data |= TCR0_AUTO_FIFO; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_TCR0, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_TCR0, TCR0_AUTO_FIFO); rtl8152_nic_reset(tp); @@ -6036,11 +5918,7 @@ static void r8153_first_init(struct r8152 *tp) static void r8153_enter_oob(struct r8152 *tp) { - u32 ocp_data; - - ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL); - ocp_data &= ~NOW_IS_OOB; - ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data); + ocp_byte_clr_bits(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, NOW_IS_OOB); /* RX FIFO settings for OOB */ ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RXFIFO_CTRL0, RXFIFO_THR1_OOB); @@ -6052,9 +5930,7 @@ static void r8153_enter_oob(struct r8152 *tp) wait_oob_link_list_ready(tp); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7); - ocp_data |= RE_INIT_LL; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, RE_INIT_LL); wait_oob_link_list_ready(tp); @@ -6066,9 +5942,8 @@ static void r8153_enter_oob(struct r8152 *tp) case RTL_VER_04: case RTL_VER_05: case RTL_VER_06: - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_TEREDO_CFG); - ocp_data &= ~TEREDO_WAKE_MASK; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_TEREDO_CFG, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_TEREDO_CFG, + TEREDO_WAKE_MASK); break; case RTL_VER_08: @@ -6087,23 +5962,17 @@ static void r8153_enter_oob(struct r8152 *tp) rtl_rx_vlan_en(tp, true); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_BDC_CR); - ocp_data |= ALDPS_PROXY_MODE; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_BDC_CR, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_BDC_CR, ALDPS_PROXY_MODE); - ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL); - ocp_data |= NOW_IS_OOB | DIS_MCU_CLROOB; - ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data); + ocp_byte_set_bits(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, + NOW_IS_OOB | DIS_MCU_CLROOB); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7); - ocp_data |= MCU_BORW_EN; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, MCU_BORW_EN); rxdy_gated_en(tp, false); - ocp_data = ocp_read_dword(tp, MCU_TYPE_PLA, PLA_RCR); - ocp_data |= RCR_APM | RCR_AM | RCR_AB; - ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data); + ocp_dword_set_bits(tp, MCU_TYPE_PLA, PLA_RCR, + RCR_APM | RCR_AM | RCR_AB); } static void rtl8153_disable(struct r8152 *tp) @@ -6135,7 +6004,6 @@ static void r8156_fc_parameter(struct r8152 *tp) static int rtl8156_enable(struct r8152 *tp) { - u32 ocp_data; u16 speed; if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) @@ -6150,12 +6018,12 @@ static int rtl8156_enable(struct r8152 *tp) speed = rtl8152_get_speed(tp); rtl_set_ifg(tp, speed); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4); if (speed & _2500bps) - ocp_data &= ~IDLE_SPDWN_EN; + ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, + IDLE_SPDWN_EN); else - ocp_data |= IDLE_SPDWN_EN; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, + IDLE_SPDWN_EN); if (speed & _1000bps) ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_TXTWSYS, 0x11); @@ -6164,21 +6032,15 @@ static int rtl8156_enable(struct r8152 *tp) if (tp->udev->speed == USB_SPEED_HIGH) { /* USB 0xb45e[3:0] l1_nyet_hird */ - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_L1_CTRL); - ocp_data &= ~0xf; if (is_flow_control(speed)) - ocp_data |= 0xf; + ocp_word_w0w1(tp, MCU_TYPE_USB, USB_L1_CTRL, 0xf, 0xf); else - ocp_data |= 0x1; - ocp_write_word(tp, MCU_TYPE_USB, USB_L1_CTRL, ocp_data); + ocp_word_w0w1(tp, MCU_TYPE_USB, USB_L1_CTRL, 0xf, 0x1); } - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_FW_TASK); - ocp_data &= ~FC_PATCH_TASK; - ocp_write_word(tp, MCU_TYPE_USB, USB_FW_TASK, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_FW_TASK, FC_PATCH_TASK); usleep_range(1000, 2000); - ocp_data |= FC_PATCH_TASK; - ocp_write_word(tp, MCU_TYPE_USB, USB_FW_TASK, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_USB, USB_FW_TASK, FC_PATCH_TASK); return rtl_enable(tp); } @@ -6193,7 +6055,6 @@ static void rtl8156_disable(struct r8152 *tp) static int rtl8156b_enable(struct r8152 *tp) { - u32 ocp_data; u16 speed; if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) @@ -6202,9 +6063,7 @@ static int rtl8156b_enable(struct r8152 *tp) set_tx_qlen(tp); rtl_set_eee_plus(tp); - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_RX_AGGR_NUM); - ocp_data &= ~RX_AGGR_NUM_MASK; - ocp_write_word(tp, MCU_TYPE_USB, USB_RX_AGGR_NUM, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_RX_AGGR_NUM, RX_AGGR_NUM_MASK); r8153_set_rx_early_timeout(tp); r8153_set_rx_early_size(tp); @@ -6212,29 +6071,23 @@ static int rtl8156b_enable(struct r8152 *tp) speed = rtl8152_get_speed(tp); rtl_set_ifg(tp, speed); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4); if (speed & _2500bps) - ocp_data &= ~IDLE_SPDWN_EN; + ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, + IDLE_SPDWN_EN); else - ocp_data |= IDLE_SPDWN_EN; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, + IDLE_SPDWN_EN); if (tp->udev->speed == USB_SPEED_HIGH) { - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_L1_CTRL); - ocp_data &= ~0xf; if (is_flow_control(speed)) - ocp_data |= 0xf; + ocp_word_w0w1(tp, MCU_TYPE_USB, USB_L1_CTRL, 0xf, 0xf); else - ocp_data |= 0x1; - ocp_write_word(tp, MCU_TYPE_USB, USB_L1_CTRL, ocp_data); + ocp_word_w0w1(tp, MCU_TYPE_USB, USB_L1_CTRL, 0xf, 0x1); } - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_FW_TASK); - ocp_data &= ~FC_PATCH_TASK; - ocp_write_word(tp, MCU_TYPE_USB, USB_FW_TASK, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_FW_TASK, FC_PATCH_TASK); usleep_range(1000, 2000); - ocp_data |= FC_PATCH_TASK; - ocp_write_word(tp, MCU_TYPE_USB, USB_FW_TASK, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_USB, USB_FW_TASK, FC_PATCH_TASK); return rtl_enable(tp); } @@ -6405,8 +6258,6 @@ static void rtl8152_down(struct r8152 *tp) static void rtl8153_up(struct r8152 *tp) { - u32 ocp_data; - if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; @@ -6415,17 +6266,11 @@ static void rtl8153_up(struct r8152 *tp) r8153_aldps_en(tp, false); r8153_first_init(tp); - ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_CONFIG6); - ocp_data |= LANWAKE_CLR_EN; - ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CONFIG6, ocp_data); + ocp_byte_set_bits(tp, MCU_TYPE_PLA, PLA_CONFIG6, LANWAKE_CLR_EN); - ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_LWAKE_CTRL_REG); - ocp_data &= ~LANWAKE_PIN; - ocp_write_byte(tp, MCU_TYPE_PLA, PLA_LWAKE_CTRL_REG, ocp_data); + ocp_byte_clr_bits(tp, MCU_TYPE_PLA, PLA_LWAKE_CTRL_REG, LANWAKE_PIN); - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_SSPHYLINK1); - ocp_data &= ~DELAY_PHY_PWR_CHG; - ocp_write_word(tp, MCU_TYPE_USB, USB_SSPHYLINK1, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_SSPHYLINK1, DELAY_PHY_PWR_CHG); r8153_aldps_en(tp, true); @@ -6445,16 +6290,12 @@ static void rtl8153_up(struct r8152 *tp) static void rtl8153_down(struct r8152 *tp) { - u32 ocp_data; - if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) { rtl_drop_queued_tx(tp); return; } - ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_CONFIG6); - ocp_data &= ~LANWAKE_CLR_EN; - ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CONFIG6, ocp_data); + ocp_byte_clr_bits(tp, MCU_TYPE_PLA, PLA_CONFIG6, LANWAKE_CLR_EN); r8153_u1u2en(tp, false); r8153_u2p3en(tp, false); @@ -6466,8 +6307,6 @@ static void rtl8153_down(struct r8152 *tp) static void rtl8153b_up(struct r8152 *tp) { - u32 ocp_data; - if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; @@ -6478,9 +6317,8 @@ static void rtl8153b_up(struct r8152 *tp) r8153_first_init(tp); ocp_write_dword(tp, MCU_TYPE_USB, USB_RX_BUF_TH, RX_THR_B); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3); - ocp_data &= ~PLA_MCU_SPDWN_EN; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, + PLA_MCU_SPDWN_EN); r8153_aldps_en(tp, true); @@ -6490,16 +6328,13 @@ static void rtl8153b_up(struct r8152 *tp) static void rtl8153b_down(struct r8152 *tp) { - u32 ocp_data; - if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) { rtl_drop_queued_tx(tp); return; } - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3); - ocp_data |= PLA_MCU_SPDWN_EN; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, + PLA_MCU_SPDWN_EN); r8153b_u1u2en(tp, false); r8153_u2p3en(tp, false); @@ -6527,8 +6362,6 @@ static void rtl8153c_change_mtu(struct r8152 *tp) static void rtl8153c_up(struct r8152 *tp) { - u32 ocp_data; - if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; @@ -6539,26 +6372,18 @@ static void rtl8153c_up(struct r8152 *tp) rxdy_gated_en(tp, true); r8153_teredo_off(tp); - ocp_data = ocp_read_dword(tp, MCU_TYPE_PLA, PLA_RCR); - ocp_data &= ~RCR_ACPT_ALL; - ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data); + ocp_dword_clr_bits(tp, MCU_TYPE_PLA, PLA_RCR, RCR_ACPT_ALL); rtl8152_nic_reset(tp); rtl_reset_bmu(tp); - ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL); - ocp_data &= ~NOW_IS_OOB; - ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data); + ocp_byte_clr_bits(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, NOW_IS_OOB); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7); - ocp_data &= ~MCU_BORW_EN; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, MCU_BORW_EN); wait_oob_link_list_ready(tp); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7); - ocp_data |= RE_INIT_LL; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, RE_INIT_LL); wait_oob_link_list_ready(tp); @@ -6578,15 +6403,12 @@ static void rtl8153c_up(struct r8152 *tp) ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_CONFIG); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_CONFIG34); - ocp_data |= BIT(8); - ocp_write_word(tp, MCU_TYPE_PLA, PLA_CONFIG34, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_CONFIG34, BIT(8)); ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_NORAML); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3); - ocp_data &= ~PLA_MCU_SPDWN_EN; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, + PLA_MCU_SPDWN_EN); r8153_aldps_en(tp, true); r8153b_u1u2en(tp, true); @@ -6608,8 +6430,6 @@ static void rtl8156_change_mtu(struct r8152 *tp) static void rtl8156_up(struct r8152 *tp) { - u32 ocp_data; - if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; @@ -6620,20 +6440,14 @@ static void rtl8156_up(struct r8152 *tp) rxdy_gated_en(tp, true); r8153_teredo_off(tp); - ocp_data = ocp_read_dword(tp, MCU_TYPE_PLA, PLA_RCR); - ocp_data &= ~RCR_ACPT_ALL; - ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data); + ocp_dword_clr_bits(tp, MCU_TYPE_PLA, PLA_RCR, RCR_ACPT_ALL); rtl8152_nic_reset(tp); rtl_reset_bmu(tp); - ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL); - ocp_data &= ~NOW_IS_OOB; - ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data); + ocp_byte_clr_bits(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, NOW_IS_OOB); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7); - ocp_data &= ~MCU_BORW_EN; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, MCU_BORW_EN); rtl_rx_vlan_en(tp, tp->netdev->features & NETIF_F_HW_VLAN_CTAG_RX); @@ -6643,27 +6457,21 @@ static void rtl8156_up(struct r8152 *tp) case RTL_TEST_01: case RTL_VER_10: case RTL_VER_11: - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_BMU_CONFIG); - ocp_data |= ACT_ODMA; - ocp_write_word(tp, MCU_TYPE_USB, USB_BMU_CONFIG, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_USB, USB_BMU_CONFIG, ACT_ODMA); break; default: break; } /* share FIFO settings */ - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_RXFIFO_FULL); - ocp_data &= ~RXFIFO_FULL_MASK; - ocp_data |= 0x08; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_RXFIFO_FULL, ocp_data); + ocp_word_w0w1(tp, MCU_TYPE_PLA, PLA_RXFIFO_FULL, RXFIFO_FULL_MASK, + 0x08); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3); - ocp_data &= ~PLA_MCU_SPDWN_EN; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, + PLA_MCU_SPDWN_EN); - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_SPEED_OPTION); - ocp_data &= ~(RG_PWRDN_EN | ALL_SPEED_OFF); - ocp_write_word(tp, MCU_TYPE_USB, USB_SPEED_OPTION, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_SPEED_OPTION, + RG_PWRDN_EN | ALL_SPEED_OFF); ocp_write_dword(tp, MCU_TYPE_USB, USB_RX_BUF_TH, 0x00600400); @@ -6681,25 +6489,20 @@ static void rtl8156_up(struct r8152 *tp) static void rtl8156_down(struct r8152 *tp) { - u32 ocp_data; - if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) { rtl_drop_queued_tx(tp); return; } - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3); - ocp_data |= PLA_MCU_SPDWN_EN; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, + PLA_MCU_SPDWN_EN); r8153b_u1u2en(tp, false); r8153_u2p3en(tp, false); r8153b_power_cut_en(tp, false); r8153_aldps_en(tp, false); - ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL); - ocp_data &= ~NOW_IS_OOB; - ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data); + ocp_byte_clr_bits(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, NOW_IS_OOB); /* RX FIFO settings for OOB */ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RXFIFO_FULL, 64 / 16); @@ -6718,20 +6521,15 @@ static void rtl8156_down(struct r8152 *tp) */ ocp_write_word(tp, MCU_TYPE_PLA, PLA_TEREDO_WAKE_BASE, 0x00ff); - ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL); - ocp_data |= NOW_IS_OOB; - ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data); + ocp_byte_set_bits(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, NOW_IS_OOB); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7); - ocp_data |= MCU_BORW_EN; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, MCU_BORW_EN); rtl_rx_vlan_en(tp, true); rxdy_gated_en(tp, false); - ocp_data = ocp_read_dword(tp, MCU_TYPE_PLA, PLA_RCR); - ocp_data |= RCR_APM | RCR_AM | RCR_AB; - ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data); + ocp_dword_set_bits(tp, MCU_TYPE_PLA, PLA_RCR, + RCR_APM | RCR_AM | RCR_AB); r8153_aldps_en(tp, true); } @@ -7008,11 +6806,7 @@ static int rtl8152_close(struct net_device *netdev) static void rtl_tally_reset(struct r8152 *tp) { - u32 ocp_data; - - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_RSTTALLY); - ocp_data |= TALLY_RESET; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_RSTTALLY, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_RSTTALLY, TALLY_RESET); } static void r8152b_init(struct r8152 *tp) @@ -7031,21 +6825,18 @@ static void r8152b_init(struct r8152 *tp) r8152_aldps_en(tp, false); - if (tp->version == RTL_VER_01) { - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_LED_FEATURE); - ocp_data &= ~LED_MODE_MASK; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_LED_FEATURE, ocp_data); - } + if (tp->version == RTL_VER_01) + ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_LED_FEATURE, + LED_MODE_MASK); r8152_power_cut_en(tp, false); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_PHY_PWR); - ocp_data |= TX_10M_IDLE_EN | PFM_PWM_SWITCH; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_PHY_PWR, ocp_data); - ocp_data = ocp_read_dword(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL); - ocp_data &= ~MCU_CLK_RATIO_MASK; - ocp_data |= MCU_CLK_RATIO | D3_CLK_GATED_EN; - ocp_write_dword(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_PHY_PWR, + TX_10M_IDLE_EN | PFM_PWM_SWITCH); + + ocp_dword_w0w1(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL, MCU_CLK_RATIO_MASK, + MCU_CLK_RATIO | D3_CLK_GATED_EN); + ocp_data = GPHY_STS_MSK | SPEED_DOWN_MSK | SPDWN_RXDV_MSK | SPDWN_LINKCHG_MSK; ocp_write_word(tp, MCU_TYPE_PLA, PLA_GPHY_INTR_IMR, ocp_data); @@ -7053,9 +6844,8 @@ static void r8152b_init(struct r8152 *tp) rtl_tally_reset(tp); /* enable rx aggregation */ - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_USB_CTRL); - ocp_data &= ~(RX_AGG_DISABLE | RX_ZERO_EN); - ocp_write_word(tp, MCU_TYPE_USB, USB_USB_CTRL, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_USB_CTRL, + RX_AGG_DISABLE | RX_ZERO_EN); } static void r8153_init(struct r8152 *tp) @@ -7096,55 +6886,43 @@ static void r8153_init(struct r8152 *tp) r8153_u2p3en(tp, false); if (tp->version == RTL_VER_04) { - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_SSPHYLINK2); - ocp_data &= ~pwd_dn_scale_mask; - ocp_data |= pwd_dn_scale(96); - ocp_write_word(tp, MCU_TYPE_USB, USB_SSPHYLINK2, ocp_data); + ocp_word_w0w1(tp, MCU_TYPE_USB, USB_SSPHYLINK2, + pwd_dn_scale_mask, pwd_dn_scale(96)); - ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_USB2PHY); - ocp_data |= USB2PHY_L1 | USB2PHY_SUSPEND; - ocp_write_byte(tp, MCU_TYPE_USB, USB_USB2PHY, ocp_data); + ocp_byte_set_bits(tp, MCU_TYPE_USB, USB_USB2PHY, + USB2PHY_L1 | USB2PHY_SUSPEND); } else if (tp->version == RTL_VER_05) { - ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_DMY_REG0); - ocp_data &= ~ECM_ALDPS; - ocp_write_byte(tp, MCU_TYPE_PLA, PLA_DMY_REG0, ocp_data); + ocp_byte_clr_bits(tp, MCU_TYPE_PLA, PLA_DMY_REG0, ECM_ALDPS); - ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_CSR_DUMMY1); if (ocp_read_word(tp, MCU_TYPE_USB, USB_BURST_SIZE) == 0) - ocp_data &= ~DYNAMIC_BURST; + ocp_byte_clr_bits(tp, MCU_TYPE_USB, USB_CSR_DUMMY1, + DYNAMIC_BURST); else - ocp_data |= DYNAMIC_BURST; - ocp_write_byte(tp, MCU_TYPE_USB, USB_CSR_DUMMY1, ocp_data); + ocp_byte_set_bits(tp, MCU_TYPE_USB, USB_CSR_DUMMY1, + DYNAMIC_BURST); } else if (tp->version == RTL_VER_06) { - ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_CSR_DUMMY1); if (ocp_read_word(tp, MCU_TYPE_USB, USB_BURST_SIZE) == 0) - ocp_data &= ~DYNAMIC_BURST; + ocp_byte_clr_bits(tp, MCU_TYPE_USB, USB_CSR_DUMMY1, + DYNAMIC_BURST); else - ocp_data |= DYNAMIC_BURST; - ocp_write_byte(tp, MCU_TYPE_USB, USB_CSR_DUMMY1, ocp_data); + ocp_byte_set_bits(tp, MCU_TYPE_USB, USB_CSR_DUMMY1, + DYNAMIC_BURST); r8153_queue_wake(tp, false); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS); if (rtl8152_get_speed(tp) & LINK_STATUS) - ocp_data |= CUR_LINK_OK; + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS, + CUR_LINK_OK | POLL_LINK_CHG); else - ocp_data &= ~CUR_LINK_OK; - ocp_data |= POLL_LINK_CHG; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS, ocp_data); + ocp_word_w0w1(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS, + CUR_LINK_OK, POLL_LINK_CHG); } - ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_CSR_DUMMY2); - ocp_data |= EP4_FULL_FC; - ocp_write_byte(tp, MCU_TYPE_USB, USB_CSR_DUMMY2, ocp_data); + ocp_byte_set_bits(tp, MCU_TYPE_USB, USB_CSR_DUMMY2, EP4_FULL_FC); - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_WDT11_CTRL); - ocp_data &= ~TIMER11_EN; - ocp_write_word(tp, MCU_TYPE_USB, USB_WDT11_CTRL, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_WDT11_CTRL, TIMER11_EN); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_LED_FEATURE); - ocp_data &= ~LED_MODE_MASK; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_LED_FEATURE, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_LED_FEATURE, LED_MODE_MASK); ocp_data = FIFO_EMPTY_1FB | ROK_EXIT_LPM; if (tp->version == RTL_VER_04 && tp->udev->speed < USB_SPEED_SUPER) @@ -7153,10 +6931,8 @@ static void r8153_init(struct r8152 *tp) ocp_data |= LPM_TIMER_500US; ocp_write_byte(tp, MCU_TYPE_USB, USB_LPM_CTRL, ocp_data); - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_AFE_CTRL2); - ocp_data &= ~SEN_VAL_MASK; - ocp_data |= SEN_VAL_NORMAL | SEL_RXIDLE; - ocp_write_word(tp, MCU_TYPE_USB, USB_AFE_CTRL2, ocp_data); + ocp_word_w0w1(tp, MCU_TYPE_USB, USB_AFE_CTRL2, SEN_VAL_MASK, + SEN_VAL_NORMAL | SEL_RXIDLE); ocp_write_word(tp, MCU_TYPE_USB, USB_CONNECT_TIMER, 0x0001); @@ -7166,21 +6942,17 @@ static void r8153_init(struct r8152 *tp) r8153_u1u2en(tp, true); usb_enable_lpm(tp->udev); - ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_CONFIG6); - ocp_data |= LANWAKE_CLR_EN; - ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CONFIG6, ocp_data); + ocp_byte_set_bits(tp, MCU_TYPE_PLA, PLA_CONFIG6, LANWAKE_CLR_EN); - ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_LWAKE_CTRL_REG); - ocp_data &= ~LANWAKE_PIN; - ocp_write_byte(tp, MCU_TYPE_PLA, PLA_LWAKE_CTRL_REG, ocp_data); + ocp_byte_clr_bits(tp, MCU_TYPE_PLA, PLA_LWAKE_CTRL_REG, LANWAKE_PIN); /* rx aggregation */ - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_USB_CTRL); - ocp_data &= ~(RX_AGG_DISABLE | RX_ZERO_EN); if (tp->dell_tb_rx_agg_bug) - ocp_data |= RX_AGG_DISABLE; - - ocp_write_word(tp, MCU_TYPE_USB, USB_USB_CTRL, ocp_data); + ocp_word_w0w1(tp, MCU_TYPE_USB, USB_USB_CTRL, RX_ZERO_EN, + RX_AGG_DISABLE); + else + ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_USB_CTRL, + RX_AGG_DISABLE | RX_ZERO_EN); rtl_tally_reset(tp); @@ -7200,7 +6972,6 @@ static void r8153_init(struct r8152 *tp) static void r8153b_init(struct r8152 *tp) { - u32 ocp_data; u16 data; int i; @@ -7239,13 +7010,12 @@ static void r8153b_init(struct r8152 *tp) r8153_queue_wake(tp, false); rtl_runtime_suspend_enable(tp, false); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS); if (rtl8152_get_speed(tp) & LINK_STATUS) - ocp_data |= CUR_LINK_OK; + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS, + CUR_LINK_OK | POLL_LINK_CHG); else - ocp_data &= ~CUR_LINK_OK; - ocp_data |= POLL_LINK_CHG; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS, ocp_data); + ocp_word_w0w1(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS, CUR_LINK_OK, + POLL_LINK_CHG); if (tp->udev->speed >= USB_SPEED_SUPER) r8153b_u1u2en(tp, true); @@ -7255,25 +7025,20 @@ static void r8153b_init(struct r8152 *tp) /* MAC clock speed down */ r8153_mac_clk_speed_down(tp, true); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3); - ocp_data &= ~PLA_MCU_SPDWN_EN; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, + PLA_MCU_SPDWN_EN); - if (tp->version == RTL_VER_09) { + if (tp->version == RTL_VER_09) /* Disable Test IO for 32QFN */ - if (ocp_read_byte(tp, MCU_TYPE_PLA, 0xdc00) & BIT(5)) { - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_PHY_PWR); - ocp_data |= TEST_IO_OFF; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_PHY_PWR, ocp_data); - } - } + if (ocp_read_byte(tp, MCU_TYPE_PLA, 0xdc00) & BIT(5)) + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_PHY_PWR, + TEST_IO_OFF); set_bit(GREEN_ETHERNET, &tp->flags); /* rx aggregation */ - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_USB_CTRL); - ocp_data &= ~(RX_AGG_DISABLE | RX_ZERO_EN); - ocp_write_word(tp, MCU_TYPE_USB, USB_USB_CTRL, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_USB_CTRL, + RX_AGG_DISABLE | RX_ZERO_EN); rtl_tally_reset(tp); @@ -7282,7 +7047,6 @@ static void r8153b_init(struct r8152 *tp) static void r8153c_init(struct r8152 *tp) { - u32 ocp_data; u16 data; int i; @@ -7293,12 +7057,10 @@ static void r8153c_init(struct r8152 *tp) /* Disable spi_en */ ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_CONFIG); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_CONFIG5); - ocp_data &= ~BIT(3); - ocp_write_word(tp, MCU_TYPE_PLA, PLA_CONFIG5, ocp_data); - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, 0xcbf0); - ocp_data |= BIT(1); - ocp_write_word(tp, MCU_TYPE_USB, 0xcbf0, ocp_data); + + ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_CONFIG5, BIT(3)); + + ocp_word_set_bits(tp, MCU_TYPE_USB, 0xcbf0, BIT(1)); for (i = 0; i < 500; i++) { if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_BOOT_CTRL) & @@ -7330,14 +7092,12 @@ static void r8153c_init(struct r8152 *tp) r8153_queue_wake(tp, false); rtl_runtime_suspend_enable(tp, false); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS); if (rtl8152_get_speed(tp) & LINK_STATUS) - ocp_data |= CUR_LINK_OK; + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS, + CUR_LINK_OK | POLL_LINK_CHG); else - ocp_data &= ~CUR_LINK_OK; - - ocp_data |= POLL_LINK_CHG; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS, ocp_data); + ocp_word_w0w1(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS, CUR_LINK_OK, + POLL_LINK_CHG); r8153b_u1u2en(tp, true); @@ -7346,16 +7106,13 @@ static void r8153c_init(struct r8152 *tp) /* MAC clock speed down */ r8153_mac_clk_speed_down(tp, true); - ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_MISC_2); - ocp_data &= ~BIT(7); - ocp_write_byte(tp, MCU_TYPE_USB, USB_MISC_2, ocp_data); + ocp_byte_clr_bits(tp, MCU_TYPE_USB, USB_MISC_2, BIT(7)); set_bit(GREEN_ETHERNET, &tp->flags); /* rx aggregation */ - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_USB_CTRL); - ocp_data &= ~(RX_AGG_DISABLE | RX_ZERO_EN); - ocp_write_word(tp, MCU_TYPE_USB, USB_USB_CTRL, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_USB_CTRL, + RX_AGG_DISABLE | RX_ZERO_EN); rtl_tally_reset(tp); @@ -7364,14 +7121,9 @@ static void r8153c_init(struct r8152 *tp) static void r8156_hw_phy_cfg(struct r8152 *tp) { - u32 ocp_data; u16 data; - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_MISC_0); - if (ocp_data & PCUT_STATUS) { - ocp_data &= ~PCUT_STATUS; - ocp_write_word(tp, MCU_TYPE_USB, USB_MISC_0, ocp_data); - } + ocp_word_test_and_clr_bits(tp, MCU_TYPE_USB, USB_MISC_0, PCUT_STATUS); data = r8153_phy_status(tp, 0); switch (data) { @@ -7398,9 +7150,7 @@ static void r8156_hw_phy_cfg(struct r8152 *tp) data = r8153_phy_status(tp, PHY_STAT_LAN_ON); WARN_ON_ONCE(data != PHY_STAT_LAN_ON); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_PHY_PWR); - ocp_data |= PFM_PWM_SWITCH; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_PHY_PWR, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_PHY_PWR, PFM_PWM_SWITCH); switch (tp->version) { case RTL_VER_10: @@ -7561,9 +7311,8 @@ static void r8156_hw_phy_cfg(struct r8152 *tp) /* EEE parameter */ ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_TXTWSYS_2P5G, 0x0056); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_USB_CFG); - ocp_data |= EN_XG_LIP | EN_G_LIP; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_USB_CFG, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_USB_CFG, + EN_XG_LIP | EN_G_LIP); sram_write(tp, 0x8257, 0x020f); /* XG PLL */ sram_write(tp, 0x80ea, 0x7843); /* GIGA Master */ @@ -7572,9 +7321,8 @@ static void r8156_hw_phy_cfg(struct r8152 *tp) return; /* Advance EEE */ - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4); - ocp_data |= EEE_SPDWN_EN; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, + EEE_SPDWN_EN); data = ocp_reg_read(tp, OCP_DOWN_SPEED); data &= ~(EN_EEE_100 | EN_EEE_1000); @@ -7714,7 +7462,6 @@ static void r8156_hw_phy_cfg(struct r8152 *tp) static void r8156b_hw_phy_cfg(struct r8152 *tp) { - u32 ocp_data; u16 data; switch (tp->version) { @@ -7744,11 +7491,7 @@ static void r8156b_hw_phy_cfg(struct r8152 *tp) break; } - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_MISC_0); - if (ocp_data & PCUT_STATUS) { - ocp_data &= ~PCUT_STATUS; - ocp_write_word(tp, MCU_TYPE_USB, USB_MISC_0, ocp_data); - } + ocp_word_test_and_clr_bits(tp, MCU_TYPE_USB, USB_MISC_0, PCUT_STATUS); data = r8153_phy_status(tp, 0); switch (data) { @@ -7785,9 +7528,7 @@ static void r8156b_hw_phy_cfg(struct r8152 *tp) data = r8153_phy_status(tp, PHY_STAT_LAN_ON); WARN_ON_ONCE(data != PHY_STAT_LAN_ON); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_PHY_PWR); - ocp_data |= PFM_PWM_SWITCH; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_PHY_PWR, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_PHY_PWR, PFM_PWM_SWITCH); switch (tp->version) { case RTL_VER_12: @@ -7874,9 +7615,9 @@ static void r8156b_hw_phy_cfg(struct r8152 *tp) ocp_reg_write(tp, 0xb87c, 0x8fd8); ocp_reg_write(tp, 0xb87e, 0xf600); - ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_USB_CFG); - ocp_data |= EN_XG_LIP | EN_G_LIP; - ocp_write_byte(tp, MCU_TYPE_PLA, PLA_USB_CFG, ocp_data); + ocp_byte_set_bits(tp, MCU_TYPE_PLA, PLA_USB_CFG, + EN_XG_LIP | EN_G_LIP); + ocp_reg_write(tp, 0xb87c, 0x813d); ocp_reg_write(tp, 0xb87e, 0x390e); ocp_reg_write(tp, 0xb87c, 0x814f); @@ -8090,9 +7831,7 @@ static void r8156b_hw_phy_cfg(struct r8152 *tp) if (rtl_phy_patch_request(tp, true, true)) return; - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4); - ocp_data |= EEE_SPDWN_EN; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, EEE_SPDWN_EN); data = ocp_reg_read(tp, OCP_DOWN_SPEED); data &= ~(EN_EEE_100 | EN_EEE_1000); @@ -8131,22 +7870,17 @@ static void r8156b_hw_phy_cfg(struct r8152 *tp) static void r8156_init(struct r8152 *tp) { - u32 ocp_data; u16 data; int i; if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; - ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_ECM_OP); - ocp_data &= ~EN_ALL_SPEED; - ocp_write_byte(tp, MCU_TYPE_USB, USB_ECM_OP, ocp_data); + ocp_byte_clr_bits(tp, MCU_TYPE_USB, USB_ECM_OP, EN_ALL_SPEED); ocp_write_word(tp, MCU_TYPE_USB, USB_SPEED_OPTION, 0); - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_ECM_OPTION); - ocp_data |= BYPASS_MAC_RESET; - ocp_write_word(tp, MCU_TYPE_USB, USB_ECM_OPTION, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_USB, USB_ECM_OPTION, BYPASS_MAC_RESET); r8153b_u1u2en(tp, false); @@ -8196,28 +7930,23 @@ static void r8156_init(struct r8152 *tp) r8156_mac_clk_spd(tp, true); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3); - ocp_data &= ~PLA_MCU_SPDWN_EN; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, + PLA_MCU_SPDWN_EN); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS); if (rtl8152_get_speed(tp) & LINK_STATUS) - ocp_data |= CUR_LINK_OK; + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS, + CUR_LINK_OK | POLL_LINK_CHG); else - ocp_data &= ~CUR_LINK_OK; - ocp_data |= POLL_LINK_CHG; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS, ocp_data); + ocp_word_w0w1(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS, CUR_LINK_OK, + POLL_LINK_CHG); set_bit(GREEN_ETHERNET, &tp->flags); /* rx aggregation */ - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_USB_CTRL); - ocp_data &= ~(RX_AGG_DISABLE | RX_ZERO_EN); - ocp_write_word(tp, MCU_TYPE_USB, USB_USB_CTRL, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_USB_CTRL, + RX_AGG_DISABLE | RX_ZERO_EN); - ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_BMU_CONFIG); - ocp_data |= ACT_ODMA; - ocp_write_byte(tp, MCU_TYPE_USB, USB_BMU_CONFIG, ocp_data); + ocp_byte_set_bits(tp, MCU_TYPE_USB, USB_BMU_CONFIG, ACT_ODMA); r8156_mdio_force_mode(tp); rtl_tally_reset(tp); @@ -8227,26 +7956,19 @@ static void r8156_init(struct r8152 *tp) static void r8156b_init(struct r8152 *tp) { - u32 ocp_data; u16 data; int i; if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; - ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_ECM_OP); - ocp_data &= ~EN_ALL_SPEED; - ocp_write_byte(tp, MCU_TYPE_USB, USB_ECM_OP, ocp_data); + ocp_byte_clr_bits(tp, MCU_TYPE_USB, USB_ECM_OP, EN_ALL_SPEED); ocp_write_word(tp, MCU_TYPE_USB, USB_SPEED_OPTION, 0); - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_ECM_OPTION); - ocp_data |= BYPASS_MAC_RESET; - ocp_write_word(tp, MCU_TYPE_USB, USB_ECM_OPTION, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_USB, USB_ECM_OPTION, BYPASS_MAC_RESET); - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_U2P3_CTRL); - ocp_data |= RX_DETECT8; - ocp_write_word(tp, MCU_TYPE_USB, USB_U2P3_CTRL, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_USB, USB_U2P3_CTRL, RX_DETECT8); r8153b_u1u2en(tp, false); @@ -8306,48 +8028,39 @@ static void r8156b_init(struct r8152 *tp) usb_enable_lpm(tp->udev); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_RCR); - ocp_data &= ~SLOT_EN; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_RCR, SLOT_EN); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_CPCR); - ocp_data |= FLOW_CTRL_EN; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_CPCR, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_CPCR, FLOW_CTRL_EN); /* enable fc timer and set timer to 600 ms. */ ocp_write_word(tp, MCU_TYPE_USB, USB_FC_TIMER, CTRL_TIMER_EN | (600 / 8)); - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_FW_CTRL); if (!(ocp_read_word(tp, MCU_TYPE_PLA, PLA_POL_GPIO_CTRL) & DACK_DET_EN)) - ocp_data |= FLOW_CTRL_PATCH_2; - ocp_data &= ~AUTO_SPEEDUP; - ocp_write_word(tp, MCU_TYPE_USB, USB_FW_CTRL, ocp_data); + ocp_word_w0w1(tp, MCU_TYPE_USB, USB_FW_CTRL, AUTO_SPEEDUP, + FLOW_CTRL_PATCH_2); + else + ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_FW_CTRL, AUTO_SPEEDUP); - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_FW_TASK); - ocp_data |= FC_PATCH_TASK; - ocp_write_word(tp, MCU_TYPE_USB, USB_FW_TASK, ocp_data); + ocp_word_set_bits(tp, MCU_TYPE_USB, USB_FW_TASK, FC_PATCH_TASK); r8156_mac_clk_spd(tp, true); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3); - ocp_data &= ~PLA_MCU_SPDWN_EN; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, + PLA_MCU_SPDWN_EN); - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS); if (rtl8152_get_speed(tp) & LINK_STATUS) - ocp_data |= CUR_LINK_OK; + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS, + CUR_LINK_OK | POLL_LINK_CHG); else - ocp_data &= ~CUR_LINK_OK; - ocp_data |= POLL_LINK_CHG; - ocp_write_word(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS, ocp_data); + ocp_word_w0w1(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS, CUR_LINK_OK, + POLL_LINK_CHG); set_bit(GREEN_ETHERNET, &tp->flags); /* rx aggregation */ - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_USB_CTRL); - ocp_data &= ~(RX_AGG_DISABLE | RX_ZERO_EN); - ocp_write_word(tp, MCU_TYPE_USB, USB_USB_CTRL, ocp_data); + ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_USB_CTRL, + RX_AGG_DISABLE | RX_ZERO_EN); r8156_mdio_force_mode(tp); rtl_tally_reset(tp); From 7db154aa58e18bb663cc317f0b62631f8b3d2b87 Mon Sep 17 00:00:00 2001 From: Chih Kai Hsu Date: Thu, 26 Mar 2026 15:39:25 +0800 Subject: [PATCH 1058/1561] r8152: add helper functions for PHY OCP registers Add the following bitwise operation functions for PHY OCP registers to simplify the code. - ocp_reg_w0w1() - ocp_reg_clr_bits() - ocp_reg_set_bits() - sram_write_w0w1() - sram_clr_bits() - sram_set_bits() - r8152_mdio_clr_bit() - r8152_mdio_set_bit() - r8152_mdio_test_and_clr_bit() In addition, remove variable set but not used from r8153_init(), r8153b_init() and r8153c_init(). Signed-off-by: Chih Kai Hsu Reviewed-by: Hayes Wang Link: https://patch.msgid.link/20260326073925.32976-456-nic_swsd@realtek.com Signed-off-by: Paolo Abeni --- drivers/net/usb/r8152.c | 685 +++++++++++++--------------------------- 1 file changed, 220 insertions(+), 465 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 32a4e8d42311..8747c55e0a48 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1726,6 +1726,71 @@ static void ocp_byte_set_bits(struct r8152 *tp, u16 type, u16 index, u8 set) ocp_byte_w0w1(tp, type, index, 0, set); } +static void ocp_reg_w0w1(struct r8152 *tp, u16 addr, u16 clear, u16 set) +{ + u16 data; + + data = ocp_reg_read(tp, addr); + data = (data & ~clear) | set; + ocp_reg_write(tp, addr, data); +} + +static void ocp_reg_clr_bits(struct r8152 *tp, u16 addr, u16 clear) +{ + ocp_reg_w0w1(tp, addr, clear, 0); +} + +static void ocp_reg_set_bits(struct r8152 *tp, u16 addr, u16 set) +{ + ocp_reg_w0w1(tp, addr, 0, set); +} + +static void sram_write_w0w1(struct r8152 *tp, u16 addr, u16 clear, u16 set) +{ + u16 data; + + data = sram_read(tp, addr); + data = (data & ~clear) | set; + ocp_reg_write(tp, OCP_SRAM_DATA, data); +} + +static void sram_clr_bits(struct r8152 *tp, u16 addr, u16 clear) +{ + sram_write_w0w1(tp, addr, clear, 0); +} + +static void sram_set_bits(struct r8152 *tp, u16 addr, u16 set) +{ + sram_write_w0w1(tp, addr, 0, set); +} + +static void r8152_mdio_clr_bit(struct r8152 *tp, u16 addr, u16 clear) +{ + int data; + + data = r8152_mdio_read(tp, addr); + r8152_mdio_write(tp, addr, data & ~clear); +} + +static void r8152_mdio_set_bit(struct r8152 *tp, u16 addr, u16 set) +{ + int data; + + data = r8152_mdio_read(tp, addr); + r8152_mdio_write(tp, addr, data | set); +} + +static int r8152_mdio_test_and_clr_bit(struct r8152 *tp, u16 addr, u16 clear) +{ + int data; + + data = r8152_mdio_read(tp, addr); + if (data & clear) + r8152_mdio_write(tp, addr, data & ~clear); + + return data & clear; +} + static int r8152_submit_rx(struct r8152 *tp, struct rx_agg *agg, gfp_t mem_flags); @@ -3740,14 +3805,10 @@ static void r8156_ups_flags(struct r8152 *tp) static void rtl_green_en(struct r8152 *tp, bool enable) { - u16 data; - - data = sram_read(tp, SRAM_GREEN_CFG); if (enable) - data |= GREEN_ETH_EN; + sram_set_bits(tp, SRAM_GREEN_CFG, GREEN_ETH_EN); else - data &= ~GREEN_ETH_EN; - sram_write(tp, SRAM_GREEN_CFG, data); + sram_clr_bits(tp, SRAM_GREEN_CFG, GREEN_ETH_EN); tp->ups_info.green = enable; } @@ -4143,18 +4204,16 @@ static inline void rtl_reset_ocp_base(struct r8152 *tp) static int rtl_phy_patch_request(struct r8152 *tp, bool request, bool wait) { - u16 data, check; + u16 check; int i; - data = ocp_reg_read(tp, OCP_PHY_PATCH_CMD); if (request) { - data |= PATCH_REQUEST; + ocp_reg_set_bits(tp, OCP_PHY_PATCH_CMD, PATCH_REQUEST); check = 0; } else { - data &= ~PATCH_REQUEST; + ocp_reg_clr_bits(tp, OCP_PHY_PATCH_CMD, PATCH_REQUEST); check = PATCH_READY; } - ocp_reg_write(tp, OCP_PHY_PATCH_CMD, data); for (i = 0; wait && i < 5000; i++) { u32 ocp_data; @@ -4184,14 +4243,8 @@ static void rtl_patch_key_set(struct r8152 *tp, u16 key_addr, u16 patch_key) sram_write(tp, key_addr, patch_key); sram_write(tp, SRAM_PHY_LOCK, PHY_PATCH_LOCK); } else if (key_addr) { - u16 data; - sram_write(tp, 0x0000, 0x0000); - - data = ocp_reg_read(tp, OCP_PHY_LOCK); - data &= ~PATCH_LOCK; - ocp_reg_write(tp, OCP_PHY_LOCK, data); - + ocp_reg_clr_bits(tp, OCP_PHY_LOCK, PATCH_LOCK); sram_write(tp, key_addr, 0x0000); } else { WARN_ON_ONCE(1); @@ -5299,69 +5352,58 @@ static void r8152_mmd_write(struct r8152 *tp, u16 dev, u16 reg, u16 data) static void r8152_eee_en(struct r8152 *tp, bool enable) { - u16 config1, config2, config3; - - config1 = ocp_reg_read(tp, OCP_EEE_CONFIG1) & ~sd_rise_time_mask; - config2 = ocp_reg_read(tp, OCP_EEE_CONFIG2); - config3 = ocp_reg_read(tp, OCP_EEE_CONFIG3) & ~fast_snr_mask; - if (enable) { ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_EEE_CR, EEE_RX_EN | EEE_TX_EN); - config1 |= EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN | RX_QUIET_EN; - config1 |= sd_rise_time(1); - config2 |= RG_DACQUIET_EN | RG_LDVQUIET_EN; - config3 |= fast_snr(42); + + ocp_reg_w0w1(tp, OCP_EEE_CONFIG1, sd_rise_time_mask, + EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN | + RX_QUIET_EN | sd_rise_time(1)); + + ocp_reg_set_bits(tp, OCP_EEE_CONFIG2, + RG_DACQUIET_EN | RG_LDVQUIET_EN); + + ocp_reg_w0w1(tp, OCP_EEE_CONFIG3, fast_snr_mask, fast_snr(42)); } else { ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_EEE_CR, EEE_RX_EN | EEE_TX_EN); - config1 &= ~(EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN | - RX_QUIET_EN); - config1 |= sd_rise_time(7); - config2 &= ~(RG_DACQUIET_EN | RG_LDVQUIET_EN); - config3 |= fast_snr(511); - } - ocp_reg_write(tp, OCP_EEE_CONFIG1, config1); - ocp_reg_write(tp, OCP_EEE_CONFIG2, config2); - ocp_reg_write(tp, OCP_EEE_CONFIG3, config3); + ocp_reg_w0w1(tp, OCP_EEE_CONFIG1, sd_rise_time_mask | + EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN | + RX_QUIET_EN, sd_rise_time(7)); + + ocp_reg_clr_bits(tp, OCP_EEE_CONFIG2, + RG_DACQUIET_EN | RG_LDVQUIET_EN); + + ocp_reg_w0w1(tp, OCP_EEE_CONFIG3, fast_snr_mask, fast_snr(511)); + } } static void r8153_eee_en(struct r8152 *tp, bool enable) { - u16 config; - - config = ocp_reg_read(tp, OCP_EEE_CFG); - if (enable) { ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_EEE_CR, EEE_RX_EN | EEE_TX_EN); - config |= EEE10_EN; + + ocp_reg_set_bits(tp, OCP_EEE_CFG, EEE10_EN); } else { ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_EEE_CR, EEE_RX_EN | EEE_TX_EN); - config &= ~EEE10_EN; - } - ocp_reg_write(tp, OCP_EEE_CFG, config); + ocp_reg_clr_bits(tp, OCP_EEE_CFG, EEE10_EN); + } tp->ups_info.eee = enable; } static void r8156_eee_en(struct r8152 *tp, bool enable) { - u16 config; - r8153_eee_en(tp, enable); - config = ocp_reg_read(tp, OCP_EEE_ADV2); - if (enable && (tp->eee_adv2 & MDIO_EEE_2_5GT)) - config |= MDIO_EEE_2_5GT; + ocp_reg_set_bits(tp, OCP_EEE_ADV2, MDIO_EEE_2_5GT); else - config &= ~MDIO_EEE_2_5GT; - - ocp_reg_write(tp, OCP_EEE_ADV2, config); + ocp_reg_clr_bits(tp, OCP_EEE_ADV2, MDIO_EEE_2_5GT); } static void rtl_eee_enable(struct r8152 *tp, bool enable) @@ -5414,11 +5456,8 @@ static void rtl_eee_enable(struct r8152 *tp, bool enable) static void r8152b_enable_fc(struct r8152 *tp) { - u16 anar; - - anar = r8152_mdio_read(tp, MII_ADVERTISE); - anar |= ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM; - r8152_mdio_write(tp, MII_ADVERTISE, anar); + r8152_mdio_set_bit(tp, MII_ADVERTISE, + ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM); tp->ups_info.flow_control = true; } @@ -5674,17 +5713,12 @@ static int r8156a_post_firmware_1(struct r8152 *tp) static void r8153_aldps_en(struct r8152 *tp, bool enable) { - u16 data; - - data = ocp_reg_read(tp, OCP_POWER_CFG); if (enable) { - data |= EN_ALDPS; - ocp_reg_write(tp, OCP_POWER_CFG, data); + ocp_reg_set_bits(tp, OCP_POWER_CFG, EN_ALDPS); } else { int i; - data &= ~EN_ALDPS; - ocp_reg_write(tp, OCP_POWER_CFG, data); + ocp_reg_clr_bits(tp, OCP_POWER_CFG, EN_ALDPS); for (i = 0; i < 20; i++) { if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; @@ -5699,8 +5733,6 @@ static void r8153_aldps_en(struct r8152 *tp, bool enable) static void r8153_hw_phy_cfg(struct r8152 *tp) { - u16 data; - /* disable ALDPS before updating the PHY parameters */ r8153_aldps_en(tp, false); @@ -5709,22 +5741,14 @@ static void r8153_hw_phy_cfg(struct r8152 *tp) rtl8152_apply_firmware(tp, false); - if (tp->version == RTL_VER_03) { - data = ocp_reg_read(tp, OCP_EEE_CFG); - data &= ~CTAP_SHORT_EN; - ocp_reg_write(tp, OCP_EEE_CFG, data); - } + if (tp->version == RTL_VER_03) + ocp_reg_clr_bits(tp, OCP_EEE_CFG, CTAP_SHORT_EN); - data = ocp_reg_read(tp, OCP_POWER_CFG); - data |= EEE_CLKDIV_EN; - ocp_reg_write(tp, OCP_POWER_CFG, data); + ocp_reg_set_bits(tp, OCP_POWER_CFG, EEE_CLKDIV_EN); - data = ocp_reg_read(tp, OCP_DOWN_SPEED); - data |= EN_10M_BGOFF; - ocp_reg_write(tp, OCP_DOWN_SPEED, data); - data = ocp_reg_read(tp, OCP_POWER_CFG); - data |= EN_10M_PLLOFF; - ocp_reg_write(tp, OCP_POWER_CFG, data); + ocp_reg_set_bits(tp, OCP_DOWN_SPEED, EN_10M_BGOFF); + + ocp_reg_set_bits(tp, OCP_POWER_CFG, EN_10M_PLLOFF); sram_write(tp, SRAM_IMPEDANCE, 0x0b13); @@ -5792,9 +5816,7 @@ static void r8153b_hw_phy_cfg(struct r8152 *tp) case PHY_STAT_EXT_INIT: rtl8152_apply_firmware(tp, true); - data = r8152_mdio_read(tp, MII_BMCR); - data &= ~BMCR_PDOWN; - r8152_mdio_write(tp, MII_BMCR, data); + r8152_mdio_clr_bit(tp, MII_BMCR, BMCR_PDOWN); break; case PHY_STAT_LAN_ON: default: @@ -5804,12 +5826,9 @@ static void r8153b_hw_phy_cfg(struct r8152 *tp) r8153b_green_en(tp, test_bit(GREEN_ETHERNET, &tp->flags)); - data = sram_read(tp, SRAM_GREEN_CFG); - data |= R_TUNE_EN; - sram_write(tp, SRAM_GREEN_CFG, data); - data = ocp_reg_read(tp, OCP_NCTL_CFG); - data |= PGA_RETURN_EN; - ocp_reg_write(tp, OCP_NCTL_CFG, data); + sram_set_bits(tp, SRAM_GREEN_CFG, R_TUNE_EN); + + ocp_reg_set_bits(tp, OCP_NCTL_CFG, PGA_RETURN_EN); /* ADC Bias Calibration: * read efuse offset 0x7d to get a 17-bit data. Remove the dummy/fake @@ -5839,14 +5858,11 @@ static void r8153b_hw_phy_cfg(struct r8152 *tp) /* Advnace EEE */ if (!rtl_phy_patch_request(tp, true, true)) { - data = ocp_reg_read(tp, OCP_POWER_CFG); - data |= EEE_CLKDIV_EN; - ocp_reg_write(tp, OCP_POWER_CFG, data); + ocp_reg_set_bits(tp, OCP_POWER_CFG, EEE_CLKDIV_EN); tp->ups_info.eee_ckdiv = true; - data = ocp_reg_read(tp, OCP_DOWN_SPEED); - data |= EN_EEE_CMODE | EN_EEE_1000 | EN_10M_CLKDIV; - ocp_reg_write(tp, OCP_DOWN_SPEED, data); + ocp_reg_set_bits(tp, OCP_DOWN_SPEED, + EN_EEE_CMODE | EN_EEE_1000 | EN_10M_CLKDIV); tp->ups_info.eee_cmod_lv = true; tp->ups_info._10m_ckdiv = true; tp->ups_info.eee_plloff_giga = true; @@ -6812,16 +6828,11 @@ static void rtl_tally_reset(struct r8152 *tp) static void r8152b_init(struct r8152 *tp) { u32 ocp_data; - u16 data; if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; - data = r8152_mdio_read(tp, MII_BMCR); - if (data & BMCR_PDOWN) { - data &= ~BMCR_PDOWN; - r8152_mdio_write(tp, MII_BMCR, data); - } + r8152_mdio_test_and_clr_bit(tp, MII_BMCR, BMCR_PDOWN); r8152_aldps_en(tp, false); @@ -6851,7 +6862,6 @@ static void r8152b_init(struct r8152 *tp) static void r8153_init(struct r8152 *tp) { u32 ocp_data; - u16 data; int i; if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) @@ -6869,19 +6879,15 @@ static void r8153_init(struct r8152 *tp) break; } - data = r8153_phy_status(tp, 0); + r8153_phy_status(tp, 0); if (tp->version == RTL_VER_03 || tp->version == RTL_VER_04 || tp->version == RTL_VER_05) ocp_reg_write(tp, OCP_ADC_CFG, CKADSEL_L | ADC_EN | EN_EMI_L); - data = r8152_mdio_read(tp, MII_BMCR); - if (data & BMCR_PDOWN) { - data &= ~BMCR_PDOWN; - r8152_mdio_write(tp, MII_BMCR, data); - } + r8152_mdio_test_and_clr_bit(tp, MII_BMCR, BMCR_PDOWN); - data = r8153_phy_status(tp, PHY_STAT_LAN_ON); + r8153_phy_status(tp, PHY_STAT_LAN_ON); r8153_u2p3en(tp, false); @@ -6972,7 +6978,6 @@ static void r8153_init(struct r8152 *tp) static void r8153b_init(struct r8152 *tp) { - u16 data; int i; if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) @@ -6990,15 +6995,11 @@ static void r8153b_init(struct r8152 *tp) break; } - data = r8153_phy_status(tp, 0); + r8153_phy_status(tp, 0); - data = r8152_mdio_read(tp, MII_BMCR); - if (data & BMCR_PDOWN) { - data &= ~BMCR_PDOWN; - r8152_mdio_write(tp, MII_BMCR, data); - } + r8152_mdio_test_and_clr_bit(tp, MII_BMCR, BMCR_PDOWN); - data = r8153_phy_status(tp, PHY_STAT_LAN_ON); + r8153_phy_status(tp, PHY_STAT_LAN_ON); r8153_u2p3en(tp, false); @@ -7047,7 +7048,6 @@ static void r8153b_init(struct r8152 *tp) static void r8153c_init(struct r8152 *tp) { - u16 data; int i; if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) @@ -7072,15 +7072,11 @@ static void r8153c_init(struct r8152 *tp) return; } - data = r8153_phy_status(tp, 0); + r8153_phy_status(tp, 0); - data = r8152_mdio_read(tp, MII_BMCR); - if (data & BMCR_PDOWN) { - data &= ~BMCR_PDOWN; - r8152_mdio_write(tp, MII_BMCR, data); - } + r8152_mdio_test_and_clr_bit(tp, MII_BMCR, BMCR_PDOWN); - data = r8153_phy_status(tp, PHY_STAT_LAN_ON); + r8153_phy_status(tp, PHY_STAT_LAN_ON); r8153_u2p3en(tp, false); @@ -7130,9 +7126,7 @@ static void r8156_hw_phy_cfg(struct r8152 *tp) case PHY_STAT_EXT_INIT: rtl8152_apply_firmware(tp, true); - data = ocp_reg_read(tp, 0xa468); - data &= ~(BIT(3) | BIT(1)); - ocp_reg_write(tp, 0xa468, data); + ocp_reg_clr_bits(tp, 0xa468, BIT(3) | BIT(1)); break; case PHY_STAT_LAN_ON: case PHY_STAT_PWRDN: @@ -7154,153 +7148,57 @@ static void r8156_hw_phy_cfg(struct r8152 *tp) switch (tp->version) { case RTL_VER_10: - data = ocp_reg_read(tp, 0xad40); - data &= ~0x3ff; - data |= BIT(7) | BIT(2); - ocp_reg_write(tp, 0xad40, data); + ocp_reg_w0w1(tp, 0xad40, 0x3ff, BIT(7) | BIT(2)); - data = ocp_reg_read(tp, 0xad4e); - data |= BIT(4); - ocp_reg_write(tp, 0xad4e, data); - data = ocp_reg_read(tp, 0xad16); - data &= ~0x3ff; - data |= 0x6; - ocp_reg_write(tp, 0xad16, data); - data = ocp_reg_read(tp, 0xad32); - data &= ~0x3f; - data |= 6; - ocp_reg_write(tp, 0xad32, data); - data = ocp_reg_read(tp, 0xac08); - data &= ~(BIT(12) | BIT(8)); - ocp_reg_write(tp, 0xac08, data); - data = ocp_reg_read(tp, 0xac8a); - data |= BIT(12) | BIT(13) | BIT(14); - data &= ~BIT(15); - ocp_reg_write(tp, 0xac8a, data); - data = ocp_reg_read(tp, 0xad18); - data |= BIT(10); - ocp_reg_write(tp, 0xad18, data); - data = ocp_reg_read(tp, 0xad1a); - data |= 0x3ff; - ocp_reg_write(tp, 0xad1a, data); - data = ocp_reg_read(tp, 0xad1c); - data |= 0x3ff; - ocp_reg_write(tp, 0xad1c, data); + ocp_reg_set_bits(tp, 0xad4e, BIT(4)); + ocp_reg_w0w1(tp, 0xad16, 0x3ff, 0x6); + ocp_reg_w0w1(tp, 0xad32, 0x3f, 0x6); + ocp_reg_clr_bits(tp, 0xac08, BIT(12) | BIT(8)); + ocp_reg_w0w1(tp, 0xac8a, BIT(15), BIT(12) | BIT(13) | BIT(14)); + ocp_reg_set_bits(tp, 0xad18, BIT(10)); + ocp_reg_set_bits(tp, 0xad1a, 0x3ff); + ocp_reg_set_bits(tp, 0xad1c, 0x3ff); - data = sram_read(tp, 0x80ea); - data &= ~0xff00; - data |= 0xc400; - sram_write(tp, 0x80ea, data); - data = sram_read(tp, 0x80eb); - data &= ~0x0700; - data |= 0x0300; - sram_write(tp, 0x80eb, data); - data = sram_read(tp, 0x80f8); - data &= ~0xff00; - data |= 0x1c00; - sram_write(tp, 0x80f8, data); - data = sram_read(tp, 0x80f1); - data &= ~0xff00; - data |= 0x3000; - sram_write(tp, 0x80f1, data); + sram_write_w0w1(tp, 0x80ea, 0xff00, 0xc400); + sram_write_w0w1(tp, 0x80eb, 0x0700, 0x0300); + sram_write_w0w1(tp, 0x80f8, 0xff00, 0x1c00); + sram_write_w0w1(tp, 0x80f1, 0xff00, 0x3000); - data = sram_read(tp, 0x80fe); - data &= ~0xff00; - data |= 0xa500; - sram_write(tp, 0x80fe, data); - data = sram_read(tp, 0x8102); - data &= ~0xff00; - data |= 0x5000; - sram_write(tp, 0x8102, data); - data = sram_read(tp, 0x8015); - data &= ~0xff00; - data |= 0x3300; - sram_write(tp, 0x8015, data); - data = sram_read(tp, 0x8100); - data &= ~0xff00; - data |= 0x7000; - sram_write(tp, 0x8100, data); - data = sram_read(tp, 0x8014); - data &= ~0xff00; - data |= 0xf000; - sram_write(tp, 0x8014, data); - data = sram_read(tp, 0x8016); - data &= ~0xff00; - data |= 0x6500; - sram_write(tp, 0x8016, data); - data = sram_read(tp, 0x80dc); - data &= ~0xff00; - data |= 0xed00; - sram_write(tp, 0x80dc, data); - data = sram_read(tp, 0x80df); - data |= BIT(8); - sram_write(tp, 0x80df, data); - data = sram_read(tp, 0x80e1); - data &= ~BIT(8); - sram_write(tp, 0x80e1, data); + sram_write_w0w1(tp, 0x80fe, 0xff00, 0xa500); + sram_write_w0w1(tp, 0x8102, 0xff00, 0x5000); + sram_write_w0w1(tp, 0x8015, 0xff00, 0x3300); + sram_write_w0w1(tp, 0x8100, 0xff00, 0x7000); + sram_write_w0w1(tp, 0x8014, 0xff00, 0xf000); + sram_write_w0w1(tp, 0x8016, 0xff00, 0x6500); + sram_write_w0w1(tp, 0x80dc, 0xff00, 0xed00); + sram_set_bits(tp, 0x80df, BIT(8)); + sram_clr_bits(tp, 0x80e1, BIT(8)); - data = ocp_reg_read(tp, 0xbf06); - data &= ~0x003f; - data |= 0x0038; - ocp_reg_write(tp, 0xbf06, data); + ocp_reg_w0w1(tp, 0xbf06, 0x003f, 0x0038); sram_write(tp, 0x819f, 0xddb6); ocp_reg_write(tp, 0xbc34, 0x5555); - data = ocp_reg_read(tp, 0xbf0a); - data &= ~0x0e00; - data |= 0x0a00; - ocp_reg_write(tp, 0xbf0a, data); + ocp_reg_w0w1(tp, 0xbf0a, 0x0e00, 0x0a00); - data = ocp_reg_read(tp, 0xbd2c); - data &= ~BIT(13); - ocp_reg_write(tp, 0xbd2c, data); + ocp_reg_clr_bits(tp, 0xbd2c, BIT(13)); break; case RTL_VER_11: - data = ocp_reg_read(tp, 0xad16); - data |= 0x3ff; - ocp_reg_write(tp, 0xad16, data); - data = ocp_reg_read(tp, 0xad32); - data &= ~0x3f; - data |= 6; - ocp_reg_write(tp, 0xad32, data); - data = ocp_reg_read(tp, 0xac08); - data &= ~(BIT(12) | BIT(8)); - ocp_reg_write(tp, 0xac08, data); - data = ocp_reg_read(tp, 0xacc0); - data &= ~0x3; - data |= BIT(1); - ocp_reg_write(tp, 0xacc0, data); - data = ocp_reg_read(tp, 0xad40); - data &= ~0xe7; - data |= BIT(6) | BIT(2); - ocp_reg_write(tp, 0xad40, data); - data = ocp_reg_read(tp, 0xac14); - data &= ~BIT(7); - ocp_reg_write(tp, 0xac14, data); - data = ocp_reg_read(tp, 0xac80); - data &= ~(BIT(8) | BIT(9)); - ocp_reg_write(tp, 0xac80, data); - data = ocp_reg_read(tp, 0xac5e); - data &= ~0x7; - data |= BIT(1); - ocp_reg_write(tp, 0xac5e, data); + ocp_reg_set_bits(tp, 0xad16, 0x3ff); + ocp_reg_w0w1(tp, 0xad32, 0x3f, 0x6); + ocp_reg_clr_bits(tp, 0xac08, BIT(12) | BIT(8)); + ocp_reg_w0w1(tp, 0xacc0, 0x3, BIT(1)); + ocp_reg_w0w1(tp, 0xad40, 0xe7, BIT(6) | BIT(2)); + ocp_reg_clr_bits(tp, 0xac14, BIT(7)); + ocp_reg_clr_bits(tp, 0xac80, BIT(8) | BIT(9)); + ocp_reg_w0w1(tp, 0xac5e, 0x7, BIT(1)); ocp_reg_write(tp, 0xad4c, 0x00a8); ocp_reg_write(tp, 0xac5c, 0x01ff); - data = ocp_reg_read(tp, 0xac8a); - data &= ~0xf0; - data |= BIT(4) | BIT(5); - ocp_reg_write(tp, 0xac8a, data); + ocp_reg_w0w1(tp, 0xac8a, 0xf0, BIT(4) | BIT(5)); ocp_reg_write(tp, 0xb87c, 0x8157); - data = ocp_reg_read(tp, 0xb87e); - data &= ~0xff00; - data |= 0x0500; - ocp_reg_write(tp, 0xb87e, data); + ocp_reg_w0w1(tp, 0xb87e, 0xff00, 0x0500); ocp_reg_write(tp, 0xb87c, 0x8159); - data = ocp_reg_read(tp, 0xb87e); - data &= ~0xff00; - data |= 0x0700; - ocp_reg_write(tp, 0xb87e, data); + ocp_reg_w0w1(tp, 0xb87e, 0xff00, 0x0700); /* AAGC */ ocp_reg_write(tp, 0xb87c, 0x80a2); @@ -7324,17 +7222,13 @@ static void r8156_hw_phy_cfg(struct r8152 *tp) ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, EEE_SPDWN_EN); - data = ocp_reg_read(tp, OCP_DOWN_SPEED); - data &= ~(EN_EEE_100 | EN_EEE_1000); - data |= EN_10M_CLKDIV; - ocp_reg_write(tp, OCP_DOWN_SPEED, data); + ocp_reg_w0w1(tp, OCP_DOWN_SPEED, EN_EEE_100 | EN_EEE_1000, + EN_10M_CLKDIV); tp->ups_info._10m_ckdiv = true; tp->ups_info.eee_plloff_100 = false; tp->ups_info.eee_plloff_giga = false; - data = ocp_reg_read(tp, OCP_POWER_CFG); - data &= ~EEE_CLKDIV_EN; - ocp_reg_write(tp, OCP_POWER_CFG, data); + ocp_reg_clr_bits(tp, OCP_POWER_CFG, EEE_CLKDIV_EN); tp->ups_info.eee_ckdiv = false; ocp_reg_write(tp, OCP_SYSCLK_CFG, 0); @@ -7344,34 +7238,19 @@ static void r8156_hw_phy_cfg(struct r8152 *tp) rtl_phy_patch_request(tp, false, true); /* enable ADC Ibias Cal */ - data = ocp_reg_read(tp, 0xd068); - data |= BIT(13); - ocp_reg_write(tp, 0xd068, data); + ocp_reg_set_bits(tp, 0xd068, BIT(13)); /* enable Thermal Sensor */ - data = sram_read(tp, 0x81a2); - data &= ~BIT(8); - sram_write(tp, 0x81a2, data); - data = ocp_reg_read(tp, 0xb54c); - data &= ~0xff00; - data |= 0xdb00; - ocp_reg_write(tp, 0xb54c, data); + sram_clr_bits(tp, 0x81a2, BIT(8)); + ocp_reg_w0w1(tp, 0xb54c, 0xff00, 0xdb00); /* Nway 2.5G Lite */ - data = ocp_reg_read(tp, 0xa454); - data &= ~BIT(0); - ocp_reg_write(tp, 0xa454, data); + ocp_reg_clr_bits(tp, 0xa454, BIT(0)); /* CS DSP solution */ - data = ocp_reg_read(tp, OCP_10GBT_CTRL); - data |= RTL_ADV2_5G_F_R; - ocp_reg_write(tp, OCP_10GBT_CTRL, data); - data = ocp_reg_read(tp, 0xad4e); - data &= ~BIT(4); - ocp_reg_write(tp, 0xad4e, data); - data = ocp_reg_read(tp, 0xa86a); - data &= ~BIT(0); - ocp_reg_write(tp, 0xa86a, data); + ocp_reg_set_bits(tp, OCP_10GBT_CTRL, RTL_ADV2_5G_F_R); + ocp_reg_clr_bits(tp, 0xad4e, BIT(4)); + ocp_reg_clr_bits(tp, 0xa86a, BIT(0)); /* MDI SWAP */ if ((ocp_read_word(tp, MCU_TYPE_USB, USB_UPS_CFG) & MID_REVERSE) && @@ -7432,9 +7311,7 @@ static void r8156_hw_phy_cfg(struct r8152 *tp) } /* Notify the MAC when the speed is changed to force mode. */ - data = ocp_reg_read(tp, OCP_INTR_EN); - data |= INTR_SPEED_FORCE; - ocp_reg_write(tp, OCP_INTR_EN, data); + ocp_reg_set_bits(tp, OCP_INTR_EN, INTR_SPEED_FORCE); break; default: break; @@ -7442,12 +7319,8 @@ static void r8156_hw_phy_cfg(struct r8152 *tp) rtl_green_en(tp, test_bit(GREEN_ETHERNET, &tp->flags)); - data = ocp_reg_read(tp, 0xa428); - data &= ~BIT(9); - ocp_reg_write(tp, 0xa428, data); - data = ocp_reg_read(tp, 0xa5ea); - data &= ~BIT(0); - ocp_reg_write(tp, 0xa5ea, data); + ocp_reg_clr_bits(tp, 0xa428, BIT(9)); + ocp_reg_clr_bits(tp, 0xa5ea, BIT(0)); tp->ups_info.lite_mode = 0; if (tp->eee_en) @@ -7467,21 +7340,12 @@ static void r8156b_hw_phy_cfg(struct r8152 *tp) switch (tp->version) { case RTL_VER_12: ocp_reg_write(tp, 0xbf86, 0x9000); - data = ocp_reg_read(tp, 0xc402); - data |= BIT(10); - ocp_reg_write(tp, 0xc402, data); - data &= ~BIT(10); - ocp_reg_write(tp, 0xc402, data); + ocp_reg_set_bits(tp, 0xc402, BIT(10)); + ocp_reg_clr_bits(tp, 0xc402, BIT(10)); ocp_reg_write(tp, 0xbd86, 0x1010); ocp_reg_write(tp, 0xbd88, 0x1010); - data = ocp_reg_read(tp, 0xbd4e); - data &= ~(BIT(10) | BIT(11)); - data |= BIT(11); - ocp_reg_write(tp, 0xbd4e, data); - data = ocp_reg_read(tp, 0xbf46); - data &= ~0xf00; - data |= 0x700; - ocp_reg_write(tp, 0xbf46, data); + ocp_reg_w0w1(tp, 0xbd4e, BIT(10) | BIT(11), BIT(11)); + ocp_reg_w0w1(tp, 0xbf46, 0xf00, 0x700); break; case RTL_VER_13: case RTL_VER_15: @@ -7498,13 +7362,8 @@ static void r8156b_hw_phy_cfg(struct r8152 *tp) case PHY_STAT_EXT_INIT: rtl8152_apply_firmware(tp, true); - data = ocp_reg_read(tp, 0xa466); - data &= ~BIT(0); - ocp_reg_write(tp, 0xa466, data); - - data = ocp_reg_read(tp, 0xa468); - data &= ~(BIT(3) | BIT(1)); - ocp_reg_write(tp, 0xa468, data); + ocp_reg_clr_bits(tp, 0xa466, BIT(0)); + ocp_reg_clr_bits(tp, 0xa468, BIT(3) | BIT(1)); break; case PHY_STAT_LAN_ON: case PHY_STAT_PWRDN: @@ -7513,11 +7372,7 @@ static void r8156b_hw_phy_cfg(struct r8152 *tp) break; } - data = r8152_mdio_read(tp, MII_BMCR); - if (data & BMCR_PDOWN) { - data &= ~BMCR_PDOWN; - r8152_mdio_write(tp, MII_BMCR, data); - } + r8152_mdio_test_and_clr_bit(tp, MII_BMCR, BMCR_PDOWN); /* disable ALDPS before updating the PHY parameters */ r8153_aldps_en(tp, false); @@ -7532,21 +7387,12 @@ static void r8156b_hw_phy_cfg(struct r8152 *tp) switch (tp->version) { case RTL_VER_12: - data = ocp_reg_read(tp, 0xbc08); - data |= BIT(3) | BIT(2); - ocp_reg_write(tp, 0xbc08, data); + ocp_reg_set_bits(tp, 0xbc08, BIT(3) | BIT(2)); - data = sram_read(tp, 0x8fff); - data &= ~0xff00; - data |= 0x0400; - sram_write(tp, 0x8fff, data); + sram_write_w0w1(tp, 0x8fff, 0xff00, 0x0400); - data = ocp_reg_read(tp, 0xacda); - data |= 0xff00; - ocp_reg_write(tp, 0xacda, data); - data = ocp_reg_read(tp, 0xacde); - data |= 0xf000; - ocp_reg_write(tp, 0xacde, data); + ocp_reg_set_bits(tp, 0xacda, 0xff00); + ocp_reg_set_bits(tp, 0xacde, 0xf000); ocp_reg_write(tp, 0xac8c, 0x0ffc); ocp_reg_write(tp, 0xac46, 0xb7b4); ocp_reg_write(tp, 0xac50, 0x0fbc); @@ -7624,21 +7470,15 @@ static void r8156b_hw_phy_cfg(struct r8152 *tp) ocp_reg_write(tp, 0xb87e, 0x790e); ocp_reg_write(tp, 0xb87c, 0x80b0); ocp_reg_write(tp, 0xb87e, 0x0f31); - data = ocp_reg_read(tp, 0xbf4c); - data |= BIT(1); - ocp_reg_write(tp, 0xbf4c, data); - data = ocp_reg_read(tp, 0xbcca); - data |= BIT(9) | BIT(8); - ocp_reg_write(tp, 0xbcca, data); + ocp_reg_set_bits(tp, 0xbf4c, BIT(1)); + ocp_reg_set_bits(tp, 0xbcca, BIT(9) | BIT(8)); ocp_reg_write(tp, 0xb87c, 0x8141); ocp_reg_write(tp, 0xb87e, 0x320e); ocp_reg_write(tp, 0xb87c, 0x8153); ocp_reg_write(tp, 0xb87e, 0x720e); ocp_reg_write(tp, 0xb87c, 0x8529); ocp_reg_write(tp, 0xb87e, 0x050e); - data = ocp_reg_read(tp, OCP_EEE_CFG); - data &= ~CTAP_SHORT_EN; - ocp_reg_write(tp, OCP_EEE_CFG, data); + ocp_reg_clr_bits(tp, OCP_EEE_CFG, CTAP_SHORT_EN); sram_write(tp, 0x816c, 0xc4a0); sram_write(tp, 0x8170, 0xc4a0); @@ -7671,64 +7511,33 @@ static void r8156b_hw_phy_cfg(struct r8152 *tp) ocp_reg_write(tp, 0xb87c, 0x817b); ocp_reg_write(tp, 0xb87e, 0x1d0a); - data = sram_read(tp, 0x8217); - data &= ~0xff00; - data |= 0x5000; - sram_write(tp, 0x8217, data); - data = sram_read(tp, 0x821a); - data &= ~0xff00; - data |= 0x5000; - sram_write(tp, 0x821a, data); + sram_write_w0w1(tp, 0x8217, 0xff00, 0x5000); + sram_write_w0w1(tp, 0x821a, 0xff00, 0x5000); sram_write(tp, 0x80da, 0x0403); - data = sram_read(tp, 0x80dc); - data &= ~0xff00; - data |= 0x1000; - sram_write(tp, 0x80dc, data); + sram_write_w0w1(tp, 0x80dc, 0xff00, 0x1000); sram_write(tp, 0x80b3, 0x0384); sram_write(tp, 0x80b7, 0x2007); - data = sram_read(tp, 0x80ba); - data &= ~0xff00; - data |= 0x6c00; - sram_write(tp, 0x80ba, data); + sram_write_w0w1(tp, 0x80ba, 0xff00, 0x6c00); sram_write(tp, 0x80b5, 0xf009); - data = sram_read(tp, 0x80bd); - data &= ~0xff00; - data |= 0x9f00; - sram_write(tp, 0x80bd, data); + sram_write_w0w1(tp, 0x80bd, 0xff00, 0x9f00); sram_write(tp, 0x80c7, 0xf083); sram_write(tp, 0x80dd, 0x03f0); - data = sram_read(tp, 0x80df); - data &= ~0xff00; - data |= 0x1000; - sram_write(tp, 0x80df, data); + sram_write_w0w1(tp, 0x80df, 0xff00, 0x1000); sram_write(tp, 0x80cb, 0x2007); - data = sram_read(tp, 0x80ce); - data &= ~0xff00; - data |= 0x6c00; - sram_write(tp, 0x80ce, data); + sram_write_w0w1(tp, 0x80ce, 0xff00, 0x6c00); sram_write(tp, 0x80c9, 0x8009); - data = sram_read(tp, 0x80d1); - data &= ~0xff00; - data |= 0x8000; - sram_write(tp, 0x80d1, data); + sram_write_w0w1(tp, 0x80d1, 0xff00, 0x8000); sram_write(tp, 0x80a3, 0x200a); sram_write(tp, 0x80a5, 0xf0ad); sram_write(tp, 0x809f, 0x6073); sram_write(tp, 0x80a1, 0x000b); - data = sram_read(tp, 0x80a9); - data &= ~0xff00; - data |= 0xc000; - sram_write(tp, 0x80a9, data); + sram_write_w0w1(tp, 0x80a9, 0xff00, 0xc000); if (rtl_phy_patch_request(tp, true, true)) return; - data = ocp_reg_read(tp, 0xb896); - data &= ~BIT(0); - ocp_reg_write(tp, 0xb896, data); - data = ocp_reg_read(tp, 0xb892); - data &= ~0xff00; - ocp_reg_write(tp, 0xb892, data); + ocp_reg_clr_bits(tp, 0xb896, BIT(0)); + ocp_reg_clr_bits(tp, 0xb892, 0xff00); ocp_reg_write(tp, 0xb88e, 0xc23e); ocp_reg_write(tp, 0xb890, 0x0000); ocp_reg_write(tp, 0xb88e, 0xc240); @@ -7743,41 +7552,25 @@ static void r8156b_hw_phy_cfg(struct r8152 *tp) ocp_reg_write(tp, 0xb890, 0x1012); ocp_reg_write(tp, 0xb88e, 0xc24a); ocp_reg_write(tp, 0xb890, 0x1416); - data = ocp_reg_read(tp, 0xb896); - data |= BIT(0); - ocp_reg_write(tp, 0xb896, data); + ocp_reg_set_bits(tp, 0xb896, BIT(0)); rtl_phy_patch_request(tp, false, true); - data = ocp_reg_read(tp, 0xa86a); - data |= BIT(0); - ocp_reg_write(tp, 0xa86a, data); - data = ocp_reg_read(tp, 0xa6f0); - data |= BIT(0); - ocp_reg_write(tp, 0xa6f0, data); + ocp_reg_set_bits(tp, 0xa86a, BIT(0)); + ocp_reg_set_bits(tp, 0xa6f0, BIT(0)); ocp_reg_write(tp, 0xbfa0, 0xd70d); ocp_reg_write(tp, 0xbfa2, 0x4100); ocp_reg_write(tp, 0xbfa4, 0xe868); ocp_reg_write(tp, 0xbfa6, 0xdc59); ocp_reg_write(tp, 0xb54c, 0x3c18); - data = ocp_reg_read(tp, 0xbfa4); - data &= ~BIT(5); - ocp_reg_write(tp, 0xbfa4, data); - data = sram_read(tp, 0x817d); - data |= BIT(12); - sram_write(tp, 0x817d, data); + ocp_reg_clr_bits(tp, 0xbfa4, BIT(5)); + sram_set_bits(tp, 0x817d, BIT(12)); break; case RTL_VER_13: /* 2.5G INRX */ - data = ocp_reg_read(tp, 0xac46); - data &= ~0x00f0; - data |= 0x0090; - ocp_reg_write(tp, 0xac46, data); - data = ocp_reg_read(tp, 0xad30); - data &= ~0x0003; - data |= 0x0001; - ocp_reg_write(tp, 0xad30, data); + ocp_reg_w0w1(tp, 0xac46, 0x00f0, 0x0090); + ocp_reg_w0w1(tp, 0xad30, 0x0003, 0x0001); fallthrough; case RTL_VER_15: /* EEE parameter */ @@ -7786,20 +7579,11 @@ static void r8156b_hw_phy_cfg(struct r8152 *tp) ocp_reg_write(tp, 0xb87c, 0x8107); ocp_reg_write(tp, 0xb87e, 0x360e); ocp_reg_write(tp, 0xb87c, 0x8551); - data = ocp_reg_read(tp, 0xb87e); - data &= ~0xff00; - data |= 0x0800; - ocp_reg_write(tp, 0xb87e, data); + ocp_reg_w0w1(tp, 0xb87e, 0xff00, 0x0800); /* ADC_PGA parameter */ - data = ocp_reg_read(tp, 0xbf00); - data &= ~0xe000; - data |= 0xa000; - ocp_reg_write(tp, 0xbf00, data); - data = ocp_reg_read(tp, 0xbf46); - data &= ~0x0f00; - data |= 0x0300; - ocp_reg_write(tp, 0xbf46, data); + ocp_reg_w0w1(tp, 0xbf00, 0xe000, 0xa000); + ocp_reg_w0w1(tp, 0xbf46, 0x0f00, 0x0300); /* Green Table-PGA, 1G full viterbi */ sram_write(tp, 0x8044, 0x2417); @@ -7814,48 +7598,35 @@ static void r8156b_hw_phy_cfg(struct r8152 *tp) sram_write(tp, 0x807a, 0x2417); /* XG PLL */ - data = ocp_reg_read(tp, 0xbf84); - data &= ~0xe000; - data |= 0xa000; - ocp_reg_write(tp, 0xbf84, data); + ocp_reg_w0w1(tp, 0xbf84, 0xe000, 0xa000); break; default: break; } /* Notify the MAC when the speed is changed to force mode. */ - data = ocp_reg_read(tp, OCP_INTR_EN); - data |= INTR_SPEED_FORCE; - ocp_reg_write(tp, OCP_INTR_EN, data); + ocp_reg_set_bits(tp, OCP_INTR_EN, INTR_SPEED_FORCE); if (rtl_phy_patch_request(tp, true, true)) return; ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, EEE_SPDWN_EN); - data = ocp_reg_read(tp, OCP_DOWN_SPEED); - data &= ~(EN_EEE_100 | EN_EEE_1000); - data |= EN_10M_CLKDIV; - ocp_reg_write(tp, OCP_DOWN_SPEED, data); + ocp_reg_w0w1(tp, OCP_DOWN_SPEED, EN_EEE_100 | EN_EEE_1000, + EN_10M_CLKDIV); tp->ups_info._10m_ckdiv = true; tp->ups_info.eee_plloff_100 = false; tp->ups_info.eee_plloff_giga = false; - data = ocp_reg_read(tp, OCP_POWER_CFG); - data &= ~EEE_CLKDIV_EN; - ocp_reg_write(tp, OCP_POWER_CFG, data); + ocp_reg_clr_bits(tp, OCP_POWER_CFG, EEE_CLKDIV_EN); tp->ups_info.eee_ckdiv = false; rtl_phy_patch_request(tp, false, true); rtl_green_en(tp, test_bit(GREEN_ETHERNET, &tp->flags)); - data = ocp_reg_read(tp, 0xa428); - data &= ~BIT(9); - ocp_reg_write(tp, 0xa428, data); - data = ocp_reg_read(tp, 0xa5ea); - data &= ~BIT(0); - ocp_reg_write(tp, 0xa5ea, data); + ocp_reg_clr_bits(tp, 0xa428, BIT(9)); + ocp_reg_clr_bits(tp, 0xa5ea, BIT(0)); tp->ups_info.lite_mode = 0; if (tp->eee_en) @@ -7895,17 +7666,10 @@ static void r8156_init(struct r8152 *tp) } data = r8153_phy_status(tp, 0); - if (data == PHY_STAT_EXT_INIT) { - data = ocp_reg_read(tp, 0xa468); - data &= ~(BIT(3) | BIT(1)); - ocp_reg_write(tp, 0xa468, data); - } + if (data == PHY_STAT_EXT_INIT) + ocp_reg_clr_bits(tp, 0xa468, BIT(3) | BIT(1)); - data = r8152_mdio_read(tp, MII_BMCR); - if (data & BMCR_PDOWN) { - data &= ~BMCR_PDOWN; - r8152_mdio_write(tp, MII_BMCR, data); - } + r8152_mdio_test_and_clr_bit(tp, MII_BMCR, BMCR_PDOWN); data = r8153_phy_status(tp, PHY_STAT_LAN_ON); WARN_ON_ONCE(data != PHY_STAT_LAN_ON); @@ -7993,20 +7757,11 @@ static void r8156b_init(struct r8152 *tp) data = r8153_phy_status(tp, 0); if (data == PHY_STAT_EXT_INIT) { - data = ocp_reg_read(tp, 0xa468); - data &= ~(BIT(3) | BIT(1)); - ocp_reg_write(tp, 0xa468, data); - - data = ocp_reg_read(tp, 0xa466); - data &= ~BIT(0); - ocp_reg_write(tp, 0xa466, data); + ocp_reg_clr_bits(tp, 0xa468, BIT(3) | BIT(1)); + ocp_reg_clr_bits(tp, 0xa466, BIT(0)); } - data = r8152_mdio_read(tp, MII_BMCR); - if (data & BMCR_PDOWN) { - data &= ~BMCR_PDOWN; - r8152_mdio_write(tp, MII_BMCR, data); - } + r8152_mdio_test_and_clr_bit(tp, MII_BMCR, BMCR_PDOWN); data = r8153_phy_status(tp, PHY_STAT_LAN_ON); From 9229cb5a941c981633a79c9867f339c4589a9da9 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 27 Mar 2026 13:44:45 -0700 Subject: [PATCH 1059/1561] fbnic: Set Relaxed Ordering PCIe TLP attributes for DMA engines Add ATTR CSR bit field definitions for the DMA engine TLP header configuration registers: AW_CFG: RDE_ATTR[17:15], RQM_ATTR[14:12], TQM_ATTR[11:9] AR_CFG: TDE_ATTR[17:15], RQM_ATTR[14:12], TQM_ATTR[11:9] These fields control the PCIe TLP attribute bits for outbound transactions from the TQM, RQM, RDE (write path), and TDE (read path) DMA engines. An enum is added with standard PCIe TLP attribute values: NS (No Snoop), RO (Relaxed Ordering), and IDO (ID-based Ordering). Read the PCIe Relaxed Ordering capability at probe time and store it in fbnic_dev. Configure Relaxed Ordering on the PCIe TLP attributes in fbnic_mbx_init_desc_ring when the capability is enabled. For the write path (AW_CFG), set RO on RDE and TQM attributes. For the read path (AR_CFG), set RO on all three attributes (TDE, RQM, TQM). This allows the PCIe fabric to reorder these transactions for improved throughput. Signed-off-by: Alexander Duyck Signed-off-by: Dimitri Daskalakis Link: https://patch.msgid.link/20260327204445.3074446-1-dimitri.daskalakis1@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/meta/fbnic/fbnic.h | 1 + drivers/net/ethernet/meta/fbnic/fbnic_csr.h | 12 ++++++++++++ drivers/net/ethernet/meta/fbnic/fbnic_devlink.c | 1 + drivers/net/ethernet/meta/fbnic/fbnic_fw.c | 15 +++++++++++++-- 4 files changed, 27 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic.h b/drivers/net/ethernet/meta/fbnic/fbnic.h index f7df5302e91a..d0715695c43e 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic.h @@ -57,6 +57,7 @@ struct fbnic_dev { u64 dsn; u32 mps; u32 readrq; + u8 relaxed_ord; /* Local copy of the devices TCAM */ struct fbnic_act_tcam act_tcam[FBNIC_RPC_TCAM_ACT_NUM_ENTRIES]; diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h index 43de522af172..81794bd326e1 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h @@ -975,9 +975,21 @@ enum { #define FBNIC_PUL_OB_TLP_HDR_AW_CFG 0x3103d /* 0xc40f4 */ #define FBNIC_PUL_OB_TLP_HDR_AW_CFG_FLUSH CSR_BIT(19) #define FBNIC_PUL_OB_TLP_HDR_AW_CFG_BME CSR_BIT(18) +#define FBNIC_PUL_OB_TLP_HDR_AW_CFG_RDE_ATTR CSR_GENMASK(17, 15) +#define FBNIC_PUL_OB_TLP_HDR_AW_CFG_RQM_ATTR CSR_GENMASK(14, 12) +#define FBNIC_PUL_OB_TLP_HDR_AW_CFG_TQM_ATTR CSR_GENMASK(11, 9) +enum { + FBNIC_TLP_ATTR_NS = 1, + FBNIC_TLP_ATTR_RO = 2, + FBNIC_TLP_ATTR_IDO = 4, +}; + #define FBNIC_PUL_OB_TLP_HDR_AR_CFG 0x3103e /* 0xc40f8 */ #define FBNIC_PUL_OB_TLP_HDR_AR_CFG_FLUSH CSR_BIT(19) #define FBNIC_PUL_OB_TLP_HDR_AR_CFG_BME CSR_BIT(18) +#define FBNIC_PUL_OB_TLP_HDR_AR_CFG_TDE_ATTR CSR_GENMASK(17, 15) +#define FBNIC_PUL_OB_TLP_HDR_AR_CFG_RQM_ATTR CSR_GENMASK(14, 12) +#define FBNIC_PUL_OB_TLP_HDR_AR_CFG_TQM_ATTR CSR_GENMASK(11, 9) #define FBNIC_PUL_USER_OB_RD_TLP_CNT_31_0 \ 0x3106e /* 0xc41b8 */ #define FBNIC_PUL_USER_OB_RD_DWORD_CNT_31_0 \ diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c b/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c index f1c992f5fe94..546e1c12d287 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c @@ -647,6 +647,7 @@ struct fbnic_dev *fbnic_devlink_alloc(struct pci_dev *pdev) fbd->dsn = pci_get_dsn(pdev); fbd->mps = pcie_get_mps(pdev); fbd->readrq = pcie_get_readrq(pdev); + fbd->relaxed_ord = pcie_relaxed_ordering_enabled(pdev); fbd->mac_addr_boundary = FBNIC_RPC_TCAM_MACDA_DEFAULT_BOUNDARY; diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c index c2bad51bdde6..0c6812fcf185 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c @@ -448,6 +448,7 @@ static int fbnic_fw_xmit_simple_msg(struct fbnic_dev *fbd, u32 msg_type) static int fbnic_mbx_init_desc_ring(struct fbnic_dev *fbd, int mbx_idx) { + u8 tlp_attr = fbd->relaxed_ord ? FBNIC_TLP_ATTR_RO : 0; struct fbnic_fw_mbx *mbx = &fbd->mbx[mbx_idx]; mbx->ready = true; @@ -456,14 +457,24 @@ static int fbnic_mbx_init_desc_ring(struct fbnic_dev *fbd, int mbx_idx) case FBNIC_IPC_MBX_RX_IDX: /* Enable DMA writes from the device */ wr32(fbd, FBNIC_PUL_OB_TLP_HDR_AW_CFG, - FBNIC_PUL_OB_TLP_HDR_AW_CFG_BME); + FBNIC_PUL_OB_TLP_HDR_AW_CFG_BME | + FIELD_PREP(FBNIC_PUL_OB_TLP_HDR_AW_CFG_RDE_ATTR, + tlp_attr) | + FIELD_PREP(FBNIC_PUL_OB_TLP_HDR_AW_CFG_TQM_ATTR, + tlp_attr)); /* Make sure we have a page for the FW to write to */ return fbnic_mbx_alloc_rx_msgs(fbd); case FBNIC_IPC_MBX_TX_IDX: /* Enable DMA reads from the device */ wr32(fbd, FBNIC_PUL_OB_TLP_HDR_AR_CFG, - FBNIC_PUL_OB_TLP_HDR_AR_CFG_BME); + FBNIC_PUL_OB_TLP_HDR_AR_CFG_BME | + FIELD_PREP(FBNIC_PUL_OB_TLP_HDR_AR_CFG_TDE_ATTR, + tlp_attr) | + FIELD_PREP(FBNIC_PUL_OB_TLP_HDR_AR_CFG_RQM_ATTR, + tlp_attr) | + FIELD_PREP(FBNIC_PUL_OB_TLP_HDR_AR_CFG_TQM_ATTR, + tlp_attr)); break; } From e9abf1da0af3f787a03b249945e5ca726c1b8013 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Mon, 30 Mar 2026 23:52:09 +0100 Subject: [PATCH 1060/1561] net: dsa: mxl862xx: cancel pending work on probe error Call mxl862xx_host_shutdown() in case dsa_register_switch() returns an error, so any still pending crc_err_work get canceled. Fixes: a319d0c8c8ce ("net: dsa: mxl862xx: add CRC for MDIO communication") Signed-off-by: Daniel Golle Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/3fd163f5bb88de426ca9847549f94b4296170ef0.1774911025.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mxl862xx/mxl862xx.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/mxl862xx/mxl862xx.c b/drivers/net/dsa/mxl862xx/mxl862xx.c index 78eef639628a..af9b00b2d2c4 100644 --- a/drivers/net/dsa/mxl862xx/mxl862xx.c +++ b/drivers/net/dsa/mxl862xx/mxl862xx.c @@ -407,6 +407,7 @@ static int mxl862xx_probe(struct mdio_device *mdiodev) struct device *dev = &mdiodev->dev; struct mxl862xx_priv *priv; struct dsa_switch *ds; + int err; priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); if (!priv) @@ -428,7 +429,11 @@ static int mxl862xx_probe(struct mdio_device *mdiodev) dev_set_drvdata(dev, ds); - return dsa_register_switch(ds); + err = dsa_register_switch(ds); + if (err) + mxl862xx_host_shutdown(priv); + + return err; } static void mxl862xx_remove(struct mdio_device *mdiodev) From 75171eeff3538c6859b50f6a516342c8f1ed6e00 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Mon, 30 Mar 2026 23:53:10 +0100 Subject: [PATCH 1061/1561] net: phy: bcm84881: add BCM84891/BCM84892 support The BCM84891 and BCM84892 are 10GBASE-T PHYs in the same family as the BCM84881, sharing the register map and most callbacks. They add USXGMII as a host interface mode. bcm8489x_config_init() is separate from bcm84881_config_init(): it allows only USXGMII (the only host mode available on the tested hardware) and clears MDIO_CTRL1_LPOWER, which is set at boot on the tested platform. Does not recur on ifdown/ifup, cable events, or link-partner advertisement changes, so config_init is sufficient. For USXGMII, read_status() skips the 0x4011 host-mode register: it returns the same value regardless of negotiated copper speed (USXGMII symbol replication). Speed comes from phy_resolve_aneg_linkmode() via standard C45 AN resolution. Tested on TRENDnet TEG-S750 (RTL9303 + 1x BCM84891 + 4x BCM84892) running OpenWrt, where the MDIO controller driver is currently OpenWrt-specific. Link verified at 100M, 1G, 2.5G, 10G. Signed-off-by: Daniel Wagner Reviewed-by: Florian Fainelli Reviewed-by: Nicolai Buchwitz Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20260330225310.2801264-1-wagner.daniel.t@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/bcm84881.c | 48 +++++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/bcm84881.c b/drivers/net/phy/bcm84881.c index d7f7cc44c532..f114212dd303 100644 --- a/drivers/net/phy/bcm84881.c +++ b/drivers/net/phy/bcm84881.c @@ -54,6 +54,21 @@ static int bcm84881_config_init(struct phy_device *phydev) return 0; } +static int bcm8489x_config_init(struct phy_device *phydev) +{ + __set_bit(PHY_INTERFACE_MODE_USXGMII, phydev->possible_interfaces); + + if (phydev->interface != PHY_INTERFACE_MODE_USXGMII) + return -ENODEV; + + /* MDIO_CTRL1_LPOWER is set at boot on the tested platform. Does not + * recur on ifdown/ifup, cable events, or link-partner advertisement + * changes; clear it once. + */ + return phy_clear_bits_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_CTRL1, + MDIO_CTRL1_LPOWER); +} + static int bcm84881_probe(struct phy_device *phydev) { /* This driver requires PMAPMD and AN blocks */ @@ -201,6 +216,15 @@ static int bcm84881_read_status(struct phy_device *phydev) return 0; } + /* BCM84891/92 on USXGMII: the host interface mode doesn't change + * with copper speed (USXGMII symbol replication; the MAC receives + * the negotiated copper speed, not 10G, so no rate adaptation). + * Skip 0x4011; phy_resolve_aneg_linkmode() above already set the + * speed. Only bcm8489x_config_init() allows USXGMII. + */ + if (phydev->interface == PHY_INTERFACE_MODE_USXGMII) + return genphy_c45_read_mdix(phydev); + /* Set the host link mode - we set the phy interface mode and * the speed according to this register so that downshift works. * We leave the duplex setting as per the resolution from the @@ -256,6 +280,26 @@ static struct phy_driver bcm84881_drivers[] = { .config_aneg = bcm84881_config_aneg, .aneg_done = bcm84881_aneg_done, .read_status = bcm84881_read_status, + }, { + PHY_ID_MATCH_MODEL(0x35905080), + .name = "Broadcom BCM84891", + .inband_caps = bcm84881_inband_caps, + .config_init = bcm8489x_config_init, + .probe = bcm84881_probe, + .get_features = bcm84881_get_features, + .config_aneg = bcm84881_config_aneg, + .aneg_done = bcm84881_aneg_done, + .read_status = bcm84881_read_status, + }, { + PHY_ID_MATCH_MODEL(0x359050a0), + .name = "Broadcom BCM84892", + .inband_caps = bcm84881_inband_caps, + .config_init = bcm8489x_config_init, + .probe = bcm84881_probe, + .get_features = bcm84881_get_features, + .config_aneg = bcm84881_config_aneg, + .aneg_done = bcm84881_aneg_done, + .read_status = bcm84881_read_status, }, }; @@ -264,9 +308,11 @@ module_phy_driver(bcm84881_drivers); /* FIXME: module auto-loading for Clause 45 PHYs seems non-functional */ static const struct mdio_device_id __maybe_unused bcm84881_tbl[] = { { 0xae025150, 0xfffffff0 }, + { PHY_ID_MATCH_MODEL(0x35905080) }, + { PHY_ID_MATCH_MODEL(0x359050a0) }, { }, }; MODULE_AUTHOR("Russell King"); -MODULE_DESCRIPTION("Broadcom BCM84881 PHY driver"); +MODULE_DESCRIPTION("Broadcom BCM84881/BCM84891/BCM84892 PHY driver"); MODULE_DEVICE_TABLE(mdio, bcm84881_tbl); MODULE_LICENSE("GPL"); From f843687c30272d55739ef153ace29c58db2575ee Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 30 Mar 2026 17:19:30 -0700 Subject: [PATCH 1062/1561] selftests: drv-net: update the README with variants Test authors need to know about variants, existing tests don't use them because variants are relatively recent. Reviewed-by: Joe Damato Link: https://patch.msgid.link/20260331001930.3411279-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- .../testing/selftests/drivers/net/README.rst | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tools/testing/selftests/drivers/net/README.rst b/tools/testing/selftests/drivers/net/README.rst index c94992acf10b..b26b364be534 100644 --- a/tools/testing/selftests/drivers/net/README.rst +++ b/tools/testing/selftests/drivers/net/README.rst @@ -253,6 +253,39 @@ By default the tests are expected to be able to run on single-interface systems. All tests which may disconnect ``NETIF`` must be annotated with ``@ksft_disruptive``. +ksft_variants +~~~~~~~~~~~~~ + +Use the ``@ksft_variants`` decorator to run a test with multiple sets +of inputs as separate test cases. This avoids duplicating test functions +that only differ in parameters. + +Parameters can be a single value, a tuple, or a ``KsftNamedVariant`` +(which gives an explicit name to the sub-case). The argument to the +decorator can be a list or a generator. + +Example:: + + @ksft_variants([ + KsftNamedVariant("main", False), + KsftNamedVariant("ctx", True), + ]) + def resize_periodic(cfg, create_context): + # test body receives (cfg, create_context) where create_context + # is False for the "main" variant and True for "ctx" + pass + +or:: + + def _gro_variants(): + for mode in ["sw", "hw"]: + for protocol in ["tcp4", "tcp6"]: + yield (mode, protocol) + + @ksft_variants(_gro_variants()) + def test(cfg, mode, protocol): + pass + Running tests CI-style ====================== From 6730f184abdcf0cc9b6ee20f97c467ab497a900d Mon Sep 17 00:00:00 2001 From: Izabela Bakollari Date: Mon, 30 Mar 2026 08:32:35 +0200 Subject: [PATCH 1063/1561] sfc: add transmit timestamping support Add ethtool op to advertise TX timestamping. Insert a skb_tx_timestamp call in __efx_enqueue_skb. Signed-off-by: Izabela Bakollari Reviewed-by: Edward Cree Link: https://patch.msgid.link/20260330063236.247057-1-ibakolla@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/ethtool.c | 2 ++ drivers/net/ethernet/sfc/tx.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index 362388754a29..c0038b942913 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -231,6 +231,8 @@ static int efx_ethtool_get_ts_info(struct net_device *net_dev, { struct efx_nic *efx = efx_netdev_priv(net_dev); + ts_info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE; + efx_ptp_get_ts_info(efx, ts_info); return 0; } diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 4dff19b6ef17..ea0fc1000476 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -371,6 +371,8 @@ netdev_tx_t __efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb if (!data_mapped && (efx_tx_map_data(tx_queue, skb, segments))) goto err; + skb_tx_timestamp(skb); + efx_tx_maybe_stop_queue(tx_queue); tx_queue->xmit_pending = true; From da45a55748f2cd89fb6100df46821af69bdcea99 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 30 Mar 2026 12:26:10 +0200 Subject: [PATCH 1064/1561] net: hso: refactor endpoint lookup Use the common USB helpers for looking up bulk and interrupt endpoints instead of a custom implementation. Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260330102611.1671546-2-johan@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/usb/hso.c | 65 +++++++++++-------------------------------- 1 file changed, 17 insertions(+), 48 deletions(-) diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index 1825f7cf5dc0..c1aec67688ae 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -298,8 +298,6 @@ static int hso_mux_submit_intr_urb(struct hso_shared_int *mux_int, struct usb_device *usb, gfp_t gfp); static void handle_usb_error(int status, const char *function, struct hso_device *hso_dev); -static struct usb_endpoint_descriptor *hso_get_ep(struct usb_interface *intf, - int type, int dir); static int hso_get_mux_ports(struct usb_interface *intf, unsigned char *ports); static void hso_free_interface(struct usb_interface *intf); static int hso_start_serial_device(struct hso_device *hso_dev, gfp_t flags); @@ -2497,16 +2495,11 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface, hso_net->net = net; hso_net->parent = hso_dev; - hso_net->in_endp = hso_get_ep(interface, USB_ENDPOINT_XFER_BULK, - USB_DIR_IN); - if (!hso_net->in_endp) { - dev_err(&interface->dev, "Can't find BULK IN endpoint\n"); - goto err_net; - } - hso_net->out_endp = hso_get_ep(interface, USB_ENDPOINT_XFER_BULK, - USB_DIR_OUT); - if (!hso_net->out_endp) { - dev_err(&interface->dev, "Can't find BULK OUT endpoint\n"); + result = usb_find_common_endpoints(interface->cur_altsetting, + &hso_net->in_endp, &hso_net->out_endp, + NULL, NULL); + if (result) { + dev_err(&interface->dev, "Can't find BULK endpoints\n"); goto err_net; } SET_NETDEV_DEV(net, &interface->dev); @@ -2608,10 +2601,12 @@ static void hso_free_serial_device(struct hso_device *hso_dev) static struct hso_device *hso_create_bulk_serial_device( struct usb_interface *interface, int port) { + struct usb_host_interface *iface_desc = interface->cur_altsetting; struct hso_device *hso_dev; struct hso_serial *serial; int num_urbs; struct hso_tiocmget *tiocmget; + int ret; hso_dev = hso_create_device(interface, port); if (!hso_dev) @@ -2634,10 +2629,8 @@ static struct hso_device *hso_create_bulk_serial_device( if (!serial->tiocmget->serial_state_notification) goto exit; tiocmget = serial->tiocmget; - tiocmget->endp = hso_get_ep(interface, - USB_ENDPOINT_XFER_INT, - USB_DIR_IN); - if (!tiocmget->endp) { + ret = usb_find_int_in_endpoint(iface_desc, &tiocmget->endp); + if (ret) { dev_err(&interface->dev, "Failed to find INT IN ep\n"); goto exit; } @@ -2656,17 +2649,10 @@ static struct hso_device *hso_create_bulk_serial_device( BULK_URB_TX_SIZE)) goto exit; - serial->in_endp = hso_get_ep(interface, USB_ENDPOINT_XFER_BULK, - USB_DIR_IN); - if (!serial->in_endp) { - dev_err(&interface->dev, "Failed to find BULK IN ep\n"); - goto exit2; - } - - if (! - (serial->out_endp = - hso_get_ep(interface, USB_ENDPOINT_XFER_BULK, USB_DIR_OUT))) { - dev_err(&interface->dev, "Failed to find BULK OUT ep\n"); + ret = usb_find_common_endpoints(iface_desc, &serial->in_endp, + &serial->out_endp, NULL, NULL); + if (ret) { + dev_err(&interface->dev, "Failed to find BULK eps\n"); goto exit2; } @@ -2754,13 +2740,14 @@ static struct hso_shared_int *hso_create_shared_int(struct usb_interface *interface) { struct hso_shared_int *mux = kzalloc_obj(*mux); + int ret; if (!mux) return NULL; - mux->intr_endp = hso_get_ep(interface, USB_ENDPOINT_XFER_INT, - USB_DIR_IN); - if (!mux->intr_endp) { + ret = usb_find_int_in_endpoint(interface->cur_altsetting, + &mux->intr_endp); + if (ret) { dev_err(&interface->dev, "Can't find INT IN endpoint\n"); goto exit; } @@ -3134,24 +3121,6 @@ static void hso_free_interface(struct usb_interface *interface) /* Helper functions */ -/* Get the endpoint ! */ -static struct usb_endpoint_descriptor *hso_get_ep(struct usb_interface *intf, - int type, int dir) -{ - int i; - struct usb_host_interface *iface = intf->cur_altsetting; - struct usb_endpoint_descriptor *endp; - - for (i = 0; i < iface->desc.bNumEndpoints; i++) { - endp = &iface->endpoint[i].desc; - if (((endp->bEndpointAddress & USB_ENDPOINT_DIR_MASK) == dir) && - (usb_endpoint_type(endp) == type)) - return endp; - } - - return NULL; -} - /* Get the byte that describes which ports are enabled */ static int hso_get_mux_ports(struct usb_interface *intf, unsigned char *ports) { From abfca6b13bcfe4f2b70a88190d61f162656ed44e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 30 Mar 2026 12:26:11 +0200 Subject: [PATCH 1065/1561] net: ipeth: refactor endpoint lookup Use the common USB helper for looking up bulk and interrupt endpoints instead of open coding. Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260330102611.1671546-3-johan@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/usb/ipheth.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c index a19789b57190..bb1364f85bd1 100644 --- a/drivers/net/usb/ipheth.c +++ b/drivers/net/usb/ipheth.c @@ -573,11 +573,10 @@ static int ipheth_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(intf); + struct usb_endpoint_descriptor *ep_in, *ep_out; struct usb_host_interface *hintf; - struct usb_endpoint_descriptor *endp; struct ipheth_device *dev; struct net_device *netdev; - int i; int retval; netdev = alloc_etherdev(sizeof(struct ipheth_device)); @@ -603,19 +602,16 @@ static int ipheth_probe(struct usb_interface *intf, goto err_endpoints; } - for (i = 0; i < hintf->desc.bNumEndpoints; i++) { - endp = &hintf->endpoint[i].desc; - if (usb_endpoint_is_bulk_in(endp)) - dev->bulk_in = endp->bEndpointAddress; - else if (usb_endpoint_is_bulk_out(endp)) - dev->bulk_out = endp->bEndpointAddress; - } - if (!(dev->bulk_in && dev->bulk_out)) { - retval = -ENODEV; + retval = usb_find_common_endpoints_reverse(hintf, &ep_in, &ep_out, + NULL, NULL); + if (retval) { dev_err(&intf->dev, "Unable to find endpoints\n"); goto err_endpoints; } + dev->bulk_in = ep_in->bEndpointAddress; + dev->bulk_out = ep_out->bEndpointAddress; + dev->ctrl_buf = kmalloc(IPHETH_CTRL_BUF_SIZE, GFP_KERNEL); if (dev->ctrl_buf == NULL) { retval = -ENOMEM; From dba69cba4a5d827a0d05b9692bf8b4d9cbfe9206 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 30 Mar 2026 12:36:53 +0200 Subject: [PATCH 1066/1561] nfc: nfcmrvl: refactor endpoint lookup Use the common USB helper for looking up bulk and interrupt endpoints instead of open coding. Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260330103655.1672331-2-johan@kernel.org Signed-off-by: Jakub Kicinski --- drivers/nfc/nfcmrvl/usb.c | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/drivers/nfc/nfcmrvl/usb.c b/drivers/nfc/nfcmrvl/usb.c index ac62358445bf..4babde8e4249 100644 --- a/drivers/nfc/nfcmrvl/usb.c +++ b/drivers/nfc/nfcmrvl/usb.c @@ -288,9 +288,9 @@ static int nfcmrvl_probe(struct usb_interface *intf, { struct nfcmrvl_usb_drv_data *drv_data; struct nfcmrvl_private *priv; - int i; struct usb_device *udev = interface_to_usbdev(intf); struct nfcmrvl_platform_data config; + int ret; /* No configuration for USB */ memset(&config, 0, sizeof(config)); @@ -302,21 +302,9 @@ static int nfcmrvl_probe(struct usb_interface *intf, if (!drv_data) return -ENOMEM; - for (i = 0; i < intf->cur_altsetting->desc.bNumEndpoints; i++) { - struct usb_endpoint_descriptor *ep_desc; - - ep_desc = &intf->cur_altsetting->endpoint[i].desc; - - if (!drv_data->bulk_tx_ep && - usb_endpoint_is_bulk_out(ep_desc)) { - drv_data->bulk_tx_ep = ep_desc; - } else if (!drv_data->bulk_rx_ep && - usb_endpoint_is_bulk_in(ep_desc)) { - drv_data->bulk_rx_ep = ep_desc; - } - } - - if (!drv_data->bulk_tx_ep || !drv_data->bulk_rx_ep) + ret = usb_find_common_endpoints(intf->cur_altsetting, &drv_data->bulk_rx_ep, + &drv_data->bulk_tx_ep, NULL, NULL); + if (ret) return -ENODEV; drv_data->udev = udev; From 13f2e141b944cf24171db83ae4ee07caf83d2123 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 30 Mar 2026 12:36:54 +0200 Subject: [PATCH 1067/1561] nfc: pn533: refactor endpoint lookup Use the common USB helpers for looking up bulk and interrupt endpoints (and determining endpoint numbers) instead of open coding. Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260330103655.1672331-3-johan@kernel.org Signed-off-by: Jakub Kicinski --- drivers/nfc/pn533/usb.c | 34 +++++++++++----------------------- 1 file changed, 11 insertions(+), 23 deletions(-) diff --git a/drivers/nfc/pn533/usb.c b/drivers/nfc/pn533/usb.c index 0ff2f0d7caf4..efb07f944fce 100644 --- a/drivers/nfc/pn533/usb.c +++ b/drivers/nfc/pn533/usb.c @@ -476,14 +476,9 @@ static const struct pn533_phy_ops usb_phy_ops = { static int pn533_usb_probe(struct usb_interface *interface, const struct usb_device_id *id) { + struct usb_endpoint_descriptor *ep_in, *ep_out; struct pn533 *priv; struct pn533_usb_phy *phy; - struct usb_host_interface *iface_desc; - struct usb_endpoint_descriptor *endpoint; - int in_endpoint = 0; - int out_endpoint = 0; - int rc = -ENOMEM; - int i; u32 protocols; enum pn533_protocol_type protocol_type = PN533_PROTO_REQ_ACK_RESP; struct pn533_frame_ops *fops = NULL; @@ -491,6 +486,7 @@ static int pn533_usb_probe(struct usb_interface *interface, int in_buf_len = PN533_EXT_FRAME_HEADER_LEN + PN533_STD_FRAME_MAX_PAYLOAD_LEN + PN533_STD_FRAME_TAIL_LEN; + int rc; phy = devm_kzalloc(&interface->dev, sizeof(*phy), GFP_KERNEL); if (!phy) @@ -503,21 +499,11 @@ static int pn533_usb_probe(struct usb_interface *interface, phy->udev = interface_to_usbdev(interface); phy->interface = interface; - iface_desc = interface->cur_altsetting; - for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { - endpoint = &iface_desc->endpoint[i].desc; - - if (!in_endpoint && usb_endpoint_is_bulk_in(endpoint)) - in_endpoint = endpoint->bEndpointAddress; - - if (!out_endpoint && usb_endpoint_is_bulk_out(endpoint)) - out_endpoint = endpoint->bEndpointAddress; - } - - if (!in_endpoint || !out_endpoint) { + rc = usb_find_common_endpoints(interface->cur_altsetting, &ep_in, + &ep_out, NULL, NULL); + if (rc) { nfc_err(&interface->dev, "Could not find bulk-in or bulk-out endpoint\n"); - rc = -ENODEV; goto error; } @@ -525,18 +511,20 @@ static int pn533_usb_probe(struct usb_interface *interface, phy->out_urb = usb_alloc_urb(0, GFP_KERNEL); phy->ack_urb = usb_alloc_urb(0, GFP_KERNEL); - if (!phy->in_urb || !phy->out_urb || !phy->ack_urb) + if (!phy->in_urb || !phy->out_urb || !phy->ack_urb) { + rc = -ENOMEM; goto error; + } usb_fill_bulk_urb(phy->in_urb, phy->udev, - usb_rcvbulkpipe(phy->udev, in_endpoint), + usb_rcvbulkpipe(phy->udev, usb_endpoint_num(ep_in)), in_buf, in_buf_len, NULL, phy); usb_fill_bulk_urb(phy->out_urb, phy->udev, - usb_sndbulkpipe(phy->udev, out_endpoint), + usb_sndbulkpipe(phy->udev, usb_endpoint_num(ep_out)), NULL, 0, pn533_out_complete, phy); usb_fill_bulk_urb(phy->ack_urb, phy->udev, - usb_sndbulkpipe(phy->udev, out_endpoint), + usb_sndbulkpipe(phy->udev, usb_endpoint_num(ep_out)), NULL, 0, pn533_ack_complete, phy); switch (id->driver_info) { From 771a627ddd3b31fc5a27dc7af28a0e59ba13aea1 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 30 Mar 2026 12:36:55 +0200 Subject: [PATCH 1068/1561] nfc: port100: refactor endpoint lookup Use the common USB helpers for looking up bulk and interrupt endpoints (and determining endpoint numbers) instead of open coding. Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260330103655.1672331-4-johan@kernel.org Signed-off-by: Jakub Kicinski --- drivers/nfc/port100.c | 27 ++++++--------------------- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/drivers/nfc/port100.c b/drivers/nfc/port100.c index 09dcc6f4c4f3..5ae61d7ebcfe 100644 --- a/drivers/nfc/port100.c +++ b/drivers/nfc/port100.c @@ -1489,15 +1489,11 @@ MODULE_DEVICE_TABLE(usb, port100_table); static int port100_probe(struct usb_interface *interface, const struct usb_device_id *id) { + struct usb_endpoint_descriptor *ep_in, *ep_out; struct port100 *dev; int rc; - struct usb_host_interface *iface_desc; - struct usb_endpoint_descriptor *endpoint; - int in_endpoint; - int out_endpoint; u16 fw_version; u64 cmd_type_mask; - int i; dev = devm_kzalloc(&interface->dev, sizeof(struct port100), GFP_KERNEL); if (!dev) @@ -1508,22 +1504,11 @@ static int port100_probe(struct usb_interface *interface, dev->interface = interface; usb_set_intfdata(interface, dev); - in_endpoint = out_endpoint = 0; - iface_desc = interface->cur_altsetting; - for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { - endpoint = &iface_desc->endpoint[i].desc; - - if (!in_endpoint && usb_endpoint_is_bulk_in(endpoint)) - in_endpoint = endpoint->bEndpointAddress; - - if (!out_endpoint && usb_endpoint_is_bulk_out(endpoint)) - out_endpoint = endpoint->bEndpointAddress; - } - - if (!in_endpoint || !out_endpoint) { + rc = usb_find_common_endpoints(interface->cur_altsetting, &ep_in, + &ep_out, NULL, NULL); + if (rc) { nfc_err(&interface->dev, "Could not find bulk-in or bulk-out endpoint\n"); - rc = -ENODEV; goto error; } @@ -1537,10 +1522,10 @@ static int port100_probe(struct usb_interface *interface, } usb_fill_bulk_urb(dev->in_urb, dev->udev, - usb_rcvbulkpipe(dev->udev, in_endpoint), + usb_rcvbulkpipe(dev->udev, usb_endpoint_num(ep_in)), NULL, 0, NULL, dev); usb_fill_bulk_urb(dev->out_urb, dev->udev, - usb_sndbulkpipe(dev->udev, out_endpoint), + usb_sndbulkpipe(dev->udev, usb_endpoint_num(ep_out)), NULL, 0, port100_send_complete, dev); dev->out_urb->transfer_flags = URB_ZERO_PACKET; From a94ddc191f19579a7e0a5da2c012f1048ce10262 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 30 Mar 2026 00:03:49 +0200 Subject: [PATCH 1069/1561] net: airoha: Fix typo in airoha_set_gdm2_loopback routine name Rename airhoha_set_gdm2_loopback() in airoha_set_gdm2_loopback() Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260330-airoha_set_gdm2_loopback-fix-typo-v1-1-a1320ff6b6cc@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 82e53c60f561..2beba017e791 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -1709,7 +1709,7 @@ static int airoha_dev_set_macaddr(struct net_device *dev, void *p) return 0; } -static int airhoha_set_gdm2_loopback(struct airoha_gdm_port *port) +static int airoha_set_gdm2_loopback(struct airoha_gdm_port *port) { struct airoha_eth *eth = port->qdma->eth; u32 val, pse_port, chan, nbq; @@ -1785,7 +1785,7 @@ static int airoha_dev_init(struct net_device *dev) if (!eth->ports[1]) { int err; - err = airhoha_set_gdm2_loopback(port); + err = airoha_set_gdm2_loopback(port); if (err) return err; } From ee769323b1bf60c0ec0338cc5ee6b1c725624ec6 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Sun, 29 Mar 2026 19:07:24 +0100 Subject: [PATCH 1070/1561] declance: Rate-limit DMA errors Prevent the system from becoming unusable due to a flood of DMA error messages. Signed-off-by: Maciej W. Rozycki Link: https://patch.msgid.link/alpine.DEB.2.21.2603291838370.60268@angie.orcam.me.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/declance.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amd/declance.c b/drivers/net/ethernet/amd/declance.c index e6d56fcdc1dd..24aa4803b4ae 100644 --- a/drivers/net/ethernet/amd/declance.c +++ b/drivers/net/ethernet/amd/declance.c @@ -727,7 +727,7 @@ static irqreturn_t lance_dma_merr_int(int irq, void *dev_id) { struct net_device *dev = dev_id; - printk(KERN_ERR "%s: DMA error\n", dev->name); + pr_err_ratelimited("%s: DMA error\n", dev->name); return IRQ_HANDLED; } From aae5efaeb8aa4ada710d5b0cdbab77b9539c69eb Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Sun, 29 Mar 2026 19:07:41 +0100 Subject: [PATCH 1071/1561] declance: Include the offending address with DMA errors The address latched in the I/O ASIC LANCE DMA Pointer Register uses the TURBOchannel bus address encoding and therefore bits 33:29 of location referred occupy bits 4:0, bits 28:2 are left-shifted by 3, and bits 1:0 are hardwired to zero. In reality no TURBOchannel system exceeds 1GiB of RAM though, so the address reported will always fit in 8 hex digits. Signed-off-by: Maciej W. Rozycki Link: https://patch.msgid.link/alpine.DEB.2.21.2603291839220.60268@angie.orcam.me.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/declance.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amd/declance.c b/drivers/net/ethernet/amd/declance.c index 24aa4803b4ae..c7d47ca603a8 100644 --- a/drivers/net/ethernet/amd/declance.c +++ b/drivers/net/ethernet/amd/declance.c @@ -726,8 +726,10 @@ out: static irqreturn_t lance_dma_merr_int(int irq, void *dev_id) { struct net_device *dev = dev_id; + u64 ldp = ioasic_read(IO_REG_LANCE_DMA_P); - pr_err_ratelimited("%s: DMA error\n", dev->name); + pr_err_ratelimited("%s: DMA error at %#010llx\n", dev->name, + (ldp & 0x1f) << 29 | (ldp & 0xffffffe0) >> 3); return IRQ_HANDLED; } From d7709812e13d06132ddae3d21540472ea5cb11c5 Mon Sep 17 00:00:00 2001 From: Erni Sri Satya Vennela Date: Thu, 26 Mar 2026 10:30:56 -0700 Subject: [PATCH 1072/1561] net: mana: hardening: Validate adapter_mtu from MANA_QUERY_DEV_CONFIG As a part of MANA hardening for CVM, validate the adapter_mtu value returned from the MANA_QUERY_DEV_CONFIG HWC command. The adapter_mtu value is used to compute ndev->max_mtu via: gc->adapter_mtu - ETH_HLEN. If hardware returns a bogus adapter_mtu smaller than ETH_HLEN (e.g. 0), the unsigned subtraction wraps to a huge value, silently allowing oversized MTU settings. Add a validation check to reject adapter_mtu values below ETH_MIN_MTU + ETH_HLEN, returning -EPROTO to fail the device configuration early with a clear error message. Signed-off-by: Erni Sri Satya Vennela Link: https://patch.msgid.link/20260326173101.2010514-1-ernis@linux.microsoft.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microsoft/mana/mana_en.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index a9e0f99105aa..565375cc20d3 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -1207,10 +1207,16 @@ static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver, *max_num_vports = resp.max_num_vports; - if (resp.hdr.response.msg_version >= GDMA_MESSAGE_V2) + if (resp.hdr.response.msg_version >= GDMA_MESSAGE_V2) { + if (resp.adapter_mtu < ETH_MIN_MTU + ETH_HLEN) { + dev_err(dev, "Adapter MTU too small: %u\n", + resp.adapter_mtu); + return -EPROTO; + } gc->adapter_mtu = resp.adapter_mtu; - else + } else { gc->adapter_mtu = ETH_FRAME_LEN; + } if (resp.hdr.response.msg_version >= GDMA_MESSAGE_V3) *bm_hostmode = resp.bm_hostmode; From 58e416e283284c15b8daf0b2b425f43242f62b0a Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Mon, 30 Mar 2026 14:10:33 +0200 Subject: [PATCH 1073/1561] net: vxlan: check ipv6_mod_enabled() on neigh_reduce() IPv6 must be enabled or otherwise neigh_reduce() might cause a kernel panic. This was prevented by a check on in6_dev. Use ipv6_mod_enabled() instead as it is cleaner and also consistent with the code at route_shortcircuit(). Signed-off-by: Fernando Fernandez Mancera Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20260330121033.4479-1-fmancera@suse.de Signed-off-by: Jakub Kicinski --- drivers/net/vxlan/vxlan_core.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index b5fbd03418b6..b3b692c4b628 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -2027,13 +2027,11 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni) struct vxlan_dev *vxlan = netdev_priv(dev); const struct in6_addr *daddr; const struct ipv6hdr *iphdr; - struct inet6_dev *in6_dev; struct neighbour *n; struct nd_msg *msg; rcu_read_lock(); - in6_dev = __in6_dev_get(dev); - if (!in6_dev) + if (unlikely(!ipv6_mod_enabled())) goto out; iphdr = ipv6_hdr(skb); From f1359c240191e686614847905fc861cbda480b47 Mon Sep 17 00:00:00 2001 From: Nagamani PV Date: Mon, 30 Mar 2026 13:44:36 +0200 Subject: [PATCH 1074/1561] net/iucv: Add missing kernel-doc return value descriptions Add missing return value descriptions for several functions in net/iucv/af_iucv.c and net/iucv/iucv.c to address kernel-doc warnings. Warnings detected with: scripts/kernel-doc -none -Wall net/iucv/* Warning: net/iucv/af_iucv.c:131 No description found for return value of 'iucv_msg_length' Warning: net/iucv/af_iucv.c:150 No description found for return value of 'iucv_sock_in_state' ... No functional change. Reviewed-by: Aswin Karuvally Reviewed-by: Alexandra Winter Signed-off-by: Nagamani PV Signed-off-by: Alexandra Winter Link: https://patch.msgid.link/20260330114436.2010108-1-wintera@linux.ibm.com Signed-off-by: Jakub Kicinski --- net/iucv/af_iucv.c | 12 +++++++----- net/iucv/iucv.c | 6 ++++++ 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 6554d2cffc19..72dfccd4e3d5 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -127,6 +127,8 @@ static inline void low_nmcpy(unsigned char *dst, char *src) * if the socket data len is > 7, the function returns 8. * * Use this function to allocate socket buffers to store iucv message data. + * + * Returns: Length of the IUCV message. */ static inline size_t iucv_msg_length(struct iucv_message *msg) { @@ -145,7 +147,7 @@ static inline size_t iucv_msg_length(struct iucv_message *msg) * @state: first iucv sk state * @state2: second iucv sk state * - * Returns true if the socket in either in the first or second state. + * Returns: true if the socket is either in the first or second state. */ static int iucv_sock_in_state(struct sock *sk, int state, int state2) { @@ -156,9 +158,9 @@ static int iucv_sock_in_state(struct sock *sk, int state, int state2) * iucv_below_msglim() - function to check if messages can be sent * @sk: sock structure * - * Returns true if the send queue length is lower than the message limit. - * Always returns true if the socket is not connected (no iucv path for - * checking the message limit). + * Returns: true, if either the socket is not connected (no iucv path for + * checking the message limit) or if the send queue length is lower + * than the message limit. */ static inline int iucv_below_msglim(struct sock *sk) { @@ -883,7 +885,7 @@ static int iucv_sock_getname(struct socket *sock, struct sockaddr *addr, * list and the socket data len at index 7 (last byte). * See also iucv_msg_length(). * - * Returns the error code from the iucv_message_send() call. + * Returns: the return code from the iucv_message_send() call. */ static int iucv_send_iprm(struct iucv_path *path, struct iucv_message *msg, struct sk_buff *skb) diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 6641c49b09ac..e03bc4a74f67 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -687,6 +687,8 @@ __free_cpumask: * * @pathid: path identification number. * @userdata: 16-bytes of user data. + * + * Returns: 0 on success, the result of the CP b2f0 IUCV call. */ static int iucv_sever_pathid(u16 pathid, u8 *userdata) { @@ -1092,6 +1094,8 @@ EXPORT_SYMBOL(iucv_message_purge); * * Internal function used by iucv_message_receive and __iucv_message_receive * to receive RMDATA data stored in struct iucv_message. + * + * Returns: 0 */ static int iucv_message_receive_iprmdata(struct iucv_path *path, struct iucv_message *msg, @@ -1852,6 +1856,8 @@ static enum cpuhp_state iucv_online; /** * iucv_init - Allocates and initializes various data structures. + * + * Returns: 0 on success, return code on failure. */ static int __init iucv_init(void) { From 2f41d7867800a78f339fbec3ab3a64147c33a9f1 Mon Sep 17 00:00:00 2001 From: Viken Dadhaniya Date: Sat, 21 Mar 2026 19:20:30 +0530 Subject: [PATCH 1075/1561] dt-bindings: can: mcp251xfd: add microchip,xstbyen property Add the boolean property 'microchip,xstbyen' to enable the dedicated transceiver standby control function on the INT0/GPIO0/XSTBY pin of the MCP251xFD family. Signed-off-by: Viken Dadhaniya Acked-by: Conor Dooley Link: https://patch.msgid.link/20260321135031.3107408-2-viken.dadhaniya@oss.qualcomm.com Signed-off-by: Marc Kleine-Budde --- .../devicetree/bindings/net/can/microchip,mcp251xfd.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Documentation/devicetree/bindings/net/can/microchip,mcp251xfd.yaml b/Documentation/devicetree/bindings/net/can/microchip,mcp251xfd.yaml index 2d13638ebc6a..28e494262cd9 100644 --- a/Documentation/devicetree/bindings/net/can/microchip,mcp251xfd.yaml +++ b/Documentation/devicetree/bindings/net/can/microchip,mcp251xfd.yaml @@ -44,6 +44,14 @@ properties: signals a pending RX interrupt. maxItems: 1 + microchip,xstbyen: + type: boolean + description: + If present, configure the INT0/GPIO0/XSTBY pin as transceiver standby + control. The pin is driven low when the controller is active and high + when it enters Sleep mode, allowing automatic standby control of an + external CAN transceiver connected to this pin. + spi-max-frequency: description: Must be half or less of "clocks" frequency. From 1e41cbbe68e6753e786ddff528de15050bc52803 Mon Sep 17 00:00:00 2001 From: Ethan Nelson-Moore Date: Fri, 30 Jan 2026 03:41:33 -0800 Subject: [PATCH 1076/1561] net: can: ctucanfd: remove useless copy of PCI_DEVICE_DATA macro The ctucanfd driver has its own copy of the PCI_DEVICE_DATA macro. I assume this was done to support older kernel versions where it didn't exist, but that is irrelevant once the driver is in the mainline kernel. Remove it. Signed-off-by: Ethan Nelson-Moore Link: https://patch.msgid.link/20260130114134.47421-1-enelsonmoore@gmail.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/ctucanfd/ctucanfd_pci.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/net/can/ctucanfd/ctucanfd_pci.c b/drivers/net/can/ctucanfd/ctucanfd_pci.c index 7b847b667973..625788fa8976 100644 --- a/drivers/net/can/ctucanfd/ctucanfd_pci.c +++ b/drivers/net/can/ctucanfd/ctucanfd_pci.c @@ -22,14 +22,6 @@ #include "ctucanfd.h" -#ifndef PCI_DEVICE_DATA -#define PCI_DEVICE_DATA(vend, dev, data) \ -.vendor = PCI_VENDOR_ID_##vend, \ -.device = PCI_DEVICE_ID_##vend##_##dev, \ -.subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, 0, 0, \ -.driver_data = (kernel_ulong_t)(data) -#endif - #ifndef PCI_VENDOR_ID_TEDIA #define PCI_VENDOR_ID_TEDIA 0x1760 #endif From 495fac90b8ec3d6de11539b2b05c55ba360586ab Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 30 Mar 2026 12:18:16 +0200 Subject: [PATCH 1077/1561] can: kvaser_usb: leaf: refactor endpoint lookup Use the common USB helper for looking up bulk and interrupt endpoints instead of open coding. Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260330101817.1664787-2-johan@kernel.org Signed-off-by: Marc Kleine-Budde --- .../net/can/usb/kvaser_usb/kvaser_usb_leaf.c | 25 ++++++------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c index fd191ec5738b..df737cfc5ea0 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c @@ -1957,27 +1957,18 @@ static int kvaser_usb_leaf_get_berr_counter(const struct net_device *netdev, static int kvaser_usb_leaf_setup_endpoints(struct kvaser_usb *dev) { - const struct usb_host_interface *iface_desc; - struct usb_endpoint_descriptor *endpoint; - int i; + struct usb_host_interface *iface_desc; + int ret; iface_desc = dev->intf->cur_altsetting; - for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { - endpoint = &iface_desc->endpoint[i].desc; + /* use first bulk endpoint for in and out */ + ret = usb_find_common_endpoints(iface_desc, &dev->bulk_in, &dev->bulk_out, + NULL, NULL); + if (ret) + return -ENODEV; - if (!dev->bulk_in && usb_endpoint_is_bulk_in(endpoint)) - dev->bulk_in = endpoint; - - if (!dev->bulk_out && usb_endpoint_is_bulk_out(endpoint)) - dev->bulk_out = endpoint; - - /* use first bulk endpoint for in and out */ - if (dev->bulk_in && dev->bulk_out) - return 0; - } - - return -ENODEV; + return 0; } const struct kvaser_usb_dev_ops kvaser_usb_leaf_dev_ops = { From ae20301b6119257f533332836c46d8e8824433dd Mon Sep 17 00:00:00 2001 From: Viken Dadhaniya Date: Sat, 21 Mar 2026 19:20:31 +0530 Subject: [PATCH 1078/1561] can: mcp251xfd: add support for XSTBYEN transceiver standby control The MCP251xFD has a dedicated transceiver standby control function on the INT0/GPIO0/XSTBY pin, controlled by the XSTBYEN bit in IOCON. When enabled, the hardware automatically manages the transceiver standby state: the pin is driven low when the controller is active and high when it enters Sleep mode. Enable this feature when the 'microchip,xstbyen' device tree property is present. Signed-off-by: Viken Dadhaniya Link: https://patch.msgid.link/20260321135031.3107408-3-viken.dadhaniya@oss.qualcomm.com Signed-off-by: Marc Kleine-Budde --- .../net/can/spi/mcp251xfd/mcp251xfd-core.c | 37 +++++++++++++++++++ drivers/net/can/spi/mcp251xfd/mcp251xfd.h | 1 + 2 files changed, 38 insertions(+) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 9c86df08c2c5..92a86083c896 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -764,6 +764,31 @@ static void mcp251xfd_chip_stop(struct mcp251xfd_priv *priv, mcp251xfd_chip_set_mode(priv, MCP251XFD_REG_CON_MODE_CONFIG); } +static int mcp251xfd_chip_xstbyen_enable(const struct mcp251xfd_priv *priv) +{ + /* Configure the INT0/GPIO0/XSTBY pin as transceiver standby control: + * + * - XSTBYEN=1: route the pin to the transceiver standby function + * - TRIS0=0: set output direction; the reset default is 1 (input), + * which leaves the pin floating HIGH and keeps the + * transceiver in standby regardless of XSTBYEN + * - LAT0=0: drive pin LOW => transceiver active (not in standby) + * + * All three bits are included in the mask; only XSTBYEN is set in + * val, so TRIS0 and LAT0 are cleared to 0 atomically. + * + * Pin behaviour by mode: + * - Config mode: controlled by LAT0 (LAT0=0 => LOW => active) + * - Normal mode: hardware drives pin LOW (active) + * - Sleep mode: hardware drives pin HIGH (standby) + */ + return regmap_update_bits(priv->map_reg, MCP251XFD_REG_IOCON, + MCP251XFD_REG_IOCON_XSTBYEN | + MCP251XFD_REG_IOCON_TRIS0 | + MCP251XFD_REG_IOCON_LAT0, + MCP251XFD_REG_IOCON_XSTBYEN); +} + static int mcp251xfd_chip_start(struct mcp251xfd_priv *priv) { int err; @@ -796,6 +821,12 @@ static int mcp251xfd_chip_start(struct mcp251xfd_priv *priv) priv->can.state = CAN_STATE_ERROR_ACTIVE; + if (priv->xstbyen) { + err = mcp251xfd_chip_xstbyen_enable(priv); + if (err) + goto out_chip_stop; + } + err = mcp251xfd_chip_set_normal_mode(priv); if (err) goto out_chip_stop; @@ -1805,6 +1836,11 @@ static int mcp251xfd_gpio_request(struct gpio_chip *chip, unsigned int offset) u32 pin_mask = MCP251XFD_REG_IOCON_PM(offset); int ret; + if (priv->xstbyen && offset == 0) { + netdev_err(priv->ndev, "Can't use GPIO 0 with XSTBYEN!\n"); + return -EINVAL; + } + if (priv->rx_int && offset == 1) { netdev_err(priv->ndev, "Can't use GPIO 1 with RX-INT!\n"); return -EINVAL; @@ -2271,6 +2307,7 @@ static int mcp251xfd_probe(struct spi_device *spi) priv->pll_enable = pll_enable; priv->reg_vdd = reg_vdd; priv->reg_xceiver = reg_xceiver; + priv->xstbyen = device_property_present(&spi->dev, "microchip,xstbyen"); priv->devtype_data = *(struct mcp251xfd_devtype_data *)spi_get_device_match_data(spi); /* Errata Reference: diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h index 085d7101e595..d3f4704e2678 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h @@ -672,6 +672,7 @@ struct mcp251xfd_priv { struct gpio_desc *rx_int; struct clk *clk; bool pll_enable; + bool xstbyen; struct regulator *reg_vdd; struct regulator *reg_xceiver; From 11d94d3516c0c549752061b6a576e6c547d61e86 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 5 Mar 2026 11:14:37 +0100 Subject: [PATCH 1079/1561] can: rcar_can: Convert to FIELD_MODIFY() Use the FIELD_MODIFY() helper instead of open-coding the same operation. Signed-off-by: Geert Uytterhoeven Link: https://patch.msgid.link/ee2e6aaacd5e061c972716ecaf8a929be7ef5f2e.1772705647.git.geert+renesas@glider.be Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rcar/rcar_can.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/can/rcar/rcar_can.c b/drivers/net/can/rcar/rcar_can.c index fc3df328e877..2727c5ce029c 100644 --- a/drivers/net/can/rcar/rcar_can.c +++ b/drivers/net/can/rcar/rcar_can.c @@ -496,8 +496,7 @@ static void rcar_can_start(struct net_device *ndev) priv->can.state = CAN_STATE_ERROR_ACTIVE; /* Go to operation mode */ - ctlr &= ~RCAR_CAN_CTLR_CANM; - ctlr |= FIELD_PREP(RCAR_CAN_CTLR_CANM, RCAR_CAN_CTLR_CANM_OPER); + FIELD_MODIFY(RCAR_CAN_CTLR_CANM, &ctlr, RCAR_CAN_CTLR_CANM_OPER); writew(ctlr, &priv->regs->ctlr); for (i = 0; i < MAX_STR_READS; i++) { if (!(readw(&priv->regs->str) & RCAR_CAN_STR_RSTST)) From 581281cb5a1b72fe31ab67e0074feabb1b5abcc7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 30 Mar 2026 12:18:17 +0200 Subject: [PATCH 1080/1561] can: ucan: refactor endpoint lookup Use the common USB helpers for looking up bulk and interrupt endpoints (and determining endpoint numbers and max packet sizes) instead of open coding. Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260330101817.1664787-3-johan@kernel.org Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/ucan.c | 40 +++++++++++--------------------------- 1 file changed, 11 insertions(+), 29 deletions(-) diff --git a/drivers/net/can/usb/ucan.c b/drivers/net/can/usb/ucan.c index 0ea0ac75e42f..506d9e74aa79 100644 --- a/drivers/net/can/usb/ucan.c +++ b/drivers/net/can/usb/ucan.c @@ -1302,13 +1302,12 @@ static int ucan_probe(struct usb_interface *intf, const struct usb_device_id *id) { int ret; - int i; u32 protocol_version; struct usb_device *udev; struct net_device *netdev; struct usb_host_interface *iface_desc; struct ucan_priv *up; - struct usb_endpoint_descriptor *ep; + struct usb_endpoint_descriptor *ep_in, *ep_out; u16 in_ep_size; u16 out_ep_size; u8 in_ep_addr; @@ -1343,37 +1342,20 @@ static int ucan_probe(struct usb_interface *intf, } /* check interface endpoints */ - in_ep_addr = 0; - out_ep_addr = 0; - in_ep_size = 0; - out_ep_size = 0; - for (i = 0; i < iface_desc->desc.bNumEndpoints; i++) { - ep = &iface_desc->endpoint[i].desc; - - if (((ep->bEndpointAddress & USB_ENDPOINT_DIR_MASK) != 0) && - ((ep->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) == - USB_ENDPOINT_XFER_BULK)) { - /* In Endpoint */ - in_ep_addr = ep->bEndpointAddress; - in_ep_addr &= USB_ENDPOINT_NUMBER_MASK; - in_ep_size = le16_to_cpu(ep->wMaxPacketSize); - } else if (((ep->bEndpointAddress & USB_ENDPOINT_DIR_MASK) == - 0) && - ((ep->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) == - USB_ENDPOINT_XFER_BULK)) { - /* Out Endpoint */ - out_ep_addr = ep->bEndpointAddress; - out_ep_addr &= USB_ENDPOINT_NUMBER_MASK; - out_ep_size = le16_to_cpu(ep->wMaxPacketSize); - } - } - - /* check if interface is sane */ - if (!in_ep_addr || !out_ep_addr) { + ret = usb_find_common_endpoints_reverse(iface_desc, &ep_in, &ep_out, + NULL, NULL); + if (ret) { dev_err(&udev->dev, "%s: invalid endpoint configuration\n", UCAN_DRIVER_NAME); goto err_firmware_needs_update; } + + in_ep_addr = usb_endpoint_num(ep_in); + out_ep_addr = usb_endpoint_num(ep_out); + in_ep_size = usb_endpoint_maxp(ep_in); + out_ep_size = usb_endpoint_maxp(ep_out); + + /* check if interface is sane */ if (in_ep_size < sizeof(struct ucan_message_in)) { dev_err(&udev->dev, "%s: invalid in_ep MaxPacketSize\n", UCAN_DRIVER_NAME); From 63081dec9e370ac0a1f53565ffa693274630380e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 31 Mar 2026 17:47:22 +0000 Subject: [PATCH 1081/1561] ipv6: move ip6_dst_hoplimit() to net/ipv6/ip6_output.c Move ip6_dst_hoplimit() to net/ipv6/ip6_output.c so that compiler can (auto)inline it from ip6_xmit(). $ scripts/bloat-o-meter -t vmlinux.0 vmlinux.1 add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-11 (-11) Function old new delta ip6_xmit 1684 1673 -11 Total: Before=29655407, After=29655396, chg -0.00% Signed-off-by: Eric Dumazet Reviewed-by: Fernando Fernandez Mancera Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260331174722.4128061-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_output.c | 21 +++++++++++++++++++++ net/ipv6/output_core.c | 23 ----------------------- 2 files changed, 21 insertions(+), 23 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 63abc4fddaee..7e92909ab5be 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -259,6 +259,27 @@ bool ip6_autoflowlabel(struct net *net, const struct sock *sk) return inet6_test_bit(AUTOFLOWLABEL, sk); } +int ip6_dst_hoplimit(struct dst_entry *dst) +{ + int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT); + + rcu_read_lock(); + if (hoplimit == 0) { + struct net_device *dev = dst_dev_rcu(dst); + struct inet6_dev *idev; + + idev = __in6_dev_get(dev); + if (idev) + hoplimit = READ_ONCE(idev->cnf.hop_limit); + else + hoplimit = READ_ONCE(dev_net(dev)->ipv6.devconf_all->hop_limit); + } + rcu_read_unlock(); + + return hoplimit; +} +EXPORT_SYMBOL(ip6_dst_hoplimit); + /* * xmit an sk_buff (used by TCP and SCTP) * Note : socket lock is not held for SYNACK packets, but might be modified diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index cba1684a3f30..64b1eeb79b57 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -100,29 +100,6 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) } EXPORT_SYMBOL(ip6_find_1stfragopt); -#if IS_ENABLED(CONFIG_IPV6) -int ip6_dst_hoplimit(struct dst_entry *dst) -{ - int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT); - - rcu_read_lock(); - if (hoplimit == 0) { - struct net_device *dev = dst_dev_rcu(dst); - struct inet6_dev *idev; - - idev = __in6_dev_get(dev); - if (idev) - hoplimit = READ_ONCE(idev->cnf.hop_limit); - else - hoplimit = READ_ONCE(dev_net(dev)->ipv6.devconf_all->hop_limit); - } - rcu_read_unlock(); - - return hoplimit; -} -EXPORT_SYMBOL(ip6_dst_hoplimit); -#endif - int __ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) { ipv6_set_payload_len(ipv6_hdr(skb), skb->len - sizeof(struct ipv6hdr)); From 4cbcf82e26b6324a56c6d9c5bc49d59b6076b54d Mon Sep 17 00:00:00 2001 From: Julian Braha Date: Mon, 30 Mar 2026 22:32:58 +0100 Subject: [PATCH 1082/1561] ppp: dead code cleanup in Kconfig There is already an 'if PPP' condition wrapping several config options e.g. PPP_MPPE and PPPOE, making the 'depends on PPP' statement for each of these a duplicate dependency (dead code). I propose leaving the outer 'if PPP...endif' and removing the individual 'depends on PPP' statement from each option. This dead code was found by kconfirm, a static analysis tool for Kconfig. Signed-off-by: Julian Braha Reviewed-by: Qingfang Deng Link: https://patch.msgid.link/20260330213258.13982-1-julianbraha@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ppp/Kconfig | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/net/ppp/Kconfig b/drivers/net/ppp/Kconfig index a1806b4b84be..f57fba84fe55 100644 --- a/drivers/net/ppp/Kconfig +++ b/drivers/net/ppp/Kconfig @@ -37,7 +37,6 @@ if PPP config PPP_BSDCOMP tristate "PPP BSD-Compress compression" - depends on PPP help Support for the BSD-Compress compression method for PPP, which uses the LZW compression method to compress each PPP packet before it is @@ -56,7 +55,6 @@ config PPP_BSDCOMP config PPP_DEFLATE tristate "PPP Deflate compression" - depends on PPP select ZLIB_INFLATE select ZLIB_DEFLATE help @@ -71,7 +69,6 @@ config PPP_DEFLATE config PPP_FILTER bool "PPP filtering" - depends on PPP help Say Y here if you want to be able to filter the packets passing over PPP interfaces. This allows you to control which packets count as @@ -84,7 +81,6 @@ config PPP_FILTER config PPP_MPPE tristate "PPP MPPE compression (encryption)" - depends on PPP select CRYPTO_LIB_ARC4 select CRYPTO_LIB_SHA1 help @@ -96,7 +92,6 @@ config PPP_MPPE config PPP_MULTILINK bool "PPP multilink support" - depends on PPP help PPP multilink is a protocol (defined in RFC 1990) which allows you to combine several (logical or physical) lines into one logical PPP @@ -109,7 +104,7 @@ config PPP_MULTILINK config PPPOATM tristate "PPP over ATM" - depends on ATM && PPP + depends on ATM help Support PPP (Point to Point Protocol) encapsulated in ATM frames. This implementation does not yet comply with section 8 of RFC2364, @@ -118,7 +113,6 @@ config PPPOATM config PPPOE tristate "PPP over Ethernet" - depends on PPP help Support for PPP over Ethernet. @@ -164,7 +158,7 @@ config PPPOE_HASH_BITS config PPTP tristate "PPP over IPv4 (PPTP)" - depends on PPP && NET_IPGRE_DEMUX + depends on NET_IPGRE_DEMUX help Support for PPP over IPv4.(Point-to-Point Tunneling Protocol) @@ -175,7 +169,7 @@ config PPTP config PPPOL2TP tristate "PPP over L2TP" - depends on L2TP && PPP + depends on L2TP help Support for PPP-over-L2TP socket family. L2TP is a protocol used by ISPs and enterprises to tunnel PPP traffic over UDP @@ -184,7 +178,6 @@ if TTY config PPP_ASYNC tristate "PPP support for async serial ports" - depends on PPP select CRC_CCITT help Say Y (or M) here if you want to be able to use PPP over standard @@ -198,7 +191,6 @@ config PPP_ASYNC config PPP_SYNC_TTY tristate "PPP support for sync tty ports" - depends on PPP help Say Y (or M) here if you want to be able to use PPP over synchronous (HDLC) tty devices, such as the SyncLink adapter. These devices From 2897c697b3266cae753e2349098cd98f36891c19 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Tue, 31 Mar 2026 08:26:04 -0400 Subject: [PATCH 1083/1561] net/mlx5: Move command entry freeing outside of spinlock Move the kfree() call outside the critical section to reduce lock holding time. This aligns with the general principle of minimizing work under locks. Signed-off-by: Li RongQing Reviewed-by: Tariq Toukan Link: https://patch.msgid.link/20260331122604.1933-1-lirongqing@baidu.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 6c99c7f36163..c89417c1a1f9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -196,17 +196,18 @@ static void cmd_ent_put(struct mlx5_cmd_work_ent *ent) unsigned long flags; spin_lock_irqsave(&cmd->alloc_lock, flags); - if (!refcount_dec_and_test(&ent->refcnt)) - goto out; + if (!refcount_dec_and_test(&ent->refcnt)) { + spin_unlock_irqrestore(&cmd->alloc_lock, flags); + return; + } if (ent->idx >= 0) { cmd_free_index(cmd, ent->idx); up(ent->page_queue ? &cmd->vars.pages_sem : &cmd->vars.sem); } + spin_unlock_irqrestore(&cmd->alloc_lock, flags); cmd_free_ent(ent); -out: - spin_unlock_irqrestore(&cmd->alloc_lock, flags); } static struct mlx5_cmd_layout *get_inst(struct mlx5_cmd *cmd, int idx) From 249716daa9955f9fb8a9a80ed99b3b9071a838de Mon Sep 17 00:00:00 2001 From: Bitterblue Smith Date: Thu, 26 Mar 2026 17:21:19 +0200 Subject: [PATCH 1084/1561] wifi: rtw88: Fill fw_version member of struct wiphy Let userspace tools like lshw show the firmware version by filling the fw_version member of struct wiphy. Before: configuration: broadcast=yes driver=rtw88_8814au driverversion=6.19.6-arch1-1 firmware=N/A link=no multicast=yes wireless=IEEE 802.11 After: configuration: broadcast=yes driver=rtw88_8814au driverversion=6.19.6-arch1-1 firmware=33.6.0 link=no multicast=yes wireless=IEEE 802.11 Signed-off-by: Bitterblue Smith Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/3701cce1-42c4-4382-9120-cd21012c1b21@gmail.com --- drivers/net/wireless/realtek/rtw88/main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/wireless/realtek/rtw88/main.c b/drivers/net/wireless/realtek/rtw88/main.c index c4f9758b4e96..cd9254370fcc 100644 --- a/drivers/net/wireless/realtek/rtw88/main.c +++ b/drivers/net/wireless/realtek/rtw88/main.c @@ -1805,6 +1805,7 @@ static void rtw_load_firmware_cb(const struct firmware *firmware, void *context) { struct rtw_fw_state *fw = context; struct rtw_dev *rtwdev = fw->rtwdev; + struct wiphy *wiphy = rtwdev->hw->wiphy; if (!firmware || !firmware->data) { rtw_err(rtwdev, "failed to request firmware\n"); @@ -1819,6 +1820,11 @@ static void rtw_load_firmware_cb(const struct firmware *firmware, void *context) rtw_info(rtwdev, "%sFirmware version %u.%u.%u, H2C version %u\n", fw->type == RTW_WOWLAN_FW ? "WOW " : "", fw->version, fw->sub_version, fw->sub_index, fw->h2c_version); + + if (fw->type == RTW_NORMAL_FW) + snprintf(wiphy->fw_version, sizeof(wiphy->fw_version), + "%u.%u.%u", + fw->version, fw->sub_version, fw->sub_index); } static int rtw_load_firmware(struct rtw_dev *rtwdev, enum rtw_fw_type type) From c2ca7b9d27f8806772687b547ae08a1127fbe3f9 Mon Sep 17 00:00:00 2001 From: Bitterblue Smith Date: Thu, 26 Mar 2026 17:22:33 +0200 Subject: [PATCH 1085/1561] wifi: rtw89: Fill fw_version member of struct wiphy Let userspace tools like lshw show the firmware version by filling the fw_version member of struct wiphy. Before: configuration: broadcast=yes driver=rtw89_8852au driverversion=6.19.6-arch1-1 firmware=N/A link=no multicast=yes wireless=IEEE 802.11 After: configuration: broadcast=yes driver=rtw89_8852au driverversion=6.19.6-arch1-1 firmware=0.13.36.2 link=no multicast=yes wireless=IEEE 802.11 Signed-off-by: Bitterblue Smith Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/a46ed12c-387c-4063-849c-c6457bf97810@gmail.com --- drivers/net/wireless/realtek/rtw89/fw.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/wireless/realtek/rtw89/fw.c b/drivers/net/wireless/realtek/rtw89/fw.c index 52db41c67265..17704f054727 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.c +++ b/drivers/net/wireless/realtek/rtw89/fw.c @@ -730,6 +730,7 @@ static int rtw89_fw_update_ver(struct rtw89_dev *rtwdev, { const struct rtw89_fw_hdr *v0 = (const struct rtw89_fw_hdr *)fw_suit->data; const struct rtw89_fw_hdr_v1 *v1 = (const struct rtw89_fw_hdr_v1 *)fw_suit->data; + struct wiphy *wiphy = rtwdev->hw->wiphy; if (type == RTW89_FW_LOGFMT) return 0; @@ -755,6 +756,13 @@ static int rtw89_fw_update_ver(struct rtw89_dev *rtwdev, fw_suit->major_ver, fw_suit->minor_ver, fw_suit->sub_ver, fw_suit->sub_idex, fw_suit->commitid, fw_suit->cmd_ver, type); + if (type == RTW89_FW_NORMAL || type == RTW89_FW_NORMAL_CE || + type == RTW89_FW_NORMAL_B) + snprintf(wiphy->fw_version, sizeof(wiphy->fw_version), + "%u.%u.%u.%u", + fw_suit->major_ver, fw_suit->minor_ver, + fw_suit->sub_ver, fw_suit->sub_idex); + return 0; } From 48c6255cda812b04a23d92bdc82eb523d9abd712 Mon Sep 17 00:00:00 2001 From: Julian Braha Date: Sun, 29 Mar 2026 19:53:48 +0100 Subject: [PATCH 1086/1561] net: microchip: dead code cleanup in kconfig for FDMA The Kconfig in the parent directory already has the first 'if NET_VENDOR_MICROCHIP' gating the inclusion of this Kconfig, meaning that the second 'if NET_VENDOR_MICROCHIP' condition is effectively dead code. I propose removing the second 'if NET_VENDOR_MICROCHIP' in drivers/net/ethernet/microchip/fdma/Kconfig This dead code was found by kconfirm, a static analysis tool for Kconfig. Signed-off-by: Julian Braha Link: https://patch.msgid.link/20260329185348.526893-1-julianbraha@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/fdma/Kconfig | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/microchip/fdma/Kconfig b/drivers/net/ethernet/microchip/fdma/Kconfig index ec228c061351..57a54e7167d3 100644 --- a/drivers/net/ethernet/microchip/fdma/Kconfig +++ b/drivers/net/ethernet/microchip/fdma/Kconfig @@ -3,8 +3,6 @@ # Microchip FDMA API configuration # -if NET_VENDOR_MICROCHIP - config FDMA bool "FDMA API" if COMPILE_TEST help @@ -14,5 +12,3 @@ config FDMA Say Y here if you want to build the FDMA API that provides a common set of functions and data structures for interacting with the Frame DMA engine in multiple microchip switchcores. - -endif # NET_VENDOR_MICROCHIP From 127ea8d0b0687509e3067b1e90a38067d2298f49 Mon Sep 17 00:00:00 2001 From: Christos Longros Date: Sun, 29 Mar 2026 09:45:50 +0200 Subject: [PATCH 1087/1561] wifi: rtw89: fix typo "frome" -> "from" in rx_freq_frome_ie The chip_info field rx_freq_frome_ie (RX frequency from Information Element) has a typo. The function that uses it is already spelled correctly: rtw89_core_update_rx_freq_from_ie. Rename the field to match. Signed-off-by: Christos Longros Acked-by: Ping-Ke Shih Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260329074550.114787-1-chris.longros@gmail.com --- drivers/net/wireless/realtek/rtw89/core.c | 2 +- drivers/net/wireless/realtek/rtw89/core.h | 2 +- drivers/net/wireless/realtek/rtw89/rtw8851b.c | 2 +- drivers/net/wireless/realtek/rtw89/rtw8852a.c | 2 +- drivers/net/wireless/realtek/rtw89/rtw8852b.c | 2 +- drivers/net/wireless/realtek/rtw89/rtw8852bt.c | 2 +- drivers/net/wireless/realtek/rtw89/rtw8852c.c | 2 +- drivers/net/wireless/realtek/rtw89/rtw8922a.c | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/core.c b/drivers/net/wireless/realtek/rtw89/core.c index 9d3f651798ff..70feab97dccb 100644 --- a/drivers/net/wireless/realtek/rtw89/core.c +++ b/drivers/net/wireless/realtek/rtw89/core.c @@ -3203,7 +3203,7 @@ static void rtw89_core_update_rx_freq_from_ie(struct rtw89_dev *rtwdev, u8 *variable; int chan; - if (!rtwdev->chip->rx_freq_frome_ie) + if (!rtwdev->chip->rx_freq_from_ie) return; if (!rtwdev->scanning) diff --git a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h index 696a1d92d62b..4b4d25d8ba98 100644 --- a/drivers/net/wireless/realtek/rtw89/core.h +++ b/drivers/net/wireless/realtek/rtw89/core.h @@ -4546,7 +4546,7 @@ struct rtw89_chip_info { bool support_noise; bool ul_tb_waveform_ctrl; bool ul_tb_pwr_diff; - bool rx_freq_frome_ie; + bool rx_freq_from_ie; bool hw_sec_hdr; bool hw_mgmt_tx_encrypt; bool hw_tkip_crypto; diff --git a/drivers/net/wireless/realtek/rtw89/rtw8851b.c b/drivers/net/wireless/realtek/rtw89/rtw8851b.c index a3a5b6fbe46c..2005934f64d2 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8851b.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8851b.c @@ -2641,7 +2641,7 @@ const struct rtw89_chip_info rtw8851b_chip_info = { .support_noise = false, .ul_tb_waveform_ctrl = true, .ul_tb_pwr_diff = false, - .rx_freq_frome_ie = true, + .rx_freq_from_ie = true, .hw_sec_hdr = false, .hw_mgmt_tx_encrypt = false, .hw_tkip_crypto = false, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852a.c b/drivers/net/wireless/realtek/rtw89/rtw8852a.c index 898c534a5762..12ed35f548c1 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852a.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852a.c @@ -2378,7 +2378,7 @@ const struct rtw89_chip_info rtw8852a_chip_info = { .support_noise = true, .ul_tb_waveform_ctrl = false, .ul_tb_pwr_diff = false, - .rx_freq_frome_ie = true, + .rx_freq_from_ie = true, .hw_sec_hdr = false, .hw_mgmt_tx_encrypt = false, .hw_tkip_crypto = false, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852b.c b/drivers/net/wireless/realtek/rtw89/rtw8852b.c index 19a9f07e8714..dcb15f26c1b1 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852b.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852b.c @@ -974,7 +974,7 @@ const struct rtw89_chip_info rtw8852b_chip_info = { .support_noise = false, .ul_tb_waveform_ctrl = true, .ul_tb_pwr_diff = false, - .rx_freq_frome_ie = true, + .rx_freq_from_ie = true, .hw_sec_hdr = false, .hw_mgmt_tx_encrypt = false, .hw_tkip_crypto = false, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852bt.c b/drivers/net/wireless/realtek/rtw89/rtw8852bt.c index c86b995a7cb1..2afc6908cc7e 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852bt.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852bt.c @@ -813,7 +813,7 @@ const struct rtw89_chip_info rtw8852bt_chip_info = { .support_sar_by_ant = true, .ul_tb_waveform_ctrl = true, .ul_tb_pwr_diff = false, - .rx_freq_frome_ie = true, + .rx_freq_from_ie = true, .hw_sec_hdr = false, .hw_mgmt_tx_encrypt = false, .hw_tkip_crypto = true, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852c.c b/drivers/net/wireless/realtek/rtw89/rtw8852c.c index 7ea0a8905282..5f9da0f85bb5 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852c.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852c.c @@ -3171,7 +3171,7 @@ const struct rtw89_chip_info rtw8852c_chip_info = { .support_noise = false, .ul_tb_waveform_ctrl = false, .ul_tb_pwr_diff = true, - .rx_freq_frome_ie = false, + .rx_freq_from_ie = false, .hw_sec_hdr = true, .hw_mgmt_tx_encrypt = true, .hw_tkip_crypto = true, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922a.c b/drivers/net/wireless/realtek/rtw89/rtw8922a.c index a489aaf90f23..5f83e71ed218 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8922a.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8922a.c @@ -2975,7 +2975,7 @@ const struct rtw89_chip_info rtw8922a_chip_info = { .support_noise = false, .ul_tb_waveform_ctrl = false, .ul_tb_pwr_diff = false, - .rx_freq_frome_ie = false, + .rx_freq_from_ie = false, .hw_sec_hdr = true, .hw_mgmt_tx_encrypt = true, .hw_tkip_crypto = true, From a77fb1ace44ed11874b9bf064208cad5fc0d94a5 Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Mon, 30 Mar 2026 11:54:05 +0200 Subject: [PATCH 1088/1561] Octeontx2-af: add WQ_PERCPU to alloc_workqueue users MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This continues the effort to refactor workqueue APIs, which began with the introduction of new workqueues and a new alloc_workqueue flag in: commit 128ea9f6ccfb ("workqueue: Add system_percpu_wq and system_dfl_wq") commit 930c2ea566af ("workqueue: Add new WQ_PERCPU flag") The refactoring is going to alter the default behavior of alloc_workqueue() to be unbound by default. With the introduction of the WQ_PERCPU flag (equivalent to !WQ_UNBOUND), any alloc_workqueue() caller that doesn’t explicitly specify WQ_UNBOUND must now use WQ_PERCPU. For more details see the Link tag below. In order to keep alloc_workqueue() behavior identical, explicitly request WQ_PERCPU. Link: https://lore.kernel.org/all/20250221112003.1dSuoGyc@linutronix.de/ Suggested-by: Tejun Heo Signed-off-by: Marco Crivellari Link: https://patch.msgid.link/20260330095405.116990-1-marco.crivellari@suse.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/af/rvu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 1c8f657719b2..e40b79076358 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -2645,7 +2645,7 @@ static int rvu_mbox_init(struct rvu *rvu, struct mbox_wq_info *mw, } mw->mbox_wq = alloc_workqueue("%s", - WQ_HIGHPRI | WQ_MEM_RECLAIM, + WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_PERCPU, num, name); if (!mw->mbox_wq) { err = -ENOMEM; From 347fbc602795f9595a226585968bc23f0073cf77 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Mon, 30 Mar 2026 14:58:39 +0800 Subject: [PATCH 1089/1561] wifi: rtw89: 8922d: BB hardware pre-/post-init, TX/RX path and power settings More settings related to BB pre-/post-initial settings, the TX/RX path settings, and digital power compensation. Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260330065847.48946-2-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/reg.h | 64 ++++ drivers/net/wireless/realtek/rtw89/rtw8922d.c | 318 ++++++++++++++++++ 2 files changed, 382 insertions(+) diff --git a/drivers/net/wireless/realtek/rtw89/reg.h b/drivers/net/wireless/realtek/rtw89/reg.h index 5d284f310069..e64bd74db83a 100644 --- a/drivers/net/wireless/realtek/rtw89/reg.h +++ b/drivers/net/wireless/realtek/rtw89/reg.h @@ -8384,6 +8384,9 @@ #define B_BE_PWR_BT_VAL GENMASK(8, 0) #define B_BE_PWR_FORCE_COEX_ON GENMASK(29, 27) +#define R_PWR_BOOST_BE4 0x11A64 +#define B_PWR_BOOST_BE4 BIT(8) + #define R_BE_PWR_TH 0x11A78 #define R_BE_PWR_RSSI_TARGET_LMT 0x11A84 @@ -10248,6 +10251,8 @@ #define R_TSSI_K_P1 0xE7A0 #define B_TSSI_K_OFDM_P1 GENMASK(29, 20) +#define R_BBWRAP_ELMSR_BE4 0x11974 +#define B_BBWRAP_ELMSR_EN_BE4 GENMASK(29, 28) #define R_COMP_CIM3K_BE4 0x11998 #define B_COMP_CIM3K_OW_BE4 BIT(1) #define B_COMP_CIM3K_TH_BE4 BIT(2) @@ -10452,6 +10457,12 @@ #define R_BANDEDGE_DBWY_BE4 0x11AD0 #define B_BANDEDGE_DBW160_BE4 BIT(0) +#define R_SYS_DBCC_BE4 0x20000 +#define B_SYS_DBCC_BE4 BIT(0) +#define B_SYS_DBCC_24G_BAND_SEL_BE4 BIT(1) +#define R_EMLSR_SWITCH_BE4 0x20044 +#define B_EMLSR_SWITCH_BE4 GENMASK(27, 12) +#define B_EMLSR_BB_CLK_BE4 GENMASK(31, 30) #define R_CHINFO_SEG_BE4 0x200B4 #define B_CHINFO_SEG_LEN_BE4 GENMASK(12, 10) #define R_STS_HDR2_PARSING_BE4 0x2070C @@ -10467,11 +10478,25 @@ #define B_RXBW7_BE4 GENMASK(25, 23) #define R_RXBW_BE4 0x20410 #define B_RXBW_BE4 GENMASK(29, 27) +#define R_TXERRCT_EN_BE4 0x20518 +#define B_TXERRCT_EN_BE4 BIT(13) +#define R_TXERRCT1_EN_BE4 0x2051C +#define B_TXERRCT1_EN_BE4 BIT(31) #define R_ENABLE_CCK0_BE4 0x20700 #define B_ENABLE_CCK0_BE4 BIT(5) +#define R_RSTB_ASYNC_BE4 0x20704 +#define B_RSTB_ASYNC_BE4 BIT(1) #define R_EDCCA_RPT_SEL_BE4 0x20780 #define R_EDCCA_RPT_SEL_BE4_C1 0x21780 #define B_EDCCA_RPT_SEL_BE4_MSK 0xE0000 +#define R_IMR_TX_ERROR_BE4 0x20920 +#define B_IMR_TX_ERROR_BE4 BIT(30) +#define R_TXINFO_PATH_BE4 0x209A4 +#define B_TXINFO_PATH_EN_BE4 BIT(17) +#define B_TXINFO_PATH_MA_BE4 BIT(18) +#define B_TXINFO_PATH_MB_BE4 BIT(19) +#define R_SHAPER_COEFF_BE4 0x20CBC +#define B_SHAPER_COEFF_BE4 BIT(19) #define R_IFS_T1_AVG_BE4 0x20EDC #define B_IFS_T1_AVG_BE4 GENMASK(15, 0) #define B_IFS_T2_AVG_BE4 GENMASK(31, 16) @@ -10494,6 +10519,12 @@ #define B_IFS_T3_HIS_BE4 GENMASK(15, 0) #define B_IFS_T4_HIS_BE4 GENMASK(31, 16) +#define R_TX_ERROR_SEL_BE4 0x21254 +#define B_TX_ERROR_PSDU_BE4 BIT(11) +#define B_TX_ERROR_NSYM_BE4 BIT(10) +#define B_TX_ERROR_LSIG_BE4 BIT(9) +#define B_TX_ERROR_TXINFO_BE4 BIT(8) + #define R_TXPWR_RSTB0_BE4 0x2250C #define B_TXPWR_RSTB0_BE4 BIT(16) #define R_TXPWR_RSTB1_BE4 0x2260C @@ -10546,6 +10577,10 @@ #define B_PRISB_BE4 GENMASK(3, 0) #define R_FC0_BE4 0x24EE8 #define B_FC0_BE4 GENMASK(12, 0) +#define R_ANT_RX_1RCCA_BE4 0x24EEC +#define B_ANT_RX_1RCCA_BE4 GENMASK(17, 14) +#define R_ANT_RX_BE4 0x24EF0 +#define B_ANT_RX_BE4 GENMASK(3, 0) #define R_FC0_INV_BE4 0x24EF4 #define B_FC0_INV_BE4 GENMASK(15, 0) @@ -10569,6 +10604,32 @@ #define B_CHINFO_NX_BE4 GENMASK(16, 6) #define R_CHINFO_ALG_BE4 0x267C8 #define B_CHINFO_ALG_BE4 GENMASK(31, 30) +#define R_RX_AWGN02_BE4 0x2680C +#define B_RX_AWGN11_BE4 GENMASK(23, 18) +#define R_RX_AWGN00_BE4 0x26814 +#define B_RX_AWGN04_BE4 GENMASK(5, 0) +#define B_RX_AWGN07_BE4 GENMASK(23, 18) +#define R_RX_AWGN01_BE4 0x26818 +#define B_RX_AWGN09_BE4 GENMASK(5, 0) +#define R_RXCH_BCC0_BE4 0x26824 +#define B_RXCH_MCS4_BE4 GENMASK(29, 24) +#define R_RXCH_BCC1_BE4 0x26828 +#define B_RXCH_MCS5_BE4 GENMASK(5, 0) +#define B_RXCH_MCS6_BE4 GENMASK(11, 6) +#define B_RXCH_MCS7_BE4 GENMASK(17, 12) +#define B_RXCH_MCS8_BE4 GENMASK(23, 18) +#define B_RXCH_MCS9_BE4 GENMASK(29, 24) +#define R_RX_LDPC02_BE4 0x26834 +#define B_RX_LDPC10_BE4 GENMASK(17, 12) +#define B_RX_LDPC11_BE4 GENMASK(23, 18) +#define R_RX_LDPC00_BE4 0x2683C +#define B_RX_LDPC04_BE4 GENMASK(5, 0) +#define B_RX_LDPC05_BE4 GENMASK(11, 6) +#define B_RX_LDPC06_BE4 GENMASK(17, 12) +#define B_RX_LDPC07_BE4 GENMASK(23, 18) +#define B_RX_LDPC08_BE4 GENMASK(29, 24) +#define R_RX_LDPC01_BE4 0x26840 +#define B_RX_LDPC09_BE4 GENMASK(5, 0) #define R_SW_SI_DATA_BE4 0x2CF4C #define B_SW_SI_READ_DATA_BE4 GENMASK(19, 0) @@ -10576,6 +10637,9 @@ #define B_SW_SI_R_BUSY_BE4 BIT(25) #define B_SW_SI_READ_DATA_DONE_BE4 BIT(26) +#define R_RX_PATH0_TBL0_BE4 0x2E028 +#define R_RX_PATH1_TBL0_BE4 0x2E128 + /* WiFi CPU local domain */ #define R_AX_WDT_CTRL 0x0040 #define B_AX_WDT_EN BIT(31) diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922d.c b/drivers/net/wireless/realtek/rtw89/rtw8922d.c index 1b5fc6c9ea85..51b025d898ff 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8922d.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8922d.c @@ -2,6 +2,7 @@ /* Copyright(c) 2026 Realtek Corporation */ +#include "chan.h" #include "debug.h" #include "efuse.h" #include "mac.h" @@ -1690,6 +1691,94 @@ static void rtw8922d_spur_elimination(struct rtw89_dev *rtwdev, rtw8922d_set_nbi_tone_idx(rtwdev, chan, RF_PATH_B, phy_idx); } +static const u32 bbrst_mask[2] = {B_BE_FEN_BBPLAT_RSTB, B_BE_FEN_BB1PLAT_RSTB}; +static const u32 glbrst_mask[2] = {B_BE_FEN_BB_IP_RSTN, B_BE_FEN_BB1_IP_RSTN}; +static const u32 chip_top_bitmask[2] = {0xffff, 0xffff0000}; + +static void rtw8922d_bb_preinit(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx) +{ + rtw89_write32_mask(rtwdev, R_BE_FEN_RST_ENABLE, glbrst_mask[phy_idx], 0x0); + rtw89_write32_mask(rtwdev, R_BE_FEN_RST_ENABLE, bbrst_mask[phy_idx], 0x0); + rtw89_write32_mask(rtwdev, R_BE_DMAC_SYS_CR32B, chip_top_bitmask[phy_idx], 0x74F9); + rtw89_write32_mask(rtwdev, R_BE_FEN_RST_ENABLE, glbrst_mask[phy_idx], 0x1); + rtw89_phy_write32_idx(rtwdev, R_RSTB_ASYNC_BE4, B_RSTB_ASYNC_BE4, 0, phy_idx); + rtw89_write32_mask(rtwdev, R_BE_FEN_RST_ENABLE, bbrst_mask[phy_idx], 0x1); +} + +static void rtw8922d_bb_postinit(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx) +{ + rtw89_phy_write32_idx_clr(rtwdev, R_SHAPER_COEFF_BE4, B_SHAPER_COEFF_BE4, phy_idx); + rtw89_phy_write32_idx_set(rtwdev, R_SHAPER_COEFF_BE4, B_SHAPER_COEFF_BE4, phy_idx); +} + +static void rtw8922d_bb_reset_en(struct rtw89_dev *rtwdev, enum rtw89_band band, + bool en, enum rtw89_phy_idx phy_idx) +{ + if (en) + rtw89_phy_write32_idx(rtwdev, R_RSTB_ASYNC_BE4, B_RSTB_ASYNC_BE4, 1, phy_idx); + else + rtw89_phy_write32_idx(rtwdev, R_RSTB_ASYNC_BE4, B_RSTB_ASYNC_BE4, 0, phy_idx); +} + +static int rtw8922d_ctrl_tx_path_tmac(struct rtw89_dev *rtwdev, + enum rtw89_rf_path tx_path, + enum rtw89_phy_idx phy_idx) +{ + struct rtw89_reg2_def path_com_cr[] = { + {0x11A00, 0x21C86900}, + {0x11A04, 0x00E4E433}, + {0x11A08, 0x39390CC9}, + {0x11A10, 0x10CC0000}, + {0x11A14, 0x00240393}, + {0x11A18, 0x201C8600}, + {0x11B38, 0x39393FDB}, + {0x11B3C, 0x00E4E4FF}, + }; + int ret = 0; + u32 reg; + int i; + + rtw89_phy_write32_idx(rtwdev, R_TXINFO_PATH_BE4, B_TXINFO_PATH_EN_BE4, 0x0, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_TXINFO_PATH_BE4, B_TXINFO_PATH_MA_BE4, 0x0, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_TXINFO_PATH_BE4, B_TXINFO_PATH_MB_BE4, 0x0, phy_idx); + + if (phy_idx == RTW89_PHY_1 && !rtwdev->dbcc_en) + return 0; + + if (tx_path == RF_PATH_A) { + path_com_cr[1].data = 0x40031; + path_com_cr[2].data = 0x1000C48; + path_com_cr[5].data = 0x200; + path_com_cr[6].data = 0x1000C48; + path_com_cr[7].data = 0x40031; + } else if (tx_path == RF_PATH_B) { + path_com_cr[1].data = 0x40032; + path_com_cr[2].data = 0x1000C88; + path_com_cr[5].data = 0x400; + path_com_cr[6].data = 0x1000C88; + path_com_cr[7].data = 0x40032; + } else if (tx_path == RF_PATH_AB) { + path_com_cr[1].data = 0x00E4E433; + path_com_cr[2].data = 0x39390CC9; + path_com_cr[5].data = 0x201C8600; + path_com_cr[6].data = 0x1010CC9; + path_com_cr[7].data = 0x40433; + } else { + ret = -EINVAL; + } + + for (i = 0; i < ARRAY_SIZE(path_com_cr); i++) { + reg = rtw89_mac_reg_by_idx(rtwdev, path_com_cr[i].addr, phy_idx); + rtw89_write32(rtwdev, reg, path_com_cr[i].data); + } + + return ret; +} + +static void rtw8922d_bb_reset(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx) +{ +} + static void rtw8922d_tssi_reset(struct rtw89_dev *rtwdev, enum rtw89_rf_path path, enum rtw89_phy_idx phy_idx) @@ -1714,6 +1803,235 @@ static void rtw8922d_tssi_reset(struct rtw89_dev *rtwdev, } } +static int rtw8922d_ctrl_rx_path_tmac(struct rtw89_dev *rtwdev, + enum rtw89_rf_path rx_path, + enum rtw89_phy_idx phy_idx) +{ + enum rtw89_rf_path_bit path; + + if (rx_path == RF_PATH_A) + path = RF_A; + else if (rx_path == RF_PATH_B) + path = RF_B; + else + path = RF_AB; + + rtw89_phy_write32_idx(rtwdev, R_ANT_RX_BE4, B_ANT_RX_BE4, path, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_ANT_RX_1RCCA_BE4, B_ANT_RX_1RCCA_BE4, + path, phy_idx); + + if (rx_path == RF_PATH_AB) { + rtw89_phy_write32_idx(rtwdev, R_RXCH_BCC0_BE4, B_RXCH_MCS4_BE4, 8, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RXCH_BCC1_BE4, B_RXCH_MCS5_BE4, 4, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RXCH_BCC1_BE4, B_RXCH_MCS6_BE4, 3, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RXCH_BCC1_BE4, B_RXCH_MCS7_BE4, 7, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RXCH_BCC1_BE4, B_RXCH_MCS8_BE4, 2, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RXCH_BCC1_BE4, B_RXCH_MCS9_BE4, 2, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RX_AWGN00_BE4, B_RX_AWGN04_BE4, 4, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RX_AWGN00_BE4, B_RX_AWGN07_BE4, 2, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RX_AWGN01_BE4, B_RX_AWGN09_BE4, 0, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RX_AWGN02_BE4, B_RX_AWGN11_BE4, 1, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RX_LDPC00_BE4, B_RX_LDPC04_BE4, 8, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RX_LDPC00_BE4, B_RX_LDPC05_BE4, 5, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RX_LDPC00_BE4, B_RX_LDPC06_BE4, 3, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RX_LDPC00_BE4, B_RX_LDPC07_BE4, 5, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RX_LDPC00_BE4, B_RX_LDPC08_BE4, 1, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RX_LDPC01_BE4, B_RX_LDPC09_BE4, 2, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RX_LDPC02_BE4, B_RX_LDPC10_BE4, 4, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RX_LDPC02_BE4, B_RX_LDPC11_BE4, 2, phy_idx); + } else { + rtw89_phy_write32_idx(rtwdev, R_RXCH_BCC0_BE4, B_RXCH_MCS4_BE4, 13, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RXCH_BCC1_BE4, B_RXCH_MCS5_BE4, 15, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RXCH_BCC1_BE4, B_RXCH_MCS6_BE4, 6, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RXCH_BCC1_BE4, B_RXCH_MCS7_BE4, 15, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RXCH_BCC1_BE4, B_RXCH_MCS8_BE4, 4, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RXCH_BCC1_BE4, B_RXCH_MCS9_BE4, 15, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RX_AWGN00_BE4, B_RX_AWGN04_BE4, 9, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RX_AWGN00_BE4, B_RX_AWGN07_BE4, 3, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RX_AWGN01_BE4, B_RX_AWGN09_BE4, 1, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RX_AWGN02_BE4, B_RX_AWGN11_BE4, 0, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RX_LDPC00_BE4, B_RX_LDPC04_BE4, 9, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RX_LDPC00_BE4, B_RX_LDPC05_BE4, 8, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RX_LDPC00_BE4, B_RX_LDPC06_BE4, 6, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RX_LDPC00_BE4, B_RX_LDPC07_BE4, 16, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RX_LDPC00_BE4, B_RX_LDPC08_BE4, 4, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RX_LDPC01_BE4, B_RX_LDPC09_BE4, 9, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RX_LDPC02_BE4, B_RX_LDPC10_BE4, 9, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RX_LDPC02_BE4, B_RX_LDPC11_BE4, 7, phy_idx); + } + + return 0; +} + +static void rtw8922d_set_digital_pwr_comp(struct rtw89_dev *rtwdev, + const struct rtw89_chan *chan, u8 nss, + enum rtw89_rf_path path, + enum rtw89_phy_idx phy_idx) +{ +#define DIGITAL_PWR_COMP_REG_NUM 22 + static const u32 pw_comp_cr[2] = {R_RX_PATH0_TBL0_BE4, R_RX_PATH1_TBL0_BE4}; + const __le32 (*pwr_comp_val)[2][RTW89_TX_COMP_BAND_NR] + [BB_PATH_NUM_8922D][DIGITAL_PWR_COMP_REG_NUM]; + struct rtw89_fw_elm_info *elm_info = &rtwdev->fw.elm_info; + const struct rtw89_fw_element_hdr *txcomp_elm = elm_info->tx_comp; + const __le32 *digital_pwr_comp; + u32 addr, val; + u32 i; + + if (sizeof(*pwr_comp_val) != le32_to_cpu(txcomp_elm->size)) { + rtw89_debug(rtwdev, RTW89_DBG_UNEXP, + "incorrect power comp size %d\n", + le32_to_cpu(txcomp_elm->size)); + return; + } + + pwr_comp_val = (const void *)txcomp_elm->u.common.contents; + digital_pwr_comp = (*pwr_comp_val)[nss][chan->tx_comp_band][path]; + addr = pw_comp_cr[path]; + + for (i = 0; i < DIGITAL_PWR_COMP_REG_NUM; i++, addr += 4) { + val = le32_to_cpu(digital_pwr_comp[i]); + rtw89_phy_write32_idx(rtwdev, addr, MASKDWORD, val, phy_idx); + } +} + +static void rtw8922d_digital_pwr_comp(struct rtw89_dev *rtwdev, + enum rtw89_phy_idx phy_idx) +{ + const struct rtw89_chan *chan0 = rtw89_mgnt_chan_get(rtwdev, 0); + const struct rtw89_chan *chan1 = rtw89_mgnt_chan_get(rtwdev, 1); + + if (rtwdev->mlo_dbcc_mode == MLO_1_PLUS_1_1RF) { + rtw8922d_set_digital_pwr_comp(rtwdev, chan0, 0, RF_PATH_A, RTW89_PHY_0); + rtw8922d_set_digital_pwr_comp(rtwdev, chan1, 0, RF_PATH_B, RTW89_PHY_1); + } else { + rtw8922d_set_digital_pwr_comp(rtwdev, chan0, 1, RF_PATH_A, phy_idx); + rtw8922d_set_digital_pwr_comp(rtwdev, chan0, 1, RF_PATH_B, phy_idx); + } +} + +static int rtw8922d_ctrl_mlo(struct rtw89_dev *rtwdev, enum rtw89_mlo_dbcc_mode mode, + bool pwr_comp) +{ + const struct rtw89_chan *chan1; + u32 reg0, reg1; + u8 cck_phy_idx; + + if (mode == MLO_2_PLUS_0_1RF) { + rtw89_phy_write32_mask(rtwdev, R_EMLSR_SWITCH_BE4, B_EMLSR_SWITCH_BE4, 0xBBBB); + udelay(1); + rtw89_phy_write32_mask(rtwdev, R_EMLSR_SWITCH_BE4, B_EMLSR_BB_CLK_BE4, 0x3); + rtw89_phy_write32_mask(rtwdev, R_EMLSR_SWITCH_BE4, B_EMLSR_SWITCH_BE4, 0xAFFF); + rtw89_phy_write32_mask(rtwdev, R_EMLSR_SWITCH_BE4, B_EMLSR_SWITCH_BE4, 0xEBAD); + udelay(1); + + rtw89_phy_write32_mask(rtwdev, R_EMLSR_SWITCH_BE4, B_EMLSR_BB_CLK_BE4, 0x0); + rtw89_phy_write32_mask(rtwdev, R_EMLSR_SWITCH_BE4, B_EMLSR_SWITCH_BE4, 0xEAAD); + } else if (mode == MLO_0_PLUS_2_1RF) { + rtw89_phy_write32_mask(rtwdev, R_EMLSR_SWITCH_BE4, B_EMLSR_SWITCH_BE4, 0xBBBB); + udelay(1); + rtw89_phy_write32_mask(rtwdev, R_EMLSR_SWITCH_BE4, B_EMLSR_BB_CLK_BE4, 0x3); + rtw89_phy_write32_mask(rtwdev, R_EMLSR_SWITCH_BE4, B_EMLSR_SWITCH_BE4, 0xAFFF); + udelay(1); + rtw89_phy_write32_mask(rtwdev, R_EMLSR_SWITCH_BE4, B_EMLSR_SWITCH_BE4, 0xEFFF); + + rtw89_phy_write32_mask(rtwdev, R_EMLSR_SWITCH_BE4, B_EMLSR_BB_CLK_BE4, 0x0); + rtw89_phy_write32_mask(rtwdev, R_EMLSR_SWITCH_BE4, B_EMLSR_SWITCH_BE4, 0xEEFF); + } else if ((mode == MLO_1_PLUS_1_1RF) || (mode == DBCC_LEGACY)) { + rtw89_phy_write32_mask(rtwdev, R_EMLSR_SWITCH_BE4, B_EMLSR_SWITCH_BE4, 0xBBBB); + udelay(1); + rtw89_phy_write32_mask(rtwdev, R_EMLSR_SWITCH_BE4, B_EMLSR_BB_CLK_BE4, 0x3); + rtw89_phy_write32_mask(rtwdev, R_EMLSR_SWITCH_BE4, B_EMLSR_SWITCH_BE4, 0xAFFF); + udelay(1); + rtw89_phy_write32_mask(rtwdev, R_EMLSR_SWITCH_BE4, B_EMLSR_BB_CLK_BE4, 0x0); + rtw89_phy_write32_mask(rtwdev, R_EMLSR_SWITCH_BE4, B_EMLSR_SWITCH_BE4, 0x3AAB); + } else { + rtw89_phy_write32_mask(rtwdev, R_EMLSR_SWITCH_BE4, B_EMLSR_SWITCH_BE4, 0x6180); + udelay(1); + rtw89_phy_write32_mask(rtwdev, R_EMLSR_SWITCH_BE4, B_EMLSR_BB_CLK_BE4, 0x3); + rtw89_phy_write32_mask(rtwdev, R_EMLSR_SWITCH_BE4, B_EMLSR_SWITCH_BE4, 0x180); + rtw89_phy_write32_mask(rtwdev, R_EMLSR_SWITCH_BE4, B_EMLSR_BB_CLK_BE4, 0x0); + rtw89_phy_write32_mask(rtwdev, R_EMLSR_SWITCH_BE4, B_EMLSR_SWITCH_BE4, 0x0); + } + + if (pwr_comp) + rtw8922d_digital_pwr_comp(rtwdev, RTW89_PHY_0); + + reg0 = R_BBWRAP_ELMSR_BE4; + reg1 = rtw89_mac_reg_by_idx(rtwdev, reg0, 1); + + if (mode == MLO_2_PLUS_0_1RF) { + rtw89_phy_write32_mask(rtwdev, R_SYS_DBCC_BE4, + B_SYS_DBCC_24G_BAND_SEL_BE4, RTW89_PHY_0); + rtw89_write32_mask(rtwdev, reg0, B_BBWRAP_ELMSR_EN_BE4, 0); + rtw89_write32_mask(rtwdev, reg1, B_BBWRAP_ELMSR_EN_BE4, 0); + } else if (mode == MLO_0_PLUS_2_1RF) { + rtw89_phy_write32_mask(rtwdev, R_SYS_DBCC_BE4, + B_SYS_DBCC_24G_BAND_SEL_BE4, RTW89_PHY_0); + rtw89_write32_mask(rtwdev, reg0, B_BBWRAP_ELMSR_EN_BE4, 0); + rtw89_write32_mask(rtwdev, reg1, B_BBWRAP_ELMSR_EN_BE4, 0); + } else if ((mode == MLO_1_PLUS_1_1RF) || (mode == DBCC_LEGACY)) { + chan1 = rtw89_mgnt_chan_get(rtwdev, 1); + cck_phy_idx = chan1->band_type == RTW89_BAND_2G ? + RTW89_PHY_1 : RTW89_PHY_0; + + rtw89_phy_write32_mask(rtwdev, R_SYS_DBCC_BE4, + B_SYS_DBCC_24G_BAND_SEL_BE4, cck_phy_idx); + rtw89_write32_mask(rtwdev, reg0, B_BBWRAP_ELMSR_EN_BE4, 0x3); + rtw89_write32_mask(rtwdev, reg1, B_BBWRAP_ELMSR_EN_BE4, 0x3); + } else { + rtw89_phy_write32_mask(rtwdev, R_SYS_DBCC_BE4, + B_SYS_DBCC_24G_BAND_SEL_BE4, RTW89_PHY_0); + rtw89_write32_mask(rtwdev, reg0, B_BBWRAP_ELMSR_EN_BE4, 0); + rtw89_write32_mask(rtwdev, reg1, B_BBWRAP_ELMSR_EN_BE4, 0); + } + + udelay(1); + + return 0; +} + +static void rtw8922d_bb_sethw(struct rtw89_dev *rtwdev) +{ + struct rtw89_phy_efuse_gain *gain = &rtwdev->efuse_gain; + struct rtw89_hal *hal = &rtwdev->hal; + enum rtw89_phy_idx phy_idx; + u32 reg; + + reg = rtw89_mac_reg_by_idx(rtwdev, R_BE_PWR_BOOST, RTW89_PHY_0); + rtw89_write32_clr(rtwdev, reg, B_BE_PWR_CTRL_SEL); + reg = rtw89_mac_reg_by_idx(rtwdev, R_BE_PWR_BOOST, RTW89_PHY_1); + rtw89_write32_clr(rtwdev, reg, B_BE_PWR_CTRL_SEL); + + if (hal->cid == RTL8922D_CID7090) { + reg = rtw89_mac_reg_by_idx(rtwdev, R_PWR_BOOST_BE4, RTW89_PHY_0); + rtw89_write32_set(rtwdev, reg, B_PWR_BOOST_BE4); + reg = rtw89_mac_reg_by_idx(rtwdev, R_PWR_BOOST_BE4, RTW89_PHY_1); + rtw89_write32_set(rtwdev, reg, B_PWR_BOOST_BE4); + } + + rtw89_phy_write32_mask(rtwdev, R_TX_ERROR_SEL_BE4, B_TX_ERROR_PSDU_BE4, 0); + rtw89_phy_write32_mask(rtwdev, R_TX_ERROR_SEL_BE4, B_TX_ERROR_NSYM_BE4, 1); + rtw89_phy_write32_mask(rtwdev, R_TX_ERROR_SEL_BE4, B_TX_ERROR_LSIG_BE4, 1); + rtw89_phy_write32_mask(rtwdev, R_TX_ERROR_SEL_BE4, B_TX_ERROR_TXINFO_BE4, 1); + rtw89_phy_write32_mask(rtwdev, R_TXERRCT_EN_BE4, B_TXERRCT_EN_BE4, 0); + rtw89_phy_write32_mask(rtwdev, R_TXERRCT1_EN_BE4, B_TXERRCT1_EN_BE4, 0); + rtw89_phy_write32_idx(rtwdev, R_IMR_TX_ERROR_BE4, B_IMR_TX_ERROR_BE4, 1, RTW89_PHY_0); + rtw89_phy_write32_idx(rtwdev, R_IMR_TX_ERROR_BE4, B_IMR_TX_ERROR_BE4, 1, RTW89_PHY_1); + + rtw8922d_ctrl_mlo(rtwdev, rtwdev->mlo_dbcc_mode, false); + + /* read these registers after loading BB parameters */ + for (phy_idx = RTW89_PHY_0; phy_idx < RTW89_PHY_NUM; phy_idx++) { + gain->ref_gain_base[phy_idx] = + rtw89_phy_read32_idx(rtwdev, R_OFDM_OFST_P0_BE4, + B_OFDM_OFST_P0_BE4, phy_idx); + gain->cck_rpl_base[phy_idx] = + rtw89_phy_read32_idx(rtwdev, R_CCK_RPL_OFST_BE4, + B_CCK_RPL_OFST_BE4, phy_idx); + } +} + static void rtw8922d_set_channel_bb(struct rtw89_dev *rtwdev, const struct rtw89_chan *chan, enum rtw89_phy_idx phy_idx) From edf9f583c05cebe8933903dc3e96ffa2acc62c5b Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Mon, 30 Mar 2026 14:58:40 +0800 Subject: [PATCH 1090/1561] wifi: rtw89: 8922d: add set channel with pre-/post- helpers The main set channel function calls MAC/BB/RF ones, and pre-/post- helpers are called before/after the main function to backup/restore and stop/restart circuits, including TX scheduler, PPDU status, DACK and TSSI. Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260330065847.48946-3-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/reg.h | 14 ++ drivers/net/wireless/realtek/rtw89/rtw8922d.c | 102 ++++++++ .../net/wireless/realtek/rtw89/rtw8922d_rfk.c | 230 ++++++++++++++++++ .../net/wireless/realtek/rtw89/rtw8922d_rfk.h | 4 + 4 files changed, 350 insertions(+) diff --git a/drivers/net/wireless/realtek/rtw89/reg.h b/drivers/net/wireless/realtek/rtw89/reg.h index e64bd74db83a..195d4806c4ef 100644 --- a/drivers/net/wireless/realtek/rtw89/reg.h +++ b/drivers/net/wireless/realtek/rtw89/reg.h @@ -8445,6 +8445,8 @@ #define RR_MOD_M_RXBB GENMASK(9, 5) #define RR_MOD_LO_SEL BIT(1) #define RR_MODOPT 0x01 +#define RR_MODOPT_V1 0x10001 +#define RR_SW_SEL BIT(19) #define RR_TXG_SEL GENMASK(19, 17) #define RR_MODOPT_M_TXPWR GENMASK(5, 0) #define RR_WLSEL 0x02 @@ -10527,6 +10529,8 @@ #define R_TXPWR_RSTB0_BE4 0x2250C #define B_TXPWR_RSTB0_BE4 BIT(16) +#define R_TSSI_EN_P0_BE4 0x22510 +#define B_TSSI_EN_P0_BE4 GENMASK(3, 0) #define R_TXPWR_RSTB1_BE4 0x2260C #define B_TXPWR_RSTB1_BE4 BIT(16) @@ -10640,6 +10644,16 @@ #define R_RX_PATH0_TBL0_BE4 0x2E028 #define R_RX_PATH1_TBL0_BE4 0x2E128 +#define R_KTBL0A_BE4 0x38104 +#define R_KTBL0B_BE4 0x38204 +#define B_KTBL0_IDX0 GENMASK(1, 0) +#define B_KTBL0_IDX1 GENMASK(9, 8) +#define B_KTBL0_RST BIT(31) +#define R_KTBL1A_BE4 0x38154 +#define R_KTBL1B_BE4 0x38254 +#define B_KTBL1_TBL0 BIT(3) +#define B_KTBL1_TBL1 BIT(5) + /* WiFi CPU local domain */ #define R_AX_WDT_CTRL 0x0040 #define B_AX_WDT_EN BIT(31) diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922d.c b/drivers/net/wireless/realtek/rtw89/rtw8922d.c index 51b025d898ff..a2dd504c99ed 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8922d.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8922d.c @@ -2066,6 +2066,108 @@ static void rtw8922d_set_channel_bb(struct rtw89_dev *rtwdev, rtw8922d_tssi_reset(rtwdev, RF_PATH_AB, phy_idx); } +static void rtw8922d_pre_set_channel_bb(struct rtw89_dev *rtwdev, + enum rtw89_phy_idx phy_idx) +{ + if (!rtwdev->dbcc_en) + return; + + rtw89_phy_write32_mask(rtwdev, R_SYS_DBCC_BE4, B_SYS_DBCC_BE4, 0x0); + + if (phy_idx == RTW89_PHY_0) { + rtw89_phy_write32_mask(rtwdev, R_EMLSR_SWITCH_BE4, B_EMLSR_SWITCH_BE4, 0xBBBB); + fsleep(1); + rtw89_phy_write32_mask(rtwdev, R_EMLSR_SWITCH_BE4, B_EMLSR_BB_CLK_BE4, 0x3); + rtw89_phy_write32_mask(rtwdev, R_EMLSR_SWITCH_BE4, B_EMLSR_SWITCH_BE4, 0xAFFF); + rtw89_phy_write32_mask(rtwdev, R_EMLSR_SWITCH_BE4, B_EMLSR_SWITCH_BE4, 0xEBAD); + fsleep(1); + rtw89_phy_write32_mask(rtwdev, R_EMLSR_SWITCH_BE4, B_EMLSR_BB_CLK_BE4, 0x0); + rtw89_phy_write32_mask(rtwdev, R_EMLSR_SWITCH_BE4, B_EMLSR_SWITCH_BE4, 0xEAAD); + } else { + rtw89_phy_write32_mask(rtwdev, R_EMLSR_SWITCH_BE4, B_EMLSR_SWITCH_BE4, 0xBBBB); + fsleep(1); + rtw89_phy_write32_mask(rtwdev, R_EMLSR_SWITCH_BE4, B_EMLSR_BB_CLK_BE4, 0x3); + rtw89_phy_write32_mask(rtwdev, R_EMLSR_SWITCH_BE4, B_EMLSR_SWITCH_BE4, 0xAFFF); + fsleep(1); + rtw89_phy_write32_mask(rtwdev, R_EMLSR_SWITCH_BE4, B_EMLSR_SWITCH_BE4, 0xEFFF); + rtw89_phy_write32_mask(rtwdev, R_EMLSR_SWITCH_BE4, B_EMLSR_BB_CLK_BE4, 0x0); + rtw89_phy_write32_mask(rtwdev, R_EMLSR_SWITCH_BE4, B_EMLSR_SWITCH_BE4, 0xEEFF); + } + + fsleep(1); +} + +static void rtw8922d_post_set_channel_bb(struct rtw89_dev *rtwdev, + enum rtw89_mlo_dbcc_mode mode, + enum rtw89_phy_idx phy_idx) +{ + if (!rtwdev->dbcc_en) + return; + + rtw8922d_ctrl_mlo(rtwdev, mode, true); +} + +static void rtw8922d_set_channel(struct rtw89_dev *rtwdev, + const struct rtw89_chan *chan, + enum rtw89_mac_idx mac_idx, + enum rtw89_phy_idx phy_idx) +{ + rtw8922d_set_channel_mac(rtwdev, chan, mac_idx); + rtw8922d_set_channel_bb(rtwdev, chan, phy_idx); + rtw8922d_set_channel_rf(rtwdev, chan, phy_idx); +} + +static void __rtw8922d_dack_reset(struct rtw89_dev *rtwdev, enum rtw89_rf_path path) +{ + rtw89_phy_write32_mask(rtwdev, 0x3c000 + (path << 8), BIT(17), 0x0); + rtw89_phy_write32_mask(rtwdev, 0x3c000 + (path << 8), BIT(17), 0x1); +} + +static void rtw8922d_dack_reset(struct rtw89_dev *rtwdev) +{ + __rtw8922d_dack_reset(rtwdev, RF_PATH_A); + __rtw8922d_dack_reset(rtwdev, RF_PATH_B); +} + +static +void rtw8922d_hal_reset(struct rtw89_dev *rtwdev, + enum rtw89_phy_idx phy_idx, enum rtw89_mac_idx mac_idx, + enum rtw89_band band, u32 *tx_en, bool enter) +{ + if (enter) { + rtw89_chip_stop_sch_tx(rtwdev, mac_idx, tx_en, RTW89_SCH_TX_SEL_ALL); + rtw89_mac_cfg_ppdu_status(rtwdev, mac_idx, false); + rtw8922d_dack_reset(rtwdev); + rtw8922d_tssi_cont_en_phyidx(rtwdev, false, phy_idx); + fsleep(40); + rtw8922d_bb_reset_en(rtwdev, band, false, phy_idx); + } else { + rtw89_mac_cfg_ppdu_status(rtwdev, mac_idx, true); + rtw8922d_tssi_cont_en_phyidx(rtwdev, true, phy_idx); + rtw8922d_bb_reset_en(rtwdev, band, true, phy_idx); + rtw89_chip_resume_sch_tx(rtwdev, mac_idx, *tx_en); + } +} + +static void rtw8922d_set_channel_help(struct rtw89_dev *rtwdev, bool enter, + struct rtw89_channel_help_params *p, + const struct rtw89_chan *chan, + enum rtw89_mac_idx mac_idx, + enum rtw89_phy_idx phy_idx) +{ + if (enter) { + rtw8922d_pre_set_channel_bb(rtwdev, phy_idx); + rtw8922d_pre_set_channel_rf(rtwdev, phy_idx); + } + + rtw8922d_hal_reset(rtwdev, phy_idx, mac_idx, chan->band_type, &p->tx_en, enter); + + if (!enter) { + rtw8922d_post_set_channel_bb(rtwdev, rtwdev->mlo_dbcc_mode, phy_idx); + rtw8922d_post_set_channel_rf(rtwdev, phy_idx); + } +} + MODULE_FIRMWARE(RTW8922D_MODULE_FIRMWARE); MODULE_FIRMWARE(RTW8922DS_MODULE_FIRMWARE); MODULE_AUTHOR("Realtek Corporation"); diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922d_rfk.c b/drivers/net/wireless/realtek/rtw89/rtw8922d_rfk.c index 6b35d196cb81..d1eda19a39a9 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8922d_rfk.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8922d_rfk.c @@ -2,11 +2,40 @@ /* Copyright(c) 2026 Realtek Corporation */ +#include "chan.h" +#include "debug.h" #include "phy.h" #include "reg.h" #include "rtw8922d.h" #include "rtw8922d_rfk.h" +static void rtw8922d_tssi_cont_en(struct rtw89_dev *rtwdev, bool en, + enum rtw89_rf_path path, u8 phy_idx) +{ + static const u32 tssi_trk_man[2] = {R_TSSI_EN_P0_BE4, + R_TSSI_EN_P0_BE4 + 0x100}; + + if (en) + rtw89_phy_write32_idx(rtwdev, tssi_trk_man[path], + B_TSSI_CONT_EN, 0, phy_idx); + else + rtw89_phy_write32_idx(rtwdev, tssi_trk_man[path], + B_TSSI_CONT_EN, 1, phy_idx); +} + +void rtw8922d_tssi_cont_en_phyidx(struct rtw89_dev *rtwdev, bool en, u8 phy_idx) +{ + if (rtwdev->mlo_dbcc_mode == MLO_1_PLUS_1_1RF) { + if (phy_idx == RTW89_PHY_0) + rtw8922d_tssi_cont_en(rtwdev, en, RF_PATH_A, phy_idx); + else + rtw8922d_tssi_cont_en(rtwdev, en, RF_PATH_B, phy_idx); + } else { + rtw8922d_tssi_cont_en(rtwdev, en, RF_PATH_A, phy_idx); + rtw8922d_tssi_cont_en(rtwdev, en, RF_PATH_B, phy_idx); + } +} + static void rtw8922d_ctl_band_ch_bw(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy, const struct rtw89_chan *chan) @@ -31,3 +60,204 @@ void rtw8922d_set_channel_rf(struct rtw89_dev *rtwdev, { rtw8922d_ctl_band_ch_bw(rtwdev, phy_idx, chan); } + +enum _rf_syn_pow { + RF_SYN_ON_OFF, + RF_SYN_OFF_ON, + RF_SYN_ALLON, + RF_SYN_ALLOFF, +}; + +static void rtw8922d_set_syn01(struct rtw89_dev *rtwdev, enum _rf_syn_pow syn) +{ + rtw89_debug(rtwdev, RTW89_DBG_RFK, "SYN config=%d\n", syn); + + if (syn == RF_SYN_ALLON) { + rtw89_write_rf(rtwdev, RF_PATH_A, RR_MOD, BIT(1), 0x0); + rtw89_write_rf(rtwdev, RF_PATH_B, RR_MOD, BIT(1), 0x0); + rtw89_write_rf(rtwdev, RF_PATH_A, RR_RSV1, MASKDWORD, 0x0); + rtw89_write_rf(rtwdev, RF_PATH_B, RR_RSV1, MASKDWORD, 0x0); + rtw89_write_rf(rtwdev, RF_PATH_A, RR_POW, RR_POW_SYN_V1, 0xf); + rtw89_write_rf(rtwdev, RF_PATH_B, RR_POW, RR_POW_SYN_V1, 0xf); + rtw89_write_rf(rtwdev, RF_PATH_A, RR_RSV1, MASKDWORD, 0x1); + rtw89_write_rf(rtwdev, RF_PATH_B, RR_RSV1, MASKDWORD, 0x1); + } else if (syn == RF_SYN_ON_OFF) { + rtw89_write_rf(rtwdev, RF_PATH_A, RR_MOD, BIT(1), 0x0); + rtw89_write_rf(rtwdev, RF_PATH_A, RR_RSV1, MASKDWORD, 0x0); + rtw89_write_rf(rtwdev, RF_PATH_A, RR_POW, RR_POW_SYN_V1, 0xf); + rtw89_write_rf(rtwdev, RF_PATH_B, RR_POW, RR_POW_SYN_V1, 0x0); + rtw89_write_rf(rtwdev, RF_PATH_A, RR_RSV1, MASKDWORD, 0x1); + } else if (syn == RF_SYN_OFF_ON) { + rtw89_write_rf(rtwdev, RF_PATH_B, RR_MOD, BIT(1), 0x0); + rtw89_write_rf(rtwdev, RF_PATH_B, RR_RSV1, MASKDWORD, 0x0); + rtw89_write_rf(rtwdev, RF_PATH_A, RR_POW, RR_POW_SYN_V1, 0x0); + rtw89_write_rf(rtwdev, RF_PATH_B, RR_POW, RR_POW_SYN_V1, 0xf); + rtw89_write_rf(rtwdev, RF_PATH_B, RR_RSV1, MASKDWORD, 0x1); + } else if (syn == RF_SYN_ALLOFF) { + rtw89_write_rf(rtwdev, RF_PATH_A, RR_POW, RR_POW_SYN_V1, 0x0); + rtw89_write_rf(rtwdev, RF_PATH_B, RR_POW, RR_POW_SYN_V1, 0x0); + } +} + +static void rtw8922d_chlk_ktbl_sel(struct rtw89_dev *rtwdev, u8 kpath, u8 idx) +{ + bool mlo_linking = false; + + if (idx > 2) { + rtw89_warn(rtwdev, "[DBCC][ERROR]indx is out of limit!! index(%d)", idx); + return; + } + + if (mlo_linking) { + if (kpath & RF_A) { + rtw89_write_rf(rtwdev, RF_PATH_A, RR_MODOPT, RR_SW_SEL, 0x0); + rtw89_write_rf(rtwdev, RF_PATH_A, RR_MODOPT_V1, RR_SW_SEL, 0x0); + } + + if (kpath & RF_B) { + rtw89_write_rf(rtwdev, RF_PATH_B, RR_MODOPT, RR_SW_SEL, 0x0); + rtw89_write_rf(rtwdev, RF_PATH_B, RR_MODOPT_V1, RR_SW_SEL, 0x0); + } + + return; + } + + if (kpath & RF_A) { + rtw89_phy_write32_mask(rtwdev, R_KTBL0A_BE4, B_KTBL0_RST, 0x1); + rtw89_phy_write32_mask(rtwdev, R_KTBL0A_BE4, B_KTBL0_IDX0, idx); + rtw89_phy_write32_mask(rtwdev, R_KTBL0A_BE4, B_KTBL0_IDX1, idx); + + rtw89_write_rf(rtwdev, RF_PATH_A, RR_MODOPT, RR_TXG_SEL, 0x4 | idx); + rtw89_write_rf(rtwdev, RF_PATH_A, RR_MODOPT_V1, RR_TXG_SEL, 0x4 | idx); + + rtw89_phy_write32_mask(rtwdev, R_KTBL1A_BE4, B_KTBL1_TBL0, idx & BIT(0)); + rtw89_phy_write32_mask(rtwdev, R_KTBL1A_BE4, B_KTBL1_TBL1, (idx & BIT(1)) >> 1); + } + + if (kpath & RF_B) { + rtw89_phy_write32_mask(rtwdev, R_KTBL0B_BE4, B_KTBL0_RST, 0x1); + rtw89_phy_write32_mask(rtwdev, R_KTBL0B_BE4, B_KTBL0_IDX0, idx); + rtw89_phy_write32_mask(rtwdev, R_KTBL0B_BE4, B_KTBL0_IDX1, idx); + + rtw89_write_rf(rtwdev, RF_PATH_B, RR_MODOPT, RR_TXG_SEL, 0x4 | idx); + rtw89_write_rf(rtwdev, RF_PATH_B, RR_MODOPT_V1, RR_TXG_SEL, 0x4 | idx); + + rtw89_phy_write32_mask(rtwdev, R_KTBL1B_BE4, B_KTBL1_TBL0, idx & BIT(0)); + rtw89_phy_write32_mask(rtwdev, R_KTBL1B_BE4, B_KTBL1_TBL1, (idx & BIT(1)) >> 1); + } +} + +static u8 rtw8922d_chlk_reload_sel_tbl(struct rtw89_dev *rtwdev, + const struct rtw89_chan *chan, u8 path) +{ + struct rtw89_rfk_mcc_info_data *rfk_mcc = rtwdev->rfk_mcc.data; + struct rtw89_rfk_chan_desc desc[__RTW89_RFK_CHS_NR_V1] = {}; + u8 tbl_sel; + + for (tbl_sel = 0; tbl_sel < ARRAY_SIZE(desc); tbl_sel++) { + struct rtw89_rfk_chan_desc *p = &desc[tbl_sel]; + + p->ch = rfk_mcc->ch[tbl_sel]; + + p->has_band = true; + p->band = rfk_mcc->band[tbl_sel]; + + p->has_bw = true; + p->bw = rfk_mcc->bw[tbl_sel]; + } + + tbl_sel = rtw89_rfk_chan_lookup(rtwdev, desc, ARRAY_SIZE(desc), chan); + + rfk_mcc->ch[tbl_sel] = chan->channel; + rfk_mcc->band[tbl_sel] = chan->band_type; + rfk_mcc->bw[tbl_sel] = chan->band_width; + rfk_mcc->rf18[tbl_sel] = rtw89_chip_chan_to_rf18_val(rtwdev, chan); + + /* shared table array, but tbl_sel can be independent by path */ + rfk_mcc[path].table_idx = tbl_sel; + + return tbl_sel; +} + +static void rtw8922d_chlk_reload(struct rtw89_dev *rtwdev) +{ + const struct rtw89_chan *chan0, *chan1; + u8 s0_tbl, s1_tbl; + + switch (rtwdev->mlo_dbcc_mode) { + default: + case MLO_2_PLUS_0_1RF: + chan0 = rtw89_mgnt_chan_get(rtwdev, 0); + chan1 = chan0; + break; + case MLO_0_PLUS_2_1RF: + chan1 = rtw89_mgnt_chan_get(rtwdev, 1); + chan0 = chan1; + break; + case MLO_1_PLUS_1_1RF: + chan0 = rtw89_mgnt_chan_get(rtwdev, 0); + chan1 = rtw89_mgnt_chan_get(rtwdev, 1); + break; + } + + s0_tbl = rtw8922d_chlk_reload_sel_tbl(rtwdev, chan0, 0); + s1_tbl = rtw8922d_chlk_reload_sel_tbl(rtwdev, chan1, 1); + + rtw8922d_chlk_ktbl_sel(rtwdev, RF_A, s0_tbl); + rtw8922d_chlk_ktbl_sel(rtwdev, RF_B, s1_tbl); +} + +static enum _rf_syn_pow rtw8922d_get_syn_pow(struct rtw89_dev *rtwdev) +{ + switch (rtwdev->mlo_dbcc_mode) { + case MLO_0_PLUS_2_1RF: + return RF_SYN_OFF_ON; + case MLO_0_PLUS_2_2RF: + case MLO_1_PLUS_1_2RF: + case MLO_2_PLUS_0_1RF: + case MLO_2_PLUS_0_2RF: + case MLO_2_PLUS_2_2RF: + case MLO_DBCC_NOT_SUPPORT: + default: + return RF_SYN_ON_OFF; + case MLO_1_PLUS_1_1RF: + case DBCC_LEGACY: + return RF_SYN_ALLON; + } +} + +void rtw8922d_rfk_mlo_ctrl(struct rtw89_dev *rtwdev) +{ + enum _rf_syn_pow syn_pow = rtw8922d_get_syn_pow(rtwdev); + + if (!rtwdev->dbcc_en) + goto set_rfk_reload; + + rtw8922d_set_syn01(rtwdev, syn_pow); + +set_rfk_reload: + rtw8922d_chlk_reload(rtwdev); +} + +void rtw8922d_pre_set_channel_rf(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx) +{ + bool mlo_1_1; + + if (!rtwdev->dbcc_en) + return; + + mlo_1_1 = rtw89_is_mlo_1_1(rtwdev); + if (mlo_1_1) + rtw8922d_set_syn01(rtwdev, RF_SYN_ALLON); + else if (phy_idx == RTW89_PHY_0) + rtw8922d_set_syn01(rtwdev, RF_SYN_ON_OFF); + else + rtw8922d_set_syn01(rtwdev, RF_SYN_OFF_ON); + + fsleep(1000); +} + +void rtw8922d_post_set_channel_rf(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx) +{ + rtw8922d_rfk_mlo_ctrl(rtwdev); +} diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922d_rfk.h b/drivers/net/wireless/realtek/rtw89/rtw8922d_rfk.h index 03af1f0497ac..4c505ae24261 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8922d_rfk.h +++ b/drivers/net/wireless/realtek/rtw89/rtw8922d_rfk.h @@ -7,8 +7,12 @@ #include "core.h" +void rtw8922d_tssi_cont_en_phyidx(struct rtw89_dev *rtwdev, bool en, u8 phy_idx); void rtw8922d_set_channel_rf(struct rtw89_dev *rtwdev, const struct rtw89_chan *chan, enum rtw89_phy_idx phy_idx); +void rtw8922d_rfk_mlo_ctrl(struct rtw89_dev *rtwdev); +void rtw8922d_pre_set_channel_rf(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx); +void rtw8922d_post_set_channel_rf(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx); #endif From 2ef4363f1388258f8073cd38d2ba059963f25fcf Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Mon, 30 Mar 2026 14:58:41 +0800 Subject: [PATCH 1091/1561] wifi: rtw89: 8922d: add RF calibration ops The chips ops related to RF calibration include init, init_late, channel, band_change, scan, and track. The init_late is similar to init, but HCI is ready, so receiving C2H event is possible. The ops channel is the main function that do all RF calibration on operating channel. The ops band_change and scan are to reset RF calibration because channel is switching at these moment, we need to reset RF state. The ops track is to monitor temperature to check if re-calibrate RF again. Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260330065847.48946-4-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/reg.h | 4 + drivers/net/wireless/realtek/rtw89/rtw8922d.c | 154 ++++++++++++++++++ .../net/wireless/realtek/rtw89/rtw8922d_rfk.c | 85 ++++++++++ .../net/wireless/realtek/rtw89/rtw8922d_rfk.h | 1 + 4 files changed, 244 insertions(+) diff --git a/drivers/net/wireless/realtek/rtw89/reg.h b/drivers/net/wireless/realtek/rtw89/reg.h index 195d4806c4ef..1a5a5b30a28e 100644 --- a/drivers/net/wireless/realtek/rtw89/reg.h +++ b/drivers/net/wireless/realtek/rtw89/reg.h @@ -10531,6 +10531,10 @@ #define B_TXPWR_RSTB0_BE4 BIT(16) #define R_TSSI_EN_P0_BE4 0x22510 #define B_TSSI_EN_P0_BE4 GENMASK(3, 0) +#define R_USED_TSSI_TRK_ON_P0_BE4 0x22534 +#define B_USED_TSSI_TRK_ON_P0_BE4 BIT(22) +#define R_TSSI_DCK_MOV_AVG_LEN_P0_BE4 0x225CC +#define B_TSSI_DCK_MOV_AVG_LEN_P0_BE4 GENMASK(8, 6) #define R_TXPWR_RSTB1_BE4 0x2260C #define B_TXPWR_RSTB1_BE4 BIT(16) diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922d.c b/drivers/net/wireless/realtek/rtw89/rtw8922d.c index a2dd504c99ed..2e6f4504caeb 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8922d.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8922d.c @@ -3,6 +3,7 @@ */ #include "chan.h" +#include "coex.h" #include "debug.h" #include "efuse.h" #include "mac.h" @@ -2168,6 +2169,159 @@ static void rtw8922d_set_channel_help(struct rtw89_dev *rtwdev, bool enter, } } +static void rtw8922d_rfk_init(struct rtw89_dev *rtwdev) +{ + struct rtw89_rfk_mcc_info *rfk_mcc = &rtwdev->rfk_mcc; + struct rtw89_lck_info *lck = &rtwdev->lck; + + rtwdev->is_tssi_mode[RF_PATH_A] = false; + rtwdev->is_tssi_mode[RF_PATH_B] = false; + memset(rfk_mcc, 0, sizeof(*rfk_mcc)); + memset(lck, 0, sizeof(*lck)); +} + +static void __rtw8922d_rfk_init_late(struct rtw89_dev *rtwdev, + enum rtw89_phy_idx phy_idx, + const struct rtw89_chan *chan) +{ + rtw8922d_rfk_mlo_ctrl(rtwdev); + + rtw89_phy_rfk_pre_ntfy_and_wait(rtwdev, phy_idx, 5); + if (!test_bit(RTW89_FLAG_SER_HANDLING, rtwdev->flags)) + rtw89_phy_rfk_rxdck_and_wait(rtwdev, phy_idx, chan, false, 128); + if (phy_idx == RTW89_PHY_0) + rtw89_phy_rfk_dack_and_wait(rtwdev, phy_idx, chan, 58); +} + +static void rtw8922d_rfk_init_late(struct rtw89_dev *rtwdev) +{ + const struct rtw89_chan *chan = rtw89_chan_get(rtwdev, RTW89_CHANCTX_0); + + __rtw8922d_rfk_init_late(rtwdev, RTW89_PHY_0, chan); + if (rtwdev->dbcc_en) + __rtw8922d_rfk_init_late(rtwdev, RTW89_PHY_1, chan); +} + +static void _wait_rx_mode(struct rtw89_dev *rtwdev, u8 kpath) +{ + u32 rf_mode; + u8 path; + int ret; + + for (path = 0; path < RF_PATH_NUM_8922D; path++) { + if (!(kpath & BIT(path))) + continue; + + ret = read_poll_timeout_atomic(rtw89_read_rf, rf_mode, rf_mode != 2, + 2, 5000, false, rtwdev, path, 0x00, + RR_MOD_MASK); + rtw89_debug(rtwdev, RTW89_DBG_RFK, + "[RFK] Wait S%d to Rx mode!! (ret = %d)\n", + path, ret); + } +} + +static void __rtw8922d_tssi_enable(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx) +{ + u8 path; + + for (path = RF_PATH_A; path <= RF_PATH_B; path++) { + u32 addr_ofst = (phy_idx << 12) + (path << 8); + + rtw89_phy_write32_mask(rtwdev, R_TSSI_DCK_MOV_AVG_LEN_P0_BE4 + addr_ofst, + B_TSSI_DCK_MOV_AVG_LEN_P0_BE4, 0x4); + rtw89_phy_write32_clr(rtwdev, R_USED_TSSI_TRK_ON_P0_BE4 + addr_ofst, + B_USED_TSSI_TRK_ON_P0_BE4); + rtw89_phy_write32_set(rtwdev, R_USED_TSSI_TRK_ON_P0_BE4 + addr_ofst, + B_USED_TSSI_TRK_ON_P0_BE4); + rtw89_phy_write32_clr(rtwdev, R_TSSI_EN_P0_BE4 + addr_ofst, + B_TSSI_EN_P0_BE4); + rtw89_phy_write32_mask(rtwdev, R_TSSI_EN_P0_BE4 + addr_ofst, + B_TSSI_EN_P0_BE4, 0x3); + } +} + +static void __rtw8922d_tssi_disable(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx) +{ + u8 path; + + for (path = RF_PATH_A; path <= RF_PATH_B; path++) { + u32 addr_ofst = (phy_idx << 12) + (path << 8); + + rtw89_phy_write32_clr(rtwdev, R_TSSI_DCK_MOV_AVG_LEN_P0_BE4 + addr_ofst, + B_TSSI_DCK_MOV_AVG_LEN_P0_BE4); + rtw89_phy_write32_clr(rtwdev, R_USED_TSSI_TRK_ON_P0_BE4 + addr_ofst, + B_USED_TSSI_TRK_ON_P0_BE4); + rtw89_phy_write32_clr(rtwdev, R_TSSI_EN_P0_BE4 + addr_ofst, + B_TSSI_EN_P0_BE4); + } +} + +static void rtw8922d_rfk_tssi(struct rtw89_dev *rtwdev, + enum rtw89_phy_idx phy_idx, + const struct rtw89_chan *chan, + enum rtw89_tssi_mode tssi_mode, + unsigned int ms) +{ + int ret; + + ret = rtw89_phy_rfk_tssi_and_wait(rtwdev, phy_idx, chan, tssi_mode, ms); + if (ret) { + rtwdev->is_tssi_mode[RF_PATH_A] = false; + rtwdev->is_tssi_mode[RF_PATH_B] = false; + } else { + rtwdev->is_tssi_mode[RF_PATH_A] = true; + rtwdev->is_tssi_mode[RF_PATH_B] = true; + } +} + +static void rtw8922d_rfk_channel(struct rtw89_dev *rtwdev, + struct rtw89_vif_link *rtwvif_link) +{ + enum rtw89_chanctx_idx chanctx_idx = rtwvif_link->chanctx_idx; + const struct rtw89_chan *chan = rtw89_chan_get(rtwdev, chanctx_idx); + enum rtw89_phy_idx phy_idx = rtwvif_link->phy_idx; + u8 phy_map = rtw89_btc_phymap(rtwdev, phy_idx, RF_AB, chanctx_idx); + u32 tx_en; + + rtw89_btc_ntfy_wl_rfk(rtwdev, phy_map, BTC_WRFKT_CHLK, BTC_WRFK_START); + rtw89_chip_stop_sch_tx(rtwdev, phy_idx, &tx_en, RTW89_SCH_TX_SEL_ALL); + _wait_rx_mode(rtwdev, RF_AB); + + rtw89_phy_rfk_pre_ntfy_and_wait(rtwdev, phy_idx, 5); + rtw89_phy_rfk_txgapk_and_wait(rtwdev, phy_idx, chan, 54); + rtw89_phy_rfk_txiqk_and_wait(rtwdev, phy_idx, chan, 45); + rtw89_phy_rfk_iqk_and_wait(rtwdev, phy_idx, chan, 84); + rtw8922d_rfk_tssi(rtwdev, phy_idx, chan, RTW89_TSSI_NORMAL, 20); + rtw89_phy_rfk_cim3k_and_wait(rtwdev, phy_idx, chan, 44); + rtw89_phy_rfk_dpk_and_wait(rtwdev, phy_idx, chan, 68); + rtw89_phy_rfk_rxdck_and_wait(rtwdev, RTW89_PHY_0, chan, true, 32); + + rtw89_chip_resume_sch_tx(rtwdev, phy_idx, tx_en); + rtw89_btc_ntfy_wl_rfk(rtwdev, phy_map, BTC_WRFKT_CHLK, BTC_WRFK_STOP); +} + +static void rtw8922d_rfk_band_changed(struct rtw89_dev *rtwdev, + enum rtw89_phy_idx phy_idx, + const struct rtw89_chan *chan) +{ +} + +static void rtw8922d_rfk_scan(struct rtw89_dev *rtwdev, + struct rtw89_vif_link *rtwvif_link, + bool start) +{ + if (start) + __rtw8922d_tssi_disable(rtwdev, rtwvif_link->phy_idx); + else + __rtw8922d_tssi_enable(rtwdev, rtwvif_link->phy_idx); +} + +static void rtw8922d_rfk_track(struct rtw89_dev *rtwdev) +{ + rtw8922d_lck_track(rtwdev); +} + MODULE_FIRMWARE(RTW8922D_MODULE_FIRMWARE); MODULE_FIRMWARE(RTW8922DS_MODULE_FIRMWARE); MODULE_AUTHOR("Realtek Corporation"); diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922d_rfk.c b/drivers/net/wireless/realtek/rtw89/rtw8922d_rfk.c index d1eda19a39a9..147cf91d2cb0 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8922d_rfk.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8922d_rfk.c @@ -261,3 +261,88 @@ void rtw8922d_post_set_channel_rf(struct rtw89_dev *rtwdev, enum rtw89_phy_idx p { rtw8922d_rfk_mlo_ctrl(rtwdev); } + +static u8 _get_thermal(struct rtw89_dev *rtwdev, enum rtw89_rf_path path) +{ + rtw89_write_rf(rtwdev, path, RR_TM, RR_TM_TRI, 0x1); + rtw89_write_rf(rtwdev, path, RR_TM, RR_TM_TRI, 0x0); + rtw89_write_rf(rtwdev, path, RR_TM, RR_TM_TRI, 0x1); + + fsleep(200); + + return rtw89_read_rf(rtwdev, path, RR_TM, RR_TM_VAL_V1); +} + +static void _lck_keep_thermal(struct rtw89_dev *rtwdev) +{ + struct rtw89_lck_info *lck = &rtwdev->lck; + int path; + + for (path = 0; path < rtwdev->chip->rf_path_num; path++) { + lck->thermal[path] = _get_thermal(rtwdev, path); + rtw89_debug(rtwdev, RTW89_DBG_RFK_TRACK, + "[LCK] path=%d thermal=0x%x", path, lck->thermal[path]); + } +} + +static void _lck(struct rtw89_dev *rtwdev) +{ + enum _rf_syn_pow syn_pow = rtw8922d_get_syn_pow(rtwdev); + u8 path_mask = 0; + u32 tmp18, tmp5; + int path; + + rtw89_debug(rtwdev, RTW89_DBG_RFK_TRACK, "[LCK] DO LCK\n"); + + if (syn_pow == RF_SYN_ALLON) + path_mask = BIT(RF_PATH_A) | BIT(RF_PATH_B); + else if (syn_pow == RF_SYN_ON_OFF) + path_mask = BIT(RF_PATH_A); + else if (syn_pow == RF_SYN_OFF_ON) + path_mask = BIT(RF_PATH_B); + else + return; + + for (path = 0; path < rtwdev->chip->rf_path_num; path++) { + if (!(path_mask & BIT(path))) + continue; + + tmp18 = rtw89_read_rf(rtwdev, path, RR_CFGCH, MASKDWORD); + tmp5 = rtw89_read_rf(rtwdev, path, RR_RSV1, MASKDWORD); + + rtw89_write_rf(rtwdev, path, RR_MOD, MASKDWORD, 0x10000); + rtw89_write_rf(rtwdev, path, RR_RSV1, MASKDWORD, 0x0); + rtw89_write_rf(rtwdev, path, RR_LCK_TRG, RR_LCK_TRGSEL, 0x1); + rtw89_write_rf(rtwdev, path, RR_CFGCH, MASKDWORD, tmp18); + rtw89_write_rf(rtwdev, path, RR_LCK_TRG, RR_LCK_TRGSEL, 0x0); + + fsleep(400); + + rtw89_write_rf(rtwdev, path, RR_RSV1, MASKDWORD, tmp5); + } + + _lck_keep_thermal(rtwdev); +} + +#define RTW8922D_LCK_TH 16 +void rtw8922d_lck_track(struct rtw89_dev *rtwdev) +{ + struct rtw89_lck_info *lck = &rtwdev->lck; + u8 cur_thermal; + int delta; + int path; + + for (path = 0; path < rtwdev->chip->rf_path_num; path++) { + cur_thermal = _get_thermal(rtwdev, path); + delta = abs((int)cur_thermal - lck->thermal[path]); + + rtw89_debug(rtwdev, RTW89_DBG_RFK_TRACK, + "[LCK] path=%d current thermal=0x%x delta=0x%x\n", + path, cur_thermal, delta); + + if (delta >= RTW8922D_LCK_TH) { + _lck(rtwdev); + return; + } + } +} diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922d_rfk.h b/drivers/net/wireless/realtek/rtw89/rtw8922d_rfk.h index 4c505ae24261..8a5f4b56b8ce 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8922d_rfk.h +++ b/drivers/net/wireless/realtek/rtw89/rtw8922d_rfk.h @@ -14,5 +14,6 @@ void rtw8922d_set_channel_rf(struct rtw89_dev *rtwdev, void rtw8922d_rfk_mlo_ctrl(struct rtw89_dev *rtwdev); void rtw8922d_pre_set_channel_rf(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx); void rtw8922d_post_set_channel_rf(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx); +void rtw8922d_lck_track(struct rtw89_dev *rtwdev); #endif From cee10a01e286e88e0949979e91231270ca9fdb8e Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Tue, 31 Mar 2026 14:10:21 +0100 Subject: [PATCH 1092/1561] net: macb: fix use of at91_default_usrio without CONFIG_OF If CONFIG_OF is not enabled, at91_default_usrio is used undeclared in gem_default_config. Move at91_default_usrio back above the CONFIG_OF section where macb_default_usrio used to be, so that it is unconditionally defined and defined prior to any of the users. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202603280028.wQjUrIvv-lkp@intel.com/ Reported-by: Jiawen Wu Closes: https://lore.kernel.org/all/06a701dcc014$86def5b0$949ce110$@trustnetic.com/ Fixes: a17871778ee28 ("net: macb: rename macb_default_usrio to at91_default_usrio as not all platforms have mii mode control in usrio") Signed-off-by: Conor Dooley Link: https://patch.msgid.link/20260331-enroll-sensation-50901318a419@spud Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 7a48ebe0741f..7f061d7c95af 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -4949,6 +4949,13 @@ static int macb_init(struct platform_device *pdev, return macb_init_dflt(pdev); } +static const struct macb_usrio_config at91_default_usrio = { + .mii = MACB_BIT(MII), + .rmii = MACB_BIT(RMII), + .rgmii = GEM_BIT(RGMII), + .clken = MACB_BIT(CLKEN), +}; + #if defined(CONFIG_OF) /* 1518 rounded up */ #define AT91ETHER_MAX_RBUFF_SZ 0x600 @@ -5523,13 +5530,6 @@ static int eyeq5_init(struct platform_device *pdev) return ret; } -static const struct macb_usrio_config at91_default_usrio = { - .mii = MACB_BIT(MII), - .rmii = MACB_BIT(RMII), - .rgmii = GEM_BIT(RGMII), - .clken = MACB_BIT(CLKEN), -}; - static const struct macb_usrio_config mpfs_usrio = { .tsu_source = 0, }; From baa6ea4e5e034c59fd8899d200ead348e148ea06 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Mon, 30 Mar 2026 14:58:42 +0800 Subject: [PATCH 1093/1561] wifi: rtw89: 8922d: add set TX power callback Set TX power depends on operating channel. The Tx power factors are data rate, channel, bandwidth and etc. Also, consider SAR as a factor of TX power limit. Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260330065847.48946-5-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/reg.h | 13 ++ drivers/net/wireless/realtek/rtw89/rtw8922d.c | 115 ++++++++++++++++++ 2 files changed, 128 insertions(+) diff --git a/drivers/net/wireless/realtek/rtw89/reg.h b/drivers/net/wireless/realtek/rtw89/reg.h index 1a5a5b30a28e..37de1c827814 100644 --- a/drivers/net/wireless/realtek/rtw89/reg.h +++ b/drivers/net/wireless/realtek/rtw89/reg.h @@ -10235,6 +10235,8 @@ #define B_TSSI_CONT_EN BIT(3) #define R_P0_TXPWRB_BE 0xE61C #define R_P1_TXPWRB_BE 0xE71C +#define R_P0_TXPWRB_BE4 0x2251C +#define R_P1_TXPWRB_BE4 0x2261C #define B_TXPWRB_MAX_BE GENMASK(20, 12) #define R_TSSI_MAP_OFST_P0 0xE620 #define R_TSSI_MAP_OFST_P1 0xE720 @@ -10531,13 +10533,24 @@ #define B_TXPWR_RSTB0_BE4 BIT(16) #define R_TSSI_EN_P0_BE4 0x22510 #define B_TSSI_EN_P0_BE4 GENMASK(3, 0) +#define R_TXAGC_REF_DBM_PATH0_TBL0_BE4 0x22528 +#define B_TXAGC_OFDM_REF_DBM_PATH0_TBL0_BE4 GENMASK(8, 0) +#define B_TXAGC_CCK_REF_DBM_PATH0_TBL0_BE4 GENMASK(17, 9) #define R_USED_TSSI_TRK_ON_P0_BE4 0x22534 #define B_USED_TSSI_TRK_ON_P0_BE4 BIT(22) +#define R_TSSI_K_OFDM_PATH0_TBL0_BE4 0x225A0 +#define B_TSSI_K_OFDM_PATH0_TBL0_BE4 GENMASK(29, 20) #define R_TSSI_DCK_MOV_AVG_LEN_P0_BE4 0x225CC #define B_TSSI_DCK_MOV_AVG_LEN_P0_BE4 GENMASK(8, 6) #define R_TXPWR_RSTB1_BE4 0x2260C #define B_TXPWR_RSTB1_BE4 BIT(16) +#define R_TXAGC_REF_DBM_PATH0_TBL1_BE4 0x23528 +#define B_TXAGC_OFDM_REF_DBM_PATH0_TBL1_BE4 GENMASK(8, 0) +#define B_TXAGC_CCK_REF_DBM_PATH0_TBL1_BE4 GENMASK(17, 9) +#define R_TSSI_K_OFDM_PATH0_TBL1_BE4 0x235A0 +#define B_TSSI_K_OFDM_PATH0_TBL1_BE4 GENMASK(29, 20) + #define R_OFDM_OFST_P0_BE4 0x240C8 #define B_OFDM_OFST_P0_BE4 GENMASK(31, 24) #define R_PATH0_RXIDX_INIT_BE4 0x24108 diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922d.c b/drivers/net/wireless/realtek/rtw89/rtw8922d.c index 2e6f4504caeb..9c62a5f12962 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8922d.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8922d.c @@ -11,6 +11,7 @@ #include "reg.h" #include "rtw8922d.h" #include "rtw8922d_rfk.h" +#include "sar.h" #include "util.h" #define RTW8922D_FW_FORMAT_MAX 0 @@ -2322,6 +2323,120 @@ static void rtw8922d_rfk_track(struct rtw89_dev *rtwdev) rtw8922d_lck_track(rtwdev); } +static const struct rtw89_reg_def rtw8922d_txpwr_ref[][3] = { + {{ .addr = R_TXAGC_REF_DBM_PATH0_TBL0_BE4, + .mask = B_TXAGC_OFDM_REF_DBM_PATH0_TBL0_BE4 }, + { .addr = R_TXAGC_REF_DBM_PATH0_TBL0_BE4, + .mask = B_TXAGC_CCK_REF_DBM_PATH0_TBL0_BE4 }, + { .addr = R_TSSI_K_OFDM_PATH0_TBL0_BE4, + .mask = B_TSSI_K_OFDM_PATH0_TBL0_BE4 } + }, + {{ .addr = R_TXAGC_REF_DBM_PATH0_TBL1_BE4, + .mask = B_TXAGC_OFDM_REF_DBM_PATH0_TBL1_BE4 }, + { .addr = R_TXAGC_REF_DBM_PATH0_TBL1_BE4, + .mask = B_TXAGC_CCK_REF_DBM_PATH0_TBL1_BE4 }, + { .addr = R_TSSI_K_OFDM_PATH0_TBL1_BE4, + .mask = B_TSSI_K_OFDM_PATH0_TBL1_BE4 } + }, +}; + +static void rtw8922d_set_txpwr_diff(struct rtw89_dev *rtwdev, + const struct rtw89_chan *chan, + enum rtw89_phy_idx phy_idx) +{ + s16 pwr_ofst = rtw89_phy_ant_gain_pwr_offset(rtwdev, chan); + const struct rtw89_chip_info *chip = rtwdev->chip; + static const u32 path_ofst[] = {0x0, 0x100}; + const struct rtw89_reg_def *txpwr_ref; + s16 tssi_k_ofst = abs(pwr_ofst); + s16 ofst_dec[RF_PATH_NUM_8922D]; + s16 tssi_k[RF_PATH_NUM_8922D]; + s16 pwr_ref_ofst; + s16 pwr_ref = 16; + u8 i; + + pwr_ref <<= chip->txpwr_factor_rf; + pwr_ref_ofst = pwr_ref - rtw89_phy_txpwr_bb_to_rf(rtwdev, abs(pwr_ofst)); + + ofst_dec[RF_PATH_A] = pwr_ofst > 0 ? pwr_ref : pwr_ref_ofst; + ofst_dec[RF_PATH_B] = pwr_ofst > 0 ? pwr_ref_ofst : pwr_ref; + tssi_k[RF_PATH_A] = pwr_ofst > 0 ? 0 : tssi_k_ofst; + tssi_k[RF_PATH_B] = pwr_ofst > 0 ? tssi_k_ofst : 0; + + for (i = 0; i < RF_PATH_NUM_8922D; i++) { + txpwr_ref = rtw8922d_txpwr_ref[phy_idx]; + + rtw89_phy_write32_mask(rtwdev, txpwr_ref[0].addr + path_ofst[i], + txpwr_ref[0].mask, ofst_dec[i]); + rtw89_phy_write32_mask(rtwdev, txpwr_ref[1].addr + path_ofst[i], + txpwr_ref[1].mask, ofst_dec[i]); + rtw89_phy_write32_mask(rtwdev, txpwr_ref[2].addr + path_ofst[i], + txpwr_ref[2].mask, tssi_k[i]); + } +} + +static void rtw8922d_set_txpwr_ref(struct rtw89_dev *rtwdev, + const struct rtw89_chan *chan, + enum rtw89_phy_idx phy_idx) +{ + s16 ref_ofdm = 0; + s16 ref_cck = 0; + + rtw89_debug(rtwdev, RTW89_DBG_TXPWR, "[TXPWR] set txpwr reference\n"); + + rtw8922d_set_txpwr_diff(rtwdev, chan, phy_idx); + + rtw89_mac_txpwr_write32_mask(rtwdev, phy_idx, R_BE_PWR_REF_CTRL, + B_BE_PWR_REF_CTRL_OFDM, ref_ofdm); + rtw89_mac_txpwr_write32_mask(rtwdev, phy_idx, R_BE_PWR_REF_CTRL, + B_BE_PWR_REF_CTRL_CCK, ref_cck); +} + +static void rtw8922d_set_txpwr_sar_diff(struct rtw89_dev *rtwdev, + const struct rtw89_chan *chan, + enum rtw89_phy_idx phy_idx) +{ + struct rtw89_sar_parm sar_parm = { + .center_freq = chan->freq, + .force_path = true, + }; + s16 sar_rf; + s8 sar_mac; + + if (phy_idx != RTW89_PHY_0) + return; + + sar_parm.path = RF_PATH_A; + sar_mac = rtw89_query_sar(rtwdev, &sar_parm); + sar_rf = rtw89_phy_txpwr_mac_to_rf(rtwdev, sar_mac); + rtw89_phy_write32_mask(rtwdev, R_P0_TXPWRB_BE4, B_TXPWRB_MAX_BE, sar_rf); + + sar_parm.path = RF_PATH_B; + sar_mac = rtw89_query_sar(rtwdev, &sar_parm); + sar_rf = rtw89_phy_txpwr_mac_to_rf(rtwdev, sar_mac); + rtw89_phy_write32_mask(rtwdev, R_P1_TXPWRB_BE4, B_TXPWRB_MAX_BE, sar_rf); +} + +static void rtw8922d_set_txpwr(struct rtw89_dev *rtwdev, + const struct rtw89_chan *chan, + enum rtw89_phy_idx phy_idx) +{ + rtw89_phy_set_txpwr_byrate(rtwdev, chan, phy_idx); + rtw89_phy_set_txpwr_offset(rtwdev, chan, phy_idx); + rtw89_phy_set_txpwr_limit(rtwdev, chan, phy_idx); + rtw89_phy_set_txpwr_limit_ru(rtwdev, chan, phy_idx); + rtw8922d_set_txpwr_ref(rtwdev, chan, phy_idx); + rtw8922d_set_txpwr_sar_diff(rtwdev, chan, phy_idx); +} + +static void rtw8922d_set_txpwr_ctrl(struct rtw89_dev *rtwdev, + enum rtw89_phy_idx phy_idx) +{ + const struct rtw89_chan *chan = rtw89_mgnt_chan_get(rtwdev, phy_idx); + + rtw8922d_set_txpwr_ref(rtwdev, chan, phy_idx); +} + MODULE_FIRMWARE(RTW8922D_MODULE_FIRMWARE); MODULE_FIRMWARE(RTW8922DS_MODULE_FIRMWARE); MODULE_AUTHOR("Realtek Corporation"); From 2b19199952e6f2d6fbddd20c81d48180e98a3c3e Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Mon, 30 Mar 2026 14:58:43 +0800 Subject: [PATCH 1094/1561] wifi: rtw89: 8922d: configure TX/RX path assisting in BT coexistence The BT coexistence mechanism needs to control TX/RX path to co-work with BT well, and these helpers are provided by BB to configure path. Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260330065847.48946-6-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/reg.h | 9 +- drivers/net/wireless/realtek/rtw89/rtw8922d.c | 125 ++++++++++++++++++ 2 files changed, 132 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/reg.h b/drivers/net/wireless/realtek/rtw89/reg.h index 37de1c827814..78f2cf579fa6 100644 --- a/drivers/net/wireless/realtek/rtw89/reg.h +++ b/drivers/net/wireless/realtek/rtw89/reg.h @@ -10469,8 +10469,9 @@ #define B_EMLSR_BB_CLK_BE4 GENMASK(31, 30) #define R_CHINFO_SEG_BE4 0x200B4 #define B_CHINFO_SEG_LEN_BE4 GENMASK(12, 10) -#define R_STS_HDR2_PARSING_BE4 0x2070C -#define B_STS_HDR2_PARSING_BE4 BIT(10) +#define R_SEL_GNT_BT_RX_BE4 0x2010C +#define B_SEL_GNT_BT_RX_PATH0_BE4 GENMASK(3, 0) +#define B_SEL_GNT_BT_RX_PATH1_BE4 GENMASK(11, 8) #define R_SW_SI_WDATA_BE4 0x20370 #define B_SW_SI_DATA_PATH_BE4 GENMASK(31, 28) #define B_SW_SI_DATA_ADR_BE4 GENMASK(27, 20) @@ -10490,9 +10491,13 @@ #define B_ENABLE_CCK0_BE4 BIT(5) #define R_RSTB_ASYNC_BE4 0x20704 #define B_RSTB_ASYNC_BE4 BIT(1) +#define R_STS_HDR2_PARSING_BE4 0x2070C +#define B_STS_HDR2_PARSING_BE4 BIT(10) #define R_EDCCA_RPT_SEL_BE4 0x20780 #define R_EDCCA_RPT_SEL_BE4_C1 0x21780 #define B_EDCCA_RPT_SEL_BE4_MSK 0xE0000 +#define R_SEL_GNT_BT_RXPHY_BE4 0x2079C +#define B_SEL_GNT_BT_RXPHY_BE4 GENMASK(11, 8) #define R_IMR_TX_ERROR_BE4 0x20920 #define B_IMR_TX_ERROR_BE4 BIT(30) #define R_TXINFO_PATH_BE4 0x209A4 diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922d.c b/drivers/net/wireless/realtek/rtw89/rtw8922d.c index 9c62a5f12962..cfe9752abddc 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8922d.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8922d.c @@ -238,6 +238,35 @@ static const struct rtw89_efuse_block_cfg rtw8922d_efuse_blocks[] = { [RTW89_EFUSE_BLOCK_ADIE] = {.offset = 0x70000, .size = 0x10}, }; +static void rtw8922d_sel_bt_rx_path(struct rtw89_dev *rtwdev, u8 val, + enum rtw89_rf_path rx_path) +{ + if (rx_path == RF_PATH_A) + rtw89_phy_write32_mask(rtwdev, R_SEL_GNT_BT_RX_BE4, + B_SEL_GNT_BT_RX_PATH0_BE4, val); + else if (rx_path == RF_PATH_B) + rtw89_phy_write32_mask(rtwdev, R_SEL_GNT_BT_RX_BE4, + B_SEL_GNT_BT_RX_PATH1_BE4, val); + else + rtw89_warn(rtwdev, "[%s] Not support path = %d\n", __func__, rx_path); +} + +static void rtw8922d_sel_bt_rx_phy(struct rtw89_dev *rtwdev, u8 val, + enum rtw89_phy_idx phy_idx) +{ + rtw89_phy_write32_idx(rtwdev, R_SEL_GNT_BT_RXPHY_BE4, + B_SEL_GNT_BT_RXPHY_BE4, val, phy_idx); +} + +static void rtw8922d_set_gbt_bt_rx_sel(struct rtw89_dev *rtwdev, bool en, + enum rtw89_phy_idx phy_idx) +{ + rtw8922d_sel_bt_rx_path(rtwdev, 0x3, RF_PATH_A); + rtw8922d_sel_bt_rx_phy(rtwdev, 0x0, RTW89_PHY_0); + rtw8922d_sel_bt_rx_path(rtwdev, 0x3, RF_PATH_B); + rtw8922d_sel_bt_rx_phy(rtwdev, 0x0, RTW89_PHY_1); +} + static int rtw8922d_pwr_on_func(struct rtw89_dev *rtwdev) { struct rtw89_hal *hal = &rtwdev->hal; @@ -2437,6 +2466,102 @@ static void rtw8922d_set_txpwr_ctrl(struct rtw89_dev *rtwdev, rtw8922d_set_txpwr_ref(rtwdev, chan, phy_idx); } +static void rtw8922d_ctrl_trx_path(struct rtw89_dev *rtwdev, + enum rtw89_rf_path tx_path, u8 tx_nss, + enum rtw89_rf_path rx_path, u8 rx_nss) +{ + enum rtw89_phy_idx phy_idx; + + for (phy_idx = RTW89_PHY_0; phy_idx <= RTW89_PHY_1; phy_idx++) { + rtw8922d_ctrl_tx_path_tmac(rtwdev, tx_path, phy_idx); + rtw8922d_ctrl_rx_path_tmac(rtwdev, rx_path, phy_idx); + + rtw8922d_tssi_reset(rtwdev, rx_path, phy_idx); + } +} + +static void rtw8922d_ctrl_nbtg_bt_tx(struct rtw89_dev *rtwdev, bool en, + enum rtw89_phy_idx phy_idx) +{ + if (en) { + rtw89_phy_write32_idx(rtwdev, R_FORCE_FIR_A, B_FORCE_FIR_A, 0x3, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RXBY_WBADC_A, B_RXBY_WBADC_A, + 0xf, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_BT_RXBY_WBADC_A, B_BT_RXBY_WBADC_A, + 0x0, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_BT_SHARE_A, B_BT_TRK_OFF_A, 0x0, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_OP1DB_A, B_OP1DB_A, 0x80, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_OP1DB1_A, B_TIA10_A, 0x8080, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_BACKOFF_A, B_LNA_IBADC_A, 0x34, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_BKOFF_A, B_BKOFF_IBADC_A, 0x34, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_FORCE_FIR_B, B_FORCE_FIR_B, 0x3, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RXBY_WBADC_B, B_RXBY_WBADC_B, + 0xf, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_BT_RXBY_WBADC_B, B_BT_RXBY_WBADC_B, + 0x0, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_BT_SHARE_B, B_BT_TRK_OFF_B, 0x0, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_LNA_TIA, B_TIA1_B, 0x80, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_BACKOFF_B, B_LNA_IBADC_B, 0x34, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_BKOFF_B, B_BKOFF_IBADC_B, 0x34, phy_idx); + } else { + rtw89_phy_write32_idx(rtwdev, R_FORCE_FIR_A, B_FORCE_FIR_A, 0x0, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RXBY_WBADC_A, B_RXBY_WBADC_A, + 0x0, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_BT_RXBY_WBADC_A, B_BT_RXBY_WBADC_A, + 0x1, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_BT_SHARE_A, B_BT_TRK_OFF_A, 0x1, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_OP1DB_A, B_OP1DB_A, 0x1a, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_OP1DB1_A, B_TIA10_A, 0x2a2a, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_BACKOFF_A, B_LNA_IBADC_A, 0x7a6, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_BKOFF_A, B_BKOFF_IBADC_A, 0x26, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_FORCE_FIR_B, B_FORCE_FIR_B, 0x0, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_RXBY_WBADC_B, B_RXBY_WBADC_B, + 0x0, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_BT_RXBY_WBADC_B, B_BT_RXBY_WBADC_B, + 0x1, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_BT_SHARE_B, B_BT_TRK_OFF_B, 0x1, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_LNA_TIA, B_TIA1_B, 0x2a, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_BACKOFF_B, B_LNA_IBADC_B, 0x7a6, phy_idx); + rtw89_phy_write32_idx(rtwdev, R_BKOFF_B, B_BKOFF_IBADC_B, 0x26, phy_idx); + } +} + +static void rtw8922d_bb_cfg_txrx_path(struct rtw89_dev *rtwdev) +{ + const struct rtw89_chan *chan = rtw89_chan_get(rtwdev, RTW89_CHANCTX_0); + enum rtw89_band band = chan->band_type; + struct rtw89_hal *hal = &rtwdev->hal; + u8 ntx_path = RF_PATH_AB; + u8 nrx_path = RF_PATH_AB; + u32 tx_en0, tx_en1; + u8 rx_nss = 2; + + if (hal->antenna_tx == RF_A) + ntx_path = RF_PATH_A; + else if (hal->antenna_tx == RF_B) + ntx_path = RF_PATH_B; + + if (hal->antenna_rx == RF_A) + nrx_path = RF_PATH_A; + else if (hal->antenna_rx == RF_B) + nrx_path = RF_PATH_B; + + if (nrx_path != RF_PATH_AB) + rx_nss = 1; + + rtw8922d_hal_reset(rtwdev, RTW89_PHY_0, RTW89_MAC_0, band, &tx_en0, true); + if (rtwdev->dbcc_en) + rtw8922d_hal_reset(rtwdev, RTW89_PHY_1, RTW89_MAC_1, band, + &tx_en1, true); + + rtw8922d_ctrl_trx_path(rtwdev, ntx_path, 2, nrx_path, rx_nss); + + rtw8922d_hal_reset(rtwdev, RTW89_PHY_0, RTW89_MAC_0, band, &tx_en0, false); + if (rtwdev->dbcc_en) + rtw8922d_hal_reset(rtwdev, RTW89_PHY_1, RTW89_MAC_1, band, + &tx_en1, false); +} + MODULE_FIRMWARE(RTW8922D_MODULE_FIRMWARE); MODULE_FIRMWARE(RTW8922DS_MODULE_FIRMWARE); MODULE_AUTHOR("Realtek Corporation"); From 9c2f79b4d912dc96bbe5b4dadccd1099d430436d Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Mon, 30 Mar 2026 14:58:44 +0800 Subject: [PATCH 1095/1561] wifi: rtw89: 8922d: add RF ops of init hardware and get thermal One is to initialize hardware for RF circuit, and the ops of get thermal is used to monitor temperature to re-calibrate RF or reduce TX duty to prevent overheating. Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260330065847.48946-7-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/reg.h | 6 ++ drivers/net/wireless/realtek/rtw89/rtw8922d.c | 57 +++++++++++++++++++ .../net/wireless/realtek/rtw89/rtw8922d_rfk.c | 24 ++++++++ .../net/wireless/realtek/rtw89/rtw8922d_rfk.h | 3 + 4 files changed, 90 insertions(+) diff --git a/drivers/net/wireless/realtek/rtw89/reg.h b/drivers/net/wireless/realtek/rtw89/reg.h index 78f2cf579fa6..e5e689f1bfa3 100644 --- a/drivers/net/wireless/realtek/rtw89/reg.h +++ b/drivers/net/wireless/realtek/rtw89/reg.h @@ -10676,6 +10676,12 @@ #define B_KTBL1_TBL0 BIT(3) #define B_KTBL1_TBL1 BIT(5) +#define R_TC_EN_BE4 0x3c200 +#define B_TC_EN_BE4 BIT(0) +#define B_TC_TRIG_BE4 BIT(1) +#define R_TC_VAL_BE4 0x3c208 +#define B_TC_VAL_BE4 GENMASK(7, 0) + /* WiFi CPU local domain */ #define R_AX_WDT_CTRL 0x0040 #define B_AX_WDT_EN BIT(31) diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922d.c b/drivers/net/wireless/realtek/rtw89/rtw8922d.c index cfe9752abddc..49b84d49ccac 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8922d.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8922d.c @@ -2562,6 +2562,63 @@ static void rtw8922d_bb_cfg_txrx_path(struct rtw89_dev *rtwdev) &tx_en1, false); } +static u8 rtw8922d_get_thermal(struct rtw89_dev *rtwdev, enum rtw89_rf_path rf_path) +{ + u8 val; + + rtw89_phy_write32_mask(rtwdev, R_TC_EN_BE4, B_TC_EN_BE4, 0x1); + rtw89_phy_write32_mask(rtwdev, R_TC_EN_BE4, B_TC_TRIG_BE4, 0x0); + rtw89_phy_write32_mask(rtwdev, R_TC_EN_BE4, B_TC_TRIG_BE4, 0x1); + + fsleep(100); + + val = rtw89_phy_read32_mask(rtwdev, R_TC_VAL_BE4, B_TC_VAL_BE4); + + return val; +} + +static u32 rtw8922d_chan_to_rf18_val(struct rtw89_dev *rtwdev, + const struct rtw89_chan *chan) +{ + u32 val = u32_encode_bits(chan->channel, RR_CFGCH_CH); + + switch (chan->band_type) { + case RTW89_BAND_2G: + default: + break; + case RTW89_BAND_5G: + val |= u32_encode_bits(CFGCH_BAND1_5G, RR_CFGCH_BAND1) | + u32_encode_bits(CFGCH_BAND0_5G, RR_CFGCH_BAND0); + break; + case RTW89_BAND_6G: + val |= u32_encode_bits(CFGCH_BAND1_6G, RR_CFGCH_BAND1) | + u32_encode_bits(CFGCH_BAND0_6G, RR_CFGCH_BAND0); + break; + } + + switch (chan->band_width) { + case RTW89_CHANNEL_WIDTH_5: + case RTW89_CHANNEL_WIDTH_10: + case RTW89_CHANNEL_WIDTH_20: + default: + break; + case RTW89_CHANNEL_WIDTH_40: + val |= u32_encode_bits(CFGCH_BW_V2_40M, RR_CFGCH_BW_V2); + break; + case RTW89_CHANNEL_WIDTH_80: + val |= u32_encode_bits(CFGCH_BW_V2_80M, RR_CFGCH_BW_V2); + break; + case RTW89_CHANNEL_WIDTH_160: + val |= u32_encode_bits(CFGCH_BW_V2_160M, RR_CFGCH_BW_V2); + break; + case RTW89_CHANNEL_WIDTH_320: + val |= u32_encode_bits(CFGCH_BW_V2_320M, RR_CFGCH_BW_V2); + break; + } + + return val; +} + MODULE_FIRMWARE(RTW8922D_MODULE_FIRMWARE); MODULE_FIRMWARE(RTW8922DS_MODULE_FIRMWARE); MODULE_AUTHOR("Realtek Corporation"); diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922d_rfk.c b/drivers/net/wireless/realtek/rtw89/rtw8922d_rfk.c index 147cf91d2cb0..4e6a8e88a71e 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8922d_rfk.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8922d_rfk.c @@ -9,6 +9,12 @@ #include "rtw8922d.h" #include "rtw8922d_rfk.h" +static const struct rtw89_reg5_def rtw8922d_nctl_post_defs[] = { + RTW89_DECL_RFK_WM(0x20c7c, 0x00e00000, 0x1), +}; + +RTW89_DECLARE_RFK_TBL(rtw8922d_nctl_post_defs); + static void rtw8922d_tssi_cont_en(struct rtw89_dev *rtwdev, bool en, enum rtw89_rf_path path, u8 phy_idx) { @@ -239,6 +245,24 @@ set_rfk_reload: rtw8922d_chlk_reload(rtwdev); } +static void rtw8922d_x4k_setting(struct rtw89_dev *rtwdev) +{ + u32 val; + + val = rtw89_read_rf(rtwdev, RF_PATH_A, 0xB9, 0xF000); + rtw89_write_rf(rtwdev, RF_PATH_A, 0xB9, 0xF000, val); + val = rtw89_read_rf(rtwdev, RF_PATH_B, 0xB9, 0xF000); + rtw89_write_rf(rtwdev, RF_PATH_B, 0xB9, 0xF000, val); + + rtw89_write_rf(rtwdev, RF_PATH_A, 0xC2, BIT(19), 0x1); + rtw89_write_rf(rtwdev, RF_PATH_B, 0xC2, BIT(19), 0x1); +} + +void rtw8922d_rfk_hw_init(struct rtw89_dev *rtwdev) +{ + rtw8922d_x4k_setting(rtwdev); +} + void rtw8922d_pre_set_channel_rf(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx) { bool mlo_1_1; diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922d_rfk.h b/drivers/net/wireless/realtek/rtw89/rtw8922d_rfk.h index 8a5f4b56b8ce..c5bbe0eb972a 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8922d_rfk.h +++ b/drivers/net/wireless/realtek/rtw89/rtw8922d_rfk.h @@ -7,10 +7,13 @@ #include "core.h" +extern const struct rtw89_rfk_tbl rtw8922d_nctl_post_defs_tbl; + void rtw8922d_tssi_cont_en_phyidx(struct rtw89_dev *rtwdev, bool en, u8 phy_idx); void rtw8922d_set_channel_rf(struct rtw89_dev *rtwdev, const struct rtw89_chan *chan, enum rtw89_phy_idx phy_idx); +void rtw8922d_rfk_hw_init(struct rtw89_dev *rtwdev); void rtw8922d_rfk_mlo_ctrl(struct rtw89_dev *rtwdev); void rtw8922d_pre_set_channel_rf(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx); void rtw8922d_post_set_channel_rf(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx); From 553fd44ce8ce3caccf580aa8286d8c211fea2a4a Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Mon, 30 Mar 2026 14:58:45 +0800 Subject: [PATCH 1096/1561] wifi: rtw89: 8922d: add ops related to BT coexistence mechanism The ops is used by shared BT coexistence mechanism to set WiFi TX power, get BT RSSI, and TX/RX parameters. The RTL8922D uses TX/RX parameter v9, so define it and fill NULL for other chips. Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260330065847.48946-8-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/core.h | 21 +++ drivers/net/wireless/realtek/rtw89/rtw8851b.c | 4 + drivers/net/wireless/realtek/rtw89/rtw8852a.c | 4 + drivers/net/wireless/realtek/rtw89/rtw8852b.c | 4 + .../net/wireless/realtek/rtw89/rtw8852bt.c | 4 + drivers/net/wireless/realtek/rtw89/rtw8852c.c | 4 + drivers/net/wireless/realtek/rtw89/rtw8922a.c | 4 + drivers/net/wireless/realtek/rtw89/rtw8922d.c | 123 ++++++++++++++++++ 8 files changed, 168 insertions(+) diff --git a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h index 4b4d25d8ba98..fd29dbbb120d 100644 --- a/drivers/net/wireless/realtek/rtw89/core.h +++ b/drivers/net/wireless/realtek/rtw89/core.h @@ -873,6 +873,14 @@ enum rtw89_phy_idx { RTW89_PHY_NUM, }; +enum rtw89_fbtc_bt_index { + BTC_BT_1ST = 0x0, + BTC_BT_2ND = 0x1, + BTC_BT_EXT = 0x2, + BTC_ALL_BT = 0x2, + BTC_ALL_BT_EZL = 0x3 /* BT0+BT1+Ext-ZB(or Thread, or LTE) */ +}; + #define __RTW89_MLD_MAX_LINK_NUM 2 #define RTW89_MLD_NON_STA_LINK_NUM 1 @@ -2197,6 +2205,15 @@ struct rtw89_btc_bt_info { u32 rsvd: 17; }; +struct rtw89_btc_rf_trx_para_v9 { + u32 wl_tx_power[RTW89_PHY_NUM]; /* absolute Tx power (dBm), 1's complement -5->0x85 */ + u32 wl_rx_gain[RTW89_PHY_NUM]; /* rx gain table index (TBD.) */ + u32 bt_tx_power[BTC_ALL_BT]; /* decrease Tx power (dB) */ + u32 bt_rx_gain[BTC_ALL_BT]; /* LNA constrain level */ + u32 zb_tx_power[BTC_ALL_BT]; /* 15.4 devrease Tx power (dB) */ + u32 zb_rx_gain[BTC_ALL_BT]; /* 15.4 constrain level */ +}; + struct rtw89_btc_cx { struct rtw89_btc_wl_info wl; struct rtw89_btc_bt_info bt; @@ -4609,6 +4626,10 @@ struct rtw89_chip_info { const struct rtw89_btc_rf_trx_para *rf_para_ulink; u8 rf_para_dlink_num; const struct rtw89_btc_rf_trx_para *rf_para_dlink; + const struct rtw89_btc_rf_trx_para_v9 *rf_para_ulink_v9; + const struct rtw89_btc_rf_trx_para_v9 *rf_para_dlink_v9; + u8 rf_para_ulink_num_v9; + u8 rf_para_dlink_num_v9; u8 ps_mode_supported; u8 low_power_hci_modes; diff --git a/drivers/net/wireless/realtek/rtw89/rtw8851b.c b/drivers/net/wireless/realtek/rtw89/rtw8851b.c index 2005934f64d2..84bdd39b3ceb 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8851b.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8851b.c @@ -2681,6 +2681,10 @@ const struct rtw89_chip_info rtw8851b_chip_info = { .rf_para_ulink = rtw89_btc_8851b_rf_ul, .rf_para_dlink_num = ARRAY_SIZE(rtw89_btc_8851b_rf_dl), .rf_para_dlink = rtw89_btc_8851b_rf_dl, + .rf_para_ulink_v9 = NULL, + .rf_para_dlink_v9 = NULL, + .rf_para_ulink_num_v9 = 0, + .rf_para_dlink_num_v9 = 0, .ps_mode_supported = BIT(RTW89_PS_MODE_RFOFF) | BIT(RTW89_PS_MODE_CLK_GATED), .low_power_hci_modes = 0, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852a.c b/drivers/net/wireless/realtek/rtw89/rtw8852a.c index 12ed35f548c1..1d4f1df524a1 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852a.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852a.c @@ -2418,6 +2418,10 @@ const struct rtw89_chip_info rtw8852a_chip_info = { .rf_para_ulink = rtw89_btc_8852a_rf_ul, .rf_para_dlink_num = ARRAY_SIZE(rtw89_btc_8852a_rf_dl), .rf_para_dlink = rtw89_btc_8852a_rf_dl, + .rf_para_ulink_v9 = NULL, + .rf_para_dlink_v9 = NULL, + .rf_para_ulink_num_v9 = 0, + .rf_para_dlink_num_v9 = 0, .ps_mode_supported = BIT(RTW89_PS_MODE_RFOFF) | BIT(RTW89_PS_MODE_CLK_GATED) | BIT(RTW89_PS_MODE_PWR_GATED), diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852b.c b/drivers/net/wireless/realtek/rtw89/rtw8852b.c index dcb15f26c1b1..5e8738bb2dc2 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852b.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852b.c @@ -1014,6 +1014,10 @@ const struct rtw89_chip_info rtw8852b_chip_info = { .rf_para_ulink = rtw89_btc_8852b_rf_ul, .rf_para_dlink_num = ARRAY_SIZE(rtw89_btc_8852b_rf_dl), .rf_para_dlink = rtw89_btc_8852b_rf_dl, + .rf_para_ulink_v9 = NULL, + .rf_para_dlink_v9 = NULL, + .rf_para_ulink_num_v9 = 0, + .rf_para_dlink_num_v9 = 0, .ps_mode_supported = BIT(RTW89_PS_MODE_RFOFF) | BIT(RTW89_PS_MODE_CLK_GATED) | BIT(RTW89_PS_MODE_PWR_GATED), diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852bt.c b/drivers/net/wireless/realtek/rtw89/rtw8852bt.c index 2afc6908cc7e..ab4263bc8b9f 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852bt.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852bt.c @@ -853,6 +853,10 @@ const struct rtw89_chip_info rtw8852bt_chip_info = { .rf_para_ulink = rtw89_btc_8852bt_rf_ul, .rf_para_dlink_num = ARRAY_SIZE(rtw89_btc_8852bt_rf_dl), .rf_para_dlink = rtw89_btc_8852bt_rf_dl, + .rf_para_ulink_v9 = NULL, + .rf_para_dlink_v9 = NULL, + .rf_para_ulink_num_v9 = 0, + .rf_para_dlink_num_v9 = 0, .ps_mode_supported = BIT(RTW89_PS_MODE_RFOFF) | BIT(RTW89_PS_MODE_CLK_GATED) | BIT(RTW89_PS_MODE_PWR_GATED), diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852c.c b/drivers/net/wireless/realtek/rtw89/rtw8852c.c index 5f9da0f85bb5..40db7e3c0d97 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852c.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852c.c @@ -3211,6 +3211,10 @@ const struct rtw89_chip_info rtw8852c_chip_info = { .rf_para_ulink = rtw89_btc_8852c_rf_ul, .rf_para_dlink_num = ARRAY_SIZE(rtw89_btc_8852c_rf_dl), .rf_para_dlink = rtw89_btc_8852c_rf_dl, + .rf_para_ulink_v9 = NULL, + .rf_para_dlink_v9 = NULL, + .rf_para_ulink_num_v9 = 0, + .rf_para_dlink_num_v9 = 0, .ps_mode_supported = BIT(RTW89_PS_MODE_RFOFF) | BIT(RTW89_PS_MODE_CLK_GATED) | BIT(RTW89_PS_MODE_PWR_GATED), diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922a.c b/drivers/net/wireless/realtek/rtw89/rtw8922a.c index 5f83e71ed218..8f6cf64271e8 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8922a.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8922a.c @@ -3015,6 +3015,10 @@ const struct rtw89_chip_info rtw8922a_chip_info = { .rf_para_ulink = rtw89_btc_8922a_rf_ul, .rf_para_dlink_num = ARRAY_SIZE(rtw89_btc_8922a_rf_dl), .rf_para_dlink = rtw89_btc_8922a_rf_dl, + .rf_para_ulink_v9 = NULL, + .rf_para_dlink_v9 = NULL, + .rf_para_ulink_num_v9 = 0, + .rf_para_dlink_num_v9 = 0, .ps_mode_supported = BIT(RTW89_PS_MODE_RFOFF) | BIT(RTW89_PS_MODE_CLK_GATED) | BIT(RTW89_PS_MODE_PWR_GATED), diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922d.c b/drivers/net/wireless/realtek/rtw89/rtw8922d.c index 49b84d49ccac..a9a7ffb5fb58 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8922d.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8922d.c @@ -2619,6 +2619,129 @@ static u32 rtw8922d_chan_to_rf18_val(struct rtw89_dev *rtwdev, return val; } +static void rtw8922d_btc_set_rfe(struct rtw89_dev *rtwdev) +{ +} + +static void rtw8922d_btc_init_cfg(struct rtw89_dev *rtwdev) +{ + /* offload to firmware */ +} + +static void +rtw8922d_btc_set_wl_txpwr_ctrl(struct rtw89_dev *rtwdev, u32 txpwr_val) +{ + u16 ctrl_all_time = u32_get_bits(txpwr_val, GENMASK(15, 0)); + u16 ctrl_gnt_bt = u32_get_bits(txpwr_val, GENMASK(31, 16)); + + switch (ctrl_all_time) { + case 0xffff: + rtw89_mac_txpwr_write32_mask(rtwdev, RTW89_PHY_0, R_BE_PWR_RATE_CTRL, + B_BE_FORCE_PWR_BY_RATE_EN, 0x0); + rtw89_mac_txpwr_write32_mask(rtwdev, RTW89_PHY_0, R_BE_PWR_RATE_CTRL, + B_BE_FORCE_PWR_BY_RATE_VAL, 0x0); + break; + default: + rtw89_mac_txpwr_write32_mask(rtwdev, RTW89_PHY_0, R_BE_PWR_RATE_CTRL, + B_BE_FORCE_PWR_BY_RATE_VAL, ctrl_all_time); + rtw89_mac_txpwr_write32_mask(rtwdev, RTW89_PHY_0, R_BE_PWR_RATE_CTRL, + B_BE_FORCE_PWR_BY_RATE_EN, 0x1); + break; + } + + switch (ctrl_gnt_bt) { + case 0xffff: + rtw89_mac_txpwr_write32_mask(rtwdev, RTW89_PHY_0, R_BE_PWR_REG_CTRL, + B_BE_PWR_BT_EN, 0x0); + rtw89_mac_txpwr_write32_mask(rtwdev, RTW89_PHY_0, R_BE_PWR_COEX_CTRL, + B_BE_PWR_BT_VAL, 0x0); + break; + default: + rtw89_mac_txpwr_write32_mask(rtwdev, RTW89_PHY_0, R_BE_PWR_COEX_CTRL, + B_BE_PWR_BT_VAL, ctrl_gnt_bt); + rtw89_mac_txpwr_write32_mask(rtwdev, RTW89_PHY_0, R_BE_PWR_REG_CTRL, + B_BE_PWR_BT_EN, 0x1); + break; + } +} + +static +s8 rtw8922d_btc_get_bt_rssi(struct rtw89_dev *rtwdev, s8 val) +{ + return clamp_t(s8, val, -100, 0) + 100; +} + +static const struct rtw89_btc_rf_trx_para_v9 rtw89_btc_8922d_rf_ul_v9[] = { + /* + * 0 -> original + * 1 -> for BT-connected ACI issue && BTG co-rx + * 2 ~ 4 ->reserved for shared-antenna + * 5 ~ 8 ->for non-shared-antenna free-run + */ + {{15, 15}, {0, 0}, {0, 0}, {7, 7}, {0, 0}, {0, 0}}, + {{15, 15}, {2, 2}, {0, 0}, {7, 7}, {0, 0}, {0, 0}}, + {{15, 15}, {0, 0}, {0, 0}, {7, 7}, {0, 0}, {0, 0}}, + {{15, 15}, {0, 0}, {0, 0}, {7, 7}, {0, 0}, {0, 0}}, + {{15, 15}, {0, 0}, {0, 0}, {7, 7}, {0, 0}, {0, 0}}, + {{15, 15}, {1, 1}, {0, 0}, {7, 7}, {0, 0}, {0, 0}}, + {{ 6, 6}, {1, 1}, {0, 0}, {7, 7}, {0, 0}, {0, 0}}, + {{13, 13}, {1, 1}, {0, 0}, {7, 7}, {0, 0}, {0, 0}}, + {{13, 13}, {1, 1}, {0, 0}, {7, 7}, {0, 0}, {0, 0}}, +}; + +static const struct rtw89_btc_rf_trx_para_v9 rtw89_btc_8922d_rf_dl_v9[] = { + /* + * 0 -> original + * 1 -> for BT-connected ACI issue && BTG co-rx + * 2 ~ 4 ->reserved for shared-antenna + * 5 ~ 8 ->for non-shared-antenna free-run + */ + {{15, 15}, {0, 0}, {0, 0}, {7, 7}, {0, 0}, {0, 0}}, + {{15, 15}, {2, 2}, {0, 0}, {7, 7}, {0, 0}, {0, 0}}, + {{15, 15}, {0, 0}, {0, 0}, {7, 7}, {0, 0}, {0, 0}}, + {{15, 15}, {0, 0}, {0, 0}, {7, 7}, {0, 0}, {0, 0}}, + {{15, 15}, {0, 0}, {0, 0}, {7, 7}, {0, 0}, {0, 0}}, + {{15, 15}, {1, 1}, {0, 0}, {7, 7}, {0, 0}, {0, 0}}, + {{15, 15}, {1, 1}, {0, 0}, {7, 7}, {0, 0}, {0, 0}}, + {{15, 15}, {1, 1}, {0, 0}, {7, 7}, {0, 0}, {0, 0}}, + {{15, 15}, {1, 1}, {0, 0}, {7, 7}, {0, 0}, {0, 0}}, +}; + +static const u8 rtw89_btc_8922d_wl_rssi_thres[BTC_WL_RSSI_THMAX] = {60, 50, 40, 30}; +static const u8 rtw89_btc_8922d_bt_rssi_thres[BTC_BT_RSSI_THMAX] = {50, 40, 30, 20}; + +static const struct rtw89_btc_fbtc_mreg rtw89_btc_8922d_mon_reg[] = { + RTW89_DEF_FBTC_MREG(REG_MAC, 4, 0xe300), + RTW89_DEF_FBTC_MREG(REG_MAC, 4, 0xe330), + RTW89_DEF_FBTC_MREG(REG_MAC, 4, 0xe334), + RTW89_DEF_FBTC_MREG(REG_MAC, 4, 0xe338), + RTW89_DEF_FBTC_MREG(REG_MAC, 4, 0xe344), + RTW89_DEF_FBTC_MREG(REG_MAC, 4, 0xe348), + RTW89_DEF_FBTC_MREG(REG_MAC, 4, 0xe34c), + RTW89_DEF_FBTC_MREG(REG_MAC, 4, 0xe350), + RTW89_DEF_FBTC_MREG(REG_MAC, 4, 0xe354), + RTW89_DEF_FBTC_MREG(REG_MAC, 4, 0xe35c), + RTW89_DEF_FBTC_MREG(REG_MAC, 4, 0xe370), + RTW89_DEF_FBTC_MREG(REG_MAC, 4, 0xe380), +}; + +static +void rtw8922d_btc_update_bt_cnt(struct rtw89_dev *rtwdev) +{ + /* Feature move to firmware */ +} + +static +void rtw8922d_btc_wl_s1_standby(struct rtw89_dev *rtwdev, bool state) +{ + /* Feature move to firmware */ +} + +static void rtw8922d_btc_set_wl_rx_gain(struct rtw89_dev *rtwdev, u32 level) +{ + /* Feature move to firmware */ +} + MODULE_FIRMWARE(RTW8922D_MODULE_FIRMWARE); MODULE_FIRMWARE(RTW8922DS_MODULE_FIRMWARE); MODULE_AUTHOR("Realtek Corporation"); From 0737a803bae462e69ef030dc9d050a29c1d6d265 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Mon, 30 Mar 2026 14:58:46 +0800 Subject: [PATCH 1097/1561] wifi: rtw89: 8922d: add chip_info and chip_ops struct Add remaining functions including calculate RX gain for power saving, channel frequency and RSSI from PPDU status, and WoWLAN declaration. Then fill chip_info and chip_ops tables, which RTL8922D has two variants RTL8922D and RTL8922DS supporting 4096 and 1024 QAM respectively. Other features, such as support of 2/5/6 GHz and up to 160 MHz bandwidth, for variants are the same, Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260330065847.48946-9-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/fw.h | 23 +- drivers/net/wireless/realtek/rtw89/reg.h | 17 + drivers/net/wireless/realtek/rtw89/rtw8922d.c | 344 ++++++++++++++++++ drivers/net/wireless/realtek/rtw89/rtw8922d.h | 3 + 4 files changed, 380 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/fw.h b/drivers/net/wireless/realtek/rtw89/fw.h index 4574a281d352..db252d45e498 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.h +++ b/drivers/net/wireless/realtek/rtw89/fw.h @@ -4298,13 +4298,22 @@ enum rtw89_fw_element_id { BIT(RTW89_FW_ELEMENT_ID_TXPWR_TRK) | \ BITS_OF_RTW89_TXPWR_FW_ELEMENTS_NO_6GHZ) -#define RTW89_BE_GEN_DEF_NEEDED_FW_ELEMENTS (BIT(RTW89_FW_ELEMENT_ID_BBMCU0) | \ - BIT(RTW89_FW_ELEMENT_ID_BB_REG) | \ - BIT(RTW89_FW_ELEMENT_ID_RADIO_A) | \ - BIT(RTW89_FW_ELEMENT_ID_RADIO_B) | \ - BIT(RTW89_FW_ELEMENT_ID_RF_NCTL) | \ - BIT(RTW89_FW_ELEMENT_ID_TXPWR_TRK) | \ - BITS_OF_RTW89_TXPWR_FW_ELEMENTS) +#define RTW89_BE_GEN_DEF_NEEDED_FW_ELEMENTS_BASE \ + (BIT(RTW89_FW_ELEMENT_ID_BB_REG) | \ + BIT(RTW89_FW_ELEMENT_ID_RADIO_A) | \ + BIT(RTW89_FW_ELEMENT_ID_RADIO_B) | \ + BIT(RTW89_FW_ELEMENT_ID_RF_NCTL) | \ + BIT(RTW89_FW_ELEMENT_ID_TXPWR_TRK) | \ + BITS_OF_RTW89_TXPWR_FW_ELEMENTS) + +#define RTW89_BE_GEN_DEF_NEEDED_FW_ELEMENTS \ + (RTW89_BE_GEN_DEF_NEEDED_FW_ELEMENTS_BASE | \ + BIT(RTW89_FW_ELEMENT_ID_BBMCU0)) + +#define RTW89_BE_GEN_DEF_NEEDED_FW_ELEMENTS_V1 \ + (RTW89_BE_GEN_DEF_NEEDED_FW_ELEMENTS_BASE | \ + BIT(RTW89_FW_ELEMENT_ID_AFE_PWR_SEQ) | \ + BIT(RTW89_FW_ELEMENT_ID_TX_COMP)) struct __rtw89_fw_txpwr_element { u8 rsvd0; diff --git a/drivers/net/wireless/realtek/rtw89/reg.h b/drivers/net/wireless/realtek/rtw89/reg.h index e5e689f1bfa3..42ffe83931a3 100644 --- a/drivers/net/wireless/realtek/rtw89/reg.h +++ b/drivers/net/wireless/realtek/rtw89/reg.h @@ -4291,6 +4291,20 @@ #define B_BE_VERIFY_ENV_MASK GENMASK(9, 8) #define B_BE_HW_ID_MASK GENMASK(7, 0) +#define R_BE_SCOREBOARD_0 0x0110 +#define B_BE_SB0_TOGGLE BIT(31) +#define B_BE_SB0_WL_DATA_LINE_MASK GENMASK(30, 0) + +#define R_BE_SCOREBOARD_0_BT_DATA 0x0114 +#define B_BE_SB0_BT_DATA_LINE_MASK GENMASK(30, 0) + +#define R_BE_SCOREBOARD_1 0x0118 +#define B_BE_SB1_TOGGLE BIT(31) +#define B_BE_SB1_WL_DATA_LINE_MASK GENMASK(30, 0) + +#define R_BE_SCOREBOARD_1_BT_DATA 0x011C +#define B_BE_SB1_BT_DATA_LINE_MASK GENMASK(30, 0) + #define R_BE_HALT_H2C_CTRL 0x0160 #define B_BE_HALT_H2C_TRIGGER BIT(0) @@ -10656,6 +10670,9 @@ #define B_RX_LDPC08_BE4 GENMASK(29, 24) #define R_RX_LDPC01_BE4 0x26840 #define B_RX_LDPC09_BE4 GENMASK(5, 0) +#define R_BSS_CLR_MAP_BE4 0x26914 +#define R_BSS_CLR_VLD_BE4 0x26920 +#define B_BSS_CLR_VLD_BE4 BIT(2) #define R_SW_SI_DATA_BE4 0x2CF4C #define B_SW_SI_READ_DATA_BE4 GENMASK(19, 0) diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922d.c b/drivers/net/wireless/realtek/rtw89/rtw8922d.c index a9a7ffb5fb58..e3b77cd23514 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8922d.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8922d.c @@ -1447,6 +1447,20 @@ static void rtw8922d_set_rx_gain_normal(struct rtw89_dev *rtwdev, rtw8922d_set_rx_gain_normal_ofdm(rtwdev, chan, path, phy_idx); } +static void rtw8922d_calc_rx_gain_normal(struct rtw89_dev *rtwdev, + const struct rtw89_chan *chan, + enum rtw89_rf_path path, + enum rtw89_phy_idx phy_idx, + struct rtw89_phy_calc_efuse_gain *calc) +{ + rtw8922d_calc_rx_gain_normal_ofdm(rtwdev, chan, path, phy_idx, calc); + + if (chan->band_type != RTW89_BAND_2G) + return; + + rtw8922d_calc_rx_gain_normal_cck(rtwdev, chan, path, phy_idx, calc); +} + static void rtw8922d_set_cck_parameters(struct rtw89_dev *rtwdev, const struct rtw89_chan *chan, enum rtw89_phy_idx phy_idx) @@ -2742,6 +2756,336 @@ static void rtw8922d_btc_set_wl_rx_gain(struct rtw89_dev *rtwdev, u32 level) /* Feature move to firmware */ } +static void rtw8922d_fill_freq_with_ppdu(struct rtw89_dev *rtwdev, + struct rtw89_rx_phy_ppdu *phy_ppdu, + struct ieee80211_rx_status *status) +{ + u8 chan_idx = phy_ppdu->chan_idx; + enum nl80211_band band; + u8 ch; + + if (chan_idx == 0) + return; + + rtw89_decode_chan_idx(rtwdev, chan_idx, &ch, &band); + status->freq = ieee80211_channel_to_frequency(ch, band); + status->band = band; +} + +static void rtw8922d_query_ppdu(struct rtw89_dev *rtwdev, + struct rtw89_rx_phy_ppdu *phy_ppdu, + struct ieee80211_rx_status *status) +{ + u8 path; + u8 *rx_power = phy_ppdu->rssi; + + if (!status->signal) + status->signal = RTW89_RSSI_RAW_TO_DBM(max(rx_power[RF_PATH_A], + rx_power[RF_PATH_B])); + + for (path = 0; path < rtwdev->chip->rf_path_num; path++) { + status->chains |= BIT(path); + status->chain_signal[path] = RTW89_RSSI_RAW_TO_DBM(rx_power[path]); + } + if (phy_ppdu->valid) + rtw8922d_fill_freq_with_ppdu(rtwdev, phy_ppdu, status); +} + +static void rtw8922d_convert_rpl_to_rssi(struct rtw89_dev *rtwdev, + struct rtw89_rx_phy_ppdu *phy_ppdu) +{ + /* Mapping to BW: 5, 10, 20, 40, 80, 160, 80_80 */ + static const u8 bw_compensate[] = {0, 0, 0, 6, 12, 18, 0}; + u8 *rssi = phy_ppdu->rssi; + u8 compensate = 0; + u8 i; + + if (phy_ppdu->bw_idx < ARRAY_SIZE(bw_compensate)) + compensate = bw_compensate[phy_ppdu->bw_idx]; + + for (i = 0; i < RF_PATH_NUM_8922D; i++) { + if (!(phy_ppdu->rx_path_en & BIT(i))) { + rssi[i] = 0; + phy_ppdu->rpl_path[i] = 0; + phy_ppdu->rpl_fd[i] = 0; + } + + if (phy_ppdu->ie != RTW89_CCK_PKT && rssi[i]) + rssi[i] += compensate; + + phy_ppdu->rpl_path[i] = rssi[i]; + } +} + +static void rtw8922d_phy_rpt_to_rssi(struct rtw89_dev *rtwdev, + struct rtw89_rx_desc_info *desc_info, + struct ieee80211_rx_status *rx_status) +{ + if (desc_info->rssi <= 0x1 || (desc_info->rssi >> 2) > MAX_RSSI) + return; + + rx_status->signal = (desc_info->rssi >> 2) - MAX_RSSI; +} + +static int rtw8922d_mac_enable_bb_rf(struct rtw89_dev *rtwdev) +{ + return 0; +} + +static int rtw8922d_mac_disable_bb_rf(struct rtw89_dev *rtwdev) +{ + return 0; +} + +static const struct rtw89_chanctx_listener rtw8922d_chanctx_listener = { + .callbacks[RTW89_CHANCTX_CALLBACK_TAS] = rtw89_tas_chanctx_cb, +}; + +#ifdef CONFIG_PM +static const struct wiphy_wowlan_support rtw_wowlan_stub_8922d = { + .flags = WIPHY_WOWLAN_MAGIC_PKT | WIPHY_WOWLAN_DISCONNECT | + WIPHY_WOWLAN_NET_DETECT, + .n_patterns = RTW89_MAX_PATTERN_NUM, + .pattern_max_len = RTW89_MAX_PATTERN_SIZE, + .pattern_min_len = 1, + .max_nd_match_sets = RTW89_SCANOFLD_MAX_SSID, +}; +#endif + +static const struct rtw89_chip_ops rtw8922d_chip_ops = { + .enable_bb_rf = rtw8922d_mac_enable_bb_rf, + .disable_bb_rf = rtw8922d_mac_disable_bb_rf, + .bb_preinit = rtw8922d_bb_preinit, + .bb_postinit = rtw8922d_bb_postinit, + .bb_reset = rtw8922d_bb_reset, + .bb_sethw = rtw8922d_bb_sethw, + .read_rf = rtw89_phy_read_rf_v3, + .write_rf = rtw89_phy_write_rf_v3, + .set_channel = rtw8922d_set_channel, + .set_channel_help = rtw8922d_set_channel_help, + .read_efuse = rtw8922d_read_efuse, + .read_phycap = rtw8922d_read_phycap, + .fem_setup = NULL, + .rfe_gpio = NULL, + .rfk_hw_init = rtw8922d_rfk_hw_init, + .rfk_init = rtw8922d_rfk_init, + .rfk_init_late = rtw8922d_rfk_init_late, + .rfk_channel = rtw8922d_rfk_channel, + .rfk_band_changed = rtw8922d_rfk_band_changed, + .rfk_scan = rtw8922d_rfk_scan, + .rfk_track = rtw8922d_rfk_track, + .power_trim = rtw8922d_power_trim, + .set_txpwr = rtw8922d_set_txpwr, + .set_txpwr_ctrl = rtw8922d_set_txpwr_ctrl, + .init_txpwr_unit = NULL, + .get_thermal = rtw8922d_get_thermal, + .chan_to_rf18_val = rtw8922d_chan_to_rf18_val, + .ctrl_btg_bt_rx = rtw8922d_set_gbt_bt_rx_sel, + .query_ppdu = rtw8922d_query_ppdu, + .convert_rpl_to_rssi = rtw8922d_convert_rpl_to_rssi, + .phy_rpt_to_rssi = rtw8922d_phy_rpt_to_rssi, + .ctrl_nbtg_bt_tx = rtw8922d_ctrl_nbtg_bt_tx, + .cfg_txrx_path = rtw8922d_bb_cfg_txrx_path, + .set_txpwr_ul_tb_offset = NULL, + .digital_pwr_comp = rtw8922d_digital_pwr_comp, + .calc_rx_gain_normal = rtw8922d_calc_rx_gain_normal, + .pwr_on_func = rtw8922d_pwr_on_func, + .pwr_off_func = rtw8922d_pwr_off_func, + .query_rxdesc = rtw89_core_query_rxdesc_v3, + .fill_txdesc = rtw89_core_fill_txdesc_v3, + .fill_txdesc_fwcmd = rtw89_core_fill_txdesc_fwcmd_v2, + .get_ch_dma = {rtw89_core_get_ch_dma_v1, + NULL, + NULL,}, + .cfg_ctrl_path = rtw89_mac_cfg_ctrl_path_v2, + .mac_cfg_gnt = rtw89_mac_cfg_gnt_v3, + .stop_sch_tx = rtw89_mac_stop_sch_tx_v2, + .resume_sch_tx = rtw89_mac_resume_sch_tx_v2, + .h2c_dctl_sec_cam = rtw89_fw_h2c_dctl_sec_cam_v3, + .h2c_default_cmac_tbl = rtw89_fw_h2c_default_cmac_tbl_be, + .h2c_assoc_cmac_tbl = rtw89_fw_h2c_assoc_cmac_tbl_be, + .h2c_ampdu_cmac_tbl = rtw89_fw_h2c_ampdu_cmac_tbl_be, + .h2c_txtime_cmac_tbl = rtw89_fw_h2c_txtime_cmac_tbl_be, + .h2c_punctured_cmac_tbl = rtw89_fw_h2c_punctured_cmac_tbl_be, + .h2c_default_dmac_tbl = rtw89_fw_h2c_default_dmac_tbl_v3, + .h2c_update_beacon = rtw89_fw_h2c_update_beacon_be, + .h2c_ba_cam = rtw89_fw_h2c_ba_cam_v1, + .h2c_wow_cam_update = rtw89_fw_h2c_wow_cam_update_v1, + + .btc_set_rfe = rtw8922d_btc_set_rfe, + .btc_init_cfg = rtw8922d_btc_init_cfg, + .btc_set_wl_pri = NULL, + .btc_set_wl_txpwr_ctrl = rtw8922d_btc_set_wl_txpwr_ctrl, + .btc_get_bt_rssi = rtw8922d_btc_get_bt_rssi, + .btc_update_bt_cnt = rtw8922d_btc_update_bt_cnt, + .btc_wl_s1_standby = rtw8922d_btc_wl_s1_standby, + .btc_set_wl_rx_gain = rtw8922d_btc_set_wl_rx_gain, + .btc_set_policy = rtw89_btc_set_policy_v1, +}; + +const struct rtw89_chip_info rtw8922d_chip_info = { + .chip_id = RTL8922D, + .chip_gen = RTW89_CHIP_BE, + .ops = &rtw8922d_chip_ops, + .mac_def = &rtw89_mac_gen_be, + .phy_def = &rtw89_phy_gen_be_v1, + .fw_def = { + .fw_basename = RTW8922D_FW_BASENAME, + .fw_format_max = RTW8922D_FW_FORMAT_MAX, + .fw_b_aid = RTL8922D_AID7102, + }, + .try_ce_fw = false, + .bbmcu_nr = 0, + .needed_fw_elms = RTW89_BE_GEN_DEF_NEEDED_FW_ELEMENTS_V1, + .fw_blacklist = &rtw89_fw_blacklist_default, + .fifo_size = 393216, + .small_fifo_size = false, + .dle_scc_rsvd_size = 0, + .max_amsdu_limit = 11000, + .max_vht_mpdu_cap = IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991, + .max_eht_mpdu_cap = IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_7991, + .max_tx_agg_num = 128, + .max_rx_agg_num = 256, + .dis_2g_40m_ul_ofdma = false, + .rsvd_ple_ofst = 0x5f800, + .hfc_param_ini = {rtw8922d_hfc_param_ini_pcie, NULL, NULL}, + .dle_mem = {rtw8922d_dle_mem_pcie, NULL, NULL, NULL}, + .wde_qempty_acq_grpnum = 8, + .wde_qempty_mgq_grpsel = 8, + .rf_base_addr = {0x3e000, 0x3f000}, + .thermal_th = {0xac, 0xad}, + .pwr_on_seq = NULL, + .pwr_off_seq = NULL, + .bb_table = NULL, + .bb_gain_table = NULL, + .rf_table = {}, + .nctl_table = NULL, + .nctl_post_table = &rtw8922d_nctl_post_defs_tbl, + .dflt_parms = NULL, /* load parm from fw */ + .rfe_parms_conf = NULL, /* load parm from fw */ + .chanctx_listener = &rtw8922d_chanctx_listener, + .txpwr_factor_bb = 3, + .txpwr_factor_rf = 2, + .txpwr_factor_mac = 1, + .dig_table = NULL, + .dig_regs = &rtw8922d_dig_regs, + .tssi_dbw_table = NULL, + .support_macid_num = 64, + .support_link_num = 2, + .support_chanctx_num = 2, + .support_rnr = true, + .support_bands = BIT(NL80211_BAND_2GHZ) | + BIT(NL80211_BAND_5GHZ) | + BIT(NL80211_BAND_6GHZ), + .support_bandwidths = BIT(NL80211_CHAN_WIDTH_20) | + BIT(NL80211_CHAN_WIDTH_40) | + BIT(NL80211_CHAN_WIDTH_80) | + BIT(NL80211_CHAN_WIDTH_160), + .support_unii4 = true, + .support_ant_gain = false, + .support_tas = false, + .support_sar_by_ant = true, + .support_noise = false, + .ul_tb_waveform_ctrl = false, + .ul_tb_pwr_diff = false, + .rx_freq_frome_ie = false, + .hw_sec_hdr = true, + .hw_mgmt_tx_encrypt = true, + .hw_tkip_crypto = true, + .hw_mlo_bmc_crypto = true, + .rf_path_num = 2, + .tx_nss = 2, + .rx_nss = 2, + .acam_num = 128, + .bcam_num = 16, + .scam_num = 32, + .bacam_num = 24, + .bacam_dynamic_num = 8, + .bacam_ver = RTW89_BACAM_V1, + .addrcam_ver = 1, + .ppdu_max_usr = 16, + .sec_ctrl_efuse_size = 4, + .physical_efuse_size = 0x1300, + .logical_efuse_size = 0x70000, + .limit_efuse_size = 0x40000, + .dav_phy_efuse_size = 0, + .dav_log_efuse_size = 0, + .efuse_blocks = rtw8922d_efuse_blocks, + .phycap_addr = 0x1700, + .phycap_size = 0x60, + .para_ver = 0x3ff, + .wlcx_desired = 0x09150000, + .scbd = 0x1, + .mailbox = 0x1, + + .afh_guard_ch = 6, + .wl_rssi_thres = rtw89_btc_8922d_wl_rssi_thres, + .bt_rssi_thres = rtw89_btc_8922d_bt_rssi_thres, + .rssi_tol = 2, + .mon_reg_num = ARRAY_SIZE(rtw89_btc_8922d_mon_reg), + .mon_reg = rtw89_btc_8922d_mon_reg, + .rf_para_ulink_v9 = rtw89_btc_8922d_rf_ul_v9, + .rf_para_dlink_v9 = rtw89_btc_8922d_rf_dl_v9, + .rf_para_ulink_num_v9 = ARRAY_SIZE(rtw89_btc_8922d_rf_ul_v9), + .rf_para_dlink_num_v9 = ARRAY_SIZE(rtw89_btc_8922d_rf_dl_v9), + .ps_mode_supported = BIT(RTW89_PS_MODE_RFOFF) | + BIT(RTW89_PS_MODE_CLK_GATED) | + BIT(RTW89_PS_MODE_PWR_GATED), + .low_power_hci_modes = 0, + .h2c_cctl_func_id = H2C_FUNC_MAC_CCTLINFO_UD_G7, + .hci_func_en_addr = R_BE_HCI_FUNC_EN, + .h2c_desc_size = sizeof(struct rtw89_rxdesc_short_v3), + .txwd_body_size = sizeof(struct rtw89_txwd_body_v2), + .txwd_info_size = sizeof(struct rtw89_txwd_info_v2), + .h2c_ctrl_reg = R_BE_H2CREG_CTRL, + .h2c_counter_reg = {R_BE_UDM1 + 1, B_BE_UDM1_HALMAC_H2C_DEQ_CNT_MASK >> 8}, + .h2c_regs = rtw8922d_h2c_regs, + .c2h_ctrl_reg = R_BE_C2HREG_CTRL, + .c2h_counter_reg = {R_BE_UDM1 + 1, B_BE_UDM1_HALMAC_C2H_ENQ_CNT_MASK >> 8}, + .c2h_regs = rtw8922d_c2h_regs, + .page_regs = &rtw8922d_page_regs, + .wow_reason_reg = rtw8922d_wow_wakeup_regs, + .cfo_src_fd = true, + .cfo_hw_comp = true, + .dcfo_comp = NULL, + .dcfo_comp_sft = 0, + .nhm_report = NULL, + .nhm_th = NULL, + .imr_info = NULL, + .imr_dmac_table = &rtw8922d_imr_dmac_table, + .imr_cmac_table = &rtw8922d_imr_cmac_table, + .rrsr_cfgs = &rtw8922d_rrsr_cfgs, + .bss_clr_vld = {R_BSS_CLR_VLD_BE4, B_BSS_CLR_VLD_BE4}, + .bss_clr_map_reg = R_BSS_CLR_MAP_BE4, + .rfkill_init = &rtw8922d_rfkill_regs, + .rfkill_get = {R_BE_GPIO_EXT_CTRL, B_BE_GPIO_IN_9}, + .btc_sb = {{{R_BE_SCOREBOARD_0, R_BE_SCOREBOARD_0_BT_DATA}, + {R_BE_SCOREBOARD_1, R_BE_SCOREBOARD_1_BT_DATA}}}, + .dma_ch_mask = BIT(RTW89_DMA_ACH1) | BIT(RTW89_DMA_ACH3) | + BIT(RTW89_DMA_ACH5) | BIT(RTW89_DMA_ACH7) | + BIT(RTW89_DMA_B0HI) | BIT(RTW89_DMA_B1HI), + .edcca_regs = &rtw8922d_edcca_regs, +#ifdef CONFIG_PM + .wowlan_stub = &rtw_wowlan_stub_8922d, +#endif + .xtal_info = NULL, + .default_quirks = BIT(RTW89_QUIRK_THERMAL_PROT_120C), +}; +EXPORT_SYMBOL(rtw8922d_chip_info); + +static const struct rtw89_fw_def rtw8922de_vs_fw_def = { + .fw_basename = RTW8922DS_FW_BASENAME, + .fw_format_max = RTW8922DS_FW_FORMAT_MAX, + .fw_b_aid = RTL8922D_AID7060, +}; + +const struct rtw89_chip_variant rtw8922de_vs_variant = { + .no_mcs_12_13 = true, + .fw_min_ver_code = RTW89_FW_VER_CODE(0, 0, 0, 0), + .fw_def_override = &rtw8922de_vs_fw_def, +}; +EXPORT_SYMBOL(rtw8922de_vs_variant); + MODULE_FIRMWARE(RTW8922D_MODULE_FIRMWARE); MODULE_FIRMWARE(RTW8922DS_MODULE_FIRMWARE); MODULE_AUTHOR("Realtek Corporation"); diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922d.h b/drivers/net/wireless/realtek/rtw89/rtw8922d.h index a3b98ad6636c..22a7d1cc244f 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8922d.h +++ b/drivers/net/wireless/realtek/rtw89/rtw8922d.h @@ -77,4 +77,7 @@ struct rtw8922d_efuse { struct rtw8922d_rx_gain_6g rx_gain_6g_b_2; } __packed; +extern const struct rtw89_chip_info rtw8922d_chip_info; +extern const struct rtw89_chip_variant rtw8922de_vs_variant; + #endif From a16ca7d91e4eb37a4faf0a2d2b44603489021725 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Mon, 30 Mar 2026 14:58:47 +0800 Subject: [PATCH 1098/1561] wifi: rtw89: 8922d: add PCI ID of RTL8922DE and RTL8922DE-VS Add PCI ID tables with RTL8922DE whose ID is 10EC:895D, and with RTL8922DE-VS whose ID are 10EC:892D and 10EC:882D. Also, add pci_info struct to describe the hardware capabilities and registers accordingly. Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260330065847.48946-10-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/pci.h | 3 + .../net/wireless/realtek/rtw89/rtw8922de.c | 119 ++++++++++++++++++ 2 files changed, 122 insertions(+) create mode 100644 drivers/net/wireless/realtek/rtw89/rtw8922de.c diff --git a/drivers/net/wireless/realtek/rtw89/pci.h b/drivers/net/wireless/realtek/rtw89/pci.h index c7cd34e99349..e7da37b9da7d 100644 --- a/drivers/net/wireless/realtek/rtw89/pci.h +++ b/drivers/net/wireless/realtek/rtw89/pci.h @@ -1105,6 +1105,9 @@ B_BE_CH6_BUSY | B_BE_CH7_BUSY | B_BE_CH8_BUSY | \ B_BE_CH9_BUSY | B_BE_CH10_BUSY | B_BE_CH11_BUSY | \ B_BE_CH12_BUSY | B_BE_CH13_BUSY | B_BE_CH14_BUSY) +#define DMA_BUSY1_CHECK_BE_V1 (B_BE_CH0_BUSY | B_BE_CH2_BUSY | B_BE_CH4_BUSY | \ + B_BE_CH6_BUSY | B_BE_CH8_BUSY | B_BE_CH10_BUSY | \ + B_BE_CH12_BUSY) #define R_BE_HAXI_EXP_CTRL_V1 0xB020 #define B_BE_R_NO_SEC_ACCESS BIT(31) diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922de.c b/drivers/net/wireless/realtek/rtw89/rtw8922de.c new file mode 100644 index 000000000000..f144e7fc76de --- /dev/null +++ b/drivers/net/wireless/realtek/rtw89/rtw8922de.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +/* Copyright(c) 2026 Realtek Corporation + */ + +#include +#include + +#include "pci.h" +#include "reg.h" +#include "rtw8922d.h" + +static const struct rtw89_pci_info rtw8922d_pci_info = { + .gen_def = &rtw89_pci_gen_be, + .isr_def = &rtw89_pci_isr_be_v1, + .txbd_trunc_mode = MAC_AX_BD_TRUNC, + .rxbd_trunc_mode = MAC_AX_BD_TRUNC, + .rxbd_mode = MAC_AX_RXBD_PKT, + .tag_mode = MAC_AX_TAG_MULTI, + .tx_burst = MAC_AX_TX_BURST_V1_256B, + .rx_burst = MAC_AX_RX_BURST_V1_128B, + .wd_dma_idle_intvl = MAC_AX_WD_DMA_INTVL_256NS, + .wd_dma_act_intvl = MAC_AX_WD_DMA_INTVL_256NS, + .multi_tag_num = MAC_AX_TAG_NUM_8, + .lbc_en = MAC_AX_PCIE_ENABLE, + .lbc_tmr = MAC_AX_LBC_TMR_2MS, + .autok_en = MAC_AX_PCIE_DISABLE, + .io_rcy_en = MAC_AX_PCIE_ENABLE, + .io_rcy_tmr = MAC_AX_IO_RCY_ANA_TMR_DEF, + .rx_ring_eq_is_full = true, + .check_rx_tag = true, + .no_rxbd_fs = true, + .group_bd_addr = true, + .rpp_fmt_size = sizeof(struct rtw89_pci_rpp_fmt_v1), + + .init_cfg_reg = R_BE_HAXI_INIT_CFG1, + .txhci_en_bit = B_BE_TXDMA_EN, + .rxhci_en_bit = B_BE_RXDMA_EN, + .rxbd_mode_bit = B_BE_RXQ_RXBD_MODE_MASK, + .exp_ctrl_reg = R_BE_HAXI_EXP_CTRL_V1, + .max_tag_num_mask = B_BE_MAX_TAG_NUM_MASK, + .rxbd_rwptr_clr_reg = R_BE_RXBD_RWPTR_CLR1_V1, + .txbd_rwptr_clr2_reg = R_BE_TXBD_RWPTR_CLR1, + .dma_io_stop = {R_BE_HAXI_INIT_CFG1, B_BE_STOP_AXI_MST}, + .dma_stop1 = {R_BE_HAXI_DMA_STOP1, B_BE_TX_STOP1_MASK_V1}, + .dma_stop2 = {0}, + .dma_busy1 = {R_BE_HAXI_DMA_BUSY1, DMA_BUSY1_CHECK_BE_V1}, + .dma_busy2_reg = 0, + .dma_busy3_reg = R_BE_HAXI_DMA_BUSY1, + + .rpwm_addr = R_BE_PCIE_HRPWM, + .cpwm_addr = R_BE_PCIE_CRPWM, + .mit_addr = R_BE_PCIE_MIT_CH_EN, + .wp_sel_addr = R_BE_WP_ADDR_H_SEL0_3_V1, + .tx_dma_ch_mask = BIT(RTW89_TXCH_ACH1) | BIT(RTW89_TXCH_ACH3) | + BIT(RTW89_TXCH_ACH5) | BIT(RTW89_TXCH_ACH7) | + BIT(RTW89_TXCH_CH9) | BIT(RTW89_TXCH_CH11), + .bd_idx_addr_low_power = NULL, + .dma_addr_set = &rtw89_pci_ch_dma_addr_set_be_v1, + .bd_ram_table = NULL, + + .ltr_set = rtw89_pci_ltr_set_v2, + .fill_txaddr_info = rtw89_pci_fill_txaddr_info_v1, + .parse_rpp = rtw89_pci_parse_rpp_v1, + .config_intr_mask = rtw89_pci_config_intr_mask_v3, + .enable_intr = rtw89_pci_enable_intr_v3, + .disable_intr = rtw89_pci_disable_intr_v3, + .recognize_intrs = rtw89_pci_recognize_intrs_v3, + + .ssid_quirks = NULL, +}; + +static const struct rtw89_driver_info rtw89_8922de_vs_info = { + .chip = &rtw8922d_chip_info, + .variant = &rtw8922de_vs_variant, + .quirks = NULL, + .bus = { + .pci = &rtw8922d_pci_info, + }, +}; + +static const struct rtw89_driver_info rtw89_8922de_info = { + .chip = &rtw8922d_chip_info, + .variant = NULL, + .quirks = NULL, + .bus = { + .pci = &rtw8922d_pci_info, + }, +}; + +static const struct pci_device_id rtw89_8922de_id_table[] = { + { + PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x892D), + .driver_data = (kernel_ulong_t)&rtw89_8922de_vs_info, + }, + { + PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x882D), + .driver_data = (kernel_ulong_t)&rtw89_8922de_vs_info, + }, + { + PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x895D), + .driver_data = (kernel_ulong_t)&rtw89_8922de_info, + }, + {}, +}; +MODULE_DEVICE_TABLE(pci, rtw89_8922de_id_table); + +static struct pci_driver rtw89_8922de_driver = { + .name = "rtw89_8922de", + .id_table = rtw89_8922de_id_table, + .probe = rtw89_pci_probe, + .remove = rtw89_pci_remove, + .driver.pm = &rtw89_pm_ops_be, + .err_handler = &rtw89_pci_err_handler, +}; +module_pci_driver(rtw89_8922de_driver); + +MODULE_AUTHOR("Realtek Corporation"); +MODULE_DESCRIPTION("Realtek 802.11be wireless 8922DE/8922DE-VS driver"); +MODULE_LICENSE("Dual BSD/GPL"); From 4e4fc2149b0a983670fd99bbd549012839bda79e Mon Sep 17 00:00:00 2001 From: Zenm Chen Date: Mon, 30 Mar 2026 17:34:39 +0800 Subject: [PATCH 1099/1561] wifi: rtw89: Add support for Elecom WDC-XE2402TU3-B Add the ID 056e:4024 to the table to support an additional RTL8832CU adapter: Elecom WDC-XE2402TU3-B. Link: https://github.com/morrownr/rtw89/commit/55c059e2bd49acd5cf93edbc8eda7b9e042f4efd Signed-off-by: Zenm Chen Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260330093440.3615-1-zenmchen@gmail.com --- drivers/net/wireless/realtek/rtw89/rtw8852cu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852cu.c b/drivers/net/wireless/realtek/rtw89/rtw8852cu.c index de3f8358bbe7..092d2812a4d5 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852cu.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852cu.c @@ -41,6 +41,8 @@ static const struct rtw89_driver_info rtw89_8852cu_info = { static const struct usb_device_id rtw_8852cu_id_table[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x0411, 0x03a6, 0xff, 0xff, 0xff), .driver_info = (kernel_ulong_t)&rtw89_8852cu_info }, + { USB_DEVICE_AND_INTERFACE_INFO(0x056e, 0x4024, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&rtw89_8852cu_info }, { USB_DEVICE_AND_INTERFACE_INFO(0x0bda, 0xc832, 0xff, 0xff, 0xff), .driver_info = (kernel_ulong_t)&rtw89_8852cu_info }, { USB_DEVICE_AND_INTERFACE_INFO(0x0bda, 0xc85a, 0xff, 0xff, 0xff), From 80119a77e5b03747b8886505df1b3cb26f49168d Mon Sep 17 00:00:00 2001 From: Lucid Duck Date: Wed, 1 Apr 2026 22:22:16 -0700 Subject: [PATCH 1100/1561] wifi: rtw89: usb: fix TX flow control by tracking in-flight URBs rtw89_usb_ops_check_and_reclaim_tx_resource() returns a hardcoded placeholder value (42) instead of actual TX resource availability. This violates mac80211's flow control contract, preventing backpressure and causing uncontrolled URB accumulation under sustained TX load. Fix by adding per-channel atomic counters (tx_inflight[]) that track in-flight URBs. Increment before usb_submit_urb() with rollback on failure, decrement in the completion callback, and return the remaining capacity to mac80211. The firmware command channel (CH12) always returns 1 since it has its own flow control. The pre-increment pattern prevents a race where USB core completes the URB on another CPU before the submitting code increments the counter. 128 URBs per channel provides headroom for RTL8832CU at 160 MHz bandwidth. Tested on RTL8852AU (USB3 80 MHz) where 64 and 128 showed equivalent throughput, and on RTL8832AU where 128 sustained full throughput under 8-stream parallel load. Tested on D-Link DWA-X1850 (RTL8832AU), kernel 6.19.8, Fedora 43: Unpatched -> Patched (128 URBs) USB3 5GHz UL: 844 -> 837 Mbps (no regression) USB3 5GHz retx: 3 -> 0 USB3 2.4GHz UL: 162 -> 164 Mbps (no regression) 4-stream UL: 858 -> 826 Mbps (within variance) 8-stream UL: 872 -> 826 Mbps (within variance) UDP flood: 0% loss (690K datagrams) 60-second soak: 855 Mbps, 0 retransmits Reported-by: morrownr Signed-off-by: Lucid Duck Acked-by: Ping-Ke Shih Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20260402052216.207858-1-lucid_duck@justthetip.ca --- drivers/net/wireless/realtek/rtw89/usb.c | 20 ++++++++++++++++++-- drivers/net/wireless/realtek/rtw89/usb.h | 3 +++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/usb.c b/drivers/net/wireless/realtek/rtw89/usb.c index 581b8c05f930..767a95f759b1 100644 --- a/drivers/net/wireless/realtek/rtw89/usb.c +++ b/drivers/net/wireless/realtek/rtw89/usb.c @@ -161,16 +161,24 @@ static u32 rtw89_usb_ops_check_and_reclaim_tx_resource(struct rtw89_dev *rtwdev, u8 txch) { + struct rtw89_usb *rtwusb = rtw89_usb_priv(rtwdev); + int inflight; + if (txch == RTW89_TXCH_CH12) return 1; - return 42; /* TODO some kind of calculation? */ + inflight = atomic_read(&rtwusb->tx_inflight[txch]); + if (inflight >= RTW89_USB_MAX_TX_URBS_PER_CH) + return 0; + + return RTW89_USB_MAX_TX_URBS_PER_CH - inflight; } static void rtw89_usb_write_port_complete(struct urb *urb) { struct rtw89_usb_tx_ctrl_block *txcb = urb->context; struct rtw89_dev *rtwdev = txcb->rtwdev; + struct rtw89_usb *rtwusb = rtw89_usb_priv(rtwdev); struct ieee80211_tx_info *info; struct rtw89_txwd_body *txdesc; struct sk_buff *skb; @@ -229,6 +237,8 @@ static void rtw89_usb_write_port_complete(struct urb *urb) break; } + atomic_dec(&rtwusb->tx_inflight[txcb->txch]); + kfree(txcb); } @@ -306,9 +316,13 @@ static void rtw89_usb_ops_tx_kick_off(struct rtw89_dev *rtwdev, u8 txch) skb_queue_tail(&txcb->tx_ack_queue, skb); + atomic_inc(&rtwusb->tx_inflight[txch]); + ret = rtw89_usb_write_port(rtwdev, txch, skb->data, skb->len, txcb); if (ret) { + atomic_dec(&rtwusb->tx_inflight[txch]); + if (ret != -ENODEV) rtw89_err(rtwdev, "write port txch %d failed: %d\n", txch, ret); @@ -684,8 +698,10 @@ static void rtw89_usb_init_tx(struct rtw89_dev *rtwdev) struct rtw89_usb *rtwusb = rtw89_usb_priv(rtwdev); int i; - for (i = 0; i < ARRAY_SIZE(rtwusb->tx_queue); i++) + for (i = 0; i < ARRAY_SIZE(rtwusb->tx_queue); i++) { skb_queue_head_init(&rtwusb->tx_queue[i]); + atomic_set(&rtwusb->tx_inflight[i], 0); + } } static void rtw89_usb_deinit_tx(struct rtw89_dev *rtwdev) diff --git a/drivers/net/wireless/realtek/rtw89/usb.h b/drivers/net/wireless/realtek/rtw89/usb.h index 3d17e514e346..507f61f58ed9 100644 --- a/drivers/net/wireless/realtek/rtw89/usb.h +++ b/drivers/net/wireless/realtek/rtw89/usb.h @@ -31,6 +31,8 @@ #define R_AX_RXAGG_0 0x8900 #define B_AX_RXAGG_0_BUF_SZ_4K GENMASK(7, 0) +#define RTW89_USB_MAX_TX_URBS_PER_CH 128 + struct rtw89_usb_info { u32 usb_host_request_2; u32 usb_wlan0_1; @@ -75,6 +77,7 @@ struct rtw89_usb { struct usb_anchor tx_submitted; struct sk_buff_head tx_queue[RTW89_TXCH_NUM]; + atomic_t tx_inflight[RTW89_TXCH_NUM]; }; static inline struct rtw89_usb *rtw89_usb_priv(struct rtw89_dev *rtwdev) From 4799ff80c68a76e408b8dadd4aeaa5311962033c Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Mon, 30 Mar 2026 18:29:25 +0300 Subject: [PATCH 1101/1561] selftests: forwarding: extend ethtool_std_stats_get with pause statistics Even though pause frame statistics are not exported through the same ethtool command, there is no point in adding another helper just for them. Extent the ethtool_std_stats_get() function so that we are able to interrogate using the same helper all the standard statistics. And since we are touching the function, convert the initial ethtool call as well to the jq --arg form in order to be easier to read. Signed-off-by: Ioana Ciornei Reviewed-by: Petr Machata Link: https://patch.msgid.link/20260330152933.2195885-2-ioana.ciornei@nxp.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/forwarding/lib.sh | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index a9034f0bb58b..3009ce00c5dc 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -831,8 +831,14 @@ ethtool_std_stats_get() local name=$1; shift local src=$1; shift - ethtool --json -S $dev --groups $grp -- --src $src | \ - jq '.[]."'"$grp"'"."'$name'"' + if [[ "$grp" == "pause" ]]; then + ethtool -I --json -a "$dev" --src "$src" | \ + jq --arg name "$name" '.[].statistics[$name]' + return + fi + + ethtool --json -S "$dev" --groups "$grp" -- --src "$src" | \ + jq --arg grp "$grp" --arg name "$name" '.[][$grp][$name]' } qdisc_stats_get() From 2de16ebe78f0509d5fa0b41ed6e841603058b6a6 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Mon, 30 Mar 2026 18:29:26 +0300 Subject: [PATCH 1102/1561] selftests: net: extend lib.sh to parse drivers/net/net.config Extend lib.sh so that it's able to parse driver/net/net.config and environment variables such as NETIF, REMOTE_TYPE, LOCAL_V4 etc described in drivers/net/README.rst. In order to make the transition towards running with a single local interface smoother for the bash networking driver tests, beside sourcing the net.config file also translate the new env variables into the old style based on the NETIFS array. Since the NETIFS array only holds the network interface names, also add a new array - TARGETS - which keeps track of the target on which a specific interfaces resides - local, netns or accesible through an ssh command. For example, a net.config which looks like below: NETIF=eth0 LOCAL_V4=192.168.1.1 REMOTE_V4=192.168.1.2 REMOTE_TYPE=ssh REMOTE_ARGS=root@192.168.1.2 will generate the NETIFS and TARGETS arrays with the following data. NETIFS[p1]="eth0" NETIFS[p2]="eth2" TARGETS[eth0]="local:" TARGETS[eth2]="ssh:root@192.168.1.2" The above will be true if on the remote target, the interface which has the 192.168.1.2 address is named eth2. Since the TARGETS array is indexed by the network interface name, document a new restriction README.rst which states that the remote interface cannot have the same name as the local one. Keep the old way of populating the NETIFS variable based on the command line arguments. This will be invoked in case DRIVER_TEST_CONFORMANT = "no". Also add a couple of helpers which can be used by tests which need to run a specific bash command on a different target than the local system, be it either another netns or a remote system accessible through ssh. The __run_on() function is passed through $1 the target on which the command should be executed while run_on() is passed the name of the interface that is then used to retrieve the target from the TARGETS array. Also add a stub run_on() function in net/lib.sh so that users of the net/lib.sh are going through the stub only since neither NETIFS nor TARGETS are valid in that circumstance. Signed-off-by: Ioana Ciornei Reviewed-by: Petr Machata Link: https://patch.msgid.link/20260330152933.2195885-3-ioana.ciornei@nxp.com Signed-off-by: Paolo Abeni --- .../testing/selftests/drivers/net/README.rst | 4 + tools/testing/selftests/net/forwarding/lib.sh | 148 ++++++++++++++++-- tools/testing/selftests/net/lib.sh | 5 + 3 files changed, 147 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/drivers/net/README.rst b/tools/testing/selftests/drivers/net/README.rst index b26b364be534..c8588436c224 100644 --- a/tools/testing/selftests/drivers/net/README.rst +++ b/tools/testing/selftests/drivers/net/README.rst @@ -26,6 +26,10 @@ The netdevice against which tests will be run must exist, be running Refer to list of :ref:`Variables` later in this file to set up running the tests against a real device. +The current support for bash tests restricts the use of the same interface name +on the local system and the remote one and will bail if this case is +encountered. + Both modes required ~~~~~~~~~~~~~~~~~~~ diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 3009ce00c5dc..83249aafa669 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -1,5 +1,6 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +#shellcheck disable=SC2034 # SC doesn't see our uses of global variables ############################################################################## # Topology description. p1 looped back to p2, p3 to p4 and so on. @@ -340,17 +341,144 @@ fi ############################################################################## # Command line options handling -count=0 - -while [[ $# -gt 0 ]]; do - if [[ "$count" -eq "0" ]]; then - unset NETIFS - declare -A NETIFS +check_env() { + if [[ ! (( -n "$LOCAL_V4" && -n "$REMOTE_V4") || + ( -n "$LOCAL_V6" && -n "$REMOTE_V6" )) ]]; then + echo "SKIP: Invalid environment, missing or inconsistent LOCAL_V4/REMOTE_V4/LOCAL_V6/REMOTE_V6" + echo "Please see tools/testing/selftests/drivers/net/README.rst" + exit "$ksft_skip" fi - count=$((count + 1)) - NETIFS[p$count]="$1" - shift -done + + if [[ -z "$REMOTE_TYPE" ]]; then + echo "SKIP: Invalid environment, missing REMOTE_TYPE" + exit "$ksft_skip" + fi + + if [[ -z "$REMOTE_ARGS" ]]; then + echo "SKIP: Invalid environment, missing REMOTE_ARGS" + exit "$ksft_skip" + fi +} + +__run_on() +{ + local target=$1; shift + local type args + + IFS=':' read -r type args <<< "$target" + + case "$type" in + netns) + # Execute command in network namespace + # args contains the namespace name + ip netns exec "$args" "$@" + ;; + ssh) + # Execute command via SSH args contains user@host + ssh -n "$args" "$@" + ;; + local|*) + # Execute command locally. This is also the fallback + # case for when the interface's target is not found in + # the TARGETS array. + "$@" + ;; + esac +} + +run_on() +{ + local iface=$1; shift + local target="local:" + + if [ "${DRIVER_TEST_CONFORMANT}" = "yes" ]; then + target="${TARGETS[$iface]}" + fi + + __run_on "$target" "$@" +} + +get_ifname_by_ip() +{ + local target=$1; shift + local ip_addr=$1; shift + + __run_on "$target" ip -j addr show to "$ip_addr" | jq -r '.[].ifname' +} + +# Whether the test is conforming to the requirements and usage described in +# drivers/net/README.rst. +: "${DRIVER_TEST_CONFORMANT:=no}" + +declare -A TARGETS + +# Based on DRIVER_TEST_CONFORMANT, decide if to source drivers/net/net.config +# or not. In the "yes" case, the test expects to pass the arguments through the +# variables specified in drivers/net/README.rst file. If not, fallback on +# parsing the script arguments for interface names. +if [ "${DRIVER_TEST_CONFORMANT}" = "yes" ]; then + if [[ -f $net_forwarding_dir/../../drivers/net/net.config ]]; then + source "$net_forwarding_dir/../../drivers/net/net.config" + fi + + if (( NUM_NETIFS > 2)); then + echo "SKIP: DRIVER_TEST_CONFORMANT=yes and NUM_NETIFS is bigger than 2" + exit "$ksft_skip" + fi + + check_env + + # Populate the NETIFS and TARGETS arrays automatically based on the + # environment variables. The TARGETS array is indexed by the network + # interface name keeping track of the target on which the interface + # resides. Values will be strings of the following format - + # :. + # + # TARGETS[eth0]="local:" - meaning that the eth0 interface is + # accessible locally + # TARGETS[eth1]="netns:foo" - eth1 is in the foo netns + # TARGETS[eth2]="ssh:root@10.0.0.2" - eth2 is accessible through + # running the 'ssh root@10.0.0.2' command. + + unset NETIFS + declare -A NETIFS + + NETIFS[p1]="$NETIF" + TARGETS[$NETIF]="local:" + + # Locate the name of the remote interface + remote_target="$REMOTE_TYPE:$REMOTE_ARGS" + if [[ -v REMOTE_V4 ]]; then + remote_netif=$(get_ifname_by_ip "$remote_target" "$REMOTE_V4") + else + remote_netif=$(get_ifname_by_ip "$remote_target" "$REMOTE_V6") + fi + if [[ ! -n "$remote_netif" ]]; then + echo "SKIP: cannot find remote interface" + exit "$ksft_skip" + fi + + if [[ "$NETIF" == "$remote_netif" ]]; then + echo "SKIP: local and remote interfaces cannot have the same name" + exit "$ksft_skip" + fi + + NETIFS[p2]="$remote_netif" + TARGETS[$remote_netif]="$REMOTE_TYPE:$REMOTE_ARGS" +else + count=0 + + while [[ $# -gt 0 ]]; do + if [[ "$count" -eq "0" ]]; then + unset NETIFS + declare -A NETIFS + fi + count=$((count + 1)) + NETIFS[p$count]="$1" + TARGETS[$1]="local:" + shift + done +fi ############################################################################## # Network interfaces configuration diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index b40694573f4c..3f79bf1ec977 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -670,3 +670,8 @@ cmd_jq() # return success only in case of non-empty output [ ! -z "$output" ] } + +run_on() +{ + shift; "$@" +} From 7db9da4c67c7781c2c917657d5d9bdc171836d5f Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Mon, 30 Mar 2026 18:29:27 +0300 Subject: [PATCH 1103/1561] selftests: net: update some helpers to use run_on Update some helpers so that they are capable to run commands on different targets than the local one. This patch makes the necesasy modification for those helpers / sections of code which are needed for the ethtool_rmon.sh test that will be converted in the next patches. For example, mac_addr_prepare() and mac_addr_restore() used when STABLE_MAC_ADDRS=yes need to ensure stable MAC addresses on interfaces located even in other namespaces. In order to do that, append the 'ip link' commands with a 'run_on $dev' tag. The same run_on is necessary also when verifying if all the interfaces listed in NETIFS are indeed available. Signed-off-by: Ioana Ciornei Reviewed-by: Petr Machata Link: https://patch.msgid.link/20260330152933.2195885-4-ioana.ciornei@nxp.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/forwarding/lib.sh | 19 ++++++++++++------- tools/testing/selftests/net/lib.sh | 3 ++- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 83249aafa669..d8cc4c64148d 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -546,10 +546,11 @@ mac_addr_prepare() dev=${NETIFS[p$i]} new_addr=$(printf "00:01:02:03:04:%02x" $i) - MAC_ADDR_ORIG["$dev"]=$(ip -j link show dev $dev | jq -e '.[].address') + MAC_ADDR_ORIG["$dev"]=$(run_on "$dev" \ + ip -j link show dev "$dev" | jq -e '.[].address') # Strip quotes MAC_ADDR_ORIG["$dev"]=${MAC_ADDR_ORIG["$dev"]//\"/} - ip link set dev $dev address $new_addr + run_on "$dev" ip link set dev "$dev" address $new_addr done } @@ -559,7 +560,8 @@ mac_addr_restore() for ((i = 1; i <= NUM_NETIFS; ++i)); do dev=${NETIFS[p$i]} - ip link set dev $dev address ${MAC_ADDR_ORIG["$dev"]} + run_on "$dev" \ + ip link set dev "$dev" address ${MAC_ADDR_ORIG["$dev"]} done } @@ -572,7 +574,9 @@ if [[ "$STABLE_MAC_ADDRS" = "yes" ]]; then fi for ((i = 1; i <= NUM_NETIFS; ++i)); do - ip link show dev ${NETIFS[p$i]} &> /dev/null + int="${NETIFS[p$i]}" + + run_on "$int" ip link show dev "$int" &> /dev/null if [[ $? -ne 0 ]]; then echo "SKIP: could not find all required interfaces" exit $ksft_skip @@ -655,7 +659,7 @@ setup_wait_dev_with_timeout() local i for ((i = 1; i <= $max_iterations; ++i)); do - ip link show dev $dev up \ + run_on "$dev" ip link show dev "$dev" up \ | grep 'state UP' &> /dev/null if [[ $? -ne 0 ]]; then sleep 1 @@ -960,12 +964,13 @@ ethtool_std_stats_get() local src=$1; shift if [[ "$grp" == "pause" ]]; then - ethtool -I --json -a "$dev" --src "$src" | \ + run_on "$dev" ethtool -I --json -a "$dev" --src "$src" | \ jq --arg name "$name" '.[].statistics[$name]' return fi - ethtool --json -S "$dev" --groups "$grp" -- --src "$src" | \ + run_on "$dev" \ + ethtool --json -S "$dev" --groups "$grp" -- --src "$src" | \ jq --arg grp "$grp" --arg name "$name" '.[][$grp][$name]' } diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index 3f79bf1ec977..e915386daf1b 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -514,7 +514,8 @@ mac_get() { local if_name=$1 - ip -j link show dev $if_name | jq -r '.[]["address"]' + run_on "$if_name" \ + ip -j link show dev "$if_name" | jq -r '.[]["address"]' } kill_process() From afd1b9bf7529d853284205f618e314134a5ad9ad Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Mon, 30 Mar 2026 18:29:28 +0300 Subject: [PATCH 1104/1561] selftests: drivers: hw: cleanup shellcheck warnings in the rmon test If run on the ethtool_rmon.sh script, shellcheck generates a bunch of false positive errors. Suppress those checks that generate them. Also cleanup the remaining warnings by using double quoting around the used variables. Signed-off-by: Ioana Ciornei Reviewed-by: Petr Machata Link: https://patch.msgid.link/20260330152933.2195885-5-ioana.ciornei@nxp.com Signed-off-by: Paolo Abeni --- .../selftests/drivers/net/hw/ethtool_rmon.sh | 54 ++++++++++--------- 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh b/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh index 8f60c1685ad4..13b3760e3a40 100755 --- a/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh +++ b/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh @@ -1,5 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +#shellcheck disable=SC2034 # SC does not see the global variables +#shellcheck disable=SC2317,SC2329 # unused functions ALL_TESTS=" rmon_rx_histogram @@ -19,11 +21,12 @@ ensure_mtu() { local iface=$1; shift local len=$1; shift - local current=$(ip -j link show dev $iface | jq -r '.[0].mtu') local required=$((len - ETH_HLEN - ETH_FCS_LEN)) + local current - if [ $current -lt $required ]; then - ip link set dev $iface mtu $required || return 1 + current=$(ip -j link show dev "$iface" | jq -r '.[0].mtu') + if [ "$current" -lt "$required" ]; then + ip link set dev "$iface" mtu "$required" || return 1 fi } @@ -46,23 +49,23 @@ bucket_test() len=$((len - ETH_FCS_LEN)) len=$((len > 0 ? len : 0)) - before=$(ethtool --json -S $iface --groups rmon | \ + before=$(ethtool --json -S "$iface" --groups rmon | \ jq -r ".[0].rmon[\"${set}-pktsNtoM\"][$bucket].val") # Send 10k one way and 20k in the other, to detect counters # mapped to the wrong direction - $MZ $neigh -q -c $num_rx -p $len -a own -b bcast -d 10us - $MZ $iface -q -c $num_tx -p $len -a own -b bcast -d 10us + "$MZ" "$neigh" -q -c "$num_rx" -p "$len" -a own -b bcast -d 10us + "$MZ" "$iface" -q -c "$num_tx" -p "$len" -a own -b bcast -d 10us - after=$(ethtool --json -S $iface --groups rmon | \ + after=$(ethtool --json -S "$iface" --groups rmon | \ jq -r ".[0].rmon[\"${set}-pktsNtoM\"][$bucket].val") delta=$((after - before)) - expected=$([ $set = rx ] && echo $num_rx || echo $num_tx) + expected=$([ "$set" = rx ] && echo "$num_rx" || echo "$num_tx") # Allow some extra tolerance for other packets sent by the stack - [ $delta -ge $expected ] && [ $delta -le $((expected + 100)) ] + [ "$delta" -ge "$expected" ] && [ "$delta" -le $((expected + 100)) ] } rmon_histogram() @@ -78,23 +81,23 @@ rmon_histogram() while read -r -a bucket; do step="$set-pkts${bucket[0]}to${bucket[1]} on $iface" - for if in $iface $neigh; do - if ! ensure_mtu $if ${bucket[0]}; then + for if in "$iface" "$neigh"; do + if ! ensure_mtu "$if" "${bucket[0]}"; then log_test_xfail "$if does not support the required MTU for $step" return fi done - if ! bucket_test $iface $neigh $set $nbuckets ${bucket[0]}; then + if ! bucket_test "$iface" "$neigh" "$set" "$nbuckets" "${bucket[0]}"; then check_err 1 "$step failed" return 1 fi log_test "$step" nbuckets=$((nbuckets + 1)) - done < <(ethtool --json -S $iface --groups rmon | \ + done < <(ethtool --json -S "$iface" --groups rmon | \ jq -r ".[0].rmon[\"${set}-pktsNtoM\"][]|[.low, .high]|@tsv" 2>/dev/null) - if [ $nbuckets -eq 0 ]; then + if [ "$nbuckets" -eq 0 ]; then log_test_xfail "$iface does not support $set histogram counters" return fi @@ -102,14 +105,14 @@ rmon_histogram() rmon_rx_histogram() { - rmon_histogram $h1 $h2 rx - rmon_histogram $h2 $h1 rx + rmon_histogram "$h1" "$h2" rx + rmon_histogram "$h2" "$h1" rx } rmon_tx_histogram() { - rmon_histogram $h1 $h2 tx - rmon_histogram $h2 $h1 tx + rmon_histogram "$h1" "$h2" tx + rmon_histogram "$h2" "$h1" tx } setup_prepare() @@ -117,9 +120,10 @@ setup_prepare() h1=${NETIFS[p1]} h2=${NETIFS[p2]} - for iface in $h1 $h2; do - netif_mtu[$iface]=$(ip -j link show dev $iface | jq -r '.[0].mtu') - ip link set dev $iface up + for iface in "$h1" "$h2"; do + netif_mtu["$iface"]=$(ip -j link show dev "$iface" | \ + jq -r '.[0].mtu') + ip link set dev "$iface" up done } @@ -127,9 +131,9 @@ cleanup() { pre_cleanup - for iface in $h2 $h1; do - ip link set dev $iface \ - mtu ${netif_mtu[$iface]} \ + for iface in "$h2" "$h1"; do + ip link set dev "$iface" \ + mtu "${netif_mtu[$iface]}" \ down done } @@ -142,4 +146,4 @@ setup_wait tests_run -exit $EXIT_STATUS +exit "$EXIT_STATUS" From e76c71483a5600eb9ef8f1741c92fe1282bfd461 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Mon, 30 Mar 2026 18:29:29 +0300 Subject: [PATCH 1105/1561] selftests: drivers: hw: test rmon counters only on first interface The selftests in drivers/net are slowly transitioning to being able to be used on systems with a single network interface. The first step for the ethtool_rmon.sh test is to only validate that the rmon counters are properly exported on the first interface supplied as an argument. Remove the rmon_histogram calls which intend to test also the rmon counters on the 2nd interface. This also removes the need for the remote system, which should be used only to inject traffic, to also support rmon counters. Signed-off-by: Ioana Ciornei Reviewed-by: Petr Machata Link: https://patch.msgid.link/20260330152933.2195885-6-ioana.ciornei@nxp.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh b/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh index 13b3760e3a40..80e75b9b40fd 100755 --- a/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh +++ b/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh @@ -106,13 +106,11 @@ rmon_histogram() rmon_rx_histogram() { rmon_histogram "$h1" "$h2" rx - rmon_histogram "$h2" "$h1" rx } rmon_tx_histogram() { rmon_histogram "$h1" "$h2" tx - rmon_histogram "$h2" "$h1" tx } setup_prepare() From 557c067c0001b2e1538633d921790a1094e675ff Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Mon, 30 Mar 2026 18:29:30 +0300 Subject: [PATCH 1106/1561] selftests: drivers: hw: replace counter upper limit with UINT32_MAX in rmon test The ethtool_rmon.sh script checks that the number of packets sent / received during a test matches the expected value with a 1% tolerance. Since in the next patches this test will gain the capability to also be run on systems with a single interface where the traffic generator is accesible through ssh, use the UINT32_MAX as the upper limit. This is necessary since the same interface will be used also for control traffic (the ssh commands) as well as the mausezahn generated one. Signed-off-by: Ioana Ciornei Reviewed-by: Petr Machata Link: https://patch.msgid.link/20260330152933.2195885-7-ioana.ciornei@nxp.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh b/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh index 80e75b9b40fd..f290ce1832f1 100755 --- a/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh +++ b/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh @@ -12,6 +12,7 @@ NUM_NETIFS=2 lib_dir=$(dirname "$0") source "$lib_dir"/../../../net/forwarding/lib.sh +UINT32_MAX=$((2**32 - 1)) ETH_FCS_LEN=4 ETH_HLEN=$((6+6+2)) @@ -64,8 +65,7 @@ bucket_test() expected=$([ "$set" = rx ] && echo "$num_rx" || echo "$num_tx") - # Allow some extra tolerance for other packets sent by the stack - [ "$delta" -ge "$expected" ] && [ "$delta" -le $((expected + 100)) ] + [ "$delta" -ge "$expected" ] && [ "$delta" -le "$UINT32_MAX" ] } rmon_histogram() From eec1b9057c24d3f7c37d98208bb0c9be7130d3c6 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Mon, 30 Mar 2026 18:29:31 +0300 Subject: [PATCH 1107/1561] selftests: drivers: hw: move to KTAP output Update the ethtool_rmon.sh test so that it uses the KTAP format for its output. This is achieved by using the helpers found in ktap_helpers.sh. An example output can be found below. $ ./ethtool_rmon.sh endpmac3 endpmac4 TAP version 13 1..14 ok 1 ethtool_rmon.rx-pkts64to64 ok 2 ethtool_rmon.rx-pkts65to127 ok 3 ethtool_rmon.rx-pkts128to255 ok 4 ethtool_rmon.rx-pkts256to511 ok 5 ethtool_rmon.rx-pkts512to1023 ok 6 ethtool_rmon.rx-pkts1024to1518 ok 7 ethtool_rmon.rx-pkts1519to10240 ok 8 ethtool_rmon.tx-pkts64to64 ok 9 ethtool_rmon.tx-pkts65to127 ok 10 ethtool_rmon.tx-pkts128to255 ok 11 ethtool_rmon.tx-pkts256to511 ok 12 ethtool_rmon.tx-pkts512to1023 ok 13 ethtool_rmon.tx-pkts1024to1518 ok 14 ethtool_rmon.tx-pkts1519to10240 # Totals: pass:14 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Ioana Ciornei Reviewed-by: Petr Machata Link: https://patch.msgid.link/20260330152933.2195885-8-ioana.ciornei@nxp.com Signed-off-by: Paolo Abeni --- .../selftests/drivers/net/hw/ethtool_rmon.sh | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh b/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh index f290ce1832f1..ed81bdc33536 100755 --- a/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh +++ b/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh @@ -11,10 +11,12 @@ ALL_TESTS=" NUM_NETIFS=2 lib_dir=$(dirname "$0") source "$lib_dir"/../../../net/forwarding/lib.sh +source "$lib_dir"/../../../kselftest/ktap_helpers.sh UINT32_MAX=$((2**32 - 1)) ETH_FCS_LEN=4 ETH_HLEN=$((6+6+2)) +TEST_NAME=$(basename "$0" .sh) declare -A netif_mtu @@ -76,29 +78,28 @@ rmon_histogram() local nbuckets=0 local step= - RET=0 - while read -r -a bucket; do - step="$set-pkts${bucket[0]}to${bucket[1]} on $iface" + step="$set-pkts${bucket[0]}to${bucket[1]}" for if in "$iface" "$neigh"; do if ! ensure_mtu "$if" "${bucket[0]}"; then - log_test_xfail "$if does not support the required MTU for $step" + ktap_print_msg "$if does not support the required MTU for $step" + ktap_test_xfail "$TEST_NAME.$step" return fi done if ! bucket_test "$iface" "$neigh" "$set" "$nbuckets" "${bucket[0]}"; then - check_err 1 "$step failed" + ktap_test_fail "$TEST_NAME.$step" return 1 fi - log_test "$step" + ktap_test_pass "$TEST_NAME.$step" nbuckets=$((nbuckets + 1)) done < <(ethtool --json -S "$iface" --groups rmon | \ jq -r ".[0].rmon[\"${set}-pktsNtoM\"][]|[.low, .high]|@tsv" 2>/dev/null) if [ "$nbuckets" -eq 0 ]; then - log_test_xfail "$iface does not support $set histogram counters" + ktap_print_msg "$iface does not support $set histogram counters" return fi } @@ -139,9 +140,16 @@ cleanup() check_ethtool_counter_group_support trap cleanup EXIT +bucket_count=$(ethtool --json -S "${NETIFS[p1]}" --groups rmon | \ + jq -r '.[0].rmon | + "\((."rx-pktsNtoM" | length) + + (."tx-pktsNtoM" | length))"') +ktap_print_header +ktap_set_plan "$bucket_count" + setup_prepare setup_wait tests_run -exit "$EXIT_STATUS" +ktap_finished From abe4929bc7d0e29c5dda8b0bb0f6beeec76fd7b6 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Mon, 30 Mar 2026 18:29:32 +0300 Subject: [PATCH 1108/1561] selftests: drivers: hw: update ethtool_rmon to work with a single local interface This patch finalizes the transition to work with a single local interface for the ethtool_rmon.sh test. Each 'ip link' and 'ethtool' command used by the test is annotated with the necessary run_on in order to be executed on the necessary target system, be it local, in another network namespace or through ssh. Since we need NETIF up and running also for control traffic, we now expect that the interfaces are up and running and do not touch bring them up or down at the end of the test. This is also documented in the drivers/net/README.rst. The ethtool_rmon.sh script can still be used in the older fashion by passing two interfaces as command line arguments, the only restriction is that those interfaces need to be already up. $ DRIVER_TEST_CONFORMANT=no ./ethtool_rmon.sh eth0 eth1 As part of the kselftest infrastructure, this test can be run in the following manner: $ make -C tools/testing/selftests/ TARGETS="drivers/net drivers/net/hw" \ install INSTALL_PATH=/tmp/ksft-net-drv $ cd /tmp/ksft-net-drv/ $ cat > ./drivers/net/net.config < Reviewed-by: Petr Machata Link: https://patch.msgid.link/20260330152933.2195885-9-ioana.ciornei@nxp.com Signed-off-by: Paolo Abeni --- .../selftests/drivers/net/hw/ethtool_rmon.sh | 30 +++++++++++-------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh b/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh index ed81bdc33536..2ec19edddfaa 100755 --- a/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh +++ b/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh @@ -8,6 +8,7 @@ ALL_TESTS=" rmon_tx_histogram " +: "${DRIVER_TEST_CONFORMANT:=yes}" NUM_NETIFS=2 lib_dir=$(dirname "$0") source "$lib_dir"/../../../net/forwarding/lib.sh @@ -27,9 +28,11 @@ ensure_mtu() local required=$((len - ETH_HLEN - ETH_FCS_LEN)) local current - current=$(ip -j link show dev "$iface" | jq -r '.[0].mtu') + current=$(run_on "$iface" \ + ip -j link show dev "$iface" | jq -r '.[0].mtu') if [ "$current" -lt "$required" ]; then - ip link set dev "$iface" mtu "$required" || return 1 + run_on "$iface" ip link set dev "$iface" mtu "$required" \ + || return 1 fi } @@ -52,15 +55,17 @@ bucket_test() len=$((len - ETH_FCS_LEN)) len=$((len > 0 ? len : 0)) - before=$(ethtool --json -S "$iface" --groups rmon | \ + before=$(run_on "$iface" ethtool --json -S "$iface" --groups rmon | \ jq -r ".[0].rmon[\"${set}-pktsNtoM\"][$bucket].val") # Send 10k one way and 20k in the other, to detect counters # mapped to the wrong direction - "$MZ" "$neigh" -q -c "$num_rx" -p "$len" -a own -b bcast -d 10us - "$MZ" "$iface" -q -c "$num_tx" -p "$len" -a own -b bcast -d 10us + run_on "$neigh" \ + "$MZ" "$neigh" -q -c "$num_rx" -p "$len" -a own -b bcast -d 10us + run_on "$iface" \ + "$MZ" "$iface" -q -c "$num_tx" -p "$len" -a own -b bcast -d 10us - after=$(ethtool --json -S "$iface" --groups rmon | \ + after=$(run_on "$iface" ethtool --json -S "$iface" --groups rmon | \ jq -r ".[0].rmon[\"${set}-pktsNtoM\"][$bucket].val") delta=$((after - before)) @@ -95,7 +100,7 @@ rmon_histogram() fi ktap_test_pass "$TEST_NAME.$step" nbuckets=$((nbuckets + 1)) - done < <(ethtool --json -S "$iface" --groups rmon | \ + done < <(run_on "$iface" ethtool --json -S "$iface" --groups rmon | \ jq -r ".[0].rmon[\"${set}-pktsNtoM\"][]|[.low, .high]|@tsv" 2>/dev/null) if [ "$nbuckets" -eq 0 ]; then @@ -120,9 +125,8 @@ setup_prepare() h2=${NETIFS[p2]} for iface in "$h1" "$h2"; do - netif_mtu["$iface"]=$(ip -j link show dev "$iface" | \ - jq -r '.[0].mtu') - ip link set dev "$iface" up + netif_mtu["$iface"]=$(run_on "$iface" \ + ip -j link show dev "$iface" | jq -r '.[0].mtu') done } @@ -130,10 +134,10 @@ cleanup() { pre_cleanup + # Do not bring down the interfaces, just configure the initial MTU for iface in "$h2" "$h1"; do - ip link set dev "$iface" \ - mtu "${netif_mtu[$iface]}" \ - down + run_on "$iface" ip link set dev "$iface" \ + mtu "${netif_mtu[$iface]}" done } From 3016574ea2f8a8cff69286574ac027fa5aed5a81 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Mon, 30 Mar 2026 18:29:33 +0300 Subject: [PATCH 1109/1561] selftests: drivers: hw: add test for the ethtool standard counters Add a new selftest - ethtool_std_stats.sh - which validates the eth-ctrl, eth-mac and pause standard statistics exported by an interface. Collision related eth-mac counters as well as the error ones will be checked against zero since that is the most likely correct scenario. The central part of this patch is the traffic_test() function which gathers the 'before' counter values, sends a batch of traffic and then interrogates again the same counters in order to determine if the delta is on target. The function receives an array through which the caller can request what counters to be interrogated and, for each of them, what is their target delta value. The output from this selftest looks as follows on a LX2160ARDB board: $ ./run_kselftest.sh -t drivers/net/hw:ethtool_std_stats.sh TAP version 13 1..1 # timeout set to 0 # selftests: drivers/net/hw: ethtool_std_stats.sh # TAP version 13 # 1..26 # ok 1 ethtool_std_stats.eth-ctrl-MACControlFramesTransmitted # ok 2 ethtool_std_stats.eth-ctrl-MACControlFramesReceived # ok 3 ethtool_std_stats.eth-mac-FrameCheckSequenceErrors # ok 4 ethtool_std_stats.eth-mac-AlignmentErrors # ok 5 ethtool_std_stats.eth-mac-FramesLostDueToIntMACXmitError # ok 6 ethtool_std_stats.eth-mac-CarrierSenseErrors # SKIP # ok 7 ethtool_std_stats.eth-mac-FramesLostDueToIntMACRcvError # ok 8 ethtool_std_stats.eth-mac-InRangeLengthErrors # SKIP # ok 9 ethtool_std_stats.eth-mac-OutOfRangeLengthField # SKIP # ok 10 ethtool_std_stats.eth-mac-FrameTooLongErrors # SKIP # ok 11 ethtool_std_stats.eth-mac-FramesAbortedDueToXSColls # SKIP # ok 12 ethtool_std_stats.eth-mac-SingleCollisionFrames # SKIP # ok 13 ethtool_std_stats.eth-mac-MultipleCollisionFrames # SKIP # ok 14 ethtool_std_stats.eth-mac-FramesWithDeferredXmissions # SKIP # ok 15 ethtool_std_stats.eth-mac-LateCollisions # SKIP # ok 16 ethtool_std_stats.eth-mac-FramesWithExcessiveDeferral # SKIP # ok 17 ethtool_std_stats.eth-mac-BroadcastFramesXmittedOK # ok 18 ethtool_std_stats.eth-mac-OctetsTransmittedOK # ok 19 ethtool_std_stats.eth-mac-BroadcastFramesReceivedOK # ok 20 ethtool_std_stats.eth-mac-OctetsReceivedOK # ok 21 ethtool_std_stats.eth-mac-FramesTransmittedOK # ok 22 ethtool_std_stats.eth-mac-MulticastFramesXmittedOK # ok 23 ethtool_std_stats.eth-mac-FramesReceivedOK # ok 24 ethtool_std_stats.eth-mac-MulticastFramesReceivedOK # ok 25 ethtool_std_stats.pause-tx_pause_frames # ok 26 ethtool_std_stats.pause-rx_pause_frames # # 10 skipped test(s) detected. Consider enabling relevant config options to improve coverage. # # Totals: pass:16 fail:0 xfail:0 xpass:0 skip:10 error:0 ok 1 selftests: drivers/net/hw: ethtool_std_stats.sh Please note that not all MACs are counting the software injected pause frames as real Tx pause. For example, on a LS1028ARDB the selftest output will reflect the fact that neither the ENETC MAC, nor the Felix switch MAC are able to detect Tx pause frames injected by software. $ ./run_kselftest.sh -t drivers/net/hw:ethtool_std_stats.sh (...) # # software sent pause frames not detected # ok 25 ethtool_std_stats.pause-tx_pause_frames # XFAIL # ok 26 ethtool_std_stats.pause-rx_pause_frames Signed-off-by: Ioana Ciornei Acked-by: Petr Machata Link: https://patch.msgid.link/20260330152933.2195885-10-ioana.ciornei@nxp.com Signed-off-by: Paolo Abeni --- .../testing/selftests/drivers/net/hw/Makefile | 1 + .../drivers/net/hw/ethtool_std_stats.sh | 206 ++++++++++++++++++ 2 files changed, 207 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/hw/ethtool_std_stats.sh diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile index 884cc77daeaa..deeca3f8d080 100644 --- a/tools/testing/selftests/drivers/net/hw/Makefile +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -26,6 +26,7 @@ TEST_PROGS = \ ethtool_extended_state.sh \ ethtool_mm.sh \ ethtool_rmon.sh \ + ethtool_std_stats.sh \ gro_hw.py \ hw_stats_l3.sh \ hw_stats_l3_gre.sh \ diff --git a/tools/testing/selftests/drivers/net/hw/ethtool_std_stats.sh b/tools/testing/selftests/drivers/net/hw/ethtool_std_stats.sh new file mode 100755 index 000000000000..c085d2a4c989 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/ethtool_std_stats.sh @@ -0,0 +1,206 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +#shellcheck disable=SC2034 # SC does not see the global variables +#shellcheck disable=SC2317,SC2329 # unused functions + +ALL_TESTS=" + test_eth_ctrl_stats + test_eth_mac_stats + test_pause_stats +" +: "${DRIVER_TEST_CONFORMANT:=yes}" +STABLE_MAC_ADDRS=yes +NUM_NETIFS=2 +lib_dir=$(dirname "$0") +# shellcheck source=./../../../net/forwarding/lib.sh +source "$lib_dir"/../../../net/forwarding/lib.sh +# shellcheck source=./../../../kselftest/ktap_helpers.sh +source "$lib_dir"/../../../kselftest/ktap_helpers.sh + +UINT32_MAX=$((2**32 - 1)) +SUBTESTS=0 +TEST_NAME=$(basename "$0" .sh) + +traffic_test() +{ + local iface=$1; shift + local neigh=$1; shift + local num_tx=$1; shift + local pkt_format="$1"; shift + local -a counters=("$@") + local int grp cnt target exact_check + local before after delta + local num_rx=$((num_tx * 2)) + local xfail_message + local src="aggregate" + local i + + for i in "${!counters[@]}"; do + read -r int grp cnt target exact_check xfail_message \ + <<< "${counters[$i]}" + + before[i]=$(ethtool_std_stats_get "$int" "$grp" "$cnt" "$src") + done + + # shellcheck disable=SC2086 # needs split options + run_on "$iface" "$MZ" "$iface" -q -c "$num_tx" $pkt_format + + # shellcheck disable=SC2086 # needs split options + run_on "$neigh" "$MZ" "$neigh" -q -c "$num_rx" $pkt_format + + for i in "${!counters[@]}"; do + read -r int grp cnt target exact_check xfail_message \ + <<< "${counters[$i]}" + + after[i]=$(ethtool_std_stats_get "$int" "$grp" "$cnt" "$src") + if [[ "${after[$i]}" == "null" ]]; then + ktap_test_skip "$TEST_NAME.$grp-$cnt" + continue; + fi + + delta=$((after[i] - before[i])) + + if [ "$exact_check" -ne 0 ]; then + [ "$delta" -eq "$target" ] + else + [ "$delta" -ge "$target" ] && \ + [ "$delta" -le "$UINT32_MAX" ] + fi + err="$?" + + if [[ $err != 0 ]] && [[ -n $xfail_message ]]; then + ktap_print_msg "$xfail_message" + ktap_test_xfail "$TEST_NAME.$grp-$cnt" + continue; + fi + + if [[ $err != 0 ]]; then + ktap_print_msg "$grp-$cnt is not valid on $int (expected $target, got $delta)" + ktap_test_fail "$TEST_NAME.$grp-$cnt" + else + ktap_test_pass "$TEST_NAME.$grp-$cnt" + fi + done +} + +test_eth_ctrl_stats() +{ + local pkt_format="-a own -b bcast 88:08 -p 64" + local num_pkts=1000 + local -a counters + + counters=("$h1 eth-ctrl MACControlFramesTransmitted $num_pkts 0") + traffic_test "$h1" "$h2" "$num_pkts" "$pkt_format" \ + "${counters[@]}" + + counters=("$h1 eth-ctrl MACControlFramesReceived $num_pkts 0") + traffic_test "$h2" "$h1" "$num_pkts" "$pkt_format" \ + "${counters[@]}" +} +SUBTESTS=$((SUBTESTS + 2)) + +test_eth_mac_stats() +{ + local pkt_size=100 + local pkt_size_fcs=$((pkt_size + 4)) + local bcast_pkt_format="-a own -b bcast -p $pkt_size" + local mcast_pkt_format="-a own -b 01:00:5E:00:00:01 -p $pkt_size" + local num_pkts=2000 + local octets=$((pkt_size_fcs * num_pkts)) + local -a counters error_cnt collision_cnt + + # Error counters should be exactly zero + counters=("$h1 eth-mac FrameCheckSequenceErrors 0 1" + "$h1 eth-mac AlignmentErrors 0 1" + "$h1 eth-mac FramesLostDueToIntMACXmitError 0 1" + "$h1 eth-mac CarrierSenseErrors 0 1" + "$h1 eth-mac FramesLostDueToIntMACRcvError 0 1" + "$h1 eth-mac InRangeLengthErrors 0 1" + "$h1 eth-mac OutOfRangeLengthField 0 1" + "$h1 eth-mac FrameTooLongErrors 0 1" + "$h1 eth-mac FramesAbortedDueToXSColls 0 1") + traffic_test "$h1" "$h2" "$num_pkts" "$bcast_pkt_format" \ + "${counters[@]}" + + # Collision related counters should also be zero + counters=("$h1 eth-mac SingleCollisionFrames 0 1" + "$h1 eth-mac MultipleCollisionFrames 0 1" + "$h1 eth-mac FramesWithDeferredXmissions 0 1" + "$h1 eth-mac LateCollisions 0 1" + "$h1 eth-mac FramesWithExcessiveDeferral 0 1") + traffic_test "$h1" "$h2" "$num_pkts" "$bcast_pkt_format" \ + "${counters[@]}" + + counters=("$h1 eth-mac BroadcastFramesXmittedOK $num_pkts 0" + "$h1 eth-mac OctetsTransmittedOK $octets 0") + traffic_test "$h1" "$h2" "$num_pkts" "$bcast_pkt_format" \ + "${counters[@]}" + + counters=("$h1 eth-mac BroadcastFramesReceivedOK $num_pkts 0" + "$h1 eth-mac OctetsReceivedOK $octets 0") + traffic_test "$h2" "$h1" "$num_pkts" "$bcast_pkt_format" \ + "${counters[@]}" + + counters=("$h1 eth-mac FramesTransmittedOK $num_pkts 0" + "$h1 eth-mac MulticastFramesXmittedOK $num_pkts 0") + traffic_test "$h1" "$h2" "$num_pkts" "$mcast_pkt_format" \ + "${counters[@]}" + + counters=("$h1 eth-mac FramesReceivedOK $num_pkts 0" + "$h1 eth-mac MulticastFramesReceivedOK $num_pkts 0") + traffic_test "$h2" "$h1" "$num_pkts" "$mcast_pkt_format" \ + "${counters[@]}" +} +SUBTESTS=$((SUBTESTS + 22)) + +test_pause_stats() +{ + local pkt_format="-a own -b 01:80:c2:00:00:01 88:08:00:01:00:01" + local xfail_message="software sent pause frames not detected" + local num_pkts=2000 + local -a counters + local int + local i + + # Check that there is pause frame support + for ((i = 1; i <= NUM_NETIFS; ++i)); do + int="${NETIFS[p$i]}" + if ! run_on "$int" ethtool -I --json -a "$int" > /dev/null 2>&1; then + ktap_test_skip "$TEST_NAME.tx_pause_frames" + ktap_test_skip "$TEST_NAME.rx_pause_frames" + return + fi + done + + counters=("$h1 pause tx_pause_frames $num_pkts 0 $xfail_message") + traffic_test "$h1" "$h2" "$num_pkts" "$pkt_format" \ + "${counters[@]}" + + counters=("$h1 pause rx_pause_frames $num_pkts 0") + traffic_test "$h2" "$h1" "$num_pkts" "$pkt_format" \ + "${counters[@]}" +} +SUBTESTS=$((SUBTESTS + 2)) + +setup_prepare() +{ + local iface + + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + + h2_mac=$(mac_get "$h2") +} + +ktap_print_header +ktap_set_plan $SUBTESTS + +check_ethtool_counter_group_support +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +ktap_finished From 22cb45afd221b9e4f2a1dcc74a8ff645b7293aa1 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Tue, 31 Mar 2026 15:41:06 +0800 Subject: [PATCH 1110/1561] net: mctp: perform source address lookups when we populate our dst Rather than querying the output device for its address in mctp_local_output, set up the source address when we're populating the dst structure. If no address is assigned, use MCTP_ADDR_NULL. This will allow us more flexibility when routing for NULL-source-eid cases. For now though, we still reject a NULL source address in the output path. We need to update the tests a little, so that addresses are assigned before we do the dst lookups. Signed-off-by: Jeremy Kerr Link: https://patch.msgid.link/20260331-dev-mctp-null-eids-v1-1-b4d047372eaf@codeconstruct.com.au Signed-off-by: Paolo Abeni --- include/net/mctp.h | 1 + net/mctp/route.c | 51 ++++++++++++++++++++++++-------------- net/mctp/test/route-test.c | 23 +++++++++-------- net/mctp/test/utils.c | 27 ++++++++++++++++++++ net/mctp/test/utils.h | 1 + 5 files changed, 73 insertions(+), 30 deletions(-) diff --git a/include/net/mctp.h b/include/net/mctp.h index c3207ce98f07..e1e0a69afdce 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -270,6 +270,7 @@ struct mctp_dst { struct mctp_dev *dev; unsigned int mtu; mctp_eid_t nexthop; + mctp_eid_t saddr; /* set for direct addressing */ unsigned char halen; diff --git a/net/mctp/route.c b/net/mctp/route.c index 021e04f1ea7c..f6a88e668e68 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -880,6 +880,21 @@ static bool mctp_rt_compare_exact(struct mctp_route *rt1, rt1->max == rt2->max; } +static mctp_eid_t mctp_dev_saddr(struct mctp_dev *dev) +{ + mctp_eid_t addr = MCTP_ADDR_NULL; + unsigned long flags; + + spin_lock_irqsave(&dev->addrs_lock, flags); + if (dev->num_addrs) { + /* use the outbound interface's first address as our source */ + addr = dev->addrs[0]; + } + spin_unlock_irqrestore(&dev->addrs_lock, flags); + + return addr; +} + /* must only be called on a direct route, as the final output hop */ static void mctp_dst_from_route(struct mctp_dst *dst, mctp_eid_t eid, unsigned int mtu, struct mctp_route *route) @@ -892,6 +907,7 @@ static void mctp_dst_from_route(struct mctp_dst *dst, mctp_eid_t eid, dst->mtu = min(dst->mtu, mtu); dst->halen = 0; dst->output = route->output; + dst->saddr = mctp_dev_saddr(route->dev); } int mctp_dst_from_extaddr(struct mctp_dst *dst, struct net *net, int ifindex, @@ -924,6 +940,7 @@ int mctp_dst_from_extaddr(struct mctp_dst *dst, struct net *net, int ifindex, dst->halen = halen; dst->output = mctp_dst_output; dst->nexthop = 0; + dst->saddr = mctp_dev_saddr(dev); memcpy(dst->haddr, haddr, halen); rc = 0; @@ -958,6 +975,7 @@ int mctp_route_lookup(struct net *net, unsigned int dnet, { const unsigned int max_depth = 32; unsigned int depth, mtu = 0; + struct mctp_dst dst_tmp; int rc = -EHOSTUNREACH; rcu_read_lock(); @@ -978,9 +996,15 @@ int mctp_route_lookup(struct net *net, unsigned int dnet, mtu = mtu ?: rt->mtu; if (rt->dst_type == MCTP_ROUTE_DIRECT) { - if (dst) - mctp_dst_from_route(dst, daddr, mtu, rt); - rc = 0; + mctp_dst_from_route(&dst_tmp, daddr, mtu, rt); + /* we need a source address */ + if (dst_tmp.saddr == MCTP_ADDR_NULL) { + mctp_dst_release(&dst_tmp); + } else { + if (dst) + *dst = dst_tmp; + rc = 0; + } break; } else if (rt->dst_type == MCTP_ROUTE_GATEWAY) { @@ -1116,26 +1140,15 @@ int mctp_local_output(struct sock *sk, struct mctp_dst *dst, struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); struct mctp_sk_key *key; struct mctp_hdr *hdr; - unsigned long flags; unsigned int netid; - mctp_eid_t saddr; - int rc; + int rc = 0; u8 tag; KUNIT_STATIC_STUB_REDIRECT(mctp_local_output, sk, dst, skb, daddr, req_tag); - rc = -ENODEV; - - spin_lock_irqsave(&dst->dev->addrs_lock, flags); - if (dst->dev->num_addrs == 0) { + if (dst->saddr == MCTP_ADDR_NULL) rc = -EHOSTUNREACH; - } else { - /* use the outbound interface's first address as our source */ - saddr = dst->dev->addrs[0]; - rc = 0; - } - spin_unlock_irqrestore(&dst->dev->addrs_lock, flags); netid = READ_ONCE(dst->dev->net); if (rc) @@ -1146,8 +1159,8 @@ int mctp_local_output(struct sock *sk, struct mctp_dst *dst, key = mctp_lookup_prealloc_tag(msk, netid, daddr, req_tag, &tag); else - key = mctp_alloc_local_tag(msk, netid, saddr, daddr, - false, &tag); + key = mctp_alloc_local_tag(msk, netid, dst->saddr, + daddr, false, &tag); if (IS_ERR(key)) { rc = PTR_ERR(key); @@ -1174,7 +1187,7 @@ int mctp_local_output(struct sock *sk, struct mctp_dst *dst, hdr = mctp_hdr(skb); hdr->ver = 1; hdr->dest = daddr; - hdr->src = saddr; + hdr->src = dst->saddr; /* route output functions consume the skb, even on error */ return mctp_do_fragment_route(dst, skb, dst->mtu, tag); diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c index 61c989c43ec0..639b7c41c2a2 100644 --- a/net/mctp/test/route-test.c +++ b/net/mctp/test/route-test.c @@ -174,7 +174,9 @@ static void mctp_rx_input_test_to_desc(const struct mctp_rx_input_test *t, KUNIT_ARRAY_PARAM(mctp_rx_input, mctp_rx_input_tests, mctp_rx_input_test_to_desc); -/* set up a local dev, route on EID 8, and a socket listening on type 0 */ +/* set up a local dev (with addr 8), route on EID 8, and a socket listening on + * type 0 + */ static void __mctp_route_test_init(struct kunit *test, struct mctp_test_dev **devp, struct mctp_dst *dst, @@ -191,6 +193,10 @@ static void __mctp_route_test_init(struct kunit *test, if (netid != MCTP_NET_ANY) WRITE_ONCE(dev->mdev->net, netid); + dev->mdev->addrs = kmalloc_objs(u8, 1, GFP_KERNEL); + dev->mdev->num_addrs = 1; + dev->mdev->addrs[0] = 8; + mctp_test_dst_setup(test, dst, dev, 68); rc = sock_create_kern(&init_net, AF_MCTP, SOCK_DGRAM, 0, &sock); @@ -928,11 +934,6 @@ static void mctp_test_flow_init(struct kunit *test, */ __mctp_route_test_init(test, &dev, dst, sock, MCTP_NET_ANY); - /* Assign a single EID. ->addrs is freed on mctp netdev release */ - dev->mdev->addrs = kmalloc(sizeof(u8), GFP_KERNEL); - dev->mdev->num_addrs = 1; - dev->mdev->addrs[0] = 8; - skb = alloc_skb(len + sizeof(struct mctp_hdr) + 1, GFP_KERNEL); KUNIT_ASSERT_TRUE(test, skb); __mctp_cb(skb); @@ -1058,8 +1059,6 @@ static void mctp_test_route_output_key_create(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); WRITE_ONCE(dev->mdev->net, netid); - mctp_test_dst_setup(test, &dst, dev, 68); - rc = sock_create_kern(&init_net, AF_MCTP, SOCK_DGRAM, 0, &sock); KUNIT_ASSERT_EQ(test, rc, 0); @@ -1067,6 +1066,8 @@ static void mctp_test_route_output_key_create(struct kunit *test) dev->mdev->num_addrs = 1; dev->mdev->addrs[0] = src_eid; + mctp_test_dst_setup(test, &dst, dev, 68); + skb = alloc_skb(sizeof(struct mctp_hdr) + 1 + len, GFP_KERNEL); KUNIT_ASSERT_TRUE(test, skb); __mctp_cb(skb); @@ -1165,7 +1166,7 @@ static void mctp_test_route_gw_lookup(struct kunit *test) struct mctp_test_dev *dev; int rc; - dev = mctp_test_create_dev(); + dev = mctp_test_create_dev_with_addr(8); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); /* 8 (local) -> 10 (gateway) via 9 (direct) */ @@ -1195,7 +1196,7 @@ static void mctp_test_route_gw_loop(struct kunit *test) struct mctp_test_dev *dev; int rc; - dev = mctp_test_create_dev(); + dev = mctp_test_create_dev_with_addr(8); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); /* two routes using each other as the gw */ @@ -1254,7 +1255,7 @@ static void mctp_test_route_gw_mtu(struct kunit *test) unsigned int netid; int rc; - dev = mctp_test_create_dev(); + dev = mctp_test_create_dev_with_addr(8); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); dev->ndev->mtu = mtus->dev; mdev = dev->mdev; diff --git a/net/mctp/test/utils.c b/net/mctp/test/utils.c index 97afe8cd2b05..2f79f8c1a2b4 100644 --- a/net/mctp/test/utils.c +++ b/net/mctp/test/utils.c @@ -80,6 +80,26 @@ struct mctp_test_dev *mctp_test_create_dev(void) return __mctp_test_create_dev(0, NULL); } +struct mctp_test_dev *mctp_test_create_dev_with_addr(mctp_eid_t addr) +{ + struct mctp_test_dev *dev; + + dev = __mctp_test_create_dev(0, NULL); + if (!dev) + return NULL; + + dev->mdev->addrs = kmalloc_objs(u8, 1, GFP_KERNEL); + if (!dev->mdev->addrs) { + mctp_test_destroy_dev(dev); + return NULL; + } + + dev->mdev->num_addrs = 1; + dev->mdev->addrs[0] = 8; + + return dev; +} + struct mctp_test_dev *mctp_test_create_dev_lladdr(unsigned short lladdr_len, const unsigned char *lladdr) { @@ -171,6 +191,8 @@ struct mctp_test_route *mctp_test_create_route_gw(struct net *net, void mctp_test_dst_setup(struct kunit *test, struct mctp_dst *dst, struct mctp_test_dev *dev, unsigned int mtu) { + unsigned long flags; + KUNIT_EXPECT_NOT_ERR_OR_NULL(test, dev); memset(dst, 0, sizeof(*dst)); @@ -179,6 +201,11 @@ void mctp_test_dst_setup(struct kunit *test, struct mctp_dst *dst, __mctp_dev_get(dst->dev->dev); dst->mtu = mtu; dst->output = mctp_test_dst_output; + dst->saddr = MCTP_ADDR_NULL; + spin_lock_irqsave(&dev->mdev->addrs_lock, flags); + if (dev->mdev->num_addrs) + dst->saddr = dev->mdev->addrs[0]; + spin_unlock_irqrestore(&dev->mdev->addrs_lock, flags); } void mctp_test_route_destroy(struct kunit *test, struct mctp_test_route *rt) diff --git a/net/mctp/test/utils.h b/net/mctp/test/utils.h index 4cc90c9da4d1..47603732e6a5 100644 --- a/net/mctp/test/utils.h +++ b/net/mctp/test/utils.h @@ -42,6 +42,7 @@ struct mctp_test_bind_setup { }; struct mctp_test_dev *mctp_test_create_dev(void); +struct mctp_test_dev *mctp_test_create_dev_with_addr(mctp_eid_t eid); struct mctp_test_dev *mctp_test_create_dev_lladdr(unsigned short lladdr_len, const unsigned char *lladdr); void mctp_test_destroy_dev(struct mctp_test_dev *dev); From 8af20defc4edb9e5ded39d36e1c7541569cd84d2 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Tue, 31 Mar 2026 15:41:07 +0800 Subject: [PATCH 1111/1561] net: mctp: allow local TX with no address assigned If we're operating as a non-bus-owner endpoint, we may want to perform MCTP communication to get an address assigned. In this case, we'll have no local addresses, but can TX just fine either with extended routing, or where a direct route exists. Signed-off-by: Jeremy Kerr Link: https://patch.msgid.link/20260331-dev-mctp-null-eids-v1-2-b4d047372eaf@codeconstruct.com.au Signed-off-by: Paolo Abeni --- net/mctp/route.c | 18 ++---- net/mctp/test/route-test.c | 114 +++++++++++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+), 14 deletions(-) diff --git a/net/mctp/route.c b/net/mctp/route.c index f6a88e668e68..8c484776e9e4 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -997,8 +997,8 @@ int mctp_route_lookup(struct net *net, unsigned int dnet, if (rt->dst_type == MCTP_ROUTE_DIRECT) { mctp_dst_from_route(&dst_tmp, daddr, mtu, rt); - /* we need a source address */ - if (dst_tmp.saddr == MCTP_ADDR_NULL) { + /* cannot do gateway-ed routes without a src */ + if (dst_tmp.saddr == MCTP_ADDR_NULL && depth != 0) { mctp_dst_release(&dst_tmp); } else { if (dst) @@ -1141,19 +1141,13 @@ int mctp_local_output(struct sock *sk, struct mctp_dst *dst, struct mctp_sk_key *key; struct mctp_hdr *hdr; unsigned int netid; - int rc = 0; u8 tag; KUNIT_STATIC_STUB_REDIRECT(mctp_local_output, sk, dst, skb, daddr, req_tag); - if (dst->saddr == MCTP_ADDR_NULL) - rc = -EHOSTUNREACH; netid = READ_ONCE(dst->dev->net); - if (rc) - goto err_free; - if (req_tag & MCTP_TAG_OWNER) { if (req_tag & MCTP_TAG_PREALLOC) key = mctp_lookup_prealloc_tag(msk, netid, daddr, @@ -1163,8 +1157,8 @@ int mctp_local_output(struct sock *sk, struct mctp_dst *dst, daddr, false, &tag); if (IS_ERR(key)) { - rc = PTR_ERR(key); - goto err_free; + kfree_skb(skb); + return PTR_ERR(key); } mctp_skb_set_flow(skb, key); /* done with the key in this scope */ @@ -1191,10 +1185,6 @@ int mctp_local_output(struct sock *sk, struct mctp_dst *dst, /* route output functions consume the skb, even on error */ return mctp_do_fragment_route(dst, skb, dst->mtu, tag); - -err_free: - kfree_skb(skb); - return rc; } /* route management */ diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c index 639b7c41c2a2..fa28af9ac18e 100644 --- a/net/mctp/test/route-test.c +++ b/net/mctp/test/route-test.c @@ -1570,6 +1570,117 @@ cleanup: __mctp_route_test_fini(test, dev, &dst, sock_ty0); } +static void mctp_test_route_output_direct_no_eids(struct kunit *test) +{ + struct mctp_dst dst = { 0 }; + struct sk_buff *skb, *skb2; + struct mctp_test_route *rt; + struct mctp_test_dev *dev; + struct socket *sock; + const int len = 2; + int rc; + + dev = mctp_test_create_dev(); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + + rt = mctp_test_create_route_direct(&init_net, dev->mdev, 9, 68); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt); + + rc = mctp_route_lookup(&init_net, dev->mdev->net, 9, &dst); + KUNIT_ASSERT_EQ(test, rc, 0); + + rc = sock_create_kern(&init_net, AF_MCTP, SOCK_DGRAM, 0, &sock); + KUNIT_ASSERT_EQ(test, rc, 0); + + skb = alloc_skb(sizeof(struct mctp_hdr) + 1 + len, GFP_KERNEL); + KUNIT_ASSERT_TRUE(test, skb); + __mctp_cb(skb); + skb_reserve(skb, sizeof(struct mctp_hdr) + 1 + len); + memset(skb_put(skb, len), 0, len); + + rc = mctp_local_output(sock->sk, &dst, skb, 9, MCTP_TAG_OWNER); + KUNIT_ASSERT_EQ(test, rc, 0); + + KUNIT_ASSERT_EQ(test, dev->pkts.qlen, 1); + + skb2 = skb_dequeue(&dev->pkts); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb2); + + kfree_skb(skb2); + sock_release(sock); + mctp_dst_release(&dst); + mctp_test_route_destroy(test, rt); + mctp_test_destroy_dev(dev); +} + +static void mctp_test_route_output_gw_no_eids(struct kunit *test) +{ + struct mctp_test_route *rt1, *rt2; + struct mctp_test_dev *dev; + struct mctp_dst dst = { 0 }; + int rc; + + dev = mctp_test_create_dev(); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + + /* route: direct to bridge */ + rt1 = mctp_test_create_route_direct(&init_net, dev->mdev, 9, 68); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt1); + + /* route: bridge gw to final dest */ + rt2 = mctp_test_create_route_gw(&init_net, dev->mdev->net, 10, 9, 0); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt2); + + /* route lookup should fail, due to no source address on dev */ + rc = mctp_route_lookup(&init_net, dev->mdev->net, 10, &dst); + KUNIT_ASSERT_NE(test, rc, 0); + + mctp_test_route_destroy(test, rt1); + mctp_test_route_destroy(test, rt2); + mctp_test_destroy_dev(dev); +} + +static void mctp_test_route_output_extaddr_no_eids(struct kunit *test) +{ + struct mctp_dst dst = { 0 }; + struct sk_buff *skb, *skb2; + struct mctp_test_dev *dev; + struct socket *sock; + const int len = 1; + struct net *net; + int rc; + + dev = mctp_test_create_dev(); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + + net = dev_net(dev->ndev); + + rc = mctp_dst_from_extaddr(&dst, net, dev->ndev->ifindex, 0, NULL); + KUNIT_ASSERT_EQ(test, rc, 0); + + rc = sock_create_kern(net, AF_MCTP, SOCK_DGRAM, 0, &sock); + KUNIT_ASSERT_EQ(test, rc, 0); + + skb = alloc_skb(sizeof(struct mctp_hdr) + 1 + len, GFP_KERNEL); + KUNIT_ASSERT_TRUE(test, skb); + __mctp_cb(skb); + skb_reserve(skb, sizeof(struct mctp_hdr) + 1 + len); + memset(skb_put(skb, len), 0, len); + + rc = mctp_local_output(sock->sk, &dst, skb, 9, MCTP_TAG_OWNER); + KUNIT_ASSERT_EQ(test, rc, 0); + + KUNIT_ASSERT_EQ(test, dev->pkts.qlen, 1); + + skb2 = skb_dequeue(&dev->pkts); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb2); + + kfree_skb(skb2); + sock_release(sock); + mctp_dst_release(&dst); + mctp_test_destroy_dev(dev); +} + static struct kunit_case mctp_test_cases[] = { KUNIT_CASE_PARAM(mctp_test_fragment, mctp_frag_gen_params), KUNIT_CASE_PARAM(mctp_test_rx_input, mctp_rx_input_gen_params), @@ -1592,6 +1703,9 @@ static struct kunit_case mctp_test_cases[] = { KUNIT_CASE_PARAM(mctp_test_route_gw_mtu, mctp_route_gw_mtu_gen_params), KUNIT_CASE(mctp_test_route_gw_output), KUNIT_CASE_PARAM(mctp_test_bind_lookup, mctp_bind_lookup_gen_params), + KUNIT_CASE(mctp_test_route_output_direct_no_eids), + KUNIT_CASE(mctp_test_route_output_gw_no_eids), + KUNIT_CASE(mctp_test_route_output_extaddr_no_eids), {} }; From 0d8647bc74cb50edf02e8c88977657596f20fb17 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Tue, 31 Mar 2026 15:41:08 +0800 Subject: [PATCH 1112/1561] net: mctp: don't require a route for null-EID ingress Currently, if we receive a physically-addressed packet for the local stack, we perform a route_lookup_null to find a matching device-based route. If a route is present, it will always have the ->output fn set to mctp_dst_input, which provides our delivery mechanism. However, if we don't yet have any local addresses assigned, we won't have any local routes to lookup, so this will fail. One of the use-cases for physical addressing is initial address assignment on endpoint nodes, where we would have no addresses, and therefore no local routes. Instead of iterating routes (looking for one matching the dev), just create a suitable mctp_dst for the device directly. Add a testcase for the no-route case too. Signed-off-by: Jeremy Kerr Link: https://patch.msgid.link/20260331-dev-mctp-null-eids-v1-3-b4d047372eaf@codeconstruct.com.au Signed-off-by: Paolo Abeni --- net/mctp/route.c | 33 ++++++++++++----------------- net/mctp/test/route-test.c | 43 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 20 deletions(-) diff --git a/net/mctp/route.c b/net/mctp/route.c index 8c484776e9e4..78263e7ae423 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -1017,29 +1017,22 @@ int mctp_route_lookup(struct net *net, unsigned int dnet, return rc; } -static int mctp_route_lookup_null(struct net *net, struct net_device *dev, - struct mctp_dst *dst) +static int mctp_dst_input_null(struct net *net, struct net_device *dev, + struct mctp_dst *dst) { - int rc = -EHOSTUNREACH; - struct mctp_route *rt; - rcu_read_lock(); - - list_for_each_entry_rcu(rt, &net->mctp.routes, list) { - if (rt->dst_type != MCTP_ROUTE_DIRECT || rt->type != RTN_LOCAL) - continue; - - if (rt->dev->dev != dev) - continue; - - mctp_dst_from_route(dst, 0, 0, rt); - rc = 0; - break; - } - + dst->dev = __mctp_dev_get(dev); rcu_read_unlock(); - return rc; + if (!dst->dev) + return -EHOSTUNREACH; + + dst->mtu = READ_ONCE(dev->mtu); + dst->halen = 0; + dst->output = mctp_dst_input; + dst->nexthop = 0; + + return 0; } static int mctp_do_fragment_route(struct mctp_dst *dst, struct sk_buff *skb, @@ -1369,7 +1362,7 @@ static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev, /* NULL EID, but addressed to our physical address */ if (rc && mh->dest == MCTP_ADDR_NULL && skb->pkt_type == PACKET_HOST) - rc = mctp_route_lookup_null(net, dev, &dst); + rc = mctp_dst_input_null(net, dev, &dst); if (rc) goto err_drop; diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c index fa28af9ac18e..e1033643fab0 100644 --- a/net/mctp/test/route-test.c +++ b/net/mctp/test/route-test.c @@ -914,6 +914,48 @@ static void mctp_test_route_input_cloned_frag(struct kunit *test) __mctp_route_test_fini(test, dev, &dst, sock); } +/* check we can receive an incoming packet with the null EID as daddr, when + * no RTN_LOCAL routes are present. + */ +static void mctp_test_route_input_null_eid(struct kunit *test) +{ + struct mctp_hdr hdr = RX_HDR(1, 10, 0, FL_S | FL_E | FL_TO); + struct sk_buff *skb_pkt, *skb_sk; + struct mctp_test_dev *dev; + struct sockaddr_mctp addr; + struct socket *sock; + u8 type = 0; + int rc; + + dev = mctp_test_create_dev(); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + + rc = sock_create_kern(&init_net, AF_MCTP, SOCK_DGRAM, 0, &sock); + KUNIT_ASSERT_EQ(test, rc, 0); + + addr.smctp_family = AF_MCTP; + addr.smctp_network = MCTP_NET_ANY; + addr.smctp_addr.s_addr = MCTP_ADDR_ANY; + addr.smctp_type = type; + rc = kernel_bind(sock, (struct sockaddr_unsized *)&addr, sizeof(addr)); + KUNIT_ASSERT_EQ(test, rc, 0); + + skb_pkt = mctp_test_create_skb_data(&hdr, &type); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb_pkt); + + skb_pkt->dev = dev->ndev; + skb_pkt->pkt_type = PACKET_HOST; + + mctp_pkttype_receive(skb_pkt, dev->ndev, &mctp_packet_type, NULL); + + skb_sk = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc); + KUNIT_EXPECT_NOT_ERR_OR_NULL(test, skb_sk); + + skb_free_datagram(sock->sk, skb_sk); + sock_release(sock); + mctp_test_destroy_dev(dev); +} + #if IS_ENABLED(CONFIG_MCTP_FLOWS) static void mctp_test_flow_init(struct kunit *test, @@ -1693,6 +1735,7 @@ static struct kunit_case mctp_test_cases[] = { KUNIT_CASE(mctp_test_route_input_sk_fail_frag), KUNIT_CASE(mctp_test_route_input_multiple_nets_bind), KUNIT_CASE(mctp_test_route_input_multiple_nets_key), + KUNIT_CASE(mctp_test_route_input_null_eid), KUNIT_CASE(mctp_test_packet_flow), KUNIT_CASE(mctp_test_fragment_flow), KUNIT_CASE(mctp_test_route_output_key_create), From 269389ba539834ec80e4d55583fca2cd70e4dc9c Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 31 Mar 2026 12:33:24 +0200 Subject: [PATCH 1113/1561] net: airoha: Set REG_RX_CPU_IDX() once in airoha_qdma_fill_rx_queue() It is not necessary to update REG_RX_CPU_IDX register for each iteration of the descriptor loop in airoha_qdma_fill_rx_queue routine. Move REG_RX_CPU_IDX configuration out of the descriptor loop and rely on the last queue head value updated in the descriptor loop. Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260331-airoha-cpu-idx-out-off-loop-v1-1-75c66b428f50@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/airoha/airoha_eth.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 2beba017e791..a9fa96e103ed 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -572,11 +572,12 @@ static int airoha_qdma_fill_rx_queue(struct airoha_queue *q) WRITE_ONCE(desc->msg1, 0); WRITE_ONCE(desc->msg2, 0); WRITE_ONCE(desc->msg3, 0); + } + if (nframes) airoha_qdma_rmw(qdma, REG_RX_CPU_IDX(qid), RX_RING_CPU_IDX_MASK, FIELD_PREP(RX_RING_CPU_IDX_MASK, q->head)); - } return nframes; } From 989a9c20f63e378e0bb8b05bf82560d7da945de4 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 1 Apr 2026 08:21:14 +0100 Subject: [PATCH 1114/1561] net: stmmac: fix channel TSO enable on resume Rather than configuring the channels depending on whether GSO/TSO is currently enabled by the user, always enable if the hardware has TSO support and the platform wants TSO to be enabled. This avoids the channel TSO enable bit being disabled after a resume when the user has disabled TSO features. This will cause problems when the user re-enables TSO. This bug goes back to commit f748be531d70 ("stmmac: support new GMAC4") Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w7pt4-0000000Easn-14WL@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 35cf4491ec16..52864561f14e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3705,7 +3705,7 @@ static int stmmac_hw_setup(struct net_device *dev) stmmac_set_rings_length(priv); /* Enable TSO */ - if (priv->tso) { + if (priv->dma_cap.tsoen && priv->plat->flags & STMMAC_FLAG_TSO_EN) { for (chan = 0; chan < tx_cnt; chan++) { struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[chan]; From afe840ddf15c9960e4c4522c95dea459f4fa055b Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 1 Apr 2026 08:21:19 +0100 Subject: [PATCH 1115/1561] net: stmmac: fix .ndo_fix_features() netdev features documentation requires that .ndo_fix_features() is stateless: it shouldn't modify driver state. Yet, stmmac_fix_features() does exactly that, changing whether GSO frames are processed by the driver. Move this code to stmmac_set_features() instead, which is the correct place for it. We don't need to check whether TSO is supported; this is already handled via the setup of netdev->hw_features, and we are guaranteed that if netdev->hw_features indicates that a feature is not supported, .ndo_set_features() won't be called with it set. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w7pt9-0000000East-1YAO@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 52864561f14e..f76ed64fb9e6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -6073,14 +6073,6 @@ static netdev_features_t stmmac_fix_features(struct net_device *dev, if (priv->plat->bugged_jumbo && (dev->mtu > ETH_DATA_LEN)) features &= ~NETIF_F_CSUM_MASK; - /* Disable tso if asked by ethtool */ - if ((priv->plat->flags & STMMAC_FLAG_TSO_EN) && (priv->dma_cap.tsoen)) { - if (features & NETIF_F_TSO) - priv->tso = true; - else - priv->tso = false; - } - return features; } @@ -6107,6 +6099,8 @@ static int stmmac_set_features(struct net_device *netdev, stmmac_enable_sph(priv, priv->ioaddr, sph_en, chan); } + priv->tso = !!(features & NETIF_F_TSO); + if (features & NETIF_F_HW_VLAN_CTAG_RX) priv->hw->hw_vlan_en = true; else From e32820264c2949a3e75bc5083e49c885e3bbaf62 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 1 Apr 2026 08:21:24 +0100 Subject: [PATCH 1116/1561] net: stmmac: fix TSO support when some channels have TBS available According to the STM32MP25xx manual, which is dwmac v5.3, TBS (time based scheduling) is not permitted for channels which have hardware TSO enabled. Intel's commit 5e6038b88a57 ("net: stmmac: fix TSO and TBS feature enabling during driver open") concurs with this, but it is incomplete. This commit avoids enabling TSO support on the channels which have TBS available, which, as far as the hardware is concerned, means we do not set the TSE bit in the DMA channel's transmit control register. However, the net device's features apply to all queues(channels), which means these channels may still be handed TSO skbs to transmit, and the driver will pass them to stmmac_tso_xmit(). This will generate the descriptors for TSO, even though the channel has the TSE bit clear. Fix this by checking whether the queue(channel) has TBS available, and if it does, fall back to software GSO support. Fixes: 5e6038b88a57 ("net: stmmac: fix TSO and TBS feature enabling during driver open") Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w7ptE-0000000Easz-28tv@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 32 ++++++++++++++++--- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index f76ed64fb9e6..729072622fcb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3619,6 +3619,17 @@ static void stmmac_safety_feat_configuration(struct stmmac_priv *priv) } } +/* STM32MP25xx (dwmac v5.3) states "Do not enable time-based scheduling for + * channels on which the TSO feature is enabled." If we have a skb for a + * channel which has TBS enabled, fall back to software GSO. + */ +static bool stmmac_tso_channel_permitted(struct stmmac_priv *priv, + unsigned int chan) +{ + /* TSO and TBS cannot co-exist */ + return !(priv->dma_conf.tx_queue[chan].tbs & STMMAC_TBS_AVAIL); +} + /** * stmmac_hw_setup - setup mac in a usable state. * @dev : pointer to the device structure. @@ -3707,10 +3718,7 @@ static int stmmac_hw_setup(struct net_device *dev) /* Enable TSO */ if (priv->dma_cap.tsoen && priv->plat->flags & STMMAC_FLAG_TSO_EN) { for (chan = 0; chan < tx_cnt; chan++) { - struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[chan]; - - /* TSO and TBS cannot co-exist */ - if (tx_q->tbs & STMMAC_TBS_AVAIL) + if (!stmmac_tso_channel_permitted(priv, chan)) continue; stmmac_enable_tso(priv, priv->ioaddr, 1, chan); @@ -4919,6 +4927,21 @@ max_sdu_err: return NETDEV_TX_OK; } +static netdev_features_t stmmac_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + u16 queue; + + if (skb_is_gso(skb)) { + queue = skb_get_queue_mapping(skb); + if (!stmmac_tso_channel_permitted(netdev_priv(dev), queue)) + features &= ~NETIF_F_GSO_MASK; + } + + return vlan_features_check(skb, features); +} + static void stmmac_rx_vlan(struct net_device *dev, struct sk_buff *skb) { struct vlan_ethhdr *veth = skb_vlan_eth_hdr(skb); @@ -7208,6 +7231,7 @@ static void stmmac_get_stats64(struct net_device *dev, struct rtnl_link_stats64 static const struct net_device_ops stmmac_netdev_ops = { .ndo_open = stmmac_open, .ndo_start_xmit = stmmac_xmit, + .ndo_features_check = stmmac_features_check, .ndo_stop = stmmac_release, .ndo_change_mtu = stmmac_change_mtu, .ndo_fix_features = stmmac_fix_features, From f799b5dab9c98a4b52a93d1c357916e0f0b7e663 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 1 Apr 2026 08:21:29 +0100 Subject: [PATCH 1117/1561] net: stmmac: add stmmac_tso_header_size() We will need to compute the size of the protocol headers in two places, so move this into a separate function. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w7ptJ-0000000Eat5-2ZlA@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 729072622fcb..3e7675cfd57e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4373,6 +4373,18 @@ static void stmmac_flush_tx_descriptors(struct stmmac_priv *priv, int queue) stmmac_set_queue_tx_tail_ptr(priv, tx_q, queue, tx_q->cur_tx); } +static size_t stmmac_tso_header_size(struct sk_buff *skb) +{ + size_t size; + + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) + size = skb_transport_offset(skb) + sizeof(struct udphdr); + else + size = skb_tcp_all_headers(skb); + + return size; +} + /** * stmmac_tso_xmit - Tx entry point of the driver for oversized frames (TSO) * @skb : the socket buffer @@ -4444,13 +4456,11 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) first_tx = tx_q->cur_tx; /* Compute header lengths */ - if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) { - proto_hdr_len = skb_transport_offset(skb) + sizeof(struct udphdr); + proto_hdr_len = stmmac_tso_header_size(skb); + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) hdr = sizeof(struct udphdr); - } else { - proto_hdr_len = skb_tcp_all_headers(skb); + else hdr = tcp_hdrlen(skb); - } /* Desc availability based on threshold should be enough safe */ if (unlikely(stmmac_tx_avail(priv, queue) < From 6732e474f880468435a79d05811c8c814c5d71f8 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 1 Apr 2026 08:21:34 +0100 Subject: [PATCH 1118/1561] net: stmmac: add TSO check for header length According to the STM32MP151 documentation which covers dwmac v4.2, the hardware TSO feature can handle header lengths up to a maximum of 1023 bytes. Add a .ndo_features_check() method implementation to check the header length meets these requirements, otherwise fall back to software GSO. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w7ptO-0000000EatC-39il@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 3e7675cfd57e..c23de6622724 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4385,6 +4385,20 @@ static size_t stmmac_tso_header_size(struct sk_buff *skb) return size; } +/* STM32MP151 (dwmac v4.2) and STM32MP25xx (dwmac v5.3) states for TDES2 normal + * (read format) descriptor that the maximum header length supported for the + * TSO feature is 1023 bytes. + * + * While IPv4 is limited to MAC+VLAN+IPv4+ext+TCP+ext = 138 bytes, the IPv6 + * extension headers aren't similarly limited. + */ +static bool stmmac_tso_valid_packet(struct sk_buff *skb) +{ + size_t header_len = stmmac_tso_header_size(skb); + + return header_len <= 1023; +} + /** * stmmac_tso_xmit - Tx entry point of the driver for oversized frames (TSO) * @skb : the socket buffer @@ -4945,7 +4959,8 @@ static netdev_features_t stmmac_features_check(struct sk_buff *skb, if (skb_is_gso(skb)) { queue = skb_get_queue_mapping(skb); - if (!stmmac_tso_channel_permitted(netdev_priv(dev), queue)) + if (!stmmac_tso_channel_permitted(netdev_priv(dev), queue) || + !stmmac_tso_valid_packet(skb)) features &= ~NETIF_F_GSO_MASK; } From c05a81cbee87e511b823a6f64f9315aef41eab57 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 1 Apr 2026 08:21:39 +0100 Subject: [PATCH 1119/1561] net: stmmac: add GSO MSS checks Add GSO MSS checks to stmmac_features_check(). Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w7ptT-0000000EatI-3feh@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index c23de6622724..e6dff31188e6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4391,12 +4391,19 @@ static size_t stmmac_tso_header_size(struct sk_buff *skb) * * While IPv4 is limited to MAC+VLAN+IPv4+ext+TCP+ext = 138 bytes, the IPv6 * extension headers aren't similarly limited. + * + * Fall back to software GSO for these skbs. Also check that the MSS is >= + * the recommended 64 bytes (documented in ETH_DMACxCR register description), + * and that a the header plus MSS is not larger than 16383 (documented in + * "Building the Descriptor and the packet for the TSO feature"). */ static bool stmmac_tso_valid_packet(struct sk_buff *skb) { size_t header_len = stmmac_tso_header_size(skb); + unsigned int gso_size = skb_shinfo(skb)->gso_size; - return header_len <= 1023; + return header_len <= 1023 && gso_size >= 64 && + header_len + gso_size < 16383; } /** From 3f6a6eb9ef21b2eb30b3a3ea096759b37368b257 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 1 Apr 2026 08:21:44 +0100 Subject: [PATCH 1120/1561] net: stmmac: move TSO VLAN tag insertion to core code stmmac_tso_xmit() checks whether the skbuff is trying to offload vlan tag insertion to hardware, which from the comment in the code appears to be buggy when the TSO feature is used. Rather than stmmac_tso_xmit() inserting the VLAN tag, handle this in stmmac_features_check() which will then use core net code to handle this. See net/core/dev.c::validate_xmit_skb() Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w7ptY-0000000EatO-42Qv@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 23 ++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index e6dff31188e6..ce6c3cc71bf0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4456,19 +4456,6 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) u8 proto_hdr_len, hdr; dma_addr_t des; - /* Always insert VLAN tag to SKB payload for TSO frames. - * - * Never insert VLAN tag by HW, since segments split by - * TSO engine will be un-tagged by mistake. - */ - if (skb_vlan_tag_present(skb)) { - skb = __vlan_hwaccel_push_inside(skb); - if (unlikely(!skb)) { - priv->xstats.tx_dropped++; - return NETDEV_TX_OK; - } - } - nfrags = skb_shinfo(skb)->nr_frags; queue = skb_get_queue_mapping(skb); @@ -4969,6 +4956,16 @@ static netdev_features_t stmmac_features_check(struct sk_buff *skb, if (!stmmac_tso_channel_permitted(netdev_priv(dev), queue) || !stmmac_tso_valid_packet(skb)) features &= ~NETIF_F_GSO_MASK; + + /* If we are going to be using hardware TSO, always insert + * VLAN tag to SKB payload for TSO frames. + * + * Never insert VLAN tag by HW, since segments split by + * TSO engine will be un-tagged by mistake. + */ + if (features & NETIF_F_GSO_MASK) + features &= ~(NETIF_F_HW_VLAN_STAG_TX | + NETIF_F_HW_VLAN_CTAG_TX); } return vlan_features_check(skb, features); From b55dfb173ce808ecc8c598f2c378a9b04c5ce241 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 1 Apr 2026 08:21:50 +0100 Subject: [PATCH 1121/1561] net: stmmac: move check for hardware checksum supported Add a check in .ndo_features_check() to indicate whether hardware checksum can be performed on the skbuff. Where hardware checksum is not supported - either because the channel does not support Tx COE or the skb isn't suitable (stmmac uses a tighter test than can_checksum_protocol()) we also need to disable TSO, which will be done by harmonize_features() in net/core/dev.c This fixes a bug where a channel which has COE disabled may still receive TSO skbuffs. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w7pte-0000000EatU-0ILt@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index ce6c3cc71bf0..e88107a0baf2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4757,22 +4757,6 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) /* Check if VLAN can be inserted by HW */ has_vlan = stmmac_vlan_insert(priv, skb, tx_q); - csum_insertion = (skb->ip_summed == CHECKSUM_PARTIAL); - /* DWMAC IPs can be synthesized to support tx coe only for a few tx - * queues. In that case, checksum offloading for those queues that don't - * support tx coe needs to fallback to software checksum calculation. - * - * Packets that won't trigger the COE e.g. most DSA-tagged packets will - * also have to be checksummed in software. - */ - if (csum_insertion && - (priv->plat->tx_queues_cfg[queue].coe_unsupported || - !stmmac_has_ip_ethertype(skb))) { - if (unlikely(skb_checksum_help(skb))) - goto dma_map_err; - csum_insertion = !csum_insertion; - } - entry = tx_q->cur_tx; first_entry = entry; WARN_ON(tx_q->tx_skbuff[first_entry]); @@ -4788,6 +4772,8 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) if (enh_desc) is_jumbo = stmmac_is_jumbo_frm(priv, skb->len, enh_desc); + csum_insertion = skb->ip_summed == CHECKSUM_PARTIAL; + if (unlikely(is_jumbo)) { entry = stmmac_jumbo_frm(priv, tx_q, skb, csum_insertion); if (unlikely(entry < 0) && (entry != -EINVAL)) @@ -4949,11 +4935,25 @@ static netdev_features_t stmmac_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features) { - u16 queue; + struct stmmac_priv *priv = netdev_priv(dev); + u16 queue = skb_get_queue_mapping(skb); + + /* DWMAC IPs can be synthesized to support tx coe only for a few tx + * queues. In that case, checksum offloading for those queues that don't + * support tx coe needs to fallback to software checksum calculation. + * + * Packets that won't trigger the COE e.g. most DSA-tagged packets will + * also have to be checksummed in software. + * + * Note that disabling hardware checksumming also disables TSO. See + * harmonize_features() in net/core/dev.c + */ + if (priv->plat->tx_queues_cfg[queue].coe_unsupported || + !stmmac_has_ip_ethertype(skb)) + features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); if (skb_is_gso(skb)) { - queue = skb_get_queue_mapping(skb); - if (!stmmac_tso_channel_permitted(netdev_priv(dev), queue) || + if (!stmmac_tso_channel_permitted(priv, queue) || !stmmac_tso_valid_packet(skb)) features &= ~NETIF_F_GSO_MASK; From 2e4082e4b739191d71e03fe6c55cec68d36f67fe Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 1 Apr 2026 08:21:55 +0100 Subject: [PATCH 1122/1561] net: stmmac: simplify GSO/TSO test in stmmac_xmit() The test in stmmac_xmit() to see whether we should pass the skbuff to stmmac_tso_xmit() is more complex than it needs to be. This test can be simplified by storing the mask of GSO types that we will pass, and setting it according to the enabled features. Note that "tso" is a mis-nomer since commit b776620651a1 ("net: stmmac: Implement UDP Segmentation Offload"). Also note that this commit controls both via the TSO feature. We preserve this behaviour in this commit. Also, this commit unconditionally accessed skb_shinfo(skb)->gso_type for all frames, even when skb_is_gso() was false. This access is eliminated. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w7ptj-0000000Eatb-11zK@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 3 +- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 28 +++++++++++-------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 919a93a52390..8ba8f03e1ce0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -265,8 +265,9 @@ struct stmmac_priv { u32 rx_coal_frames[MTL_MAX_RX_QUEUES]; int hwts_tx_en; + /* skb_shinfo(skb)->gso_type types that we handle */ + unsigned int gso_enabled_types; bool tx_path_in_lpi_mode; - bool tso; bool sph_active; bool sph_capable; u32 sarc_type; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index e88107a0baf2..72180c749add 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4373,6 +4373,18 @@ static void stmmac_flush_tx_descriptors(struct stmmac_priv *priv, int queue) stmmac_set_queue_tx_tail_ptr(priv, tx_q, queue, tx_q->cur_tx); } +static void stmmac_set_gso_types(struct stmmac_priv *priv, bool tso) +{ + if (!tso) { + priv->gso_enabled_types = 0; + } else { + /* Manage oversized TCP frames for GMAC4 device */ + priv->gso_enabled_types = SKB_GSO_TCPV4 | SKB_GSO_TCPV6; + if (priv->plat->core_type == DWMAC_CORE_GMAC4) + priv->gso_enabled_types |= SKB_GSO_UDP_L4; + } +} + static size_t stmmac_tso_header_size(struct sk_buff *skb) { size_t size; @@ -4706,7 +4718,6 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) u32 queue = skb_get_queue_mapping(skb); int nfrags = skb_shinfo(skb)->nr_frags; unsigned int first_entry, tx_packets; - int gso = skb_shinfo(skb)->gso_type; struct stmmac_txq_stats *txq_stats; struct dma_desc *desc, *first_desc; struct stmmac_tx_queue *tx_q; @@ -4718,14 +4729,9 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) if (priv->tx_path_in_lpi_mode && priv->eee_sw_timer_en) stmmac_stop_sw_lpi(priv); - /* Manage oversized TCP frames for GMAC4 device */ - if (skb_is_gso(skb) && priv->tso) { - if (gso & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) - return stmmac_tso_xmit(skb, dev); - if (priv->plat->core_type == DWMAC_CORE_GMAC4 && - (gso & SKB_GSO_UDP_L4)) - return stmmac_tso_xmit(skb, dev); - } + if (skb_is_gso(skb) && + skb_shinfo(skb)->gso_type & priv->gso_enabled_types) + return stmmac_tso_xmit(skb, dev); if (priv->est && priv->est->enable && priv->est->max_sdu[queue]) { @@ -6151,7 +6157,7 @@ static int stmmac_set_features(struct net_device *netdev, stmmac_enable_sph(priv, priv->ioaddr, sph_en, chan); } - priv->tso = !!(features & NETIF_F_TSO); + stmmac_set_gso_types(priv, features & NETIF_F_TSO); if (features & NETIF_F_HW_VLAN_CTAG_RX) priv->hw->hw_vlan_en = true; @@ -7880,7 +7886,7 @@ static int __stmmac_dvr_probe(struct device *device, ndev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6; if (priv->plat->core_type == DWMAC_CORE_GMAC4) ndev->hw_features |= NETIF_F_GSO_UDP_L4; - priv->tso = true; + stmmac_set_gso_types(priv, true); dev_info(priv->device, "TSO feature enabled\n"); } From c04939cb9851a1f024f03784dec71cd7a7ea4004 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 1 Apr 2026 08:22:00 +0100 Subject: [PATCH 1123/1561] net: stmmac: split out gso features setup Move the GSO features setup into a separate function, co-loated with other GSO/TSO support. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w7pto-0000000Eath-1VDH@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 72180c749add..7f8335b69301 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4385,6 +4385,19 @@ static void stmmac_set_gso_types(struct stmmac_priv *priv, bool tso) } } +static void stmmac_set_gso_features(struct net_device *ndev) +{ + struct stmmac_priv *priv = netdev_priv(ndev); + + if ((priv->plat->flags & STMMAC_FLAG_TSO_EN) && (priv->dma_cap.tsoen)) { + ndev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6; + if (priv->plat->core_type == DWMAC_CORE_GMAC4) + ndev->hw_features |= NETIF_F_GSO_UDP_L4; + stmmac_set_gso_types(priv, true); + dev_info(priv->device, "TSO feature enabled\n"); + } +} + static size_t stmmac_tso_header_size(struct sk_buff *skb) { size_t size; @@ -7882,13 +7895,7 @@ static int __stmmac_dvr_probe(struct device *device, ndev->hw_features |= NETIF_F_HW_TC; } - if ((priv->plat->flags & STMMAC_FLAG_TSO_EN) && (priv->dma_cap.tsoen)) { - ndev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6; - if (priv->plat->core_type == DWMAC_CORE_GMAC4) - ndev->hw_features |= NETIF_F_GSO_UDP_L4; - stmmac_set_gso_types(priv, true); - dev_info(priv->device, "TSO feature enabled\n"); - } + stmmac_set_gso_features(ndev); if (priv->dma_cap.sphen && !(priv->plat->flags & STMMAC_FLAG_SPH_DISABLE)) { From 6ad0044428973f1d5e311c77e2e4f54192a37248 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 1 Apr 2026 08:22:05 +0100 Subject: [PATCH 1124/1561] net: stmmac: make stmmac_set_gso_features() more readable Make stmmac_set_gso_features() more readable by adding some whitespace and getting rid of the indentation. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w7ptt-0000000Eatn-1ziK@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 7f8335b69301..c9cb81ae812e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4389,13 +4389,19 @@ static void stmmac_set_gso_features(struct net_device *ndev) { struct stmmac_priv *priv = netdev_priv(ndev); - if ((priv->plat->flags & STMMAC_FLAG_TSO_EN) && (priv->dma_cap.tsoen)) { - ndev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6; - if (priv->plat->core_type == DWMAC_CORE_GMAC4) - ndev->hw_features |= NETIF_F_GSO_UDP_L4; - stmmac_set_gso_types(priv, true); - dev_info(priv->device, "TSO feature enabled\n"); - } + if (!(priv->plat->flags & STMMAC_FLAG_TSO_EN)) + return; + + if (!priv->dma_cap.tsoen) + return; + + ndev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6; + if (priv->plat->core_type == DWMAC_CORE_GMAC4) + ndev->hw_features |= NETIF_F_GSO_UDP_L4; + + stmmac_set_gso_types(priv, true); + + dev_info(priv->device, "TSO feature enabled\n"); } static size_t stmmac_tso_header_size(struct sk_buff *skb) From f8c70ab540c1a15e281baa10eec270396aa04d1a Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 1 Apr 2026 08:22:10 +0100 Subject: [PATCH 1125/1561] net: stmmac: add warning when TSO is requested but unsupported Add a warning message if TSO is requested by the platform glue code but the core wasn't configured for TSO. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w7pty-0000000Eatt-2TjZ@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index c9cb81ae812e..3c416fe79c15 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4392,8 +4392,10 @@ static void stmmac_set_gso_features(struct net_device *ndev) if (!(priv->plat->flags & STMMAC_FLAG_TSO_EN)) return; - if (!priv->dma_cap.tsoen) + if (!priv->dma_cap.tsoen) { + dev_warn(priv->device, "platform requests unsupported TSO\n"); return; + } ndev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6; if (priv->plat->core_type == DWMAC_CORE_GMAC4) From 33f5cc83bbbdf73bd0c145403b30b49eec4b0311 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 1 Apr 2026 08:22:15 +0100 Subject: [PATCH 1126/1561] net: stmmac: check txpbl for TSO Documentation states that TxPBL must be >= 4 to allow TSO support, but the driver doesn't check this. TxPBL comes from the platform glue code or DT. Add a check with a warning if platform glue code attempts to enable TSO support with TxPBL too low. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w7pu3-0000000Eatz-39ts@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 3c416fe79c15..350efbd9d85b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4388,6 +4388,8 @@ static void stmmac_set_gso_types(struct stmmac_priv *priv, bool tso) static void stmmac_set_gso_features(struct net_device *ndev) { struct stmmac_priv *priv = netdev_priv(ndev); + const struct stmmac_dma_cfg *dma_cfg; + int txpbl; if (!(priv->plat->flags & STMMAC_FLAG_TSO_EN)) return; @@ -4397,6 +4399,18 @@ static void stmmac_set_gso_features(struct net_device *ndev) return; } + /* FIXME: + * STM32MP151 (v4.2 userver v4.0) states that TxPBL must be >= 4. It + * is not clear whether PBLx8 (which multiplies the PBL value by 8) + * influences this. + */ + dma_cfg = priv->plat->dma_cfg; + txpbl = dma_cfg->txpbl ?: dma_cfg->pbl; + if (txpbl < 4) { + dev_warn(priv->device, "txpbl(%d) is too low for TSO\n", txpbl); + return; + } + ndev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6; if (priv->plat->core_type == DWMAC_CORE_GMAC4) ndev->hw_features |= NETIF_F_GSO_UDP_L4; From 0f96212a5142cdacdc4133024f0e82626a23209c Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 1 Apr 2026 08:22:20 +0100 Subject: [PATCH 1127/1561] net: stmmac: move "TSO supported" message to stmmac_set_gso_features() Move the "TSO supported" message to stmmac_set_gso_features() so that we group all probe-time TSO stuff in one place. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w7pu8-0000000Eau5-3Zne@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 350efbd9d85b..01a983001ab4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4391,6 +4391,9 @@ static void stmmac_set_gso_features(struct net_device *ndev) const struct stmmac_dma_cfg *dma_cfg; int txpbl; + if (priv->dma_cap.tsoen) + dev_info(priv->device, "TSO supported\n"); + if (!(priv->plat->flags & STMMAC_FLAG_TSO_EN)) return; @@ -7463,9 +7466,6 @@ static int stmmac_hw_init(struct stmmac_priv *priv) devm_pm_set_wake_irq(priv->device, priv->wol_irq); } - if (priv->dma_cap.tsoen) - dev_info(priv->device, "TSO supported\n"); - if (priv->dma_cap.number_rx_queues && priv->plat->rx_queues_to_use > priv->dma_cap.number_rx_queues) { dev_warn(priv->device, From 1ef5789d9906df3771c99b7f413caaf2bf473ca5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 1 Apr 2026 10:38:08 +0000 Subject: [PATCH 1128/1561] macvlan: annotate data-races around port->bc_queue_len_used port->bc_queue_len_used is read and written locklessly, add READ_ONCE()/WRITE_ONCE() annotations. While WRITE_ONCE() in macvlan_fill_info() is not yet needed, it is a prereq for future RTNL avoidance. Fixes: d4bff72c8401 ("macvlan: Support for high multicast packet rate") Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260401103809.3038139-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/macvlan.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index ea22909cb09d..bbb5c32541f9 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -356,6 +356,7 @@ static void macvlan_broadcast_enqueue(struct macvlan_port *port, const struct macvlan_dev *src, struct sk_buff *skb) { + u32 bc_queue_len_used = READ_ONCE(port->bc_queue_len_used); struct sk_buff *nskb; int err = -ENOMEM; @@ -366,7 +367,7 @@ static void macvlan_broadcast_enqueue(struct macvlan_port *port, MACVLAN_SKB_CB(nskb)->src = src; spin_lock(&port->bc_queue.lock); - if (skb_queue_len(&port->bc_queue) < port->bc_queue_len_used) { + if (skb_queue_len(&port->bc_queue) < bc_queue_len_used) { if (src) dev_hold(src->dev); __skb_queue_tail(&port->bc_queue, nskb); @@ -1731,7 +1732,8 @@ static int macvlan_fill_info(struct sk_buff *skb, } if (nla_put_u32(skb, IFLA_MACVLAN_BC_QUEUE_LEN, vlan->bc_queue_len_req)) goto nla_put_failure; - if (nla_put_u32(skb, IFLA_MACVLAN_BC_QUEUE_LEN_USED, port->bc_queue_len_used)) + if (nla_put_u32(skb, IFLA_MACVLAN_BC_QUEUE_LEN_USED, + READ_ONCE(port->bc_queue_len_used))) goto nla_put_failure; if (port->bc_cutoff != 1 && nla_put_s32(skb, IFLA_MACVLAN_BC_CUTOFF, port->bc_cutoff)) @@ -1791,7 +1793,7 @@ static void update_port_bc_queue_len(struct macvlan_port *port) if (vlan->bc_queue_len_req > max_bc_queue_len_req) max_bc_queue_len_req = vlan->bc_queue_len_req; } - port->bc_queue_len_used = max_bc_queue_len_req; + WRITE_ONCE(port->bc_queue_len_used, max_bc_queue_len_req); } static int macvlan_device_event(struct notifier_block *unused, From 0d5dc1d7aad144b6c561e72e96f3107d812c6026 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 1 Apr 2026 10:38:09 +0000 Subject: [PATCH 1129/1561] macvlan: avoid spinlock contention in macvlan_broadcast_enqueue() Under high stress, we spend a lot of time cloning skbs, then acquiring a spinlock, then freeing the clone because the queue is full. Add a shortcut to avoid these costs under pressure. Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260401103809.3038139-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/macvlan.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index bbb5c32541f9..54c514acacc5 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -360,6 +360,9 @@ static void macvlan_broadcast_enqueue(struct macvlan_port *port, struct sk_buff *nskb; int err = -ENOMEM; + if (skb_queue_len_lockless(&port->bc_queue) >= bc_queue_len_used) + goto err; + nskb = skb_clone(skb, GFP_ATOMIC); if (!nskb) goto err; From 7eaff1eff003dc9c5881edc37741795a5dfc5ff8 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 1 Apr 2026 12:49:31 +0100 Subject: [PATCH 1130/1561] net: phy: bcm84881: add LED framework support for BCM84891/BCM84892 Expose LED1 and LED2 pins via the PHY LED framework. Each pin has a source mask (MASK_LOW + MASK_EXT registers) selecting which hardware events light it, plus a CTL field in the shared 0xA83B register (RMW; LED4 is firmware-controlled per the datasheet). Hardware can offload per-speed link triggers (1000/2500/5000/10000), RX/TX activity, and force-on. LINK_100 is accepted only alongside LINK_1000: source bit 4 lights at both speeds and 100-alone isn't representable, so the unrepresentable case falls to software. The chip has five LED pins; only LED1/LED2 are exposed here as those are the only ones characterized on tested hardware. LED4 is firmware- controlled regardless of strap configuration. Tested on TRENDnet TEG-S750 (LED1/LED2 wired to an antiparallel bicolor LED): brightness_set via sysfs; netdev trigger offloaded=1 with amber lit at 100M/1G/2.5G and green lit at 10G via respective link_* modes; LED off immediately on cable unplug with no software involvement. Signed-off-by: Daniel Wagner Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20260401114931.3091818-1-wagner.daniel.t@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/bcm84881.c | 156 +++++++++++++++++++++++++++++++++++++ 1 file changed, 156 insertions(+) diff --git a/drivers/net/phy/bcm84881.c b/drivers/net/phy/bcm84881.c index f114212dd303..2ae70dcf82ec 100644 --- a/drivers/net/phy/bcm84881.c +++ b/drivers/net/phy/bcm84881.c @@ -20,6 +20,33 @@ enum { MDIO_AN_C22 = 0xffe0, }; +/* BCM8489x LED controller (BCM84891L datasheet 2.4.1.58). Each pin has + * CTL bits in 0xA83B (stride 3: 2-bit CTL + 1-bit OE_N) plus MASK_LOW/ + * MASK_EXT source selects. LED4 is firmware-controlled; always RMW. + */ +#define BCM8489X_LED_CTL 0xa83b +#define BCM8489X_LED_CTL_ON(i) (0x2 << ((i) * 3)) +#define BCM8489X_LED_CTL_MASK(i) (0x3 << ((i) * 3)) + +#define BCM8489X_LED_SRC_RX BIT(1) +#define BCM8489X_LED_SRC_TX BIT(2) +#define BCM8489X_LED_SRC_1000 BIT(3) /* high only at 1000 */ +#define BCM8489X_LED_SRC_100_1000 BIT(4) /* high at 100 and 1000 */ +#define BCM8489X_LED_SRC_FORCE BIT(5) /* always-1 source */ +#define BCM8489X_LED_SRC_10G BIT(7) +#define BCM8489X_LED_SRCX_2500 BIT(2) +#define BCM8489X_LED_SRCX_5000 BIT(3) + +#define BCM8489X_MAX_LEDS 2 + +static const struct { + u16 mask_low; + u16 mask_ext; +} bcm8489x_led_regs[BCM8489X_MAX_LEDS] = { + { 0xa82c, 0xa8ef }, /* LED1 */ + { 0xa82f, 0xa8f0 }, /* LED2 */ +}; + static int bcm84881_wait_init(struct phy_device *phydev) { int val; @@ -69,6 +96,127 @@ static int bcm8489x_config_init(struct phy_device *phydev) MDIO_CTRL1_LPOWER); } +static int bcm8489x_led_write(struct phy_device *phydev, u8 index, + u16 low, u16 ext) +{ + int ret; + + ret = phy_write_mmd(phydev, MDIO_MMD_PMAPMD, + bcm8489x_led_regs[index].mask_low, low); + if (ret) + return ret; + ret = phy_write_mmd(phydev, MDIO_MMD_PMAPMD, + bcm8489x_led_regs[index].mask_ext, ext); + if (ret) + return ret; + return phy_modify_mmd(phydev, MDIO_MMD_PMAPMD, BCM8489X_LED_CTL, + BCM8489X_LED_CTL_MASK(index), + (low | ext) ? BCM8489X_LED_CTL_ON(index) : 0); +} + +static int bcm8489x_led_brightness_set(struct phy_device *phydev, + u8 index, enum led_brightness value) +{ + if (index >= BCM8489X_MAX_LEDS) + return -EINVAL; + + return bcm8489x_led_write(phydev, index, + value ? BCM8489X_LED_SRC_FORCE : 0, 0); +} + +static const unsigned long bcm8489x_supported_triggers = + BIT(TRIGGER_NETDEV_LINK) | + BIT(TRIGGER_NETDEV_LINK_100) | + BIT(TRIGGER_NETDEV_LINK_1000) | + BIT(TRIGGER_NETDEV_LINK_2500) | + BIT(TRIGGER_NETDEV_LINK_5000) | + BIT(TRIGGER_NETDEV_LINK_10000) | + BIT(TRIGGER_NETDEV_RX) | + BIT(TRIGGER_NETDEV_TX); + +static int bcm8489x_led_hw_is_supported(struct phy_device *phydev, u8 index, + unsigned long rules) +{ + if (index >= BCM8489X_MAX_LEDS) + return -EINVAL; + + if (rules & ~bcm8489x_supported_triggers) + return -EOPNOTSUPP; + + /* Source bit 4 lights at both 100 and 1000; "100 only" isn't + * representable in hardware. Accept LINK_100 only alongside + * LINK_1000 or LINK so the offload is precise. + */ + if ((rules & BIT(TRIGGER_NETDEV_LINK_100)) && + !(rules & (BIT(TRIGGER_NETDEV_LINK_1000) | + BIT(TRIGGER_NETDEV_LINK)))) + return -EOPNOTSUPP; + + return 0; +} + +static int bcm8489x_led_hw_control_set(struct phy_device *phydev, u8 index, + unsigned long rules) +{ + u16 low = 0, ext = 0; + + if (index >= BCM8489X_MAX_LEDS) + return -EINVAL; + + if (rules & (BIT(TRIGGER_NETDEV_LINK_100) | BIT(TRIGGER_NETDEV_LINK))) + low |= BCM8489X_LED_SRC_100_1000; + if (rules & (BIT(TRIGGER_NETDEV_LINK_1000) | BIT(TRIGGER_NETDEV_LINK))) + low |= BCM8489X_LED_SRC_1000; + if (rules & (BIT(TRIGGER_NETDEV_LINK_2500) | BIT(TRIGGER_NETDEV_LINK))) + ext |= BCM8489X_LED_SRCX_2500; + if (rules & (BIT(TRIGGER_NETDEV_LINK_5000) | BIT(TRIGGER_NETDEV_LINK))) + ext |= BCM8489X_LED_SRCX_5000; + if (rules & (BIT(TRIGGER_NETDEV_LINK_10000) | BIT(TRIGGER_NETDEV_LINK))) + low |= BCM8489X_LED_SRC_10G; + if (rules & BIT(TRIGGER_NETDEV_RX)) + low |= BCM8489X_LED_SRC_RX; + if (rules & BIT(TRIGGER_NETDEV_TX)) + low |= BCM8489X_LED_SRC_TX; + + return bcm8489x_led_write(phydev, index, low, ext); +} + +static int bcm8489x_led_hw_control_get(struct phy_device *phydev, u8 index, + unsigned long *rules) +{ + int low, ext; + + if (index >= BCM8489X_MAX_LEDS) + return -EINVAL; + + low = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, + bcm8489x_led_regs[index].mask_low); + if (low < 0) + return low; + ext = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, + bcm8489x_led_regs[index].mask_ext); + if (ext < 0) + return ext; + + *rules = 0; + if (low & BCM8489X_LED_SRC_100_1000) + *rules |= BIT(TRIGGER_NETDEV_LINK_100); + if (low & BCM8489X_LED_SRC_1000) + *rules |= BIT(TRIGGER_NETDEV_LINK_1000); + if (ext & BCM8489X_LED_SRCX_2500) + *rules |= BIT(TRIGGER_NETDEV_LINK_2500); + if (ext & BCM8489X_LED_SRCX_5000) + *rules |= BIT(TRIGGER_NETDEV_LINK_5000); + if (low & BCM8489X_LED_SRC_10G) + *rules |= BIT(TRIGGER_NETDEV_LINK_10000); + if (low & BCM8489X_LED_SRC_RX) + *rules |= BIT(TRIGGER_NETDEV_RX); + if (low & BCM8489X_LED_SRC_TX) + *rules |= BIT(TRIGGER_NETDEV_TX); + + return 0; +} + static int bcm84881_probe(struct phy_device *phydev) { /* This driver requires PMAPMD and AN blocks */ @@ -290,6 +438,10 @@ static struct phy_driver bcm84881_drivers[] = { .config_aneg = bcm84881_config_aneg, .aneg_done = bcm84881_aneg_done, .read_status = bcm84881_read_status, + .led_brightness_set = bcm8489x_led_brightness_set, + .led_hw_is_supported = bcm8489x_led_hw_is_supported, + .led_hw_control_set = bcm8489x_led_hw_control_set, + .led_hw_control_get = bcm8489x_led_hw_control_get, }, { PHY_ID_MATCH_MODEL(0x359050a0), .name = "Broadcom BCM84892", @@ -300,6 +452,10 @@ static struct phy_driver bcm84881_drivers[] = { .config_aneg = bcm84881_config_aneg, .aneg_done = bcm84881_aneg_done, .read_status = bcm84881_read_status, + .led_brightness_set = bcm8489x_led_brightness_set, + .led_hw_is_supported = bcm8489x_led_hw_is_supported, + .led_hw_control_set = bcm8489x_led_hw_control_set, + .led_hw_control_get = bcm8489x_led_hw_control_get, }, }; From 86f5dd4e0ff282a0acf1f058e947fd5f4ba58a9d Mon Sep 17 00:00:00 2001 From: Chih Kai Hsu Date: Wed, 1 Apr 2026 19:55:41 +0800 Subject: [PATCH 1131/1561] r8152: Add helper functions for SRAM2 Add the following helper functions for SRAM2 access to simplify the code and improve readability: - sram2_write() - write data to SRAM2 address - sram2_read() - read data from SRAM2 address - sram2_write_w0w1() - read-modify-write operation Signed-off-by: Chih Kai Hsu Reviewed-by: Hayes Wang Link: https://patch.msgid.link/20260401115542.34601-1-nic_swsd@realtek.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/r8152.c | 176 +++++++++++++++++----------------------- 1 file changed, 74 insertions(+), 102 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 8747c55e0a48..1765da5bd6cf 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -213,6 +213,8 @@ #define OCP_PHY_PATCH_STAT 0xb800 #define OCP_PHY_PATCH_CMD 0xb820 #define OCP_PHY_LOCK 0xb82e +#define OCP_SRAM2_ADDR 0xb87c +#define OCP_SRAM2_DATA 0xb87e #define OCP_ADC_IOFFSET 0xbcfc #define OCP_ADC_CFG 0xbc06 #define OCP_SYSCLK_CFG 0xc416 @@ -1764,6 +1766,27 @@ static void sram_set_bits(struct r8152 *tp, u16 addr, u16 set) sram_write_w0w1(tp, addr, 0, set); } +static void sram2_write(struct r8152 *tp, u16 addr, u16 data) +{ + ocp_reg_write(tp, OCP_SRAM2_ADDR, addr); + ocp_reg_write(tp, OCP_SRAM2_DATA, data); +} + +static u16 sram2_read(struct r8152 *tp, u16 addr) +{ + ocp_reg_write(tp, OCP_SRAM2_ADDR, addr); + return ocp_reg_read(tp, OCP_SRAM2_DATA); +} + +static void sram2_write_w0w1(struct r8152 *tp, u16 addr, u16 clear, u16 set) +{ + u16 data; + + data = sram2_read(tp, addr); + data = (data & ~clear) | set; + ocp_reg_write(tp, OCP_SRAM2_DATA, data); +} + static void r8152_mdio_clr_bit(struct r8152 *tp, u16 addr, u16 clear) { int data; @@ -7195,16 +7218,12 @@ static void r8156_hw_phy_cfg(struct r8152 *tp) ocp_reg_write(tp, 0xad4c, 0x00a8); ocp_reg_write(tp, 0xac5c, 0x01ff); ocp_reg_w0w1(tp, 0xac8a, 0xf0, BIT(4) | BIT(5)); - ocp_reg_write(tp, 0xb87c, 0x8157); - ocp_reg_w0w1(tp, 0xb87e, 0xff00, 0x0500); - ocp_reg_write(tp, 0xb87c, 0x8159); - ocp_reg_w0w1(tp, 0xb87e, 0xff00, 0x0700); + sram2_write_w0w1(tp, 0x8157, 0xff00, 0x0500); + sram2_write_w0w1(tp, 0x8159, 0xff00, 0x0700); /* AAGC */ - ocp_reg_write(tp, 0xb87c, 0x80a2); - ocp_reg_write(tp, 0xb87e, 0x0153); - ocp_reg_write(tp, 0xb87c, 0x809c); - ocp_reg_write(tp, 0xb87e, 0x0153); + sram2_write(tp, 0x80a2, 0x0153); + sram2_write(tp, 0x809c, 0x0153); /* EEE parameter */ ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_TXTWSYS_2P5G, 0x0056); @@ -7402,82 +7421,48 @@ static void r8156b_hw_phy_cfg(struct r8152 *tp) ocp_reg_write(tp, 0xacc8, 0xa0d3); ocp_reg_write(tp, 0xad08, 0x0007); - ocp_reg_write(tp, 0xb87c, 0x8560); - ocp_reg_write(tp, 0xb87e, 0x19cc); - ocp_reg_write(tp, 0xb87c, 0x8562); - ocp_reg_write(tp, 0xb87e, 0x19cc); - ocp_reg_write(tp, 0xb87c, 0x8564); - ocp_reg_write(tp, 0xb87e, 0x19cc); - ocp_reg_write(tp, 0xb87c, 0x8566); - ocp_reg_write(tp, 0xb87e, 0x147d); - ocp_reg_write(tp, 0xb87c, 0x8568); - ocp_reg_write(tp, 0xb87e, 0x147d); - ocp_reg_write(tp, 0xb87c, 0x856a); - ocp_reg_write(tp, 0xb87e, 0x147d); - ocp_reg_write(tp, 0xb87c, 0x8ffe); - ocp_reg_write(tp, 0xb87e, 0x0907); - ocp_reg_write(tp, 0xb87c, 0x80d6); - ocp_reg_write(tp, 0xb87e, 0x2801); - ocp_reg_write(tp, 0xb87c, 0x80f2); - ocp_reg_write(tp, 0xb87e, 0x2801); - ocp_reg_write(tp, 0xb87c, 0x80f4); - ocp_reg_write(tp, 0xb87e, 0x6077); + sram2_write(tp, 0x8560, 0x19cc); + sram2_write(tp, 0x8562, 0x19cc); + sram2_write(tp, 0x8564, 0x19cc); + sram2_write(tp, 0x8566, 0x147d); + sram2_write(tp, 0x8568, 0x147d); + sram2_write(tp, 0x856a, 0x147d); + sram2_write(tp, 0x8ffe, 0x0907); + sram2_write(tp, 0x80d6, 0x2801); + sram2_write(tp, 0x80f2, 0x2801); + sram2_write(tp, 0x80f4, 0x6077); ocp_reg_write(tp, 0xb506, 0x01e7); - ocp_reg_write(tp, 0xb87c, 0x8013); - ocp_reg_write(tp, 0xb87e, 0x0700); - ocp_reg_write(tp, 0xb87c, 0x8fb9); - ocp_reg_write(tp, 0xb87e, 0x2801); - ocp_reg_write(tp, 0xb87c, 0x8fba); - ocp_reg_write(tp, 0xb87e, 0x0100); - ocp_reg_write(tp, 0xb87c, 0x8fbc); - ocp_reg_write(tp, 0xb87e, 0x1900); - ocp_reg_write(tp, 0xb87c, 0x8fbe); - ocp_reg_write(tp, 0xb87e, 0xe100); - ocp_reg_write(tp, 0xb87c, 0x8fc0); - ocp_reg_write(tp, 0xb87e, 0x0800); - ocp_reg_write(tp, 0xb87c, 0x8fc2); - ocp_reg_write(tp, 0xb87e, 0xe500); - ocp_reg_write(tp, 0xb87c, 0x8fc4); - ocp_reg_write(tp, 0xb87e, 0x0f00); - ocp_reg_write(tp, 0xb87c, 0x8fc6); - ocp_reg_write(tp, 0xb87e, 0xf100); - ocp_reg_write(tp, 0xb87c, 0x8fc8); - ocp_reg_write(tp, 0xb87e, 0x0400); - ocp_reg_write(tp, 0xb87c, 0x8fca); - ocp_reg_write(tp, 0xb87e, 0xf300); - ocp_reg_write(tp, 0xb87c, 0x8fcc); - ocp_reg_write(tp, 0xb87e, 0xfd00); - ocp_reg_write(tp, 0xb87c, 0x8fce); - ocp_reg_write(tp, 0xb87e, 0xff00); - ocp_reg_write(tp, 0xb87c, 0x8fd0); - ocp_reg_write(tp, 0xb87e, 0xfb00); - ocp_reg_write(tp, 0xb87c, 0x8fd2); - ocp_reg_write(tp, 0xb87e, 0x0100); - ocp_reg_write(tp, 0xb87c, 0x8fd4); - ocp_reg_write(tp, 0xb87e, 0xf400); - ocp_reg_write(tp, 0xb87c, 0x8fd6); - ocp_reg_write(tp, 0xb87e, 0xff00); - ocp_reg_write(tp, 0xb87c, 0x8fd8); - ocp_reg_write(tp, 0xb87e, 0xf600); + sram2_write(tp, 0x8013, 0x0700); + sram2_write(tp, 0x8fb9, 0x2801); + sram2_write(tp, 0x8fba, 0x0100); + sram2_write(tp, 0x8fbc, 0x1900); + sram2_write(tp, 0x8fbe, 0xe100); + sram2_write(tp, 0x8fc0, 0x0800); + sram2_write(tp, 0x8fc2, 0xe500); + sram2_write(tp, 0x8fc4, 0x0f00); + sram2_write(tp, 0x8fc6, 0xf100); + sram2_write(tp, 0x8fc8, 0x0400); + sram2_write(tp, 0x8fca, 0xf300); + sram2_write(tp, 0x8fcc, 0xfd00); + sram2_write(tp, 0x8fce, 0xff00); + sram2_write(tp, 0x8fd0, 0xfb00); + sram2_write(tp, 0x8fd2, 0x0100); + sram2_write(tp, 0x8fd4, 0xf400); + sram2_write(tp, 0x8fd6, 0xff00); + sram2_write(tp, 0x8fd8, 0xf600); ocp_byte_set_bits(tp, MCU_TYPE_PLA, PLA_USB_CFG, EN_XG_LIP | EN_G_LIP); - ocp_reg_write(tp, 0xb87c, 0x813d); - ocp_reg_write(tp, 0xb87e, 0x390e); - ocp_reg_write(tp, 0xb87c, 0x814f); - ocp_reg_write(tp, 0xb87e, 0x790e); - ocp_reg_write(tp, 0xb87c, 0x80b0); - ocp_reg_write(tp, 0xb87e, 0x0f31); + sram2_write(tp, 0x813d, 0x390e); + sram2_write(tp, 0x814f, 0x790e); + sram2_write(tp, 0x80b0, 0x0f31); ocp_reg_set_bits(tp, 0xbf4c, BIT(1)); ocp_reg_set_bits(tp, 0xbcca, BIT(9) | BIT(8)); - ocp_reg_write(tp, 0xb87c, 0x8141); - ocp_reg_write(tp, 0xb87e, 0x320e); - ocp_reg_write(tp, 0xb87c, 0x8153); - ocp_reg_write(tp, 0xb87e, 0x720e); - ocp_reg_write(tp, 0xb87c, 0x8529); - ocp_reg_write(tp, 0xb87e, 0x050e); + sram2_write(tp, 0x8141, 0x320e); + sram2_write(tp, 0x8153, 0x720e); + sram2_write(tp, 0x8529, 0x050e); ocp_reg_clr_bits(tp, OCP_EEE_CFG, CTAP_SHORT_EN); sram_write(tp, 0x816c, 0xc4a0); @@ -7489,27 +7474,17 @@ static void r8156b_hw_phy_cfg(struct r8152 *tp) sram_write(tp, 0x8ff1, 0x0404); ocp_reg_write(tp, 0xbf4a, 0x001b); - ocp_reg_write(tp, 0xb87c, 0x8033); - ocp_reg_write(tp, 0xb87e, 0x7c13); - ocp_reg_write(tp, 0xb87c, 0x8037); - ocp_reg_write(tp, 0xb87e, 0x7c13); - ocp_reg_write(tp, 0xb87c, 0x803b); - ocp_reg_write(tp, 0xb87e, 0xfc32); - ocp_reg_write(tp, 0xb87c, 0x803f); - ocp_reg_write(tp, 0xb87e, 0x7c13); - ocp_reg_write(tp, 0xb87c, 0x8043); - ocp_reg_write(tp, 0xb87e, 0x7c13); - ocp_reg_write(tp, 0xb87c, 0x8047); - ocp_reg_write(tp, 0xb87e, 0x7c13); + sram2_write(tp, 0x8033, 0x7c13); + sram2_write(tp, 0x8037, 0x7c13); + sram2_write(tp, 0x803b, 0xfc32); + sram2_write(tp, 0x803f, 0x7c13); + sram2_write(tp, 0x8043, 0x7c13); + sram2_write(tp, 0x8047, 0x7c13); - ocp_reg_write(tp, 0xb87c, 0x8145); - ocp_reg_write(tp, 0xb87e, 0x370e); - ocp_reg_write(tp, 0xb87c, 0x8157); - ocp_reg_write(tp, 0xb87e, 0x770e); - ocp_reg_write(tp, 0xb87c, 0x8169); - ocp_reg_write(tp, 0xb87e, 0x0d0a); - ocp_reg_write(tp, 0xb87c, 0x817b); - ocp_reg_write(tp, 0xb87e, 0x1d0a); + sram2_write(tp, 0x8145, 0x370e); + sram2_write(tp, 0x8157, 0x770e); + sram2_write(tp, 0x8169, 0x0d0a); + sram2_write(tp, 0x817b, 0x1d0a); sram_write_w0w1(tp, 0x8217, 0xff00, 0x5000); sram_write_w0w1(tp, 0x821a, 0xff00, 0x5000); @@ -7574,12 +7549,9 @@ static void r8156b_hw_phy_cfg(struct r8152 *tp) fallthrough; case RTL_VER_15: /* EEE parameter */ - ocp_reg_write(tp, 0xb87c, 0x80f5); - ocp_reg_write(tp, 0xb87e, 0x760e); - ocp_reg_write(tp, 0xb87c, 0x8107); - ocp_reg_write(tp, 0xb87e, 0x360e); - ocp_reg_write(tp, 0xb87c, 0x8551); - ocp_reg_w0w1(tp, 0xb87e, 0xff00, 0x0800); + sram2_write(tp, 0x80f5, 0x760e); + sram2_write(tp, 0x8107, 0x360e); + sram2_write_w0w1(tp, 0x8551, 0xff00, 0x0800); /* ADC_PGA parameter */ ocp_reg_w0w1(tp, 0xbf00, 0xe000, 0xa000); From e417ac73d24ae68b8dd6a1b02f9db03a7a5c184b Mon Sep 17 00:00:00 2001 From: Nicolai Buchwitz Date: Wed, 1 Apr 2026 14:38:44 +0200 Subject: [PATCH 1132/1561] net: phy: microchip: add downshift tunable support for LAN88xx Implement the standard ETHTOOL_PHY_DOWNSHIFT tunable for the LAN88xx PHY. This allows runtime configuration of the auto-downshift feature via ethtool: ethtool --set-phy-tunable eth0 downshift on count 3 The LAN88xx PHY supports downshifting from 1000BASE-T to 100BASE-TX after 2-5 failed auto-negotiation attempts. Valid count values are 2, 3, 4 and 5. This is based on an earlier downstream implementation by Phil Elwell. Signed-off-by: Nicolai Buchwitz Reviewed-by: Andrew Lunn Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20260401123848.696766-2-nb@tipi-net.de Signed-off-by: Jakub Kicinski --- drivers/net/phy/microchip.c | 64 ++++++++++++++++++++++++++++++++++++ include/linux/microchipphy.h | 5 +++ 2 files changed, 69 insertions(+) diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c index dc8634e7bcbe..bc293d2dd130 100644 --- a/drivers/net/phy/microchip.c +++ b/drivers/net/phy/microchip.c @@ -2,6 +2,7 @@ /* * Copyright (C) 2015 Microchip Technology */ +#include #include #include #include @@ -193,6 +194,67 @@ static void lan88xx_config_TR_regs(struct phy_device *phydev) phydev_warn(phydev, "Failed to Set Register[0x1686]\n"); } +static int lan88xx_get_downshift(struct phy_device *phydev, u8 *data) +{ + int val; + + val = phy_read_paged(phydev, 1, LAN78XX_PHY_CTRL3); + if (val < 0) + return val; + + if (!(val & LAN78XX_PHY_CTRL3_AUTO_DOWNSHIFT)) { + *data = DOWNSHIFT_DEV_DISABLE; + return 0; + } + + *data = FIELD_GET(LAN78XX_PHY_CTRL3_DOWNSHIFT_CTRL_MASK, val) + 2; + + return 0; +} + +static int lan88xx_set_downshift(struct phy_device *phydev, u8 cnt) +{ + u32 mask = LAN78XX_PHY_CTRL3_DOWNSHIFT_CTRL_MASK | + LAN78XX_PHY_CTRL3_AUTO_DOWNSHIFT; + + if (cnt == DOWNSHIFT_DEV_DISABLE) + return phy_modify_paged(phydev, 1, LAN78XX_PHY_CTRL3, + LAN78XX_PHY_CTRL3_AUTO_DOWNSHIFT, 0); + + if (cnt == DOWNSHIFT_DEV_DEFAULT_COUNT) + cnt = 2; + + if (cnt < 2 || cnt > 5) + return -EINVAL; + + return phy_modify_paged(phydev, 1, LAN78XX_PHY_CTRL3, mask, + FIELD_PREP(LAN78XX_PHY_CTRL3_DOWNSHIFT_CTRL_MASK, + cnt - 2) | + LAN78XX_PHY_CTRL3_AUTO_DOWNSHIFT); +} + +static int lan88xx_get_tunable(struct phy_device *phydev, + struct ethtool_tunable *tuna, void *data) +{ + switch (tuna->id) { + case ETHTOOL_PHY_DOWNSHIFT: + return lan88xx_get_downshift(phydev, data); + default: + return -EOPNOTSUPP; + } +} + +static int lan88xx_set_tunable(struct phy_device *phydev, + struct ethtool_tunable *tuna, const void *data) +{ + switch (tuna->id) { + case ETHTOOL_PHY_DOWNSHIFT: + return lan88xx_set_downshift(phydev, *(const u8 *)data); + default: + return -EOPNOTSUPP; + } +} + static int lan88xx_probe(struct phy_device *phydev) { struct device *dev = &phydev->mdio.dev; @@ -499,6 +561,8 @@ static struct phy_driver microchip_phy_driver[] = { .set_wol = lan88xx_set_wol, .read_page = lan88xx_read_page, .write_page = lan88xx_write_page, + .get_tunable = lan88xx_get_tunable, + .set_tunable = lan88xx_set_tunable, }, { PHY_ID_MATCH_MODEL(PHY_ID_LAN937X_TX), diff --git a/include/linux/microchipphy.h b/include/linux/microchipphy.h index 517288da19fd..7da956c666a0 100644 --- a/include/linux/microchipphy.h +++ b/include/linux/microchipphy.h @@ -61,6 +61,11 @@ /* Registers specific to the LAN7800/LAN7850 embedded phy */ #define LAN78XX_PHY_LED_MODE_SELECT (0x1D) +/* PHY Control 3 register (page 1) */ +#define LAN78XX_PHY_CTRL3 (0x14) +#define LAN78XX_PHY_CTRL3_AUTO_DOWNSHIFT BIT(4) +#define LAN78XX_PHY_CTRL3_DOWNSHIFT_CTRL_MASK GENMASK(3, 2) + /* DSP registers */ #define PHY_ARDENNES_MMD_DEV_3_PHY_CFG (0x806A) #define PHY_ARDENNES_MMD_DEV_3_PHY_CFG_ZD_DLY_EN_ (0x2000) From 70180f72d91144f4c7b308ffa87bbf3592cd8d65 Mon Sep 17 00:00:00 2001 From: Nicolai Buchwitz Date: Wed, 1 Apr 2026 14:38:45 +0200 Subject: [PATCH 1133/1561] net: phy: microchip: enable downshift by default on LAN88xx Enable auto-downshift from 1000BASE-T to 100BASE-TX after 2 failed auto-negotiation attempts by default. This ensures that links with faulty or missing cable pairs (C and D) fall back to 100Mbps without requiring userspace configuration. The downshift count is stored in the driver's private data and applied in config_init, so user changes via ethtool are preserved across suspend/resume cycles. Users can override or disable downshift at runtime: ethtool --set-phy-tunable eth0 downshift off Signed-off-by: Nicolai Buchwitz Link: https://patch.msgid.link/20260401123848.696766-3-nb@tipi-net.de Signed-off-by: Jakub Kicinski --- drivers/net/phy/microchip.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c index bc293d2dd130..33bc633bdba0 100644 --- a/drivers/net/phy/microchip.c +++ b/drivers/net/phy/microchip.c @@ -26,6 +26,7 @@ struct lan88xx_priv { int chip_id; int chip_rev; __u32 wolopts; + u8 downshift_cnt; }; static int lan88xx_read_page(struct phy_device *phydev) @@ -247,9 +248,15 @@ static int lan88xx_get_tunable(struct phy_device *phydev, static int lan88xx_set_tunable(struct phy_device *phydev, struct ethtool_tunable *tuna, const void *data) { + struct lan88xx_priv *priv = phydev->priv; + int ret; + switch (tuna->id) { case ETHTOOL_PHY_DOWNSHIFT: - return lan88xx_set_downshift(phydev, *(const u8 *)data); + ret = lan88xx_set_downshift(phydev, *(const u8 *)data); + if (!ret) + priv->downshift_cnt = *(const u8 *)data; + return ret; default: return -EOPNOTSUPP; } @@ -267,6 +274,7 @@ static int lan88xx_probe(struct phy_device *phydev) return -ENOMEM; priv->wolopts = 0; + priv->downshift_cnt = 2; len = of_property_read_variable_u32_array(dev->of_node, "microchip,led-modes", @@ -346,7 +354,8 @@ static void lan88xx_set_mdix(struct phy_device *phydev) static int lan88xx_config_init(struct phy_device *phydev) { - int val; + struct lan88xx_priv *priv = phydev->priv; + int val, err; /*Zerodetect delay enable */ val = phy_read_mmd(phydev, MDIO_MMD_PCS, @@ -359,6 +368,10 @@ static int lan88xx_config_init(struct phy_device *phydev) /* Config DSP registers */ lan88xx_config_TR_regs(phydev); + err = lan88xx_set_downshift(phydev, priv->downshift_cnt); + if (err < 0) + return err; + return 0; } From 74fb32ed733ceacd2daa3d22471f4b10705abbbd Mon Sep 17 00:00:00 2001 From: Satish Kharat Date: Wed, 1 Apr 2026 08:31:11 -0700 Subject: [PATCH 1134/1561] enic: extend resource discovery for SR-IOV admin channel VIC firmware exposes admin channel resources (WQ, RQ, CQ) for PF-VF communication when SR-IOV is active. Add the corresponding resource type definitions and teach the discovery and access functions to handle them. Signed-off-by: Satish Kharat Link: https://patch.msgid.link/20260401-enic-sriov-v2-prep-v4-1-d5834b2ef1b9@cisco.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cisco/enic/enic.h | 2 ++ drivers/net/ethernet/cisco/enic/vnic_dev.c | 10 ++++++++++ drivers/net/ethernet/cisco/enic/vnic_resource.h | 4 ++++ 3 files changed, 16 insertions(+) diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h index 6959e85ab516..366c65d072fc 100644 --- a/drivers/net/ethernet/cisco/enic/enic.h +++ b/drivers/net/ethernet/cisco/enic/enic.h @@ -297,6 +297,8 @@ static inline struct net_device *vnic_get_netdev(struct vnic_dev *vdev) dev_warn(&(vdev)->pdev->dev, fmt, ##__VA_ARGS__) #define vdev_info(vdev, fmt, ...) \ dev_info(&(vdev)->pdev->dev, fmt, ##__VA_ARGS__) +#define vdev_dbg(vdev, fmt, ...) \ + dev_dbg(&(vdev)->pdev->dev, fmt, ##__VA_ARGS__) #define vdev_neterr(vdev, fmt, ...) \ netdev_err(vnic_get_netdev(vdev), fmt, ##__VA_ARGS__) diff --git a/drivers/net/ethernet/cisco/enic/vnic_dev.c b/drivers/net/ethernet/cisco/enic/vnic_dev.c index c72452749f5e..c8d657e97094 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_dev.c +++ b/drivers/net/ethernet/cisco/enic/vnic_dev.c @@ -77,6 +77,9 @@ static int vnic_dev_discover_res(struct vnic_dev *vdev, u32 count = ioread32(&r->count); u32 len; + vdev_dbg(vdev, "res type %u bar %u offset 0x%x count %u\n", + type, bar_num, bar_offset, count); + r++; if (bar_num >= num_bars) @@ -90,6 +93,9 @@ static int vnic_dev_discover_res(struct vnic_dev *vdev, case RES_TYPE_RQ: case RES_TYPE_CQ: case RES_TYPE_INTR_CTRL: + case RES_TYPE_ADMIN_WQ: + case RES_TYPE_ADMIN_RQ: + case RES_TYPE_ADMIN_CQ: /* each count is stride bytes long */ len = count * VNIC_RES_STRIDE; if (len + bar_offset > bar[bar_num].len) { @@ -102,6 +108,7 @@ static int vnic_dev_discover_res(struct vnic_dev *vdev, case RES_TYPE_INTR_PBA_LEGACY: case RES_TYPE_DEVCMD: case RES_TYPE_DEVCMD2: + case RES_TYPE_SRIOV_INTR: len = count; break; default: @@ -135,6 +142,9 @@ void __iomem *vnic_dev_get_res(struct vnic_dev *vdev, enum vnic_res_type type, case RES_TYPE_RQ: case RES_TYPE_CQ: case RES_TYPE_INTR_CTRL: + case RES_TYPE_ADMIN_WQ: + case RES_TYPE_ADMIN_RQ: + case RES_TYPE_ADMIN_CQ: return (char __iomem *)vdev->res[type].vaddr + index * VNIC_RES_STRIDE; default: diff --git a/drivers/net/ethernet/cisco/enic/vnic_resource.h b/drivers/net/ethernet/cisco/enic/vnic_resource.h index b4776e334d63..d327821fa9b9 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_resource.h +++ b/drivers/net/ethernet/cisco/enic/vnic_resource.h @@ -42,6 +42,10 @@ enum vnic_res_type { RES_TYPE_DEPRECATED1, /* Old version of devcmd 2 */ RES_TYPE_DEPRECATED2, /* Old version of devcmd 2 */ RES_TYPE_DEVCMD2, /* Device control region */ + RES_TYPE_SRIOV_INTR = 45, /* SR-IOV VF interrupt */ + RES_TYPE_ADMIN_WQ = 49, /* Admin channel WQ */ + RES_TYPE_ADMIN_RQ, /* Admin channel RQ */ + RES_TYPE_ADMIN_CQ, /* Admin channel CQ */ RES_TYPE_MAX, /* Count of resource types */ }; From 803a1b02027918450b58803190aa7cacb8056265 Mon Sep 17 00:00:00 2001 From: Satish Kharat Date: Wed, 1 Apr 2026 08:31:12 -0700 Subject: [PATCH 1135/1561] enic: add V2 SR-IOV VF device ID Register the V2 VF PCI device ID (0x02b7) so the driver binds to V2 virtual functions created via sriov_configure. Update enic_is_sriov_vf() to recognize V2 VFs alongside the existing V1 type. Signed-off-by: Satish Kharat Link: https://patch.msgid.link/20260401-enic-sriov-v2-prep-v4-2-d5834b2ef1b9@cisco.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cisco/enic/enic_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index e839081f9ee4..e16dfbcd2c22 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -66,12 +66,14 @@ #define PCI_DEVICE_ID_CISCO_VIC_ENET 0x0043 /* ethernet vnic */ #define PCI_DEVICE_ID_CISCO_VIC_ENET_DYN 0x0044 /* enet dynamic vnic */ #define PCI_DEVICE_ID_CISCO_VIC_ENET_VF 0x0071 /* enet SRIOV VF */ +#define PCI_DEVICE_ID_CISCO_VIC_ENET_VF_V2 0x02b7 /* enet SRIOV V2 VF */ /* Supported devices */ static const struct pci_device_id enic_id_table[] = { { PCI_VDEVICE(CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET) }, { PCI_VDEVICE(CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET_DYN) }, { PCI_VDEVICE(CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET_VF) }, + { PCI_VDEVICE(CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET_VF_V2) }, { 0, } /* end of table */ }; @@ -307,7 +309,8 @@ int enic_sriov_enabled(struct enic *enic) static int enic_is_sriov_vf(struct enic *enic) { - return enic->pdev->device == PCI_DEVICE_ID_CISCO_VIC_ENET_VF; + return enic->pdev->device == PCI_DEVICE_ID_CISCO_VIC_ENET_VF || + enic->pdev->device == PCI_DEVICE_ID_CISCO_VIC_ENET_VF_V2; } int enic_is_valid_vf(struct enic *enic, int vf) From 56a4d7a865860ac0e52e26a4c8d13de78e7a9707 Mon Sep 17 00:00:00 2001 From: Satish Kharat Date: Wed, 1 Apr 2026 08:31:13 -0700 Subject: [PATCH 1136/1561] enic: detect SR-IOV VF type from PCI capability Read the VF device ID from the SR-IOV PCI capability at probe time to determine whether the PF is configured for V1, USNIC, or V2 virtual functions. Store the result in enic->vf_type for use by subsequent SR-IOV operations. The VF type is a firmware-configured property (set via UCSM, CIMC, Intersight etc) that is immutable from the driver's perspective. Only PFs are probed for this capability; VFs and dynamic vnics skip detection. Signed-off-by: Satish Kharat Link: https://patch.msgid.link/20260401-enic-sriov-v2-prep-v4-3-d5834b2ef1b9@cisco.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cisco/enic/enic.h | 8 +++++ drivers/net/ethernet/cisco/enic/enic_main.c | 37 +++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h index 366c65d072fc..0fd9cd917132 100644 --- a/drivers/net/ethernet/cisco/enic/enic.h +++ b/drivers/net/ethernet/cisco/enic/enic.h @@ -225,6 +225,13 @@ struct enic_rq { struct page_pool *pool; } ____cacheline_aligned; +enum enic_vf_type { + ENIC_VF_TYPE_NONE, + ENIC_VF_TYPE_V1, + ENIC_VF_TYPE_USNIC, + ENIC_VF_TYPE_V2, +}; + /* Per-instance private data structure */ struct enic { struct net_device *netdev; @@ -252,6 +259,7 @@ struct enic { #ifdef CONFIG_PCI_IOV u16 num_vfs; #endif + enum enic_vf_type vf_type; spinlock_t enic_api_lock; bool enic_api_busy; struct enic_port_profile *pp; diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index e16dfbcd2c22..acd05350ec1a 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -67,6 +67,7 @@ #define PCI_DEVICE_ID_CISCO_VIC_ENET_DYN 0x0044 /* enet dynamic vnic */ #define PCI_DEVICE_ID_CISCO_VIC_ENET_VF 0x0071 /* enet SRIOV VF */ #define PCI_DEVICE_ID_CISCO_VIC_ENET_VF_V2 0x02b7 /* enet SRIOV V2 VF */ +#define PCI_DEVICE_ID_CISCO_VIC_ENET_VF_USNIC 0x00cf /* enet USNIC VF */ /* Supported devices */ static const struct pci_device_id enic_id_table[] = { @@ -2621,6 +2622,41 @@ static void enic_iounmap(struct enic *enic) iounmap(enic->bar[i].vaddr); } +#ifdef CONFIG_PCI_IOV +static void enic_sriov_detect_vf_type(struct enic *enic) +{ + struct pci_dev *pdev = enic->pdev; + int pos; + u16 vf_dev_id; + + if (enic_is_sriov_vf(enic) || enic_is_dynamic(enic)) + return; + + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV); + if (!pos) { + enic->vf_type = ENIC_VF_TYPE_NONE; + return; + } + + pci_read_config_word(pdev, pos + PCI_SRIOV_VF_DID, &vf_dev_id); + + switch (vf_dev_id) { + case PCI_DEVICE_ID_CISCO_VIC_ENET_VF: + enic->vf_type = ENIC_VF_TYPE_V1; + break; + case PCI_DEVICE_ID_CISCO_VIC_ENET_VF_USNIC: + enic->vf_type = ENIC_VF_TYPE_USNIC; + break; + case PCI_DEVICE_ID_CISCO_VIC_ENET_VF_V2: + enic->vf_type = ENIC_VF_TYPE_V2; + break; + default: + enic->vf_type = ENIC_VF_TYPE_NONE; + break; + } +} +#endif + static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct device *dev = &pdev->dev; @@ -2734,6 +2770,7 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) num_pps = enic->num_vfs; } } + enic_sriov_detect_vf_type(enic); #endif /* Allocate structure for port profiles */ From 0266ecb59d5242a5d66261a671e42d53a1372f69 Mon Sep 17 00:00:00 2001 From: Satish Kharat Date: Wed, 1 Apr 2026 08:31:14 -0700 Subject: [PATCH 1137/1561] enic: make enic_dev_enable/disable ref-counted Both the data path (ndo_open/ndo_stop) and the upcoming admin channel need to enable and disable the vNIC device independently. Without reference counting, closing the admin channel while the netdev is up would inadvertently disable the entire device. Add an enable_count to struct enic, protected by the existing devcmd_lock. enic_dev_enable() issues CMD_ENABLE_WAIT only on the first caller (0 -> 1 transition), and enic_dev_disable() issues CMD_DISABLE only when the last caller releases (1 -> 0 transition). Also check the return value of enic_dev_enable() in enic_open() and fail the open if the firmware enable command fails. Without this check, a failed enable leaves enable_count at zero while the interface appears up, which can cause a later admin channel enable/disable cycle to incorrectly disable the hardware under the active data path. Signed-off-by: Satish Kharat Link: https://patch.msgid.link/20260401-enic-sriov-v2-prep-v4-4-d5834b2ef1b9@cisco.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cisco/enic/enic.h | 1 + drivers/net/ethernet/cisco/enic/enic_dev.c | 17 +++++++++++++---- drivers/net/ethernet/cisco/enic/enic_main.c | 17 ++++++++++++++++- 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h index 0fd9cd917132..67fd780b1fa1 100644 --- a/drivers/net/ethernet/cisco/enic/enic.h +++ b/drivers/net/ethernet/cisco/enic/enic.h @@ -260,6 +260,7 @@ struct enic { u16 num_vfs; #endif enum enic_vf_type vf_type; + unsigned int enable_count; spinlock_t enic_api_lock; bool enic_api_busy; struct enic_port_profile *pp; diff --git a/drivers/net/ethernet/cisco/enic/enic_dev.c b/drivers/net/ethernet/cisco/enic/enic_dev.c index 2cbae7c6cc3d..659787f73cf1 100644 --- a/drivers/net/ethernet/cisco/enic/enic_dev.c +++ b/drivers/net/ethernet/cisco/enic/enic_dev.c @@ -131,10 +131,13 @@ int enic_dev_set_ig_vlan_rewrite_mode(struct enic *enic) int enic_dev_enable(struct enic *enic) { - int err; + int err = 0; spin_lock_bh(&enic->devcmd_lock); - err = vnic_dev_enable_wait(enic->vdev); + if (enic->enable_count == 0) + err = vnic_dev_enable_wait(enic->vdev); + if (!err) + enic->enable_count++; spin_unlock_bh(&enic->devcmd_lock); return err; @@ -142,10 +145,16 @@ int enic_dev_enable(struct enic *enic) int enic_dev_disable(struct enic *enic) { - int err; + int err = 0; spin_lock_bh(&enic->devcmd_lock); - err = vnic_dev_disable(enic->vdev); + if (enic->enable_count == 0) { + spin_unlock_bh(&enic->devcmd_lock); + return 0; + } + enic->enable_count--; + if (enic->enable_count == 0) + err = vnic_dev_disable(enic->vdev); spin_unlock_bh(&enic->devcmd_lock); return err; diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index acd05350ec1a..e7125b818087 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1750,7 +1750,11 @@ static int enic_open(struct net_device *netdev) if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX) for (i = 0; i < enic->wq_count; i++) napi_enable(&enic->napi[enic_cq_wq(enic, i)]); - enic_dev_enable(enic); + err = enic_dev_enable(enic); + if (err) { + netdev_err(netdev, "Failed to enable device: %d\n", err); + goto err_out_dev_enable; + } for (i = 0; i < enic->intr_count; i++) vnic_intr_unmask(&enic->intr[i]); @@ -1760,6 +1764,17 @@ static int enic_open(struct net_device *netdev) return 0; +err_out_dev_enable: + for (i = 0; i < enic->rq_count; i++) + napi_disable(&enic->napi[i]); + if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX) + for (i = 0; i < enic->wq_count; i++) + napi_disable(&enic->napi[enic_cq_wq(enic, i)]); + netif_tx_disable(netdev); + if (!enic_is_dynamic(enic) && !enic_is_sriov_vf(enic)) + enic_dev_del_station_addr(enic); + for (i = 0; i < enic->wq_count; i++) + vnic_wq_disable(&enic->wq[i].vwq); err_out_free_rq: for (i = 0; i < enic->rq_count; i++) { ret = vnic_rq_disable(&enic->rq[i].vrq); From 730ce15d44971204866575549683c956b3fcfe39 Mon Sep 17 00:00:00 2001 From: Satish Kharat Date: Wed, 1 Apr 2026 08:31:15 -0700 Subject: [PATCH 1138/1561] enic: add type-aware alloc for WQ, RQ, CQ and INTR resources The existing vnic_wq_alloc(), vnic_rq_alloc(), vnic_cq_alloc() and vnic_intr_alloc() hardcode data-path resource types (RES_TYPE_WQ, RES_TYPE_RQ, RES_TYPE_CQ, RES_TYPE_INTR_CTRL). The upcoming admin channel uses different BAR resource types (RES_TYPE_ADMIN_WQ/RQ/CQ, RES_TYPE_SRIOV_INTR) for its queues. Add _with_type() variants that accept an explicit resource type parameter. Refactor the original functions as thin wrappers that pass the default data-path type. No functional change. Signed-off-by: Satish Kharat Link: https://patch.msgid.link/20260401-enic-sriov-v2-prep-v4-5-d5834b2ef1b9@cisco.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cisco/enic/vnic_cq.c | 14 +++++++++++--- drivers/net/ethernet/cisco/enic/vnic_cq.h | 3 +++ drivers/net/ethernet/cisco/enic/vnic_intr.c | 12 +++++++++--- drivers/net/ethernet/cisco/enic/vnic_intr.h | 2 ++ drivers/net/ethernet/cisco/enic/vnic_rq.c | 14 +++++++++++--- drivers/net/ethernet/cisco/enic/vnic_rq.h | 3 +++ drivers/net/ethernet/cisco/enic/vnic_wq.c | 14 +++++++++++--- drivers/net/ethernet/cisco/enic/vnic_wq.h | 3 +++ 8 files changed, 53 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/vnic_cq.c b/drivers/net/ethernet/cisco/enic/vnic_cq.c index 27c885e91552..5a0dbb816223 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_cq.c +++ b/drivers/net/ethernet/cisco/enic/vnic_cq.c @@ -20,13 +20,14 @@ void vnic_cq_free(struct vnic_cq *cq) cq->ctrl = NULL; } -int vnic_cq_alloc(struct vnic_dev *vdev, struct vnic_cq *cq, unsigned int index, - unsigned int desc_count, unsigned int desc_size) +int vnic_cq_alloc_with_type(struct vnic_dev *vdev, struct vnic_cq *cq, + unsigned int index, unsigned int desc_count, + unsigned int desc_size, unsigned int res_type) { cq->index = index; cq->vdev = vdev; - cq->ctrl = vnic_dev_get_res(vdev, RES_TYPE_CQ, index); + cq->ctrl = vnic_dev_get_res(vdev, res_type, index); if (!cq->ctrl) { vdev_err(vdev, "Failed to hook CQ[%d] resource\n", index); return -EINVAL; @@ -35,6 +36,13 @@ int vnic_cq_alloc(struct vnic_dev *vdev, struct vnic_cq *cq, unsigned int index, return vnic_dev_alloc_desc_ring(vdev, &cq->ring, desc_count, desc_size); } +int vnic_cq_alloc(struct vnic_dev *vdev, struct vnic_cq *cq, unsigned int index, + unsigned int desc_count, unsigned int desc_size) +{ + return vnic_cq_alloc_with_type(vdev, cq, index, desc_count, desc_size, + RES_TYPE_CQ); +} + void vnic_cq_init(struct vnic_cq *cq, unsigned int flow_control_enable, unsigned int color_enable, unsigned int cq_head, unsigned int cq_tail, unsigned int cq_tail_color, unsigned int interrupt_enable, diff --git a/drivers/net/ethernet/cisco/enic/vnic_cq.h b/drivers/net/ethernet/cisco/enic/vnic_cq.h index 0e37f5d5e527..d46d4d2ef6bb 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_cq.h +++ b/drivers/net/ethernet/cisco/enic/vnic_cq.h @@ -73,6 +73,9 @@ static inline void vnic_cq_inc_to_clean(struct vnic_cq *cq) void vnic_cq_free(struct vnic_cq *cq); int vnic_cq_alloc(struct vnic_dev *vdev, struct vnic_cq *cq, unsigned int index, unsigned int desc_count, unsigned int desc_size); +int vnic_cq_alloc_with_type(struct vnic_dev *vdev, struct vnic_cq *cq, + unsigned int index, unsigned int desc_count, + unsigned int desc_size, unsigned int res_type); void vnic_cq_init(struct vnic_cq *cq, unsigned int flow_control_enable, unsigned int color_enable, unsigned int cq_head, unsigned int cq_tail, unsigned int cq_tail_color, unsigned int interrupt_enable, diff --git a/drivers/net/ethernet/cisco/enic/vnic_intr.c b/drivers/net/ethernet/cisco/enic/vnic_intr.c index 25319f072a04..010ad8c2108d 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_intr.c +++ b/drivers/net/ethernet/cisco/enic/vnic_intr.c @@ -19,13 +19,13 @@ void vnic_intr_free(struct vnic_intr *intr) intr->ctrl = NULL; } -int vnic_intr_alloc(struct vnic_dev *vdev, struct vnic_intr *intr, - unsigned int index) +int vnic_intr_alloc_with_type(struct vnic_dev *vdev, struct vnic_intr *intr, + unsigned int index, unsigned int res_type) { intr->index = index; intr->vdev = vdev; - intr->ctrl = vnic_dev_get_res(vdev, RES_TYPE_INTR_CTRL, index); + intr->ctrl = vnic_dev_get_res(vdev, res_type, index); if (!intr->ctrl) { vdev_err(vdev, "Failed to hook INTR[%d].ctrl resource\n", index); @@ -35,6 +35,12 @@ int vnic_intr_alloc(struct vnic_dev *vdev, struct vnic_intr *intr, return 0; } +int vnic_intr_alloc(struct vnic_dev *vdev, struct vnic_intr *intr, + unsigned int index) +{ + return vnic_intr_alloc_with_type(vdev, intr, index, RES_TYPE_INTR_CTRL); +} + void vnic_intr_init(struct vnic_intr *intr, u32 coalescing_timer, unsigned int coalescing_type, unsigned int mask_on_assertion) { diff --git a/drivers/net/ethernet/cisco/enic/vnic_intr.h b/drivers/net/ethernet/cisco/enic/vnic_intr.h index 33a72aa10b26..2bc2dbffdb80 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_intr.h +++ b/drivers/net/ethernet/cisco/enic/vnic_intr.h @@ -89,6 +89,8 @@ static inline u32 vnic_intr_legacy_pba(u32 __iomem *legacy_pba) void vnic_intr_free(struct vnic_intr *intr); int vnic_intr_alloc(struct vnic_dev *vdev, struct vnic_intr *intr, unsigned int index); +int vnic_intr_alloc_with_type(struct vnic_dev *vdev, struct vnic_intr *intr, + unsigned int index, unsigned int res_type); void vnic_intr_init(struct vnic_intr *intr, u32 coalescing_timer, unsigned int coalescing_type, unsigned int mask_on_assertion); void vnic_intr_coalescing_timer_set(struct vnic_intr *intr, diff --git a/drivers/net/ethernet/cisco/enic/vnic_rq.c b/drivers/net/ethernet/cisco/enic/vnic_rq.c index 5ae80551f17c..a662d9fd1199 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_rq.c +++ b/drivers/net/ethernet/cisco/enic/vnic_rq.c @@ -69,15 +69,16 @@ void vnic_rq_free(struct vnic_rq *rq) rq->ctrl = NULL; } -int vnic_rq_alloc(struct vnic_dev *vdev, struct vnic_rq *rq, unsigned int index, - unsigned int desc_count, unsigned int desc_size) +int vnic_rq_alloc_with_type(struct vnic_dev *vdev, struct vnic_rq *rq, + unsigned int index, unsigned int desc_count, + unsigned int desc_size, unsigned int res_type) { int err; rq->index = index; rq->vdev = vdev; - rq->ctrl = vnic_dev_get_res(vdev, RES_TYPE_RQ, index); + rq->ctrl = vnic_dev_get_res(vdev, res_type, index); if (!rq->ctrl) { vdev_err(vdev, "Failed to hook RQ[%d] resource\n", index); return -EINVAL; @@ -98,6 +99,13 @@ int vnic_rq_alloc(struct vnic_dev *vdev, struct vnic_rq *rq, unsigned int index, return 0; } +int vnic_rq_alloc(struct vnic_dev *vdev, struct vnic_rq *rq, unsigned int index, + unsigned int desc_count, unsigned int desc_size) +{ + return vnic_rq_alloc_with_type(vdev, rq, index, desc_count, desc_size, + RES_TYPE_RQ); +} + static void vnic_rq_init_start(struct vnic_rq *rq, unsigned int cq_index, unsigned int fetch_index, unsigned int posted_index, unsigned int error_interrupt_enable, diff --git a/drivers/net/ethernet/cisco/enic/vnic_rq.h b/drivers/net/ethernet/cisco/enic/vnic_rq.h index a1cdd729caec..9fc2090eac44 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_rq.h +++ b/drivers/net/ethernet/cisco/enic/vnic_rq.h @@ -196,6 +196,9 @@ static inline int vnic_rq_fill(struct vnic_rq *rq, void vnic_rq_free(struct vnic_rq *rq); int vnic_rq_alloc(struct vnic_dev *vdev, struct vnic_rq *rq, unsigned int index, unsigned int desc_count, unsigned int desc_size); +int vnic_rq_alloc_with_type(struct vnic_dev *vdev, struct vnic_rq *rq, + unsigned int index, unsigned int desc_count, + unsigned int desc_size, unsigned int res_type); void vnic_rq_init(struct vnic_rq *rq, unsigned int cq_index, unsigned int error_interrupt_enable, unsigned int error_interrupt_offset); diff --git a/drivers/net/ethernet/cisco/enic/vnic_wq.c b/drivers/net/ethernet/cisco/enic/vnic_wq.c index 29c7900349b2..5a20bdc62141 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_wq.c +++ b/drivers/net/ethernet/cisco/enic/vnic_wq.c @@ -72,15 +72,16 @@ void vnic_wq_free(struct vnic_wq *wq) wq->ctrl = NULL; } -int vnic_wq_alloc(struct vnic_dev *vdev, struct vnic_wq *wq, unsigned int index, - unsigned int desc_count, unsigned int desc_size) +int vnic_wq_alloc_with_type(struct vnic_dev *vdev, struct vnic_wq *wq, + unsigned int index, unsigned int desc_count, + unsigned int desc_size, unsigned int res_type) { int err; wq->index = index; wq->vdev = vdev; - wq->ctrl = vnic_dev_get_res(vdev, RES_TYPE_WQ, index); + wq->ctrl = vnic_dev_get_res(vdev, res_type, index); if (!wq->ctrl) { vdev_err(vdev, "Failed to hook WQ[%d] resource\n", index); return -EINVAL; @@ -101,6 +102,13 @@ int vnic_wq_alloc(struct vnic_dev *vdev, struct vnic_wq *wq, unsigned int index, return 0; } +int vnic_wq_alloc(struct vnic_dev *vdev, struct vnic_wq *wq, unsigned int index, + unsigned int desc_count, unsigned int desc_size) +{ + return vnic_wq_alloc_with_type(vdev, wq, index, desc_count, desc_size, + RES_TYPE_WQ); +} + int enic_wq_devcmd2_alloc(struct vnic_dev *vdev, struct vnic_wq *wq, unsigned int desc_count, unsigned int desc_size) { diff --git a/drivers/net/ethernet/cisco/enic/vnic_wq.h b/drivers/net/ethernet/cisco/enic/vnic_wq.h index 3bb4758100ba..1d448b7a9ba2 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_wq.h +++ b/drivers/net/ethernet/cisco/enic/vnic_wq.h @@ -165,6 +165,9 @@ static inline void vnic_wq_service(struct vnic_wq *wq, void vnic_wq_free(struct vnic_wq *wq); int vnic_wq_alloc(struct vnic_dev *vdev, struct vnic_wq *wq, unsigned int index, unsigned int desc_count, unsigned int desc_size); +int vnic_wq_alloc_with_type(struct vnic_dev *vdev, struct vnic_wq *wq, + unsigned int index, unsigned int desc_count, + unsigned int desc_size, unsigned int res_type); void vnic_wq_init(struct vnic_wq *wq, unsigned int cq_index, unsigned int error_interrupt_enable, unsigned int error_interrupt_offset); From 4368f5fab419b43068dea912536b1f4a724b4bba Mon Sep 17 00:00:00 2001 From: Satish Kharat Date: Wed, 1 Apr 2026 08:31:16 -0700 Subject: [PATCH 1139/1561] enic: detect admin channel resources for SR-IOV Check for the presence of admin channel BAR resources (RES_TYPE_ADMIN_WQ, ADMIN_RQ, ADMIN_CQ, SRIOV_INTR) during resource discovery. Set has_admin_channel when all four are available. Use ARRAY_SIZE(enic->admin_cq) for the admin CQ count check since the driver allocates two admin CQs (one for WQ completions, one for RQ completions) and both must be backed by hardware resources. Add admin WQ, RQ, CQ and INTR fields to struct enic for use by the upcoming admin channel open/close paths. Signed-off-by: Satish Kharat Link: https://patch.msgid.link/20260401-enic-sriov-v2-prep-v4-6-d5834b2ef1b9@cisco.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cisco/enic/enic.h | 7 +++++++ drivers/net/ethernet/cisco/enic/enic_res.c | 12 ++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h index 67fd780b1fa1..08472420f3a1 100644 --- a/drivers/net/ethernet/cisco/enic/enic.h +++ b/drivers/net/ethernet/cisco/enic/enic.h @@ -289,6 +289,13 @@ struct enic { u8 rss_key[ENIC_RSS_LEN]; struct vnic_gen_stats gen_stats; enum ext_cq ext_cq; + + /* Admin channel resources for SR-IOV MBOX */ + bool has_admin_channel; + struct vnic_wq admin_wq; + struct vnic_rq admin_rq; + struct vnic_cq admin_cq[2]; + struct vnic_intr admin_intr; }; static inline struct net_device *vnic_get_netdev(struct vnic_dev *vdev) diff --git a/drivers/net/ethernet/cisco/enic/enic_res.c b/drivers/net/ethernet/cisco/enic/enic_res.c index bbd3143ed73e..2b7545d6a67f 100644 --- a/drivers/net/ethernet/cisco/enic/enic_res.c +++ b/drivers/net/ethernet/cisco/enic/enic_res.c @@ -205,10 +205,18 @@ void enic_get_res_counts(struct enic *enic) enic->cq_count = enic->cq_avail; enic->intr_count = enic->intr_avail; + enic->has_admin_channel = + vnic_dev_get_res_count(enic->vdev, RES_TYPE_ADMIN_WQ) >= 1 && + vnic_dev_get_res_count(enic->vdev, RES_TYPE_ADMIN_RQ) >= 1 && + vnic_dev_get_res_count(enic->vdev, RES_TYPE_ADMIN_CQ) >= + ARRAY_SIZE(enic->admin_cq) && + vnic_dev_get_res_count(enic->vdev, RES_TYPE_SRIOV_INTR) >= 1; + dev_info(enic_get_dev(enic), - "vNIC resources avail: wq %d rq %d cq %d intr %d\n", + "vNIC resources avail: wq %d rq %d cq %d intr %d admin %s\n", enic->wq_avail, enic->rq_avail, - enic->cq_avail, enic->intr_avail); + enic->cq_avail, enic->intr_avail, + enic->has_admin_channel ? "yes" : "no"); } void enic_init_vnic_resources(struct enic *enic) From a9b460225e47a3d98296eba71c62ff0ad58a2032 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 Apr 2026 15:26:54 +0000 Subject: [PATCH 1140/1561] net: always inline some skb helpers Some performance critical helpers from include/linux/skbuff.h are not inlined by clang. Use __always_inline hint for: - __skb_fill_netmem_desc() - __skb_fill_page_desc() - skb_fill_netmem_desc() - skb_fill_page_desc() - __skb_pull() - pskb_may_pull_reason() - pskb_may_pull() - pskb_pull() - pskb_trim() - skb_orphan() - skb_postpull_rcsum() - skb_header_pointer() - skb_clear_delivery_time() - skb_tstamp_cond() - skb_warn_if_lro() This increases performance and saves ~1200 bytes of text. $ scripts/bloat-o-meter -t vmlinux.old vmlinux.new add/remove: 4/24 grow/shrink: 66/12 up/down: 4104/-5306 (-1202) Function old new delta ip_multipath_l3_keys - 303 +303 tcp_sendmsg_locked 4560 4848 +288 xfrm_input 6240 6455 +215 esp_output_head 1516 1711 +195 skb_try_coalesce 696 866 +170 bpf_prog_test_run_skb 1951 2091 +140 tls_strp_read_copy 528 667 +139 gue_udp_recv 738 871 +133 __ip6_append_data 4159 4279 +120 __bond_xmit_hash 1019 1122 +103 ip6_multipath_l3_keys 394 495 +101 bpf_lwt_seg6_action 1096 1197 +101 input_action_end_dx2 344 442 +98 vxlan_remcsum 487 581 +94 udpv6_queue_rcv_skb 393 480 +87 udp_queue_rcv_skb 385 471 +86 gue_remcsum 453 539 +86 udp_lib_checksum_complete 84 168 +84 vxlan_xmit 2777 2857 +80 nf_reset_ct 456 532 +76 igmp_rcv 1902 1978 +76 mpls_forward 1097 1169 +72 tcp_add_backlog 1226 1292 +66 nfulnl_log_packet 3091 3156 +65 tcp_rcv_established 1966 2026 +60 __strp_recv 1547 1603 +56 eth_type_trans 357 411 +54 bond_flow_ip 392 444 +52 __icmp_send 1584 1630 +46 ip_defrag 1636 1681 +45 tpacket_rcv 2793 2837 +44 refcount_add 132 176 +44 nf_ct_frag6_gather 1959 2003 +44 napi_skb_free_stolen_head 199 240 +41 __pskb_trim - 41 +41 napi_reuse_skb 319 358 +39 icmpv6_rcv 1877 1916 +39 br_handle_frame_finish 1672 1711 +39 ip_rcv_core 841 879 +38 ip_check_defrag 377 415 +38 br_stp_rcv 909 947 +38 qdisc_pkt_len_segs_init 366 399 +33 mld_query_work 2945 2975 +30 bpf_sk_assign_tcp_reqsk 607 637 +30 udp_gro_receive 1657 1686 +29 ip6_rcv_core 1170 1193 +23 ah_input 1176 1197 +21 tun_get_user 5174 5194 +20 llc_rcv 815 834 +19 __pfx_udp_lib_checksum_complete 16 32 +16 __pfx_refcount_add 48 64 +16 __pfx_nf_reset_ct 96 112 +16 __pfx_ip_multipath_l3_keys - 16 +16 __pfx___pskb_trim - 16 +16 packet_sendmsg 5771 5781 +10 esp_output_tail 1460 1470 +10 alloc_skb_with_frags 433 443 +10 xsk_generic_xmit 3477 3486 +9 mptcp_sendmsg_frag 2250 2259 +9 __ip_append_data 4166 4175 +9 __ip6_tnl_rcv 1159 1168 +9 skb_zerocopy 1215 1220 +5 gre_parse_header 1358 1362 +4 __iptunnel_pull_header 405 407 +2 skb_vlan_untag 692 693 +1 psp_dev_rcv 701 702 +1 netkit_xmit 1263 1264 +1 gre_rcv 2776 2777 +1 gre_gso_segment 1521 1522 +1 bpf_skb_net_hdr_pop 535 536 +1 udp6_ufo_fragment 888 884 -4 br_multicast_rcv 9154 9148 -6 snap_rcv 312 305 -7 skb_copy_ubufs 1841 1834 -7 __pfx_skb_tstamp_cond 16 - -16 __pfx_skb_clear_delivery_time 16 - -16 __pfx_pskb_trim 16 - -16 __pfx_pskb_pull 16 - -16 ipv6_gso_segment 1400 1383 -17 ipv6_frag_rcv 2511 2492 -19 erspan_xmit 1221 1190 -31 __pfx_skb_warn_if_lro 32 - -32 __pfx___skb_fill_page_desc 32 - -32 skb_tstamp_cond 42 - -42 pskb_trim 46 - -46 __pfx_skb_postpull_rcsum 48 - -48 tcp_gso_segment 1524 1475 -49 skb_clear_delivery_time 54 - -54 __pfx_skb_fill_page_desc 64 - -64 __pfx_skb_header_pointer 80 - -80 pskb_pull 91 - -91 skb_warn_if_lro 110 - -110 tcp_v6_rcv 3288 3170 -118 __pfx___skb_pull 128 - -128 __pfx_skb_orphan 144 - -144 __pfx_pskb_may_pull 160 - -160 tcp_v4_rcv 3334 3153 -181 __skb_fill_page_desc 231 - -231 udp_rcv 1809 1553 -256 skb_postpull_rcsum 318 - -318 skb_header_pointer 367 - -367 fib_multipath_hash 3399 3018 -381 skb_orphan 513 - -513 skb_fill_page_desc 534 - -534 __skb_pull 568 - -568 pskb_may_pull 604 - -604 Total: Before=29652698, After=29651496, chg -0.00% Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260402152654.1720627-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 46 ++++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index fbfa9852e82a..26fe18bcfad8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2605,8 +2605,9 @@ static inline void skb_len_add(struct sk_buff *skb, int delta) * * Does not take any additional reference on the fragment. */ -static inline void __skb_fill_netmem_desc(struct sk_buff *skb, int i, - netmem_ref netmem, int off, int size) +static __always_inline void +__skb_fill_netmem_desc(struct sk_buff *skb, int i, netmem_ref netmem, + int off, int size) { struct page *page; @@ -2628,14 +2629,16 @@ static inline void __skb_fill_netmem_desc(struct sk_buff *skb, int i, skb->pfmemalloc = true; } -static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, - struct page *page, int off, int size) +static __always_inline void +__skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, + int off, int size) { __skb_fill_netmem_desc(skb, i, page_to_netmem(page), off, size); } -static inline void skb_fill_netmem_desc(struct sk_buff *skb, int i, - netmem_ref netmem, int off, int size) +static __always_inline void +skb_fill_netmem_desc(struct sk_buff *skb, int i, netmem_ref netmem, + int off, int size) { __skb_fill_netmem_desc(skb, i, netmem, off, size); skb_shinfo(skb)->nr_frags = i + 1; @@ -2655,8 +2658,9 @@ static inline void skb_fill_netmem_desc(struct sk_buff *skb, int i, * * Does not take any additional reference on the fragment. */ -static inline void skb_fill_page_desc(struct sk_buff *skb, int i, - struct page *page, int off, int size) +static __always_inline void +skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, + int off, int size) { skb_fill_netmem_desc(skb, i, page_to_netmem(page), off, size); } @@ -2828,7 +2832,7 @@ static inline void *__skb_push(struct sk_buff *skb, unsigned int len) } void *skb_pull(struct sk_buff *skb, unsigned int len); -static inline void *__skb_pull(struct sk_buff *skb, unsigned int len) +static __always_inline void *__skb_pull(struct sk_buff *skb, unsigned int len) { DEBUG_NET_WARN_ON_ONCE(len > INT_MAX); @@ -2853,7 +2857,7 @@ void *skb_pull_data(struct sk_buff *skb, size_t len); void *__pskb_pull_tail(struct sk_buff *skb, int delta); -static inline enum skb_drop_reason +static __always_inline enum skb_drop_reason pskb_may_pull_reason(struct sk_buff *skb, unsigned int len) { DEBUG_NET_WARN_ON_ONCE(len > INT_MAX); @@ -2871,12 +2875,13 @@ pskb_may_pull_reason(struct sk_buff *skb, unsigned int len) return SKB_NOT_DROPPED_YET; } -static inline bool pskb_may_pull(struct sk_buff *skb, unsigned int len) +static __always_inline bool +pskb_may_pull(struct sk_buff *skb, unsigned int len) { return pskb_may_pull_reason(skb, len) == SKB_NOT_DROPPED_YET; } -static inline void *pskb_pull(struct sk_buff *skb, unsigned int len) +static __always_inline void *pskb_pull(struct sk_buff *skb, unsigned int len) { if (!pskb_may_pull(skb, len)) return NULL; @@ -3337,7 +3342,7 @@ static inline int __pskb_trim(struct sk_buff *skb, unsigned int len) return 0; } -static inline int pskb_trim(struct sk_buff *skb, unsigned int len) +static __always_inline int pskb_trim(struct sk_buff *skb, unsigned int len) { skb_might_realloc(skb); return (len < skb->len) ? __pskb_trim(skb, len) : 0; @@ -3380,7 +3385,7 @@ static inline int __skb_grow(struct sk_buff *skb, unsigned int len) * destructor function and make the @skb unowned. The buffer continues * to exist but is no longer charged to its former owner. */ -static inline void skb_orphan(struct sk_buff *skb) +static __always_inline void skb_orphan(struct sk_buff *skb) { if (skb->destructor) { skb->destructor(skb); @@ -4044,8 +4049,8 @@ __skb_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len, * update the CHECKSUM_COMPLETE checksum, or set ip_summed to * CHECKSUM_NONE so that it can be recomputed from scratch. */ -static inline void skb_postpull_rcsum(struct sk_buff *skb, - const void *start, unsigned int len) +static __always_inline void +skb_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len) { if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = wsum_negate(csum_partial(start, len, @@ -4304,7 +4309,7 @@ __skb_header_pointer(const struct sk_buff *skb, int offset, int len, return buffer; } -static inline void * __must_check +static __always_inline void * __must_check skb_header_pointer(const struct sk_buff *skb, int offset, int len, void *buffer) { return __skb_header_pointer(skb, offset, len, skb->data, @@ -4476,7 +4481,7 @@ DECLARE_STATIC_KEY_FALSE(netstamp_needed_key); /* It is used in the ingress path to clear the delivery_time. * If needed, set the skb->tstamp to the (rcv) timestamp. */ -static inline void skb_clear_delivery_time(struct sk_buff *skb) +static __always_inline void skb_clear_delivery_time(struct sk_buff *skb) { if (skb->tstamp_type) { skb->tstamp_type = SKB_CLOCK_REALTIME; @@ -4503,7 +4508,8 @@ static inline ktime_t skb_tstamp(const struct sk_buff *skb) return skb->tstamp; } -static inline ktime_t skb_tstamp_cond(const struct sk_buff *skb, bool cond) +static __always_inline ktime_t +skb_tstamp_cond(const struct sk_buff *skb, bool cond) { if (skb->tstamp_type != SKB_CLOCK_MONOTONIC && skb->tstamp) return skb->tstamp; @@ -5293,7 +5299,7 @@ static inline void skb_decrease_gso_size(struct skb_shared_info *shinfo, void __skb_warn_lro_forwarding(const struct sk_buff *skb); -static inline bool skb_warn_if_lro(const struct sk_buff *skb) +static __always_inline bool skb_warn_if_lro(const struct sk_buff *skb) { /* LRO sets gso_size but not gso_type, whereas if GSO is really * wanted then gso_type will be set. */ From e2f152c822cf5d37b3fc5db8e10ce25448dc12d5 Mon Sep 17 00:00:00 2001 From: Julian Braha Date: Thu, 2 Apr 2026 15:58:58 +0100 Subject: [PATCH 1141/1561] stmmac: cleanup dead dependencies on STMMAC_PLATFORM and STMMAC_ETH in Kconfig There are already 'if STMMAC_ETH' and 'STMMAC_PLATFORM' conditions wrapping these config options, making the 'depends on' statements duplicate dependencies (dead code). I propose leaving the outer 'if STMMAC_PLATFORM...endif' and 'if STMMAC_ETH...endif' conditions, and removing the individual 'depends on' statements. This dead code was found by kconfirm, a static analysis tool for Kconfig. Signed-off-by: Julian Braha Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20260402145858.240231-1-julianbraha@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/Kconfig | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index c2cb530fd0a2..e3dd5adda5ac 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -20,7 +20,6 @@ if STMMAC_ETH config STMMAC_SELFTESTS bool "Support for STMMAC Selftests" depends on INET - depends on STMMAC_ETH default n help This adds support for STMMAC Selftests using ethtool. Enable this @@ -29,7 +28,6 @@ config STMMAC_SELFTESTS config STMMAC_PLATFORM tristate "STMMAC Platform bus support" - depends on STMMAC_ETH select MFD_SYSCON default y help @@ -336,7 +334,6 @@ config DWMAC_IMX8 config DWMAC_INTEL_PLAT tristate "Intel dwmac support" depends on OF && COMMON_CLK - depends on STMMAC_ETH help Support for ethernet controllers on Intel SoCs @@ -371,7 +368,7 @@ config DWMAC_VISCONTI help Support for ethernet controller on Visconti SoCs. -endif +endif # STMMAC_PLATFORM config STMMAC_LIBPCI tristate @@ -381,7 +378,7 @@ config STMMAC_LIBPCI config DWMAC_INTEL tristate "Intel GMAC support" default X86 - depends on X86 && STMMAC_ETH && PCI + depends on X86 && PCI depends on COMMON_CLK depends on ACPI help @@ -420,4 +417,4 @@ config STMMAC_PCI If you have a controller with this interface, say Y or M here. If unsure, say N. -endif +endif # STMMAC_ETH From 789ec16eb397e7d1286e92a859859493f35878fe Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 2 Apr 2026 15:47:53 +0100 Subject: [PATCH 1142/1561] net: stmmac: qcom-ethqos: set clk_csr The clocks for qcom-ethqos return a rate of zero as firmware manages their rate. According to hardware documentation, the clock which is fed to the slave AHB interface can range between 50 to 100MHz for non-RGMII and 30 to 75MHz for boards with a RGMII interfaces. Currently, stmmac uses an undefined divisor value. Instead, use STMMAC_CSR_60_100M which will mean we meet IEEE 802.3 specification since this will generate: 714kHz @ 30MHz 1.19MHz @ 50MHz 1.79MHz @ 75MHz 2.42MHz @ 100MHz This gives MDC rates within the IEEE 802.3 specification, although the 30MHz case is particularly slow. Selecting the next lowest divisor, STMMAC_CSR_35_60M, which is /26 will give: 1.15MHz @ 30MHz 1.92MHz @ 50MHz 2.88MHz @ 75MHz (exceeding 802.3 spec) 3.85MHz @ 100MHz (exceeding 802.3 spec) Unfortunately, this divisor makes the upper bound of both ranges exeed the IEEE 802.3 specification, and thus we can not use it without knowing for certain what the current CSR clock rate actually is. So, STMMAC_CSR_60_100M is the best fit for all boards based on the information provided thus far. Link: https://lore.kernel.org/r/acGhQ0oui+dVRdLY@oss.qualcomm.com Link: https://lore.kernel.org/r/acw1habUsiSqlrky@oss.qualcomm.com Reviewed-by: Mohd Ayaan Anwar Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1w8JKr-0000000EdLC-41Bt@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index ad3a983d2a08..ac7d6d3e205a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -764,6 +764,12 @@ static int qcom_ethqos_probe(struct platform_device *pdev) qcom_ethqos_set_sgmii_loopback(ethqos, true); ethqos_set_func_clk_en(ethqos); + /* The clocks are controlled by firmware, so we don't know for certain + * what clock rate is being used. Hardware documentation mentions that + * the AHB slave clock will be in the range of 50 to 100MHz, which + * equates to a MDC between 1.19 and 2.38MHz. + */ + plat_dat->clk_csr = STMMAC_CSR_60_100M; plat_dat->bsp_priv = ethqos; plat_dat->set_clk_tx_rate = ethqos_set_clk_tx_rate; plat_dat->dump_debug_regs = rgmii_dump; From 30f831b44a98a660ccaf608f88d8bb945318dc59 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 2 Apr 2026 13:59:53 -0700 Subject: [PATCH 1143/1561] selftests: drv-net: gro: add data burst test case Add a test trying to induce a GRO context timeout followed by another sequence of packets for the same flow. The second burst arrives 100ms after the first one so any implementation (SW or HW) must time out waiting at that point. We expect both bursts to be aggregated successfully but separately. Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260402210000.1512696-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/gro.py | 2 +- tools/testing/selftests/net/lib/gro.c | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/gro.py b/tools/testing/selftests/drivers/net/gro.py index 70709bf670c7..10da5d4bee9b 100755 --- a/tools/testing/selftests/drivers/net/gro.py +++ b/tools/testing/selftests/drivers/net/gro.py @@ -289,7 +289,7 @@ def _gro_variants(): # Tests that work for all protocols common_tests = [ - "data_same", "data_lrg_sml", "data_sml_lrg", + "data_same", "data_lrg_sml", "data_sml_lrg", "data_burst", "ack", "flags_psh", "flags_syn", "flags_rst", "flags_urg", "flags_cwr", "tcp_csum", "tcp_seq", "tcp_ts", "tcp_opt", diff --git a/tools/testing/selftests/net/lib/gro.c b/tools/testing/selftests/net/lib/gro.c index 3e611ae25f61..4d002af4a7aa 100644 --- a/tools/testing/selftests/net/lib/gro.c +++ b/tools/testing/selftests/net/lib/gro.c @@ -12,6 +12,7 @@ * - data_same: same size packets coalesce * - data_lrg_sml: large then small coalesces * - data_sml_lrg: small then large doesn't coalesce + * - data_burst: two bursts of two, separated by 100ms * * ack: * Pure ACK does not coalesce. @@ -1298,6 +1299,21 @@ static void gro_sender(void) } else if (strcmp(testname, "data_sml_lrg") == 0) { send_data_pkts(txfd, &daddr, PAYLOAD_LEN / 2, PAYLOAD_LEN); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "data_burst") == 0) { + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(txfd, buf, total_hdr_len + PAYLOAD_LEN, &daddr); + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + write_packet(txfd, buf, total_hdr_len + PAYLOAD_LEN, &daddr); + + usleep(100 * 1000); /* 100ms */ + create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); + write_packet(txfd, buf, total_hdr_len + PAYLOAD_LEN, &daddr); + create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0); + write_packet(txfd, buf, total_hdr_len + PAYLOAD_LEN, &daddr); + + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); /* ack test */ } else if (strcmp(testname, "ack") == 0) { @@ -1463,6 +1479,11 @@ static void gro_receiver(void) correct_payload[0] = PAYLOAD_LEN / 2; correct_payload[1] = PAYLOAD_LEN; check_recv_pkts(rxfd, correct_payload, 2); + } else if (strcmp(testname, "data_burst") == 0) { + printf("two bursts of two data packets: "); + correct_payload[0] = PAYLOAD_LEN * 2; + correct_payload[1] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 2); /* ack test */ } else if (strcmp(testname, "ack") == 0) { From 436ea8a1b7ed1328cf24cebb3ebaa21aa0c6a81a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 2 Apr 2026 13:59:54 -0700 Subject: [PATCH 1144/1561] selftests: drv-net: gro: add 1 byte payload test Small IPv4 packets get padded to 60B, this may break / confuse some buggy implementations. Add a test to coalesce a 1B payload. Keep this separate from the lrg_sml test because I suspect some implementations may not handle this case (treat padded frames as ineligible for coalescing). Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260402210000.1512696-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/gro.py | 4 +++- tools/testing/selftests/net/lib/gro.c | 12 ++++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/drivers/net/gro.py b/tools/testing/selftests/drivers/net/gro.py index 10da5d4bee9b..73436d16b40d 100755 --- a/tools/testing/selftests/drivers/net/gro.py +++ b/tools/testing/selftests/drivers/net/gro.py @@ -11,6 +11,7 @@ coalescing behavior. Test cases: - data_same: Same size data packets coalesce - data_lrg_sml: Large packet followed by smaller one coalesces + - data_lrg_1byte: Large packet followed by 1B one coalesces (Ethernet padding) - data_sml_lrg: Small packet followed by larger one doesn't coalesce - ack: Pure ACK packets do not coalesce - flags_psh: Packets with PSH flag don't coalesce @@ -289,7 +290,8 @@ def _gro_variants(): # Tests that work for all protocols common_tests = [ - "data_same", "data_lrg_sml", "data_sml_lrg", "data_burst", + "data_same", "data_lrg_sml", "data_sml_lrg", "data_lrg_1byte", + "data_burst", "ack", "flags_psh", "flags_syn", "flags_rst", "flags_urg", "flags_cwr", "tcp_csum", "tcp_seq", "tcp_ts", "tcp_opt", diff --git a/tools/testing/selftests/net/lib/gro.c b/tools/testing/selftests/net/lib/gro.c index 4d002af4a7aa..27ccd742615a 100644 --- a/tools/testing/selftests/net/lib/gro.c +++ b/tools/testing/selftests/net/lib/gro.c @@ -10,8 +10,9 @@ * packet coalesced: it can be smaller than the rest and coalesced * as long as it is in the same flow. * - data_same: same size packets coalesce - * - data_lrg_sml: large then small coalesces - * - data_sml_lrg: small then large doesn't coalesce + * - data_lrg_sml: large then small coalesces + * - data_lrg_1byte: large then 1 byte coalesces (Ethernet padding) + * - data_sml_lrg: small then large doesn't coalesce * - data_burst: two bursts of two, separated by 100ms * * ack: @@ -1296,6 +1297,9 @@ static void gro_sender(void) } else if (strcmp(testname, "data_lrg_sml") == 0) { send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN / 2); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "data_lrg_1byte") == 0) { + send_data_pkts(txfd, &daddr, PAYLOAD_LEN, 1); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); } else if (strcmp(testname, "data_sml_lrg") == 0) { send_data_pkts(txfd, &daddr, PAYLOAD_LEN / 2, PAYLOAD_LEN); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); @@ -1474,6 +1478,10 @@ static void gro_receiver(void) printf("large data packets followed by a smaller one: "); correct_payload[0] = PAYLOAD_LEN * 1.5; check_recv_pkts(rxfd, correct_payload, 1); + } else if (strcmp(testname, "data_lrg_1byte") == 0) { + printf("large data packet followed by a 1 byte one: "); + correct_payload[0] = PAYLOAD_LEN + 1; + check_recv_pkts(rxfd, correct_payload, 1); } else if (strcmp(testname, "data_sml_lrg") == 0) { printf("small data packets followed by a larger one: "); correct_payload[0] = PAYLOAD_LEN / 2; From d97348474708d5f67bec6da28c2e14e12598b965 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 2 Apr 2026 13:59:55 -0700 Subject: [PATCH 1145/1561] selftests: drv-net: gro: always wait for FIN in the capacity test The new capacity/order test exits as soon as it sees the expected packet sequence. This may allow the "flushing" FIN packet to spill over to the next test. Let's always wait for the FIN before exiting. Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260402210000.1512696-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/lib/gro.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/lib/gro.c b/tools/testing/selftests/net/lib/gro.c index 27ccd742615a..f05398c18e0c 100644 --- a/tools/testing/selftests/net/lib/gro.c +++ b/tools/testing/selftests/net/lib/gro.c @@ -1154,7 +1154,7 @@ static void check_capacity_pkts(int fd) memset(coalesced, 0, sizeof(coalesced)); memset(flow_order, -1, sizeof(flow_order)); - while (total_data < num_flows * CAPACITY_PAYLOAD_LEN * 2) { + while (1) { ip_ext_len = 0; pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0); if (pkt_size < 0) @@ -1167,7 +1167,6 @@ static void check_capacity_pkts(int fd) tcph = (struct tcphdr *)(buffer + tcp_offset + ip_ext_len); - /* FIN packet terminates reception */ if (tcph->fin) break; @@ -1189,7 +1188,13 @@ static void check_capacity_pkts(int fd) data_len = pkt_size - total_hdr_len - ip_ext_len; } - flow_order[num_pkt] = flow_id; + if (num_pkt < num_flows * 2) { + flow_order[num_pkt] = flow_id; + } else if (num_pkt == num_flows * 2) { + vlog("More packets than expected (%d)\n", + num_flows * 2); + fail_reason = fail_reason ?: "too many packets"; + } coalesced[flow_id] = data_len; if (data_len == CAPACITY_PAYLOAD_LEN * 2) { From 5469b695f23642611c47e31329025a21de86c629 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 2 Apr 2026 13:59:56 -0700 Subject: [PATCH 1146/1561] selftests: drv-net: gro: prepare for ip6ip6 support Try to use already calculated offsets and not depend on the ipip flag as much. This patch should not change any functionality, it's just a cleanup to make ip6ip6 support easier. Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260402210000.1512696-5-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/lib/gro.c | 73 +++++++++++++++------------ 1 file changed, 41 insertions(+), 32 deletions(-) diff --git a/tools/testing/selftests/net/lib/gro.c b/tools/testing/selftests/net/lib/gro.c index f05398c18e0c..57080ecc3df8 100644 --- a/tools/testing/selftests/net/lib/gro.c +++ b/tools/testing/selftests/net/lib/gro.c @@ -139,6 +139,14 @@ static bool order_check; #define TXTIME_DELAY_MS 5 +/* Max TCP payload that GRO will coalesce. The outer header overhead + * varies by encapsulation, reducing the effective max payload. + */ +static int max_payload(void) +{ + return IP_MAXPACKET - (total_hdr_len - ETH_HLEN); +} + static void vlog(const char *fmt, ...) { va_list args; @@ -156,15 +164,13 @@ static void setup_sock_filter(int fd) const int ethproto_off = offsetof(struct ethhdr, h_proto); int optlen = 0; int ipproto_off, opt_ipproto_off; - int next_off; - if (ipip) - next_off = sizeof(struct iphdr) + offsetof(struct iphdr, protocol); - else if (proto == PF_INET) - next_off = offsetof(struct iphdr, protocol); + if (proto == PF_INET) + ipproto_off = tcp_offset - sizeof(struct iphdr) + + offsetof(struct iphdr, protocol); else - next_off = offsetof(struct ipv6hdr, nexthdr); - ipproto_off = ETH_HLEN + next_off; + ipproto_off = tcp_offset - sizeof(struct ipv6hdr) + + offsetof(struct ipv6hdr, nexthdr); /* Overridden later if exthdrs are used: */ opt_ipproto_off = ipproto_off; @@ -381,19 +387,23 @@ static void write_packet(int fd, char *buf, int len, struct sockaddr_ll *daddr) static void create_packet(void *buf, int seq_offset, int ack_offset, int payload_len, int fin) { + int ip_hdr_len = (proto == PF_INET) ? + sizeof(struct iphdr) : sizeof(struct ipv6hdr); + int inner_ip_off = tcp_offset - ip_hdr_len; + memset(buf, 0, total_hdr_len); memset(buf + total_hdr_len, 'a', payload_len); fill_transportlayer(buf + tcp_offset, seq_offset, ack_offset, payload_len, fin); - if (ipip) { - fill_networklayer(buf + ETH_HLEN, payload_len + sizeof(struct iphdr), - IPPROTO_IPIP); - fill_networklayer(buf + ETH_HLEN + sizeof(struct iphdr), - payload_len, IPPROTO_TCP); - } else { - fill_networklayer(buf + ETH_HLEN, payload_len, IPPROTO_TCP); + fill_networklayer(buf + inner_ip_off, payload_len, IPPROTO_TCP); + if (inner_ip_off > ETH_HLEN) { + int encap_proto = (proto == PF_INET) ? + IPPROTO_IPIP : IPPROTO_IPV6; + + fill_networklayer(buf + ETH_HLEN, + payload_len + ip_hdr_len, encap_proto); } fill_datalinklayer(buf); @@ -547,8 +557,7 @@ static void send_ack(int fd, struct sockaddr_ll *daddr) static void recompute_packet(char *buf, char *no_ext, int extlen) { struct tcphdr *tcphdr = (struct tcphdr *)(buf + tcp_offset); - struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN); - struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN); + int off; memmove(buf, no_ext, total_hdr_len); memmove(buf + total_hdr_len + extlen, @@ -558,18 +567,22 @@ static void recompute_packet(char *buf, char *no_ext, int extlen) tcphdr->check = 0; tcphdr->check = tcp_checksum(tcphdr, PAYLOAD_LEN + extlen); if (proto == PF_INET) { - iph->tot_len = htons(ntohs(iph->tot_len) + extlen); - iph->check = 0; - iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); + for (off = ETH_HLEN; off < tcp_offset; + off += sizeof(struct iphdr)) { + struct iphdr *iph = (struct iphdr *)(buf + off); - if (ipip) { - iph += 1; iph->tot_len = htons(ntohs(iph->tot_len) + extlen); iph->check = 0; iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); } } else { - ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen); + for (off = ETH_HLEN; off < tcp_offset; + off += sizeof(struct ipv6hdr)) { + struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + off); + + ip6h->payload_len = + htons(ntohs(ip6h->payload_len) + extlen); + } } } @@ -1425,14 +1438,12 @@ static void gro_sender(void) /* large sub-tests */ } else if (strcmp(testname, "large_max") == 0) { - int offset = (proto == PF_INET && !ipip) ? 20 : 0; - int remainder = (MAX_PAYLOAD + offset) % MSS; + int remainder = max_payload() % MSS; send_large(txfd, &daddr, remainder); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); } else if (strcmp(testname, "large_rem") == 0) { - int offset = (proto == PF_INET && !ipip) ? 20 : 0; - int remainder = (MAX_PAYLOAD + offset) % MSS; + int remainder = max_payload() % MSS; send_large(txfd, &daddr, remainder + 1); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); @@ -1636,19 +1647,17 @@ static void gro_receiver(void) /* large sub-tests */ } else if (strcmp(testname, "large_max") == 0) { - int offset = (proto == PF_INET && !ipip) ? 20 : 0; - int remainder = (MAX_PAYLOAD + offset) % MSS; + int remainder = max_payload() % MSS; - correct_payload[0] = (MAX_PAYLOAD + offset); + correct_payload[0] = max_payload(); correct_payload[1] = remainder; printf("Shouldn't coalesce if exceed IP max pkt size: "); check_recv_pkts(rxfd, correct_payload, 2); } else if (strcmp(testname, "large_rem") == 0) { - int offset = (proto == PF_INET && !ipip) ? 20 : 0; - int remainder = (MAX_PAYLOAD + offset) % MSS; + int remainder = max_payload() % MSS; /* last segment sent individually, doesn't start new segment */ - correct_payload[0] = (MAX_PAYLOAD + offset) - remainder; + correct_payload[0] = max_payload() - remainder; correct_payload[1] = remainder + 1; correct_payload[2] = remainder + 1; printf("last segment sent individually: "); From 166b0cc6df8c8bd961058aa3ed0c2d1a2bdaffca Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 2 Apr 2026 13:59:57 -0700 Subject: [PATCH 1147/1561] selftests: drv-net: gro: remove TOTAL_HDR_LEN Willem points out TOTAL_HDR_LEN is identical to MAX_HDR_LEN. This seems to have been the case ever since the test was added. Replace the uses of TOTAL_HDR_LEN with MAX_HDR_LEN, MAX seems more common for what this value is. Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260402210000.1512696-6-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/lib/gro.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/lib/gro.c b/tools/testing/selftests/net/lib/gro.c index 57080ecc3df8..3616ecc42a01 100644 --- a/tools/testing/selftests/net/lib/gro.c +++ b/tools/testing/selftests/net/lib/gro.c @@ -94,7 +94,6 @@ #define START_SEQ 100 #define START_ACK 100 #define ETH_P_NONE 0 -#define TOTAL_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr)) #define MSS (4096 - sizeof(struct tcphdr) - sizeof(struct ipv6hdr)) #define MAX_PAYLOAD (IP_MAXPACKET - sizeof(struct tcphdr) - sizeof(struct ipv6hdr)) #define NUM_LARGE_PKT (MAX_PAYLOAD / MSS) @@ -526,9 +525,9 @@ static void send_data_pkts(int fd, struct sockaddr_ll *daddr, */ static void send_large(int fd, struct sockaddr_ll *daddr, int remainder) { - static char pkts[NUM_LARGE_PKT][TOTAL_HDR_LEN + MSS]; - static char last[TOTAL_HDR_LEN + MSS]; - static char new_seg[TOTAL_HDR_LEN + MSS]; + static char pkts[NUM_LARGE_PKT][MAX_HDR_LEN + MSS]; + static char last[MAX_HDR_LEN + MSS]; + static char new_seg[MAX_HDR_LEN + MSS]; int i; for (i = 0; i < NUM_LARGE_PKT; i++) From 024597cc2077e36dc69327e46fa6dfb1cd0885cd Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 2 Apr 2026 13:59:58 -0700 Subject: [PATCH 1148/1561] selftests: drv-net: gro: make large packet math more precise When constructing the packets for large_* test cases we use a static value for packet count and MSS. It works okay for ipv4 vs ipv6 but the gap between ipv4 and ip6ip6 is going to be quite significant. Make the defines calculate the worst case values, those are only used for sizing stack arrays. Create helpers for calculating precise values based on the exact test case. Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260402210000.1512696-7-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/lib/gro.c | 45 +++++++++++++++++---------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/tools/testing/selftests/net/lib/gro.c b/tools/testing/selftests/net/lib/gro.c index 3616ecc42a01..dc8638d5d74d 100644 --- a/tools/testing/selftests/net/lib/gro.c +++ b/tools/testing/selftests/net/lib/gro.c @@ -94,10 +94,11 @@ #define START_SEQ 100 #define START_ACK 100 #define ETH_P_NONE 0 -#define MSS (4096 - sizeof(struct tcphdr) - sizeof(struct ipv6hdr)) -#define MAX_PAYLOAD (IP_MAXPACKET - sizeof(struct tcphdr) - sizeof(struct ipv6hdr)) -#define NUM_LARGE_PKT (MAX_PAYLOAD / MSS) +#define ASSUMED_MTU 4096 +#define MAX_MSS (ASSUMED_MTU - sizeof(struct iphdr) - sizeof(struct tcphdr)) #define MAX_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr)) +#define MAX_LARGE_PKT_CNT ((IP_MAXPACKET - (MAX_HDR_LEN - ETH_HLEN)) / \ + (ASSUMED_MTU - (MAX_HDR_LEN - ETH_HLEN))) #define MIN_EXTHDR_SIZE 8 #define EXT_PAYLOAD_1 "\x00\x00\x00\x00\x00\x00" #define EXT_PAYLOAD_2 "\x11\x11\x11\x11\x11\x11" @@ -146,6 +147,16 @@ static int max_payload(void) return IP_MAXPACKET - (total_hdr_len - ETH_HLEN); } +static int calc_mss(void) +{ + return ASSUMED_MTU - (total_hdr_len - ETH_HLEN); +} + +static int num_large_pkt(void) +{ + return max_payload() / calc_mss(); +} + static void vlog(const char *fmt, ...) { va_list args; @@ -525,18 +536,20 @@ static void send_data_pkts(int fd, struct sockaddr_ll *daddr, */ static void send_large(int fd, struct sockaddr_ll *daddr, int remainder) { - static char pkts[NUM_LARGE_PKT][MAX_HDR_LEN + MSS]; - static char last[MAX_HDR_LEN + MSS]; - static char new_seg[MAX_HDR_LEN + MSS]; + static char pkts[MAX_LARGE_PKT_CNT][MAX_HDR_LEN + MAX_MSS]; + static char new_seg[MAX_HDR_LEN + MAX_MSS]; + static char last[MAX_HDR_LEN + MAX_MSS]; + const int num_pkt = num_large_pkt(); + const int mss = calc_mss(); int i; - for (i = 0; i < NUM_LARGE_PKT; i++) - create_packet(pkts[i], i * MSS, 0, MSS, 0); - create_packet(last, NUM_LARGE_PKT * MSS, 0, remainder, 0); - create_packet(new_seg, (NUM_LARGE_PKT + 1) * MSS, 0, remainder, 0); + for (i = 0; i < num_pkt; i++) + create_packet(pkts[i], i * mss, 0, mss, 0); + create_packet(last, num_pkt * mss, 0, remainder, 0); + create_packet(new_seg, (num_pkt + 1) * mss, 0, remainder, 0); - for (i = 0; i < NUM_LARGE_PKT; i++) - write_packet(fd, pkts[i], total_hdr_len + MSS, daddr); + for (i = 0; i < num_pkt; i++) + write_packet(fd, pkts[i], total_hdr_len + mss, daddr); write_packet(fd, last, total_hdr_len + remainder, daddr); write_packet(fd, new_seg, total_hdr_len + remainder, daddr); } @@ -1437,12 +1450,12 @@ static void gro_sender(void) /* large sub-tests */ } else if (strcmp(testname, "large_max") == 0) { - int remainder = max_payload() % MSS; + int remainder = max_payload() % calc_mss(); send_large(txfd, &daddr, remainder); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); } else if (strcmp(testname, "large_rem") == 0) { - int remainder = max_payload() % MSS; + int remainder = max_payload() % calc_mss(); send_large(txfd, &daddr, remainder + 1); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); @@ -1646,14 +1659,14 @@ static void gro_receiver(void) /* large sub-tests */ } else if (strcmp(testname, "large_max") == 0) { - int remainder = max_payload() % MSS; + int remainder = max_payload() % calc_mss(); correct_payload[0] = max_payload(); correct_payload[1] = remainder; printf("Shouldn't coalesce if exceed IP max pkt size: "); check_recv_pkts(rxfd, correct_payload, 2); } else if (strcmp(testname, "large_rem") == 0) { - int remainder = max_payload() % MSS; + int remainder = max_payload() % calc_mss(); /* last segment sent individually, doesn't start new segment */ correct_payload[0] = max_payload() - remainder; From 9a84a4047df79f7f58a4915abaf09d1089103233 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 2 Apr 2026 13:59:59 -0700 Subject: [PATCH 1149/1561] selftests: drv-net: gro: test ip6ip6 We explicitly test ipip encap. Let's add ip6ip6, too. Having just ipip seems like favoring IPv4 which we should not do :) Testing all combinations is left for future work, not sure it's actually worth it. Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260402210000.1512696-8-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/gro.py | 2 +- tools/testing/selftests/net/lib/gro.c | 23 +++++++++++++++++----- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/drivers/net/gro.py b/tools/testing/selftests/drivers/net/gro.py index 73436d16b40d..ee95144b73ac 100755 --- a/tools/testing/selftests/drivers/net/gro.py +++ b/tools/testing/selftests/drivers/net/gro.py @@ -313,7 +313,7 @@ def _gro_variants(): ] for mode in ["sw", "hw", "lro"]: - for protocol in ["ipv4", "ipv6", "ipip"]: + for protocol in ["ipv4", "ipv6", "ipip", "ip6ip6"]: for test_name in common_tests: yield mode, protocol, test_name diff --git a/tools/testing/selftests/net/lib/gro.c b/tools/testing/selftests/net/lib/gro.c index dc8638d5d74d..b99c0f00b8fe 100644 --- a/tools/testing/selftests/net/lib/gro.c +++ b/tools/testing/selftests/net/lib/gro.c @@ -96,7 +96,8 @@ #define ETH_P_NONE 0 #define ASSUMED_MTU 4096 #define MAX_MSS (ASSUMED_MTU - sizeof(struct iphdr) - sizeof(struct tcphdr)) -#define MAX_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr)) +#define MAX_HDR_LEN \ + (ETH_HLEN + sizeof(struct ipv6hdr) * 2 + sizeof(struct tcphdr)) #define MAX_LARGE_PKT_CNT ((IP_MAXPACKET - (MAX_HDR_LEN - ETH_HLEN)) / \ (ASSUMED_MTU - (MAX_HDR_LEN - ETH_HLEN))) #define MIN_EXTHDR_SIZE 8 @@ -131,6 +132,7 @@ static int tcp_offset = -1; static int total_hdr_len = -1; static int ethhdr_proto = -1; static bool ipip; +static bool ip6ip6; static uint64_t txtime_ns; static int num_flows = 4; static bool order_check; @@ -1125,7 +1127,8 @@ static void check_recv_pkts(int fd, int *correct_payload, if (iph->version == 4) ip_ext_len = (iph->ihl - 5) * 4; - else if (ip6h->version == 6 && ip6h->nexthdr != IPPROTO_TCP) + else if (ip6h->version == 6 && !ip6ip6 && + ip6h->nexthdr != IPPROTO_TCP) ip_ext_len = MIN_EXTHDR_SIZE; tcph = (struct tcphdr *)(buffer + tcp_offset + ip_ext_len); @@ -1187,7 +1190,8 @@ static void check_capacity_pkts(int fd) if (iph->version == 4) ip_ext_len = (iph->ihl - 5) * 4; - else if (ip6h->version == 6 && ip6h->nexthdr != IPPROTO_TCP) + else if (ip6h->version == 6 && !ip6ip6 && + ip6h->nexthdr != IPPROTO_TCP) ip_ext_len = MIN_EXTHDR_SIZE; tcph = (struct tcphdr *)(buffer + tcp_offset + ip_ext_len); @@ -1700,6 +1704,7 @@ static void parse_args(int argc, char **argv) { "ipv4", no_argument, NULL, '4' }, { "ipv6", no_argument, NULL, '6' }, { "ipip", no_argument, NULL, 'e' }, + { "ip6ip6", no_argument, NULL, 'E' }, { "num-flows", required_argument, NULL, 'n' }, { "rx", no_argument, NULL, 'r' }, { "saddr", required_argument, NULL, 's' }, @@ -1711,7 +1716,7 @@ static void parse_args(int argc, char **argv) }; int c; - while ((c = getopt_long(argc, argv, "46d:D:ei:n:rs:S:t:ov", opts, NULL)) != -1) { + while ((c = getopt_long(argc, argv, "46d:D:eEi:n:rs:S:t:ov", opts, NULL)) != -1) { switch (c) { case '4': proto = PF_INET; @@ -1726,6 +1731,11 @@ static void parse_args(int argc, char **argv) proto = PF_INET; ethhdr_proto = htons(ETH_P_IP); break; + case 'E': + ip6ip6 = true; + proto = PF_INET6; + ethhdr_proto = htons(ETH_P_IPV6); + break; case 'd': addr4_dst = addr6_dst = optarg; break; @@ -1770,12 +1780,15 @@ int main(int argc, char **argv) if (ipip) { tcp_offset = ETH_HLEN + sizeof(struct iphdr) * 2; total_hdr_len = tcp_offset + sizeof(struct tcphdr); + } else if (ip6ip6) { + tcp_offset = ETH_HLEN + sizeof(struct ipv6hdr) * 2; + total_hdr_len = tcp_offset + sizeof(struct tcphdr); } else if (proto == PF_INET) { tcp_offset = ETH_HLEN + sizeof(struct iphdr); total_hdr_len = tcp_offset + sizeof(struct tcphdr); } else if (proto == PF_INET6) { tcp_offset = ETH_HLEN + sizeof(struct ipv6hdr); - total_hdr_len = MAX_HDR_LEN; + total_hdr_len = tcp_offset + sizeof(struct tcphdr); } else { error(1, 0, "Protocol family is not ipv4 or ipv6"); } From 764d0833e795916ffe33906ace17bab027c093f8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 2 Apr 2026 14:00:00 -0700 Subject: [PATCH 1150/1561] selftests: drv-net: gro: add a test for bad IPv4 csum We have a test for coalescing with bad TCP checksum, let's also test bad IPv4 header checksum. Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260402210000.1512696-9-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/gro.py | 1 + tools/testing/selftests/net/lib/gro.c | 29 ++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/tools/testing/selftests/drivers/net/gro.py b/tools/testing/selftests/drivers/net/gro.py index ee95144b73ac..221f27e57147 100755 --- a/tools/testing/selftests/drivers/net/gro.py +++ b/tools/testing/selftests/drivers/net/gro.py @@ -301,6 +301,7 @@ def _gro_variants(): # Tests specific to IPv4 ipv4_tests = [ + "ip_csum", "ip_ttl", "ip_opt", "ip_frag4", "ip_id_df1_inc", "ip_id_df1_fixed", "ip_id_df0_inc", "ip_id_df0_fixed", diff --git a/tools/testing/selftests/net/lib/gro.c b/tools/testing/selftests/net/lib/gro.c index b99c0f00b8fe..11b16ae5f0e8 100644 --- a/tools/testing/selftests/net/lib/gro.c +++ b/tools/testing/selftests/net/lib/gro.c @@ -36,6 +36,7 @@ * Packets with different (ECN, TTL, TOS) header, IP options or * IP fragments shouldn't coalesce. * - ip_ecn, ip_tos: shared between IPv4/IPv6 + * - ip_csum: IPv4 only, bad IP header checksum * - ip_ttl, ip_opt, ip_frag4: IPv4 only * - ip_id_df*: IPv4 IP ID field coalescing tests * - ip_frag6, ip_v6ext_*: IPv6 only @@ -685,6 +686,24 @@ static void send_changed_checksum(int fd, struct sockaddr_ll *daddr) write_packet(fd, buf, pkt_size, daddr); } +/* Packets with incorrect IPv4 header checksum don't coalesce. */ +static void send_changed_ip_checksum(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN); + int pkt_size = total_hdr_len + PAYLOAD_LEN; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + iph->check = iph->check - 1; + write_packet(fd, buf, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, pkt_size, daddr); +} + /* Packets with non-consecutive sequence number don't coalesce.*/ static void send_changed_seq(int fd, struct sockaddr_ll *daddr) { @@ -1402,6 +1421,10 @@ static void gro_sender(void) write_packet(txfd, fin_pkt, total_hdr_len, &daddr); /* ip sub-tests - IPv4 only */ + } else if (strcmp(testname, "ip_csum") == 0) { + send_changed_ip_checksum(txfd, &daddr); + usleep(fin_delay_us); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); } else if (strcmp(testname, "ip_ttl") == 0) { send_changed_ttl(txfd, &daddr); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); @@ -1598,6 +1621,12 @@ static void gro_receiver(void) check_recv_pkts(rxfd, correct_payload, 2); /* ip sub-tests - IPv4 only */ + } else if (strcmp(testname, "ip_csum") == 0) { + correct_payload[0] = PAYLOAD_LEN; + correct_payload[1] = PAYLOAD_LEN; + correct_payload[2] = PAYLOAD_LEN; + printf("bad ip checksum doesn't coalesce: "); + check_recv_pkts(rxfd, correct_payload, 3); } else if (strcmp(testname, "ip_ttl") == 0) { correct_payload[0] = PAYLOAD_LEN; correct_payload[1] = PAYLOAD_LEN; From 1666d945b57b5a10bdea2d229b8ac43d2970f5f8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 Apr 2026 17:44:30 +0000 Subject: [PATCH 1151/1561] inet: remove leftover EXPORT_SYMBOL() IPv6 is no longer a module, we no longer need to export these symbols. Signed-off-by: Eric Dumazet Reviewed-by: Fernando Fernandez Mancera Link: https://patch.msgid.link/20260402174430.2462800-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/icmp.c | 4 ---- net/ipv4/ipmr_base.c | 13 ------------- net/ipv4/udp_offload.c | 4 ---- 3 files changed, 21 deletions(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index fb397fbb28fc..2f4fac22d1ab 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -263,7 +263,6 @@ bool icmp_global_allow(struct net *net) } return true; } -EXPORT_SYMBOL(icmp_global_allow); void icmp_global_consume(struct net *net) { @@ -273,7 +272,6 @@ void icmp_global_consume(struct net *net) if (credits) atomic_sub(credits, &net->ipv4.icmp_global_credit); } -EXPORT_SYMBOL(icmp_global_consume); static bool icmpv4_mask_allow(struct net *net, int type, int code) { @@ -1378,7 +1376,6 @@ send_mal_query: icmphdr->code = ICMP_EXT_CODE_MAL_QUERY; return true; } -EXPORT_SYMBOL_GPL(icmp_build_probe); /* * Handle ICMP Timestamp requests. @@ -1600,7 +1597,6 @@ void ip_icmp_error_rfc4884(const struct sk_buff *skb, if (!ip_icmp_error_rfc4884_validate(skb, off)) out->flags |= SO_EE_RFC4884_FLAG_INVALID; } -EXPORT_SYMBOL_GPL(ip_icmp_error_rfc4884); int icmp_err(struct sk_buff *skb, u32 info) { diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c index fd27f2ca6978..37a3c144276c 100644 --- a/net/ipv4/ipmr_base.c +++ b/net/ipv4/ipmr_base.c @@ -27,7 +27,6 @@ void vif_device_init(struct vif_device *v, else v->link = dev->ifindex; } -EXPORT_SYMBOL(vif_device_init); struct mr_table * mr_table_alloc(struct net *net, u32 id, @@ -60,7 +59,6 @@ mr_table_alloc(struct net *net, u32 id, table_set(mrt, net); return mrt; } -EXPORT_SYMBOL(mr_table_alloc); void *mr_mfc_find_parent(struct mr_table *mrt, void *hasharg, int parent) { @@ -74,7 +72,6 @@ void *mr_mfc_find_parent(struct mr_table *mrt, void *hasharg, int parent) return NULL; } -EXPORT_SYMBOL(mr_mfc_find_parent); void *mr_mfc_find_any_parent(struct mr_table *mrt, int vifi) { @@ -89,7 +86,6 @@ void *mr_mfc_find_any_parent(struct mr_table *mrt, int vifi) return NULL; } -EXPORT_SYMBOL(mr_mfc_find_any_parent); void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg) { @@ -109,7 +105,6 @@ void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg) return mr_mfc_find_any_parent(mrt, vifi); } -EXPORT_SYMBOL(mr_mfc_find_any); #ifdef CONFIG_PROC_FS void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter, loff_t pos) @@ -124,7 +119,6 @@ void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter, loff_t pos) } return NULL; } -EXPORT_SYMBOL(mr_vif_seq_idx); void *mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) { @@ -143,7 +137,6 @@ void *mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) } return NULL; } -EXPORT_SYMBOL(mr_vif_seq_next); void *mr_mfc_seq_idx(struct net *net, struct mr_mfc_iter *it, loff_t pos) @@ -168,7 +161,6 @@ void *mr_mfc_seq_idx(struct net *net, it->cache = NULL; return NULL; } -EXPORT_SYMBOL(mr_mfc_seq_idx); void *mr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) @@ -203,7 +195,6 @@ end_of_list: return NULL; } -EXPORT_SYMBOL(mr_mfc_seq_next); #endif int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, @@ -275,7 +266,6 @@ int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, rtm->rtm_type = RTN_MULTICAST; return 1; } -EXPORT_SYMBOL(mr_fill_mroute); static bool mr_mfc_uses_dev(const struct mr_table *mrt, const struct mr_mfc *c, @@ -347,7 +337,6 @@ out: cb->args[1] = e; return err; } -EXPORT_SYMBOL(mr_table_dump); int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb, struct mr_table *(*iter)(struct net *net, @@ -390,7 +379,6 @@ next_table: return skb->len; } -EXPORT_SYMBOL(mr_rtm_dumproute); int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family, int (*rules_dump)(struct net *net, @@ -444,4 +432,3 @@ int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family, return 0; } -EXPORT_SYMBOL(mr_dump); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 98e92da726b5..a0813d425b71 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -343,7 +343,6 @@ out_unlock: return segs; } -EXPORT_SYMBOL(skb_udp_tunnel_segment); static void __udpv4_gso_segment_csum(struct sk_buff *seg, __be32 *oldip, __be32 *newip, @@ -635,7 +634,6 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, } return segs; } -EXPORT_SYMBOL_GPL(__udp_gso_segment); static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, netdev_features_t features) @@ -852,7 +850,6 @@ out: skb_gro_flush_final(skb, pp, flush); return pp; } -EXPORT_SYMBOL(udp_gro_receive); static struct sock *udp4_gro_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport) @@ -957,7 +954,6 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff, return err; } -EXPORT_SYMBOL(udp_gro_complete); INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff) { From 779fae61a3c84bf5a086509558eb5b432fa5ab62 Mon Sep 17 00:00:00 2001 From: Qingfang Deng Date: Thu, 2 Apr 2026 13:00:50 +0800 Subject: [PATCH 1152/1561] ppp: update Kconfig help message Both links of the PPPoE section are no longer valid, and the CVS version is no longer relevant. - Replace the TLDP URL with the pppd project homepage. - Update pppd version requirement for PPPoE. - Update RP-PPPoE project homepage, and clarify that it's only needed for server mode. Signed-off-by: Qingfang Deng Reviewed-by: Julian Braha Link: https://patch.msgid.link/20260402050053.144250-1-qingfang.deng@linux.dev Signed-off-by: Jakub Kicinski --- drivers/net/ppp/Kconfig | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/net/ppp/Kconfig b/drivers/net/ppp/Kconfig index f57fba84fe55..753354b4e36c 100644 --- a/drivers/net/ppp/Kconfig +++ b/drivers/net/ppp/Kconfig @@ -13,10 +13,9 @@ config PPP otherwise you can't use it; most Internet access providers these days support PPP rather than SLIP. - To use PPP, you need an additional program called pppd as described - in the PPP-HOWTO, available at - . Make sure that you have - the version of pppd recommended in . + To use PPP, you need an additional program called pppd, available at + . Make sure that you have the version of pppd + recommended in . The PPP option enlarges your kernel by about 16 KB. There are actually two versions of PPP: the traditional PPP for @@ -116,11 +115,10 @@ config PPPOE help Support for PPP over Ethernet. - This driver requires the latest version of pppd from the CVS - repository at cvs.samba.org. Alternatively, see the - RoaringPenguin package () - which contains instruction on how to use this driver (under - the heading "Kernel mode PPPoE"). + To work in client mode, pppd version 2.4.2 or later is required. + To work in server mode, the pppoe-server command from the RP-PPPoE + package is also required, available at + . choice prompt "Number of PPPoE hash bits" From dc3bd465ea36af7fd6f9197c05353effc616145c Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Thu, 2 Apr 2026 21:41:22 +0800 Subject: [PATCH 1153/1561] net: macb: Replace open-coded implementation with napi_schedule() The driver currently duplicates the logic of napi_schedule() primarily to include additional debug information. However, these debug details are not essential for a specific driver and can be effectively obtained through existing tracepoints in the networking core, such as /sys/kernel/tracing/events/napi/napi_poll. Therefore, this patch replaces the open-coded implementation with napi_schedule() to simplify the driver's code. Signed-off-by: Kevin Hao Link: https://patch.msgid.link/20260402-macb-irq-v2-1-942d98ab1154@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 7f061d7c95af..8983e2f5b04a 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -2120,10 +2120,7 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id) if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) queue_writel(queue, ISR, MACB_BIT(RCOMP)); - if (napi_schedule_prep(&queue->napi_rx)) { - netdev_vdbg(bp->dev, "scheduling RX softirq\n"); - __napi_schedule(&queue->napi_rx); - } + napi_schedule(&queue->napi_rx); } if (status & (MACB_BIT(TCOMP) | @@ -2138,10 +2135,7 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id) wmb(); // ensure softirq can see update } - if (napi_schedule_prep(&queue->napi_tx)) { - netdev_vdbg(bp->dev, "scheduling TX softirq\n"); - __napi_schedule(&queue->napi_tx); - } + napi_schedule(&queue->napi_tx); } if (unlikely(status & (MACB_TX_ERR_FLAGS))) { From 5986ff6e4136105f16d481bd0532b7da2c6344ae Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Thu, 2 Apr 2026 21:41:23 +0800 Subject: [PATCH 1154/1561] net: macb: Introduce macb_queue_isr_clear() helper function The current implementation includes several occurrences of the following pattern: if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) queue_writel(queue, ISR, value); Introduces a helper function to consolidate these repeated code segments. No functional changes are made. Suggested-by: Jakub Kicinski Signed-off-by: Kevin Hao Link: https://patch.msgid.link/20260402-macb-irq-v2-2-942d98ab1154@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb.h | 7 ++++ drivers/net/ethernet/cadence/macb_main.c | 51 +++++++----------------- 2 files changed, 22 insertions(+), 36 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 16527dbab875..2de56017ee0d 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -1474,6 +1474,13 @@ static inline bool macb_dma_ptp(struct macb *bp) bp->caps & MACB_CAPS_DMA_PTP; } +static inline void macb_queue_isr_clear(struct macb *bp, + struct macb_queue *queue, u32 value) +{ + if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) + queue_writel(queue, ISR, value); +} + /** * struct macb_platform_data - platform data for MACB Ethernet used for PCI registration * @pclk: platform clock diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 8983e2f5b04a..248b82af8d3a 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1887,8 +1887,7 @@ static int macb_rx_poll(struct napi_struct *napi, int budget) */ if (macb_rx_pending(queue)) { queue_writel(queue, IDR, bp->rx_intr_mask); - if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) - queue_writel(queue, ISR, MACB_BIT(RCOMP)); + macb_queue_isr_clear(bp, queue, MACB_BIT(RCOMP)); netdev_vdbg(bp->dev, "poll: packets pending, reschedule\n"); napi_schedule(napi); } @@ -1975,8 +1974,7 @@ static int macb_tx_poll(struct napi_struct *napi, int budget) */ if (macb_tx_complete_pending(queue)) { queue_writel(queue, IDR, MACB_BIT(TCOMP)); - if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) - queue_writel(queue, ISR, MACB_BIT(TCOMP)); + macb_queue_isr_clear(bp, queue, MACB_BIT(TCOMP)); netdev_vdbg(bp->dev, "TX poll: packets pending, reschedule\n"); napi_schedule(napi); } @@ -2043,8 +2041,7 @@ static irqreturn_t macb_wol_interrupt(int irq, void *dev_id) netdev_vdbg(bp->dev, "MACB WoL: queue = %u, isr = 0x%08lx\n", (unsigned int)(queue - bp->queues), (unsigned long)status); - if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) - queue_writel(queue, ISR, MACB_BIT(WOL)); + macb_queue_isr_clear(bp, queue, MACB_BIT(WOL)); pm_wakeup_event(&bp->pdev->dev, 0); } @@ -2072,8 +2069,7 @@ static irqreturn_t gem_wol_interrupt(int irq, void *dev_id) netdev_vdbg(bp->dev, "GEM WoL: queue = %u, isr = 0x%08lx\n", (unsigned int)(queue - bp->queues), (unsigned long)status); - if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) - queue_writel(queue, ISR, GEM_BIT(WOL)); + macb_queue_isr_clear(bp, queue, GEM_BIT(WOL)); pm_wakeup_event(&bp->pdev->dev, 0); } @@ -2100,8 +2096,7 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id) /* close possible race with dev_close */ if (unlikely(!netif_running(dev))) { queue_writel(queue, IDR, -1); - if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) - queue_writel(queue, ISR, -1); + macb_queue_isr_clear(bp, queue, -1); break; } @@ -2117,19 +2112,15 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id) * now. */ queue_writel(queue, IDR, bp->rx_intr_mask); - if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) - queue_writel(queue, ISR, MACB_BIT(RCOMP)); - + macb_queue_isr_clear(bp, queue, MACB_BIT(RCOMP)); napi_schedule(&queue->napi_rx); } if (status & (MACB_BIT(TCOMP) | MACB_BIT(TXUBR))) { queue_writel(queue, IDR, MACB_BIT(TCOMP)); - if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) - queue_writel(queue, ISR, MACB_BIT(TCOMP) | - MACB_BIT(TXUBR)); - + macb_queue_isr_clear(bp, queue, MACB_BIT(TCOMP) | + MACB_BIT(TXUBR)); if (status & MACB_BIT(TXUBR)) { queue->txubr_pending = true; wmb(); // ensure softirq can see update @@ -2141,10 +2132,7 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id) if (unlikely(status & (MACB_TX_ERR_FLAGS))) { queue_writel(queue, IDR, MACB_TX_INT_FLAGS); schedule_work(&queue->tx_error_task); - - if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) - queue_writel(queue, ISR, MACB_TX_ERR_FLAGS); - + macb_queue_isr_clear(bp, queue, MACB_TX_ERR_FLAGS); break; } @@ -2164,9 +2152,7 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id) macb_writel(bp, NCR, ctrl & ~MACB_BIT(RE)); wmb(); macb_writel(bp, NCR, ctrl | MACB_BIT(RE)); - - if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) - queue_writel(queue, ISR, MACB_BIT(RXUBR)); + macb_queue_isr_clear(bp, queue, MACB_BIT(RXUBR)); } if (status & MACB_BIT(ISR_ROVR)) { @@ -2177,17 +2163,13 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id) else bp->hw_stats.macb.rx_overruns++; spin_unlock(&bp->stats_lock); - - if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) - queue_writel(queue, ISR, MACB_BIT(ISR_ROVR)); + macb_queue_isr_clear(bp, queue, MACB_BIT(ISR_ROVR)); } if (status & MACB_BIT(HRESP)) { queue_work(system_bh_wq, &bp->hresp_err_bh_work); netdev_err(dev, "DMA bus error: HRESP not OK\n"); - - if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) - queue_writel(queue, ISR, MACB_BIT(HRESP)); + macb_queue_isr_clear(bp, queue, MACB_BIT(HRESP)); } status = queue_readl(queue, ISR); } @@ -2883,8 +2865,7 @@ static void macb_reset_hw(struct macb *bp) for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { queue_writel(queue, IDR, -1); queue_readl(queue, ISR); - if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) - queue_writel(queue, ISR, -1); + macb_queue_isr_clear(bp, queue, -1); } } @@ -6053,8 +6034,7 @@ static int __maybe_unused macb_suspend(struct device *dev) /* Disable all interrupts */ queue_writel(queue, IDR, -1); queue_readl(queue, ISR); - if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) - queue_writel(queue, ISR, -1); + macb_queue_isr_clear(bp, queue, -1); } /* Enable Receive engine */ macb_writel(bp, NCR, tmp | MACB_BIT(RE)); @@ -6165,8 +6145,7 @@ static int __maybe_unused macb_resume(struct device *dev) } /* Clear ISR on queue 0 */ queue_readl(bp->queues, ISR); - if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) - queue_writel(bp->queues, ISR, -1); + macb_queue_isr_clear(bp, bp->queues, -1); spin_unlock_irqrestore(&bp->lock, flags); /* Replace interrupt handler on queue 0 */ From 6d55ce805b267400e682aa335787775bf253e5d8 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Thu, 2 Apr 2026 21:41:24 +0800 Subject: [PATCH 1155/1561] net: macb: Factor out the handling of non-hot IRQ events into a separate function In the current code, the IRQ handler checks each IRQ event sequentially. Since most IRQ events are related to TX/RX operations, while other events occur infrequently, this approach introduces unnecessary overhead in the hot path for TX/RX processing. This patch reduces such overhead by extracting the handling of all non-TX/RX events into a new function and consolidating these events under a new flag. As a result, only a single check is required to determine whether any non-TX/RX events have occurred. If such events exist, the handler jumps to the new function. This optimization reduces four conditional checks to one and prevents the instruction cache from being polluted with rarely used code in the hot path. Signed-off-by: Kevin Hao Link: https://patch.msgid.link/20260402-macb-irq-v2-3-942d98ab1154@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 103 ++++++++++++++--------- 1 file changed, 61 insertions(+), 42 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 248b82af8d3a..4fa75e6ed6d6 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -70,6 +70,9 @@ struct sifive_fu540_macb_mgmt { #define MACB_TX_INT_FLAGS (MACB_TX_ERR_FLAGS | MACB_BIT(TCOMP) \ | MACB_BIT(TXUBR)) +#define MACB_INT_MISC_FLAGS (MACB_TX_ERR_FLAGS | MACB_BIT(RXUBR) | \ + MACB_BIT(ISR_ROVR) | MACB_BIT(HRESP)) + /* Max length of transmit frame must be a multiple of 8 bytes */ #define MACB_TX_LEN_ALIGN 8 #define MACB_MAX_TX_LEN ((unsigned int)((1 << MACB_TX_FRMLEN_SIZE) - 1) & ~((unsigned int)(MACB_TX_LEN_ALIGN - 1))) @@ -2078,12 +2081,66 @@ static irqreturn_t gem_wol_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } +static int macb_interrupt_misc(struct macb_queue *queue, u32 status) +{ + struct macb *bp = queue->bp; + struct net_device *dev; + u32 ctrl; + + dev = bp->dev; + + if (unlikely(status & (MACB_TX_ERR_FLAGS))) { + queue_writel(queue, IDR, MACB_TX_INT_FLAGS); + schedule_work(&queue->tx_error_task); + macb_queue_isr_clear(bp, queue, MACB_TX_ERR_FLAGS); + return -1; + } + + /* Link change detection isn't possible with RMII, so we'll + * add that if/when we get our hands on a full-blown MII PHY. + */ + + /* There is a hardware issue under heavy load where DMA can + * stop, this causes endless "used buffer descriptor read" + * interrupts but it can be cleared by re-enabling RX. See + * the at91rm9200 manual, section 41.3.1 or the Zynq manual + * section 16.7.4 for details. RXUBR is only enabled for + * these two versions. + */ + if (status & MACB_BIT(RXUBR)) { + ctrl = macb_readl(bp, NCR); + macb_writel(bp, NCR, ctrl & ~MACB_BIT(RE)); + wmb(); + macb_writel(bp, NCR, ctrl | MACB_BIT(RE)); + macb_queue_isr_clear(bp, queue, MACB_BIT(RXUBR)); + } + + if (status & MACB_BIT(ISR_ROVR)) { + /* We missed at least one packet */ + spin_lock(&bp->stats_lock); + if (macb_is_gem(bp)) + bp->hw_stats.gem.rx_overruns++; + else + bp->hw_stats.macb.rx_overruns++; + spin_unlock(&bp->stats_lock); + macb_queue_isr_clear(bp, queue, MACB_BIT(ISR_ROVR)); + } + + if (status & MACB_BIT(HRESP)) { + queue_work(system_bh_wq, &bp->hresp_err_bh_work); + netdev_err(dev, "DMA bus error: HRESP not OK\n"); + macb_queue_isr_clear(bp, queue, MACB_BIT(HRESP)); + } + + return 0; +} + static irqreturn_t macb_interrupt(int irq, void *dev_id) { struct macb_queue *queue = dev_id; struct macb *bp = queue->bp; struct net_device *dev = bp->dev; - u32 status, ctrl; + u32 status; status = queue_readl(queue, ISR); @@ -2129,48 +2186,10 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id) napi_schedule(&queue->napi_tx); } - if (unlikely(status & (MACB_TX_ERR_FLAGS))) { - queue_writel(queue, IDR, MACB_TX_INT_FLAGS); - schedule_work(&queue->tx_error_task); - macb_queue_isr_clear(bp, queue, MACB_TX_ERR_FLAGS); - break; - } + if (unlikely(status & MACB_INT_MISC_FLAGS)) + if (macb_interrupt_misc(queue, status)) + break; - /* Link change detection isn't possible with RMII, so we'll - * add that if/when we get our hands on a full-blown MII PHY. - */ - - /* There is a hardware issue under heavy load where DMA can - * stop, this causes endless "used buffer descriptor read" - * interrupts but it can be cleared by re-enabling RX. See - * the at91rm9200 manual, section 41.3.1 or the Zynq manual - * section 16.7.4 for details. RXUBR is only enabled for - * these two versions. - */ - if (status & MACB_BIT(RXUBR)) { - ctrl = macb_readl(bp, NCR); - macb_writel(bp, NCR, ctrl & ~MACB_BIT(RE)); - wmb(); - macb_writel(bp, NCR, ctrl | MACB_BIT(RE)); - macb_queue_isr_clear(bp, queue, MACB_BIT(RXUBR)); - } - - if (status & MACB_BIT(ISR_ROVR)) { - /* We missed at least one packet */ - spin_lock(&bp->stats_lock); - if (macb_is_gem(bp)) - bp->hw_stats.gem.rx_overruns++; - else - bp->hw_stats.macb.rx_overruns++; - spin_unlock(&bp->stats_lock); - macb_queue_isr_clear(bp, queue, MACB_BIT(ISR_ROVR)); - } - - if (status & MACB_BIT(HRESP)) { - queue_work(system_bh_wq, &bp->hresp_err_bh_work); - netdev_err(dev, "DMA bus error: HRESP not OK\n"); - macb_queue_isr_clear(bp, queue, MACB_BIT(HRESP)); - } status = queue_readl(queue, ISR); } From 6637c03f35fa75447d5c2c31db610929cff05813 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Thu, 2 Apr 2026 21:41:25 +0800 Subject: [PATCH 1156/1561] net: macb: Remove dedicated IRQ handler for WoL In the current implementation, the suspend/resume path frees the existing IRQ handler and sets up a dedicated WoL IRQ handler, then restores the original handler upon resume. This approach is not used by any other Ethernet driver and unnecessarily complicates the suspend/resume process. After adjusting the IRQ handler in the previous patches, we can now handle WoL interrupts without introducing any overhead in the TX/RX hot path. Therefore, the dedicated WoL IRQ handler is removed. I have verified WoL functionality on my AMD ZynqMP board using the following steps: root@amd-zynqmp:~# ifconfig end0 192.168.3.3 root@amd-zynqmp:~# ethtool -s end0 wol a root@amd-zynqmp:~# echo mem >/sys/power/state Signed-off-by: Kevin Hao Link: https://patch.msgid.link/20260402-macb-irq-v2-4-942d98ab1154@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 112 ++++++----------------- 1 file changed, 27 insertions(+), 85 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 4fa75e6ed6d6..6ec7b110dc72 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -71,7 +71,8 @@ struct sifive_fu540_macb_mgmt { | MACB_BIT(TXUBR)) #define MACB_INT_MISC_FLAGS (MACB_TX_ERR_FLAGS | MACB_BIT(RXUBR) | \ - MACB_BIT(ISR_ROVR) | MACB_BIT(HRESP)) + MACB_BIT(ISR_ROVR) | MACB_BIT(HRESP) | \ + GEM_BIT(WOL) | MACB_BIT(WOL)) /* Max length of transmit frame must be a multiple of 8 bytes */ #define MACB_TX_LEN_ALIGN 8 @@ -2025,60 +2026,30 @@ static void macb_hresp_error_task(struct work_struct *work) netif_tx_start_all_queues(dev); } -static irqreturn_t macb_wol_interrupt(int irq, void *dev_id) +static void macb_wol_interrupt(struct macb_queue *queue, u32 status) { - struct macb_queue *queue = dev_id; struct macb *bp = queue->bp; - u32 status; - status = queue_readl(queue, ISR); - - if (unlikely(!status)) - return IRQ_NONE; - - spin_lock(&bp->lock); - - if (status & MACB_BIT(WOL)) { - queue_writel(queue, IDR, MACB_BIT(WOL)); - macb_writel(bp, WOL, 0); - netdev_vdbg(bp->dev, "MACB WoL: queue = %u, isr = 0x%08lx\n", - (unsigned int)(queue - bp->queues), - (unsigned long)status); - macb_queue_isr_clear(bp, queue, MACB_BIT(WOL)); - pm_wakeup_event(&bp->pdev->dev, 0); - } - - spin_unlock(&bp->lock); - - return IRQ_HANDLED; + queue_writel(queue, IDR, MACB_BIT(WOL)); + macb_writel(bp, WOL, 0); + netdev_vdbg(bp->dev, "MACB WoL: queue = %u, isr = 0x%08lx\n", + (unsigned int)(queue - bp->queues), + (unsigned long)status); + macb_queue_isr_clear(bp, queue, MACB_BIT(WOL)); + pm_wakeup_event(&bp->pdev->dev, 0); } -static irqreturn_t gem_wol_interrupt(int irq, void *dev_id) +static void gem_wol_interrupt(struct macb_queue *queue, u32 status) { - struct macb_queue *queue = dev_id; struct macb *bp = queue->bp; - u32 status; - status = queue_readl(queue, ISR); - - if (unlikely(!status)) - return IRQ_NONE; - - spin_lock(&bp->lock); - - if (status & GEM_BIT(WOL)) { - queue_writel(queue, IDR, GEM_BIT(WOL)); - gem_writel(bp, WOL, 0); - netdev_vdbg(bp->dev, "GEM WoL: queue = %u, isr = 0x%08lx\n", - (unsigned int)(queue - bp->queues), - (unsigned long)status); - macb_queue_isr_clear(bp, queue, GEM_BIT(WOL)); - pm_wakeup_event(&bp->pdev->dev, 0); - } - - spin_unlock(&bp->lock); - - return IRQ_HANDLED; + queue_writel(queue, IDR, GEM_BIT(WOL)); + gem_writel(bp, WOL, 0); + netdev_vdbg(bp->dev, "GEM WoL: queue = %u, isr = 0x%08lx\n", + (unsigned int)(queue - bp->queues), + (unsigned long)status); + macb_queue_isr_clear(bp, queue, GEM_BIT(WOL)); + pm_wakeup_event(&bp->pdev->dev, 0); } static int macb_interrupt_misc(struct macb_queue *queue, u32 status) @@ -2132,6 +2103,14 @@ static int macb_interrupt_misc(struct macb_queue *queue, u32 status) macb_queue_isr_clear(bp, queue, MACB_BIT(HRESP)); } + if (macb_is_gem(bp)) { + if (status & GEM_BIT(WOL)) + gem_wol_interrupt(queue, status); + } else { + if (status & MACB_BIT(WOL)) + macb_wol_interrupt(queue, status); + } + return 0; } @@ -6004,7 +5983,6 @@ static int __maybe_unused macb_suspend(struct device *dev) unsigned long flags; u32 tmp, ifa_local; unsigned int q; - int err; if (!device_may_wakeup(&bp->dev->dev)) phy_exit(bp->phy); @@ -6067,39 +6045,15 @@ static int __maybe_unused macb_suspend(struct device *dev) /* write IP address into register */ tmp |= MACB_BFEXT(IP, ifa_local); } - spin_unlock_irqrestore(&bp->lock, flags); - /* Change interrupt handler and - * Enable WoL IRQ on queue 0 - */ - devm_free_irq(dev, bp->queues[0].irq, bp->queues); if (macb_is_gem(bp)) { - err = devm_request_irq(dev, bp->queues[0].irq, gem_wol_interrupt, - IRQF_SHARED, netdev->name, bp->queues); - if (err) { - dev_err(dev, - "Unable to request IRQ %d (error %d)\n", - bp->queues[0].irq, err); - return err; - } - spin_lock_irqsave(&bp->lock, flags); queue_writel(bp->queues, IER, GEM_BIT(WOL)); gem_writel(bp, WOL, tmp); - spin_unlock_irqrestore(&bp->lock, flags); } else { - err = devm_request_irq(dev, bp->queues[0].irq, macb_wol_interrupt, - IRQF_SHARED, netdev->name, bp->queues); - if (err) { - dev_err(dev, - "Unable to request IRQ %d (error %d)\n", - bp->queues[0].irq, err); - return err; - } - spin_lock_irqsave(&bp->lock, flags); queue_writel(bp->queues, IER, MACB_BIT(WOL)); macb_writel(bp, WOL, tmp); - spin_unlock_irqrestore(&bp->lock, flags); } + spin_unlock_irqrestore(&bp->lock, flags); enable_irq_wake(bp->queues[0].irq); } @@ -6141,7 +6095,6 @@ static int __maybe_unused macb_resume(struct device *dev) struct macb_queue *queue; unsigned long flags; unsigned int q; - int err; if (!device_may_wakeup(&bp->dev->dev)) phy_init(bp->phy); @@ -6167,17 +6120,6 @@ static int __maybe_unused macb_resume(struct device *dev) macb_queue_isr_clear(bp, bp->queues, -1); spin_unlock_irqrestore(&bp->lock, flags); - /* Replace interrupt handler on queue 0 */ - devm_free_irq(dev, bp->queues[0].irq, bp->queues); - err = devm_request_irq(dev, bp->queues[0].irq, macb_interrupt, - IRQF_SHARED, netdev->name, bp->queues); - if (err) { - dev_err(dev, - "Unable to request IRQ %d (error %d)\n", - bp->queues[0].irq, err); - return err; - } - disable_irq_wake(bp->queues[0].irq); /* Now make sure we disable phy before moving From 353d8e7989b6babe8fe4ae06272230c7941c3f73 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 1 Apr 2026 20:36:08 +0200 Subject: [PATCH 1157/1561] net: ethernet: ravb: Suspend and resume the transmission flow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current driver does not follow the latest datasheet and does not suspend the flow when stopping DMA and resume it when starting. Update the driver to do so. Signed-off-by: Yoshihiro Shimoda [Niklas: Rebase from BSP and reword commit message] Signed-off-by: Niklas Söderlund Link: https://patch.msgid.link/20260401183608.1852225-1-niklas.soderlund+renesas@ragnatech.se Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/ravb_main.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 2c725824b348..1dbfadb2a881 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -694,6 +694,9 @@ static int ravb_dmac_init(struct net_device *ndev) const struct ravb_hw_info *info = priv->info; int error; + /* Clear transmission suspension */ + ravb_modify(ndev, CCC, CCC_DTSR, 0); + /* Set CONFIG mode */ error = ravb_set_opmode(ndev, CCC_OPC_CONFIG); if (error) @@ -1103,6 +1106,12 @@ static int ravb_stop_dma(struct net_device *ndev) if (error) return error; + /* Request for transmission suspension */ + ravb_modify(ndev, CCC, CCC_DTSR, CCC_DTSR); + error = ravb_wait(ndev, CSR, CSR_DTS, CSR_DTS); + if (error) + netdev_err(ndev, "failed to stop AXI BUS\n"); + /* Stop AVB-DMAC process */ return ravb_set_opmode(ndev, CCC_OPC_CONFIG); } From 3fdea79c09d169b6ea172b8d36232c3773f39973 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 2 Apr 2026 20:40:55 +0200 Subject: [PATCH 1158/1561] dpll: add frequency monitoring to netlink spec Add DPLL_A_FREQUENCY_MONITOR device attribute to allow control over the frequency monitor feature. The attribute uses the existing dpll_feature_state enum (enable/disable) and is present in both device-get reply and device-set request. Add DPLL_A_PIN_MEASURED_FREQUENCY pin attribute to expose the measured input frequency in millihertz (mHz). The attribute is present in the pin-get reply. Add DPLL_PIN_MEASURED_FREQUENCY_DIVIDER constant to allow userspace to extract integer and fractional parts. Reviewed-by: Vadim Fedorenko Signed-off-by: Ivan Vecera Link: https://patch.msgid.link/20260402184057.1890514-2-ivecera@redhat.com Signed-off-by: Jakub Kicinski --- Documentation/driver-api/dpll.rst | 20 +++++++++++++++ Documentation/netlink/specs/dpll.yaml | 35 +++++++++++++++++++++++++++ drivers/dpll/dpll_nl.c | 5 ++-- include/uapi/linux/dpll.h | 5 +++- 4 files changed, 62 insertions(+), 3 deletions(-) diff --git a/Documentation/driver-api/dpll.rst b/Documentation/driver-api/dpll.rst index 83118c728ed9..93c191b2d089 100644 --- a/Documentation/driver-api/dpll.rst +++ b/Documentation/driver-api/dpll.rst @@ -250,6 +250,24 @@ in the ``DPLL_A_PIN_PHASE_OFFSET`` attribute. ``DPLL_A_PHASE_OFFSET_MONITOR`` attr state of a feature =============================== ======================== +Frequency monitor +================= + +Some DPLL devices may offer the capability to measure the actual +frequency of all available input pins. The attribute and current feature state +shall be included in the response message of the ``DPLL_CMD_DEVICE_GET`` +command for supported DPLL devices. In such cases, users can also control +the feature using the ``DPLL_CMD_DEVICE_SET`` command by setting the +``enum dpll_feature_state`` values for the attribute. +Once enabled the measured input frequency for each input pin shall be +returned in the ``DPLL_A_PIN_MEASURED_FREQUENCY`` attribute. The value +is in millihertz (mHz), using ``DPLL_PIN_MEASURED_FREQUENCY_DIVIDER`` +as the divider. + + =============================== ======================== + ``DPLL_A_FREQUENCY_MONITOR`` attr state of a feature + =============================== ======================== + Embedded SYNC ============= @@ -411,6 +429,8 @@ according to attribute purpose. ``DPLL_A_PIN_STATE`` attr state of pin on the parent pin ``DPLL_A_PIN_CAPABILITIES`` attr bitmask of pin capabilities + ``DPLL_A_PIN_MEASURED_FREQUENCY`` attr measured frequency of + an input pin in mHz ==================================== ================================== ==================================== ================================= diff --git a/Documentation/netlink/specs/dpll.yaml b/Documentation/netlink/specs/dpll.yaml index 3dd48a32f783..40465a3d7fc2 100644 --- a/Documentation/netlink/specs/dpll.yaml +++ b/Documentation/netlink/specs/dpll.yaml @@ -240,6 +240,20 @@ definitions: integer part of a measured phase offset value. Value of (DPLL_A_PHASE_OFFSET % DPLL_PHASE_OFFSET_DIVIDER) is a fractional part of a measured phase offset value. + - + type: const + name: pin-measured-frequency-divider + value: 1000 + doc: | + pin measured frequency divider allows userspace to calculate + a value of measured input frequency as a fractional value with + three digit decimal precision (millihertz). + Value of (DPLL_A_PIN_MEASURED_FREQUENCY / + DPLL_PIN_MEASURED_FREQUENCY_DIVIDER) is an integer part of + a measured frequency value. + Value of (DPLL_A_PIN_MEASURED_FREQUENCY % + DPLL_PIN_MEASURED_FREQUENCY_DIVIDER) is a fractional part of + a measured frequency value. - type: enum name: feature-state @@ -319,6 +333,13 @@ attribute-sets: name: phase-offset-avg-factor type: u32 doc: Averaging factor applied to calculation of reported phase offset. + - + name: frequency-monitor + type: u32 + enum: feature-state + doc: Current or desired state of the frequency monitor feature. + If enabled, dpll device shall measure all currently available + inputs for their actual input frequency. - name: pin enum-name: dpll_a_pin @@ -456,6 +477,17 @@ attribute-sets: Value is in PPT (parts per trillion, 10^-12). Note: This attribute provides higher resolution than the standard fractional-frequency-offset (which is in PPM). + - + name: measured-frequency + type: u64 + doc: | + The measured frequency of the input pin in millihertz (mHz). + Value of (DPLL_A_PIN_MEASURED_FREQUENCY / + DPLL_PIN_MEASURED_FREQUENCY_DIVIDER) is an integer part (Hz) + of a measured frequency value. + Value of (DPLL_A_PIN_MEASURED_FREQUENCY % + DPLL_PIN_MEASURED_FREQUENCY_DIVIDER) is a fractional part + of a measured frequency value. - name: pin-parent-device @@ -544,6 +576,7 @@ operations: - type - phase-offset-monitor - phase-offset-avg-factor + - frequency-monitor dump: reply: *dev-attrs @@ -563,6 +596,7 @@ operations: - mode - phase-offset-monitor - phase-offset-avg-factor + - frequency-monitor - name: device-create-ntf doc: Notification about device appearing @@ -643,6 +677,7 @@ operations: - esync-frequency-supported - esync-pulse - reference-sync + - measured-frequency dump: request: diff --git a/drivers/dpll/dpll_nl.c b/drivers/dpll/dpll_nl.c index a2b22d492114..1e652340a5d7 100644 --- a/drivers/dpll/dpll_nl.c +++ b/drivers/dpll/dpll_nl.c @@ -43,11 +43,12 @@ static const struct nla_policy dpll_device_get_nl_policy[DPLL_A_ID + 1] = { }; /* DPLL_CMD_DEVICE_SET - do */ -static const struct nla_policy dpll_device_set_nl_policy[DPLL_A_PHASE_OFFSET_AVG_FACTOR + 1] = { +static const struct nla_policy dpll_device_set_nl_policy[DPLL_A_FREQUENCY_MONITOR + 1] = { [DPLL_A_ID] = { .type = NLA_U32, }, [DPLL_A_MODE] = NLA_POLICY_RANGE(NLA_U32, 1, 2), [DPLL_A_PHASE_OFFSET_MONITOR] = NLA_POLICY_MAX(NLA_U32, 1), [DPLL_A_PHASE_OFFSET_AVG_FACTOR] = { .type = NLA_U32, }, + [DPLL_A_FREQUENCY_MONITOR] = NLA_POLICY_MAX(NLA_U32, 1), }; /* DPLL_CMD_PIN_ID_GET - do */ @@ -115,7 +116,7 @@ static const struct genl_split_ops dpll_nl_ops[] = { .doit = dpll_nl_device_set_doit, .post_doit = dpll_post_doit, .policy = dpll_device_set_nl_policy, - .maxattr = DPLL_A_PHASE_OFFSET_AVG_FACTOR, + .maxattr = DPLL_A_FREQUENCY_MONITOR, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { diff --git a/include/uapi/linux/dpll.h b/include/uapi/linux/dpll.h index de0005f28e5c..871685f7c353 100644 --- a/include/uapi/linux/dpll.h +++ b/include/uapi/linux/dpll.h @@ -191,7 +191,8 @@ enum dpll_pin_capabilities { DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE = 4, }; -#define DPLL_PHASE_OFFSET_DIVIDER 1000 +#define DPLL_PHASE_OFFSET_DIVIDER 1000 +#define DPLL_PIN_MEASURED_FREQUENCY_DIVIDER 1000 /** * enum dpll_feature_state - Allow control (enable/disable) and status checking @@ -218,6 +219,7 @@ enum dpll_a { DPLL_A_CLOCK_QUALITY_LEVEL, DPLL_A_PHASE_OFFSET_MONITOR, DPLL_A_PHASE_OFFSET_AVG_FACTOR, + DPLL_A_FREQUENCY_MONITOR, __DPLL_A_MAX, DPLL_A_MAX = (__DPLL_A_MAX - 1) @@ -254,6 +256,7 @@ enum dpll_a_pin { DPLL_A_PIN_REFERENCE_SYNC, DPLL_A_PIN_PHASE_ADJUST_GRAN, DPLL_A_PIN_FRACTIONAL_FREQUENCY_OFFSET_PPT, + DPLL_A_PIN_MEASURED_FREQUENCY, __DPLL_A_PIN_MAX, DPLL_A_PIN_MAX = (__DPLL_A_PIN_MAX - 1) From 15ed91aa84ea7bacef3c24286d5136055b4335a8 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 2 Apr 2026 20:40:56 +0200 Subject: [PATCH 1159/1561] dpll: add frequency monitoring callback ops Add new callback operations for a dpll device: - freq_monitor_get(..) - to obtain current state of frequency monitor feature from dpll device, - freq_monitor_set(..) - to allow feature configuration. Add new callback operation for a dpll pin: - measured_freq_get(..) - to obtain the measured frequency in mHz. Obtain the feature state value using the get callback and provide it to the user if the device driver implements callbacks. The measured_freq_get pin callback is only invoked when the frequency monitor is enabled. The freq_monitor_get device callback is required when measured_freq_get is provided by the driver. Execute the set callback upon user requests. Reviewed-by: Vadim Fedorenko Signed-off-by: Ivan Vecera Link: https://patch.msgid.link/20260402184057.1890514-3-ivecera@redhat.com Signed-off-by: Jakub Kicinski --- drivers/dpll/dpll_core.c | 5 ++- drivers/dpll/dpll_netlink.c | 90 +++++++++++++++++++++++++++++++++++++ include/linux/dpll.h | 10 +++++ 3 files changed, 104 insertions(+), 1 deletion(-) diff --git a/drivers/dpll/dpll_core.c b/drivers/dpll/dpll_core.c index 3f54754cdec4..cbb635db4321 100644 --- a/drivers/dpll/dpll_core.c +++ b/drivers/dpll/dpll_core.c @@ -876,7 +876,10 @@ dpll_pin_register(struct dpll_device *dpll, struct dpll_pin *pin, if (WARN_ON(!ops) || WARN_ON(!ops->state_on_dpll_get) || - WARN_ON(!ops->direction_get)) + WARN_ON(!ops->direction_get) || + WARN_ON(ops->measured_freq_get && + (!dpll_device_ops(dpll)->freq_monitor_get || + !dpll_device_ops(dpll)->freq_monitor_set))) return -EINVAL; mutex_lock(&dpll_lock); diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c index 83cbd64abf5a..af7ce62ec55c 100644 --- a/drivers/dpll/dpll_netlink.c +++ b/drivers/dpll/dpll_netlink.c @@ -175,6 +175,26 @@ dpll_msg_add_phase_offset_monitor(struct sk_buff *msg, struct dpll_device *dpll, return 0; } +static int +dpll_msg_add_freq_monitor(struct sk_buff *msg, struct dpll_device *dpll, + struct netlink_ext_ack *extack) +{ + const struct dpll_device_ops *ops = dpll_device_ops(dpll); + enum dpll_feature_state state; + int ret; + + if (ops->freq_monitor_set && ops->freq_monitor_get) { + ret = ops->freq_monitor_get(dpll, dpll_priv(dpll), + &state, extack); + if (ret) + return ret; + if (nla_put_u32(msg, DPLL_A_FREQUENCY_MONITOR, state)) + return -EMSGSIZE; + } + + return 0; +} + static int dpll_msg_add_phase_offset_avg_factor(struct sk_buff *msg, struct dpll_device *dpll, @@ -400,6 +420,38 @@ static int dpll_msg_add_ffo(struct sk_buff *msg, struct dpll_pin *pin, ffo); } +static int dpll_msg_add_measured_freq(struct sk_buff *msg, struct dpll_pin *pin, + struct dpll_pin_ref *ref, + struct netlink_ext_ack *extack) +{ + const struct dpll_device_ops *dev_ops = dpll_device_ops(ref->dpll); + const struct dpll_pin_ops *ops = dpll_pin_ops(ref); + struct dpll_device *dpll = ref->dpll; + enum dpll_feature_state state; + u64 measured_freq; + int ret; + + if (!ops->measured_freq_get) + return 0; + ret = dev_ops->freq_monitor_get(dpll, dpll_priv(dpll), + &state, extack); + if (ret) + return ret; + if (state == DPLL_FEATURE_STATE_DISABLE) + return 0; + ret = ops->measured_freq_get(pin, dpll_pin_on_dpll_priv(dpll, pin), + dpll, dpll_priv(dpll), &measured_freq, + extack); + if (ret) + return ret; + if (nla_put_64bit(msg, DPLL_A_PIN_MEASURED_FREQUENCY, + sizeof(measured_freq), &measured_freq, + DPLL_A_PIN_PAD)) + return -EMSGSIZE; + + return 0; +} + static int dpll_msg_add_pin_freq(struct sk_buff *msg, struct dpll_pin *pin, struct dpll_pin_ref *ref, struct netlink_ext_ack *extack) @@ -670,6 +722,9 @@ dpll_cmd_pin_get_one(struct sk_buff *msg, struct dpll_pin *pin, if (ret) return ret; ret = dpll_msg_add_ffo(msg, pin, ref, extack); + if (ret) + return ret; + ret = dpll_msg_add_measured_freq(msg, pin, ref, extack); if (ret) return ret; ret = dpll_msg_add_pin_esync(msg, pin, ref, extack); @@ -722,6 +777,9 @@ dpll_device_get_one(struct dpll_device *dpll, struct sk_buff *msg, if (ret) return ret; ret = dpll_msg_add_phase_offset_avg_factor(msg, dpll, extack); + if (ret) + return ret; + ret = dpll_msg_add_freq_monitor(msg, dpll, extack); if (ret) return ret; @@ -948,6 +1006,32 @@ dpll_phase_offset_avg_factor_set(struct dpll_device *dpll, struct nlattr *a, extack); } +static int +dpll_freq_monitor_set(struct dpll_device *dpll, struct nlattr *a, + struct netlink_ext_ack *extack) +{ + const struct dpll_device_ops *ops = dpll_device_ops(dpll); + enum dpll_feature_state state = nla_get_u32(a), old_state; + int ret; + + if (!(ops->freq_monitor_set && ops->freq_monitor_get)) { + NL_SET_ERR_MSG_ATTR(extack, a, + "dpll device not capable of frequency monitor"); + return -EOPNOTSUPP; + } + ret = ops->freq_monitor_get(dpll, dpll_priv(dpll), &old_state, + extack); + if (ret) { + NL_SET_ERR_MSG(extack, + "unable to get current state of frequency monitor"); + return ret; + } + if (state == old_state) + return 0; + + return ops->freq_monitor_set(dpll, dpll_priv(dpll), state, extack); +} + static int dpll_pin_freq_set(struct dpll_pin *pin, struct nlattr *a, struct netlink_ext_ack *extack) @@ -1878,6 +1962,12 @@ dpll_set_from_nlattr(struct dpll_device *dpll, struct genl_info *info) if (ret) return ret; break; + case DPLL_A_FREQUENCY_MONITOR: + ret = dpll_freq_monitor_set(dpll, a, + info->extack); + if (ret) + return ret; + break; } } diff --git a/include/linux/dpll.h b/include/linux/dpll.h index 2ce295b46b8c..b7277a8b484d 100644 --- a/include/linux/dpll.h +++ b/include/linux/dpll.h @@ -52,6 +52,12 @@ struct dpll_device_ops { int (*phase_offset_avg_factor_get)(const struct dpll_device *dpll, void *dpll_priv, u32 *factor, struct netlink_ext_ack *extack); + int (*freq_monitor_set)(const struct dpll_device *dpll, void *dpll_priv, + enum dpll_feature_state state, + struct netlink_ext_ack *extack); + int (*freq_monitor_get)(const struct dpll_device *dpll, void *dpll_priv, + enum dpll_feature_state *state, + struct netlink_ext_ack *extack); }; struct dpll_pin_ops { @@ -110,6 +116,10 @@ struct dpll_pin_ops { int (*ffo_get)(const struct dpll_pin *pin, void *pin_priv, const struct dpll_device *dpll, void *dpll_priv, s64 *ffo, struct netlink_ext_ack *extack); + int (*measured_freq_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, + void *dpll_priv, u64 *measured_freq, + struct netlink_ext_ack *extack); int (*esync_set)(const struct dpll_pin *pin, void *pin_priv, const struct dpll_device *dpll, void *dpll_priv, u64 freq, struct netlink_ext_ack *extack); From bfc923b642874ea6f94763d6060782072944ebd5 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 2 Apr 2026 20:40:57 +0200 Subject: [PATCH 1160/1561] dpll: zl3073x: implement frequency monitoring Extract common measurement latch logic from zl3073x_ref_ffo_update() into a new zl3073x_ref_freq_meas_latch() helper and add zl3073x_ref_freq_meas_update() that uses it to latch and read absolute input reference frequencies in Hz. Add meas_freq field to struct zl3073x_ref and the corresponding zl3073x_ref_meas_freq_get() accessor. The measured frequencies are updated periodically alongside the existing FFO measurements. Add freq_monitor boolean to struct zl3073x_dpll and implement the freq_monitor_set/get device callbacks to enable/disable frequency monitoring via the DPLL netlink interface. Implement measured_freq_get pin callback for input pins that returns the measured input frequency in mHz. Reviewed-by: Petr Oros Signed-off-by: Ivan Vecera Link: https://patch.msgid.link/20260402184057.1890514-4-ivecera@redhat.com Signed-off-by: Jakub Kicinski --- drivers/dpll/zl3073x/core.c | 88 ++++++++++++++++++++++++++----- drivers/dpll/zl3073x/dpll.c | 100 ++++++++++++++++++++++++++++++++++-- drivers/dpll/zl3073x/dpll.h | 2 + drivers/dpll/zl3073x/ref.h | 14 +++++ 4 files changed, 187 insertions(+), 17 deletions(-) diff --git a/drivers/dpll/zl3073x/core.c b/drivers/dpll/zl3073x/core.c index 6363002d48d4..cb47a5db061a 100644 --- a/drivers/dpll/zl3073x/core.c +++ b/drivers/dpll/zl3073x/core.c @@ -632,22 +632,21 @@ int zl3073x_ref_phase_offsets_update(struct zl3073x_dev *zldev, int channel) } /** - * zl3073x_ref_ffo_update - update reference fractional frequency offsets + * zl3073x_ref_freq_meas_latch - latch reference frequency measurements * @zldev: pointer to zl3073x_dev structure + * @type: measurement type (ZL_REF_FREQ_MEAS_CTRL_*) * - * The function asks device to update fractional frequency offsets latch - * registers the latest measured values, reads and stores them into + * The function waits for the previous measurement to finish, selects all + * references and requests a new measurement of the given type. * * Return: 0 on success, <0 on error */ static int -zl3073x_ref_ffo_update(struct zl3073x_dev *zldev) +zl3073x_ref_freq_meas_latch(struct zl3073x_dev *zldev, u8 type) { - int i, rc; + int rc; - /* Per datasheet we have to wait for 'ref_freq_meas_ctrl' to be zero - * to ensure that the measured data are coherent. - */ + /* Wait for previous measurement to finish */ rc = zl3073x_poll_zero_u8(zldev, ZL_REG_REF_FREQ_MEAS_CTRL, ZL_REF_FREQ_MEAS_CTRL); if (rc) @@ -663,15 +662,64 @@ zl3073x_ref_ffo_update(struct zl3073x_dev *zldev) if (rc) return rc; - /* Request frequency offset measurement */ - rc = zl3073x_write_u8(zldev, ZL_REG_REF_FREQ_MEAS_CTRL, - ZL_REF_FREQ_MEAS_CTRL_REF_FREQ_OFF); + /* Request measurement */ + rc = zl3073x_write_u8(zldev, ZL_REG_REF_FREQ_MEAS_CTRL, type); if (rc) return rc; /* Wait for finish */ - rc = zl3073x_poll_zero_u8(zldev, ZL_REG_REF_FREQ_MEAS_CTRL, - ZL_REF_FREQ_MEAS_CTRL); + return zl3073x_poll_zero_u8(zldev, ZL_REG_REF_FREQ_MEAS_CTRL, + ZL_REF_FREQ_MEAS_CTRL); +} + +/** + * zl3073x_ref_freq_meas_update - update measured input reference frequencies + * @zldev: pointer to zl3073x_dev structure + * + * The function asks device to latch measured input reference frequencies + * and stores the results in the ref state. + * + * Return: 0 on success, <0 on error + */ +static int +zl3073x_ref_freq_meas_update(struct zl3073x_dev *zldev) +{ + int i, rc; + + rc = zl3073x_ref_freq_meas_latch(zldev, ZL_REF_FREQ_MEAS_CTRL_REF_FREQ); + if (rc) + return rc; + + /* Read measured frequencies in Hz (unsigned 32-bit, LSB = 1 Hz) */ + for (i = 0; i < ZL3073X_NUM_REFS; i++) { + u32 value; + + rc = zl3073x_read_u32(zldev, ZL_REG_REF_FREQ(i), &value); + if (rc) + return rc; + + zldev->ref[i].meas_freq = value; + } + + return 0; +} + +/** + * zl3073x_ref_ffo_update - update reference fractional frequency offsets + * @zldev: pointer to zl3073x_dev structure + * + * The function asks device to latch the latest measured fractional + * frequency offset values, reads and stores them into the ref state. + * + * Return: 0 on success, <0 on error + */ +static int +zl3073x_ref_ffo_update(struct zl3073x_dev *zldev) +{ + int i, rc; + + rc = zl3073x_ref_freq_meas_latch(zldev, + ZL_REF_FREQ_MEAS_CTRL_REF_FREQ_OFF); if (rc) return rc; @@ -714,6 +762,20 @@ zl3073x_dev_periodic_work(struct kthread_work *work) dev_warn(zldev->dev, "Failed to update phase offsets: %pe\n", ERR_PTR(rc)); + /* Update measured input reference frequencies if any DPLL has + * frequency monitoring enabled. + */ + list_for_each_entry(zldpll, &zldev->dplls, list) { + if (zldpll->freq_monitor) { + rc = zl3073x_ref_freq_meas_update(zldev); + if (rc) + dev_warn(zldev->dev, + "Failed to update measured frequencies: %pe\n", + ERR_PTR(rc)); + break; + } + } + /* Update references' fractional frequency offsets */ rc = zl3073x_ref_ffo_update(zldev); if (rc) diff --git a/drivers/dpll/zl3073x/dpll.c b/drivers/dpll/zl3073x/dpll.c index a29f606318f6..d788ca45a17e 100644 --- a/drivers/dpll/zl3073x/dpll.c +++ b/drivers/dpll/zl3073x/dpll.c @@ -39,6 +39,7 @@ * @pin_state: last saved pin state * @phase_offset: last saved pin phase offset * @freq_offset: last saved fractional frequency offset + * @measured_freq: last saved measured frequency */ struct zl3073x_dpll_pin { struct list_head list; @@ -54,6 +55,7 @@ struct zl3073x_dpll_pin { enum dpll_pin_state pin_state; s64 phase_offset; s64 freq_offset; + u32 measured_freq; }; /* @@ -202,6 +204,21 @@ zl3073x_dpll_input_pin_ffo_get(const struct dpll_pin *dpll_pin, void *pin_priv, return 0; } +static int +zl3073x_dpll_input_pin_measured_freq_get(const struct dpll_pin *dpll_pin, + void *pin_priv, + const struct dpll_device *dpll, + void *dpll_priv, u64 *measured_freq, + struct netlink_ext_ack *extack) +{ + struct zl3073x_dpll_pin *pin = pin_priv; + + *measured_freq = pin->measured_freq; + *measured_freq *= DPLL_PIN_MEASURED_FREQUENCY_DIVIDER; + + return 0; +} + static int zl3073x_dpll_input_pin_frequency_get(const struct dpll_pin *dpll_pin, void *pin_priv, @@ -1116,6 +1133,35 @@ zl3073x_dpll_phase_offset_monitor_set(const struct dpll_device *dpll, return 0; } +static int +zl3073x_dpll_freq_monitor_get(const struct dpll_device *dpll, + void *dpll_priv, + enum dpll_feature_state *state, + struct netlink_ext_ack *extack) +{ + struct zl3073x_dpll *zldpll = dpll_priv; + + if (zldpll->freq_monitor) + *state = DPLL_FEATURE_STATE_ENABLE; + else + *state = DPLL_FEATURE_STATE_DISABLE; + + return 0; +} + +static int +zl3073x_dpll_freq_monitor_set(const struct dpll_device *dpll, + void *dpll_priv, + enum dpll_feature_state state, + struct netlink_ext_ack *extack) +{ + struct zl3073x_dpll *zldpll = dpll_priv; + + zldpll->freq_monitor = (state == DPLL_FEATURE_STATE_ENABLE); + + return 0; +} + static const struct dpll_pin_ops zl3073x_dpll_input_pin_ops = { .direction_get = zl3073x_dpll_pin_direction_get, .esync_get = zl3073x_dpll_input_pin_esync_get, @@ -1123,6 +1169,7 @@ static const struct dpll_pin_ops zl3073x_dpll_input_pin_ops = { .ffo_get = zl3073x_dpll_input_pin_ffo_get, .frequency_get = zl3073x_dpll_input_pin_frequency_get, .frequency_set = zl3073x_dpll_input_pin_frequency_set, + .measured_freq_get = zl3073x_dpll_input_pin_measured_freq_get, .phase_offset_get = zl3073x_dpll_input_pin_phase_offset_get, .phase_adjust_get = zl3073x_dpll_input_pin_phase_adjust_get, .phase_adjust_set = zl3073x_dpll_input_pin_phase_adjust_set, @@ -1151,6 +1198,8 @@ static const struct dpll_device_ops zl3073x_dpll_device_ops = { .phase_offset_avg_factor_set = zl3073x_dpll_phase_offset_avg_factor_set, .phase_offset_monitor_get = zl3073x_dpll_phase_offset_monitor_get, .phase_offset_monitor_set = zl3073x_dpll_phase_offset_monitor_set, + .freq_monitor_get = zl3073x_dpll_freq_monitor_get, + .freq_monitor_set = zl3073x_dpll_freq_monitor_set, .supported_modes_get = zl3073x_dpll_supported_modes_get, }; @@ -1572,6 +1621,7 @@ zl3073x_dpll_pin_ffo_check(struct zl3073x_dpll_pin *pin) struct zl3073x_dev *zldev = zldpll->dev; const struct zl3073x_ref *ref; u8 ref_id; + s64 ffo; /* Get reference monitor status */ ref_id = zl3073x_input_pin_ref_get(pin->id); @@ -1582,10 +1632,47 @@ zl3073x_dpll_pin_ffo_check(struct zl3073x_dpll_pin *pin) return false; /* Compare with previous value */ - if (pin->freq_offset != ref->ffo) { + ffo = zl3073x_ref_ffo_get(ref); + if (pin->freq_offset != ffo) { dev_dbg(zldev->dev, "%s freq offset changed: %lld -> %lld\n", - pin->label, pin->freq_offset, ref->ffo); - pin->freq_offset = ref->ffo; + pin->label, pin->freq_offset, ffo); + pin->freq_offset = ffo; + + return true; + } + + return false; +} + +/** + * zl3073x_dpll_pin_measured_freq_check - check for pin measured frequency + * change + * @pin: pin to check + * + * Check for the given pin's measured frequency change. + * + * Return: true on measured frequency change, false otherwise + */ +static bool +zl3073x_dpll_pin_measured_freq_check(struct zl3073x_dpll_pin *pin) +{ + struct zl3073x_dpll *zldpll = pin->dpll; + struct zl3073x_dev *zldev = zldpll->dev; + const struct zl3073x_ref *ref; + u8 ref_id; + u32 freq; + + if (!zldpll->freq_monitor) + return false; + + ref_id = zl3073x_input_pin_ref_get(pin->id); + ref = zl3073x_ref_state_get(zldev, ref_id); + + freq = zl3073x_ref_meas_freq_get(ref); + if (pin->measured_freq != freq) { + dev_dbg(zldev->dev, "%s measured freq changed: %u -> %u\n", + pin->label, pin->measured_freq, freq); + pin->measured_freq = freq; return true; } @@ -1677,13 +1764,18 @@ zl3073x_dpll_changes_check(struct zl3073x_dpll *zldpll) pin_changed = true; } - /* Check for phase offset and ffo change once per second */ + /* Check for phase offset, ffo, and measured freq change + * once per second. + */ if (zldpll->check_count % 2 == 0) { if (zl3073x_dpll_pin_phase_offset_check(pin)) pin_changed = true; if (zl3073x_dpll_pin_ffo_check(pin)) pin_changed = true; + + if (zl3073x_dpll_pin_measured_freq_check(pin)) + pin_changed = true; } if (pin_changed) diff --git a/drivers/dpll/zl3073x/dpll.h b/drivers/dpll/zl3073x/dpll.h index 115ee4f67e7a..434c32a7db12 100644 --- a/drivers/dpll/zl3073x/dpll.h +++ b/drivers/dpll/zl3073x/dpll.h @@ -15,6 +15,7 @@ * @id: DPLL index * @check_count: periodic check counter * @phase_monitor: is phase offset monitor enabled + * @freq_monitor: is frequency monitor enabled * @ops: DPLL device operations for this instance * @dpll_dev: pointer to registered DPLL device * @tracker: tracking object for the acquired reference @@ -28,6 +29,7 @@ struct zl3073x_dpll { u8 id; u8 check_count; bool phase_monitor; + bool freq_monitor; struct dpll_device_ops ops; struct dpll_device *dpll_dev; dpll_tracker tracker; diff --git a/drivers/dpll/zl3073x/ref.h b/drivers/dpll/zl3073x/ref.h index 06d8d4d97ea2..be16be20dbc7 100644 --- a/drivers/dpll/zl3073x/ref.h +++ b/drivers/dpll/zl3073x/ref.h @@ -23,6 +23,7 @@ struct zl3073x_dev; * @sync_ctrl: reference sync control * @config: reference config * @ffo: current fractional frequency offset + * @meas_freq: measured input frequency in Hz * @mon_status: reference monitor status */ struct zl3073x_ref { @@ -40,6 +41,7 @@ struct zl3073x_ref { ); struct_group(stat, /* Status */ s64 ffo; + u32 meas_freq; u8 mon_status; ); }; @@ -68,6 +70,18 @@ zl3073x_ref_ffo_get(const struct zl3073x_ref *ref) return ref->ffo; } +/** + * zl3073x_ref_meas_freq_get - get measured input frequency + * @ref: pointer to ref state + * + * Return: measured input frequency in Hz + */ +static inline u32 +zl3073x_ref_meas_freq_get(const struct zl3073x_ref *ref) +{ + return ref->meas_freq; +} + /** * zl3073x_ref_freq_get - get given input reference frequency * @ref: pointer to ref state From 3b45559f6c0af0eaf8a91dfc1037a423337cf21d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 2 Apr 2026 14:54:44 -0700 Subject: [PATCH 1161/1561] selftests: net: py: color the basics in the output Sometimes it's hard to spot the ok / not ok lines in the output. This is especially true for the GRO tests which retries a lot so there's a wall of non-fatal output printed. Try to color the crucial lines green / red / yellow when running in a terminal. Acked-by: Stanislav Fomichev Reviewed-by: Willem de Bruijn Acked-by: Joe Damato Link: https://patch.msgid.link/20260402215444.1589893-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/lib/py/ksft.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py index 6cdfb8afccb5..7b8af463e35d 100644 --- a/tools/testing/selftests/net/lib/py/ksft.py +++ b/tools/testing/selftests/net/lib/py/ksft.py @@ -2,6 +2,7 @@ import functools import inspect +import os import signal import sys import time @@ -31,6 +32,17 @@ class KsftTerminate(KeyboardInterrupt): pass +@functools.lru_cache() +def _ksft_supports_color(): + if os.environ.get("NO_COLOR") is not None: + return False + if not hasattr(sys.stdout, "isatty") or not sys.stdout.isatty(): + return False + if os.environ.get("TERM") == "dumb": + return False + return True + + def ksft_pr(*objs, **kwargs): """ Print logs to stdout. @@ -165,6 +177,14 @@ def ktap_result(ok, cnt=1, case_name="", comment=""): res += "." + case_name if comment: res += " # " + comment + if _ksft_supports_color(): + if comment.startswith(("SKIP", "XFAIL")): + color = "\033[33m" + elif ok: + color = "\033[32m" + else: + color = "\033[31m" + res = color + res + "\033[0m" print(res, flush=True) From 70e32aadb5cab9ef706962e1293582ec03f89625 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Fri, 3 Apr 2026 10:21:04 +0800 Subject: [PATCH 1162/1561] net: mctp: tests: use actual address when creating dev with addr Sashiko reports: > This isn't a bug in the core networking code, but the addr parameter > appears to be ignored here. In mctp_test_create_dev_with_addr(), we are ignoring the addr argument and just using `8`. Use the passed address instead. All invocations use 8 anyway, so no effective change at present. Signed-off-by: Jeremy Kerr Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260403-dev-mctp-fix-test-addr-v1-1-b7fa789cdd9b@codeconstruct.com.au Signed-off-by: Jakub Kicinski --- net/mctp/test/utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mctp/test/utils.c b/net/mctp/test/utils.c index 2f79f8c1a2b4..c3987d5ade7a 100644 --- a/net/mctp/test/utils.c +++ b/net/mctp/test/utils.c @@ -95,7 +95,7 @@ struct mctp_test_dev *mctp_test_create_dev_with_addr(mctp_eid_t addr) } dev->mdev->num_addrs = 1; - dev->mdev->addrs[0] = 8; + dev->mdev->addrs[0] = addr; return dev; } From f32ba0963119b2520127077d72ebe7f6f8cb847f Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Fri, 3 Apr 2026 10:24:51 +0800 Subject: [PATCH 1163/1561] net: mctp: defer creation of dst after source-address check Sashiko reports: > mctp_dst_from_route() increments the device reference count by calling > mctp_dev_hold(). When a valid route is found and dst is NULL, the > structure copy is bypassed and rc is set to 0. Instead of optimistically creating a dst from the final route (then releasing it if the saddr is invalid), perform the saddr check first. This means we don't have an unuecessary hold/release on the dev, which could leak if the dst pointer is NULL. No caller passes a NULL dst at present though (so the leak is not possible), but this is an intended use of mctp_dst_from_route(). Signed-off-by: Jeremy Kerr Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260403-dev-mctp-dst-defer-v1-1-9c2c55faf9e9@codeconstruct.com.au Signed-off-by: Jakub Kicinski --- net/mctp/route.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/net/mctp/route.c b/net/mctp/route.c index 78263e7ae423..26fb8c6bbad2 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -897,7 +897,8 @@ static mctp_eid_t mctp_dev_saddr(struct mctp_dev *dev) /* must only be called on a direct route, as the final output hop */ static void mctp_dst_from_route(struct mctp_dst *dst, mctp_eid_t eid, - unsigned int mtu, struct mctp_route *route) + mctp_eid_t saddr, unsigned int mtu, + struct mctp_route *route) { mctp_dev_hold(route->dev); dst->nexthop = eid; @@ -907,7 +908,7 @@ static void mctp_dst_from_route(struct mctp_dst *dst, mctp_eid_t eid, dst->mtu = min(dst->mtu, mtu); dst->halen = 0; dst->output = route->output; - dst->saddr = mctp_dev_saddr(route->dev); + dst->saddr = saddr; } int mctp_dst_from_extaddr(struct mctp_dst *dst, struct net *net, int ifindex, @@ -975,7 +976,6 @@ int mctp_route_lookup(struct net *net, unsigned int dnet, { const unsigned int max_depth = 32; unsigned int depth, mtu = 0; - struct mctp_dst dst_tmp; int rc = -EHOSTUNREACH; rcu_read_lock(); @@ -996,15 +996,15 @@ int mctp_route_lookup(struct net *net, unsigned int dnet, mtu = mtu ?: rt->mtu; if (rt->dst_type == MCTP_ROUTE_DIRECT) { - mctp_dst_from_route(&dst_tmp, daddr, mtu, rt); + mctp_eid_t saddr = mctp_dev_saddr(rt->dev); + /* cannot do gateway-ed routes without a src */ - if (dst_tmp.saddr == MCTP_ADDR_NULL && depth != 0) { - mctp_dst_release(&dst_tmp); - } else { - if (dst) - *dst = dst_tmp; - rc = 0; - } + if (saddr == MCTP_ADDR_NULL && depth != 0) + break; + + if (dst) + mctp_dst_from_route(dst, daddr, saddr, mtu, rt); + rc = 0; break; } else if (rt->dst_type == MCTP_ROUTE_GATEWAY) { From c8eee00c0fef5f709b9114be432d7b3afebb4c0a Mon Sep 17 00:00:00 2001 From: Daniel Zahka Date: Fri, 3 Apr 2026 03:26:31 -0700 Subject: [PATCH 1164/1561] psp: add missing device stats to get-stats reply attributes Commit f05d26198cf2 ("psp: add stats from psp spec to driver facing api") added device statistics (rx-packets, rx-bytes, rx-auth-fail, rx-error, rx-bad, tx-packets, tx-bytes, tx-error) to the stats attribute-set but did not add them to the get-stats operation reply attributes. The kernel reports these attributes in the reply, so list them in the spec to match. Signed-off-by: Daniel Zahka Acked-by: Willem de Bruijn Link: https://patch.msgid.link/20260403-psp-yaml-fix-v1-1-dacee0663903@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/psp.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Documentation/netlink/specs/psp.yaml b/Documentation/netlink/specs/psp.yaml index f3a57782d2cf..100c36cda8e5 100644 --- a/Documentation/netlink/specs/psp.yaml +++ b/Documentation/netlink/specs/psp.yaml @@ -267,6 +267,14 @@ operations: - dev-id - key-rotations - stale-events + - rx-packets + - rx-bytes + - rx-auth-fail + - rx-error + - rx-bad + - tx-packets + - tx-bytes + - tx-error pre: psp-device-get-locked post: psp-device-unlock dump: From 98f28d8d6e5a5ed058dd37854c19e9b3bae72eff Mon Sep 17 00:00:00 2001 From: Laurence Rowe Date: Thu, 2 Apr 2026 13:49:18 -0700 Subject: [PATCH 1165/1561] vsock: avoid timeout for non-blocking accept() with empty backlog A common pattern in epoll network servers is to eagerly accept all pending connections from the non-blocking listening socket after epoll_wait indicates the socket is ready by calling accept in a loop until EAGAIN is returned indicating that the backlog is empty. Scheduling a timeout for a non-blocking accept with an empty backlog meant AF_VSOCK sockets used by epoll network servers incurred hundreds of microseconds of additional latency per accept loop compared to AF_INET or AF_UNIX sockets. Signed-off-by: Laurence Rowe Reviewed-by: Bobby Eshleman Reviewed-by: Stefano Garzarella Link: https://patch.msgid.link/20260402204918.130395-1-laurencerowe@gmail.com Signed-off-by: Jakub Kicinski --- net/vmw_vsock/af_vsock.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index e4756604d50b..b8794ea0f01e 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1864,10 +1864,10 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, * created upon connection establishment. */ timeout = sock_rcvtimeo(listener, arg->flags & O_NONBLOCK); - prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); while ((connected = vsock_dequeue_accept(listener)) == NULL && - listener->sk_err == 0) { + listener->sk_err == 0 && timeout != 0) { + prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); release_sock(listener); timeout = schedule_timeout(timeout); finish_wait(sk_sleep(listener), &wait); @@ -1876,17 +1876,14 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, if (signal_pending(current)) { err = sock_intr_errno(timeout); goto out; - } else if (timeout == 0) { - err = -EAGAIN; - goto out; } - - prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk_sleep(listener), &wait); - if (listener->sk_err) + if (listener->sk_err) { err = -listener->sk_err; + } else if (!connected) { + err = -EAGAIN; + } if (connected) { sk_acceptq_removed(listener); From b0a79590d10847f190ed377d2664377d7068191d Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Wed, 1 Apr 2026 14:34:30 +0100 Subject: [PATCH 1166/1561] net: dsa: move dsa_bridge_ports() helper to dsa.h The yt921x driver contains a helper to create a bitmap of ports which are members of a bridge. Move the helper as static inline function into dsa.h, so other driver can make use of it as well. Signed-off-by: Daniel Golle Link: https://patch.msgid.link/4f8bbfce3e4e3a02064fc4dc366263136c6e0383.1775049897.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/yt921x.c | 13 ------------- include/net/dsa.h | 13 +++++++++++++ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/net/dsa/yt921x.c b/drivers/net/dsa/yt921x.c index 904613f4694a..5b66109ecc23 100644 --- a/drivers/net/dsa/yt921x.c +++ b/drivers/net/dsa/yt921x.c @@ -2154,19 +2154,6 @@ yt921x_bridge_join(struct yt921x_priv *priv, int port, u16 ports_mask) return 0; } -static u32 -dsa_bridge_ports(struct dsa_switch *ds, const struct net_device *bdev) -{ - struct dsa_port *dp; - u32 mask = 0; - - dsa_switch_for_each_user_port(dp, ds) - if (dsa_port_offloads_bridge_dev(dp, bdev)) - mask |= BIT(dp->index); - - return mask; -} - static int yt921x_bridge_flags(struct yt921x_priv *priv, int port, struct switchdev_brport_flags flags) diff --git a/include/net/dsa.h b/include/net/dsa.h index 6c17446f3dcc..e93b4feaca96 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -831,6 +831,19 @@ dsa_tree_offloads_bridge_dev(struct dsa_switch_tree *dst, return false; } +static inline u32 +dsa_bridge_ports(struct dsa_switch *ds, const struct net_device *bdev) +{ + struct dsa_port *dp; + u32 mask = 0; + + dsa_switch_for_each_user_port(dp, ds) + if (dsa_port_offloads_bridge_dev(dp, bdev)) + mask |= BIT(dp->index); + + return mask; +} + static inline bool dsa_port_tree_same(const struct dsa_port *a, const struct dsa_port *b) { From f259e08494c47c614ce7b6d3079d1e0d3f30ae66 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Wed, 1 Apr 2026 14:34:42 +0100 Subject: [PATCH 1167/1561] net: dsa: add bridge member iteration macro Drivers that offload bridges need to iterate over the ports that are members of a given bridge, for example to rebuild per-port forwarding bitmaps when membership changes. Currently drivers typically open-code this by combining dsa_switch_for_each_user_port() with a dsa_port_offloads_bridge_dev() check, or cache bridge membership within the driver. Add dsa_switch_for_each_bridge_member() macro to express this pattern directly, and use it for the existing dsa_bridge_ports() inline helper. Signed-off-by: Daniel Golle Link: https://patch.msgid.link/e7136aaa26773f39e805a00fe4ecf13cd2b83fc0.1775049897.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/include/net/dsa.h b/include/net/dsa.h index e93b4feaca96..8b6d34e8a6f0 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -831,15 +831,18 @@ dsa_tree_offloads_bridge_dev(struct dsa_switch_tree *dst, return false; } +#define dsa_switch_for_each_bridge_member(_dp, _ds, _bdev) \ + dsa_switch_for_each_user_port(_dp, _ds) \ + if (dsa_port_offloads_bridge_dev(_dp, _bdev)) + static inline u32 dsa_bridge_ports(struct dsa_switch *ds, const struct net_device *bdev) { struct dsa_port *dp; u32 mask = 0; - dsa_switch_for_each_user_port(dp, ds) - if (dsa_port_offloads_bridge_dev(dp, bdev)) - mask |= BIT(dp->index); + dsa_switch_for_each_bridge_member(dp, ds, bdev) + mask |= BIT(dp->index); return mask; } From 4250ff1640ea1ede99bfe02ca949acbcc6c0927f Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Wed, 1 Apr 2026 14:34:52 +0100 Subject: [PATCH 1168/1561] dsa: tag_mxl862xx: set dsa_default_offload_fwd_mark() The MxL862xx offloads bridge forwarding in hardware, so set dsa_default_offload_fwd_mark() to avoid duplicate forwarding of packets of (eg. flooded) frames arriving at the CPU port. Link-local frames are directly trapped to the CPU port only, so don't set dsa_default_offload_fwd_mark() on those. Signed-off-by: Daniel Golle Link: https://patch.msgid.link/e1161c90894ddc519c57dc0224b3a0f6bfa1d2d6.1775049897.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- net/dsa/tag_mxl862xx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/dsa/tag_mxl862xx.c b/net/dsa/tag_mxl862xx.c index 01f215868271..8daefeb8d49d 100644 --- a/net/dsa/tag_mxl862xx.c +++ b/net/dsa/tag_mxl862xx.c @@ -86,6 +86,9 @@ static struct sk_buff *mxl862_tag_rcv(struct sk_buff *skb, return NULL; } + if (likely(!is_link_local_ether_addr(eth_hdr(skb)->h_dest))) + dsa_default_offload_fwd_mark(skb); + /* remove the MxL862xx special tag between the MAC addresses and the * current ethertype field. */ From 340bdf984613c4a9241d678915e513824f5a9b19 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Wed, 1 Apr 2026 14:35:01 +0100 Subject: [PATCH 1169/1561] net: dsa: mxl862xx: implement bridge offloading Implement joining and leaving bridges as well as add, delete and dump operations on isolated FDBs, port MDB membership management, and setting a port's STP state. The switch supports a maximum of 63 bridges, however, up to 12 may be used as "single-port bridges" to isolate standalone ports. Allowing up to 48 bridges to be offloaded seems more than enough on that hardware, hence that is set as max_num_bridges. A total of 128 bridge ports are supported in the bridge portmap, and virtual bridge ports have to be used eg. for link-aggregation, hence potentially exceeding the number of hardware ports. The firmware-assigned bridge identifier (FID) for each offloaded bridge is stored in an array used to map DSA bridge num to firmware bridge ID, avoiding the need for a driver-private bridge tracking structure. Bridge member portmaps are rebuilt on join/leave using dsa_switch_for_each_bridge_member(). As there are now more users of the BRIDGEPORT_CONFIG_SET API and the state of each port is cached locally, introduce a helper function mxl862xx_set_bridge_port(struct dsa_switch *ds, int port) which applies the cached per-port state to hardware. For standalone user ports (dp->bridge == NULL), it additionally resets the port to single-port bridge state: CPU-only portmap, learning and flooding disabled. The CPU port path sets its state explicitly before calling this helper and is therefore not affected by the reset. Note that MASK_VLAN_BASED_MAC_LEARNING is intentionally absent from the firmware write mask. After mxl862xx_reset(), the firmware initialises all VLAN-based MAC learning fields to 0 (disabled), so SVL is the active mode by default without having to set it explicitly. Note that there is no convenient way to control flooding on per-port level, so the driver is using a 0-rate QoS meter setup as a stopper in lack of any better option. In order to be perfect the firmware-enforced minimum bucket size is bypassed by directly writing 0s to the relevant registers -- without that at least one 64-byte packet could still pass before the meter would change from 'yellow' into 'red' state. Signed-off-by: Daniel Golle Link: https://patch.msgid.link/dd079180e2098e5f9626fcd149b9bad9a1b5a1b2.1775049897.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mxl862xx/mxl862xx-api.h | 225 ++++++- drivers/net/dsa/mxl862xx/mxl862xx-cmd.h | 20 +- drivers/net/dsa/mxl862xx/mxl862xx.c | 742 ++++++++++++++++++++++-- drivers/net/dsa/mxl862xx/mxl862xx.h | 101 +++- 4 files changed, 1026 insertions(+), 62 deletions(-) diff --git a/drivers/net/dsa/mxl862xx/mxl862xx-api.h b/drivers/net/dsa/mxl862xx/mxl862xx-api.h index a9f599dbca25..8677763544d7 100644 --- a/drivers/net/dsa/mxl862xx/mxl862xx-api.h +++ b/drivers/net/dsa/mxl862xx/mxl862xx-api.h @@ -3,6 +3,7 @@ #ifndef __MXL862XX_API_H #define __MXL862XX_API_H +#include #include /** @@ -34,6 +35,168 @@ struct mxl862xx_register_mod { __le16 mask; } __packed; +/** + * enum mxl862xx_mac_table_filter - Source/Destination MAC address filtering + * + * @MXL862XX_MAC_FILTER_NONE: no filter + * @MXL862XX_MAC_FILTER_SRC: source address filter + * @MXL862XX_MAC_FILTER_DEST: destination address filter + * @MXL862XX_MAC_FILTER_BOTH: both source and destination filter + */ +enum mxl862xx_mac_table_filter { + MXL862XX_MAC_FILTER_NONE = 0, + MXL862XX_MAC_FILTER_SRC = BIT(0), + MXL862XX_MAC_FILTER_DEST = BIT(1), + MXL862XX_MAC_FILTER_BOTH = BIT(0) | BIT(1), +}; + +#define MXL862XX_TCI_VLAN_ID GENMASK(11, 0) +#define MXL862XX_TCI_VLAN_CFI_DEI BIT(12) +#define MXL862XX_TCI_VLAN_PRI GENMASK(15, 13) + +/* Set in port_id to use port_map[] as a portmap bitmap instead of a single + * port ID. When clear, port_id selects one port; when set, the firmware + * ignores the lower bits of port_id and writes port_map[] directly into + * the PCE bridge port map. + */ +#define MXL862XX_PORTMAP_FLAG BIT(31) + +/** + * struct mxl862xx_mac_table_add - MAC Table Entry to be added + * @fid: Filtering Identifier (FID) (not supported by all switches) + * @port_id: Ethernet Port number + * @port_map: Bridge Port Map + * @sub_if_id: Sub-Interface Identifier Destination + * @age_timer: Aging Time in seconds + * @vlan_id: STAG VLAN Id + * @static_entry: Static Entry (value will be aged out if not set to static) + * @traffic_class: Egress queue traffic class + * @mac: MAC Address to add to the table + * @filter_flag: See &enum mxl862xx_mac_table_filter + * @igmp_controlled: Packet is marked as IGMP controlled if destination MAC + * address matches MAC in this entry + * @associated_mac: Associated Mac address + * @tci: TCI for B-Step + * Bit [0:11] - VLAN ID + * Bit [12] - VLAN CFI/DEI + * Bit [13:15] - VLAN PRI + */ +struct mxl862xx_mac_table_add { + __le16 fid; + __le32 port_id; + __le16 port_map[8]; + __le16 sub_if_id; + __le32 age_timer; + __le16 vlan_id; + u8 static_entry; + u8 traffic_class; + u8 mac[ETH_ALEN]; + u8 filter_flag; + u8 igmp_controlled; + u8 associated_mac[ETH_ALEN]; + __le16 tci; +} __packed; + +/** + * struct mxl862xx_mac_table_remove - MAC Table Entry to be removed + * @fid: Filtering Identifier (FID) + * @mac: MAC Address to be removed from the table. + * @filter_flag: See &enum mxl862xx_mac_table_filter + * @tci: TCI for B-Step + * Bit [0:11] - VLAN ID + * Bit [12] - VLAN CFI/DEI + * Bit [13:15] - VLAN PRI + */ +struct mxl862xx_mac_table_remove { + __le16 fid; + u8 mac[ETH_ALEN]; + u8 filter_flag; + __le16 tci; +} __packed; + +/** + * struct mxl862xx_mac_table_read - MAC Table Entry to be read + * @initial: Restart the get operation from the beginning of the table + * @last: Indicates that the read operation returned last entry + * @fid: Get the MAC table entry belonging to the given Filtering Identifier + * @port_id: The Bridge Port ID + * @port_map: Bridge Port Map + * @age_timer: Aging Time + * @vlan_id: STAG VLAN Id + * @static_entry: Indicates if this is a Static Entry + * @sub_if_id: Sub-Interface Identifier Destination + * @mac: MAC Address. Filled out by the switch API implementation. + * @filter_flag: See &enum mxl862xx_mac_table_filter + * @igmp_controlled: Packet is marked as IGMP controlled if destination MAC + * address matches the MAC in this entry + * @entry_changed: Indicate if the Entry has Changed + * @associated_mac: Associated MAC address + * @hit_status: MAC Table Hit Status Update + * @tci: TCI for B-Step + * Bit [0:11] - VLAN ID + * Bit [12] - VLAN CFI/DEI + * Bit [13:15] - VLAN PRI + * @first_bridge_port_id: The port this MAC address has first been learned. + * This is used for loop detection. + */ +struct mxl862xx_mac_table_read { + u8 initial; + u8 last; + __le16 fid; + __le32 port_id; + __le16 port_map[8]; + __le32 age_timer; + __le16 vlan_id; + u8 static_entry; + __le16 sub_if_id; + u8 mac[ETH_ALEN]; + u8 filter_flag; + u8 igmp_controlled; + u8 entry_changed; + u8 associated_mac[ETH_ALEN]; + u8 hit_status; + __le16 tci; + __le16 first_bridge_port_id; +} __packed; + +/** + * struct mxl862xx_mac_table_query - MAC Table Entry key-based lookup + * @mac: MAC Address to search for (input) + * @fid: Filtering Identifier (input) + * @found: Set by firmware: 1 if entry was found, 0 if not + * @port_id: Bridge Port ID (output; MSB set if portmap mode) + * @port_map: Bridge Port Map (output; valid for static entries) + * @sub_if_id: Sub-Interface Identifier Destination + * @age_timer: Aging Time + * @vlan_id: STAG VLAN Id + * @static_entry: Indicates if this is a Static Entry + * @filter_flag: See &enum mxl862xx_mac_table_filter (input+output) + * @igmp_controlled: IGMP controlled flag + * @entry_changed: Entry changed flag + * @associated_mac: Associated MAC address + * @hit_status: MAC Table Hit Status Update + * @tci: TCI (VLAN ID + CFI/DEI + PRI) (input) + * @first_bridge_port_id: First learned bridge port + */ +struct mxl862xx_mac_table_query { + u8 mac[ETH_ALEN]; + __le16 fid; + u8 found; + __le32 port_id; + __le16 port_map[8]; + __le16 sub_if_id; + __le32 age_timer; + __le16 vlan_id; + u8 static_entry; + u8 filter_flag; + u8 igmp_controlled; + u8 entry_changed; + u8 associated_mac[ETH_ALEN]; + u8 hit_status; + __le16 tci; + __le16 first_bridge_port_id; +} __packed; + /** * enum mxl862xx_mac_clear_type - MAC table clear type * @MXL862XX_MAC_CLEAR_PHY_PORT: clear dynamic entries based on port_id @@ -138,6 +301,40 @@ enum mxl862xx_bridge_port_egress_meter { MXL862XX_BRIDGE_PORT_EGRESS_METER_MAX, }; +/** + * struct mxl862xx_qos_meter_cfg - Rate meter configuration + * @enable: Enable/disable meter + * @meter_id: Meter ID (assigned by firmware on alloc) + * @meter_name: Meter name string + * @meter_type: Meter algorithm type (srTCM = 0, trTCM = 1) + * @cbs: Committed Burst Size (in bytes) + * @res1: Reserved + * @ebs: Excess Burst Size (in bytes) + * @res2: Reserved + * @rate: Committed Information Rate (in kbit/s) + * @pi_rate: Peak Information Rate (in kbit/s) + * @colour_blind_mode: Colour-blind mode enable + * @pkt_mode: Packet mode enable + * @local_overhd: Local overhead accounting enable + * @local_overhd_val: Local overhead accounting value + */ +struct mxl862xx_qos_meter_cfg { + u8 enable; + __le16 meter_id; + char meter_name[32]; + __le32 meter_type; + __le32 cbs; + __le32 res1; + __le32 ebs; + __le32 res2; + __le32 rate; + __le32 pi_rate; + u8 colour_blind_mode; + u8 pkt_mode; + u8 local_overhd; + __le16 local_overhd_val; +} __packed; + /** * enum mxl862xx_bridge_forward_mode - Bridge forwarding type of packet * @MXL862XX_BRIDGE_FORWARD_FLOOD: Packet is flooded to port members of @@ -456,7 +653,7 @@ struct mxl862xx_pmapper { */ struct mxl862xx_bridge_port_config { __le16 bridge_port_id; - __le32 mask; /* enum mxl862xx_bridge_port_config_mask */ + __le32 mask; /* enum mxl862xx_bridge_port_config_mask */ __le16 bridge_id; u8 ingress_extended_vlan_enable; __le16 ingress_extended_vlan_block_id; @@ -658,6 +855,32 @@ struct mxl862xx_ctp_port_assignment { __le16 bridge_port_id; } __packed; +/** + * enum mxl862xx_stp_port_state - Spanning Tree Protocol port states + * @MXL862XX_STP_PORT_STATE_FORWARD: Forwarding state + * @MXL862XX_STP_PORT_STATE_DISABLE: Disabled/Discarding state + * @MXL862XX_STP_PORT_STATE_LEARNING: Learning state + * @MXL862XX_STP_PORT_STATE_BLOCKING: Blocking/Listening + */ +enum mxl862xx_stp_port_state { + MXL862XX_STP_PORT_STATE_FORWARD = 0, + MXL862XX_STP_PORT_STATE_DISABLE, + MXL862XX_STP_PORT_STATE_LEARNING, + MXL862XX_STP_PORT_STATE_BLOCKING, +}; + +/** + * struct mxl862xx_stp_port_cfg - Configures the Spanning Tree Protocol state + * @port_id: Port number + * @fid: Filtering Identifier (FID) + * @port_state: See &enum mxl862xx_stp_port_state + */ +struct mxl862xx_stp_port_cfg { + __le16 port_id; + __le16 fid; + __le32 port_state; /* enum mxl862xx_stp_port_state */ +} __packed; + /** * struct mxl862xx_sys_fw_image_version - Firmware version information * @iv_major: firmware major version diff --git a/drivers/net/dsa/mxl862xx/mxl862xx-cmd.h b/drivers/net/dsa/mxl862xx/mxl862xx-cmd.h index f6852ade64e7..9f6c5bf9fdf2 100644 --- a/drivers/net/dsa/mxl862xx/mxl862xx-cmd.h +++ b/drivers/net/dsa/mxl862xx/mxl862xx-cmd.h @@ -15,12 +15,15 @@ #define MXL862XX_BRDG_MAGIC 0x300 #define MXL862XX_BRDGPORT_MAGIC 0x400 #define MXL862XX_CTP_MAGIC 0x500 +#define MXL862XX_QOS_MAGIC 0x600 #define MXL862XX_SWMAC_MAGIC 0xa00 +#define MXL862XX_STP_MAGIC 0xf00 #define MXL862XX_SS_MAGIC 0x1600 #define GPY_GPY2XX_MAGIC 0x1800 #define SYS_MISC_MAGIC 0x1900 #define MXL862XX_COMMON_CFGGET (MXL862XX_COMMON_MAGIC + 0x9) +#define MXL862XX_COMMON_CFGSET (MXL862XX_COMMON_MAGIC + 0xa) #define MXL862XX_COMMON_REGISTERMOD (MXL862XX_COMMON_MAGIC + 0x11) #define MXL862XX_BRIDGE_ALLOC (MXL862XX_BRDG_MAGIC + 0x1) @@ -35,14 +38,23 @@ #define MXL862XX_CTP_PORTASSIGNMENTSET (MXL862XX_CTP_MAGIC + 0x3) +#define MXL862XX_QOS_METERCFGSET (MXL862XX_QOS_MAGIC + 0x2) +#define MXL862XX_QOS_METERALLOC (MXL862XX_QOS_MAGIC + 0x2a) + +#define MXL862XX_MAC_TABLEENTRYADD (MXL862XX_SWMAC_MAGIC + 0x2) +#define MXL862XX_MAC_TABLEENTRYREAD (MXL862XX_SWMAC_MAGIC + 0x3) +#define MXL862XX_MAC_TABLEENTRYQUERY (MXL862XX_SWMAC_MAGIC + 0x4) +#define MXL862XX_MAC_TABLEENTRYREMOVE (MXL862XX_SWMAC_MAGIC + 0x5) #define MXL862XX_MAC_TABLECLEARCOND (MXL862XX_SWMAC_MAGIC + 0x8) -#define MXL862XX_SS_SPTAG_SET (MXL862XX_SS_MAGIC + 0x02) +#define MXL862XX_SS_SPTAG_SET (MXL862XX_SS_MAGIC + 0x2) -#define INT_GPHY_READ (GPY_GPY2XX_MAGIC + 0x01) -#define INT_GPHY_WRITE (GPY_GPY2XX_MAGIC + 0x02) +#define MXL862XX_STP_PORTCFGSET (MXL862XX_STP_MAGIC + 0x2) -#define SYS_MISC_FW_VERSION (SYS_MISC_MAGIC + 0x02) +#define INT_GPHY_READ (GPY_GPY2XX_MAGIC + 0x1) +#define INT_GPHY_WRITE (GPY_GPY2XX_MAGIC + 0x2) + +#define SYS_MISC_FW_VERSION (SYS_MISC_MAGIC + 0x2) #define MMD_API_MAXIMUM_ID 0x7fff diff --git a/drivers/net/dsa/mxl862xx/mxl862xx.c b/drivers/net/dsa/mxl862xx/mxl862xx.c index af9b00b2d2c4..e4e16c720763 100644 --- a/drivers/net/dsa/mxl862xx/mxl862xx.c +++ b/drivers/net/dsa/mxl862xx/mxl862xx.c @@ -7,8 +7,11 @@ * Copyright (C) 2025 Daniel Golle */ -#include +#include #include +#include +#include +#include #include #include #include @@ -36,6 +39,17 @@ #define MXL862XX_READY_TIMEOUT_MS 10000 #define MXL862XX_READY_POLL_MS 100 +#define MXL862XX_TCM_INST_SEL 0xe00 +#define MXL862XX_TCM_CBS 0xe12 +#define MXL862XX_TCM_EBS 0xe13 + +static const int mxl862xx_flood_meters[] = { + MXL862XX_BRIDGE_PORT_EGRESS_METER_UNKNOWN_UC, + MXL862XX_BRIDGE_PORT_EGRESS_METER_UNKNOWN_MC_IP, + MXL862XX_BRIDGE_PORT_EGRESS_METER_UNKNOWN_MC_NON_IP, + MXL862XX_BRIDGE_PORT_EGRESS_METER_BROADCAST, +}; + static enum dsa_tag_protocol mxl862xx_get_tag_protocol(struct dsa_switch *ds, int port, enum dsa_tag_protocol m) @@ -168,6 +182,199 @@ static int mxl862xx_setup_mdio(struct dsa_switch *ds) return ret; } +static int mxl862xx_bridge_config_fwd(struct dsa_switch *ds, u16 bridge_id, + bool ucast_flood, bool mcast_flood, + bool bcast_flood) +{ + struct mxl862xx_bridge_config bridge_config = {}; + struct mxl862xx_priv *priv = ds->priv; + int ret; + + bridge_config.mask = cpu_to_le32(MXL862XX_BRIDGE_CONFIG_MASK_FORWARDING_MODE); + bridge_config.bridge_id = cpu_to_le16(bridge_id); + + bridge_config.forward_unknown_unicast = cpu_to_le32(ucast_flood ? + MXL862XX_BRIDGE_FORWARD_FLOOD : MXL862XX_BRIDGE_FORWARD_DISCARD); + + bridge_config.forward_unknown_multicast_ip = cpu_to_le32(mcast_flood ? + MXL862XX_BRIDGE_FORWARD_FLOOD : MXL862XX_BRIDGE_FORWARD_DISCARD); + bridge_config.forward_unknown_multicast_non_ip = + bridge_config.forward_unknown_multicast_ip; + + bridge_config.forward_broadcast = cpu_to_le32(bcast_flood ? + MXL862XX_BRIDGE_FORWARD_FLOOD : MXL862XX_BRIDGE_FORWARD_DISCARD); + + ret = MXL862XX_API_WRITE(priv, MXL862XX_BRIDGE_CONFIGSET, bridge_config); + if (ret) + dev_err(ds->dev, "failed to configure bridge %u forwarding: %d\n", + bridge_id, ret); + + return ret; +} + +/* Allocate a single zero-rate meter shared by all ports and flood types. + * All flood-blocking egress sub-meters point to this one meter so that any + * packet hitting this meter is unconditionally dropped. + * + * The firmware API requires CBS >= 64 (its bs2ls encoder clamps smaller + * values), so the meter is initially configured with CBS=EBS=64. + * A zero-rate bucket starts full at CBS bytes, which would let one packet + * through before the bucket empties. To eliminate this one-packet leak we + * override CBS and EBS to zero via direct register writes after the API call; + * the hardware accepts CBS=0 and immediately flags the bucket as exceeded, + * so no traffic can ever pass. + */ +static int mxl862xx_setup_drop_meter(struct dsa_switch *ds) +{ + struct mxl862xx_qos_meter_cfg meter = {}; + struct mxl862xx_priv *priv = ds->priv; + struct mxl862xx_register_mod reg; + int ret; + + /* meter_id=0 means auto-alloc */ + ret = MXL862XX_API_READ(priv, MXL862XX_QOS_METERALLOC, meter); + if (ret) + return ret; + + meter.enable = true; + meter.cbs = cpu_to_le32(64); + meter.ebs = cpu_to_le32(64); + snprintf(meter.meter_name, sizeof(meter.meter_name), "drop"); + + ret = MXL862XX_API_WRITE(priv, MXL862XX_QOS_METERCFGSET, meter); + if (ret) + return ret; + + priv->drop_meter = le16_to_cpu(meter.meter_id); + + /* Select the meter instance for subsequent TCM register access. */ + reg.addr = cpu_to_le16(MXL862XX_TCM_INST_SEL); + reg.data = cpu_to_le16(priv->drop_meter); + reg.mask = cpu_to_le16(0xffff); + ret = MXL862XX_API_WRITE(priv, MXL862XX_COMMON_REGISTERMOD, reg); + if (ret) + return ret; + + /* Zero CBS so the committed bucket starts empty (exceeded). */ + reg.addr = cpu_to_le16(MXL862XX_TCM_CBS); + reg.data = 0; + ret = MXL862XX_API_WRITE(priv, MXL862XX_COMMON_REGISTERMOD, reg); + if (ret) + return ret; + + /* Zero EBS so the excess bucket starts empty (exceeded). */ + reg.addr = cpu_to_le16(MXL862XX_TCM_EBS); + return MXL862XX_API_WRITE(priv, MXL862XX_COMMON_REGISTERMOD, reg); +} + +static int mxl862xx_set_bridge_port(struct dsa_switch *ds, int port) +{ + struct mxl862xx_bridge_port_config br_port_cfg = {}; + struct dsa_port *dp = dsa_to_port(ds, port); + struct mxl862xx_priv *priv = ds->priv; + struct mxl862xx_port *p = &priv->ports[port]; + struct dsa_port *member_dp; + u16 bridge_id; + bool enable; + int i, idx; + + if (!p->setup_done) + return 0; + + if (dsa_port_is_cpu(dp)) { + dsa_switch_for_each_user_port(member_dp, ds) { + if (member_dp->cpu_dp->index != port) + continue; + mxl862xx_fw_portmap_set_bit(br_port_cfg.bridge_port_map, + member_dp->index); + } + } else if (dp->bridge) { + dsa_switch_for_each_bridge_member(member_dp, ds, + dp->bridge->dev) { + if (member_dp->index == port) + continue; + mxl862xx_fw_portmap_set_bit(br_port_cfg.bridge_port_map, + member_dp->index); + } + mxl862xx_fw_portmap_set_bit(br_port_cfg.bridge_port_map, + dp->cpu_dp->index); + } else { + mxl862xx_fw_portmap_set_bit(br_port_cfg.bridge_port_map, + dp->cpu_dp->index); + p->flood_block = 0; + p->learning = false; + } + + bridge_id = dp->bridge ? priv->bridges[dp->bridge->num] : p->fid; + + br_port_cfg.bridge_port_id = cpu_to_le16(port); + br_port_cfg.bridge_id = cpu_to_le16(bridge_id); + br_port_cfg.mask = cpu_to_le32(MXL862XX_BRIDGE_PORT_CONFIG_MASK_BRIDGE_ID | + MXL862XX_BRIDGE_PORT_CONFIG_MASK_BRIDGE_PORT_MAP | + MXL862XX_BRIDGE_PORT_CONFIG_MASK_MC_SRC_MAC_LEARNING | + MXL862XX_BRIDGE_PORT_CONFIG_MASK_EGRESS_SUB_METER); + br_port_cfg.src_mac_learning_disable = !p->learning; + + for (i = 0; i < ARRAY_SIZE(mxl862xx_flood_meters); i++) { + idx = mxl862xx_flood_meters[i]; + enable = !!(p->flood_block & BIT(idx)); + + br_port_cfg.egress_traffic_sub_meter_id[idx] = + enable ? cpu_to_le16(priv->drop_meter) : 0; + br_port_cfg.egress_sub_metering_enable[idx] = enable; + } + + return MXL862XX_API_WRITE(priv, MXL862XX_BRIDGEPORT_CONFIGSET, + br_port_cfg); +} + +static int mxl862xx_sync_bridge_members(struct dsa_switch *ds, + const struct dsa_bridge *bridge) +{ + struct dsa_port *dp; + int ret = 0, err; + + dsa_switch_for_each_bridge_member(dp, ds, bridge->dev) { + err = mxl862xx_set_bridge_port(ds, dp->index); + if (err) + ret = err; + } + + return ret; +} + +static int mxl862xx_allocate_bridge(struct mxl862xx_priv *priv) +{ + struct mxl862xx_bridge_alloc br_alloc = {}; + int ret; + + ret = MXL862XX_API_READ(priv, MXL862XX_BRIDGE_ALLOC, br_alloc); + if (ret) + return ret; + + return le16_to_cpu(br_alloc.bridge_id); +} + +static void mxl862xx_free_bridge(struct dsa_switch *ds, + const struct dsa_bridge *bridge) +{ + struct mxl862xx_priv *priv = ds->priv; + u16 fw_id = priv->bridges[bridge->num]; + struct mxl862xx_bridge_alloc br_alloc = { + .bridge_id = cpu_to_le16(fw_id), + }; + int ret; + + ret = MXL862XX_API_WRITE(priv, MXL862XX_BRIDGE_FREE, br_alloc); + if (ret) { + dev_err(ds->dev, "failed to free fw bridge %u: %pe\n", + fw_id, ERR_PTR(ret)); + return; + } + + priv->bridges[bridge->num] = 0; +} + static int mxl862xx_setup(struct dsa_switch *ds) { struct mxl862xx_priv *priv = ds->priv; @@ -181,6 +388,10 @@ static int mxl862xx_setup(struct dsa_switch *ds) if (ret) return ret; + ret = mxl862xx_setup_drop_meter(ds); + if (ret) + return ret; + return mxl862xx_setup_mdio(ds); } @@ -260,99 +471,137 @@ static int mxl862xx_configure_sp_tag_proto(struct dsa_switch *ds, int port, static int mxl862xx_setup_cpu_bridge(struct dsa_switch *ds, int port) { - struct mxl862xx_bridge_port_config br_port_cfg = {}; struct mxl862xx_priv *priv = ds->priv; - u16 bridge_port_map = 0; - struct dsa_port *dp; - /* CPU port bridge setup */ - br_port_cfg.mask = cpu_to_le32(MXL862XX_BRIDGE_PORT_CONFIG_MASK_BRIDGE_PORT_MAP | - MXL862XX_BRIDGE_PORT_CONFIG_MASK_MC_SRC_MAC_LEARNING | - MXL862XX_BRIDGE_PORT_CONFIG_MASK_VLAN_BASED_MAC_LEARNING); + priv->ports[port].fid = MXL862XX_DEFAULT_BRIDGE; + priv->ports[port].learning = true; - br_port_cfg.bridge_port_id = cpu_to_le16(port); - br_port_cfg.src_mac_learning_disable = false; - br_port_cfg.vlan_src_mac_vid_enable = true; - br_port_cfg.vlan_dst_mac_vid_enable = true; - - /* include all assigned user ports in the CPU portmap */ - dsa_switch_for_each_user_port(dp, ds) { - /* it's safe to rely on cpu_dp being valid for user ports */ - if (dp->cpu_dp->index != port) - continue; - - bridge_port_map |= BIT(dp->index); - } - br_port_cfg.bridge_port_map[0] |= cpu_to_le16(bridge_port_map); - - return MXL862XX_API_WRITE(priv, MXL862XX_BRIDGEPORT_CONFIGSET, br_port_cfg); + return mxl862xx_set_bridge_port(ds, port); } -static int mxl862xx_add_single_port_bridge(struct dsa_switch *ds, int port) +static int mxl862xx_port_bridge_join(struct dsa_switch *ds, int port, + const struct dsa_bridge bridge, + bool *tx_fwd_offload, + struct netlink_ext_ack *extack) { - struct mxl862xx_bridge_port_config br_port_cfg = {}; - struct dsa_port *dp = dsa_to_port(ds, port); - struct mxl862xx_bridge_alloc br_alloc = {}; + struct mxl862xx_priv *priv = ds->priv; int ret; - ret = MXL862XX_API_READ(ds->priv, MXL862XX_BRIDGE_ALLOC, br_alloc); - if (ret) { - dev_err(ds->dev, "failed to allocate a bridge for port %d\n", port); - return ret; + if (!priv->bridges[bridge.num]) { + ret = mxl862xx_allocate_bridge(priv); + if (ret < 0) + return ret; + + priv->bridges[bridge.num] = ret; + + /* Free bridge here on error, DSA rollback won't. */ + ret = mxl862xx_sync_bridge_members(ds, &bridge); + if (ret) { + mxl862xx_free_bridge(ds, &bridge); + return ret; + } + + return 0; } - br_port_cfg.bridge_id = br_alloc.bridge_id; - br_port_cfg.bridge_port_id = cpu_to_le16(port); - br_port_cfg.mask = cpu_to_le32(MXL862XX_BRIDGE_PORT_CONFIG_MASK_BRIDGE_ID | - MXL862XX_BRIDGE_PORT_CONFIG_MASK_BRIDGE_PORT_MAP | - MXL862XX_BRIDGE_PORT_CONFIG_MASK_MC_SRC_MAC_LEARNING | - MXL862XX_BRIDGE_PORT_CONFIG_MASK_VLAN_BASED_MAC_LEARNING); - br_port_cfg.src_mac_learning_disable = true; - br_port_cfg.vlan_src_mac_vid_enable = false; - br_port_cfg.vlan_dst_mac_vid_enable = false; - /* As this function is only called for user ports it is safe to rely on - * cpu_dp being valid - */ - br_port_cfg.bridge_port_map[0] = cpu_to_le16(BIT(dp->cpu_dp->index)); + return mxl862xx_sync_bridge_members(ds, &bridge); +} - return MXL862XX_API_WRITE(ds->priv, MXL862XX_BRIDGEPORT_CONFIGSET, br_port_cfg); +static void mxl862xx_port_bridge_leave(struct dsa_switch *ds, int port, + const struct dsa_bridge bridge) +{ + int err; + + err = mxl862xx_sync_bridge_members(ds, &bridge); + if (err) + dev_err(ds->dev, + "failed to sync bridge members after port %d left: %pe\n", + port, ERR_PTR(err)); + + /* Revert leaving port, omitted by the sync above, to its + * single-port bridge + */ + err = mxl862xx_set_bridge_port(ds, port); + if (err) + dev_err(ds->dev, + "failed to update bridge port %d state: %pe\n", port, + ERR_PTR(err)); + + if (!dsa_bridge_ports(ds, bridge.dev)) + mxl862xx_free_bridge(ds, &bridge); } static int mxl862xx_port_setup(struct dsa_switch *ds, int port) { + struct mxl862xx_priv *priv = ds->priv; struct dsa_port *dp = dsa_to_port(ds, port); bool is_cpu_port = dsa_port_is_cpu(dp); int ret; - /* disable port and flush MAC entries */ ret = mxl862xx_port_state(ds, port, false); if (ret) return ret; mxl862xx_port_fast_age(ds, port); - /* skip setup for unused and DSA ports */ if (dsa_port_is_unused(dp) || dsa_port_is_dsa(dp)) return 0; - /* configure tag protocol */ ret = mxl862xx_configure_sp_tag_proto(ds, port, is_cpu_port); if (ret) return ret; - /* assign CTP port IDs */ ret = mxl862xx_configure_ctp_port(ds, port, port, is_cpu_port ? 32 - port : 1); if (ret) return ret; if (is_cpu_port) - /* assign user ports to CPU port bridge */ return mxl862xx_setup_cpu_bridge(ds, port); - /* setup single-port bridge for user ports */ - return mxl862xx_add_single_port_bridge(ds, port); + /* setup single-port bridge for user ports. + * If this fails, the FID is leaked -- but the port then transitions + * to unused, and the FID pool is sized to tolerate this. + */ + ret = mxl862xx_allocate_bridge(priv); + if (ret < 0) { + dev_err(ds->dev, "failed to allocate a bridge for port %d\n", port); + return ret; + } + priv->ports[port].fid = ret; + /* Standalone ports should not flood unknown unicast or multicast + * towards the CPU by default; only broadcast is needed initially. + */ + ret = mxl862xx_bridge_config_fwd(ds, priv->ports[port].fid, + false, false, true); + if (ret) + return ret; + ret = mxl862xx_set_bridge_port(ds, port); + if (ret) + return ret; + + priv->ports[port].setup_done = true; + + return 0; +} + +static void mxl862xx_port_teardown(struct dsa_switch *ds, int port) +{ + struct mxl862xx_priv *priv = ds->priv; + struct dsa_port *dp = dsa_to_port(ds, port); + + if (dsa_port_is_unused(dp) || dsa_port_is_dsa(dp)) + return; + + /* Prevent deferred host_flood_work from acting on stale state. + * The flag is checked under rtnl_lock() by the worker; since + * teardown also runs under RTNL, this is race-free. + * + * HW EVLAN/VF blocks are not freed here -- the firmware receives + * a full reset on the next probe, which reclaims all resources. + */ + priv->ports[port].setup_done = false; } static void mxl862xx_phylink_get_caps(struct dsa_switch *ds, int port, @@ -365,14 +614,371 @@ static void mxl862xx_phylink_get_caps(struct dsa_switch *ds, int port, config->supported_interfaces); } +static int mxl862xx_get_fid(struct dsa_switch *ds, struct dsa_db db) +{ + struct mxl862xx_priv *priv = ds->priv; + + switch (db.type) { + case DSA_DB_PORT: + return priv->ports[db.dp->index].fid; + + case DSA_DB_BRIDGE: + if (!priv->bridges[db.bridge.num]) + return -ENOENT; + return priv->bridges[db.bridge.num]; + + default: + return -EOPNOTSUPP; + } +} + +static int mxl862xx_port_fdb_add(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid, struct dsa_db db) +{ + struct mxl862xx_mac_table_add param = {}; + int fid = mxl862xx_get_fid(ds, db), ret; + struct mxl862xx_priv *priv = ds->priv; + + if (fid < 0) + return fid; + + param.port_id = cpu_to_le32(port); + param.static_entry = true; + param.fid = cpu_to_le16(fid); + param.tci = cpu_to_le16(FIELD_PREP(MXL862XX_TCI_VLAN_ID, vid)); + ether_addr_copy(param.mac, addr); + + ret = MXL862XX_API_WRITE(priv, MXL862XX_MAC_TABLEENTRYADD, param); + if (ret) + dev_err(ds->dev, "failed to add FDB entry on port %d\n", port); + + return ret; +} + +static int mxl862xx_port_fdb_del(struct dsa_switch *ds, int port, + const unsigned char *addr, u16 vid, const struct dsa_db db) +{ + struct mxl862xx_mac_table_remove param = {}; + int fid = mxl862xx_get_fid(ds, db), ret; + struct mxl862xx_priv *priv = ds->priv; + + if (fid < 0) + return fid; + + param.fid = cpu_to_le16(fid); + param.tci = cpu_to_le16(FIELD_PREP(MXL862XX_TCI_VLAN_ID, vid)); + ether_addr_copy(param.mac, addr); + + ret = MXL862XX_API_WRITE(priv, MXL862XX_MAC_TABLEENTRYREMOVE, param); + if (ret) + dev_err(ds->dev, "failed to remove FDB entry on port %d\n", port); + + return ret; +} + +static int mxl862xx_port_fdb_dump(struct dsa_switch *ds, int port, + dsa_fdb_dump_cb_t *cb, void *data) +{ + struct mxl862xx_mac_table_read param = { .initial = 1 }; + struct mxl862xx_priv *priv = ds->priv; + u32 entry_port_id; + int ret; + + while (true) { + ret = MXL862XX_API_READ(priv, MXL862XX_MAC_TABLEENTRYREAD, param); + if (ret) + return ret; + + if (param.last) + break; + + entry_port_id = le32_to_cpu(param.port_id); + + if (entry_port_id == port) { + ret = cb(param.mac, FIELD_GET(MXL862XX_TCI_VLAN_ID, + le16_to_cpu(param.tci)), + param.static_entry, data); + if (ret) + return ret; + } + + memset(¶m, 0, sizeof(param)); + } + + return 0; +} + +static int mxl862xx_port_mdb_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb, + const struct dsa_db db) +{ + struct mxl862xx_mac_table_query qparam = {}; + struct mxl862xx_mac_table_add aparam = {}; + struct mxl862xx_priv *priv = ds->priv; + int fid, ret; + + fid = mxl862xx_get_fid(ds, db); + if (fid < 0) + return fid; + + ether_addr_copy(qparam.mac, mdb->addr); + qparam.fid = cpu_to_le16(fid); + qparam.tci = cpu_to_le16(FIELD_PREP(MXL862XX_TCI_VLAN_ID, mdb->vid)); + + ret = MXL862XX_API_READ(priv, MXL862XX_MAC_TABLEENTRYQUERY, qparam); + if (ret) + return ret; + + /* Build the ADD command using portmap mode */ + ether_addr_copy(aparam.mac, mdb->addr); + aparam.fid = cpu_to_le16(fid); + aparam.tci = cpu_to_le16(FIELD_PREP(MXL862XX_TCI_VLAN_ID, mdb->vid)); + aparam.static_entry = true; + aparam.port_id = cpu_to_le32(MXL862XX_PORTMAP_FLAG); + + if (qparam.found) + memcpy(aparam.port_map, qparam.port_map, + sizeof(aparam.port_map)); + + mxl862xx_fw_portmap_set_bit(aparam.port_map, port); + + return MXL862XX_API_WRITE(priv, MXL862XX_MAC_TABLEENTRYADD, aparam); +} + +static int mxl862xx_port_mdb_del(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb, + const struct dsa_db db) +{ + struct mxl862xx_mac_table_remove rparam = {}; + struct mxl862xx_mac_table_query qparam = {}; + struct mxl862xx_mac_table_add aparam = {}; + int fid = mxl862xx_get_fid(ds, db), ret; + struct mxl862xx_priv *priv = ds->priv; + + if (fid < 0) + return fid; + + qparam.fid = cpu_to_le16(fid); + qparam.tci = cpu_to_le16(FIELD_PREP(MXL862XX_TCI_VLAN_ID, mdb->vid)); + ether_addr_copy(qparam.mac, mdb->addr); + + ret = MXL862XX_API_READ(priv, MXL862XX_MAC_TABLEENTRYQUERY, qparam); + if (ret) + return ret; + + if (!qparam.found) + return 0; + + mxl862xx_fw_portmap_clear_bit(qparam.port_map, port); + + if (mxl862xx_fw_portmap_is_empty(qparam.port_map)) { + rparam.fid = cpu_to_le16(fid); + rparam.tci = cpu_to_le16(FIELD_PREP(MXL862XX_TCI_VLAN_ID, mdb->vid)); + ether_addr_copy(rparam.mac, mdb->addr); + ret = MXL862XX_API_WRITE(priv, MXL862XX_MAC_TABLEENTRYREMOVE, rparam); + } else { + /* Write back with reduced portmap */ + aparam.fid = cpu_to_le16(fid); + aparam.tci = cpu_to_le16(FIELD_PREP(MXL862XX_TCI_VLAN_ID, mdb->vid)); + ether_addr_copy(aparam.mac, mdb->addr); + aparam.static_entry = true; + aparam.port_id = cpu_to_le32(MXL862XX_PORTMAP_FLAG); + memcpy(aparam.port_map, qparam.port_map, sizeof(aparam.port_map)); + ret = MXL862XX_API_WRITE(priv, MXL862XX_MAC_TABLEENTRYADD, aparam); + } + + return ret; +} + +static int mxl862xx_set_ageing_time(struct dsa_switch *ds, unsigned int msecs) +{ + struct mxl862xx_cfg param = {}; + int ret; + + ret = MXL862XX_API_READ(ds->priv, MXL862XX_COMMON_CFGGET, param); + if (ret) { + dev_err(ds->dev, "failed to read switch config\n"); + return ret; + } + + param.mac_table_age_timer = cpu_to_le32(MXL862XX_AGETIMER_CUSTOM); + param.age_timer = cpu_to_le32(msecs / 1000); + ret = MXL862XX_API_WRITE(ds->priv, MXL862XX_COMMON_CFGSET, param); + if (ret) + dev_err(ds->dev, "failed to set ageing\n"); + + return ret; +} + +static void mxl862xx_port_stp_state_set(struct dsa_switch *ds, int port, + u8 state) +{ + struct mxl862xx_stp_port_cfg param = { + .port_id = cpu_to_le16(port), + }; + struct mxl862xx_priv *priv = ds->priv; + int ret; + + switch (state) { + case BR_STATE_DISABLED: + param.port_state = cpu_to_le32(MXL862XX_STP_PORT_STATE_DISABLE); + break; + case BR_STATE_BLOCKING: + case BR_STATE_LISTENING: + param.port_state = cpu_to_le32(MXL862XX_STP_PORT_STATE_BLOCKING); + break; + case BR_STATE_LEARNING: + param.port_state = cpu_to_le32(MXL862XX_STP_PORT_STATE_LEARNING); + break; + case BR_STATE_FORWARDING: + param.port_state = cpu_to_le32(MXL862XX_STP_PORT_STATE_FORWARD); + break; + default: + dev_err(ds->dev, "invalid STP state: %d\n", state); + return; + } + + ret = MXL862XX_API_WRITE(priv, MXL862XX_STP_PORTCFGSET, param); + if (ret) { + dev_err(ds->dev, "failed to set STP state on port %d\n", port); + return; + } + + /* The firmware may re-enable MAC learning as a side-effect of entering + * LEARNING or FORWARDING state (per 802.1D defaults). + * Re-apply the driver's intended learning and metering config so that + * standalone ports keep learning disabled. + */ + ret = mxl862xx_set_bridge_port(ds, port); + if (ret) + dev_err(ds->dev, "failed to reapply brport flags on port %d\n", + port); + + mxl862xx_port_fast_age(ds, port); +} + +/* Deferred work handler for host flood configuration. + * + * port_set_host_flood is called from atomic context (under + * netif_addr_lock), so firmware calls must be deferred. The worker + * acquires rtnl_lock() to serialize with DSA callbacks that access the + * same driver state. + */ +static void mxl862xx_host_flood_work_fn(struct work_struct *work) +{ + struct mxl862xx_port *p = container_of(work, struct mxl862xx_port, + host_flood_work); + struct mxl862xx_priv *priv = p->priv; + struct dsa_switch *ds = priv->ds; + + rtnl_lock(); + + /* Port may have been torn down between scheduling and now. */ + if (!p->setup_done) { + rtnl_unlock(); + return; + } + + /* Always write to the standalone FID. When standalone it takes effect + * immediately; when bridged the port uses the shared bridge FID so the + * write is a no-op for current forwarding, but the state is preserved + * in hardware and is ready once the port returns to standalone. + */ + mxl862xx_bridge_config_fwd(ds, p->fid, p->host_flood_uc, + p->host_flood_mc, true); + + rtnl_unlock(); +} + +static void mxl862xx_port_set_host_flood(struct dsa_switch *ds, int port, + bool uc, bool mc) +{ + struct mxl862xx_priv *priv = ds->priv; + struct mxl862xx_port *p = &priv->ports[port]; + + p->host_flood_uc = uc; + p->host_flood_mc = mc; + schedule_work(&p->host_flood_work); +} + +static int mxl862xx_port_pre_bridge_flags(struct dsa_switch *ds, int port, + const struct switchdev_brport_flags flags, + struct netlink_ext_ack *extack) +{ + if (flags.mask & ~(BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD | + BR_LEARNING)) + return -EINVAL; + + return 0; +} + +static int mxl862xx_port_bridge_flags(struct dsa_switch *ds, int port, + const struct switchdev_brport_flags flags, + struct netlink_ext_ack *extack) +{ + struct mxl862xx_priv *priv = ds->priv; + unsigned long old_block = priv->ports[port].flood_block; + unsigned long block = old_block; + int ret; + + if (flags.mask & BR_FLOOD) { + if (flags.val & BR_FLOOD) + block &= ~BIT(MXL862XX_BRIDGE_PORT_EGRESS_METER_UNKNOWN_UC); + else + block |= BIT(MXL862XX_BRIDGE_PORT_EGRESS_METER_UNKNOWN_UC); + } + + if (flags.mask & BR_MCAST_FLOOD) { + if (flags.val & BR_MCAST_FLOOD) { + block &= ~BIT(MXL862XX_BRIDGE_PORT_EGRESS_METER_UNKNOWN_MC_IP); + block &= ~BIT(MXL862XX_BRIDGE_PORT_EGRESS_METER_UNKNOWN_MC_NON_IP); + } else { + block |= BIT(MXL862XX_BRIDGE_PORT_EGRESS_METER_UNKNOWN_MC_IP); + block |= BIT(MXL862XX_BRIDGE_PORT_EGRESS_METER_UNKNOWN_MC_NON_IP); + } + } + + if (flags.mask & BR_BCAST_FLOOD) { + if (flags.val & BR_BCAST_FLOOD) + block &= ~BIT(MXL862XX_BRIDGE_PORT_EGRESS_METER_BROADCAST); + else + block |= BIT(MXL862XX_BRIDGE_PORT_EGRESS_METER_BROADCAST); + } + + if (flags.mask & BR_LEARNING) + priv->ports[port].learning = !!(flags.val & BR_LEARNING); + + if (block != old_block || (flags.mask & BR_LEARNING)) { + priv->ports[port].flood_block = block; + ret = mxl862xx_set_bridge_port(ds, port); + if (ret) + return ret; + } + + return 0; +} + static const struct dsa_switch_ops mxl862xx_switch_ops = { .get_tag_protocol = mxl862xx_get_tag_protocol, .setup = mxl862xx_setup, .port_setup = mxl862xx_port_setup, + .port_teardown = mxl862xx_port_teardown, .phylink_get_caps = mxl862xx_phylink_get_caps, .port_enable = mxl862xx_port_enable, .port_disable = mxl862xx_port_disable, .port_fast_age = mxl862xx_port_fast_age, + .set_ageing_time = mxl862xx_set_ageing_time, + .port_bridge_join = mxl862xx_port_bridge_join, + .port_bridge_leave = mxl862xx_port_bridge_leave, + .port_pre_bridge_flags = mxl862xx_port_pre_bridge_flags, + .port_bridge_flags = mxl862xx_port_bridge_flags, + .port_stp_state_set = mxl862xx_port_stp_state_set, + .port_set_host_flood = mxl862xx_port_set_host_flood, + .port_fdb_add = mxl862xx_port_fdb_add, + .port_fdb_del = mxl862xx_port_fdb_del, + .port_fdb_dump = mxl862xx_port_fdb_dump, + .port_mdb_add = mxl862xx_port_mdb_add, + .port_mdb_del = mxl862xx_port_mdb_del, }; static void mxl862xx_phylink_mac_config(struct phylink_config *config, @@ -407,7 +1013,7 @@ static int mxl862xx_probe(struct mdio_device *mdiodev) struct device *dev = &mdiodev->dev; struct mxl862xx_priv *priv; struct dsa_switch *ds; - int err; + int err, i; priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); if (!priv) @@ -425,14 +1031,25 @@ static int mxl862xx_probe(struct mdio_device *mdiodev) ds->ops = &mxl862xx_switch_ops; ds->phylink_mac_ops = &mxl862xx_phylink_mac_ops; ds->num_ports = MXL862XX_MAX_PORTS; + ds->fdb_isolation = true; + ds->max_num_bridges = MXL862XX_MAX_BRIDGES; + mxl862xx_host_init(priv); + for (i = 0; i < MXL862XX_MAX_PORTS; i++) { + priv->ports[i].priv = priv; + INIT_WORK(&priv->ports[i].host_flood_work, + mxl862xx_host_flood_work_fn); + } + dev_set_drvdata(dev, ds); err = dsa_register_switch(ds); - if (err) + if (err) { mxl862xx_host_shutdown(priv); - + for (i = 0; i < MXL862XX_MAX_PORTS; i++) + cancel_work_sync(&priv->ports[i].host_flood_work); + } return err; } @@ -440,6 +1057,7 @@ static void mxl862xx_remove(struct mdio_device *mdiodev) { struct dsa_switch *ds = dev_get_drvdata(&mdiodev->dev); struct mxl862xx_priv *priv; + int i; if (!ds) return; @@ -449,12 +1067,21 @@ static void mxl862xx_remove(struct mdio_device *mdiodev) dsa_unregister_switch(ds); mxl862xx_host_shutdown(priv); + + /* Cancel any pending host flood work. dsa_unregister_switch() + * has already called port_teardown (which sets setup_done=false), + * but a worker could still be blocked on rtnl_lock(). Since we + * are now outside RTNL, cancel_work_sync() will not deadlock. + */ + for (i = 0; i < MXL862XX_MAX_PORTS; i++) + cancel_work_sync(&priv->ports[i].host_flood_work); } static void mxl862xx_shutdown(struct mdio_device *mdiodev) { struct dsa_switch *ds = dev_get_drvdata(&mdiodev->dev); struct mxl862xx_priv *priv; + int i; if (!ds) return; @@ -465,6 +1092,9 @@ static void mxl862xx_shutdown(struct mdio_device *mdiodev) mxl862xx_host_shutdown(priv); + for (i = 0; i < MXL862XX_MAX_PORTS; i++) + cancel_work_sync(&priv->ports[i].host_flood_work); + dev_set_drvdata(&mdiodev->dev, NULL); } diff --git a/drivers/net/dsa/mxl862xx/mxl862xx.h b/drivers/net/dsa/mxl862xx/mxl862xx.h index 3ca0d386f3e8..8d03dd24faee 100644 --- a/drivers/net/dsa/mxl862xx/mxl862xx.h +++ b/drivers/net/dsa/mxl862xx/mxl862xx.h @@ -4,15 +4,114 @@ #define __MXL862XX_H #include +#include #include -#define MXL862XX_MAX_PORTS 17 +struct mxl862xx_priv; +#define MXL862XX_MAX_PORTS 17 +#define MXL862XX_DEFAULT_BRIDGE 0 +#define MXL862XX_MAX_BRIDGES 48 +#define MXL862XX_MAX_BRIDGE_PORTS 128 + +/* Number of __le16 words in a firmware portmap (128-bit bitmap). */ +#define MXL862XX_FW_PORTMAP_WORDS (MXL862XX_MAX_BRIDGE_PORTS / 16) + +/** + * mxl862xx_fw_portmap_set_bit - set a single port bit in a firmware portmap + * @map: firmware portmap array (MXL862XX_FW_PORTMAP_WORDS entries) + * @port: port index (0..MXL862XX_MAX_BRIDGE_PORTS-1) + */ +static inline void mxl862xx_fw_portmap_set_bit(__le16 *map, int port) +{ + map[port / 16] |= cpu_to_le16(BIT(port % 16)); +} + +/** + * mxl862xx_fw_portmap_clear_bit - clear a single port bit in a firmware portmap + * @map: firmware portmap array (MXL862XX_FW_PORTMAP_WORDS entries) + * @port: port index (0..MXL862XX_MAX_BRIDGE_PORTS-1) + */ +static inline void mxl862xx_fw_portmap_clear_bit(__le16 *map, int port) +{ + map[port / 16] &= ~cpu_to_le16(BIT(port % 16)); +} + +/** + * mxl862xx_fw_portmap_is_empty - check whether a firmware portmap has no + * bits set + * @map: firmware portmap array (MXL862XX_FW_PORTMAP_WORDS entries) + * + * Return: true if every word in @map is zero. + */ +static inline bool mxl862xx_fw_portmap_is_empty(const __le16 *map) +{ + int i; + + for (i = 0; i < MXL862XX_FW_PORTMAP_WORDS; i++) + if (map[i]) + return false; + return true; +} + +/** + * struct mxl862xx_port - per-port state tracked by the driver + * @priv: back-pointer to switch private data; needed by + * deferred work handlers to access ds and priv + * @fid: firmware FID for the permanent single-port bridge; + * kept alive for the lifetime of the port so traffic is + * never forwarded while the port is unbridged + * @flood_block: bitmask of firmware meter indices that are currently + * rate-limiting flood traffic on this port (zero-rate + * meters used to block flooding) + * @learning: true when address learning is enabled on this port + * @setup_done: set at end of port_setup, cleared at start of + * port_teardown; guards deferred work against + * acting on torn-down state + * @host_flood_uc: desired host unicast flood state (true = flood); + * updated atomically by port_set_host_flood, consumed + * by the deferred host_flood_work + * @host_flood_mc: desired host multicast flood state (true = flood) + * @host_flood_work: deferred work for applying host flood changes; + * port_set_host_flood runs in atomic context (under + * netif_addr_lock) so firmware calls must be deferred. + * The worker acquires rtnl_lock() to serialize with + * DSA callbacks and checks @setup_done to avoid + * acting on torn-down ports. + */ +struct mxl862xx_port { + struct mxl862xx_priv *priv; + u16 fid; + unsigned long flood_block; + bool learning; + bool setup_done; + bool host_flood_uc; + bool host_flood_mc; + struct work_struct host_flood_work; +}; + +/** + * struct mxl862xx_priv - driver private data for an MxL862xx switch + * @ds: pointer to the DSA switch instance + * @mdiodev: MDIO device used to communicate with the switch firmware + * @crc_err_work: deferred work for shutting down all ports on MDIO CRC errors + * @crc_err: set atomically before CRC-triggered shutdown, cleared after + * @drop_meter: index of the single shared zero-rate firmware meter used + * to unconditionally drop traffic (used to block flooding) + * @ports: per-port state, indexed by switch port number + * @bridges: maps DSA bridge number to firmware bridge ID; + * zero means no firmware bridge allocated for that + * DSA bridge number. Indexed by dsa_bridge.num + * (0 .. ds->max_num_bridges). + */ struct mxl862xx_priv { struct dsa_switch *ds; struct mdio_device *mdiodev; struct work_struct crc_err_work; unsigned long crc_err; + u16 drop_meter; + struct mxl862xx_port ports[MXL862XX_MAX_PORTS]; + u16 bridges[MXL862XX_MAX_BRIDGES + 1]; }; #endif /* __MXL862XX_H */ From 8a4e3ab61d496c37595219e5122d91f204cb05de Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Thu, 2 Apr 2026 11:41:14 +0800 Subject: [PATCH 1170/1561] net: advance skb_defer_disable_key check in napi_consume_skb When net.core.skb_defer_max is adjusted to zero, napi_consume_skb() shouldn't go into that deeper in skb_attempt_defer_free() because it adds an additional pair of local_bh_enable/disable() which is evidently not needed. Advancing the check of the static key saves more cycles and benefits non defer case. Signed-off-by: Jason Xing Link: https://patch.msgid.link/20260402034114.65766-1-kerneljasonxing@gmail.com Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3d6978dd0aa8..4045d7c484a1 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -94,6 +94,7 @@ #include "dev.h" #include "devmem.h" +#include "net-sysfs.h" #include "netmem_priv.h" #include "sock_destructor.h" @@ -1519,7 +1520,8 @@ void napi_consume_skb(struct sk_buff *skb, int budget) DEBUG_NET_WARN_ON_ONCE(!in_softirq()); - if (skb->alloc_cpu != smp_processor_id() && !skb_shared(skb)) { + if (!static_branch_unlikely(&skb_defer_disable_key) && + skb->alloc_cpu != smp_processor_id() && !skb_shared(skb)) { skb_release_head_state(skb); return skb_attempt_defer_free(skb); } From 2f60df9e61aa48bf40c36254bf2e839f09cffd98 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Fri, 3 Apr 2026 16:46:19 +0800 Subject: [PATCH 1171/1561] ip6_tunnel: use generic for_each_ip_tunnel_rcu macro Remove the locally defined for_each_ip6_tunnel_rcu macro and use the generic for_each_ip_tunnel_rcu from linux/if_tunnel.h instead. This eliminates code duplication and ensures consistency across the kernel tunnel implementations. Signed-off-by: Yue Haibing Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260403084619.4107978-1-yuehaibing@huawei.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_tunnel.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 0b53488a9229..46bc06506470 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -96,9 +96,6 @@ static inline int ip6_tnl_mpls_supported(void) return IS_ENABLED(CONFIG_MPLS); } -#define for_each_ip6_tunnel_rcu(start) \ - for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) - /** * ip6_tnl_lookup - fetch tunnel matching the end-point addresses * @net: network namespace @@ -121,7 +118,7 @@ ip6_tnl_lookup(struct net *net, int link, struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); struct in6_addr any; - for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { + for_each_ip_tunnel_rcu(t, ip6n->tnls_r_l[hash]) { if (!ipv6_addr_equal(local, &t->parms.laddr) || !ipv6_addr_equal(remote, &t->parms.raddr) || !(t->dev->flags & IFF_UP)) @@ -135,7 +132,7 @@ ip6_tnl_lookup(struct net *net, int link, memset(&any, 0, sizeof(any)); hash = HASH(&any, local); - for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { + for_each_ip_tunnel_rcu(t, ip6n->tnls_r_l[hash]) { if (!ipv6_addr_equal(local, &t->parms.laddr) || !ipv6_addr_any(&t->parms.raddr) || !(t->dev->flags & IFF_UP)) @@ -148,7 +145,7 @@ ip6_tnl_lookup(struct net *net, int link, } hash = HASH(remote, &any); - for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { + for_each_ip_tunnel_rcu(t, ip6n->tnls_r_l[hash]) { if (!ipv6_addr_equal(remote, &t->parms.raddr) || !ipv6_addr_any(&t->parms.laddr) || !(t->dev->flags & IFF_UP)) From e72058a4bed070193893410e5f6545cc4f99cd24 Mon Sep 17 00:00:00 2001 From: David Heidelberg Date: Fri, 3 Apr 2026 15:58:46 +0200 Subject: [PATCH 1172/1561] dt-bindings: nfc: nxp,nci: Document PN557 compatible The PN557 uses the same hardware as the PN553 but ships with firmware compliant with NCI 2.0. Document PN557 as a compatible device. Signed-off-by: David Heidelberg Reviewed-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20260403-oneplus-nfc-v3-1-fbdce57d63c1@ixit.cz Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/net/nfc/nxp,nci.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/net/nfc/nxp,nci.yaml b/Documentation/devicetree/bindings/net/nfc/nxp,nci.yaml index 364b36151180..4f3847f64983 100644 --- a/Documentation/devicetree/bindings/net/nfc/nxp,nci.yaml +++ b/Documentation/devicetree/bindings/net/nfc/nxp,nci.yaml @@ -18,6 +18,7 @@ properties: - nxp,nq310 - nxp,pn547 - nxp,pn553 + - nxp,pn557 - const: nxp,nxp-nci-i2c enable-gpios: From c321b5676d0c41de9155c1966aa6af8b7ca35091 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Fri, 3 Apr 2026 22:23:39 +0800 Subject: [PATCH 1173/1561] net: macb: Use netif_napi_add_tx() instead of netif_napi_add() for TX NAPI The TX NAPI should be registered via netif_napi_add_tx() to avoid unnecessarily polluting the napi_hash table. Signed-off-by: Kevin Hao Link: https://patch.msgid.link/20260403-macb-napi-tx-v1-1-08126a60c65e@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 6ec7b110dc72..d9716c56f705 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -4743,7 +4743,7 @@ static int macb_init_dflt(struct platform_device *pdev) queue->bp = bp; spin_lock_init(&queue->tx_ptr_lock); netif_napi_add(dev, &queue->napi_rx, macb_rx_poll); - netif_napi_add(dev, &queue->napi_tx, macb_tx_poll); + netif_napi_add_tx(dev, &queue->napi_tx, macb_tx_poll); if (hw_q) { queue->ISR = GEM_ISR(hw_q - 1); queue->IER = GEM_IER(hw_q - 1); From e6b7e1a10cba354e9fd5b10e3a83359f84ad8535 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 3 Apr 2026 15:05:01 -0700 Subject: [PATCH 1174/1561] eth: remove the driver for acenic / tigon1&2 The entire git history for this driver looks like tree-wide and automated cleanups. There's even more coming now with AI, so let's try to delete it instead. Acked-by: Jes Sorensen Acked-by: Greg Kroah-Hartman Link: https://patch.msgid.link/20260403220501.2263835-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 6 - arch/loongarch/configs/loongson32_defconfig | 1 - arch/loongarch/configs/loongson64_defconfig | 1 - arch/mips/configs/cavium_octeon_defconfig | 1 - arch/mips/configs/loongson2k_defconfig | 1 - arch/mips/configs/loongson3_defconfig | 1 - arch/mips/configs/malta_qemu_32r6_defconfig | 1 - arch/mips/configs/maltaaprp_defconfig | 1 - arch/mips/configs/maltasmvp_defconfig | 1 - arch/mips/configs/maltasmvp_eva_defconfig | 1 - arch/mips/configs/maltaup_defconfig | 1 - arch/mips/configs/mtx1_defconfig | 1 - arch/parisc/configs/generic-32bit_defconfig | 1 - arch/parisc/configs/generic-64bit_defconfig | 1 - arch/powerpc/configs/44x/akebono_defconfig | 1 - arch/powerpc/configs/g5_defconfig | 2 - arch/powerpc/configs/powernv_defconfig | 2 - arch/powerpc/configs/ppc64_defconfig | 2 - arch/powerpc/configs/ppc64e_defconfig | 2 - arch/powerpc/configs/ppc6xx_defconfig | 1 - arch/powerpc/configs/skiroot_defconfig | 2 - arch/s390/configs/debug_defconfig | 1 - arch/s390/configs/defconfig | 1 - drivers/net/ethernet/Kconfig | 1 - drivers/net/ethernet/Makefile | 1 - drivers/net/ethernet/alteon/Kconfig | 47 - drivers/net/ethernet/alteon/Makefile | 6 - drivers/net/ethernet/alteon/acenic.c | 3178 ------------------- drivers/net/ethernet/alteon/acenic.h | 791 ----- 29 files changed, 4057 deletions(-) delete mode 100644 drivers/net/ethernet/alteon/Kconfig delete mode 100644 drivers/net/ethernet/alteon/Makefile delete mode 100644 drivers/net/ethernet/alteon/acenic.c delete mode 100644 drivers/net/ethernet/alteon/acenic.h diff --git a/MAINTAINERS b/MAINTAINERS index c583c5478ef6..cb48c9ad9ddc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -261,12 +261,6 @@ L: linux-gpio@vger.kernel.org S: Maintained F: drivers/gpio/gpio-pcie-idio-24.c -ACENIC DRIVER -M: Jes Sorensen -L: linux-acenic@sunsite.dk -S: Maintained -F: drivers/net/ethernet/alteon/acenic* - ACER ASPIRE ONE TEMPERATURE AND FAN DRIVER M: Peter Kaestle L: platform-driver-x86@vger.kernel.org diff --git a/arch/loongarch/configs/loongson32_defconfig b/arch/loongarch/configs/loongson32_defconfig index 276b1577e0be..37e5eada5a67 100644 --- a/arch/loongarch/configs/loongson32_defconfig +++ b/arch/loongarch/configs/loongson32_defconfig @@ -571,7 +571,6 @@ CONFIG_VIRTIO_NET=m # CONFIG_NET_VENDOR_ADAPTEC is not set # CONFIG_NET_VENDOR_AGERE is not set # CONFIG_NET_VENDOR_ALACRITECH is not set -# CONFIG_NET_VENDOR_ALTEON is not set # CONFIG_NET_VENDOR_AMAZON is not set # CONFIG_NET_VENDOR_AMD is not set # CONFIG_NET_VENDOR_AQUANTIA is not set diff --git a/arch/loongarch/configs/loongson64_defconfig b/arch/loongarch/configs/loongson64_defconfig index a14db1a95e7e..7648f6c73929 100644 --- a/arch/loongarch/configs/loongson64_defconfig +++ b/arch/loongarch/configs/loongson64_defconfig @@ -587,7 +587,6 @@ CONFIG_VIRTIO_NET=m # CONFIG_NET_VENDOR_ADAPTEC is not set # CONFIG_NET_VENDOR_AGERE is not set # CONFIG_NET_VENDOR_ALACRITECH is not set -# CONFIG_NET_VENDOR_ALTEON is not set # CONFIG_NET_VENDOR_AMAZON is not set # CONFIG_NET_VENDOR_AMD is not set # CONFIG_NET_VENDOR_AQUANTIA is not set diff --git a/arch/mips/configs/cavium_octeon_defconfig b/arch/mips/configs/cavium_octeon_defconfig index 68c363366bce..64da32b570aa 100644 --- a/arch/mips/configs/cavium_octeon_defconfig +++ b/arch/mips/configs/cavium_octeon_defconfig @@ -60,7 +60,6 @@ CONFIG_PATA_OCTEON_CF=y CONFIG_NETDEVICES=y # CONFIG_NET_VENDOR_3COM is not set # CONFIG_NET_VENDOR_ADAPTEC is not set -# CONFIG_NET_VENDOR_ALTEON is not set # CONFIG_NET_VENDOR_AMD is not set # CONFIG_NET_VENDOR_ATHEROS is not set # CONFIG_NET_VENDOR_BROADCOM is not set diff --git a/arch/mips/configs/loongson2k_defconfig b/arch/mips/configs/loongson2k_defconfig index a5c50b63d478..ca534a6b66de 100644 --- a/arch/mips/configs/loongson2k_defconfig +++ b/arch/mips/configs/loongson2k_defconfig @@ -137,7 +137,6 @@ CONFIG_NETDEVICES=y CONFIG_TUN=m # CONFIG_NET_VENDOR_3COM is not set # CONFIG_NET_VENDOR_ADAPTEC is not set -# CONFIG_NET_VENDOR_ALTEON is not set # CONFIG_NET_VENDOR_AMD is not set # CONFIG_NET_VENDOR_ARC is not set # CONFIG_NET_VENDOR_ATHEROS is not set diff --git a/arch/mips/configs/loongson3_defconfig b/arch/mips/configs/loongson3_defconfig index 575aaf242361..12cb1a6a1360 100644 --- a/arch/mips/configs/loongson3_defconfig +++ b/arch/mips/configs/loongson3_defconfig @@ -184,7 +184,6 @@ CONFIG_VETH=m CONFIG_VIRTIO_NET=m # CONFIG_NET_VENDOR_3COM is not set # CONFIG_NET_VENDOR_ADAPTEC is not set -# CONFIG_NET_VENDOR_ALTEON is not set # CONFIG_NET_VENDOR_AMD is not set # CONFIG_NET_VENDOR_ARC is not set # CONFIG_NET_VENDOR_ATHEROS is not set diff --git a/arch/mips/configs/malta_qemu_32r6_defconfig b/arch/mips/configs/malta_qemu_32r6_defconfig index accb471a1d93..dafc9a716e85 100644 --- a/arch/mips/configs/malta_qemu_32r6_defconfig +++ b/arch/mips/configs/malta_qemu_32r6_defconfig @@ -85,7 +85,6 @@ CONFIG_ATA_GENERIC=y CONFIG_NETDEVICES=y # CONFIG_NET_VENDOR_3COM is not set # CONFIG_NET_VENDOR_ADAPTEC is not set -# CONFIG_NET_VENDOR_ALTEON is not set CONFIG_PCNET32=y # CONFIG_NET_VENDOR_ATHEROS is not set # CONFIG_NET_VENDOR_BROADCOM is not set diff --git a/arch/mips/configs/maltaaprp_defconfig b/arch/mips/configs/maltaaprp_defconfig index 6bda67c5f68f..7361bca6a405 100644 --- a/arch/mips/configs/maltaaprp_defconfig +++ b/arch/mips/configs/maltaaprp_defconfig @@ -87,7 +87,6 @@ CONFIG_ATA_GENERIC=y CONFIG_NETDEVICES=y # CONFIG_NET_VENDOR_3COM is not set # CONFIG_NET_VENDOR_ADAPTEC is not set -# CONFIG_NET_VENDOR_ALTEON is not set CONFIG_PCNET32=y # CONFIG_NET_VENDOR_ATHEROS is not set # CONFIG_NET_VENDOR_BROADCOM is not set diff --git a/arch/mips/configs/maltasmvp_defconfig b/arch/mips/configs/maltasmvp_defconfig index e4082537f80f..c848a1bfca5c 100644 --- a/arch/mips/configs/maltasmvp_defconfig +++ b/arch/mips/configs/maltasmvp_defconfig @@ -86,7 +86,6 @@ CONFIG_ATA_PIIX=y CONFIG_NETDEVICES=y # CONFIG_NET_VENDOR_3COM is not set # CONFIG_NET_VENDOR_ADAPTEC is not set -# CONFIG_NET_VENDOR_ALTEON is not set CONFIG_PCNET32=y # CONFIG_NET_VENDOR_ATHEROS is not set # CONFIG_NET_VENDOR_BROADCOM is not set diff --git a/arch/mips/configs/maltasmvp_eva_defconfig b/arch/mips/configs/maltasmvp_eva_defconfig index 58f5af45fa98..905248e01b95 100644 --- a/arch/mips/configs/maltasmvp_eva_defconfig +++ b/arch/mips/configs/maltasmvp_eva_defconfig @@ -89,7 +89,6 @@ CONFIG_ATA_GENERIC=y CONFIG_NETDEVICES=y # CONFIG_NET_VENDOR_3COM is not set # CONFIG_NET_VENDOR_ADAPTEC is not set -# CONFIG_NET_VENDOR_ALTEON is not set CONFIG_PCNET32=y # CONFIG_NET_VENDOR_ATHEROS is not set # CONFIG_NET_VENDOR_BROADCOM is not set diff --git a/arch/mips/configs/maltaup_defconfig b/arch/mips/configs/maltaup_defconfig index 9bfef7de0d1c..b9bbe02f3595 100644 --- a/arch/mips/configs/maltaup_defconfig +++ b/arch/mips/configs/maltaup_defconfig @@ -86,7 +86,6 @@ CONFIG_ATA_GENERIC=y CONFIG_NETDEVICES=y # CONFIG_NET_VENDOR_3COM is not set # CONFIG_NET_VENDOR_ADAPTEC is not set -# CONFIG_NET_VENDOR_ALTEON is not set CONFIG_PCNET32=y # CONFIG_NET_VENDOR_ATHEROS is not set # CONFIG_NET_VENDOR_BROADCOM is not set diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig index 77050ae3945f..2428a6a72747 100644 --- a/arch/mips/configs/mtx1_defconfig +++ b/arch/mips/configs/mtx1_defconfig @@ -257,7 +257,6 @@ CONFIG_PCMCIA_3C589=m CONFIG_VORTEX=m CONFIG_TYPHOON=m CONFIG_ADAPTEC_STARFIRE=m -CONFIG_ACENIC=m CONFIG_AMD8111_ETH=m CONFIG_PCNET32=m CONFIG_PCMCIA_NMCLAN=m diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig index 5444ce6405f3..ad2fb69184f3 100644 --- a/arch/parisc/configs/generic-32bit_defconfig +++ b/arch/parisc/configs/generic-32bit_defconfig @@ -78,7 +78,6 @@ CONFIG_DUMMY=m CONFIG_TUN=m # CONFIG_NET_VENDOR_3COM is not set # CONFIG_NET_VENDOR_ADAPTEC is not set -# CONFIG_NET_VENDOR_ALTEON is not set # CONFIG_NET_VENDOR_AMD is not set # CONFIG_NET_VENDOR_ATHEROS is not set # CONFIG_NET_VENDOR_BROADCOM is not set diff --git a/arch/parisc/configs/generic-64bit_defconfig b/arch/parisc/configs/generic-64bit_defconfig index ce91f9d1fdbf..b21287a9250c 100644 --- a/arch/parisc/configs/generic-64bit_defconfig +++ b/arch/parisc/configs/generic-64bit_defconfig @@ -97,7 +97,6 @@ CONFIG_NETCONSOLE_DYNAMIC=y CONFIG_TUN=y # CONFIG_NET_VENDOR_3COM is not set # CONFIG_NET_VENDOR_ADAPTEC is not set -# CONFIG_NET_VENDOR_ALTEON is not set # CONFIG_NET_VENDOR_AMD is not set # CONFIG_NET_VENDOR_ATHEROS is not set # CONFIG_NET_VENDOR_BROADCOM is not set diff --git a/arch/powerpc/configs/44x/akebono_defconfig b/arch/powerpc/configs/44x/akebono_defconfig index 02e88648a2e6..11ad5ed3cc90 100644 --- a/arch/powerpc/configs/44x/akebono_defconfig +++ b/arch/powerpc/configs/44x/akebono_defconfig @@ -47,7 +47,6 @@ CONFIG_SATA_AHCI_PLATFORM=y # CONFIG_ATA_SFF is not set # CONFIG_NET_VENDOR_3COM is not set # CONFIG_NET_VENDOR_ADAPTEC is not set -# CONFIG_NET_VENDOR_ALTEON is not set # CONFIG_NET_VENDOR_AMD is not set # CONFIG_NET_VENDOR_ARC is not set # CONFIG_NET_VENDOR_ATHEROS is not set diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig index 428f17b45513..4247d9a30eba 100644 --- a/arch/powerpc/configs/g5_defconfig +++ b/arch/powerpc/configs/g5_defconfig @@ -92,8 +92,6 @@ CONFIG_NETDEVICES=y CONFIG_BONDING=m CONFIG_DUMMY=m CONFIG_TUN=m -CONFIG_ACENIC=m -CONFIG_ACENIC_OMIT_TIGON_I=y CONFIG_TIGON3=y CONFIG_E1000=y CONFIG_SUNGEM=y diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig index 9ac746cfb4be..ce72c6d22ca2 100644 --- a/arch/powerpc/configs/powernv_defconfig +++ b/arch/powerpc/configs/powernv_defconfig @@ -158,8 +158,6 @@ CONFIG_NETCONSOLE=m CONFIG_TUN=m CONFIG_VETH=m CONFIG_VORTEX=m -CONFIG_ACENIC=m -CONFIG_ACENIC_OMIT_TIGON_I=y CONFIG_PCNET32=m CONFIG_TIGON3=y CONFIG_BNX2X=m diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 2b0720f2753b..7603af319385 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -204,8 +204,6 @@ CONFIG_NETCONSOLE=y CONFIG_TUN=m CONFIG_VIRTIO_NET=m CONFIG_VORTEX=m -CONFIG_ACENIC=m -CONFIG_ACENIC_OMIT_TIGON_I=y CONFIG_PCNET32=m CONFIG_TIGON3=y CONFIG_BNX2X=m diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig index 90247b2a0ab0..e44c65693b51 100644 --- a/arch/powerpc/configs/ppc64e_defconfig +++ b/arch/powerpc/configs/ppc64e_defconfig @@ -96,8 +96,6 @@ CONFIG_DUMMY=m CONFIG_NETCONSOLE=y CONFIG_TUN=m CONFIG_VORTEX=y -CONFIG_ACENIC=y -CONFIG_ACENIC_OMIT_TIGON_I=y CONFIG_PCNET32=y CONFIG_TIGON3=y CONFIG_E100=y diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index 3c08f46f3d41..b70307b70af0 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -410,7 +410,6 @@ CONFIG_PCMCIA_3C589=m CONFIG_VORTEX=m CONFIG_TYPHOON=m CONFIG_ADAPTEC_STARFIRE=m -CONFIG_ACENIC=m CONFIG_AMD8111_ETH=m CONFIG_PCNET32=m CONFIG_PCMCIA_NMCLAN=m diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index 86c74146824a..ff1bed4b6d2c 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ b/arch/powerpc/configs/skiroot_defconfig @@ -125,8 +125,6 @@ CONFIG_DM_MULTIPATH=m # CONFIG_NET_VENDOR_ADAPTEC is not set # CONFIG_NET_VENDOR_AGERE is not set # CONFIG_NET_VENDOR_ALACRITECH is not set -CONFIG_ACENIC=m -CONFIG_ACENIC_OMIT_TIGON_I=y # CONFIG_NET_VENDOR_AMAZON is not set # CONFIG_NET_VENDOR_AMD is not set # CONFIG_NET_VENDOR_AQUANTIA is not set diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 98fd0a2f51c6..2a94abcabd72 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -524,7 +524,6 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_ADAPTEC is not set # CONFIG_NET_VENDOR_AGERE is not set # CONFIG_NET_VENDOR_ALACRITECH is not set -# CONFIG_NET_VENDOR_ALTEON is not set # CONFIG_NET_VENDOR_AMAZON is not set # CONFIG_NET_VENDOR_AMD is not set # CONFIG_NET_VENDOR_AQUANTIA is not set diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 0f4cedcab3ce..763fc7db101f 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -514,7 +514,6 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_ADAPTEC is not set # CONFIG_NET_VENDOR_AGERE is not set # CONFIG_NET_VENDOR_ALACRITECH is not set -# CONFIG_NET_VENDOR_ALTEON is not set # CONFIG_NET_VENDOR_AMAZON is not set # CONFIG_NET_VENDOR_AMD is not set # CONFIG_NET_VENDOR_AQUANTIA is not set diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index aa7103e7f47f..bdc29d143160 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -23,7 +23,6 @@ source "drivers/net/ethernet/agere/Kconfig" source "drivers/net/ethernet/airoha/Kconfig" source "drivers/net/ethernet/alacritech/Kconfig" source "drivers/net/ethernet/allwinner/Kconfig" -source "drivers/net/ethernet/alteon/Kconfig" source "drivers/net/ethernet/altera/Kconfig" source "drivers/net/ethernet/amazon/Kconfig" source "drivers/net/ethernet/amd/Kconfig" diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile index 6615a67a63d5..6bffb60ba644 100644 --- a/drivers/net/ethernet/Makefile +++ b/drivers/net/ethernet/Makefile @@ -13,7 +13,6 @@ obj-$(CONFIG_NET_VENDOR_AGERE) += agere/ obj-$(CONFIG_NET_VENDOR_AIROHA) += airoha/ obj-$(CONFIG_NET_VENDOR_ALACRITECH) += alacritech/ obj-$(CONFIG_NET_VENDOR_ALLWINNER) += allwinner/ -obj-$(CONFIG_NET_VENDOR_ALTEON) += alteon/ obj-$(CONFIG_ALTERA_TSE) += altera/ obj-$(CONFIG_NET_VENDOR_AMAZON) += amazon/ obj-$(CONFIG_NET_VENDOR_AMD) += amd/ diff --git a/drivers/net/ethernet/alteon/Kconfig b/drivers/net/ethernet/alteon/Kconfig deleted file mode 100644 index cfe1f3159d61..000000000000 --- a/drivers/net/ethernet/alteon/Kconfig +++ /dev/null @@ -1,47 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# Alteon network device configuration -# - -config NET_VENDOR_ALTEON - bool "Alteon devices" - default y - depends on PCI - help - If you have a network (Ethernet) card belonging to this class, say Y. - - Note that the answer to this question doesn't directly affect the - kernel: saying N will just cause the configurator to skip all - the questions about Alteon cards. If you say Y, you will be asked for - your specific card in the following questions. - -if NET_VENDOR_ALTEON - -config ACENIC - tristate "Alteon AceNIC/3Com 3C985/NetGear GA620 Gigabit support" - depends on PCI - help - Say Y here if you have an Alteon AceNIC, 3Com 3C985(B), NetGear - GA620, SGI Gigabit or Farallon PN9000-SX PCI Gigabit Ethernet - adapter. The driver allows for using the Jumbo Frame option (9000 - bytes/frame) however it requires that your switches can handle this - as well. To enable Jumbo Frames, add `mtu 9000' to your ifconfig - line. - - To compile this driver as a module, choose M here: the - module will be called acenic. - -config ACENIC_OMIT_TIGON_I - bool "Omit support for old Tigon I based AceNICs" - depends on ACENIC - help - Say Y here if you only have Tigon II based AceNICs and want to leave - out support for the older Tigon I based cards which are no longer - being sold (ie. the original Alteon AceNIC and 3Com 3C985 (non B - version)). This will reduce the size of the driver object by - app. 100KB. If you are not sure whether your card is a Tigon I or a - Tigon II, say N here. - - The safe and default value for this is N. - -endif # NET_VENDOR_ALTEON diff --git a/drivers/net/ethernet/alteon/Makefile b/drivers/net/ethernet/alteon/Makefile deleted file mode 100644 index be5225559b6d..000000000000 --- a/drivers/net/ethernet/alteon/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# Makefile for the Alteon network device drivers. -# - -obj-$(CONFIG_ACENIC) += acenic.o diff --git a/drivers/net/ethernet/alteon/acenic.c b/drivers/net/ethernet/alteon/acenic.c deleted file mode 100644 index 455ee8930824..000000000000 --- a/drivers/net/ethernet/alteon/acenic.c +++ /dev/null @@ -1,3178 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * acenic.c: Linux driver for the Alteon AceNIC Gigabit Ethernet card - * and other Tigon based cards. - * - * Copyright 1998-2002 by Jes Sorensen, . - * - * Thanks to Alteon and 3Com for providing hardware and documentation - * enabling me to write this driver. - * - * A mailing list for discussing the use of this driver has been - * setup, please subscribe to the lists if you have any questions - * about the driver. Send mail to linux-acenic-help@sunsite.auc.dk to - * see how to subscribe. - * - * Additional credits: - * Pete Wyckoff : Initial Linux/Alpha and trace - * dump support. The trace dump support has not been - * integrated yet however. - * Troy Benjegerdes: Big Endian (PPC) patches. - * Nate Stahl: Better out of memory handling and stats support. - * Aman Singla: Nasty race between interrupt handler and tx code dealing - * with 'testing the tx_ret_csm and setting tx_full' - * David S. Miller : conversion to new PCI dma mapping - * infrastructure and Sparc support - * Pierrick Pinasseau (CERN): For lending me an Ultra 5 to test the - * driver under Linux/Sparc64 - * Matt Domsch : Detect Alteon 1000baseT cards - * ETHTOOL_GDRVINFO support - * Chip Salzenberg : Fix race condition between tx - * handler and close() cleanup. - * Ken Aaker : Correct check for whether - * memory mapped IO is enabled to - * make the driver work on RS/6000. - * Takayoshi Kouchi : Identifying problem - * where the driver would disable - * bus master mode if it had to disable - * write and invalidate. - * Stephen Hack : Fixed ace_set_mac_addr for little - * endian systems. - * Val Henson : Reset Jumbo skb producer and - * rx producer index when - * flushing the Jumbo ring. - * Hans Grobler : Memory leak fixes in the - * driver init path. - * Grant Grundler : PCI write posting fixes. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef SIOCETHTOOL -#include -#endif - -#include -#include - -#include -#include -#include -#include - - -#define DRV_NAME "acenic" - -#undef INDEX_DEBUG - -#ifdef CONFIG_ACENIC_OMIT_TIGON_I -#define ACE_IS_TIGON_I(ap) 0 -#define ACE_TX_RING_ENTRIES(ap) MAX_TX_RING_ENTRIES -#else -#define ACE_IS_TIGON_I(ap) (ap->version == 1) -#define ACE_TX_RING_ENTRIES(ap) ap->tx_ring_entries -#endif - -#ifndef PCI_VENDOR_ID_ALTEON -#define PCI_VENDOR_ID_ALTEON 0x12ae -#endif -#ifndef PCI_DEVICE_ID_ALTEON_ACENIC_FIBRE -#define PCI_DEVICE_ID_ALTEON_ACENIC_FIBRE 0x0001 -#define PCI_DEVICE_ID_ALTEON_ACENIC_COPPER 0x0002 -#endif -#ifndef PCI_DEVICE_ID_3COM_3C985 -#define PCI_DEVICE_ID_3COM_3C985 0x0001 -#endif -#ifndef PCI_VENDOR_ID_NETGEAR -#define PCI_VENDOR_ID_NETGEAR 0x1385 -#define PCI_DEVICE_ID_NETGEAR_GA620 0x620a -#endif -#ifndef PCI_DEVICE_ID_NETGEAR_GA620T -#define PCI_DEVICE_ID_NETGEAR_GA620T 0x630a -#endif - - -/* - * Farallon used the DEC vendor ID by mistake and they seem not - * to care - stinky! - */ -#ifndef PCI_DEVICE_ID_FARALLON_PN9000SX -#define PCI_DEVICE_ID_FARALLON_PN9000SX 0x1a -#endif -#ifndef PCI_DEVICE_ID_FARALLON_PN9100T -#define PCI_DEVICE_ID_FARALLON_PN9100T 0xfa -#endif -#ifndef PCI_VENDOR_ID_SGI -#define PCI_VENDOR_ID_SGI 0x10a9 -#endif -#ifndef PCI_DEVICE_ID_SGI_ACENIC -#define PCI_DEVICE_ID_SGI_ACENIC 0x0009 -#endif - -static const struct pci_device_id acenic_pci_tbl[] = { - { PCI_VENDOR_ID_ALTEON, PCI_DEVICE_ID_ALTEON_ACENIC_FIBRE, - PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, }, - { PCI_VENDOR_ID_ALTEON, PCI_DEVICE_ID_ALTEON_ACENIC_COPPER, - PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, }, - { PCI_VENDOR_ID_3COM, PCI_DEVICE_ID_3COM_3C985, - PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, }, - { PCI_VENDOR_ID_NETGEAR, PCI_DEVICE_ID_NETGEAR_GA620, - PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, }, - { PCI_VENDOR_ID_NETGEAR, PCI_DEVICE_ID_NETGEAR_GA620T, - PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, }, - /* - * Farallon used the DEC vendor ID on their cards incorrectly, - * then later Alteon's ID. - */ - { PCI_VENDOR_ID_DEC, PCI_DEVICE_ID_FARALLON_PN9000SX, - PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, }, - { PCI_VENDOR_ID_ALTEON, PCI_DEVICE_ID_FARALLON_PN9100T, - PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, }, - { PCI_VENDOR_ID_SGI, PCI_DEVICE_ID_SGI_ACENIC, - PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, }, - { } -}; -MODULE_DEVICE_TABLE(pci, acenic_pci_tbl); - -#define ace_sync_irq(irq) synchronize_irq(irq) - -#ifndef offset_in_page -#define offset_in_page(ptr) ((unsigned long)(ptr) & ~PAGE_MASK) -#endif - -#define ACE_MAX_MOD_PARMS 8 -#define BOARD_IDX_STATIC 0 -#define BOARD_IDX_OVERFLOW -1 - -#include "acenic.h" - -/* - * These must be defined before the firmware is included. - */ -#define MAX_TEXT_LEN 96*1024 -#define MAX_RODATA_LEN 8*1024 -#define MAX_DATA_LEN 2*1024 - -#ifndef tigon2FwReleaseLocal -#define tigon2FwReleaseLocal 0 -#endif - -/* - * This driver currently supports Tigon I and Tigon II based cards - * including the Alteon AceNIC, the 3Com 3C985[B] and NetGear - * GA620. The driver should also work on the SGI, DEC and Farallon - * versions of the card, however I have not been able to test that - * myself. - * - * This card is really neat, it supports receive hardware checksumming - * and jumbo frames (up to 9000 bytes) and does a lot of work in the - * firmware. Also the programming interface is quite neat, except for - * the parts dealing with the i2c eeprom on the card ;-) - * - * Using jumbo frames: - * - * To enable jumbo frames, simply specify an mtu between 1500 and 9000 - * bytes to ifconfig. Jumbo frames can be enabled or disabled at any time - * by running `ifconfig eth mtu ' with being the Ethernet - * interface number and being the MTU value. - * - * Module parameters: - * - * When compiled as a loadable module, the driver allows for a number - * of module parameters to be specified. The driver supports the - * following module parameters: - * - * trace= - Firmware trace level. This requires special traced - * firmware to replace the firmware supplied with - * the driver - for debugging purposes only. - * - * link= - Link state. Normally you want to use the default link - * parameters set by the driver. This can be used to - * override these in case your switch doesn't negotiate - * the link properly. Valid values are: - * 0x0001 - Force half duplex link. - * 0x0002 - Do not negotiate line speed with the other end. - * 0x0010 - 10Mbit/sec link. - * 0x0020 - 100Mbit/sec link. - * 0x0040 - 1000Mbit/sec link. - * 0x0100 - Do not negotiate flow control. - * 0x0200 - Enable RX flow control Y - * 0x0400 - Enable TX flow control Y (Tigon II NICs only). - * Default value is 0x0270, ie. enable link+flow - * control negotiation. Negotiating the highest - * possible link speed with RX flow control enabled. - * - * When disabling link speed negotiation, only one link - * speed is allowed to be specified! - * - * tx_coal_tick= - number of coalescing clock ticks (us) allowed - * to wait for more packets to arive before - * interrupting the host, from the time the first - * packet arrives. - * - * rx_coal_tick= - number of coalescing clock ticks (us) allowed - * to wait for more packets to arive in the transmit ring, - * before interrupting the host, after transmitting the - * first packet in the ring. - * - * max_tx_desc= - maximum number of transmit descriptors - * (packets) transmitted before interrupting the host. - * - * max_rx_desc= - maximum number of receive descriptors - * (packets) received before interrupting the host. - * - * tx_ratio= - 7 bit value (0 - 63) specifying the split in 64th - * increments of the NIC's on board memory to be used for - * transmit and receive buffers. For the 1MB NIC app. 800KB - * is available, on the 1/2MB NIC app. 300KB is available. - * 68KB will always be available as a minimum for both - * directions. The default value is a 50/50 split. - * dis_pci_mem_inval= - disable PCI memory write and invalidate - * operations, default (1) is to always disable this as - * that is what Alteon does on NT. I have not been able - * to measure any real performance differences with - * this on my systems. Set =0 if you want to - * enable these operations. - * - * If you use more than one NIC, specify the parameters for the - * individual NICs with a comma, ie. trace=0,0x00001fff,0 you want to - * run tracing on NIC #2 but not on NIC #1 and #3. - * - * TODO: - * - * - Proper multicast support. - * - NIC dump support. - * - More tuning parameters. - * - * The mini ring is not used under Linux and I am not sure it makes sense - * to actually use it. - * - * New interrupt handler strategy: - * - * The old interrupt handler worked using the traditional method of - * replacing an skbuff with a new one when a packet arrives. However - * the rx rings do not need to contain a static number of buffer - * descriptors, thus it makes sense to move the memory allocation out - * of the main interrupt handler and do it in a bottom half handler - * and only allocate new buffers when the number of buffers in the - * ring is below a certain threshold. In order to avoid starving the - * NIC under heavy load it is however necessary to force allocation - * when hitting a minimum threshold. The strategy for alloction is as - * follows: - * - * RX_LOW_BUF_THRES - allocate buffers in the bottom half - * RX_PANIC_LOW_THRES - we are very low on buffers, allocate - * the buffers in the interrupt handler - * RX_RING_THRES - maximum number of buffers in the rx ring - * RX_MINI_THRES - maximum number of buffers in the mini ring - * RX_JUMBO_THRES - maximum number of buffers in the jumbo ring - * - * One advantagous side effect of this allocation approach is that the - * entire rx processing can be done without holding any spin lock - * since the rx rings and registers are totally independent of the tx - * ring and its registers. This of course includes the kmalloc's of - * new skb's. Thus start_xmit can run in parallel with rx processing - * and the memory allocation on SMP systems. - * - * Note that running the skb reallocation in a bottom half opens up - * another can of races which needs to be handled properly. In - * particular it can happen that the interrupt handler tries to run - * the reallocation while the bottom half is either running on another - * CPU or was interrupted on the same CPU. To get around this the - * driver uses bitops to prevent the reallocation routines from being - * reentered. - * - * TX handling can also be done without holding any spin lock, wheee - * this is fun! since tx_ret_csm is only written to by the interrupt - * handler. The case to be aware of is when shutting down the device - * and cleaning up where it is necessary to make sure that - * start_xmit() is not running while this is happening. Well DaveM - * informs me that this case is already protected against ... bye bye - * Mr. Spin Lock, it was nice to know you. - * - * TX interrupts are now partly disabled so the NIC will only generate - * TX interrupts for the number of coal ticks, not for the number of - * TX packets in the queue. This should reduce the number of TX only, - * ie. when no RX processing is done, interrupts seen. - */ - -/* - * Threshold values for RX buffer allocation - the low water marks for - * when to start refilling the rings are set to 75% of the ring - * sizes. It seems to make sense to refill the rings entirely from the - * intrrupt handler once it gets below the panic threshold, that way - * we don't risk that the refilling is moved to another CPU when the - * one running the interrupt handler just got the slab code hot in its - * cache. - */ -#define RX_RING_SIZE 72 -#define RX_MINI_SIZE 64 -#define RX_JUMBO_SIZE 48 - -#define RX_PANIC_STD_THRES 16 -#define RX_PANIC_STD_REFILL (3*RX_PANIC_STD_THRES)/2 -#define RX_LOW_STD_THRES (3*RX_RING_SIZE)/4 -#define RX_PANIC_MINI_THRES 12 -#define RX_PANIC_MINI_REFILL (3*RX_PANIC_MINI_THRES)/2 -#define RX_LOW_MINI_THRES (3*RX_MINI_SIZE)/4 -#define RX_PANIC_JUMBO_THRES 6 -#define RX_PANIC_JUMBO_REFILL (3*RX_PANIC_JUMBO_THRES)/2 -#define RX_LOW_JUMBO_THRES (3*RX_JUMBO_SIZE)/4 - - -/* - * Size of the mini ring entries, basically these just should be big - * enough to take TCP ACKs - */ -#define ACE_MINI_SIZE 100 - -#define ACE_MINI_BUFSIZE ACE_MINI_SIZE -#define ACE_STD_BUFSIZE (ACE_STD_MTU + ETH_HLEN + 4) -#define ACE_JUMBO_BUFSIZE (ACE_JUMBO_MTU + ETH_HLEN + 4) - -/* - * There seems to be a magic difference in the effect between 995 and 996 - * but little difference between 900 and 995 ... no idea why. - * - * There is now a default set of tuning parameters which is set, depending - * on whether or not the user enables Jumbo frames. It's assumed that if - * Jumbo frames are enabled, the user wants optimal tuning for that case. - */ -#define DEF_TX_COAL 400 /* 996 */ -#define DEF_TX_MAX_DESC 60 /* was 40 */ -#define DEF_RX_COAL 120 /* 1000 */ -#define DEF_RX_MAX_DESC 25 -#define DEF_TX_RATIO 21 /* 24 */ - -#define DEF_JUMBO_TX_COAL 20 -#define DEF_JUMBO_TX_MAX_DESC 60 -#define DEF_JUMBO_RX_COAL 30 -#define DEF_JUMBO_RX_MAX_DESC 6 -#define DEF_JUMBO_TX_RATIO 21 - -#if tigon2FwReleaseLocal < 20001118 -/* - * Standard firmware and early modifications duplicate - * IRQ load without this flag (coal timer is never reset). - * Note that with this flag tx_coal should be less than - * time to xmit full tx ring. - * 400usec is not so bad for tx ring size of 128. - */ -#define TX_COAL_INTS_ONLY 1 /* worth it */ -#else -/* - * With modified firmware, this is not necessary, but still useful. - */ -#define TX_COAL_INTS_ONLY 1 -#endif - -#define DEF_TRACE 0 -#define DEF_STAT (2 * TICKS_PER_SEC) - - -static int link_state[ACE_MAX_MOD_PARMS]; -static int trace[ACE_MAX_MOD_PARMS]; -static int tx_coal_tick[ACE_MAX_MOD_PARMS]; -static int rx_coal_tick[ACE_MAX_MOD_PARMS]; -static int max_tx_desc[ACE_MAX_MOD_PARMS]; -static int max_rx_desc[ACE_MAX_MOD_PARMS]; -static int tx_ratio[ACE_MAX_MOD_PARMS]; -static int dis_pci_mem_inval[ACE_MAX_MOD_PARMS] = {1, 1, 1, 1, 1, 1, 1, 1}; - -MODULE_AUTHOR("Jes Sorensen "); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("AceNIC/3C985/GA620 Gigabit Ethernet driver"); -#ifndef CONFIG_ACENIC_OMIT_TIGON_I -MODULE_FIRMWARE("acenic/tg1.bin"); -#endif -MODULE_FIRMWARE("acenic/tg2.bin"); - -module_param_array_named(link, link_state, int, NULL, 0); -module_param_array(trace, int, NULL, 0); -module_param_array(tx_coal_tick, int, NULL, 0); -module_param_array(max_tx_desc, int, NULL, 0); -module_param_array(rx_coal_tick, int, NULL, 0); -module_param_array(max_rx_desc, int, NULL, 0); -module_param_array(tx_ratio, int, NULL, 0); -MODULE_PARM_DESC(link, "AceNIC/3C985/NetGear link state"); -MODULE_PARM_DESC(trace, "AceNIC/3C985/NetGear firmware trace level"); -MODULE_PARM_DESC(tx_coal_tick, "AceNIC/3C985/GA620 max clock ticks to wait from first tx descriptor arrives"); -MODULE_PARM_DESC(max_tx_desc, "AceNIC/3C985/GA620 max number of transmit descriptors to wait"); -MODULE_PARM_DESC(rx_coal_tick, "AceNIC/3C985/GA620 max clock ticks to wait from first rx descriptor arrives"); -MODULE_PARM_DESC(max_rx_desc, "AceNIC/3C985/GA620 max number of receive descriptors to wait"); -MODULE_PARM_DESC(tx_ratio, "AceNIC/3C985/GA620 ratio of NIC memory used for TX/RX descriptors (range 0-63)"); - - -static const char version[] = - "acenic.c: v0.92 08/05/2002 Jes Sorensen, linux-acenic@SunSITE.dk\n" - " http://home.cern.ch/~jes/gige/acenic.html\n"; - -static int ace_get_link_ksettings(struct net_device *, - struct ethtool_link_ksettings *); -static int ace_set_link_ksettings(struct net_device *, - const struct ethtool_link_ksettings *); -static void ace_get_drvinfo(struct net_device *, struct ethtool_drvinfo *); - -static const struct ethtool_ops ace_ethtool_ops = { - .get_drvinfo = ace_get_drvinfo, - .get_link_ksettings = ace_get_link_ksettings, - .set_link_ksettings = ace_set_link_ksettings, -}; - -static void ace_watchdog(struct net_device *dev, unsigned int txqueue); - -static const struct net_device_ops ace_netdev_ops = { - .ndo_open = ace_open, - .ndo_stop = ace_close, - .ndo_tx_timeout = ace_watchdog, - .ndo_get_stats = ace_get_stats, - .ndo_start_xmit = ace_start_xmit, - .ndo_set_rx_mode = ace_set_multicast_list, - .ndo_validate_addr = eth_validate_addr, - .ndo_set_mac_address = ace_set_mac_addr, - .ndo_change_mtu = ace_change_mtu, -}; - -static int acenic_probe_one(struct pci_dev *pdev, - const struct pci_device_id *id) -{ - struct net_device *dev; - struct ace_private *ap; - static int boards_found; - - dev = alloc_etherdev(sizeof(struct ace_private)); - if (dev == NULL) - return -ENOMEM; - - SET_NETDEV_DEV(dev, &pdev->dev); - - ap = netdev_priv(dev); - ap->ndev = dev; - ap->pdev = pdev; - ap->name = pci_name(pdev); - - dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM; - dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; - - dev->watchdog_timeo = 5*HZ; - dev->min_mtu = 0; - dev->max_mtu = ACE_JUMBO_MTU; - - dev->netdev_ops = &ace_netdev_ops; - dev->ethtool_ops = &ace_ethtool_ops; - - /* we only display this string ONCE */ - if (!boards_found) - printk(version); - - if (pci_enable_device(pdev)) - goto fail_free_netdev; - - /* - * Enable master mode before we start playing with the - * pci_command word since pci_set_master() will modify - * it. - */ - pci_set_master(pdev); - - pci_read_config_word(pdev, PCI_COMMAND, &ap->pci_command); - - /* OpenFirmware on Mac's does not set this - DOH.. */ - if (!(ap->pci_command & PCI_COMMAND_MEMORY)) { - printk(KERN_INFO "%s: Enabling PCI Memory Mapped " - "access - was not enabled by BIOS/Firmware\n", - ap->name); - ap->pci_command = ap->pci_command | PCI_COMMAND_MEMORY; - pci_write_config_word(ap->pdev, PCI_COMMAND, - ap->pci_command); - wmb(); - } - - pci_read_config_byte(pdev, PCI_LATENCY_TIMER, &ap->pci_latency); - if (ap->pci_latency <= 0x40) { - ap->pci_latency = 0x40; - pci_write_config_byte(pdev, PCI_LATENCY_TIMER, ap->pci_latency); - } - - /* - * Remap the regs into kernel space - this is abuse of - * dev->base_addr since it was means for I/O port - * addresses but who gives a damn. - */ - dev->base_addr = pci_resource_start(pdev, 0); - ap->regs = ioremap(dev->base_addr, 0x4000); - if (!ap->regs) { - printk(KERN_ERR "%s: Unable to map I/O register, " - "AceNIC %i will be disabled.\n", - ap->name, boards_found); - goto fail_free_netdev; - } - - switch(pdev->vendor) { - case PCI_VENDOR_ID_ALTEON: - if (pdev->device == PCI_DEVICE_ID_FARALLON_PN9100T) { - printk(KERN_INFO "%s: Farallon PN9100-T ", - ap->name); - } else { - printk(KERN_INFO "%s: Alteon AceNIC ", - ap->name); - } - break; - case PCI_VENDOR_ID_3COM: - printk(KERN_INFO "%s: 3Com 3C985 ", ap->name); - break; - case PCI_VENDOR_ID_NETGEAR: - printk(KERN_INFO "%s: NetGear GA620 ", ap->name); - break; - case PCI_VENDOR_ID_DEC: - if (pdev->device == PCI_DEVICE_ID_FARALLON_PN9000SX) { - printk(KERN_INFO "%s: Farallon PN9000-SX ", - ap->name); - break; - } - fallthrough; - case PCI_VENDOR_ID_SGI: - printk(KERN_INFO "%s: SGI AceNIC ", ap->name); - break; - default: - printk(KERN_INFO "%s: Unknown AceNIC ", ap->name); - break; - } - - printk("Gigabit Ethernet at 0x%08lx, ", dev->base_addr); - printk("irq %d\n", pdev->irq); - -#ifdef CONFIG_ACENIC_OMIT_TIGON_I - if ((readl(&ap->regs->HostCtrl) >> 28) == 4) { - printk(KERN_ERR "%s: Driver compiled without Tigon I" - " support - NIC disabled\n", dev->name); - goto fail_uninit; - } -#endif - - if (ace_allocate_descriptors(dev)) - goto fail_free_netdev; - -#ifdef MODULE - if (boards_found >= ACE_MAX_MOD_PARMS) - ap->board_idx = BOARD_IDX_OVERFLOW; - else - ap->board_idx = boards_found; -#else - ap->board_idx = BOARD_IDX_STATIC; -#endif - - if (ace_init(dev)) - goto fail_free_netdev; - - if (register_netdev(dev)) { - printk(KERN_ERR "acenic: device registration failed\n"); - goto fail_uninit; - } - ap->name = dev->name; - - dev->features |= NETIF_F_HIGHDMA; - - pci_set_drvdata(pdev, dev); - - boards_found++; - return 0; - - fail_uninit: - ace_init_cleanup(dev); - fail_free_netdev: - free_netdev(dev); - return -ENODEV; -} - -static void acenic_remove_one(struct pci_dev *pdev) -{ - struct net_device *dev = pci_get_drvdata(pdev); - struct ace_private *ap = netdev_priv(dev); - struct ace_regs __iomem *regs = ap->regs; - short i; - - unregister_netdev(dev); - - writel(readl(®s->CpuCtrl) | CPU_HALT, ®s->CpuCtrl); - if (ap->version >= 2) - writel(readl(®s->CpuBCtrl) | CPU_HALT, ®s->CpuBCtrl); - - /* - * This clears any pending interrupts - */ - writel(1, ®s->Mb0Lo); - readl(®s->CpuCtrl); /* flush */ - - /* - * Make sure no other CPUs are processing interrupts - * on the card before the buffers are being released. - * Otherwise one might experience some `interesting' - * effects. - * - * Then release the RX buffers - jumbo buffers were - * already released in ace_close(). - */ - ace_sync_irq(dev->irq); - - for (i = 0; i < RX_STD_RING_ENTRIES; i++) { - struct sk_buff *skb = ap->skb->rx_std_skbuff[i].skb; - - if (skb) { - struct ring_info *ringp; - dma_addr_t mapping; - - ringp = &ap->skb->rx_std_skbuff[i]; - mapping = dma_unmap_addr(ringp, mapping); - dma_unmap_page(&ap->pdev->dev, mapping, - ACE_STD_BUFSIZE, DMA_FROM_DEVICE); - - ap->rx_std_ring[i].size = 0; - ap->skb->rx_std_skbuff[i].skb = NULL; - dev_kfree_skb(skb); - } - } - - if (ap->version >= 2) { - for (i = 0; i < RX_MINI_RING_ENTRIES; i++) { - struct sk_buff *skb = ap->skb->rx_mini_skbuff[i].skb; - - if (skb) { - struct ring_info *ringp; - dma_addr_t mapping; - - ringp = &ap->skb->rx_mini_skbuff[i]; - mapping = dma_unmap_addr(ringp,mapping); - dma_unmap_page(&ap->pdev->dev, mapping, - ACE_MINI_BUFSIZE, - DMA_FROM_DEVICE); - - ap->rx_mini_ring[i].size = 0; - ap->skb->rx_mini_skbuff[i].skb = NULL; - dev_kfree_skb(skb); - } - } - } - - for (i = 0; i < RX_JUMBO_RING_ENTRIES; i++) { - struct sk_buff *skb = ap->skb->rx_jumbo_skbuff[i].skb; - if (skb) { - struct ring_info *ringp; - dma_addr_t mapping; - - ringp = &ap->skb->rx_jumbo_skbuff[i]; - mapping = dma_unmap_addr(ringp, mapping); - dma_unmap_page(&ap->pdev->dev, mapping, - ACE_JUMBO_BUFSIZE, DMA_FROM_DEVICE); - - ap->rx_jumbo_ring[i].size = 0; - ap->skb->rx_jumbo_skbuff[i].skb = NULL; - dev_kfree_skb(skb); - } - } - - ace_init_cleanup(dev); - free_netdev(dev); -} - -static struct pci_driver acenic_pci_driver = { - .name = "acenic", - .id_table = acenic_pci_tbl, - .probe = acenic_probe_one, - .remove = acenic_remove_one, -}; - -static void ace_free_descriptors(struct net_device *dev) -{ - struct ace_private *ap = netdev_priv(dev); - int size; - - if (ap->rx_std_ring != NULL) { - size = (sizeof(struct rx_desc) * - (RX_STD_RING_ENTRIES + - RX_JUMBO_RING_ENTRIES + - RX_MINI_RING_ENTRIES + - RX_RETURN_RING_ENTRIES)); - dma_free_coherent(&ap->pdev->dev, size, ap->rx_std_ring, - ap->rx_ring_base_dma); - ap->rx_std_ring = NULL; - ap->rx_jumbo_ring = NULL; - ap->rx_mini_ring = NULL; - ap->rx_return_ring = NULL; - } - if (ap->evt_ring != NULL) { - size = (sizeof(struct event) * EVT_RING_ENTRIES); - dma_free_coherent(&ap->pdev->dev, size, ap->evt_ring, - ap->evt_ring_dma); - ap->evt_ring = NULL; - } - if (ap->tx_ring != NULL && !ACE_IS_TIGON_I(ap)) { - size = (sizeof(struct tx_desc) * MAX_TX_RING_ENTRIES); - dma_free_coherent(&ap->pdev->dev, size, ap->tx_ring, - ap->tx_ring_dma); - } - ap->tx_ring = NULL; - - if (ap->evt_prd != NULL) { - dma_free_coherent(&ap->pdev->dev, sizeof(u32), - (void *)ap->evt_prd, ap->evt_prd_dma); - ap->evt_prd = NULL; - } - if (ap->rx_ret_prd != NULL) { - dma_free_coherent(&ap->pdev->dev, sizeof(u32), - (void *)ap->rx_ret_prd, ap->rx_ret_prd_dma); - ap->rx_ret_prd = NULL; - } - if (ap->tx_csm != NULL) { - dma_free_coherent(&ap->pdev->dev, sizeof(u32), - (void *)ap->tx_csm, ap->tx_csm_dma); - ap->tx_csm = NULL; - } -} - - -static int ace_allocate_descriptors(struct net_device *dev) -{ - struct ace_private *ap = netdev_priv(dev); - int size; - - size = (sizeof(struct rx_desc) * - (RX_STD_RING_ENTRIES + - RX_JUMBO_RING_ENTRIES + - RX_MINI_RING_ENTRIES + - RX_RETURN_RING_ENTRIES)); - - ap->rx_std_ring = dma_alloc_coherent(&ap->pdev->dev, size, - &ap->rx_ring_base_dma, GFP_KERNEL); - if (ap->rx_std_ring == NULL) - goto fail; - - ap->rx_jumbo_ring = ap->rx_std_ring + RX_STD_RING_ENTRIES; - ap->rx_mini_ring = ap->rx_jumbo_ring + RX_JUMBO_RING_ENTRIES; - ap->rx_return_ring = ap->rx_mini_ring + RX_MINI_RING_ENTRIES; - - size = (sizeof(struct event) * EVT_RING_ENTRIES); - - ap->evt_ring = dma_alloc_coherent(&ap->pdev->dev, size, - &ap->evt_ring_dma, GFP_KERNEL); - - if (ap->evt_ring == NULL) - goto fail; - - /* - * Only allocate a host TX ring for the Tigon II, the Tigon I - * has to use PCI registers for this ;-( - */ - if (!ACE_IS_TIGON_I(ap)) { - size = (sizeof(struct tx_desc) * MAX_TX_RING_ENTRIES); - - ap->tx_ring = dma_alloc_coherent(&ap->pdev->dev, size, - &ap->tx_ring_dma, GFP_KERNEL); - - if (ap->tx_ring == NULL) - goto fail; - } - - ap->evt_prd = dma_alloc_coherent(&ap->pdev->dev, sizeof(u32), - &ap->evt_prd_dma, GFP_KERNEL); - if (ap->evt_prd == NULL) - goto fail; - - ap->rx_ret_prd = dma_alloc_coherent(&ap->pdev->dev, sizeof(u32), - &ap->rx_ret_prd_dma, GFP_KERNEL); - if (ap->rx_ret_prd == NULL) - goto fail; - - ap->tx_csm = dma_alloc_coherent(&ap->pdev->dev, sizeof(u32), - &ap->tx_csm_dma, GFP_KERNEL); - if (ap->tx_csm == NULL) - goto fail; - - return 0; - -fail: - /* Clean up. */ - ace_init_cleanup(dev); - return 1; -} - - -/* - * Generic cleanup handling data allocated during init. Used when the - * module is unloaded or if an error occurs during initialization - */ -static void ace_init_cleanup(struct net_device *dev) -{ - struct ace_private *ap; - - ap = netdev_priv(dev); - - ace_free_descriptors(dev); - - if (ap->info) - dma_free_coherent(&ap->pdev->dev, sizeof(struct ace_info), - ap->info, ap->info_dma); - kfree(ap->skb); - kfree(ap->trace_buf); - - if (dev->irq) - free_irq(dev->irq, dev); - - iounmap(ap->regs); -} - - -/* - * Commands are considered to be slow. - */ -static inline void ace_issue_cmd(struct ace_regs __iomem *regs, struct cmd *cmd) -{ - u32 idx; - - idx = readl(®s->CmdPrd); - - writel(*(u32 *)(cmd), ®s->CmdRng[idx]); - idx = (idx + 1) % CMD_RING_ENTRIES; - - writel(idx, ®s->CmdPrd); -} - - -static int ace_init(struct net_device *dev) -{ - struct ace_private *ap; - struct ace_regs __iomem *regs; - struct ace_info *info = NULL; - struct pci_dev *pdev; - unsigned long myjif; - u64 tmp_ptr; - u32 tig_ver, mac1, mac2, tmp, pci_state; - int board_idx, ecode = 0; - short i; - unsigned char cache_size; - u8 addr[ETH_ALEN]; - - ap = netdev_priv(dev); - regs = ap->regs; - - board_idx = ap->board_idx; - - /* - * aman@sgi.com - its useful to do a NIC reset here to - * address the `Firmware not running' problem subsequent - * to any crashes involving the NIC - */ - writel(HW_RESET | (HW_RESET << 24), ®s->HostCtrl); - readl(®s->HostCtrl); /* PCI write posting */ - udelay(5); - - /* - * Don't access any other registers before this point! - */ -#ifdef __BIG_ENDIAN - /* - * This will most likely need BYTE_SWAP once we switch - * to using __raw_writel() - */ - writel((WORD_SWAP | CLR_INT | ((WORD_SWAP | CLR_INT) << 24)), - ®s->HostCtrl); -#else - writel((CLR_INT | WORD_SWAP | ((CLR_INT | WORD_SWAP) << 24)), - ®s->HostCtrl); -#endif - readl(®s->HostCtrl); /* PCI write posting */ - - /* - * Stop the NIC CPU and clear pending interrupts - */ - writel(readl(®s->CpuCtrl) | CPU_HALT, ®s->CpuCtrl); - readl(®s->CpuCtrl); /* PCI write posting */ - writel(0, ®s->Mb0Lo); - - tig_ver = readl(®s->HostCtrl) >> 28; - - switch(tig_ver){ -#ifndef CONFIG_ACENIC_OMIT_TIGON_I - case 4: - case 5: - printk(KERN_INFO " Tigon I (Rev. %i), Firmware: %i.%i.%i, ", - tig_ver, ap->firmware_major, ap->firmware_minor, - ap->firmware_fix); - writel(0, ®s->LocalCtrl); - ap->version = 1; - ap->tx_ring_entries = TIGON_I_TX_RING_ENTRIES; - break; -#endif - case 6: - printk(KERN_INFO " Tigon II (Rev. %i), Firmware: %i.%i.%i, ", - tig_ver, ap->firmware_major, ap->firmware_minor, - ap->firmware_fix); - writel(readl(®s->CpuBCtrl) | CPU_HALT, ®s->CpuBCtrl); - readl(®s->CpuBCtrl); /* PCI write posting */ - /* - * The SRAM bank size does _not_ indicate the amount - * of memory on the card, it controls the _bank_ size! - * Ie. a 1MB AceNIC will have two banks of 512KB. - */ - writel(SRAM_BANK_512K, ®s->LocalCtrl); - writel(SYNC_SRAM_TIMING, ®s->MiscCfg); - ap->version = 2; - ap->tx_ring_entries = MAX_TX_RING_ENTRIES; - break; - default: - printk(KERN_WARNING " Unsupported Tigon version detected " - "(%i)\n", tig_ver); - ecode = -ENODEV; - goto init_error; - } - - /* - * ModeStat _must_ be set after the SRAM settings as this change - * seems to corrupt the ModeStat and possible other registers. - * The SRAM settings survive resets and setting it to the same - * value a second time works as well. This is what caused the - * `Firmware not running' problem on the Tigon II. - */ -#ifdef __BIG_ENDIAN - writel(ACE_BYTE_SWAP_DMA | ACE_WARN | ACE_FATAL | ACE_BYTE_SWAP_BD | - ACE_WORD_SWAP_BD | ACE_NO_JUMBO_FRAG, ®s->ModeStat); -#else - writel(ACE_BYTE_SWAP_DMA | ACE_WARN | ACE_FATAL | - ACE_WORD_SWAP_BD | ACE_NO_JUMBO_FRAG, ®s->ModeStat); -#endif - readl(®s->ModeStat); /* PCI write posting */ - - mac1 = 0; - for(i = 0; i < 4; i++) { - int t; - - mac1 = mac1 << 8; - t = read_eeprom_byte(dev, 0x8c+i); - if (t < 0) { - ecode = -EIO; - goto init_error; - } else - mac1 |= (t & 0xff); - } - mac2 = 0; - for(i = 4; i < 8; i++) { - int t; - - mac2 = mac2 << 8; - t = read_eeprom_byte(dev, 0x8c+i); - if (t < 0) { - ecode = -EIO; - goto init_error; - } else - mac2 |= (t & 0xff); - } - - writel(mac1, ®s->MacAddrHi); - writel(mac2, ®s->MacAddrLo); - - addr[0] = (mac1 >> 8) & 0xff; - addr[1] = mac1 & 0xff; - addr[2] = (mac2 >> 24) & 0xff; - addr[3] = (mac2 >> 16) & 0xff; - addr[4] = (mac2 >> 8) & 0xff; - addr[5] = mac2 & 0xff; - eth_hw_addr_set(dev, addr); - - printk("MAC: %pM\n", dev->dev_addr); - - /* - * Looks like this is necessary to deal with on all architectures, - * even this %$#%$# N440BX Intel based thing doesn't get it right. - * Ie. having two NICs in the machine, one will have the cache - * line set at boot time, the other will not. - */ - pdev = ap->pdev; - pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &cache_size); - cache_size <<= 2; - if (cache_size != SMP_CACHE_BYTES) { - printk(KERN_INFO " PCI cache line size set incorrectly " - "(%i bytes) by BIOS/FW, ", cache_size); - if (cache_size > SMP_CACHE_BYTES) - printk("expecting %i\n", SMP_CACHE_BYTES); - else { - printk("correcting to %i\n", SMP_CACHE_BYTES); - pci_write_config_byte(pdev, PCI_CACHE_LINE_SIZE, - SMP_CACHE_BYTES >> 2); - } - } - - pci_state = readl(®s->PciState); - printk(KERN_INFO " PCI bus width: %i bits, speed: %iMHz, " - "latency: %i clks\n", - (pci_state & PCI_32BIT) ? 32 : 64, - (pci_state & PCI_66MHZ) ? 66 : 33, - ap->pci_latency); - - /* - * Set the max DMA transfer size. Seems that for most systems - * the performance is better when no MAX parameter is - * set. However for systems enabling PCI write and invalidate, - * DMA writes must be set to the L1 cache line size to get - * optimal performance. - * - * The default is now to turn the PCI write and invalidate off - * - that is what Alteon does for NT. - */ - tmp = READ_CMD_MEM | WRITE_CMD_MEM; - if (ap->version >= 2) { - tmp |= (MEM_READ_MULTIPLE | (pci_state & PCI_66MHZ)); - /* - * Tuning parameters only supported for 8 cards - */ - if (board_idx == BOARD_IDX_OVERFLOW || - dis_pci_mem_inval[board_idx]) { - if (ap->pci_command & PCI_COMMAND_INVALIDATE) { - ap->pci_command &= ~PCI_COMMAND_INVALIDATE; - pci_write_config_word(pdev, PCI_COMMAND, - ap->pci_command); - printk(KERN_INFO " Disabling PCI memory " - "write and invalidate\n"); - } - } else if (ap->pci_command & PCI_COMMAND_INVALIDATE) { - printk(KERN_INFO " PCI memory write & invalidate " - "enabled by BIOS, enabling counter measures\n"); - - switch(SMP_CACHE_BYTES) { - case 16: - tmp |= DMA_WRITE_MAX_16; - break; - case 32: - tmp |= DMA_WRITE_MAX_32; - break; - case 64: - tmp |= DMA_WRITE_MAX_64; - break; - case 128: - tmp |= DMA_WRITE_MAX_128; - break; - default: - printk(KERN_INFO " Cache line size %i not " - "supported, PCI write and invalidate " - "disabled\n", SMP_CACHE_BYTES); - ap->pci_command &= ~PCI_COMMAND_INVALIDATE; - pci_write_config_word(pdev, PCI_COMMAND, - ap->pci_command); - } - } - } - -#ifdef __sparc__ - /* - * On this platform, we know what the best dma settings - * are. We use 64-byte maximum bursts, because if we - * burst larger than the cache line size (or even cross - * a 64byte boundary in a single burst) the UltraSparc - * PCI controller will disconnect at 64-byte multiples. - * - * Read-multiple will be properly enabled above, and when - * set will give the PCI controller proper hints about - * prefetching. - */ - tmp &= ~DMA_READ_WRITE_MASK; - tmp |= DMA_READ_MAX_64; - tmp |= DMA_WRITE_MAX_64; -#endif -#ifdef __alpha__ - tmp &= ~DMA_READ_WRITE_MASK; - tmp |= DMA_READ_MAX_128; - /* - * All the docs say MUST NOT. Well, I did. - * Nothing terrible happens, if we load wrong size. - * Bit w&i still works better! - */ - tmp |= DMA_WRITE_MAX_128; -#endif - writel(tmp, ®s->PciState); - -#if 0 - /* - * The Host PCI bus controller driver has to set FBB. - * If all devices on that PCI bus support FBB, then the controller - * can enable FBB support in the Host PCI Bus controller (or on - * the PCI-PCI bridge if that applies). - * -ggg - */ - /* - * I have received reports from people having problems when this - * bit is enabled. - */ - if (!(ap->pci_command & PCI_COMMAND_FAST_BACK)) { - printk(KERN_INFO " Enabling PCI Fast Back to Back\n"); - ap->pci_command |= PCI_COMMAND_FAST_BACK; - pci_write_config_word(pdev, PCI_COMMAND, ap->pci_command); - } -#endif - - /* - * Configure DMA attributes. - */ - if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(64))) { - ecode = -ENODEV; - goto init_error; - } - - /* - * Initialize the generic info block and the command+event rings - * and the control blocks for the transmit and receive rings - * as they need to be setup once and for all. - */ - if (!(info = dma_alloc_coherent(&ap->pdev->dev, sizeof(struct ace_info), - &ap->info_dma, GFP_KERNEL))) { - ecode = -EAGAIN; - goto init_error; - } - ap->info = info; - - /* - * Get the memory for the skb rings. - */ - if (!(ap->skb = kzalloc_obj(struct ace_skb))) { - ecode = -EAGAIN; - goto init_error; - } - - ecode = request_irq(pdev->irq, ace_interrupt, IRQF_SHARED, - DRV_NAME, dev); - if (ecode) { - printk(KERN_WARNING "%s: Requested IRQ %d is busy\n", - DRV_NAME, pdev->irq); - goto init_error; - } else - dev->irq = pdev->irq; - -#ifdef INDEX_DEBUG - spin_lock_init(&ap->debug_lock); - ap->last_tx = ACE_TX_RING_ENTRIES(ap) - 1; - ap->last_std_rx = 0; - ap->last_mini_rx = 0; -#endif - - ecode = ace_load_firmware(dev); - if (ecode) - goto init_error; - - ap->fw_running = 0; - - tmp_ptr = ap->info_dma; - writel(tmp_ptr >> 32, ®s->InfoPtrHi); - writel(tmp_ptr & 0xffffffff, ®s->InfoPtrLo); - - memset(ap->evt_ring, 0, EVT_RING_ENTRIES * sizeof(struct event)); - - set_aceaddr(&info->evt_ctrl.rngptr, ap->evt_ring_dma); - info->evt_ctrl.flags = 0; - - *(ap->evt_prd) = 0; - wmb(); - set_aceaddr(&info->evt_prd_ptr, ap->evt_prd_dma); - writel(0, ®s->EvtCsm); - - set_aceaddr(&info->cmd_ctrl.rngptr, 0x100); - info->cmd_ctrl.flags = 0; - info->cmd_ctrl.max_len = 0; - - for (i = 0; i < CMD_RING_ENTRIES; i++) - writel(0, ®s->CmdRng[i]); - - writel(0, ®s->CmdPrd); - writel(0, ®s->CmdCsm); - - tmp_ptr = ap->info_dma; - tmp_ptr += (unsigned long) &(((struct ace_info *)0)->s.stats); - set_aceaddr(&info->stats2_ptr, (dma_addr_t) tmp_ptr); - - set_aceaddr(&info->rx_std_ctrl.rngptr, ap->rx_ring_base_dma); - info->rx_std_ctrl.max_len = ACE_STD_BUFSIZE; - info->rx_std_ctrl.flags = - RCB_FLG_TCP_UDP_SUM | RCB_FLG_NO_PSEUDO_HDR | RCB_FLG_VLAN_ASSIST; - - memset(ap->rx_std_ring, 0, - RX_STD_RING_ENTRIES * sizeof(struct rx_desc)); - - for (i = 0; i < RX_STD_RING_ENTRIES; i++) - ap->rx_std_ring[i].flags = BD_FLG_TCP_UDP_SUM; - - ap->rx_std_skbprd = 0; - atomic_set(&ap->cur_rx_bufs, 0); - - set_aceaddr(&info->rx_jumbo_ctrl.rngptr, - (ap->rx_ring_base_dma + - (sizeof(struct rx_desc) * RX_STD_RING_ENTRIES))); - info->rx_jumbo_ctrl.max_len = 0; - info->rx_jumbo_ctrl.flags = - RCB_FLG_TCP_UDP_SUM | RCB_FLG_NO_PSEUDO_HDR | RCB_FLG_VLAN_ASSIST; - - memset(ap->rx_jumbo_ring, 0, - RX_JUMBO_RING_ENTRIES * sizeof(struct rx_desc)); - - for (i = 0; i < RX_JUMBO_RING_ENTRIES; i++) - ap->rx_jumbo_ring[i].flags = BD_FLG_TCP_UDP_SUM | BD_FLG_JUMBO; - - ap->rx_jumbo_skbprd = 0; - atomic_set(&ap->cur_jumbo_bufs, 0); - - memset(ap->rx_mini_ring, 0, - RX_MINI_RING_ENTRIES * sizeof(struct rx_desc)); - - if (ap->version >= 2) { - set_aceaddr(&info->rx_mini_ctrl.rngptr, - (ap->rx_ring_base_dma + - (sizeof(struct rx_desc) * - (RX_STD_RING_ENTRIES + - RX_JUMBO_RING_ENTRIES)))); - info->rx_mini_ctrl.max_len = ACE_MINI_SIZE; - info->rx_mini_ctrl.flags = - RCB_FLG_TCP_UDP_SUM|RCB_FLG_NO_PSEUDO_HDR|RCB_FLG_VLAN_ASSIST; - - for (i = 0; i < RX_MINI_RING_ENTRIES; i++) - ap->rx_mini_ring[i].flags = - BD_FLG_TCP_UDP_SUM | BD_FLG_MINI; - } else { - set_aceaddr(&info->rx_mini_ctrl.rngptr, 0); - info->rx_mini_ctrl.flags = RCB_FLG_RNG_DISABLE; - info->rx_mini_ctrl.max_len = 0; - } - - ap->rx_mini_skbprd = 0; - atomic_set(&ap->cur_mini_bufs, 0); - - set_aceaddr(&info->rx_return_ctrl.rngptr, - (ap->rx_ring_base_dma + - (sizeof(struct rx_desc) * - (RX_STD_RING_ENTRIES + - RX_JUMBO_RING_ENTRIES + - RX_MINI_RING_ENTRIES)))); - info->rx_return_ctrl.flags = 0; - info->rx_return_ctrl.max_len = RX_RETURN_RING_ENTRIES; - - memset(ap->rx_return_ring, 0, - RX_RETURN_RING_ENTRIES * sizeof(struct rx_desc)); - - set_aceaddr(&info->rx_ret_prd_ptr, ap->rx_ret_prd_dma); - *(ap->rx_ret_prd) = 0; - - writel(TX_RING_BASE, ®s->WinBase); - - if (ACE_IS_TIGON_I(ap)) { - ap->tx_ring = (__force struct tx_desc *) regs->Window; - for (i = 0; i < (TIGON_I_TX_RING_ENTRIES - * sizeof(struct tx_desc)) / sizeof(u32); i++) - writel(0, (__force void __iomem *)ap->tx_ring + i * 4); - - set_aceaddr(&info->tx_ctrl.rngptr, TX_RING_BASE); - } else { - memset(ap->tx_ring, 0, - MAX_TX_RING_ENTRIES * sizeof(struct tx_desc)); - - set_aceaddr(&info->tx_ctrl.rngptr, ap->tx_ring_dma); - } - - info->tx_ctrl.max_len = ACE_TX_RING_ENTRIES(ap); - tmp = RCB_FLG_TCP_UDP_SUM | RCB_FLG_NO_PSEUDO_HDR | RCB_FLG_VLAN_ASSIST; - - /* - * The Tigon I does not like having the TX ring in host memory ;-( - */ - if (!ACE_IS_TIGON_I(ap)) - tmp |= RCB_FLG_TX_HOST_RING; -#if TX_COAL_INTS_ONLY - tmp |= RCB_FLG_COAL_INT_ONLY; -#endif - info->tx_ctrl.flags = tmp; - - set_aceaddr(&info->tx_csm_ptr, ap->tx_csm_dma); - - /* - * Potential item for tuning parameter - */ -#if 0 /* NO */ - writel(DMA_THRESH_16W, ®s->DmaReadCfg); - writel(DMA_THRESH_16W, ®s->DmaWriteCfg); -#else - writel(DMA_THRESH_8W, ®s->DmaReadCfg); - writel(DMA_THRESH_8W, ®s->DmaWriteCfg); -#endif - - writel(0, ®s->MaskInt); - writel(1, ®s->IfIdx); -#if 0 - /* - * McKinley boxes do not like us fiddling with AssistState - * this early - */ - writel(1, ®s->AssistState); -#endif - - writel(DEF_STAT, ®s->TuneStatTicks); - writel(DEF_TRACE, ®s->TuneTrace); - - ace_set_rxtx_parms(dev, 0); - - if (board_idx == BOARD_IDX_OVERFLOW) { - printk(KERN_WARNING "%s: more than %i NICs detected, " - "ignoring module parameters!\n", - ap->name, ACE_MAX_MOD_PARMS); - } else if (board_idx >= 0) { - if (tx_coal_tick[board_idx]) - writel(tx_coal_tick[board_idx], - ®s->TuneTxCoalTicks); - if (max_tx_desc[board_idx]) - writel(max_tx_desc[board_idx], ®s->TuneMaxTxDesc); - - if (rx_coal_tick[board_idx]) - writel(rx_coal_tick[board_idx], - ®s->TuneRxCoalTicks); - if (max_rx_desc[board_idx]) - writel(max_rx_desc[board_idx], ®s->TuneMaxRxDesc); - - if (trace[board_idx]) - writel(trace[board_idx], ®s->TuneTrace); - - if ((tx_ratio[board_idx] > 0) && (tx_ratio[board_idx] < 64)) - writel(tx_ratio[board_idx], ®s->TxBufRat); - } - - /* - * Default link parameters - */ - tmp = LNK_ENABLE | LNK_FULL_DUPLEX | LNK_1000MB | LNK_100MB | - LNK_10MB | LNK_RX_FLOW_CTL_Y | LNK_NEG_FCTL | LNK_NEGOTIATE; - if(ap->version >= 2) - tmp |= LNK_TX_FLOW_CTL_Y; - - /* - * Override link default parameters - */ - if ((board_idx >= 0) && link_state[board_idx]) { - int option = link_state[board_idx]; - - tmp = LNK_ENABLE; - - if (option & 0x01) { - printk(KERN_INFO "%s: Setting half duplex link\n", - ap->name); - tmp &= ~LNK_FULL_DUPLEX; - } - if (option & 0x02) - tmp &= ~LNK_NEGOTIATE; - if (option & 0x10) - tmp |= LNK_10MB; - if (option & 0x20) - tmp |= LNK_100MB; - if (option & 0x40) - tmp |= LNK_1000MB; - if ((option & 0x70) == 0) { - printk(KERN_WARNING "%s: No media speed specified, " - "forcing auto negotiation\n", ap->name); - tmp |= LNK_NEGOTIATE | LNK_1000MB | - LNK_100MB | LNK_10MB; - } - if ((option & 0x100) == 0) - tmp |= LNK_NEG_FCTL; - else - printk(KERN_INFO "%s: Disabling flow control " - "negotiation\n", ap->name); - if (option & 0x200) - tmp |= LNK_RX_FLOW_CTL_Y; - if ((option & 0x400) && (ap->version >= 2)) { - printk(KERN_INFO "%s: Enabling TX flow control\n", - ap->name); - tmp |= LNK_TX_FLOW_CTL_Y; - } - } - - ap->link = tmp; - writel(tmp, ®s->TuneLink); - if (ap->version >= 2) - writel(tmp, ®s->TuneFastLink); - - writel(ap->firmware_start, ®s->Pc); - - writel(0, ®s->Mb0Lo); - - /* - * Set tx_csm before we start receiving interrupts, otherwise - * the interrupt handler might think it is supposed to process - * tx ints before we are up and running, which may cause a null - * pointer access in the int handler. - */ - ap->cur_rx = 0; - ap->tx_prd = *(ap->tx_csm) = ap->tx_ret_csm = 0; - - wmb(); - ace_set_txprd(regs, ap, 0); - writel(0, ®s->RxRetCsm); - - /* - * Enable DMA engine now. - * If we do this sooner, Mckinley box pukes. - * I assume it's because Tigon II DMA engine wants to check - * *something* even before the CPU is started. - */ - writel(1, ®s->AssistState); /* enable DMA */ - - /* - * Start the NIC CPU - */ - writel(readl(®s->CpuCtrl) & ~(CPU_HALT|CPU_TRACE), ®s->CpuCtrl); - readl(®s->CpuCtrl); - - /* - * Wait for the firmware to spin up - max 3 seconds. - */ - myjif = jiffies + 3 * HZ; - while (time_before(jiffies, myjif) && !ap->fw_running) - cpu_relax(); - - if (!ap->fw_running) { - printk(KERN_ERR "%s: Firmware NOT running!\n", ap->name); - - ace_dump_trace(ap); - writel(readl(®s->CpuCtrl) | CPU_HALT, ®s->CpuCtrl); - readl(®s->CpuCtrl); - - /* aman@sgi.com - account for badly behaving firmware/NIC: - * - have observed that the NIC may continue to generate - * interrupts for some reason; attempt to stop it - halt - * second CPU for Tigon II cards, and also clear Mb0 - * - if we're a module, we'll fail to load if this was - * the only GbE card in the system => if the kernel does - * see an interrupt from the NIC, code to handle it is - * gone and OOps! - so free_irq also - */ - if (ap->version >= 2) - writel(readl(®s->CpuBCtrl) | CPU_HALT, - ®s->CpuBCtrl); - writel(0, ®s->Mb0Lo); - readl(®s->Mb0Lo); - - ecode = -EBUSY; - goto init_error; - } - - /* - * We load the ring here as there seem to be no way to tell the - * firmware to wipe the ring without re-initializing it. - */ - if (!test_and_set_bit(0, &ap->std_refill_busy)) - ace_load_std_rx_ring(dev, RX_RING_SIZE); - else - printk(KERN_ERR "%s: Someone is busy refilling the RX ring\n", - ap->name); - if (ap->version >= 2) { - if (!test_and_set_bit(0, &ap->mini_refill_busy)) - ace_load_mini_rx_ring(dev, RX_MINI_SIZE); - else - printk(KERN_ERR "%s: Someone is busy refilling " - "the RX mini ring\n", ap->name); - } - return 0; - - init_error: - ace_init_cleanup(dev); - return ecode; -} - - -static void ace_set_rxtx_parms(struct net_device *dev, int jumbo) -{ - struct ace_private *ap = netdev_priv(dev); - struct ace_regs __iomem *regs = ap->regs; - int board_idx = ap->board_idx; - - if (board_idx >= 0) { - if (!jumbo) { - if (!tx_coal_tick[board_idx]) - writel(DEF_TX_COAL, ®s->TuneTxCoalTicks); - if (!max_tx_desc[board_idx]) - writel(DEF_TX_MAX_DESC, ®s->TuneMaxTxDesc); - if (!rx_coal_tick[board_idx]) - writel(DEF_RX_COAL, ®s->TuneRxCoalTicks); - if (!max_rx_desc[board_idx]) - writel(DEF_RX_MAX_DESC, ®s->TuneMaxRxDesc); - if (!tx_ratio[board_idx]) - writel(DEF_TX_RATIO, ®s->TxBufRat); - } else { - if (!tx_coal_tick[board_idx]) - writel(DEF_JUMBO_TX_COAL, - ®s->TuneTxCoalTicks); - if (!max_tx_desc[board_idx]) - writel(DEF_JUMBO_TX_MAX_DESC, - ®s->TuneMaxTxDesc); - if (!rx_coal_tick[board_idx]) - writel(DEF_JUMBO_RX_COAL, - ®s->TuneRxCoalTicks); - if (!max_rx_desc[board_idx]) - writel(DEF_JUMBO_RX_MAX_DESC, - ®s->TuneMaxRxDesc); - if (!tx_ratio[board_idx]) - writel(DEF_JUMBO_TX_RATIO, ®s->TxBufRat); - } - } -} - - -static void ace_watchdog(struct net_device *data, unsigned int txqueue) -{ - struct net_device *dev = data; - struct ace_private *ap = netdev_priv(dev); - struct ace_regs __iomem *regs = ap->regs; - - /* - * We haven't received a stats update event for more than 2.5 - * seconds and there is data in the transmit queue, thus we - * assume the card is stuck. - */ - if (*ap->tx_csm != ap->tx_ret_csm) { - printk(KERN_WARNING "%s: Transmitter is stuck, %08x\n", - dev->name, (unsigned int)readl(®s->HostCtrl)); - /* This can happen due to ieee flow control. */ - } else { - printk(KERN_DEBUG "%s: BUG... transmitter died. Kicking it.\n", - dev->name); -#if 0 - netif_wake_queue(dev); -#endif - } -} - - -static void ace_bh_work(struct work_struct *work) -{ - struct ace_private *ap = from_work(ap, work, ace_bh_work); - struct net_device *dev = ap->ndev; - int cur_size; - - cur_size = atomic_read(&ap->cur_rx_bufs); - if ((cur_size < RX_LOW_STD_THRES) && - !test_and_set_bit(0, &ap->std_refill_busy)) { -#ifdef DEBUG - printk("refilling buffers (current %i)\n", cur_size); -#endif - ace_load_std_rx_ring(dev, RX_RING_SIZE - cur_size); - } - - if (ap->version >= 2) { - cur_size = atomic_read(&ap->cur_mini_bufs); - if ((cur_size < RX_LOW_MINI_THRES) && - !test_and_set_bit(0, &ap->mini_refill_busy)) { -#ifdef DEBUG - printk("refilling mini buffers (current %i)\n", - cur_size); -#endif - ace_load_mini_rx_ring(dev, RX_MINI_SIZE - cur_size); - } - } - - cur_size = atomic_read(&ap->cur_jumbo_bufs); - if (ap->jumbo && (cur_size < RX_LOW_JUMBO_THRES) && - !test_and_set_bit(0, &ap->jumbo_refill_busy)) { -#ifdef DEBUG - printk("refilling jumbo buffers (current %i)\n", cur_size); -#endif - ace_load_jumbo_rx_ring(dev, RX_JUMBO_SIZE - cur_size); - } - ap->bh_work_pending = 0; -} - - -/* - * Copy the contents of the NIC's trace buffer to kernel memory. - */ -static void ace_dump_trace(struct ace_private *ap) -{ -#if 0 - if (!ap->trace_buf) - if (!(ap->trace_buf = kmalloc(ACE_TRACE_SIZE, GFP_KERNEL))) - return; -#endif -} - - -/* - * Load the standard rx ring. - * - * Loading rings is safe without holding the spin lock since this is - * done only before the device is enabled, thus no interrupts are - * generated and by the interrupt handler/bh handler. - */ -static void ace_load_std_rx_ring(struct net_device *dev, int nr_bufs) -{ - struct ace_private *ap = netdev_priv(dev); - struct ace_regs __iomem *regs = ap->regs; - short i, idx; - - - prefetchw(&ap->cur_rx_bufs); - - idx = ap->rx_std_skbprd; - - for (i = 0; i < nr_bufs; i++) { - struct sk_buff *skb; - struct rx_desc *rd; - dma_addr_t mapping; - - skb = netdev_alloc_skb_ip_align(dev, ACE_STD_BUFSIZE); - if (!skb) - break; - - mapping = dma_map_page(&ap->pdev->dev, - virt_to_page(skb->data), - offset_in_page(skb->data), - ACE_STD_BUFSIZE, DMA_FROM_DEVICE); - ap->skb->rx_std_skbuff[idx].skb = skb; - dma_unmap_addr_set(&ap->skb->rx_std_skbuff[idx], - mapping, mapping); - - rd = &ap->rx_std_ring[idx]; - set_aceaddr(&rd->addr, mapping); - rd->size = ACE_STD_BUFSIZE; - rd->idx = idx; - idx = (idx + 1) % RX_STD_RING_ENTRIES; - } - - if (!i) - goto error_out; - - atomic_add(i, &ap->cur_rx_bufs); - ap->rx_std_skbprd = idx; - - if (ACE_IS_TIGON_I(ap)) { - struct cmd cmd; - cmd.evt = C_SET_RX_PRD_IDX; - cmd.code = 0; - cmd.idx = ap->rx_std_skbprd; - ace_issue_cmd(regs, &cmd); - } else { - writel(idx, ®s->RxStdPrd); - wmb(); - } - - out: - clear_bit(0, &ap->std_refill_busy); - return; - - error_out: - printk(KERN_INFO "Out of memory when allocating " - "standard receive buffers\n"); - goto out; -} - - -static void ace_load_mini_rx_ring(struct net_device *dev, int nr_bufs) -{ - struct ace_private *ap = netdev_priv(dev); - struct ace_regs __iomem *regs = ap->regs; - short i, idx; - - prefetchw(&ap->cur_mini_bufs); - - idx = ap->rx_mini_skbprd; - for (i = 0; i < nr_bufs; i++) { - struct sk_buff *skb; - struct rx_desc *rd; - dma_addr_t mapping; - - skb = netdev_alloc_skb_ip_align(dev, ACE_MINI_BUFSIZE); - if (!skb) - break; - - mapping = dma_map_page(&ap->pdev->dev, - virt_to_page(skb->data), - offset_in_page(skb->data), - ACE_MINI_BUFSIZE, DMA_FROM_DEVICE); - ap->skb->rx_mini_skbuff[idx].skb = skb; - dma_unmap_addr_set(&ap->skb->rx_mini_skbuff[idx], - mapping, mapping); - - rd = &ap->rx_mini_ring[idx]; - set_aceaddr(&rd->addr, mapping); - rd->size = ACE_MINI_BUFSIZE; - rd->idx = idx; - idx = (idx + 1) % RX_MINI_RING_ENTRIES; - } - - if (!i) - goto error_out; - - atomic_add(i, &ap->cur_mini_bufs); - - ap->rx_mini_skbprd = idx; - - writel(idx, ®s->RxMiniPrd); - wmb(); - - out: - clear_bit(0, &ap->mini_refill_busy); - return; - error_out: - printk(KERN_INFO "Out of memory when allocating " - "mini receive buffers\n"); - goto out; -} - - -/* - * Load the jumbo rx ring, this may happen at any time if the MTU - * is changed to a value > 1500. - */ -static void ace_load_jumbo_rx_ring(struct net_device *dev, int nr_bufs) -{ - struct ace_private *ap = netdev_priv(dev); - struct ace_regs __iomem *regs = ap->regs; - short i, idx; - - idx = ap->rx_jumbo_skbprd; - - for (i = 0; i < nr_bufs; i++) { - struct sk_buff *skb; - struct rx_desc *rd; - dma_addr_t mapping; - - skb = netdev_alloc_skb_ip_align(dev, ACE_JUMBO_BUFSIZE); - if (!skb) - break; - - mapping = dma_map_page(&ap->pdev->dev, - virt_to_page(skb->data), - offset_in_page(skb->data), - ACE_JUMBO_BUFSIZE, DMA_FROM_DEVICE); - ap->skb->rx_jumbo_skbuff[idx].skb = skb; - dma_unmap_addr_set(&ap->skb->rx_jumbo_skbuff[idx], - mapping, mapping); - - rd = &ap->rx_jumbo_ring[idx]; - set_aceaddr(&rd->addr, mapping); - rd->size = ACE_JUMBO_BUFSIZE; - rd->idx = idx; - idx = (idx + 1) % RX_JUMBO_RING_ENTRIES; - } - - if (!i) - goto error_out; - - atomic_add(i, &ap->cur_jumbo_bufs); - ap->rx_jumbo_skbprd = idx; - - if (ACE_IS_TIGON_I(ap)) { - struct cmd cmd; - cmd.evt = C_SET_RX_JUMBO_PRD_IDX; - cmd.code = 0; - cmd.idx = ap->rx_jumbo_skbprd; - ace_issue_cmd(regs, &cmd); - } else { - writel(idx, ®s->RxJumboPrd); - wmb(); - } - - out: - clear_bit(0, &ap->jumbo_refill_busy); - return; - error_out: - if (net_ratelimit()) - printk(KERN_INFO "Out of memory when allocating " - "jumbo receive buffers\n"); - goto out; -} - - -/* - * All events are considered to be slow (RX/TX ints do not generate - * events) and are handled here, outside the main interrupt handler, - * to reduce the size of the handler. - */ -static u32 ace_handle_event(struct net_device *dev, u32 evtcsm, u32 evtprd) -{ - struct ace_private *ap; - - ap = netdev_priv(dev); - - while (evtcsm != evtprd) { - switch (ap->evt_ring[evtcsm].evt) { - case E_FW_RUNNING: - printk(KERN_INFO "%s: Firmware up and running\n", - ap->name); - ap->fw_running = 1; - wmb(); - break; - case E_STATS_UPDATED: - break; - case E_LNK_STATE: - { - u16 code = ap->evt_ring[evtcsm].code; - switch (code) { - case E_C_LINK_UP: - { - u32 state = readl(&ap->regs->GigLnkState); - printk(KERN_WARNING "%s: Optical link UP " - "(%s Duplex, Flow Control: %s%s)\n", - ap->name, - state & LNK_FULL_DUPLEX ? "Full":"Half", - state & LNK_TX_FLOW_CTL_Y ? "TX " : "", - state & LNK_RX_FLOW_CTL_Y ? "RX" : ""); - break; - } - case E_C_LINK_DOWN: - printk(KERN_WARNING "%s: Optical link DOWN\n", - ap->name); - break; - case E_C_LINK_10_100: - printk(KERN_WARNING "%s: 10/100BaseT link " - "UP\n", ap->name); - break; - default: - printk(KERN_ERR "%s: Unknown optical link " - "state %02x\n", ap->name, code); - } - break; - } - case E_ERROR: - switch(ap->evt_ring[evtcsm].code) { - case E_C_ERR_INVAL_CMD: - printk(KERN_ERR "%s: invalid command error\n", - ap->name); - break; - case E_C_ERR_UNIMP_CMD: - printk(KERN_ERR "%s: unimplemented command " - "error\n", ap->name); - break; - case E_C_ERR_BAD_CFG: - printk(KERN_ERR "%s: bad config error\n", - ap->name); - break; - default: - printk(KERN_ERR "%s: unknown error %02x\n", - ap->name, ap->evt_ring[evtcsm].code); - } - break; - case E_RESET_JUMBO_RNG: - { - int i; - for (i = 0; i < RX_JUMBO_RING_ENTRIES; i++) { - if (ap->skb->rx_jumbo_skbuff[i].skb) { - ap->rx_jumbo_ring[i].size = 0; - set_aceaddr(&ap->rx_jumbo_ring[i].addr, 0); - dev_kfree_skb(ap->skb->rx_jumbo_skbuff[i].skb); - ap->skb->rx_jumbo_skbuff[i].skb = NULL; - } - } - - if (ACE_IS_TIGON_I(ap)) { - struct cmd cmd; - cmd.evt = C_SET_RX_JUMBO_PRD_IDX; - cmd.code = 0; - cmd.idx = 0; - ace_issue_cmd(ap->regs, &cmd); - } else { - writel(0, &((ap->regs)->RxJumboPrd)); - wmb(); - } - - ap->jumbo = 0; - ap->rx_jumbo_skbprd = 0; - printk(KERN_INFO "%s: Jumbo ring flushed\n", - ap->name); - clear_bit(0, &ap->jumbo_refill_busy); - break; - } - default: - printk(KERN_ERR "%s: Unhandled event 0x%02x\n", - ap->name, ap->evt_ring[evtcsm].evt); - } - evtcsm = (evtcsm + 1) % EVT_RING_ENTRIES; - } - - return evtcsm; -} - - -static void ace_rx_int(struct net_device *dev, u32 rxretprd, u32 rxretcsm) -{ - struct ace_private *ap = netdev_priv(dev); - u32 idx; - int mini_count = 0, std_count = 0; - - idx = rxretcsm; - - prefetchw(&ap->cur_rx_bufs); - prefetchw(&ap->cur_mini_bufs); - - while (idx != rxretprd) { - struct ring_info *rip; - struct sk_buff *skb; - struct rx_desc *retdesc; - u32 skbidx; - int bd_flags, desc_type, mapsize; - u16 csum; - - - /* make sure the rx descriptor isn't read before rxretprd */ - if (idx == rxretcsm) - rmb(); - - retdesc = &ap->rx_return_ring[idx]; - skbidx = retdesc->idx; - bd_flags = retdesc->flags; - desc_type = bd_flags & (BD_FLG_JUMBO | BD_FLG_MINI); - - switch(desc_type) { - /* - * Normal frames do not have any flags set - * - * Mini and normal frames arrive frequently, - * so use a local counter to avoid doing - * atomic operations for each packet arriving. - */ - case 0: - rip = &ap->skb->rx_std_skbuff[skbidx]; - mapsize = ACE_STD_BUFSIZE; - std_count++; - break; - case BD_FLG_JUMBO: - rip = &ap->skb->rx_jumbo_skbuff[skbidx]; - mapsize = ACE_JUMBO_BUFSIZE; - atomic_dec(&ap->cur_jumbo_bufs); - break; - case BD_FLG_MINI: - rip = &ap->skb->rx_mini_skbuff[skbidx]; - mapsize = ACE_MINI_BUFSIZE; - mini_count++; - break; - default: - printk(KERN_INFO "%s: unknown frame type (0x%02x) " - "returned by NIC\n", dev->name, - retdesc->flags); - goto error; - } - - skb = rip->skb; - rip->skb = NULL; - dma_unmap_page(&ap->pdev->dev, dma_unmap_addr(rip, mapping), - mapsize, DMA_FROM_DEVICE); - skb_put(skb, retdesc->size); - - /* - * Fly baby, fly! - */ - csum = retdesc->tcp_udp_csum; - - skb->protocol = eth_type_trans(skb, dev); - - /* - * Instead of forcing the poor tigon mips cpu to calculate - * pseudo hdr checksum, we do this ourselves. - */ - if (bd_flags & BD_FLG_TCP_UDP_SUM) { - skb->csum = htons(csum); - skb->ip_summed = CHECKSUM_COMPLETE; - } else { - skb_checksum_none_assert(skb); - } - - /* send it up */ - if ((bd_flags & BD_FLG_VLAN_TAG)) - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), retdesc->vlan); - netif_rx(skb); - - dev->stats.rx_packets++; - dev->stats.rx_bytes += retdesc->size; - - idx = (idx + 1) % RX_RETURN_RING_ENTRIES; - } - - atomic_sub(std_count, &ap->cur_rx_bufs); - if (!ACE_IS_TIGON_I(ap)) - atomic_sub(mini_count, &ap->cur_mini_bufs); - - out: - /* - * According to the documentation RxRetCsm is obsolete with - * the 12.3.x Firmware - my Tigon I NICs seem to disagree! - */ - if (ACE_IS_TIGON_I(ap)) { - writel(idx, &ap->regs->RxRetCsm); - } - ap->cur_rx = idx; - - return; - error: - idx = rxretprd; - goto out; -} - - -static inline void ace_tx_int(struct net_device *dev, - u32 txcsm, u32 idx) -{ - struct ace_private *ap = netdev_priv(dev); - - do { - struct sk_buff *skb; - struct tx_ring_info *info; - - info = ap->skb->tx_skbuff + idx; - skb = info->skb; - - if (dma_unmap_len(info, maplen)) { - dma_unmap_page(&ap->pdev->dev, - dma_unmap_addr(info, mapping), - dma_unmap_len(info, maplen), - DMA_TO_DEVICE); - dma_unmap_len_set(info, maplen, 0); - } - - if (skb) { - dev->stats.tx_packets++; - dev->stats.tx_bytes += skb->len; - dev_consume_skb_irq(skb); - info->skb = NULL; - } - - idx = (idx + 1) % ACE_TX_RING_ENTRIES(ap); - } while (idx != txcsm); - - if (netif_queue_stopped(dev)) - netif_wake_queue(dev); - - wmb(); - ap->tx_ret_csm = txcsm; - - /* So... tx_ret_csm is advanced _after_ check for device wakeup. - * - * We could try to make it before. In this case we would get - * the following race condition: hard_start_xmit on other cpu - * enters after we advanced tx_ret_csm and fills space, - * which we have just freed, so that we make illegal device wakeup. - * There is no good way to workaround this (at entry - * to ace_start_xmit detects this condition and prevents - * ring corruption, but it is not a good workaround.) - * - * When tx_ret_csm is advanced after, we wake up device _only_ - * if we really have some space in ring (though the core doing - * hard_start_xmit can see full ring for some period and has to - * synchronize.) Superb. - * BUT! We get another subtle race condition. hard_start_xmit - * may think that ring is full between wakeup and advancing - * tx_ret_csm and will stop device instantly! It is not so bad. - * We are guaranteed that there is something in ring, so that - * the next irq will resume transmission. To speedup this we could - * mark descriptor, which closes ring with BD_FLG_COAL_NOW - * (see ace_start_xmit). - * - * Well, this dilemma exists in all lock-free devices. - * We, following scheme used in drivers by Donald Becker, - * select the least dangerous. - * --ANK - */ -} - - -static irqreturn_t ace_interrupt(int irq, void *dev_id) -{ - struct net_device *dev = (struct net_device *)dev_id; - struct ace_private *ap = netdev_priv(dev); - struct ace_regs __iomem *regs = ap->regs; - u32 idx; - u32 txcsm, rxretcsm, rxretprd; - u32 evtcsm, evtprd; - - /* - * In case of PCI shared interrupts or spurious interrupts, - * we want to make sure it is actually our interrupt before - * spending any time in here. - */ - if (!(readl(®s->HostCtrl) & IN_INT)) - return IRQ_NONE; - - /* - * ACK intr now. Otherwise we will lose updates to rx_ret_prd, - * which happened _after_ rxretprd = *ap->rx_ret_prd; but before - * writel(0, ®s->Mb0Lo). - * - * "IRQ avoidance" recommended in docs applies to IRQs served - * threads and it is wrong even for that case. - */ - writel(0, ®s->Mb0Lo); - readl(®s->Mb0Lo); - - /* - * There is no conflict between transmit handling in - * start_xmit and receive processing, thus there is no reason - * to take a spin lock for RX handling. Wait until we start - * working on the other stuff - hey we don't need a spin lock - * anymore. - */ - rxretprd = *ap->rx_ret_prd; - rxretcsm = ap->cur_rx; - - if (rxretprd != rxretcsm) - ace_rx_int(dev, rxretprd, rxretcsm); - - txcsm = *ap->tx_csm; - idx = ap->tx_ret_csm; - - if (txcsm != idx) { - /* - * If each skb takes only one descriptor this check degenerates - * to identity, because new space has just been opened. - * But if skbs are fragmented we must check that this index - * update releases enough of space, otherwise we just - * wait for device to make more work. - */ - if (!tx_ring_full(ap, txcsm, ap->tx_prd)) - ace_tx_int(dev, txcsm, idx); - } - - evtcsm = readl(®s->EvtCsm); - evtprd = *ap->evt_prd; - - if (evtcsm != evtprd) { - evtcsm = ace_handle_event(dev, evtcsm, evtprd); - writel(evtcsm, ®s->EvtCsm); - } - - /* - * This has to go last in the interrupt handler and run with - * the spin lock released ... what lock? - */ - if (netif_running(dev)) { - int cur_size; - int run_bh_work = 0; - - cur_size = atomic_read(&ap->cur_rx_bufs); - if (cur_size < RX_LOW_STD_THRES) { - if ((cur_size < RX_PANIC_STD_THRES) && - !test_and_set_bit(0, &ap->std_refill_busy)) { -#ifdef DEBUG - printk("low on std buffers %i\n", cur_size); -#endif - ace_load_std_rx_ring(dev, - RX_RING_SIZE - cur_size); - } else - run_bh_work = 1; - } - - if (!ACE_IS_TIGON_I(ap)) { - cur_size = atomic_read(&ap->cur_mini_bufs); - if (cur_size < RX_LOW_MINI_THRES) { - if ((cur_size < RX_PANIC_MINI_THRES) && - !test_and_set_bit(0, - &ap->mini_refill_busy)) { -#ifdef DEBUG - printk("low on mini buffers %i\n", - cur_size); -#endif - ace_load_mini_rx_ring(dev, - RX_MINI_SIZE - cur_size); - } else - run_bh_work = 1; - } - } - - if (ap->jumbo) { - cur_size = atomic_read(&ap->cur_jumbo_bufs); - if (cur_size < RX_LOW_JUMBO_THRES) { - if ((cur_size < RX_PANIC_JUMBO_THRES) && - !test_and_set_bit(0, - &ap->jumbo_refill_busy)){ -#ifdef DEBUG - printk("low on jumbo buffers %i\n", - cur_size); -#endif - ace_load_jumbo_rx_ring(dev, - RX_JUMBO_SIZE - cur_size); - } else - run_bh_work = 1; - } - } - if (run_bh_work && !ap->bh_work_pending) { - ap->bh_work_pending = 1; - queue_work(system_bh_wq, &ap->ace_bh_work); - } - } - - return IRQ_HANDLED; -} - -static int ace_open(struct net_device *dev) -{ - struct ace_private *ap = netdev_priv(dev); - struct ace_regs __iomem *regs = ap->regs; - struct cmd cmd; - - if (!(ap->fw_running)) { - printk(KERN_WARNING "%s: Firmware not running!\n", dev->name); - return -EBUSY; - } - - writel(dev->mtu + ETH_HLEN + 4, ®s->IfMtu); - - cmd.evt = C_CLEAR_STATS; - cmd.code = 0; - cmd.idx = 0; - ace_issue_cmd(regs, &cmd); - - cmd.evt = C_HOST_STATE; - cmd.code = C_C_STACK_UP; - cmd.idx = 0; - ace_issue_cmd(regs, &cmd); - - if (ap->jumbo && - !test_and_set_bit(0, &ap->jumbo_refill_busy)) - ace_load_jumbo_rx_ring(dev, RX_JUMBO_SIZE); - - if (dev->flags & IFF_PROMISC) { - cmd.evt = C_SET_PROMISC_MODE; - cmd.code = C_C_PROMISC_ENABLE; - cmd.idx = 0; - ace_issue_cmd(regs, &cmd); - - ap->promisc = 1; - }else - ap->promisc = 0; - ap->mcast_all = 0; - -#if 0 - cmd.evt = C_LNK_NEGOTIATION; - cmd.code = 0; - cmd.idx = 0; - ace_issue_cmd(regs, &cmd); -#endif - - netif_start_queue(dev); - - /* - * Setup the bottom half rx ring refill handler - */ - INIT_WORK(&ap->ace_bh_work, ace_bh_work); - return 0; -} - - -static int ace_close(struct net_device *dev) -{ - struct ace_private *ap = netdev_priv(dev); - struct ace_regs __iomem *regs = ap->regs; - struct cmd cmd; - unsigned long flags; - short i; - - /* - * Without (or before) releasing irq and stopping hardware, this - * is an absolute non-sense, by the way. It will be reset instantly - * by the first irq. - */ - netif_stop_queue(dev); - - - if (ap->promisc) { - cmd.evt = C_SET_PROMISC_MODE; - cmd.code = C_C_PROMISC_DISABLE; - cmd.idx = 0; - ace_issue_cmd(regs, &cmd); - ap->promisc = 0; - } - - cmd.evt = C_HOST_STATE; - cmd.code = C_C_STACK_DOWN; - cmd.idx = 0; - ace_issue_cmd(regs, &cmd); - - cancel_work_sync(&ap->ace_bh_work); - - /* - * Make sure one CPU is not processing packets while - * buffers are being released by another. - */ - - local_irq_save(flags); - ace_mask_irq(dev); - - for (i = 0; i < ACE_TX_RING_ENTRIES(ap); i++) { - struct sk_buff *skb; - struct tx_ring_info *info; - - info = ap->skb->tx_skbuff + i; - skb = info->skb; - - if (dma_unmap_len(info, maplen)) { - if (ACE_IS_TIGON_I(ap)) { - /* NB: TIGON_1 is special, tx_ring is in io space */ - struct tx_desc __iomem *tx; - tx = (__force struct tx_desc __iomem *) &ap->tx_ring[i]; - writel(0, &tx->addr.addrhi); - writel(0, &tx->addr.addrlo); - writel(0, &tx->flagsize); - } else - memset(ap->tx_ring + i, 0, - sizeof(struct tx_desc)); - dma_unmap_page(&ap->pdev->dev, - dma_unmap_addr(info, mapping), - dma_unmap_len(info, maplen), - DMA_TO_DEVICE); - dma_unmap_len_set(info, maplen, 0); - } - if (skb) { - dev_kfree_skb(skb); - info->skb = NULL; - } - } - - if (ap->jumbo) { - cmd.evt = C_RESET_JUMBO_RNG; - cmd.code = 0; - cmd.idx = 0; - ace_issue_cmd(regs, &cmd); - } - - ace_unmask_irq(dev); - local_irq_restore(flags); - - return 0; -} - - -static inline dma_addr_t -ace_map_tx_skb(struct ace_private *ap, struct sk_buff *skb, - struct sk_buff *tail, u32 idx) -{ - dma_addr_t mapping; - struct tx_ring_info *info; - - mapping = dma_map_page(&ap->pdev->dev, virt_to_page(skb->data), - offset_in_page(skb->data), skb->len, - DMA_TO_DEVICE); - - info = ap->skb->tx_skbuff + idx; - info->skb = tail; - dma_unmap_addr_set(info, mapping, mapping); - dma_unmap_len_set(info, maplen, skb->len); - return mapping; -} - - -static inline void -ace_load_tx_bd(struct ace_private *ap, struct tx_desc *desc, u64 addr, - u32 flagsize, u32 vlan_tag) -{ -#if !USE_TX_COAL_NOW - flagsize &= ~BD_FLG_COAL_NOW; -#endif - - if (ACE_IS_TIGON_I(ap)) { - struct tx_desc __iomem *io = (__force struct tx_desc __iomem *) desc; - writel(addr >> 32, &io->addr.addrhi); - writel(addr & 0xffffffff, &io->addr.addrlo); - writel(flagsize, &io->flagsize); - writel(vlan_tag, &io->vlanres); - } else { - desc->addr.addrhi = addr >> 32; - desc->addr.addrlo = addr; - desc->flagsize = flagsize; - desc->vlanres = vlan_tag; - } -} - - -static netdev_tx_t ace_start_xmit(struct sk_buff *skb, - struct net_device *dev) -{ - struct ace_private *ap = netdev_priv(dev); - struct ace_regs __iomem *regs = ap->regs; - struct tx_desc *desc; - u32 idx, flagsize; - unsigned long maxjiff = jiffies + 3*HZ; - -restart: - idx = ap->tx_prd; - - if (tx_ring_full(ap, ap->tx_ret_csm, idx)) - goto overflow; - - if (!skb_shinfo(skb)->nr_frags) { - dma_addr_t mapping; - u32 vlan_tag = 0; - - mapping = ace_map_tx_skb(ap, skb, skb, idx); - flagsize = (skb->len << 16) | (BD_FLG_END); - if (skb->ip_summed == CHECKSUM_PARTIAL) - flagsize |= BD_FLG_TCP_UDP_SUM; - if (skb_vlan_tag_present(skb)) { - flagsize |= BD_FLG_VLAN_TAG; - vlan_tag = skb_vlan_tag_get(skb); - } - desc = ap->tx_ring + idx; - idx = (idx + 1) % ACE_TX_RING_ENTRIES(ap); - - /* Look at ace_tx_int for explanations. */ - if (tx_ring_full(ap, ap->tx_ret_csm, idx)) - flagsize |= BD_FLG_COAL_NOW; - - ace_load_tx_bd(ap, desc, mapping, flagsize, vlan_tag); - } else { - dma_addr_t mapping; - u32 vlan_tag = 0; - int i; - - mapping = ace_map_tx_skb(ap, skb, NULL, idx); - flagsize = (skb_headlen(skb) << 16); - if (skb->ip_summed == CHECKSUM_PARTIAL) - flagsize |= BD_FLG_TCP_UDP_SUM; - if (skb_vlan_tag_present(skb)) { - flagsize |= BD_FLG_VLAN_TAG; - vlan_tag = skb_vlan_tag_get(skb); - } - - ace_load_tx_bd(ap, ap->tx_ring + idx, mapping, flagsize, vlan_tag); - - idx = (idx + 1) % ACE_TX_RING_ENTRIES(ap); - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - struct tx_ring_info *info; - - info = ap->skb->tx_skbuff + idx; - desc = ap->tx_ring + idx; - - mapping = skb_frag_dma_map(&ap->pdev->dev, frag, 0, - skb_frag_size(frag), - DMA_TO_DEVICE); - - flagsize = skb_frag_size(frag) << 16; - if (skb->ip_summed == CHECKSUM_PARTIAL) - flagsize |= BD_FLG_TCP_UDP_SUM; - idx = (idx + 1) % ACE_TX_RING_ENTRIES(ap); - - if (i == skb_shinfo(skb)->nr_frags - 1) { - flagsize |= BD_FLG_END; - if (tx_ring_full(ap, ap->tx_ret_csm, idx)) - flagsize |= BD_FLG_COAL_NOW; - - /* - * Only the last fragment frees - * the skb! - */ - info->skb = skb; - } else { - info->skb = NULL; - } - dma_unmap_addr_set(info, mapping, mapping); - dma_unmap_len_set(info, maplen, skb_frag_size(frag)); - ace_load_tx_bd(ap, desc, mapping, flagsize, vlan_tag); - } - } - - wmb(); - ap->tx_prd = idx; - ace_set_txprd(regs, ap, idx); - - if (flagsize & BD_FLG_COAL_NOW) { - netif_stop_queue(dev); - - /* - * A TX-descriptor producer (an IRQ) might have gotten - * between, making the ring free again. Since xmit is - * serialized, this is the only situation we have to - * re-test. - */ - if (!tx_ring_full(ap, ap->tx_ret_csm, idx)) - netif_wake_queue(dev); - } - - return NETDEV_TX_OK; - -overflow: - /* - * This race condition is unavoidable with lock-free drivers. - * We wake up the queue _before_ tx_prd is advanced, so that we can - * enter hard_start_xmit too early, while tx ring still looks closed. - * This happens ~1-4 times per 100000 packets, so that we can allow - * to loop syncing to other CPU. Probably, we need an additional - * wmb() in ace_tx_intr as well. - * - * Note that this race is relieved by reserving one more entry - * in tx ring than it is necessary (see original non-SG driver). - * However, with SG we need to reserve 2*MAX_SKB_FRAGS+1, which - * is already overkill. - * - * Alternative is to return with 1 not throttling queue. In this - * case loop becomes longer, no more useful effects. - */ - if (time_before(jiffies, maxjiff)) { - barrier(); - cpu_relax(); - goto restart; - } - - /* The ring is stuck full. */ - printk(KERN_WARNING "%s: Transmit ring stuck full\n", dev->name); - return NETDEV_TX_BUSY; -} - - -static int ace_change_mtu(struct net_device *dev, int new_mtu) -{ - struct ace_private *ap = netdev_priv(dev); - struct ace_regs __iomem *regs = ap->regs; - - writel(new_mtu + ETH_HLEN + 4, ®s->IfMtu); - WRITE_ONCE(dev->mtu, new_mtu); - - if (new_mtu > ACE_STD_MTU) { - if (!(ap->jumbo)) { - printk(KERN_INFO "%s: Enabling Jumbo frame " - "support\n", dev->name); - ap->jumbo = 1; - if (!test_and_set_bit(0, &ap->jumbo_refill_busy)) - ace_load_jumbo_rx_ring(dev, RX_JUMBO_SIZE); - ace_set_rxtx_parms(dev, 1); - } - } else { - while (test_and_set_bit(0, &ap->jumbo_refill_busy)); - ace_sync_irq(dev->irq); - ace_set_rxtx_parms(dev, 0); - if (ap->jumbo) { - struct cmd cmd; - - cmd.evt = C_RESET_JUMBO_RNG; - cmd.code = 0; - cmd.idx = 0; - ace_issue_cmd(regs, &cmd); - } - } - - return 0; -} - -static int ace_get_link_ksettings(struct net_device *dev, - struct ethtool_link_ksettings *cmd) -{ - struct ace_private *ap = netdev_priv(dev); - struct ace_regs __iomem *regs = ap->regs; - u32 link; - u32 supported; - - memset(cmd, 0, sizeof(struct ethtool_link_ksettings)); - - supported = (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | - SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | - SUPPORTED_1000baseT_Half | SUPPORTED_1000baseT_Full | - SUPPORTED_Autoneg | SUPPORTED_FIBRE); - - cmd->base.port = PORT_FIBRE; - - link = readl(®s->GigLnkState); - if (link & LNK_1000MB) { - cmd->base.speed = SPEED_1000; - } else { - link = readl(®s->FastLnkState); - if (link & LNK_100MB) - cmd->base.speed = SPEED_100; - else if (link & LNK_10MB) - cmd->base.speed = SPEED_10; - else - cmd->base.speed = 0; - } - if (link & LNK_FULL_DUPLEX) - cmd->base.duplex = DUPLEX_FULL; - else - cmd->base.duplex = DUPLEX_HALF; - - if (link & LNK_NEGOTIATE) - cmd->base.autoneg = AUTONEG_ENABLE; - else - cmd->base.autoneg = AUTONEG_DISABLE; - -#if 0 - /* - * Current struct ethtool_cmd is insufficient - */ - ecmd->trace = readl(®s->TuneTrace); - - ecmd->txcoal = readl(®s->TuneTxCoalTicks); - ecmd->rxcoal = readl(®s->TuneRxCoalTicks); -#endif - - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, - supported); - - return 0; -} - -static int ace_set_link_ksettings(struct net_device *dev, - const struct ethtool_link_ksettings *cmd) -{ - struct ace_private *ap = netdev_priv(dev); - struct ace_regs __iomem *regs = ap->regs; - u32 link, speed; - - link = readl(®s->GigLnkState); - if (link & LNK_1000MB) - speed = SPEED_1000; - else { - link = readl(®s->FastLnkState); - if (link & LNK_100MB) - speed = SPEED_100; - else if (link & LNK_10MB) - speed = SPEED_10; - else - speed = SPEED_100; - } - - link = LNK_ENABLE | LNK_1000MB | LNK_100MB | LNK_10MB | - LNK_RX_FLOW_CTL_Y | LNK_NEG_FCTL; - if (!ACE_IS_TIGON_I(ap)) - link |= LNK_TX_FLOW_CTL_Y; - if (cmd->base.autoneg == AUTONEG_ENABLE) - link |= LNK_NEGOTIATE; - if (cmd->base.speed != speed) { - link &= ~(LNK_1000MB | LNK_100MB | LNK_10MB); - switch (cmd->base.speed) { - case SPEED_1000: - link |= LNK_1000MB; - break; - case SPEED_100: - link |= LNK_100MB; - break; - case SPEED_10: - link |= LNK_10MB; - break; - } - } - - if (cmd->base.duplex == DUPLEX_FULL) - link |= LNK_FULL_DUPLEX; - - if (link != ap->link) { - struct cmd cmd; - printk(KERN_INFO "%s: Renegotiating link state\n", - dev->name); - - ap->link = link; - writel(link, ®s->TuneLink); - if (!ACE_IS_TIGON_I(ap)) - writel(link, ®s->TuneFastLink); - wmb(); - - cmd.evt = C_LNK_NEGOTIATION; - cmd.code = 0; - cmd.idx = 0; - ace_issue_cmd(regs, &cmd); - } - return 0; -} - -static void ace_get_drvinfo(struct net_device *dev, - struct ethtool_drvinfo *info) -{ - struct ace_private *ap = netdev_priv(dev); - - strscpy(info->driver, "acenic", sizeof(info->driver)); - snprintf(info->fw_version, sizeof(info->version), "%i.%i.%i", - ap->firmware_major, ap->firmware_minor, ap->firmware_fix); - - if (ap->pdev) - strscpy(info->bus_info, pci_name(ap->pdev), - sizeof(info->bus_info)); - -} - -/* - * Set the hardware MAC address. - */ -static int ace_set_mac_addr(struct net_device *dev, void *p) -{ - struct ace_private *ap = netdev_priv(dev); - struct ace_regs __iomem *regs = ap->regs; - struct sockaddr *addr=p; - const u8 *da; - struct cmd cmd; - - if(netif_running(dev)) - return -EBUSY; - - eth_hw_addr_set(dev, addr->sa_data); - - da = (const u8 *)dev->dev_addr; - - writel(da[0] << 8 | da[1], ®s->MacAddrHi); - writel((da[2] << 24) | (da[3] << 16) | (da[4] << 8) | da[5], - ®s->MacAddrLo); - - cmd.evt = C_SET_MAC_ADDR; - cmd.code = 0; - cmd.idx = 0; - ace_issue_cmd(regs, &cmd); - - return 0; -} - - -static void ace_set_multicast_list(struct net_device *dev) -{ - struct ace_private *ap = netdev_priv(dev); - struct ace_regs __iomem *regs = ap->regs; - struct cmd cmd; - - if ((dev->flags & IFF_ALLMULTI) && !(ap->mcast_all)) { - cmd.evt = C_SET_MULTICAST_MODE; - cmd.code = C_C_MCAST_ENABLE; - cmd.idx = 0; - ace_issue_cmd(regs, &cmd); - ap->mcast_all = 1; - } else if (ap->mcast_all) { - cmd.evt = C_SET_MULTICAST_MODE; - cmd.code = C_C_MCAST_DISABLE; - cmd.idx = 0; - ace_issue_cmd(regs, &cmd); - ap->mcast_all = 0; - } - - if ((dev->flags & IFF_PROMISC) && !(ap->promisc)) { - cmd.evt = C_SET_PROMISC_MODE; - cmd.code = C_C_PROMISC_ENABLE; - cmd.idx = 0; - ace_issue_cmd(regs, &cmd); - ap->promisc = 1; - }else if (!(dev->flags & IFF_PROMISC) && (ap->promisc)) { - cmd.evt = C_SET_PROMISC_MODE; - cmd.code = C_C_PROMISC_DISABLE; - cmd.idx = 0; - ace_issue_cmd(regs, &cmd); - ap->promisc = 0; - } - - /* - * For the time being multicast relies on the upper layers - * filtering it properly. The Firmware does not allow one to - * set the entire multicast list at a time and keeping track of - * it here is going to be messy. - */ - if (!netdev_mc_empty(dev) && !ap->mcast_all) { - cmd.evt = C_SET_MULTICAST_MODE; - cmd.code = C_C_MCAST_ENABLE; - cmd.idx = 0; - ace_issue_cmd(regs, &cmd); - }else if (!ap->mcast_all) { - cmd.evt = C_SET_MULTICAST_MODE; - cmd.code = C_C_MCAST_DISABLE; - cmd.idx = 0; - ace_issue_cmd(regs, &cmd); - } -} - - -static struct net_device_stats *ace_get_stats(struct net_device *dev) -{ - struct ace_private *ap = netdev_priv(dev); - struct ace_mac_stats __iomem *mac_stats = - (struct ace_mac_stats __iomem *)ap->regs->Stats; - - dev->stats.rx_missed_errors = readl(&mac_stats->drop_space); - dev->stats.multicast = readl(&mac_stats->kept_mc); - dev->stats.collisions = readl(&mac_stats->coll); - - return &dev->stats; -} - - -static void ace_copy(struct ace_regs __iomem *regs, const __be32 *src, - u32 dest, int size) -{ - void __iomem *tdest; - short tsize, i; - - if (size <= 0) - return; - - while (size > 0) { - tsize = min_t(u32, ((~dest & (ACE_WINDOW_SIZE - 1)) + 1), - min_t(u32, size, ACE_WINDOW_SIZE)); - tdest = (void __iomem *) ®s->Window + - (dest & (ACE_WINDOW_SIZE - 1)); - writel(dest & ~(ACE_WINDOW_SIZE - 1), ®s->WinBase); - for (i = 0; i < (tsize / 4); i++) { - /* Firmware is big-endian */ - writel(be32_to_cpup(src), tdest); - src++; - tdest += 4; - dest += 4; - size -= 4; - } - } -} - - -static void ace_clear(struct ace_regs __iomem *regs, u32 dest, int size) -{ - void __iomem *tdest; - short tsize = 0, i; - - if (size <= 0) - return; - - while (size > 0) { - tsize = min_t(u32, ((~dest & (ACE_WINDOW_SIZE - 1)) + 1), - min_t(u32, size, ACE_WINDOW_SIZE)); - tdest = (void __iomem *) ®s->Window + - (dest & (ACE_WINDOW_SIZE - 1)); - writel(dest & ~(ACE_WINDOW_SIZE - 1), ®s->WinBase); - - for (i = 0; i < (tsize / 4); i++) { - writel(0, tdest + i*4); - } - - dest += tsize; - size -= tsize; - } -} - - -/* - * Download the firmware into the SRAM on the NIC - * - * This operation requires the NIC to be halted and is performed with - * interrupts disabled and with the spinlock hold. - */ -static int ace_load_firmware(struct net_device *dev) -{ - const struct firmware *fw; - const char *fw_name = "acenic/tg2.bin"; - struct ace_private *ap = netdev_priv(dev); - struct ace_regs __iomem *regs = ap->regs; - const __be32 *fw_data; - u32 load_addr; - int ret; - - if (!(readl(®s->CpuCtrl) & CPU_HALTED)) { - printk(KERN_ERR "%s: trying to download firmware while the " - "CPU is running!\n", ap->name); - return -EFAULT; - } - - if (ACE_IS_TIGON_I(ap)) - fw_name = "acenic/tg1.bin"; - - ret = request_firmware(&fw, fw_name, &ap->pdev->dev); - if (ret) { - printk(KERN_ERR "%s: Failed to load firmware \"%s\"\n", - ap->name, fw_name); - return ret; - } - - fw_data = (void *)fw->data; - - /* Firmware blob starts with version numbers, followed by - load and start address. Remainder is the blob to be loaded - contiguously from load address. We don't bother to represent - the BSS/SBSS sections any more, since we were clearing the - whole thing anyway. */ - ap->firmware_major = fw->data[0]; - ap->firmware_minor = fw->data[1]; - ap->firmware_fix = fw->data[2]; - - ap->firmware_start = be32_to_cpu(fw_data[1]); - if (ap->firmware_start < 0x4000 || ap->firmware_start >= 0x80000) { - printk(KERN_ERR "%s: bogus load address %08x in \"%s\"\n", - ap->name, ap->firmware_start, fw_name); - ret = -EINVAL; - goto out; - } - - load_addr = be32_to_cpu(fw_data[2]); - if (load_addr < 0x4000 || load_addr >= 0x80000) { - printk(KERN_ERR "%s: bogus load address %08x in \"%s\"\n", - ap->name, load_addr, fw_name); - ret = -EINVAL; - goto out; - } - - /* - * Do not try to clear more than 512KiB or we end up seeing - * funny things on NICs with only 512KiB SRAM - */ - ace_clear(regs, 0x2000, 0x80000-0x2000); - ace_copy(regs, &fw_data[3], load_addr, fw->size-12); - out: - release_firmware(fw); - return ret; -} - - -/* - * The eeprom on the AceNIC is an Atmel i2c EEPROM. - * - * Accessing the EEPROM is `interesting' to say the least - don't read - * this code right after dinner. - * - * This is all about black magic and bit-banging the device .... I - * wonder in what hospital they have put the guy who designed the i2c - * specs. - * - * Oh yes, this is only the beginning! - * - * Thanks to Stevarino Webinski for helping tracking down the bugs in the - * code i2c readout code by beta testing all my hacks. - */ -static void eeprom_start(struct ace_regs __iomem *regs) -{ - u32 local; - - readl(®s->LocalCtrl); - udelay(ACE_SHORT_DELAY); - local = readl(®s->LocalCtrl); - local |= EEPROM_DATA_OUT | EEPROM_WRITE_ENABLE; - writel(local, ®s->LocalCtrl); - readl(®s->LocalCtrl); - mb(); - udelay(ACE_SHORT_DELAY); - local |= EEPROM_CLK_OUT; - writel(local, ®s->LocalCtrl); - readl(®s->LocalCtrl); - mb(); - udelay(ACE_SHORT_DELAY); - local &= ~EEPROM_DATA_OUT; - writel(local, ®s->LocalCtrl); - readl(®s->LocalCtrl); - mb(); - udelay(ACE_SHORT_DELAY); - local &= ~EEPROM_CLK_OUT; - writel(local, ®s->LocalCtrl); - readl(®s->LocalCtrl); - mb(); -} - - -static void eeprom_prep(struct ace_regs __iomem *regs, u8 magic) -{ - short i; - u32 local; - - udelay(ACE_SHORT_DELAY); - local = readl(®s->LocalCtrl); - local &= ~EEPROM_DATA_OUT; - local |= EEPROM_WRITE_ENABLE; - writel(local, ®s->LocalCtrl); - readl(®s->LocalCtrl); - mb(); - - for (i = 0; i < 8; i++, magic <<= 1) { - udelay(ACE_SHORT_DELAY); - if (magic & 0x80) - local |= EEPROM_DATA_OUT; - else - local &= ~EEPROM_DATA_OUT; - writel(local, ®s->LocalCtrl); - readl(®s->LocalCtrl); - mb(); - - udelay(ACE_SHORT_DELAY); - local |= EEPROM_CLK_OUT; - writel(local, ®s->LocalCtrl); - readl(®s->LocalCtrl); - mb(); - udelay(ACE_SHORT_DELAY); - local &= ~(EEPROM_CLK_OUT | EEPROM_DATA_OUT); - writel(local, ®s->LocalCtrl); - readl(®s->LocalCtrl); - mb(); - } -} - - -static int eeprom_check_ack(struct ace_regs __iomem *regs) -{ - int state; - u32 local; - - local = readl(®s->LocalCtrl); - local &= ~EEPROM_WRITE_ENABLE; - writel(local, ®s->LocalCtrl); - readl(®s->LocalCtrl); - mb(); - udelay(ACE_LONG_DELAY); - local |= EEPROM_CLK_OUT; - writel(local, ®s->LocalCtrl); - readl(®s->LocalCtrl); - mb(); - udelay(ACE_SHORT_DELAY); - /* sample data in middle of high clk */ - state = (readl(®s->LocalCtrl) & EEPROM_DATA_IN) != 0; - udelay(ACE_SHORT_DELAY); - mb(); - writel(readl(®s->LocalCtrl) & ~EEPROM_CLK_OUT, ®s->LocalCtrl); - readl(®s->LocalCtrl); - mb(); - - return state; -} - - -static void eeprom_stop(struct ace_regs __iomem *regs) -{ - u32 local; - - udelay(ACE_SHORT_DELAY); - local = readl(®s->LocalCtrl); - local |= EEPROM_WRITE_ENABLE; - writel(local, ®s->LocalCtrl); - readl(®s->LocalCtrl); - mb(); - udelay(ACE_SHORT_DELAY); - local &= ~EEPROM_DATA_OUT; - writel(local, ®s->LocalCtrl); - readl(®s->LocalCtrl); - mb(); - udelay(ACE_SHORT_DELAY); - local |= EEPROM_CLK_OUT; - writel(local, ®s->LocalCtrl); - readl(®s->LocalCtrl); - mb(); - udelay(ACE_SHORT_DELAY); - local |= EEPROM_DATA_OUT; - writel(local, ®s->LocalCtrl); - readl(®s->LocalCtrl); - mb(); - udelay(ACE_LONG_DELAY); - local &= ~EEPROM_CLK_OUT; - writel(local, ®s->LocalCtrl); - mb(); -} - - -/* - * Read a whole byte from the EEPROM. - */ -static int read_eeprom_byte(struct net_device *dev, unsigned long offset) -{ - struct ace_private *ap = netdev_priv(dev); - struct ace_regs __iomem *regs = ap->regs; - unsigned long flags; - u32 local; - int result = 0; - short i; - - /* - * Don't take interrupts on this CPU will bit banging - * the %#%#@$ I2C device - */ - local_irq_save(flags); - - eeprom_start(regs); - - eeprom_prep(regs, EEPROM_WRITE_SELECT); - if (eeprom_check_ack(regs)) { - local_irq_restore(flags); - printk(KERN_ERR "%s: Unable to sync eeprom\n", ap->name); - result = -EIO; - goto eeprom_read_error; - } - - eeprom_prep(regs, (offset >> 8) & 0xff); - if (eeprom_check_ack(regs)) { - local_irq_restore(flags); - printk(KERN_ERR "%s: Unable to set address byte 0\n", - ap->name); - result = -EIO; - goto eeprom_read_error; - } - - eeprom_prep(regs, offset & 0xff); - if (eeprom_check_ack(regs)) { - local_irq_restore(flags); - printk(KERN_ERR "%s: Unable to set address byte 1\n", - ap->name); - result = -EIO; - goto eeprom_read_error; - } - - eeprom_start(regs); - eeprom_prep(regs, EEPROM_READ_SELECT); - if (eeprom_check_ack(regs)) { - local_irq_restore(flags); - printk(KERN_ERR "%s: Unable to set READ_SELECT\n", - ap->name); - result = -EIO; - goto eeprom_read_error; - } - - for (i = 0; i < 8; i++) { - local = readl(®s->LocalCtrl); - local &= ~EEPROM_WRITE_ENABLE; - writel(local, ®s->LocalCtrl); - readl(®s->LocalCtrl); - udelay(ACE_LONG_DELAY); - mb(); - local |= EEPROM_CLK_OUT; - writel(local, ®s->LocalCtrl); - readl(®s->LocalCtrl); - mb(); - udelay(ACE_SHORT_DELAY); - /* sample data mid high clk */ - result = (result << 1) | - ((readl(®s->LocalCtrl) & EEPROM_DATA_IN) != 0); - udelay(ACE_SHORT_DELAY); - mb(); - local = readl(®s->LocalCtrl); - local &= ~EEPROM_CLK_OUT; - writel(local, ®s->LocalCtrl); - readl(®s->LocalCtrl); - udelay(ACE_SHORT_DELAY); - mb(); - if (i == 7) { - local |= EEPROM_WRITE_ENABLE; - writel(local, ®s->LocalCtrl); - readl(®s->LocalCtrl); - mb(); - udelay(ACE_SHORT_DELAY); - } - } - - local |= EEPROM_DATA_OUT; - writel(local, ®s->LocalCtrl); - readl(®s->LocalCtrl); - mb(); - udelay(ACE_SHORT_DELAY); - writel(readl(®s->LocalCtrl) | EEPROM_CLK_OUT, ®s->LocalCtrl); - readl(®s->LocalCtrl); - udelay(ACE_LONG_DELAY); - writel(readl(®s->LocalCtrl) & ~EEPROM_CLK_OUT, ®s->LocalCtrl); - readl(®s->LocalCtrl); - mb(); - udelay(ACE_SHORT_DELAY); - eeprom_stop(regs); - - local_irq_restore(flags); - out: - return result; - - eeprom_read_error: - printk(KERN_ERR "%s: Unable to read eeprom byte 0x%02lx\n", - ap->name, offset); - goto out; -} - -module_pci_driver(acenic_pci_driver); diff --git a/drivers/net/ethernet/alteon/acenic.h b/drivers/net/ethernet/alteon/acenic.h deleted file mode 100644 index 0e45a97b9c9b..000000000000 --- a/drivers/net/ethernet/alteon/acenic.h +++ /dev/null @@ -1,791 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ACENIC_H_ -#define _ACENIC_H_ -#include -#include - -/* - * Generate TX index update each time, when TX ring is closed. - * Normally, this is not useful, because results in more dma (and irqs - * without TX_COAL_INTS_ONLY). - */ -#define USE_TX_COAL_NOW 0 - -/* - * Addressing: - * - * The Tigon uses 64-bit host addresses, regardless of their actual - * length, and it expects a big-endian format. For 32 bit systems the - * upper 32 bits of the address are simply ignored (zero), however for - * little endian 64 bit systems (Alpha) this looks strange with the - * two parts of the address word being swapped. - * - * The addresses are split in two 32 bit words for all architectures - * as some of them are in PCI shared memory and it is necessary to use - * readl/writel to access them. - * - * The addressing code is derived from Pete Wyckoff's work, but - * modified to deal properly with readl/writel usage. - */ - -struct ace_regs { - u32 pad0[16]; /* PCI control registers */ - - u32 HostCtrl; /* 0x40 */ - u32 LocalCtrl; - - u32 pad1[2]; - - u32 MiscCfg; /* 0x50 */ - - u32 pad2[2]; - - u32 PciState; - - u32 pad3[2]; /* 0x60 */ - - u32 WinBase; - u32 WinData; - - u32 pad4[12]; /* 0x70 */ - - u32 DmaWriteState; /* 0xa0 */ - u32 pad5[3]; - u32 DmaReadState; /* 0xb0 */ - - u32 pad6[26]; - - u32 AssistState; - - u32 pad7[8]; /* 0x120 */ - - u32 CpuCtrl; /* 0x140 */ - u32 Pc; - - u32 pad8[3]; - - u32 SramAddr; /* 0x154 */ - u32 SramData; - - u32 pad9[49]; - - u32 MacRxState; /* 0x220 */ - - u32 pad10[7]; - - u32 CpuBCtrl; /* 0x240 */ - u32 PcB; - - u32 pad11[3]; - - u32 SramBAddr; /* 0x254 */ - u32 SramBData; - - u32 pad12[105]; - - u32 pad13[32]; /* 0x400 */ - u32 Stats[32]; - - u32 Mb0Hi; /* 0x500 */ - u32 Mb0Lo; - u32 Mb1Hi; - u32 CmdPrd; - u32 Mb2Hi; - u32 TxPrd; - u32 Mb3Hi; - u32 RxStdPrd; - u32 Mb4Hi; - u32 RxJumboPrd; - u32 Mb5Hi; - u32 RxMiniPrd; - u32 Mb6Hi; - u32 Mb6Lo; - u32 Mb7Hi; - u32 Mb7Lo; - u32 Mb8Hi; - u32 Mb8Lo; - u32 Mb9Hi; - u32 Mb9Lo; - u32 MbAHi; - u32 MbALo; - u32 MbBHi; - u32 MbBLo; - u32 MbCHi; - u32 MbCLo; - u32 MbDHi; - u32 MbDLo; - u32 MbEHi; - u32 MbELo; - u32 MbFHi; - u32 MbFLo; - - u32 pad14[32]; - - u32 MacAddrHi; /* 0x600 */ - u32 MacAddrLo; - u32 InfoPtrHi; - u32 InfoPtrLo; - u32 MultiCastHi; /* 0x610 */ - u32 MultiCastLo; - u32 ModeStat; - u32 DmaReadCfg; - u32 DmaWriteCfg; /* 0x620 */ - u32 TxBufRat; - u32 EvtCsm; - u32 CmdCsm; - u32 TuneRxCoalTicks;/* 0x630 */ - u32 TuneTxCoalTicks; - u32 TuneStatTicks; - u32 TuneMaxTxDesc; - u32 TuneMaxRxDesc; /* 0x640 */ - u32 TuneTrace; - u32 TuneLink; - u32 TuneFastLink; - u32 TracePtr; /* 0x650 */ - u32 TraceStrt; - u32 TraceLen; - u32 IfIdx; - u32 IfMtu; /* 0x660 */ - u32 MaskInt; - u32 GigLnkState; - u32 FastLnkState; - u32 pad16[4]; /* 0x670 */ - u32 RxRetCsm; /* 0x680 */ - - u32 pad17[31]; - - u32 CmdRng[64]; /* 0x700 */ - u32 Window[0x200]; -}; - - -typedef struct { - u32 addrhi; - u32 addrlo; -} aceaddr; - - -#define ACE_WINDOW_SIZE 0x800 - -#define ACE_JUMBO_MTU 9000 -#define ACE_STD_MTU 1500 - -#define ACE_TRACE_SIZE 0x8000 - -/* - * Host control register bits. - */ - -#define IN_INT 0x01 -#define CLR_INT 0x02 -#define HW_RESET 0x08 -#define BYTE_SWAP 0x10 -#define WORD_SWAP 0x20 -#define MASK_INTS 0x40 - -/* - * Local control register bits. - */ - -#define EEPROM_DATA_IN 0x800000 -#define EEPROM_DATA_OUT 0x400000 -#define EEPROM_WRITE_ENABLE 0x200000 -#define EEPROM_CLK_OUT 0x100000 - -#define EEPROM_BASE 0xa0000000 - -#define EEPROM_WRITE_SELECT 0xa0 -#define EEPROM_READ_SELECT 0xa1 - -#define SRAM_BANK_512K 0x200 - - -/* - * udelay() values for when clocking the eeprom - */ -#define ACE_SHORT_DELAY 2 -#define ACE_LONG_DELAY 4 - - -/* - * Misc Config bits - */ - -#define SYNC_SRAM_TIMING 0x100000 - - -/* - * CPU state bits. - */ - -#define CPU_RESET 0x01 -#define CPU_TRACE 0x02 -#define CPU_PROM_FAILED 0x10 -#define CPU_HALT 0x00010000 -#define CPU_HALTED 0xffff0000 - - -/* - * PCI State bits. - */ - -#define DMA_READ_MAX_4 0x04 -#define DMA_READ_MAX_16 0x08 -#define DMA_READ_MAX_32 0x0c -#define DMA_READ_MAX_64 0x10 -#define DMA_READ_MAX_128 0x14 -#define DMA_READ_MAX_256 0x18 -#define DMA_READ_MAX_1K 0x1c -#define DMA_WRITE_MAX_4 0x20 -#define DMA_WRITE_MAX_16 0x40 -#define DMA_WRITE_MAX_32 0x60 -#define DMA_WRITE_MAX_64 0x80 -#define DMA_WRITE_MAX_128 0xa0 -#define DMA_WRITE_MAX_256 0xc0 -#define DMA_WRITE_MAX_1K 0xe0 -#define DMA_READ_WRITE_MASK 0xfc -#define MEM_READ_MULTIPLE 0x00020000 -#define PCI_66MHZ 0x00080000 -#define PCI_32BIT 0x00100000 -#define DMA_WRITE_ALL_ALIGN 0x00800000 -#define READ_CMD_MEM 0x06000000 -#define WRITE_CMD_MEM 0x70000000 - - -/* - * Mode status - */ - -#define ACE_BYTE_SWAP_BD 0x02 -#define ACE_WORD_SWAP_BD 0x04 /* not actually used */ -#define ACE_WARN 0x08 -#define ACE_BYTE_SWAP_DMA 0x10 -#define ACE_NO_JUMBO_FRAG 0x200 -#define ACE_FATAL 0x40000000 - - -/* - * DMA config - */ - -#define DMA_THRESH_1W 0x10 -#define DMA_THRESH_2W 0x20 -#define DMA_THRESH_4W 0x40 -#define DMA_THRESH_8W 0x80 -#define DMA_THRESH_16W 0x100 -#define DMA_THRESH_32W 0x0 /* not described in doc, but exists. */ - - -/* - * Tuning parameters - */ - -#define TICKS_PER_SEC 1000000 - - -/* - * Link bits - */ - -#define LNK_PREF 0x00008000 -#define LNK_10MB 0x00010000 -#define LNK_100MB 0x00020000 -#define LNK_1000MB 0x00040000 -#define LNK_FULL_DUPLEX 0x00080000 -#define LNK_HALF_DUPLEX 0x00100000 -#define LNK_TX_FLOW_CTL_Y 0x00200000 -#define LNK_NEG_ADVANCED 0x00400000 -#define LNK_RX_FLOW_CTL_Y 0x00800000 -#define LNK_NIC 0x01000000 -#define LNK_JAM 0x02000000 -#define LNK_JUMBO 0x04000000 -#define LNK_ALTEON 0x08000000 -#define LNK_NEG_FCTL 0x10000000 -#define LNK_NEGOTIATE 0x20000000 -#define LNK_ENABLE 0x40000000 -#define LNK_UP 0x80000000 - - -/* - * Event definitions - */ - -#define EVT_RING_ENTRIES 256 -#define EVT_RING_SIZE (EVT_RING_ENTRIES * sizeof(struct event)) - -struct event { -#ifdef __LITTLE_ENDIAN_BITFIELD - u32 idx:12; - u32 code:12; - u32 evt:8; -#else - u32 evt:8; - u32 code:12; - u32 idx:12; -#endif - u32 pad; -}; - - -/* - * Events - */ - -#define E_FW_RUNNING 0x01 -#define E_STATS_UPDATED 0x04 - -#define E_STATS_UPDATE 0x04 - -#define E_LNK_STATE 0x06 -#define E_C_LINK_UP 0x01 -#define E_C_LINK_DOWN 0x02 -#define E_C_LINK_10_100 0x03 - -#define E_ERROR 0x07 -#define E_C_ERR_INVAL_CMD 0x01 -#define E_C_ERR_UNIMP_CMD 0x02 -#define E_C_ERR_BAD_CFG 0x03 - -#define E_MCAST_LIST 0x08 -#define E_C_MCAST_ADDR_ADD 0x01 -#define E_C_MCAST_ADDR_DEL 0x02 - -#define E_RESET_JUMBO_RNG 0x09 - - -/* - * Commands - */ - -#define CMD_RING_ENTRIES 64 - -struct cmd { -#ifdef __LITTLE_ENDIAN_BITFIELD - u32 idx:12; - u32 code:12; - u32 evt:8; -#else - u32 evt:8; - u32 code:12; - u32 idx:12; -#endif -}; - - -#define C_HOST_STATE 0x01 -#define C_C_STACK_UP 0x01 -#define C_C_STACK_DOWN 0x02 - -#define C_FDR_FILTERING 0x02 -#define C_C_FDR_FILT_ENABLE 0x01 -#define C_C_FDR_FILT_DISABLE 0x02 - -#define C_SET_RX_PRD_IDX 0x03 -#define C_UPDATE_STATS 0x04 -#define C_RESET_JUMBO_RNG 0x05 -#define C_ADD_MULTICAST_ADDR 0x08 -#define C_DEL_MULTICAST_ADDR 0x09 - -#define C_SET_PROMISC_MODE 0x0a -#define C_C_PROMISC_ENABLE 0x01 -#define C_C_PROMISC_DISABLE 0x02 - -#define C_LNK_NEGOTIATION 0x0b -#define C_C_NEGOTIATE_BOTH 0x00 -#define C_C_NEGOTIATE_GIG 0x01 -#define C_C_NEGOTIATE_10_100 0x02 - -#define C_SET_MAC_ADDR 0x0c -#define C_CLEAR_PROFILE 0x0d - -#define C_SET_MULTICAST_MODE 0x0e -#define C_C_MCAST_ENABLE 0x01 -#define C_C_MCAST_DISABLE 0x02 - -#define C_CLEAR_STATS 0x0f -#define C_SET_RX_JUMBO_PRD_IDX 0x10 -#define C_REFRESH_STATS 0x11 - - -/* - * Descriptor flags - */ -#define BD_FLG_TCP_UDP_SUM 0x01 -#define BD_FLG_IP_SUM 0x02 -#define BD_FLG_END 0x04 -#define BD_FLG_MORE 0x08 -#define BD_FLG_JUMBO 0x10 -#define BD_FLG_UCAST 0x20 -#define BD_FLG_MCAST 0x40 -#define BD_FLG_BCAST 0x60 -#define BD_FLG_TYP_MASK 0x60 -#define BD_FLG_IP_FRAG 0x80 -#define BD_FLG_IP_FRAG_END 0x100 -#define BD_FLG_VLAN_TAG 0x200 -#define BD_FLG_FRAME_ERROR 0x400 -#define BD_FLG_COAL_NOW 0x800 -#define BD_FLG_MINI 0x1000 - - -/* - * Ring Control block flags - */ -#define RCB_FLG_TCP_UDP_SUM 0x01 -#define RCB_FLG_IP_SUM 0x02 -#define RCB_FLG_NO_PSEUDO_HDR 0x08 -#define RCB_FLG_VLAN_ASSIST 0x10 -#define RCB_FLG_COAL_INT_ONLY 0x20 -#define RCB_FLG_TX_HOST_RING 0x40 -#define RCB_FLG_IEEE_SNAP_SUM 0x80 -#define RCB_FLG_EXT_RX_BD 0x100 -#define RCB_FLG_RNG_DISABLE 0x200 - - -/* - * TX ring - maximum TX ring entries for Tigon I's is 128 - */ -#define MAX_TX_RING_ENTRIES 256 -#define TIGON_I_TX_RING_ENTRIES 128 -#define TX_RING_SIZE (MAX_TX_RING_ENTRIES * sizeof(struct tx_desc)) -#define TX_RING_BASE 0x3800 - -struct tx_desc{ - aceaddr addr; - u32 flagsize; -#if 0 -/* - * This is in PCI shared mem and must be accessed with readl/writel - * real layout is: - */ -#if __LITTLE_ENDIAN - u16 flags; - u16 size; - u16 vlan; - u16 reserved; -#else - u16 size; - u16 flags; - u16 reserved; - u16 vlan; -#endif -#endif - u32 vlanres; -}; - - -#define RX_STD_RING_ENTRIES 512 -#define RX_STD_RING_SIZE (RX_STD_RING_ENTRIES * sizeof(struct rx_desc)) - -#define RX_JUMBO_RING_ENTRIES 256 -#define RX_JUMBO_RING_SIZE (RX_JUMBO_RING_ENTRIES *sizeof(struct rx_desc)) - -#define RX_MINI_RING_ENTRIES 1024 -#define RX_MINI_RING_SIZE (RX_MINI_RING_ENTRIES *sizeof(struct rx_desc)) - -#define RX_RETURN_RING_ENTRIES 2048 -#define RX_RETURN_RING_SIZE (RX_MAX_RETURN_RING_ENTRIES * \ - sizeof(struct rx_desc)) - -struct rx_desc{ - aceaddr addr; -#ifdef __LITTLE_ENDIAN - u16 size; - u16 idx; -#else - u16 idx; - u16 size; -#endif -#ifdef __LITTLE_ENDIAN - u16 flags; - u16 type; -#else - u16 type; - u16 flags; -#endif -#ifdef __LITTLE_ENDIAN - u16 tcp_udp_csum; - u16 ip_csum; -#else - u16 ip_csum; - u16 tcp_udp_csum; -#endif -#ifdef __LITTLE_ENDIAN - u16 vlan; - u16 err_flags; -#else - u16 err_flags; - u16 vlan; -#endif - u32 reserved; - u32 opague; -}; - - -/* - * This struct is shared with the NIC firmware. - */ -struct ring_ctrl { - aceaddr rngptr; -#ifdef __LITTLE_ENDIAN - u16 flags; - u16 max_len; -#else - u16 max_len; - u16 flags; -#endif - u32 pad; -}; - - -struct ace_mac_stats { - u32 excess_colls; - u32 coll_1; - u32 coll_2; - u32 coll_3; - u32 coll_4; - u32 coll_5; - u32 coll_6; - u32 coll_7; - u32 coll_8; - u32 coll_9; - u32 coll_10; - u32 coll_11; - u32 coll_12; - u32 coll_13; - u32 coll_14; - u32 coll_15; - u32 late_coll; - u32 defers; - u32 crc_err; - u32 underrun; - u32 crs_err; - u32 pad[3]; - u32 drop_ula; - u32 drop_mc; - u32 drop_fc; - u32 drop_space; - u32 coll; - u32 kept_bc; - u32 kept_mc; - u32 kept_uc; -}; - - -struct ace_info { - union { - u32 stats[256]; - } s; - struct ring_ctrl evt_ctrl; - struct ring_ctrl cmd_ctrl; - struct ring_ctrl tx_ctrl; - struct ring_ctrl rx_std_ctrl; - struct ring_ctrl rx_jumbo_ctrl; - struct ring_ctrl rx_mini_ctrl; - struct ring_ctrl rx_return_ctrl; - aceaddr evt_prd_ptr; - aceaddr rx_ret_prd_ptr; - aceaddr tx_csm_ptr; - aceaddr stats2_ptr; -}; - - -struct ring_info { - struct sk_buff *skb; - DEFINE_DMA_UNMAP_ADDR(mapping); -}; - - -/* - * Funny... As soon as we add maplen on alpha, it starts to work - * much slower. Hmm... is it because struct does not fit to one cacheline? - * So, split tx_ring_info. - */ -struct tx_ring_info { - struct sk_buff *skb; - DEFINE_DMA_UNMAP_ADDR(mapping); - DEFINE_DMA_UNMAP_LEN(maplen); -}; - - -/* - * struct ace_skb holding the rings of skb's. This is an awful lot of - * pointers, but I don't see any other smart mode to do this in an - * efficient manner ;-( - */ -struct ace_skb -{ - struct tx_ring_info tx_skbuff[MAX_TX_RING_ENTRIES]; - struct ring_info rx_std_skbuff[RX_STD_RING_ENTRIES]; - struct ring_info rx_mini_skbuff[RX_MINI_RING_ENTRIES]; - struct ring_info rx_jumbo_skbuff[RX_JUMBO_RING_ENTRIES]; -}; - - -/* - * Struct private for the AceNIC. - * - * Elements are grouped so variables used by the tx handling goes - * together, and will go into the same cache lines etc. in order to - * avoid cache line contention between the rx and tx handling on SMP. - * - * Frequently accessed variables are put at the beginning of the - * struct to help the compiler generate better/shorter code. - */ -struct ace_private -{ - struct net_device *ndev; /* backpointer */ - struct ace_info *info; - struct ace_regs __iomem *regs; /* register base */ - struct ace_skb *skb; - dma_addr_t info_dma; /* 32/64 bit */ - - int version, link; - int promisc, mcast_all; - - /* - * TX elements - */ - struct tx_desc *tx_ring; - u32 tx_prd; - volatile u32 tx_ret_csm; - int tx_ring_entries; - - /* - * RX elements - */ - unsigned long std_refill_busy - __attribute__ ((aligned (SMP_CACHE_BYTES))); - unsigned long mini_refill_busy, jumbo_refill_busy; - atomic_t cur_rx_bufs; - atomic_t cur_mini_bufs; - atomic_t cur_jumbo_bufs; - u32 rx_std_skbprd, rx_mini_skbprd, rx_jumbo_skbprd; - u32 cur_rx; - - struct rx_desc *rx_std_ring; - struct rx_desc *rx_jumbo_ring; - struct rx_desc *rx_mini_ring; - struct rx_desc *rx_return_ring; - - int bh_work_pending, jumbo; - struct work_struct ace_bh_work; - - struct event *evt_ring; - - volatile u32 *evt_prd, *rx_ret_prd, *tx_csm; - - dma_addr_t tx_ring_dma; /* 32/64 bit */ - dma_addr_t rx_ring_base_dma; - dma_addr_t evt_ring_dma; - dma_addr_t evt_prd_dma, rx_ret_prd_dma, tx_csm_dma; - - unsigned char *trace_buf; - struct pci_dev *pdev; - struct net_device *next; - volatile int fw_running; - int board_idx; - u16 pci_command; - u8 pci_latency; - const char *name; -#ifdef INDEX_DEBUG - spinlock_t debug_lock - __attribute__ ((aligned (SMP_CACHE_BYTES))); - u32 last_tx, last_std_rx, last_mini_rx; -#endif - u8 firmware_major; - u8 firmware_minor; - u8 firmware_fix; - u32 firmware_start; -}; - - -#define TX_RESERVED MAX_SKB_FRAGS - -static inline int tx_space (struct ace_private *ap, u32 csm, u32 prd) -{ - return (csm - prd - 1) & (ACE_TX_RING_ENTRIES(ap) - 1); -} - -#define tx_free(ap) tx_space((ap)->tx_ret_csm, (ap)->tx_prd, ap) -#define tx_ring_full(ap, csm, prd) (tx_space(ap, csm, prd) <= TX_RESERVED) - -static inline void set_aceaddr(aceaddr *aa, dma_addr_t addr) -{ - u64 baddr = (u64) addr; - aa->addrlo = baddr & 0xffffffff; - aa->addrhi = baddr >> 32; - wmb(); -} - - -static inline void ace_set_txprd(struct ace_regs __iomem *regs, - struct ace_private *ap, u32 value) -{ -#ifdef INDEX_DEBUG - unsigned long flags; - spin_lock_irqsave(&ap->debug_lock, flags); - writel(value, ®s->TxPrd); - if (value == ap->last_tx) - printk(KERN_ERR "AceNIC RACE ALERT! writing identical value " - "to tx producer (%i)\n", value); - ap->last_tx = value; - spin_unlock_irqrestore(&ap->debug_lock, flags); -#else - writel(value, ®s->TxPrd); -#endif - wmb(); -} - - -static inline void ace_mask_irq(struct net_device *dev) -{ - struct ace_private *ap = netdev_priv(dev); - struct ace_regs __iomem *regs = ap->regs; - - if (ACE_IS_TIGON_I(ap)) - writel(1, ®s->MaskInt); - else - writel(readl(®s->HostCtrl) | MASK_INTS, ®s->HostCtrl); - - ace_sync_irq(dev->irq); -} - - -static inline void ace_unmask_irq(struct net_device *dev) -{ - struct ace_private *ap = netdev_priv(dev); - struct ace_regs __iomem *regs = ap->regs; - - if (ACE_IS_TIGON_I(ap)) - writel(0, ®s->MaskInt); - else - writel(readl(®s->HostCtrl) & ~MASK_INTS, ®s->HostCtrl); -} - - -/* - * Prototypes - */ -static int ace_init(struct net_device *dev); -static void ace_load_std_rx_ring(struct net_device *dev, int nr_bufs); -static void ace_load_mini_rx_ring(struct net_device *dev, int nr_bufs); -static void ace_load_jumbo_rx_ring(struct net_device *dev, int nr_bufs); -static irqreturn_t ace_interrupt(int irq, void *dev_id); -static int ace_load_firmware(struct net_device *dev); -static int ace_open(struct net_device *dev); -static netdev_tx_t ace_start_xmit(struct sk_buff *skb, - struct net_device *dev); -static int ace_close(struct net_device *dev); -static void ace_bh_work(struct work_struct *work); -static void ace_dump_trace(struct ace_private *ap); -static void ace_set_multicast_list(struct net_device *dev); -static int ace_change_mtu(struct net_device *dev, int new_mtu); -static int ace_set_mac_addr(struct net_device *dev, void *p); -static void ace_set_rxtx_parms(struct net_device *dev, int jumbo); -static int ace_allocate_descriptors(struct net_device *dev); -static void ace_free_descriptors(struct net_device *dev); -static void ace_init_cleanup(struct net_device *dev); -static struct net_device_stats *ace_get_stats(struct net_device *dev); -static int read_eeprom_byte(struct net_device *dev, unsigned long offset); - -#endif /* _ACENIC_H_ */ From 961f3c535608df64553f61d64ca086aa9f371bdd Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 2 Apr 2026 20:46:53 +0200 Subject: [PATCH 1175/1561] net: ethernet: ti-cpsw:: rename soft_reset() function While looking at the glob symbols shared between the cpsw drivers, I noticed that soft_reset() is the only one that is missing a proper namespace prefix, and will pollute the kernel namespace, so rename it to be consistent with the other symbols. Reviewed-by: Alexander Sverdlin Signed-off-by: Arnd Bergmann Link: https://patch.msgid.link/20260402184726.3746487-1-arnd@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/cpsw.c | 2 +- drivers/net/ethernet/ti/cpsw_new.c | 2 +- drivers/net/ethernet/ti/cpsw_priv.c | 2 +- drivers/net/ethernet/ti/cpsw_priv.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index b0e18bdc2c85..aa3531e844e8 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -706,7 +706,7 @@ static void cpsw_init_host_port(struct cpsw_priv *priv) struct cpsw_common *cpsw = priv->cpsw; /* soft reset the controller and initialize ale */ - soft_reset("cpsw", &cpsw->regs->soft_reset); + cpsw_soft_reset("cpsw", &cpsw->regs->soft_reset); cpsw_ale_start(cpsw->ale); /* switch to vlan aware mode */ diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index 7f42f58a4b03..c5be359f3c66 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -573,7 +573,7 @@ static void cpsw_init_host_port(struct cpsw_priv *priv) u32 control_reg; /* soft reset the controller and initialize ale */ - soft_reset("cpsw", &cpsw->regs->soft_reset); + cpsw_soft_reset("cpsw", &cpsw->regs->soft_reset); cpsw_ale_start(cpsw->ale); /* switch to vlan aware mode */ diff --git a/drivers/net/ethernet/ti/cpsw_priv.c b/drivers/net/ethernet/ti/cpsw_priv.c index bc4fdf17a99e..c6eb6b785b0b 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.c +++ b/drivers/net/ethernet/ti/cpsw_priv.c @@ -275,7 +275,7 @@ void cpsw_set_slave_mac(struct cpsw_slave *slave, struct cpsw_priv *priv) slave_write(slave, mac_lo(priv->mac_addr), SA_LO); } -void soft_reset(const char *module, void __iomem *reg) +void cpsw_soft_reset(const char *module, void __iomem *reg) { unsigned long timeout = jiffies + HZ; diff --git a/drivers/net/ethernet/ti/cpsw_priv.h b/drivers/net/ethernet/ti/cpsw_priv.h index acb6181c5c9e..fddd7a79f4b0 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.h +++ b/drivers/net/ethernet/ti/cpsw_priv.h @@ -458,7 +458,7 @@ int cpsw_tx_poll(struct napi_struct *napi_tx, int budget); int cpsw_rx_mq_poll(struct napi_struct *napi_rx, int budget); int cpsw_rx_poll(struct napi_struct *napi_rx, int budget); void cpsw_rx_vlan_encap(struct sk_buff *skb); -void soft_reset(const char *module, void __iomem *reg); +void cpsw_soft_reset(const char *module, void __iomem *reg); void cpsw_set_slave_mac(struct cpsw_slave *slave, struct cpsw_priv *priv); void cpsw_ndo_tx_timeout(struct net_device *ndev, unsigned int txqueue); int cpsw_need_resplit(struct cpsw_common *cpsw); From df75bd552a8790e83d4aeb5f112050cf3dc687bf Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 2 Apr 2026 20:46:54 +0200 Subject: [PATCH 1176/1561] net: ethernet: ti-cpsw: fix linking built-in code to modules There are six variants of the cpsw driver, sharing various parts of the code: davinci-emac, cpsw, cpsw-switchdev, netcp, netcp_ethss and am65-cpsw-nuss. I noticed that this means some files can be linked into more than one loadable module, or even part of vmlinux but also linked into a loadable module, both of which mess up assumptions of the build system, and causes warnings: scripts/Makefile.build:279: cpsw_ale.o is added to multiple modules: ti-am65-cpsw-nuss ti_cpsw ti_cpsw_new scripts/Makefile.build:279: cpsw_priv.o is added to multiple modules: ti_cpsw ti_cpsw_new scripts/Makefile.build:279: cpsw_sl.o is added to multiple modules: ti-am65-cpsw-nuss ti_cpsw ti_cpsw_new scripts/Makefile.build:279: cpsw_ethtool.o is added to multiple modules: ti_cpsw ti_cpsw_new scripts/Makefile.build:279: davinci_cpdma.o is added to multiple modules: ti_cpsw ti_cpsw_new ti_davinci_emac Change this back to having separate modules for each portion that can be linked standalone, exporting symbols as needed: - ti-cpsw-common.ko now contains both cpsw-common.o and davinci_cpdma.o as they are always used together - ti-cpsw-priv.ko contains cpsw_priv.o, cpsw_sl.o and cpsw_ethtool.o, which are the core of the cpsw and cpsw-new drivers. - ti-cpsw-sl.ko contains the cpsw-sl.o object and is used on ti-am65-cpsw-nuss.ko in addition to the two other cpsw variants. - ti-cpsw-ale.o is the one standalone module that is used by all except davinci_emac. Each of these will be built-in if any of its users are built-in, otherwise it's a loadable module if there is at least one module using it. I did not bring back the separate Kconfig symbols for this, but just handle it using Makefile logic. Note: ideally this is something that Kbuild complains about, but usually we just notice when something using THIS_MODULE misbehaves in a way that a user notices. Fixes: 99f6297182729 ("net: ethernet: ti: cpsw: drop TI_DAVINCI_CPDMA config option") Link: https://lore.kernel.org/lkml/20240417084400.3034104-1-arnd@kernel.org/ Signed-off-by: Arnd Bergmann Link: https://patch.msgid.link/20260402184726.3746487-2-arnd@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/Makefile | 30 ++++++++++---------- drivers/net/ethernet/ti/cpsw_ale.c | 25 +++++++++++++++++ drivers/net/ethernet/ti/cpsw_ethtool.c | 24 ++++++++++++++++ drivers/net/ethernet/ti/cpsw_priv.c | 37 +++++++++++++++++++++++++ drivers/net/ethernet/ti/cpsw_sl.c | 11 ++++++++ drivers/net/ethernet/ti/davinci_cpdma.c | 27 ++++++++++++++++++ 6 files changed, 139 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/ti/Makefile b/drivers/net/ethernet/ti/Makefile index 6da50f4b7c2e..f4276c9a7762 100644 --- a/drivers/net/ethernet/ti/Makefile +++ b/drivers/net/ethernet/ti/Makefile @@ -6,30 +6,30 @@ obj-$(CONFIG_TI_PRUETH) += icssm-prueth.o icssm-prueth-y := icssm/icssm_prueth.o icssm/icssm_prueth_switch.o icssm/icssm_switchdev.o -obj-$(CONFIG_TI_CPSW) += cpsw-common.o -obj-$(CONFIG_TI_DAVINCI_EMAC) += cpsw-common.o -obj-$(CONFIG_TI_CPSW_SWITCHDEV) += cpsw-common.o +ti-cpsw-common-y += cpsw-common.o davinci_cpdma.o +ti-cpsw-priv-y += cpsw_priv.o cpsw_ethtool.o +ti-cpsw-ale-y += cpsw_ale.o +ti-cpsw-sl-y += cpsw_sl.o obj-$(CONFIG_TLAN) += tlan.o -obj-$(CONFIG_TI_DAVINCI_EMAC) += ti_davinci_emac.o -ti_davinci_emac-y := davinci_emac.o davinci_cpdma.o +obj-$(CONFIG_TI_DAVINCI_EMAC) += davinci_emac.o ti-cpsw-common.o obj-$(CONFIG_TI_DAVINCI_MDIO) += davinci_mdio.o obj-$(CONFIG_TI_CPSW_PHY_SEL) += cpsw-phy-sel.o obj-$(CONFIG_TI_CPTS) += cpts.o -obj-$(CONFIG_TI_CPSW) += ti_cpsw.o -ti_cpsw-y := cpsw.o davinci_cpdma.o cpsw_ale.o cpsw_priv.o cpsw_sl.o cpsw_ethtool.o -obj-$(CONFIG_TI_CPSW_SWITCHDEV) += ti_cpsw_new.o -ti_cpsw_new-y := cpsw_switchdev.o cpsw_new.o davinci_cpdma.o cpsw_ale.o cpsw_sl.o cpsw_priv.o cpsw_ethtool.o +obj-$(CONFIG_TI_CPSW) += ti_cpsw.o ti-cpsw-common.o ti-cpsw-priv.o ti-cpsw-ale.o ti-cpsw-sl.o +ti_cpsw-y := cpsw.o +obj-$(CONFIG_TI_CPSW_SWITCHDEV) += ti_cpsw_new.o ti-cpsw-common.o ti-cpsw-priv.o ti-cpsw-ale.o ti-cpsw-sl.o +ti_cpsw_new-y := cpsw_switchdev.o cpsw_new.o -obj-$(CONFIG_TI_KEYSTONE_NETCP) += keystone_netcp.o -keystone_netcp-y := netcp_core.o cpsw_ale.o -obj-$(CONFIG_TI_KEYSTONE_NETCP_ETHSS) += keystone_netcp_ethss.o -keystone_netcp_ethss-y := netcp_ethss.o netcp_sgmii.o netcp_xgbepcsr.o cpsw_ale.o +obj-$(CONFIG_TI_KEYSTONE_NETCP) += keystone_netcp.o ti-cpsw-ale.o +keystone_netcp-y := netcp_core.o +obj-$(CONFIG_TI_KEYSTONE_NETCP_ETHSS) += keystone_netcp_ethss.o ti-cpsw-ale.o +keystone_netcp_ethss-y := netcp_ethss.o netcp_sgmii.o netcp_xgbepcsr.o obj-$(CONFIG_TI_K3_CPPI_DESC_POOL) += k3-cppi-desc-pool.o -obj-$(CONFIG_TI_K3_AM65_CPSW_NUSS) += ti-am65-cpsw-nuss.o -ti-am65-cpsw-nuss-y := am65-cpsw-nuss.o cpsw_sl.o am65-cpsw-ethtool.o cpsw_ale.o +obj-$(CONFIG_TI_K3_AM65_CPSW_NUSS) += ti-am65-cpsw-nuss.o ti-cpsw-sl.o ti-cpsw-ale.o +ti-am65-cpsw-nuss-y := am65-cpsw-nuss.o am65-cpsw-ethtool.o ti-am65-cpsw-nuss-$(CONFIG_TI_AM65_CPSW_QOS) += am65-cpsw-qos.o ti-am65-cpsw-nuss-$(CONFIG_TI_K3_AM65_CPSW_SWITCHDEV) += am65-cpsw-switchdev.o obj-$(CONFIG_TI_K3_AM65_CPTS) += am65-cpts.o diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c index be7b69319221..e202bba49480 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.c +++ b/drivers/net/ethernet/ti/cpsw_ale.c @@ -493,6 +493,7 @@ int cpsw_ale_flush_multicast(struct cpsw_ale *ale, int port_mask, int vid) } return 0; } +EXPORT_SYMBOL_GPL(cpsw_ale_flush_multicast); static inline void cpsw_ale_set_vlan_entry_type(u32 *ale_entry, int flags, u16 vid) @@ -530,6 +531,7 @@ int cpsw_ale_add_ucast(struct cpsw_ale *ale, const u8 *addr, int port, cpsw_ale_write(ale, idx, ale_entry); return 0; } +EXPORT_SYMBOL_GPL(cpsw_ale_add_ucast); int cpsw_ale_del_ucast(struct cpsw_ale *ale, const u8 *addr, int port, int flags, u16 vid) @@ -545,6 +547,7 @@ int cpsw_ale_del_ucast(struct cpsw_ale *ale, const u8 *addr, int port, cpsw_ale_write(ale, idx, ale_entry); return 0; } +EXPORT_SYMBOL_GPL(cpsw_ale_del_ucast); int cpsw_ale_add_mcast(struct cpsw_ale *ale, const u8 *addr, int port_mask, int flags, u16 vid, int mcast_state) @@ -578,6 +581,7 @@ int cpsw_ale_add_mcast(struct cpsw_ale *ale, const u8 *addr, int port_mask, cpsw_ale_write(ale, idx, ale_entry); return 0; } +EXPORT_SYMBOL_GPL(cpsw_ale_add_mcast); int cpsw_ale_del_mcast(struct cpsw_ale *ale, const u8 *addr, int port_mask, int flags, u16 vid) @@ -607,6 +611,7 @@ int cpsw_ale_del_mcast(struct cpsw_ale *ale, const u8 *addr, int port_mask, cpsw_ale_write(ale, idx, ale_entry); return 0; } +EXPORT_SYMBOL_GPL(cpsw_ale_del_mcast); /* ALE NetCP NU switch specific vlan functions */ static void cpsw_ale_set_vlan_mcast(struct cpsw_ale *ale, u32 *ale_entry, @@ -676,6 +681,7 @@ int cpsw_ale_add_vlan(struct cpsw_ale *ale, u16 vid, int port_mask, int untag, cpsw_ale_write(ale, idx, ale_entry); return 0; } +EXPORT_SYMBOL_GPL(cpsw_ale_add_vlan); static void cpsw_ale_vlan_del_modify_int(struct cpsw_ale *ale, u32 *ale_entry, u16 vid, int port_mask) @@ -733,6 +739,7 @@ int cpsw_ale_vlan_del_modify(struct cpsw_ale *ale, u16 vid, int port_mask) return 0; } +EXPORT_SYMBOL_GPL(cpsw_ale_vlan_del_modify); int cpsw_ale_del_vlan(struct cpsw_ale *ale, u16 vid, int port_mask) { @@ -767,6 +774,7 @@ int cpsw_ale_del_vlan(struct cpsw_ale *ale, u16 vid, int port_mask) return 0; } +EXPORT_SYMBOL_GPL(cpsw_ale_del_vlan); int cpsw_ale_vlan_add_modify(struct cpsw_ale *ale, u16 vid, int port_mask, int untag_mask, int reg_mask, int unreg_mask) @@ -806,6 +814,7 @@ int cpsw_ale_vlan_add_modify(struct cpsw_ale *ale, u16 vid, int port_mask, return ret; } +EXPORT_SYMBOL_GPL(cpsw_ale_vlan_add_modify); void cpsw_ale_set_unreg_mcast(struct cpsw_ale *ale, int unreg_mcast_mask, bool add) @@ -833,6 +842,7 @@ void cpsw_ale_set_unreg_mcast(struct cpsw_ale *ale, int unreg_mcast_mask, cpsw_ale_write(ale, idx, ale_entry); } } +EXPORT_SYMBOL_GPL(cpsw_ale_set_unreg_mcast); static void cpsw_ale_vlan_set_unreg_mcast(struct cpsw_ale *ale, u32 *ale_entry, int allmulti) @@ -898,6 +908,7 @@ void cpsw_ale_set_allmulti(struct cpsw_ale *ale, int allmulti, int port) cpsw_ale_write(ale, idx, ale_entry); } } +EXPORT_SYMBOL_GPL(cpsw_ale_set_allmulti); struct ale_control_info { const char *name; @@ -1155,6 +1166,7 @@ int cpsw_ale_control_set(struct cpsw_ale *ale, int port, int control, return 0; } +EXPORT_SYMBOL_GPL(cpsw_ale_control_set); int cpsw_ale_control_get(struct cpsw_ale *ale, int port, int control) { @@ -1178,6 +1190,7 @@ int cpsw_ale_control_get(struct cpsw_ale *ale, int port, int control) tmp = readl_relaxed(ale->params.ale_regs + offset) >> shift; return tmp & BITMASK(info->bits); } +EXPORT_SYMBOL_GPL(cpsw_ale_control_get); int cpsw_ale_rx_ratelimit_mc(struct cpsw_ale *ale, int port, unsigned int ratelimit_pps) @@ -1200,6 +1213,7 @@ int cpsw_ale_rx_ratelimit_mc(struct cpsw_ale *ale, int port, unsigned int rateli port, val * ALE_RATE_LIMIT_MIN_PPS); return 0; } +EXPORT_SYMBOL_GPL(cpsw_ale_rx_ratelimit_mc); int cpsw_ale_rx_ratelimit_bc(struct cpsw_ale *ale, int port, unsigned int ratelimit_pps) @@ -1222,6 +1236,7 @@ int cpsw_ale_rx_ratelimit_bc(struct cpsw_ale *ale, int port, unsigned int rateli port, val * ALE_RATE_LIMIT_MIN_PPS); return 0; } +EXPORT_SYMBOL_GPL(cpsw_ale_rx_ratelimit_bc); static void cpsw_ale_timer(struct timer_list *t) { @@ -1311,6 +1326,7 @@ void cpsw_ale_start(struct cpsw_ale *ale) cpsw_ale_aging_start(ale); } +EXPORT_SYMBOL_GPL(cpsw_ale_start); void cpsw_ale_stop(struct cpsw_ale *ale) { @@ -1318,6 +1334,7 @@ void cpsw_ale_stop(struct cpsw_ale *ale) cpsw_ale_control_set(ale, 0, ALE_CLEAR, 1); cpsw_ale_control_set(ale, 0, ALE_ENABLE, 0); } +EXPORT_SYMBOL_GPL(cpsw_ale_stop); static const struct reg_field ale_fields_cpsw[] = { /* CPSW_ALE_IDVER_REG */ @@ -1618,6 +1635,7 @@ struct cpsw_ale *cpsw_ale_create(struct cpsw_ale_params *params) cpsw_ale_control_set(ale, 0, ALE_CLEAR, 1); return ale; } +EXPORT_SYMBOL_GPL(cpsw_ale_create); void cpsw_ale_dump(struct cpsw_ale *ale, u32 *data) { @@ -1628,6 +1646,7 @@ void cpsw_ale_dump(struct cpsw_ale *ale, u32 *data) data += ALE_ENTRY_WORDS; } } +EXPORT_SYMBOL_GPL(cpsw_ale_dump); void cpsw_ale_restore(struct cpsw_ale *ale, u32 *data) { @@ -1638,11 +1657,13 @@ void cpsw_ale_restore(struct cpsw_ale *ale, u32 *data) data += ALE_ENTRY_WORDS; } } +EXPORT_SYMBOL_GPL(cpsw_ale_restore); u32 cpsw_ale_get_num_entries(struct cpsw_ale *ale) { return ale ? ale->params.ale_entries : 0; } +EXPORT_SYMBOL_GPL(cpsw_ale_get_num_entries); /* Reads the specified policer index into ALE POLICER registers */ static void cpsw_ale_policer_read_idx(struct cpsw_ale *ale, u32 idx) @@ -1745,3 +1766,7 @@ void cpsw_ale_classifier_setup_default(struct cpsw_ale *ale, int num_rx_ch) 1); } } +EXPORT_SYMBOL_GPL(cpsw_ale_classifier_setup_default); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("TI N-Port Ethernet Switch Address Lookup Engine"); diff --git a/drivers/net/ethernet/ti/cpsw_ethtool.c b/drivers/net/ethernet/ti/cpsw_ethtool.c index a43f75ee269e..3f2682c461f9 100644 --- a/drivers/net/ethernet/ti/cpsw_ethtool.c +++ b/drivers/net/ethernet/ti/cpsw_ethtool.c @@ -144,6 +144,7 @@ u32 cpsw_get_msglevel(struct net_device *ndev) return priv->msg_enable; } +EXPORT_SYMBOL_GPL(cpsw_get_msglevel); void cpsw_set_msglevel(struct net_device *ndev, u32 value) { @@ -151,6 +152,7 @@ void cpsw_set_msglevel(struct net_device *ndev, u32 value) priv->msg_enable = value; } +EXPORT_SYMBOL_GPL(cpsw_set_msglevel); int cpsw_get_coalesce(struct net_device *ndev, struct ethtool_coalesce *coal, struct kernel_ethtool_coalesce *kernel_coal, @@ -161,6 +163,7 @@ int cpsw_get_coalesce(struct net_device *ndev, struct ethtool_coalesce *coal, coal->rx_coalesce_usecs = cpsw->coal_intvl; return 0; } +EXPORT_SYMBOL_GPL(cpsw_get_coalesce); int cpsw_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *coal, struct kernel_ethtool_coalesce *kernel_coal, @@ -220,6 +223,7 @@ update_return: return 0; } +EXPORT_SYMBOL_GPL(cpsw_set_coalesce); int cpsw_get_sset_count(struct net_device *ndev, int sset) { @@ -234,6 +238,7 @@ int cpsw_get_sset_count(struct net_device *ndev, int sset) return -EOPNOTSUPP; } } +EXPORT_SYMBOL_GPL(cpsw_get_sset_count); static void cpsw_add_ch_strings(u8 **p, int ch_num, int rx_dir) { @@ -271,6 +276,7 @@ void cpsw_get_strings(struct net_device *ndev, u32 stringset, u8 *data) break; } } +EXPORT_SYMBOL_GPL(cpsw_get_strings); void cpsw_get_ethtool_stats(struct net_device *ndev, struct ethtool_stats *stats, u64 *data) @@ -303,6 +309,7 @@ void cpsw_get_ethtool_stats(struct net_device *ndev, } } } +EXPORT_SYMBOL_GPL(cpsw_get_ethtool_stats); void cpsw_get_pauseparam(struct net_device *ndev, struct ethtool_pauseparam *pause) @@ -313,6 +320,7 @@ void cpsw_get_pauseparam(struct net_device *ndev, pause->rx_pause = priv->rx_pause ? true : false; pause->tx_pause = priv->tx_pause ? true : false; } +EXPORT_SYMBOL_GPL(cpsw_get_pauseparam); void cpsw_get_wol(struct net_device *ndev, struct ethtool_wolinfo *wol) { @@ -326,6 +334,7 @@ void cpsw_get_wol(struct net_device *ndev, struct ethtool_wolinfo *wol) if (cpsw->slaves[slave_no].phy) phy_ethtool_get_wol(cpsw->slaves[slave_no].phy, wol); } +EXPORT_SYMBOL_GPL(cpsw_get_wol); int cpsw_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol) { @@ -338,6 +347,7 @@ int cpsw_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol) else return -EOPNOTSUPP; } +EXPORT_SYMBOL_GPL(cpsw_set_wol); int cpsw_get_regs_len(struct net_device *ndev) { @@ -346,6 +356,7 @@ int cpsw_get_regs_len(struct net_device *ndev) return cpsw_ale_get_num_entries(cpsw->ale) * ALE_ENTRY_WORDS * sizeof(u32); } +EXPORT_SYMBOL_GPL(cpsw_get_regs_len); void cpsw_get_regs(struct net_device *ndev, struct ethtool_regs *regs, void *p) { @@ -357,6 +368,7 @@ void cpsw_get_regs(struct net_device *ndev, struct ethtool_regs *regs, void *p) cpsw_ale_dump(cpsw->ale, reg); } +EXPORT_SYMBOL_GPL(cpsw_get_regs); int cpsw_ethtool_op_begin(struct net_device *ndev) { @@ -370,6 +382,7 @@ int cpsw_ethtool_op_begin(struct net_device *ndev) return ret; } +EXPORT_SYMBOL_GPL(cpsw_ethtool_op_begin); void cpsw_ethtool_op_complete(struct net_device *ndev) { @@ -377,6 +390,7 @@ void cpsw_ethtool_op_complete(struct net_device *ndev) pm_runtime_put(priv->cpsw->dev); } +EXPORT_SYMBOL_GPL(cpsw_ethtool_op_complete); void cpsw_get_channels(struct net_device *ndev, struct ethtool_channels *ch) { @@ -391,6 +405,7 @@ void cpsw_get_channels(struct net_device *ndev, struct ethtool_channels *ch) ch->tx_count = cpsw->tx_ch_num; ch->combined_count = 0; } +EXPORT_SYMBOL_GPL(cpsw_get_channels); int cpsw_get_link_ksettings(struct net_device *ndev, struct ethtool_link_ksettings *ecmd) @@ -405,6 +420,7 @@ int cpsw_get_link_ksettings(struct net_device *ndev, phy_ethtool_ksettings_get(cpsw->slaves[slave_no].phy, ecmd); return 0; } +EXPORT_SYMBOL_GPL(cpsw_get_link_ksettings); int cpsw_set_link_ksettings(struct net_device *ndev, const struct ethtool_link_ksettings *ecmd) @@ -418,6 +434,7 @@ int cpsw_set_link_ksettings(struct net_device *ndev, return phy_ethtool_ksettings_set(cpsw->slaves[slave_no].phy, ecmd); } +EXPORT_SYMBOL_GPL(cpsw_set_link_ksettings); int cpsw_get_eee(struct net_device *ndev, struct ethtool_keee *edata) { @@ -430,6 +447,7 @@ int cpsw_get_eee(struct net_device *ndev, struct ethtool_keee *edata) else return -EOPNOTSUPP; } +EXPORT_SYMBOL_GPL(cpsw_get_eee); int cpsw_nway_reset(struct net_device *ndev) { @@ -442,6 +460,7 @@ int cpsw_nway_reset(struct net_device *ndev) else return -EOPNOTSUPP; } +EXPORT_SYMBOL_GPL(cpsw_nway_reset); static void cpsw_suspend_data_pass(struct net_device *ndev) { @@ -639,6 +658,7 @@ err: cpsw_fail(cpsw); return ret; } +EXPORT_SYMBOL_GPL(cpsw_set_channels_common); void cpsw_get_ringparam(struct net_device *ndev, struct ethtool_ringparam *ering, @@ -654,6 +674,7 @@ void cpsw_get_ringparam(struct net_device *ndev, ering->rx_max_pending = cpsw->descs_pool_size - CPSW_MAX_QUEUES; ering->rx_pending = cpdma_get_num_rx_descs(cpsw->dma); } +EXPORT_SYMBOL_GPL(cpsw_get_ringparam); int cpsw_set_ringparam(struct net_device *ndev, struct ethtool_ringparam *ering, @@ -700,6 +721,7 @@ err: cpsw_fail(cpsw); return ret; } +EXPORT_SYMBOL_GPL(cpsw_set_ringparam); #if IS_ENABLED(CONFIG_TI_CPTS) int cpsw_get_ts_info(struct net_device *ndev, struct kernel_ethtool_ts_info *info) @@ -720,6 +742,7 @@ int cpsw_get_ts_info(struct net_device *ndev, struct kernel_ethtool_ts_info *inf (1 << HWTSTAMP_FILTER_PTP_V2_EVENT); return 0; } +EXPORT_SYMBOL_GPL(cpsw_get_ts_info); #else int cpsw_get_ts_info(struct net_device *ndev, struct kernel_ethtool_ts_info *info) { @@ -729,4 +752,5 @@ int cpsw_get_ts_info(struct net_device *ndev, struct kernel_ethtool_ts_info *inf info->rx_filters = 0; return 0; } +EXPORT_SYMBOL_GPL(cpsw_get_ts_info); #endif diff --git a/drivers/net/ethernet/ti/cpsw_priv.c b/drivers/net/ethernet/ti/cpsw_priv.c index c6eb6b785b0b..1f6f374551cb 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.c +++ b/drivers/net/ethernet/ti/cpsw_priv.c @@ -32,6 +32,7 @@ #define CPTS_N_ETX_TS 4 int (*cpsw_slave_index)(struct cpsw_common *cpsw, struct cpsw_priv *priv); +EXPORT_SYMBOL_GPL(cpsw_slave_index); void cpsw_intr_enable(struct cpsw_common *cpsw) { @@ -40,6 +41,7 @@ void cpsw_intr_enable(struct cpsw_common *cpsw) cpdma_ctlr_int_ctrl(cpsw->dma, true); } +EXPORT_SYMBOL_GPL(cpsw_intr_enable); void cpsw_intr_disable(struct cpsw_common *cpsw) { @@ -48,6 +50,7 @@ void cpsw_intr_disable(struct cpsw_common *cpsw) cpdma_ctlr_int_ctrl(cpsw->dma, false); } +EXPORT_SYMBOL_GPL(cpsw_intr_disable); void cpsw_tx_handler(void *token, int len, int status) { @@ -82,6 +85,7 @@ void cpsw_tx_handler(void *token, int len, int status) ndev->stats.tx_packets++; ndev->stats.tx_bytes += len; } +EXPORT_SYMBOL_GPL(cpsw_tx_handler); irqreturn_t cpsw_tx_interrupt(int irq, void *dev_id) { @@ -98,6 +102,7 @@ irqreturn_t cpsw_tx_interrupt(int irq, void *dev_id) napi_schedule(&cpsw->napi_tx); return IRQ_HANDLED; } +EXPORT_SYMBOL_GPL(cpsw_tx_interrupt); irqreturn_t cpsw_rx_interrupt(int irq, void *dev_id) { @@ -114,6 +119,7 @@ irqreturn_t cpsw_rx_interrupt(int irq, void *dev_id) napi_schedule(&cpsw->napi_rx); return IRQ_HANDLED; } +EXPORT_SYMBOL_GPL(cpsw_rx_interrupt); irqreturn_t cpsw_misc_interrupt(int irq, void *dev_id) { @@ -126,6 +132,7 @@ irqreturn_t cpsw_misc_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } +EXPORT_SYMBOL_GPL(cpsw_misc_interrupt); int cpsw_tx_mq_poll(struct napi_struct *napi_tx, int budget) { @@ -158,6 +165,7 @@ int cpsw_tx_mq_poll(struct napi_struct *napi_tx, int budget) return num_tx; } +EXPORT_SYMBOL_GPL(cpsw_tx_mq_poll); int cpsw_tx_poll(struct napi_struct *napi_tx, int budget) { @@ -176,6 +184,7 @@ int cpsw_tx_poll(struct napi_struct *napi_tx, int budget) return num_tx; } +EXPORT_SYMBOL_GPL(cpsw_tx_poll); int cpsw_rx_mq_poll(struct napi_struct *napi_rx, int budget) { @@ -208,6 +217,7 @@ int cpsw_rx_mq_poll(struct napi_struct *napi_rx, int budget) return num_rx; } +EXPORT_SYMBOL_GPL(cpsw_rx_mq_poll); int cpsw_rx_poll(struct napi_struct *napi_rx, int budget) { @@ -226,6 +236,7 @@ int cpsw_rx_poll(struct napi_struct *napi_rx, int budget) return num_rx; } +EXPORT_SYMBOL_GPL(cpsw_rx_poll); void cpsw_rx_vlan_encap(struct sk_buff *skb) { @@ -268,12 +279,14 @@ void cpsw_rx_vlan_encap(struct sk_buff *skb) skb_pull(skb, VLAN_HLEN); } } +EXPORT_SYMBOL_GPL(cpsw_rx_vlan_encap); void cpsw_set_slave_mac(struct cpsw_slave *slave, struct cpsw_priv *priv) { slave_write(slave, mac_hi(priv->mac_addr), SA_HI); slave_write(slave, mac_lo(priv->mac_addr), SA_LO); } +EXPORT_SYMBOL_GPL(cpsw_set_slave_mac); void cpsw_soft_reset(const char *module, void __iomem *reg) { @@ -286,6 +299,7 @@ void cpsw_soft_reset(const char *module, void __iomem *reg) WARN(readl_relaxed(reg) & 1, "failed to soft-reset %s\n", module); } +EXPORT_SYMBOL_GPL(cpsw_soft_reset); void cpsw_ndo_tx_timeout(struct net_device *ndev, unsigned int txqueue) { @@ -305,6 +319,7 @@ void cpsw_ndo_tx_timeout(struct net_device *ndev, unsigned int txqueue) netif_trans_update(ndev); netif_tx_wake_all_queues(ndev); } +EXPORT_SYMBOL_GPL(cpsw_ndo_tx_timeout); static int cpsw_get_common_speed(struct cpsw_common *cpsw) { @@ -343,6 +358,7 @@ int cpsw_need_resplit(struct cpsw_common *cpsw) return 1; } +EXPORT_SYMBOL_GPL(cpsw_need_resplit); void cpsw_split_res(struct cpsw_common *cpsw) { @@ -428,6 +444,7 @@ void cpsw_split_res(struct cpsw_common *cpsw) if (budget) cpsw->rxv[0].budget += budget; } +EXPORT_SYMBOL_GPL(cpsw_split_res); int cpsw_init_common(struct cpsw_common *cpsw, void __iomem *ss_regs, int ale_ageout, phys_addr_t desc_mem_phys, @@ -548,6 +565,7 @@ int cpsw_init_common(struct cpsw_common *cpsw, void __iomem *ss_regs, return ret; } +EXPORT_SYMBOL_GPL(cpsw_init_common); #if IS_ENABLED(CONFIG_TI_CPTS) @@ -678,6 +696,7 @@ int cpsw_hwtstamp_set(struct net_device *dev, return 0; } +EXPORT_SYMBOL_GPL(cpsw_hwtstamp_set); int cpsw_hwtstamp_get(struct net_device *dev, struct kernel_hwtstamp_config *cfg) @@ -695,12 +714,14 @@ int cpsw_hwtstamp_get(struct net_device *dev, return 0; } +EXPORT_SYMBOL_GPL(cpsw_hwtstamp_get); #else int cpsw_hwtstamp_get(struct net_device *dev, struct kernel_hwtstamp_config *cfg) { return -EOPNOTSUPP; } +EXPORT_SYMBOL_GPL(cpsw_hwtstamp_set); int cpsw_hwtstamp_set(struct net_device *dev, struct kernel_hwtstamp_config *cfg, @@ -708,6 +729,7 @@ int cpsw_hwtstamp_set(struct net_device *dev, { return -EOPNOTSUPP; } +EXPORT_SYMBOL_GPL(cpsw_hwtstamp_get); #endif /*CONFIG_TI_CPTS*/ int cpsw_ndo_set_tx_maxrate(struct net_device *ndev, int queue, u32 rate) @@ -758,6 +780,7 @@ int cpsw_ndo_set_tx_maxrate(struct net_device *ndev, int queue, u32 rate) cpsw_split_res(cpsw); return ret; } +EXPORT_SYMBOL_GPL(cpsw_ndo_set_tx_maxrate); static int cpsw_tc_to_fifo(int tc, int num_tc) { @@ -782,6 +805,7 @@ bool cpsw_shp_is_off(struct cpsw_priv *priv) return !val; } +EXPORT_SYMBOL_GPL(cpsw_shp_is_off); static void cpsw_fifo_shp_on(struct cpsw_priv *priv, int fifo, int on) { @@ -1043,6 +1067,7 @@ int cpsw_ndo_setup_tc(struct net_device *ndev, enum tc_setup_type type, return -EOPNOTSUPP; } } +EXPORT_SYMBOL_GPL(cpsw_ndo_setup_tc); void cpsw_cbs_resume(struct cpsw_slave *slave, struct cpsw_priv *priv) { @@ -1056,6 +1081,7 @@ void cpsw_cbs_resume(struct cpsw_slave *slave, struct cpsw_priv *priv) cpsw_set_fifo_rlimit(priv, fifo, bw); } } +EXPORT_SYMBOL_GPL(cpsw_cbs_resume); void cpsw_mqprio_resume(struct cpsw_slave *slave, struct cpsw_priv *priv) { @@ -1078,6 +1104,7 @@ void cpsw_mqprio_resume(struct cpsw_slave *slave, struct cpsw_priv *priv) slave_write(slave, tx_prio_map, tx_prio_rg); } +EXPORT_SYMBOL_GPL(cpsw_mqprio_resume); int cpsw_fill_rx_channels(struct cpsw_priv *priv) { @@ -1123,6 +1150,7 @@ int cpsw_fill_rx_channels(struct cpsw_priv *priv) return 0; } +EXPORT_SYMBOL_GPL(cpsw_fill_rx_channels); static struct page_pool *cpsw_create_page_pool(struct cpsw_common *cpsw, int size) @@ -1208,6 +1236,7 @@ void cpsw_destroy_xdp_rxqs(struct cpsw_common *cpsw) cpsw->page_pool[ch] = NULL; } } +EXPORT_SYMBOL_GPL(cpsw_destroy_xdp_rxqs); int cpsw_create_xdp_rxqs(struct cpsw_common *cpsw) { @@ -1240,6 +1269,7 @@ err_cleanup: return ret; } +EXPORT_SYMBOL_GPL(cpsw_create_xdp_rxqs); static int cpsw_xdp_prog_setup(struct cpsw_priv *priv, struct netdev_bpf *bpf) { @@ -1267,6 +1297,7 @@ int cpsw_ndo_bpf(struct net_device *ndev, struct netdev_bpf *bpf) return -EINVAL; } } +EXPORT_SYMBOL_GPL(cpsw_ndo_bpf); int cpsw_xdp_tx_frame(struct cpsw_priv *priv, struct xdp_frame *xdpf, struct page *page, int port) @@ -1300,6 +1331,7 @@ int cpsw_xdp_tx_frame(struct cpsw_priv *priv, struct xdp_frame *xdpf, return ret; } +EXPORT_SYMBOL_GPL(cpsw_xdp_tx_frame); int cpsw_run_xdp(struct cpsw_priv *priv, int ch, struct xdp_buff *xdp, struct page *page, int port, int *len) @@ -1362,6 +1394,7 @@ drop: page_pool_recycle_direct(cpsw->page_pool[ch], page); return ret; } +EXPORT_SYMBOL_GPL(cpsw_run_xdp); static int cpsw_qos_clsflower_add_policer(struct cpsw_priv *priv, struct netlink_ext_ack *extack, @@ -1564,3 +1597,7 @@ void cpsw_qos_clsflower_resume(struct cpsw_priv *priv) cpsw_ale_rx_ratelimit_mc(priv->cpsw->ale, port_id, priv->ale_mc_ratelimit.rate_packet_ps); } +EXPORT_SYMBOL_GPL(cpsw_qos_clsflower_resume); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("TI CPSW Ethernet Switch Driver"); diff --git a/drivers/net/ethernet/ti/cpsw_sl.c b/drivers/net/ethernet/ti/cpsw_sl.c index 0c7531cb0f39..761719a348fa 100644 --- a/drivers/net/ethernet/ti/cpsw_sl.c +++ b/drivers/net/ethernet/ti/cpsw_sl.c @@ -200,6 +200,7 @@ u32 cpsw_sl_reg_read(struct cpsw_sl *sl, enum cpsw_sl_regs reg) dev_dbg(sl->dev, "cpsw_sl: reg: %04X r 0x%08X\n", sl->regs[reg], val); return val; } +EXPORT_SYMBOL_GPL(cpsw_sl_reg_read); void cpsw_sl_reg_write(struct cpsw_sl *sl, enum cpsw_sl_regs reg, u32 val) { @@ -212,6 +213,7 @@ void cpsw_sl_reg_write(struct cpsw_sl *sl, enum cpsw_sl_regs reg, u32 val) dev_dbg(sl->dev, "cpsw_sl: reg: %04X w 0x%08X\n", sl->regs[reg], val); writel(val, sl->sl_base + sl->regs[reg]); } +EXPORT_SYMBOL_GPL(cpsw_sl_reg_write); static const struct cpsw_sl_dev_id *cpsw_sl_match_id( const struct cpsw_sl_dev_id *id, @@ -252,6 +254,7 @@ struct cpsw_sl *cpsw_sl_get(const char *device_id, struct device *dev, return sl; } +EXPORT_SYMBOL_GPL(cpsw_sl_get); void cpsw_sl_reset(struct cpsw_sl *sl, unsigned long tmo) { @@ -270,6 +273,7 @@ void cpsw_sl_reset(struct cpsw_sl *sl, unsigned long tmo) if (cpsw_sl_reg_read(sl, CPSW_SL_SOFT_RESET) & CPSW_SL_SOFT_RESET_BIT) dev_err(sl->dev, "cpsw_sl failed to soft-reset.\n"); } +EXPORT_SYMBOL_GPL(cpsw_sl_reset); u32 cpsw_sl_ctl_set(struct cpsw_sl *sl, u32 ctl_funcs) { @@ -287,6 +291,7 @@ u32 cpsw_sl_ctl_set(struct cpsw_sl *sl, u32 ctl_funcs) return 0; } +EXPORT_SYMBOL_GPL(cpsw_sl_ctl_set); u32 cpsw_sl_ctl_clr(struct cpsw_sl *sl, u32 ctl_funcs) { @@ -304,11 +309,13 @@ u32 cpsw_sl_ctl_clr(struct cpsw_sl *sl, u32 ctl_funcs) return 0; } +EXPORT_SYMBOL_GPL(cpsw_sl_ctl_clr); void cpsw_sl_ctl_reset(struct cpsw_sl *sl) { cpsw_sl_reg_write(sl, CPSW_SL_MACCONTROL, 0); } +EXPORT_SYMBOL_GPL(cpsw_sl_ctl_reset); int cpsw_sl_wait_for_idle(struct cpsw_sl *sl, unsigned long tmo) { @@ -326,3 +333,7 @@ int cpsw_sl_wait_for_idle(struct cpsw_sl *sl, unsigned long tmo) return 0; } +EXPORT_SYMBOL_GPL(cpsw_sl_wait_for_idle); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("TI Ethernet Switch media-access-controller (MAC) submodule"); diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c index d2eab5cd1e0c..41e89a19be53 100644 --- a/drivers/net/ethernet/ti/davinci_cpdma.c +++ b/drivers/net/ethernet/ti/davinci_cpdma.c @@ -531,6 +531,7 @@ struct cpdma_ctlr *cpdma_ctlr_create(struct cpdma_params *params) ctlr->num_chan = CPDMA_MAX_CHANNELS; return ctlr; } +EXPORT_SYMBOL_GPL(cpdma_ctlr_create); int cpdma_ctlr_start(struct cpdma_ctlr *ctlr) { @@ -591,6 +592,7 @@ int cpdma_ctlr_start(struct cpdma_ctlr *ctlr) spin_unlock_irqrestore(&ctlr->lock, flags); return 0; } +EXPORT_SYMBOL_GPL(cpdma_ctlr_start); int cpdma_ctlr_stop(struct cpdma_ctlr *ctlr) { @@ -623,6 +625,7 @@ int cpdma_ctlr_stop(struct cpdma_ctlr *ctlr) spin_unlock_irqrestore(&ctlr->lock, flags); return 0; } +EXPORT_SYMBOL_GPL(cpdma_ctlr_stop); int cpdma_ctlr_destroy(struct cpdma_ctlr *ctlr) { @@ -640,6 +643,7 @@ int cpdma_ctlr_destroy(struct cpdma_ctlr *ctlr) cpdma_desc_pool_destroy(ctlr); return ret; } +EXPORT_SYMBOL_GPL(cpdma_ctlr_destroy); int cpdma_ctlr_int_ctrl(struct cpdma_ctlr *ctlr, bool enable) { @@ -660,21 +664,25 @@ int cpdma_ctlr_int_ctrl(struct cpdma_ctlr *ctlr, bool enable) spin_unlock_irqrestore(&ctlr->lock, flags); return 0; } +EXPORT_SYMBOL_GPL(cpdma_ctlr_int_ctrl); void cpdma_ctlr_eoi(struct cpdma_ctlr *ctlr, u32 value) { dma_reg_write(ctlr, CPDMA_MACEOIVECTOR, value); } +EXPORT_SYMBOL_GPL(cpdma_ctlr_eoi); u32 cpdma_ctrl_rxchs_state(struct cpdma_ctlr *ctlr) { return dma_reg_read(ctlr, CPDMA_RXINTSTATMASKED); } +EXPORT_SYMBOL_GPL(cpdma_ctrl_rxchs_state); u32 cpdma_ctrl_txchs_state(struct cpdma_ctlr *ctlr) { return dma_reg_read(ctlr, CPDMA_TXINTSTATMASKED); } +EXPORT_SYMBOL_GPL(cpdma_ctrl_txchs_state); static void cpdma_chan_set_descs(struct cpdma_ctlr *ctlr, int rx, int desc_num, @@ -802,6 +810,7 @@ int cpdma_chan_set_weight(struct cpdma_chan *ch, int weight) spin_unlock_irqrestore(&ctlr->lock, flags); return ret; } +EXPORT_SYMBOL_GPL(cpdma_chan_set_weight); /* cpdma_chan_get_min_rate - get minimum allowed rate for channel * Should be called before cpdma_chan_set_rate. @@ -816,6 +825,7 @@ u32 cpdma_chan_get_min_rate(struct cpdma_ctlr *ctlr) return DIV_ROUND_UP(divident, divisor); } +EXPORT_SYMBOL_GPL(cpdma_chan_get_min_rate); /* cpdma_chan_set_rate - limits bandwidth for transmit channel. * The bandwidth * limited channels have to be in order beginning from lowest. @@ -860,6 +870,7 @@ err: spin_unlock_irqrestore(&ctlr->lock, flags); return ret; } +EXPORT_SYMBOL_GPL(cpdma_chan_set_rate); u32 cpdma_chan_get_rate(struct cpdma_chan *ch) { @@ -872,6 +883,7 @@ u32 cpdma_chan_get_rate(struct cpdma_chan *ch) return rate; } +EXPORT_SYMBOL_GPL(cpdma_chan_get_rate); struct cpdma_chan *cpdma_chan_create(struct cpdma_ctlr *ctlr, int chan_num, cpdma_handler_fn handler, int rx_type) @@ -931,6 +943,7 @@ struct cpdma_chan *cpdma_chan_create(struct cpdma_ctlr *ctlr, int chan_num, spin_unlock_irqrestore(&ctlr->lock, flags); return chan; } +EXPORT_SYMBOL_GPL(cpdma_chan_create); int cpdma_chan_get_rx_buf_num(struct cpdma_chan *chan) { @@ -943,6 +956,7 @@ int cpdma_chan_get_rx_buf_num(struct cpdma_chan *chan) return desc_num; } +EXPORT_SYMBOL_GPL(cpdma_chan_get_rx_buf_num); int cpdma_chan_destroy(struct cpdma_chan *chan) { @@ -964,6 +978,7 @@ int cpdma_chan_destroy(struct cpdma_chan *chan) spin_unlock_irqrestore(&ctlr->lock, flags); return 0; } +EXPORT_SYMBOL_GPL(cpdma_chan_destroy); int cpdma_chan_get_stats(struct cpdma_chan *chan, struct cpdma_chan_stats *stats) @@ -976,6 +991,7 @@ int cpdma_chan_get_stats(struct cpdma_chan *chan, spin_unlock_irqrestore(&chan->lock, flags); return 0; } +EXPORT_SYMBOL_GPL(cpdma_chan_get_stats); static void __cpdma_chan_submit(struct cpdma_chan *chan, struct cpdma_desc __iomem *desc) @@ -1100,6 +1116,7 @@ int cpdma_chan_idle_submit(struct cpdma_chan *chan, void *token, void *data, spin_unlock_irqrestore(&chan->lock, flags); return ret; } +EXPORT_SYMBOL_GPL(cpdma_chan_idle_submit); int cpdma_chan_idle_submit_mapped(struct cpdma_chan *chan, void *token, dma_addr_t data, int len, int directed) @@ -1125,6 +1142,7 @@ int cpdma_chan_idle_submit_mapped(struct cpdma_chan *chan, void *token, spin_unlock_irqrestore(&chan->lock, flags); return ret; } +EXPORT_SYMBOL_GPL(cpdma_chan_idle_submit_mapped); int cpdma_chan_submit(struct cpdma_chan *chan, void *token, void *data, int len, int directed) @@ -1150,6 +1168,7 @@ int cpdma_chan_submit(struct cpdma_chan *chan, void *token, void *data, spin_unlock_irqrestore(&chan->lock, flags); return ret; } +EXPORT_SYMBOL_GPL(cpdma_chan_submit); int cpdma_chan_submit_mapped(struct cpdma_chan *chan, void *token, dma_addr_t data, int len, int directed) @@ -1175,6 +1194,7 @@ int cpdma_chan_submit_mapped(struct cpdma_chan *chan, void *token, spin_unlock_irqrestore(&chan->lock, flags); return ret; } +EXPORT_SYMBOL_GPL(cpdma_chan_submit_mapped); bool cpdma_check_free_tx_desc(struct cpdma_chan *chan) { @@ -1189,6 +1209,7 @@ bool cpdma_check_free_tx_desc(struct cpdma_chan *chan) spin_unlock_irqrestore(&chan->lock, flags); return free_tx_desc; } +EXPORT_SYMBOL_GPL(cpdma_check_free_tx_desc); static void __cpdma_chan_free(struct cpdma_chan *chan, struct cpdma_desc __iomem *desc, @@ -1289,6 +1310,7 @@ int cpdma_chan_process(struct cpdma_chan *chan, int quota) } return used; } +EXPORT_SYMBOL_GPL(cpdma_chan_process); int cpdma_chan_start(struct cpdma_chan *chan) { @@ -1308,6 +1330,7 @@ int cpdma_chan_start(struct cpdma_chan *chan) return 0; } +EXPORT_SYMBOL_GPL(cpdma_chan_start); int cpdma_chan_stop(struct cpdma_chan *chan) { @@ -1370,6 +1393,7 @@ int cpdma_chan_stop(struct cpdma_chan *chan) spin_unlock_irqrestore(&chan->lock, flags); return 0; } +EXPORT_SYMBOL_GPL(cpdma_chan_stop); int cpdma_chan_int_ctrl(struct cpdma_chan *chan, bool enable) { @@ -1416,11 +1440,13 @@ int cpdma_get_num_rx_descs(struct cpdma_ctlr *ctlr) { return ctlr->num_rx_desc; } +EXPORT_SYMBOL_GPL(cpdma_get_num_rx_descs); int cpdma_get_num_tx_descs(struct cpdma_ctlr *ctlr) { return ctlr->num_tx_desc; } +EXPORT_SYMBOL_GPL(cpdma_get_num_tx_descs); int cpdma_set_num_rx_descs(struct cpdma_ctlr *ctlr, int num_rx_desc) { @@ -1442,3 +1468,4 @@ int cpdma_set_num_rx_descs(struct cpdma_ctlr *ctlr, int num_rx_desc) return ret; } +EXPORT_SYMBOL_GPL(cpdma_set_num_rx_descs); From ede3136e56557498ec3f2e6da58cb0920d8074e1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 2 Apr 2026 20:46:55 +0200 Subject: [PATCH 1177/1561] dpaa2: avoid linking objects into multiple modules Each object file contains information about which module it gets linked into, so linking the same file into multiple modules now causes a warning: scripts/Makefile.build:254: drivers/net/ethernet/freescale/dpaa2/Makefile: dpaa2-mac.o is added to multiple modules: fsl-dpaa2-eth fsl-dpaa2-switch scripts/Makefile.build:254: drivers/net/ethernet/freescale/dpaa2/Makefile: dpmac.o is added to multiple modules: fsl-dpaa2-eth fsl-dpaa2-switch Change the way that dpaa2 is built by moving the two common files into a separate module with exported symbols instead. Tested-by: Ioana Ciornei Reviewed-by: Ioana Ciornei Signed-off-by: Arnd Bergmann Link: https://patch.msgid.link/20260402184726.3746487-3-arnd@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/dpaa2/Makefile | 9 +++++---- drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c | 16 ++++++++++++++++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/Makefile b/drivers/net/ethernet/freescale/dpaa2/Makefile index 1b05ba8d1cbf..5f74be76434f 100644 --- a/drivers/net/ethernet/freescale/dpaa2/Makefile +++ b/drivers/net/ethernet/freescale/dpaa2/Makefile @@ -3,15 +3,16 @@ # Makefile for the Freescale DPAA2 Ethernet controller # -obj-$(CONFIG_FSL_DPAA2_ETH) += fsl-dpaa2-eth.o +obj-$(CONFIG_FSL_DPAA2_ETH) += fsl-dpaa2-eth.o fsl-dpaa2-mac.o obj-$(CONFIG_FSL_DPAA2_PTP_CLOCK) += fsl-dpaa2-ptp.o -obj-$(CONFIG_FSL_DPAA2_SWITCH) += fsl-dpaa2-switch.o +obj-$(CONFIG_FSL_DPAA2_SWITCH) += fsl-dpaa2-switch.o fsl-dpaa2-mac.o -fsl-dpaa2-eth-objs := dpaa2-eth.o dpaa2-ethtool.o dpni.o dpaa2-mac.o dpmac.o dpaa2-eth-devlink.o dpaa2-xsk.o +fsl-dpaa2-eth-objs := dpaa2-eth.o dpaa2-ethtool.o dpni.o dpaa2-eth-devlink.o dpaa2-xsk.o fsl-dpaa2-eth-${CONFIG_FSL_DPAA2_ETH_DCB} += dpaa2-eth-dcb.o fsl-dpaa2-eth-${CONFIG_DEBUG_FS} += dpaa2-eth-debugfs.o fsl-dpaa2-ptp-objs := dpaa2-ptp.o dprtc.o -fsl-dpaa2-switch-objs := dpaa2-switch.o dpaa2-switch-ethtool.o dpsw.o dpaa2-switch-flower.o dpaa2-mac.o dpmac.o +fsl-dpaa2-switch-objs := dpaa2-switch.o dpaa2-switch-ethtool.o dpsw.o dpaa2-switch-flower.o +fsl-dpaa2-mac-objs += dpaa2-mac.o dpmac.o # Needed by the tracing framework CFLAGS_dpaa2-eth.o := -I$(src) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c index ad812ebf3139..1f80a527264a 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c @@ -544,6 +544,7 @@ void dpaa2_mac_start(struct dpaa2_mac *mac) phylink_start(mac->phylink); } +EXPORT_SYMBOL_GPL(dpaa2_mac_start); void dpaa2_mac_stop(struct dpaa2_mac *mac) { @@ -554,6 +555,7 @@ void dpaa2_mac_stop(struct dpaa2_mac *mac) if (mac->serdes_phy) phy_power_off(mac->serdes_phy); } +EXPORT_SYMBOL_GPL(dpaa2_mac_stop); int dpaa2_mac_connect(struct dpaa2_mac *mac) { @@ -646,6 +648,7 @@ err_pcs_destroy: return err; } +EXPORT_SYMBOL_GPL(dpaa2_mac_connect); void dpaa2_mac_disconnect(struct dpaa2_mac *mac) { @@ -658,6 +661,7 @@ void dpaa2_mac_disconnect(struct dpaa2_mac *mac) of_phy_put(mac->serdes_phy); mac->serdes_phy = NULL; } +EXPORT_SYMBOL_GPL(dpaa2_mac_disconnect); int dpaa2_mac_open(struct dpaa2_mac *mac) { @@ -729,6 +733,7 @@ err_close_dpmac: dpmac_close(mac->mc_io, 0, dpmac_dev->mc_handle); return err; } +EXPORT_SYMBOL_GPL(dpaa2_mac_open); void dpaa2_mac_close(struct dpaa2_mac *mac) { @@ -753,6 +758,7 @@ void dpaa2_mac_close(struct dpaa2_mac *mac) if (mac->fw_node) fwnode_handle_put(mac->fw_node); } +EXPORT_SYMBOL_GPL(dpaa2_mac_close); static void dpaa2_mac_transfer_stats(const struct dpmac_counter *counters, size_t num_counters, void *s, @@ -824,6 +830,7 @@ void dpaa2_mac_get_rmon_stats(struct dpaa2_mac *mac, *ranges = dpaa2_mac_rmon_ranges; } +EXPORT_SYMBOL_GPL(dpaa2_mac_get_rmon_stats); void dpaa2_mac_get_pause_stats(struct dpaa2_mac *mac, struct ethtool_pause_stats *s) @@ -835,6 +842,7 @@ void dpaa2_mac_get_pause_stats(struct dpaa2_mac *mac, DPAA2_MAC_NUM_PAUSE_STATS, dpaa2_mac_pause_stats, s); } +EXPORT_SYMBOL_GPL(dpaa2_mac_get_pause_stats); void dpaa2_mac_get_ctrl_stats(struct dpaa2_mac *mac, struct ethtool_eth_ctrl_stats *s) @@ -846,6 +854,7 @@ void dpaa2_mac_get_ctrl_stats(struct dpaa2_mac *mac, DPAA2_MAC_NUM_ETH_CTRL_STATS, dpaa2_mac_eth_ctrl_stats, s); } +EXPORT_SYMBOL_GPL(dpaa2_mac_get_ctrl_stats); void dpaa2_mac_get_eth_mac_stats(struct dpaa2_mac *mac, struct ethtool_eth_mac_stats *s) @@ -857,11 +866,13 @@ void dpaa2_mac_get_eth_mac_stats(struct dpaa2_mac *mac, DPAA2_MAC_NUM_ETH_MAC_STATS, dpaa2_mac_eth_mac_stats, s); } +EXPORT_SYMBOL_GPL(dpaa2_mac_get_eth_mac_stats); int dpaa2_mac_get_sset_count(void) { return DPAA2_MAC_NUM_ETHTOOL_STATS; } +EXPORT_SYMBOL_GPL(dpaa2_mac_get_sset_count); void dpaa2_mac_get_strings(u8 **data) { @@ -870,6 +881,7 @@ void dpaa2_mac_get_strings(u8 **data) for (i = 0; i < DPAA2_MAC_NUM_ETHTOOL_STATS; i++) ethtool_puts(data, dpaa2_mac_ethtool_stats[i].name); } +EXPORT_SYMBOL_GPL(dpaa2_mac_get_strings); void dpaa2_mac_get_ethtool_stats(struct dpaa2_mac *mac, u64 *data) { @@ -921,3 +933,7 @@ fallback: *(data + i) = value; } } +EXPORT_SYMBOL_GPL(dpaa2_mac_get_ethtool_stats); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("DPAA2 Ethernet MAC library"); From 00d46be3c319029bdf5b4eb53bff8e47a8e7ddc4 Mon Sep 17 00:00:00 2001 From: Gang Yan Date: Fri, 3 Apr 2026 13:29:27 +0200 Subject: [PATCH 1178/1561] mptcp: reduce 'overhead' from u16 to u8 The 'overhead' in struct mptcp_data_frag can safely use u8, as it represents 'alignment + sizeof(mptcp_data_frag)'. With a maximum alignment of 7('ALIGN(1, sizeof(long)) - 1'), the overhead is at most 47, well below U8_MAX and validated with BUILD_BUG_ON(). This patch also adds a field named 'unused' for further extensions. Signed-off-by: Gang Yan Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260403-net-next-mptcp-msg_eor-misc-v1-1-b0b33bea3fed@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 6 ++++++ net/mptcp/protocol.h | 3 ++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 239fb9e75c7c..79315e575d07 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -4616,6 +4616,12 @@ void __init mptcp_proto_init(void) inet_register_protosw(&mptcp_protosw); BUILD_BUG_ON(sizeof(struct mptcp_skb_cb) > sizeof_field(struct sk_buff, cb)); + + /* struct mptcp_data_frag: 'overhead' corresponds to the alignment + * (ALIGN(1, sizeof(long)) - 1, so 8-1) + the struct's size + */ + BUILD_BUG_ON(ALIGN(1, sizeof(long)) - 1 + sizeof(struct mptcp_data_frag) + > U8_MAX); } #if IS_ENABLED(CONFIG_MPTCP_IPV6) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 0bd1ee860316..02031007100b 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -263,7 +263,8 @@ struct mptcp_data_frag { u64 data_seq; u16 data_len; u16 offset; - u16 overhead; + u8 overhead; + u8 __unused; u16 already_sent; struct page *page; }; From 7fb2f5f964998819dcc8f5d38dcd8999a568c3ef Mon Sep 17 00:00:00 2001 From: Gang Yan Date: Fri, 3 Apr 2026 13:29:28 +0200 Subject: [PATCH 1179/1561] mptcp: preserve MSG_EOR semantics in sendmsg path Extend MPTCP's sendmsg handling to recognize and honor the MSG_EOR flag, which marks the end of a record for application-level message boundaries. Data fragments tagged with MSG_EOR are explicitly marked in the mptcp_data_frag structure and skb context to prevent unintended coalescing with subsequent data chunks. This ensures the intent of applications using MSG_EOR is preserved across MPTCP subflows, maintaining consistent message segmentation behavior. Signed-off-by: Gang Yan Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260403-net-next-mptcp-msg_eor-misc-v1-2-b0b33bea3fed@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 24 +++++++++++++++++++++--- net/mptcp/protocol.h | 2 +- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 79315e575d07..e21e416cd19a 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1005,7 +1005,8 @@ static bool mptcp_frag_can_collapse_to(const struct mptcp_sock *msk, const struct page_frag *pfrag, const struct mptcp_data_frag *df) { - return df && pfrag->page == df->page && + return df && !df->eor && + pfrag->page == df->page && pfrag->size - pfrag->offset > 0 && pfrag->offset == (df->offset + df->data_len) && df->data_seq + df->data_len == msk->write_seq; @@ -1147,6 +1148,7 @@ mptcp_carve_data_frag(const struct mptcp_sock *msk, struct page_frag *pfrag, dfrag->offset = offset + sizeof(struct mptcp_data_frag); dfrag->already_sent = 0; dfrag->page = pfrag->page; + dfrag->eor = 0; return dfrag; } @@ -1408,6 +1410,13 @@ out: mptcp_update_infinite_map(msk, ssk, mpext); trace_mptcp_sendmsg_frag(mpext); mptcp_subflow_ctx(ssk)->rel_write_seq += copy; + + /* if this is the last chunk of a dfrag with MSG_EOR set, + * mark the skb to prevent coalescing with subsequent data. + */ + if (dfrag->eor && info->sent + copy >= dfrag->data_len) + TCP_SKB_CB(skb)->eor = 1; + return copy; } @@ -1868,7 +1877,8 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) long timeo; /* silently ignore everything else */ - msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_FASTOPEN; + msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | + MSG_FASTOPEN | MSG_EOR; lock_sock(sk); @@ -1975,8 +1985,16 @@ wait_for_memory: goto do_error; } - if (copied) + if (copied) { + /* mark the last dfrag with EOR if MSG_EOR was set */ + if (msg->msg_flags & MSG_EOR) { + struct mptcp_data_frag *dfrag = mptcp_pending_tail(sk); + + if (dfrag) + dfrag->eor = 1; + } __mptcp_push_pending(sk, msg->msg_flags); + } out: release_sock(sk); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 02031007100b..1208f317ac33 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -264,7 +264,7 @@ struct mptcp_data_frag { u16 data_len; u16 offset; u8 overhead; - u8 __unused; + u8 eor; /* currently using 1 bit */ u16 already_sent; struct page *page; }; From eb477fdd68036a54a6678020d4e6ab177dea111c Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 3 Apr 2026 13:29:29 +0200 Subject: [PATCH 1180/1561] tcp: add recv_should_stop helper Factor out a new helper tcp_recv_should_stop() from tcp_recvmsg_locked() and tcp_splice_read() to check whether to stop receiving. And use this helper in mptcp_recvmsg() and mptcp_splice_read() to reduce redundant code. Suggested-by: Paolo Abeni Acked-by: Mat Martineau Signed-off-by: Geliang Tang Acked-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260403-net-next-mptcp-msg_eor-misc-v1-3-b0b33bea3fed@kernel.org Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 8 ++++++++ net/ipv4/tcp.c | 9 ++------- net/mptcp/protocol.c | 11 +++-------- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 565943c34b7e..6156d1d068e1 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -3077,4 +3077,12 @@ enum skb_drop_reason tcp_inbound_hash(struct sock *sk, const void *saddr, const void *daddr, int family, int dif, int sdif); +static inline int tcp_recv_should_stop(struct sock *sk) +{ + return sk->sk_err || + sk->sk_state == TCP_CLOSE || + (sk->sk_shutdown & RCV_SHUTDOWN) || + signal_pending(current); +} + #endif /* _TCP_H */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index bd2c3c4587e1..e57eaffc007a 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -888,9 +888,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, release_sock(sk); lock_sock(sk); - if (sk->sk_err || sk->sk_state == TCP_CLOSE || - (sk->sk_shutdown & RCV_SHUTDOWN) || - signal_pending(current)) + if (tcp_recv_should_stop(sk)) break; } @@ -2755,10 +2753,7 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, if (copied) { if (!timeo || - sk->sk_err || - sk->sk_state == TCP_CLOSE || - (sk->sk_shutdown & RCV_SHUTDOWN) || - signal_pending(current)) + tcp_recv_should_stop(sk)) break; } else { if (sock_flag(sk, SOCK_DONE)) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index e21e416cd19a..2f4776a4f06a 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2333,11 +2333,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, break; if (copied) { - if (sk->sk_err || - sk->sk_state == TCP_CLOSE || - (sk->sk_shutdown & RCV_SHUTDOWN) || - !timeo || - signal_pending(current)) + if (tcp_recv_should_stop(sk) || + !timeo) break; } else { if (sk->sk_err) { @@ -4520,9 +4517,7 @@ static ssize_t mptcp_splice_read(struct socket *sock, loff_t *ppos, release_sock(sk); lock_sock(sk); - if (sk->sk_err || sk->sk_state == TCP_CLOSE || - (sk->sk_shutdown & RCV_SHUTDOWN) || - signal_pending(current)) + if (tcp_recv_should_stop(sk)) break; } From c4a5cb2f00f9a8e48437b973d1aceb1540851b98 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 3 Apr 2026 13:29:31 +0200 Subject: [PATCH 1181/1561] selftests: mptcp: join: recreate signal endp with same ID In this "delete re-add signal" MPTCP Join subtest, the endpoint linked to the initial subflow is removed, but readded once with different ID. It appears that there was an issue when reusing the same ID, recently fixed by commit d191101dee25 ("mptcp: pm: in-kernel: always set ID as avail when rm endp"). The test then now reuses the same ID the first time, but continue to use another one (88) the second time. This should then cover more cases. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/615 Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260403-net-next-mptcp-msg_eor-misc-v1-5-b0b33bea3fed@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index a3144d7298a5..beec41f6662a 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -4343,13 +4343,13 @@ endpoint_tests() chk_mptcp_info add_addr_signal 2 add_addr_accepted 2 [ ${ipt} = 1 ] && ip netns exec "${ns1}" ${iptables} -D OUTPUT 1 - pm_nl_add_endpoint $ns1 10.0.1.1 id 99 flags signal + pm_nl_add_endpoint $ns1 10.0.1.1 id 42 flags signal wait_mpj 4 chk_subflow_nr "after re-add ID 0" 3 chk_mptcp_info subflows 3 subflows 3 chk_mptcp_info add_addr_signal 3 add_addr_accepted 2 - pm_nl_del_endpoint $ns1 99 10.0.1.1 + pm_nl_del_endpoint $ns1 42 10.0.1.1 sleep 0.5 chk_subflow_nr "after re-delete ID 0" 2 chk_mptcp_info subflows 2 subflows 2 From 05d42dc8ab92aa54aaeb1c033927c88fd2accba4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 4 Apr 2026 18:15:13 -0700 Subject: [PATCH 1182/1561] xfrm: Drop support for HMAC-RIPEMD-160 Drop support for HMAC-RIPEMD-160 from IPsec to reduce the UAPI surface and simplify future maintenance. It's almost certainly unused. RIPEMD-160 received some attention in the early 2000s when SHA-* weren't quite as well established. But it never received much adoption outside of certain niches such as Bitcoin. It's actually unclear that Linux + IPsec + HMAC-RIPEMD-160 has *ever* been used, even historically. When support for it was added in 2003, it was done so in a "cleanup" commit without any justification [1]. It didn't actually work until someone happened to fix it 5 years later [2]. That person didn't use or test it either [3]. Finally, also note that "hmac(rmd160)" is by far the slowest of the algorithms in aalg_list[]. Of course, today IPsec is usually used with an AEAD, such as AES-GCM. But even for IPsec users still using a dedicated auth algorithm, they almost certainly aren't using, and shouldn't use, HMAC-RIPEMD-160. Thus, let's just drop support for it. Note: no kconfig update is needed, since CRYPTO_RMD160 wasn't actually being selected anyway. References: [1] linux-history commit d462985fc1941a47 ("[IPSEC]: Clean up key manager algorithm handling.") [2] linux commit a13366c632132bb9 ("xfrm: xfrm_algo: correct usage of RIPEMD-160") [3] https://lore.kernel.org/all/1212340578-15574-1-git-send-email-rueegsegger@swiss-it.ch Signed-off-by: Eric Biggers Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_algo.c | 20 -------------------- tools/testing/selftests/net/ipsec.c | 8 ++------ 2 files changed, 2 insertions(+), 26 deletions(-) diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 749011e031c0..70434495f23f 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -290,26 +290,6 @@ static struct xfrm_algo_desc aalg_list[] = { .sadb_alg_maxbits = 512 } }, -{ - .name = "hmac(rmd160)", - .compat = "rmd160", - - .uinfo = { - .auth = { - .icv_truncbits = 96, - .icv_fullbits = 160, - } - }, - - .pfkey_supported = 1, - - .desc = { - .sadb_alg_id = SADB_X_AALG_RIPEMD160HMAC, - .sadb_alg_ivlen = 0, - .sadb_alg_minbits = 160, - .sadb_alg_maxbits = 160 - } -}, { .name = "xcbc(aes)", diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c index f4afef51b930..89c32c354c00 100644 --- a/tools/testing/selftests/net/ipsec.c +++ b/tools/testing/selftests/net/ipsec.c @@ -62,8 +62,6 @@ #define VETH_FMT "ktst-%d" #define VETH_LEN 12 -#define XFRM_ALGO_NR_KEYS 29 - static int nsfd_parent = -1; static int nsfd_childa = -1; static int nsfd_childb = -1; @@ -96,7 +94,6 @@ struct xfrm_key_entry xfrm_key_entries[] = { {"cbc(cast5)", 128}, {"cbc(serpent)", 128}, {"hmac(sha1)", 160}, - {"hmac(rmd160)", 160}, {"cbc(des3_ede)", 192}, {"hmac(sha256)", 256}, {"cbc(aes)", 256}, @@ -813,7 +810,7 @@ static int xfrm_fill_key(char *name, char *buf, { int i; - for (i = 0; i < XFRM_ALGO_NR_KEYS; i++) { + for (i = 0; i < ARRAY_SIZE(xfrm_key_entries); i++) { if (strncmp(name, xfrm_key_entries[i].algo_name, ALGO_LEN) == 0) *key_len = xfrm_key_entries[i].key_len; } @@ -2061,8 +2058,7 @@ static int write_desc(int proto, int test_desc_fd, int proto_list[] = { IPPROTO_AH, IPPROTO_COMP, IPPROTO_ESP }; char *ah_list[] = { "digest_null", "hmac(md5)", "hmac(sha1)", "hmac(sha256)", - "hmac(sha384)", "hmac(sha512)", "hmac(rmd160)", - "xcbc(aes)", "cmac(aes)" + "hmac(sha384)", "hmac(sha512)", "xcbc(aes)", "cmac(aes)" }; char *comp_list[] = { "deflate", From dfecb0c5af3b07ebfa84be63a7a21bfc9e29a872 Mon Sep 17 00:00:00 2001 From: Qingfang Deng Date: Fri, 3 Apr 2026 11:48:47 +0800 Subject: [PATCH 1183/1561] selftests: net: add tests for PPP Add ping and iperf3 tests for ppp_async.c and pppoe.c. Signed-off-by: Qingfang Deng Link: https://patch.msgid.link/20260403034908.30017-1-qingfang.deng@linux.dev Signed-off-by: Paolo Abeni --- MAINTAINERS | 1 + tools/testing/selftests/Makefile | 1 + tools/testing/selftests/net/ppp/Makefile | 15 +++++ tools/testing/selftests/net/ppp/config | 9 +++ tools/testing/selftests/net/ppp/ppp_async.sh | 43 ++++++++++++ tools/testing/selftests/net/ppp/ppp_common.sh | 45 +++++++++++++ .../selftests/net/ppp/pppoe-server-options | 2 + tools/testing/selftests/net/ppp/pppoe.sh | 65 +++++++++++++++++++ 8 files changed, 181 insertions(+) create mode 100644 tools/testing/selftests/net/ppp/Makefile create mode 100644 tools/testing/selftests/net/ppp/config create mode 100755 tools/testing/selftests/net/ppp/ppp_async.sh create mode 100644 tools/testing/selftests/net/ppp/ppp_common.sh create mode 100644 tools/testing/selftests/net/ppp/pppoe-server-options create mode 100755 tools/testing/selftests/net/ppp/pppoe.sh diff --git a/MAINTAINERS b/MAINTAINERS index cb48c9ad9ddc..9d1e6d3acbac 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21083,6 +21083,7 @@ PPP PROTOCOL DRIVERS AND COMPRESSORS L: linux-ppp@vger.kernel.org S: Orphan F: drivers/net/ppp/ppp_* +F: tools/testing/selftests/net/ppp/ PPS SUPPORT M: Rodolfo Giometti diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 450f13ba4cca..65f84e8a0cf0 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -78,6 +78,7 @@ TARGETS += net/netfilter TARGETS += net/openvswitch TARGETS += net/ovpn TARGETS += net/packetdrill +TARGETS += net/ppp TARGETS += net/rds TARGETS += net/tcp_ao TARGETS += nolibc diff --git a/tools/testing/selftests/net/ppp/Makefile b/tools/testing/selftests/net/ppp/Makefile new file mode 100644 index 000000000000..b39b0abadde6 --- /dev/null +++ b/tools/testing/selftests/net/ppp/Makefile @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0 + +top_srcdir = ../../../../.. + +TEST_PROGS := \ + ppp_async.sh \ + pppoe.sh \ +# end of TEST_PROGS + +TEST_FILES := \ + ppp_common.sh \ + pppoe-server-options \ +# end of TEST_FILES + +include ../../lib.mk diff --git a/tools/testing/selftests/net/ppp/config b/tools/testing/selftests/net/ppp/config new file mode 100644 index 000000000000..b45d25c5b970 --- /dev/null +++ b/tools/testing/selftests/net/ppp/config @@ -0,0 +1,9 @@ +CONFIG_IPV6=y +CONFIG_PACKET=y +CONFIG_PPP=m +CONFIG_PPP_ASYNC=m +CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPPOE=m +CONFIG_PPPOE_HASH_BITS_4=y +CONFIG_VETH=y diff --git a/tools/testing/selftests/net/ppp/ppp_async.sh b/tools/testing/selftests/net/ppp/ppp_async.sh new file mode 100755 index 000000000000..10f54c8dd0bc --- /dev/null +++ b/tools/testing/selftests/net/ppp/ppp_async.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ppp_common.sh + +# Temporary files for PTY symlinks +TTY_DIR=$(mktemp -d /tmp/ppp.XXXXXX) +TTY_SERVER="$TTY_DIR"/server +TTY_CLIENT="$TTY_DIR"/client + +# shellcheck disable=SC2329 +cleanup() { + cleanup_all_ns + [ -n "$SOCAT_PID" ] && kill_process "$SOCAT_PID" + rm -fr "$TTY_DIR" +} + +trap cleanup EXIT + +ppp_common_init +modprobe -q ppp_async + +# Create the virtual serial device +socat -d PTY,link="$TTY_SERVER",rawer PTY,link="$TTY_CLIENT",rawer & +SOCAT_PID=$! + +# Wait for symlinks to be created +slowwait 5 [ -L "$TTY_SERVER" ] + +# Start the PPP Server +ip netns exec "$NS_SERVER" pppd "$TTY_SERVER" 115200 \ + "$IP_SERVER":"$IP_CLIENT" \ + local noauth nodefaultroute debug + +# Start the PPP Client +ip netns exec "$NS_CLIENT" pppd "$TTY_CLIENT" 115200 \ + local noauth updetach nodefaultroute debug + +ppp_test_connectivity + +log_test "PPP async" + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/net/ppp/ppp_common.sh b/tools/testing/selftests/net/ppp/ppp_common.sh new file mode 100644 index 000000000000..40bbec317039 --- /dev/null +++ b/tools/testing/selftests/net/ppp/ppp_common.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# shellcheck disable=SC2153 + +source ../lib.sh + +IP_SERVER="192.168.200.1" +IP_CLIENT="192.168.200.2" + +ppp_common_init() { + # Package requirements + require_command socat + require_command pppd + require_command iperf3 + + # Check for root privileges + if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit "$ksft_skip" + fi + + # Namespaces + setup_ns NS_SERVER NS_CLIENT +} + +ppp_check_addr() { + dev=$1 + addr=$2 + ns=$3 + ip -netns "$ns" -4 addr show dev "$dev" 2>/dev/null | grep -q "$addr" + return $? +} + +ppp_test_connectivity() { + slowwait 10 ppp_check_addr "ppp0" "$IP_CLIENT" "$NS_CLIENT" + + ip netns exec "$NS_CLIENT" ping -c 3 "$IP_SERVER" + check_err $? + + ip netns exec "$NS_SERVER" iperf3 -s -1 -D + wait_local_port_listen "$NS_SERVER" 5201 tcp + + ip netns exec "$NS_CLIENT" iperf3 -c "$IP_SERVER" -Z -t 2 + check_err $? +} diff --git a/tools/testing/selftests/net/ppp/pppoe-server-options b/tools/testing/selftests/net/ppp/pppoe-server-options new file mode 100644 index 000000000000..66c8c9d319e9 --- /dev/null +++ b/tools/testing/selftests/net/ppp/pppoe-server-options @@ -0,0 +1,2 @@ +noauth +noipdefault diff --git a/tools/testing/selftests/net/ppp/pppoe.sh b/tools/testing/selftests/net/ppp/pppoe.sh new file mode 100755 index 000000000000..f67b51df7490 --- /dev/null +++ b/tools/testing/selftests/net/ppp/pppoe.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ppp_common.sh + +VETH_SERVER="veth-server" +VETH_CLIENT="veth-client" +PPPOE_LOG=$(mktemp /tmp/pppoe.XXXXXX) + +# shellcheck disable=SC2329 +cleanup() { + cleanup_all_ns + [ -n "$SOCAT_PID" ] && kill_process "$SOCAT_PID" + rm -f "$PPPOE_LOG" +} + +trap cleanup EXIT + +require_command pppoe-server +ppp_common_init +modprobe -q pppoe + +# Try to locate pppoe.so plugin +PPPOE_PLUGIN=$(find /usr/{lib,lib64,lib32}/pppd/ -name pppoe.so -type f -print -quit) +if [ -z "$PPPOE_PLUGIN" ]; then + log_test_skip "PPPoE: pppoe.so plugin not found" + exit "$EXIT_STATUS" +fi + +# Create the veth pair +ip link add "$VETH_SERVER" type veth peer name "$VETH_CLIENT" +ip link set "$VETH_SERVER" netns "$NS_SERVER" +ip link set "$VETH_CLIENT" netns "$NS_CLIENT" +ip -netns "$NS_SERVER" link set "$VETH_SERVER" up +ip -netns "$NS_CLIENT" link set "$VETH_CLIENT" up + +# Start socat as syslog listener +socat -v -u UNIX-RECV:/dev/log OPEN:/dev/null > "$PPPOE_LOG" 2>&1 & +SOCAT_PID=$! + +# Start the PPP Server. Note that versions before 4.0 ignore -g option and +# instead use a hardcoded plugin path, so they may fail to find the plugin. +ip netns exec "$NS_SERVER" pppoe-server -I "$VETH_SERVER" \ + -L "$IP_SERVER" -R "$IP_CLIENT" -N 1 -q "$(command -v pppd)" \ + -k -O "$(pwd)/pppoe-server-options" -g "$PPPOE_PLUGIN" + +# Start the PPP Client +ip netns exec "$NS_CLIENT" pppd \ + local debug updetach noipdefault noauth nodefaultroute \ + plugin "$PPPOE_PLUGIN" nic-"$VETH_CLIENT" + +ppp_test_connectivity + +log_test "PPPoE" + +# Dump syslog messages if the test failed +if [ "$RET" -ne 0 ]; then + while read -r _sign _date _time len _from _to + do len=${len##*=} + read -n "$len" -r LINE + echo "$LINE" + done < "$PPPOE_LOG" +fi + +exit "$EXIT_STATUS" From 1047e14b44edecbbab02a86514a083b8db9fde4d Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Fri, 3 Apr 2026 12:09:23 +0300 Subject: [PATCH 1184/1561] net/mlx5e: XSK, Increase size for chunk_size param When 64K pages are used, chunk_size can take the 64K value which doesn't fit in u16. This results in overflows that are detected in mlx5e_mpwrq_log_wqe_sz(). Increase the type to u32 to fix this. Signed-off-by: Dragos Tatulea Reviewed-by: Carolina Jubran Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260403090927.139042-2-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/en/params.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index 9b1a2aed17c3..275f9be53a34 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -8,7 +8,7 @@ struct mlx5e_xsk_param { u16 headroom; - u16 chunk_size; + u32 chunk_size; bool unaligned; }; From 833e72645aac10e9ca9459c4740c417e5b04faf6 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Fri, 3 Apr 2026 12:09:24 +0300 Subject: [PATCH 1185/1561] net/mlx5e: XDP, Improve dma address calculation of linear part for XDP_TX When calculating the dma address of the linear part of an XDP frame, the formula assumes that there is a single XDP buffer per page. Extend the formula to allow multiple XDP buffers per page by calculating the data offset in the page. This is a preparation for the upcoming removal of a single XDP buffer per page limitation when the formula will no longer be correct. Signed-off-by: Dragos Tatulea Reviewed-by: Carolina Jubran Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260403090927.139042-3-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 04e1b5fa4825..d3bab198c99c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -123,7 +123,7 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq, * mode. */ - dma_addr = page_pool_get_dma_addr(page) + (xdpf->data - (void *)xdpf); + dma_addr = page_pool_get_dma_addr(page) + offset_in_page(xdpf->data); dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd->len, DMA_BIDIRECTIONAL); if (xdptxd->has_frags) { From 2dfaa02387743306e1821ad01d4d4f5e7793cfb0 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Fri, 3 Apr 2026 12:09:25 +0300 Subject: [PATCH 1186/1561] net/mlx5e: XDP, Remove stride size limitation Currently XDP mode always uses PAGE_SIZE strides. This limitation existed because page fragment counting was not implemented when XDP was added. Furthermore, due to this limitation there were other issues as well on system with larger pages (e.g. 64K): - XDP for Striding RQ was effectively disabled on such systems. - Legacy RQ allows the configuration but uses a fixed scheme of one XDP buffer per page which is inefficient. As fragment counting was added during the driver conversion to page_pool and the support for XDP multi-buffer, it is now possible to remove this stride size limitation. This patch does just that. Now it is possible to use XDP on systems with higher page sizes (e.g. 64K): - For Striding RQ, loading the program is no longer blocked. Although a 64K page can fit any packet, MTUs that result in stride > 8K will still make the RQ in non-linear mode. That's because the HW doesn't support a higher than 8K stride. - For Legacy RQ, the stride size was PAGE_SIZE which was very inefficient. Now the stride size will be calculated relative to MTU. Legacy RQ will always be in linear mode for larger system pages. This can be observed with an XDP_DROP test [1] when running in Legacy RQ mode on a ARM Neoverse-N1 system with a 64K page size: +-----------------------------------------------+ | MTU | baseline | this change | improvement | |------+------------+-------------+-------------| | 1500 | 15.55 Mpps | 18.99 Mpps | 22.0 % | | 9000 | 15.53 Mpps | 18.24 Mpps | 17.5 % | +-----------------------------------------------+ There are performance benefits for Striding RQ mode as well: - Striding RQ non-linear mode now uses 256B strides, just like non-XDP mode. - Striding RQ linear mode can now fit a number of XDP buffers per page that is relative to the MTU size. That means that on 4K page systems and a small enough MTU, 2 XDP buffers can fit in one page. The above benefits for Striding RQ can be observed with an XDP_DROP test [1] when running on a 4K page x86_64 system (Intel Xeon Platinum 8580): +-----------------------------------------------+ | MTU | baseline | this change | improvement | |------+------------+-------------+-------------| | 1000 | 28.36 Mpps | 33.98 Mpps | 19.82 % | | 9000 | 20.76 Mpps | 26.30 Mpps | 26.70 % | +-----------------------------------------------+ [1] Test description: - xdp-bench with XDP_DROP - RX: single queue - TX: sends 64B packets to saturate CPU on RX side Signed-off-by: Dragos Tatulea Reviewed-by: Carolina Jubran Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260403090927.139042-4-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/en/params.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 26bb31c56e45..1f4a547917ba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -298,12 +298,9 @@ static u32 mlx5e_rx_get_linear_stride_sz(struct mlx5_core_dev *mdev, * no_head_tail_room should be set in the case of XDP with Striding RQ * when SKB is not linear. This is because another page is allocated for the linear part. */ - sz = roundup_pow_of_two(mlx5e_rx_get_linear_sz_skb(params, no_head_tail_room)); + sz = mlx5e_rx_get_linear_sz_skb(params, no_head_tail_room); - /* XDP in mlx5e doesn't support multiple packets per page. - * Do not assume sz <= PAGE_SIZE if params->xdp_prog is set. - */ - return params->xdp_prog && sz < PAGE_SIZE ? PAGE_SIZE : sz; + return roundup_pow_of_two(sz); } static u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5_core_dev *mdev, @@ -453,10 +450,6 @@ u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, return order_base_2(mlx5e_rx_get_linear_stride_sz(mdev, params, rqo, true)); - /* XDP in mlx5e doesn't support multiple packets per page. */ - if (params->xdp_prog) - return PAGE_SHIFT; - return MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev); } From ebd4ad29cc828f762d98f41b2a6a3f806185616f Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Fri, 3 Apr 2026 12:09:26 +0300 Subject: [PATCH 1187/1561] net/mlx5e: XDP, Use a single linear page per rq Currently in striding rq there is one mlx5e_frag_page member per WQE for the linear page. This linear page is used only in XDP multi-buffer mode. This is wasteful because only one linear page is needed per rq: the page gets refreshed on every packet, regardless of WQE. Furthermore, it is not needed in other modes (non-XDP, XDP single-buffer). This change moves the linear page into its own structure (struct mlx5_mpw_linear_info) and allocates it only when necessary. A special structure is created because an upcoming patch will extend this structure to support fragmentation of the linear page. This patch has no functional changes. Signed-off-by: Dragos Tatulea Reviewed-by: Carolina Jubran Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260403090927.139042-5-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 6 ++- .../net/ethernet/mellanox/mlx5/core/en_main.c | 37 ++++++++++++++++--- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 17 +++++---- 3 files changed, 47 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index c7ac6ebe8290..592234780f2b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -591,10 +591,13 @@ union mlx5e_alloc_units { struct mlx5e_mpw_info { u16 consumed_strides; DECLARE_BITMAP(skip_release_bitmap, MLX5_MPWRQ_MAX_PAGES_PER_WQE); - struct mlx5e_frag_page linear_page; union mlx5e_alloc_units alloc_units; }; +struct mlx5e_mpw_linear_info { + struct mlx5e_frag_page frag_page; +}; + #define MLX5E_MAX_RX_FRAGS 4 struct mlx5e_rq; @@ -689,6 +692,7 @@ struct mlx5e_rq { u8 umr_wqebbs; u8 mtts_per_wqe; u8 umr_mode; + struct mlx5e_mpw_linear_info *linear_info; struct mlx5e_shampo_hd *shampo; } mpwqe; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 1238e5356012..aa8359a48b12 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -369,6 +369,29 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node) return 0; } +static int mlx5e_rq_alloc_mpwqe_linear_info(struct mlx5e_rq *rq, int node, + struct mlx5e_params *params, + struct mlx5e_rq_opt_param *rqo, + u32 *pool_size) +{ + struct mlx5_core_dev *mdev = rq->mdev; + struct mlx5e_mpw_linear_info *li; + + if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, rqo) || + !params->xdp_prog) + return 0; + + li = kvzalloc_node(sizeof(*li), GFP_KERNEL, node); + if (!li) + return -ENOMEM; + + rq->mpwqe.linear_info = li; + + /* additional page per packet for the linear part */ + *pool_size *= 2; + + return 0; +} static u8 mlx5e_mpwrq_access_mode(enum mlx5e_mpwrq_umr_mode umr_mode) { @@ -915,10 +938,6 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, mlx5e_mpwqe_get_log_rq_size(mdev, params, rqo); pool_order = rq->mpwqe.page_shift - PAGE_SHIFT; - if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, rqo) && - params->xdp_prog) - pool_size *= 2; /* additional page per packet for the linear part */ - rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, rqo); @@ -936,10 +955,15 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, if (err) goto err_rq_mkey; - err = mlx5_rq_shampo_alloc(mdev, params, rq_param, rq, node); + err = mlx5e_rq_alloc_mpwqe_linear_info(rq, node, params, rqo, + &pool_size); if (err) goto err_free_mpwqe_info; + err = mlx5_rq_shampo_alloc(mdev, params, rq_param, rq, node); + if (err) + goto err_free_mpwqe_linear_info; + break; default: /* MLX5_WQ_TYPE_CYCLIC */ err = mlx5_wq_cyc_create(mdev, &rq_param->wq, rqc_wq, @@ -1054,6 +1078,8 @@ err_free_by_rq_type: switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: mlx5e_rq_free_shampo(rq); +err_free_mpwqe_linear_info: + kvfree(rq->mpwqe.linear_info); err_free_mpwqe_info: kvfree(rq->mpwqe.info); err_rq_mkey: @@ -1081,6 +1107,7 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq) switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: mlx5e_rq_free_shampo(rq); + kvfree(rq->mpwqe.linear_info); kvfree(rq->mpwqe.info); mlx5_core_destroy_mkey(rq->mdev, be32_to_cpu(rq->mpwqe.umr_mkey_be)); mlx5e_free_mpwqe_rq_drop_page(rq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index f5c0e2a0ada9..feb042d84b8e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1869,6 +1869,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w struct mlx5e_frag_page *frag_page = &wi->alloc_units.frag_pages[page_idx]; u16 headlen = min_t(u16, MLX5E_RX_MAX_HEAD, cqe_bcnt); struct mlx5e_frag_page *head_page = frag_page; + struct mlx5e_frag_page *linear_page = NULL; struct mlx5e_xdp_buff *mxbuf = &rq->mxbuf; u32 page_size = BIT(rq->mpwqe.page_shift); u32 frag_offset = head_offset; @@ -1897,13 +1898,15 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w if (prog) { /* area for bpf_xdp_[store|load]_bytes */ net_prefetchw(netmem_address(frag_page->netmem) + frag_offset); + + linear_page = &rq->mpwqe.linear_info->frag_page; if (unlikely(mlx5e_page_alloc_fragmented(rq->page_pool, - &wi->linear_page))) { + linear_page))) { rq->stats->buff_alloc_err++; return NULL; } - va = netmem_address(wi->linear_page.netmem); + va = netmem_address(linear_page->netmem); net_prefetchw(va); /* xdp_frame data area */ linear_hr = XDP_PACKET_HEADROOM; linear_data_len = 0; @@ -1966,10 +1969,10 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w for (pfp = head_page; pfp < frag_page; pfp++) pfp->frags++; - wi->linear_page.frags++; + linear_page->frags++; } mlx5e_page_release_fragmented(rq->page_pool, - &wi->linear_page); + linear_page); return NULL; /* page/packet was consumed by XDP */ } @@ -1988,13 +1991,13 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w mxbuf->xdp.data - mxbuf->xdp.data_meta); if (unlikely(!skb)) { mlx5e_page_release_fragmented(rq->page_pool, - &wi->linear_page); + linear_page); return NULL; } skb_mark_for_recycle(skb); - wi->linear_page.frags++; - mlx5e_page_release_fragmented(rq->page_pool, &wi->linear_page); + linear_page->frags++; + mlx5e_page_release_fragmented(rq->page_pool, linear_page); if (xdp_buff_has_frags(&mxbuf->xdp)) { struct mlx5e_frag_page *pagep; From 25b8c9b6d7314d26f91bf115beafab58b5d376d0 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Fri, 3 Apr 2026 12:09:27 +0300 Subject: [PATCH 1188/1561] net/mlx5e: XDP, Use page fragments for linear data in multibuf-mode Currently in XDP multi-buffer mode for striding rq a whole page is allocated for the linear part of the XDP buffer. This is wasteful, especially on systems with larger page sizes. This change splits the page into fixed sized fragments. The page is replenished when the maximum number of allowed fragments is reached. When a fragment is not used, it will be simply recycled on next packet. This is great for XDP_DROP as the fragment can be recycled for the next packet. In the most extreme case (XDP_DROP everything), there will be 0 fragments used => only one linear page allocation for the lifetime of the XDP program. The previous page_pool size increase was too conservative (doubling the size) and now there are much fewer allocations (1/8 for a 4K page). So drop the page_pool size extension altogether when the linear side page is used. This small improvement is at most visible for XDP_DROP tests with small 64B packets and a large enough MTU for Striding RQ to be in non-linear mode: +----------------------------------------------------------------------+ | System | MTU | baseline | this change | improvement | |----------------------+------+------------+-------------+-------------| | 4K page x86_64 [1] | 9000 | 26.30 Mpps | 30.45 Mpps | 15.80 % | | 64K page aarch64 [2] | 9000 | 15.27 Mpps | 20.10 Mpps | 31.62 % | +----------------------------------------------------------------------+ [1] Intel Xeon Platinum 8580 [2] ARM Neoverse-N1 Signed-off-by: Dragos Tatulea Reviewed-by: Carolina Jubran Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260403090927.139042-6-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 6 ++ .../net/ethernet/mellanox/mlx5/core/en_main.c | 25 ++++++-- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 60 +++++++++++++++---- 3 files changed, 74 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 592234780f2b..2270e2e550dd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -82,6 +82,9 @@ struct page_pool; #define MLX5E_PAGECNT_BIAS_MAX U16_MAX #define MLX5E_RX_MAX_HEAD (256) +#define MLX5E_XDP_LOG_MAX_LINEAR_SZ \ + order_base_2(MLX5_SKB_FRAG_SZ(XDP_PACKET_HEADROOM + MLX5E_RX_MAX_HEAD)) + #define MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE (8) #define MLX5E_SHAMPO_WQ_HEADER_PER_PAGE \ (PAGE_SIZE >> MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE) @@ -596,6 +599,7 @@ struct mlx5e_mpw_info { struct mlx5e_mpw_linear_info { struct mlx5e_frag_page frag_page; + u16 max_frags; }; #define MLX5E_MAX_RX_FRAGS 4 @@ -1081,6 +1085,8 @@ bool mlx5e_reset_rx_moderation(struct dim_cq_moder *cq_moder, u8 cq_period_mode, bool mlx5e_reset_rx_channels_moderation(struct mlx5e_channels *chs, u8 cq_period_mode, bool dim_enabled, bool keep_dim_state); +void mlx5e_mpwqe_dealloc_linear_page(struct mlx5e_rq *rq); + struct mlx5e_sq_param; int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_sq_param *param, struct xsk_buff_pool *xsk_pool, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index aa8359a48b12..4ba198fb9d6c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -371,11 +371,11 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node) static int mlx5e_rq_alloc_mpwqe_linear_info(struct mlx5e_rq *rq, int node, struct mlx5e_params *params, - struct mlx5e_rq_opt_param *rqo, - u32 *pool_size) + struct mlx5e_rq_opt_param *rqo) { struct mlx5_core_dev *mdev = rq->mdev; struct mlx5e_mpw_linear_info *li; + u32 linear_frag_count; if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, rqo) || !params->xdp_prog) @@ -385,10 +385,22 @@ static int mlx5e_rq_alloc_mpwqe_linear_info(struct mlx5e_rq *rq, int node, if (!li) return -ENOMEM; + linear_frag_count = + BIT(rq->mpwqe.page_shift - MLX5E_XDP_LOG_MAX_LINEAR_SZ); + if (linear_frag_count > U16_MAX) { + netdev_warn(rq->netdev, + "rq %d: linear_frag_count (%u) larger than expected (%u), page_shift: %u, log_max_linear_sz: %u\n", + rq->ix, linear_frag_count, U16_MAX, + rq->mpwqe.page_shift, MLX5E_XDP_LOG_MAX_LINEAR_SZ); + kvfree(li); + return -EINVAL; + } + + li->max_frags = linear_frag_count; rq->mpwqe.linear_info = li; - /* additional page per packet for the linear part */ - *pool_size *= 2; + /* Set to max to force allocation on first run. */ + li->frag_page.frags = li->max_frags; return 0; } @@ -955,8 +967,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, if (err) goto err_rq_mkey; - err = mlx5e_rq_alloc_mpwqe_linear_info(rq, node, params, rqo, - &pool_size); + err = mlx5e_rq_alloc_mpwqe_linear_info(rq, node, params, rqo); if (err) goto err_free_mpwqe_info; @@ -1347,6 +1358,8 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq) mlx5_wq_ll_pop(wq, wqe_ix_be, &wqe->next.next_wqe_index); } + + mlx5e_mpwqe_dealloc_linear_page(rq); } else { struct mlx5_wq_cyc *wq = &rq->wqe.wq; u16 missing = mlx5_wq_cyc_missing(wq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index feb042d84b8e..5b60aa47c75b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -300,6 +300,35 @@ static void mlx5e_page_release_fragmented(struct page_pool *pp, page_pool_put_unrefed_netmem(pp, netmem, -1, true); } +static int mlx5e_mpwqe_linear_page_refill(struct mlx5e_rq *rq) +{ + struct mlx5e_mpw_linear_info *li = rq->mpwqe.linear_info; + + if (likely(li->frag_page.frags < li->max_frags)) + return 0; + + if (likely(li->frag_page.netmem)) { + mlx5e_page_release_fragmented(rq->page_pool, &li->frag_page); + li->frag_page.netmem = 0; + } + + return mlx5e_page_alloc_fragmented(rq->page_pool, &li->frag_page); +} + +static void *mlx5e_mpwqe_get_linear_page_frag(struct mlx5e_rq *rq) +{ + struct mlx5e_mpw_linear_info *li = rq->mpwqe.linear_info; + u32 frag_offset; + + if (unlikely(mlx5e_mpwqe_linear_page_refill(rq))) + return NULL; + + frag_offset = li->frag_page.frags << MLX5E_XDP_LOG_MAX_LINEAR_SZ; + WARN_ON(frag_offset >= BIT(rq->mpwqe.page_shift)); + + return netmem_address(li->frag_page.netmem) + frag_offset; +} + static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *frag) { @@ -702,6 +731,22 @@ static void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) bitmap_fill(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe); } +void mlx5e_mpwqe_dealloc_linear_page(struct mlx5e_rq *rq) +{ + struct mlx5e_mpw_linear_info *li = rq->mpwqe.linear_info; + + if (!li || !li->frag_page.netmem) + return; + + mlx5e_page_release_fragmented(rq->page_pool, &li->frag_page); + + /* Recovery flow can call this function and then alloc again, so leave + * things in a good state for re-allocation. + */ + li->frag_page.netmem = 0; + li->frag_page.frags = li->max_frags; +} + INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) { struct mlx5_wq_cyc *wq = &rq->wqe.wq; @@ -1899,18 +1944,17 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w /* area for bpf_xdp_[store|load]_bytes */ net_prefetchw(netmem_address(frag_page->netmem) + frag_offset); - linear_page = &rq->mpwqe.linear_info->frag_page; - if (unlikely(mlx5e_page_alloc_fragmented(rq->page_pool, - linear_page))) { + va = mlx5e_mpwqe_get_linear_page_frag(rq); + if (!va) { rq->stats->buff_alloc_err++; return NULL; } - va = netmem_address(linear_page->netmem); net_prefetchw(va); /* xdp_frame data area */ linear_hr = XDP_PACKET_HEADROOM; linear_data_len = 0; linear_frame_sz = MLX5_SKB_FRAG_SZ(linear_hr + MLX5E_RX_MAX_HEAD); + linear_page = &rq->mpwqe.linear_info->frag_page; } else { skb = napi_alloc_skb(rq->cq.napi, ALIGN(MLX5E_RX_MAX_HEAD, sizeof(long))); @@ -1971,8 +2015,6 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w linear_page->frags++; } - mlx5e_page_release_fragmented(rq->page_pool, - linear_page); return NULL; /* page/packet was consumed by XDP */ } @@ -1989,15 +2031,11 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w rq, mxbuf->xdp.data_hard_start, linear_frame_sz, mxbuf->xdp.data - mxbuf->xdp.data_hard_start, len, mxbuf->xdp.data - mxbuf->xdp.data_meta); - if (unlikely(!skb)) { - mlx5e_page_release_fragmented(rq->page_pool, - linear_page); + if (unlikely(!skb)) return NULL; - } skb_mark_for_recycle(skb); linear_page->frags++; - mlx5e_page_release_fragmented(rq->page_pool, linear_page); if (xdp_buff_has_frags(&mxbuf->xdp)) { struct mlx5e_frag_page *pagep; From 7f0de94ef44653764fa2fb8548b1253f0554f213 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Thu, 26 Mar 2026 12:14:31 +0200 Subject: [PATCH 1189/1561] wifi: mac80211: add a TXQ for management frames on NAN devices Currently there is no TXQ for non-data frames. Add a new txq_mgmt for this purpose and create one of these on NAN devices. On NAN devices, these frames may only be transmitted during the discovery window and it is therefore helpful to schedule them using a queue. Signed-off-by: Benjamin Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260326121156.32eddd986bd2.Iee95758287c276155fbd7779d3f263339308e083@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 ++ net/mac80211/iface.c | 28 ++++++++++++++++++++++++++-- net/mac80211/tx.c | 20 ++++++++++++++++---- net/mac80211/util.c | 34 +++++++++++++++++++++++----------- 4 files changed, 67 insertions(+), 17 deletions(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 72395895dc0e..b1c412eabf2b 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2074,6 +2074,7 @@ enum ieee80211_neg_ttlm_res { * @drv_priv: data area for driver use, will always be aligned to * sizeof(void \*). * @txq: the multicast data TX queue + * @txq_mgmt: the mgmt frame TX queue, currently only exists for NAN devices * @offload_flags: 802.3 -> 802.11 enapsulation offload flags, see * &enum ieee80211_offload_flags. */ @@ -2092,6 +2093,7 @@ struct ieee80211_vif { u8 hw_queue[IEEE80211_NUM_ACS]; struct ieee80211_txq *txq; + struct ieee80211_txq *txq_mgmt; netdev_features_t netdev_features; u32 driver_flags; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 125897717a4c..7518dcbcdf1c 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -682,6 +682,10 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do if (sdata->vif.txq) ieee80211_txq_purge(sdata->local, to_txq_info(sdata->vif.txq)); + if (sdata->vif.txq_mgmt) + ieee80211_txq_purge(sdata->local, + to_txq_info(sdata->vif.txq_mgmt)); + sdata->bss = NULL; if (local->open_count == 0) @@ -2223,10 +2227,16 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, lockdep_assert_wiphy(local->hw.wiphy); if (type == NL80211_IFTYPE_P2P_DEVICE || type == NL80211_IFTYPE_NAN) { + int size = ALIGN(sizeof(*sdata) + local->hw.vif_data_size, + sizeof(void *)); struct wireless_dev *wdev; + int txq_size = 0; - sdata = kzalloc(sizeof(*sdata) + local->hw.vif_data_size, - GFP_KERNEL); + if (type == NL80211_IFTYPE_NAN) + txq_size = sizeof(struct txq_info) + + local->hw.txq_data_size; + + sdata = kzalloc(size + txq_size, GFP_KERNEL); if (!sdata) return -ENOMEM; wdev = &sdata->wdev; @@ -2236,6 +2246,16 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, ieee80211_assign_perm_addr(local, wdev->address, type); memcpy(sdata->vif.addr, wdev->address, ETH_ALEN); ether_addr_copy(sdata->vif.bss_conf.addr, sdata->vif.addr); + + /* + * Add a management TXQ for NAN devices which includes frames + * that will only be transmitted during discovery windows (DWs) + */ + if (type == NL80211_IFTYPE_NAN) { + txqi = (struct txq_info *)((unsigned long)sdata + size); + ieee80211_txq_init(sdata, NULL, txqi, + IEEE80211_NUM_TIDS); + } } else { int size = ALIGN(sizeof(*sdata) + local->hw.vif_data_size, sizeof(void *)); @@ -2386,6 +2406,10 @@ void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata) if (sdata->vif.txq) ieee80211_txq_purge(sdata->local, to_txq_info(sdata->vif.txq)); + if (sdata->vif.txq_mgmt) + ieee80211_txq_purge(sdata->local, + to_txq_info(sdata->vif.txq_mgmt)); + synchronize_rcu(); cfg80211_unregister_wdev(&sdata->wdev); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index e0091a6196fc..f0f23e94db04 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1313,13 +1313,19 @@ static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local, unlikely(!ieee80211_is_data_present(hdr->frame_control))) { if ((!ieee80211_is_mgmt(hdr->frame_control) || ieee80211_is_bufferable_mmpdu(skb) || - vif->type == NL80211_IFTYPE_STATION) && + vif->type == NL80211_IFTYPE_STATION || + vif->type == NL80211_IFTYPE_NAN || + vif->type == NL80211_IFTYPE_NAN_DATA) && sta && sta->uploaded) { /* * This will be NULL if the driver didn't set the * opt-in hardware flag. */ txq = sta->sta.txq[IEEE80211_NUM_TIDS]; + } else if ((!ieee80211_is_mgmt(hdr->frame_control) || + ieee80211_is_bufferable_mmpdu(skb)) && + !sta) { + txq = vif->txq_mgmt; } } else if (sta) { u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK; @@ -1512,9 +1518,15 @@ void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata, txqi->txq.vif = &sdata->vif; if (!sta) { - sdata->vif.txq = &txqi->txq; - txqi->txq.tid = 0; - txqi->txq.ac = IEEE80211_AC_BE; + txqi->txq.tid = tid; + + if (tid == IEEE80211_NUM_TIDS) { + sdata->vif.txq_mgmt = &txqi->txq; + txqi->txq.ac = IEEE80211_AC_VO; + } else { + sdata->vif.txq = &txqi->txq; + txqi->txq.ac = IEEE80211_AC_BE; + } return; } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 8987a4504520..72e73f4f79c5 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -325,7 +325,7 @@ static void __ieee80211_wake_txqs(struct ieee80211_sub_if_data *sdata, int ac) struct ieee80211_vif *vif = &sdata->vif; struct fq *fq = &local->fq; struct ps_data *ps = NULL; - struct txq_info *txqi; + struct txq_info *txqi = NULL; struct sta_info *sta; int i; @@ -344,37 +344,49 @@ static void __ieee80211_wake_txqs(struct ieee80211_sub_if_data *sdata, int ac) for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) { struct ieee80211_txq *txq = sta->sta.txq[i]; + struct txq_info *sta_txqi; if (!txq) continue; - txqi = to_txq_info(txq); + sta_txqi = to_txq_info(txq); if (ac != txq->ac) continue; if (!test_and_clear_bit(IEEE80211_TXQ_DIRTY, - &txqi->flags)) + &sta_txqi->flags)) continue; spin_unlock(&fq->lock); - drv_wake_tx_queue(local, txqi); + drv_wake_tx_queue(local, sta_txqi); spin_lock(&fq->lock); } } - if (!vif->txq) - goto out; + if (vif->txq) { + txqi = to_txq_info(vif->txq); - txqi = to_txq_info(vif->txq); + /* txq and txq_mgmt are mutually exclusive */ + WARN_ON_ONCE(vif->txq_mgmt); - if (!test_and_clear_bit(IEEE80211_TXQ_DIRTY, &txqi->flags) || - (ps && atomic_read(&ps->num_sta_ps)) || ac != vif->txq->ac) - goto out; + if (!test_and_clear_bit(IEEE80211_TXQ_DIRTY, &txqi->flags) || + (ps && atomic_read(&ps->num_sta_ps)) || + ac != vif->txq->ac) + txqi = NULL; + } else if (vif->txq_mgmt) { + txqi = to_txq_info(vif->txq_mgmt); + + if (!test_and_clear_bit(IEEE80211_TXQ_DIRTY, &txqi->flags) || + ac != vif->txq_mgmt->ac) + txqi = NULL; + } spin_unlock(&fq->lock); - drv_wake_tx_queue(local, txqi); + if (txqi) + drv_wake_tx_queue(local, txqi); + local_bh_enable(); return; out: From 8ea6b92faebe4bad0e271cb9a8d819b8955ed476 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Thu, 26 Mar 2026 12:14:32 +0200 Subject: [PATCH 1190/1561] wifi: ieee80211: add more NAN definitions These will be needed to implement NAN synchronization in mac80211_hwsim. Signed-off-by: Benjamin Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260326121156.ebb52db4c1eb.Ie8142cf92fc8c97c744a7c8b0a94ce3da6ff75ec@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211-nan.h | 37 +++++++++++++++++++++++++++++++++++ include/linux/ieee80211.h | 1 + 2 files changed, 38 insertions(+) diff --git a/include/linux/ieee80211-nan.h b/include/linux/ieee80211-nan.h index ebf28ea651f9..455033955e54 100644 --- a/include/linux/ieee80211-nan.h +++ b/include/linux/ieee80211-nan.h @@ -37,4 +37,41 @@ #define NAN_DEV_CAPA_NDPE_SUPPORTED 0x08 #define NAN_DEV_CAPA_S3_SUPPORTED 0x10 +/* NAN attributes, as defined in Wi-Fi Aware (TM) specification 4.0 Table 42 */ +#define NAN_ATTR_MASTER_INDICATION 0x00 +#define NAN_ATTR_CLUSTER_INFO 0x01 + +struct ieee80211_nan_attr { + u8 attr; + __le16 length; + u8 data[]; +} __packed; + +struct ieee80211_nan_master_indication { + u8 master_pref; + u8 random_factor; +} __packed; + +struct ieee80211_nan_anchor_master_info { + union { + __le64 master_rank; + struct { + u8 master_addr[ETH_ALEN]; + u8 random_factor; + u8 master_pref; + } __packed; + } __packed; + u8 hop_count; + __le32 ambtt; +} __packed; + +#define for_each_nan_attr(_attr, _data, _datalen) \ + for (_attr = (const struct ieee80211_nan_attr *)(_data); \ + (const u8 *)(_data) + (_datalen) - (const u8 *)_attr >= \ + (int)sizeof(*_attr) && \ + (const u8 *)(_data) + (_datalen) - (const u8 *)_attr >= \ + (int)sizeof(*_attr) + le16_to_cpu(_attr->length); \ + _attr = (const struct ieee80211_nan_attr *) \ + (_attr->data + le16_to_cpu(_attr->length))) + #endif /* LINUX_IEEE80211_NAN_H */ diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index b5d649db123f..ffa8f9f77efe 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2240,6 +2240,7 @@ struct ieee80211_multiple_bssid_configuration { #define WLAN_OUI_WFA 0x506f9a #define WLAN_OUI_TYPE_WFA_P2P 9 +#define WLAN_OUI_TYPE_WFA_NAN 0x13 #define WLAN_OUI_TYPE_WFA_DPP 0x1A #define WLAN_OUI_MICROSOFT 0x0050f2 #define WLAN_OUI_TYPE_MICROSOFT_WPA 1 From b16df0dacb3a77d4c1ce95f21c58a2b99ae10213 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Thu, 26 Mar 2026 12:14:33 +0200 Subject: [PATCH 1191/1561] wifi: mac80211: export ieee80211_calculate_rx_timestamp The function is quite useful when handling beacon timestamps. Export it so that it can be used by mac80211_hwsim and others. Signed-off-by: Benjamin Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260326121156.a1abc9c52f37.Ieabfe66768b1bf64c3076d62e73c50794faeacdc@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 18 ++++++++++++++++++ net/mac80211/ibss.c | 2 +- net/mac80211/ieee80211_i.h | 4 ---- net/mac80211/mesh_sync.c | 2 +- net/mac80211/rx.c | 2 +- net/mac80211/scan.c | 2 +- net/mac80211/util.c | 18 +++--------------- 7 files changed, 25 insertions(+), 23 deletions(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index b1c412eabf2b..8b513f777187 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -7393,6 +7393,24 @@ void ieee80211_disable_rssi_reports(struct ieee80211_vif *vif); */ int ieee80211_ave_rssi(struct ieee80211_vif *vif, int link_id); +/** + * ieee80211_calculate_rx_timestamp - calculate timestamp in frame + * @hw: pointer as obtained from ieee80211_alloc_hw() + * @status: RX status + * @mpdu_len: total MPDU length (including FCS) + * @mpdu_offset: offset into MPDU to calculate timestamp at + * + * This function calculates the RX timestamp at the given MPDU offset, taking + * into account what the RX timestamp was. An offset of 0 will just normalize + * the timestamp to TSF at beginning of MPDU reception. + * + * Returns: the calculated timestamp + */ +u64 ieee80211_calculate_rx_timestamp(struct ieee80211_hw *hw, + struct ieee80211_rx_status *status, + unsigned int mpdu_len, + unsigned int mpdu_offset); + /** * ieee80211_report_wowlan_wakeup - report WoWLAN wakeup * @vif: virtual interface diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 1e1ab25d9d8d..97292ff51475 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -1127,7 +1127,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, if (ieee80211_have_rx_timestamp(rx_status)) { /* time when timestamp field was received */ rx_timestamp = - ieee80211_calculate_rx_timestamp(local, rx_status, + ieee80211_calculate_rx_timestamp(&local->hw, rx_status, len + FCS_LEN, 24); } else { /* diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index bacb49ad2817..53d783769642 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1922,10 +1922,6 @@ ieee80211_vif_get_num_mcast_if(struct ieee80211_sub_if_data *sdata) return -1; } -u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, - struct ieee80211_rx_status *status, - unsigned int mpdu_len, - unsigned int mpdu_offset); int ieee80211_hw_config(struct ieee80211_local *local, int radio_idx, u32 changed); int ieee80211_hw_conf_chan(struct ieee80211_local *local); diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c index 3a66b4cefca7..24a68eef7db8 100644 --- a/net/mac80211/mesh_sync.c +++ b/net/mac80211/mesh_sync.c @@ -103,7 +103,7 @@ mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, u16 stype, * section. */ if (ieee80211_have_rx_timestamp(rx_status)) - t_r = ieee80211_calculate_rx_timestamp(local, rx_status, + t_r = ieee80211_calculate_rx_timestamp(&local->hw, rx_status, len + FCS_LEN, 24); else t_r = drv_get_tsf(local, sdata); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index d9a654ef082d..dbdd67c181d8 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -404,7 +404,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, while ((pos - (u8 *)rthdr) & 7) *pos++ = 0; put_unaligned_le64( - ieee80211_calculate_rx_timestamp(local, status, + ieee80211_calculate_rx_timestamp(&local->hw, status, mpdulen, 0), pos); rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_TSFT)); diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 4823c8d45639..eeff230bd909 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -216,7 +216,7 @@ ieee80211_bss_info_update(struct ieee80211_local *local, if (link_conf) { bss_meta.parent_tsf = - ieee80211_calculate_rx_timestamp(local, + ieee80211_calculate_rx_timestamp(&local->hw, rx_status, len + FCS_LEN, 24); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 72e73f4f79c5..38b0c42c4c13 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -3423,20 +3423,7 @@ u8 ieee80211_mcs_to_chains(const struct ieee80211_mcs_info *mcs) return 1; } -/** - * ieee80211_calculate_rx_timestamp - calculate timestamp in frame - * @local: mac80211 hw info struct - * @status: RX status - * @mpdu_len: total MPDU length (including FCS) - * @mpdu_offset: offset into MPDU to calculate timestamp at - * - * This function calculates the RX timestamp at the given MPDU offset, taking - * into account what the RX timestamp was. An offset of 0 will just normalize - * the timestamp to TSF at beginning of MPDU reception. - * - * Returns: the calculated timestamp - */ -u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, +u64 ieee80211_calculate_rx_timestamp(struct ieee80211_hw *hw, struct ieee80211_rx_status *status, unsigned int mpdu_len, unsigned int mpdu_offset) @@ -3555,7 +3542,7 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, case RX_ENC_LEGACY: { struct ieee80211_supported_band *sband; - sband = local->hw.wiphy->bands[status->band]; + sband = hw->wiphy->bands[status->band]; ri.legacy = sband->bitrates[status->rate_idx].bitrate; if (mactime_plcp_start) { @@ -3587,6 +3574,7 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, return ts; } +EXPORT_SYMBOL_GPL(ieee80211_calculate_rx_timestamp); /* Cancel CAC for the interfaces under the specified @local. If @ctx is * also provided, only the interfaces using that ctx will be canceled. From 23eab70e301f8b88282be658ac9b34b5a5953479 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Thu, 26 Mar 2026 12:14:34 +0200 Subject: [PATCH 1192/1561] wifi: mac80211: run NAN DE code only when appropriate NAN DE (Discovery Engine) may be handled in the device or in user space. When handled in user space, all the NAN func management code should not run. Moreover, devices with user space DE should not provide the add/del_nan_func callbaks. For such devices, ieee80211_reconfig_nan will always fail. Make it clear what parts of ieee80211_if_nan are relevant to DE management, and touch those only when DE is offloaded. Add a check that makes sure that a driver doesn't register with add_del/nan_func callbacks if DE is in user space. Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260326121156.6665f64865cd.Iee24bef3bae2e1d502216192e760c1e699d271c9@changeid Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 52 ++++++++++++++++++++++++-------------- net/mac80211/ieee80211_i.h | 13 ++++++---- net/mac80211/iface.c | 25 +++++++++++------- net/mac80211/main.c | 4 ++- net/mac80211/util.c | 10 +++++--- 5 files changed, 67 insertions(+), 37 deletions(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index b6163dcc7e92..3a6c5fed2420 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -502,12 +502,15 @@ static int ieee80211_add_nan_func(struct wiphy *wiphy, if (!ieee80211_sdata_running(sdata)) return -ENETDOWN; - spin_lock_bh(&sdata->u.nan.func_lock); + if (WARN_ON(wiphy->nan_capa.flags & WIPHY_NAN_FLAGS_USERSPACE_DE)) + return -EOPNOTSUPP; - ret = idr_alloc(&sdata->u.nan.function_inst_ids, + spin_lock_bh(&sdata->u.nan.de.func_lock); + + ret = idr_alloc(&sdata->u.nan.de.function_inst_ids, nan_func, 1, sdata->local->hw.max_nan_de_entries + 1, GFP_ATOMIC); - spin_unlock_bh(&sdata->u.nan.func_lock); + spin_unlock_bh(&sdata->u.nan.de.func_lock); if (ret < 0) return ret; @@ -518,10 +521,10 @@ static int ieee80211_add_nan_func(struct wiphy *wiphy, ret = drv_add_nan_func(sdata->local, sdata, nan_func); if (ret) { - spin_lock_bh(&sdata->u.nan.func_lock); - idr_remove(&sdata->u.nan.function_inst_ids, + spin_lock_bh(&sdata->u.nan.de.func_lock); + idr_remove(&sdata->u.nan.de.function_inst_ids, nan_func->instance_id); - spin_unlock_bh(&sdata->u.nan.func_lock); + spin_unlock_bh(&sdata->u.nan.de.func_lock); } return ret; @@ -534,9 +537,9 @@ ieee80211_find_nan_func_by_cookie(struct ieee80211_sub_if_data *sdata, struct cfg80211_nan_func *func; int id; - lockdep_assert_held(&sdata->u.nan.func_lock); + lockdep_assert_held(&sdata->u.nan.de.func_lock); - idr_for_each_entry(&sdata->u.nan.function_inst_ids, func, id) { + idr_for_each_entry(&sdata->u.nan.de.function_inst_ids, func, id) { if (func->cookie == cookie) return func; } @@ -555,13 +558,16 @@ static void ieee80211_del_nan_func(struct wiphy *wiphy, !ieee80211_sdata_running(sdata)) return; - spin_lock_bh(&sdata->u.nan.func_lock); + if (WARN_ON(wiphy->nan_capa.flags & WIPHY_NAN_FLAGS_USERSPACE_DE)) + return; + + spin_lock_bh(&sdata->u.nan.de.func_lock); func = ieee80211_find_nan_func_by_cookie(sdata, cookie); if (func) instance_id = func->instance_id; - spin_unlock_bh(&sdata->u.nan.func_lock); + spin_unlock_bh(&sdata->u.nan.de.func_lock); if (instance_id) drv_del_nan_func(sdata->local, sdata, instance_id); @@ -4888,18 +4894,22 @@ void ieee80211_nan_func_terminated(struct ieee80211_vif *vif, if (WARN_ON(vif->type != NL80211_IFTYPE_NAN)) return; - spin_lock_bh(&sdata->u.nan.func_lock); + if (WARN_ON(sdata->local->hw.wiphy->nan_capa.flags & + WIPHY_NAN_FLAGS_USERSPACE_DE)) + return; - func = idr_find(&sdata->u.nan.function_inst_ids, inst_id); + spin_lock_bh(&sdata->u.nan.de.func_lock); + + func = idr_find(&sdata->u.nan.de.function_inst_ids, inst_id); if (WARN_ON(!func)) { - spin_unlock_bh(&sdata->u.nan.func_lock); + spin_unlock_bh(&sdata->u.nan.de.func_lock); return; } cookie = func->cookie; - idr_remove(&sdata->u.nan.function_inst_ids, inst_id); + idr_remove(&sdata->u.nan.de.function_inst_ids, inst_id); - spin_unlock_bh(&sdata->u.nan.func_lock); + spin_unlock_bh(&sdata->u.nan.de.func_lock); cfg80211_free_nan_func(func); @@ -4918,16 +4928,20 @@ void ieee80211_nan_func_match(struct ieee80211_vif *vif, if (WARN_ON(vif->type != NL80211_IFTYPE_NAN)) return; - spin_lock_bh(&sdata->u.nan.func_lock); + if (WARN_ON(sdata->local->hw.wiphy->nan_capa.flags & + WIPHY_NAN_FLAGS_USERSPACE_DE)) + return; - func = idr_find(&sdata->u.nan.function_inst_ids, match->inst_id); + spin_lock_bh(&sdata->u.nan.de.func_lock); + + func = idr_find(&sdata->u.nan.de.function_inst_ids, match->inst_id); if (WARN_ON(!func)) { - spin_unlock_bh(&sdata->u.nan.func_lock); + spin_unlock_bh(&sdata->u.nan.de.func_lock); return; } match->cookie = func->cookie; - spin_unlock_bh(&sdata->u.nan.func_lock); + spin_unlock_bh(&sdata->u.nan.de.func_lock); cfg80211_nan_match(ieee80211_vif_to_wdev(vif), match, gfp); } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 53d783769642..8d5f9a725fdf 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -987,16 +987,19 @@ struct ieee80211_if_mntr { * * @conf: current NAN configuration * @started: true iff NAN is started - * @func_lock: lock for @func_inst_ids - * @function_inst_ids: a bitmap of available instance_id's + * @de: Discovery Engine state (only valid if !WIPHY_NAN_FLAGS_USERSPACE_DE) + * @de.func_lock: lock for @de.function_inst_ids + * @de.function_inst_ids: a bitmap of available instance_id's */ struct ieee80211_if_nan { struct cfg80211_nan_conf conf; bool started; - /* protects function_inst_ids */ - spinlock_t func_lock; - struct idr function_inst_ids; + struct { + /* protects function_inst_ids */ + spinlock_t func_lock; + struct idr function_inst_ids; + } de; }; struct ieee80211_link_data_managed { diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 7518dcbcdf1c..f0a5a675c5a5 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -622,15 +622,19 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do break; case NL80211_IFTYPE_NAN: /* clean all the functions */ - spin_lock_bh(&sdata->u.nan.func_lock); + if (!(local->hw.wiphy->nan_capa.flags & + WIPHY_NAN_FLAGS_USERSPACE_DE)) { + spin_lock_bh(&sdata->u.nan.de.func_lock); - idr_for_each_entry(&sdata->u.nan.function_inst_ids, func, i) { - idr_remove(&sdata->u.nan.function_inst_ids, i); - cfg80211_free_nan_func(func); + idr_for_each_entry(&sdata->u.nan.de.function_inst_ids, + func, i) { + idr_remove(&sdata->u.nan.de.function_inst_ids, i); + cfg80211_free_nan_func(func); + } + idr_destroy(&sdata->u.nan.de.function_inst_ids); + + spin_unlock_bh(&sdata->u.nan.de.func_lock); } - idr_destroy(&sdata->u.nan.function_inst_ids); - - spin_unlock_bh(&sdata->u.nan.func_lock); break; default: wiphy_work_cancel(sdata->local->hw.wiphy, &sdata->work); @@ -1942,8 +1946,11 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, MONITOR_FLAG_OTHER_BSS; break; case NL80211_IFTYPE_NAN: - idr_init(&sdata->u.nan.function_inst_ids); - spin_lock_init(&sdata->u.nan.func_lock); + if (!(sdata->local->hw.wiphy->nan_capa.flags & + WIPHY_NAN_FLAGS_USERSPACE_DE)) { + idr_init(&sdata->u.nan.de.function_inst_ids); + spin_lock_init(&sdata->u.nan.de.func_lock); + } sdata->vif.bss_conf.bssid = sdata->vif.addr; break; case NL80211_IFTYPE_AP_VLAN: diff --git a/net/mac80211/main.c b/net/mac80211/main.c index d1bb6353908d..f47dd58770ad 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1157,7 +1157,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (WARN_ON(local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_NAN) && - (!local->ops->start_nan || !local->ops->stop_nan))) + ((!local->ops->start_nan || !local->ops->stop_nan) || + (local->hw.wiphy->nan_capa.flags & WIPHY_NAN_FLAGS_USERSPACE_DE && + (local->ops->add_nan_func || local->ops->del_nan_func))))) return -EINVAL; if (hw->wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO) { diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 38b0c42c4c13..36795529ff82 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1754,6 +1754,10 @@ static int ieee80211_reconfig_nan(struct ieee80211_sub_if_data *sdata) if (WARN_ON(res)) return res; + if (sdata->local->hw.wiphy->nan_capa.flags & + WIPHY_NAN_FLAGS_USERSPACE_DE) + return 0; + funcs = kzalloc_objs(*funcs, sdata->local->hw.max_nan_de_entries + 1); if (!funcs) return -ENOMEM; @@ -1762,12 +1766,12 @@ static int ieee80211_reconfig_nan(struct ieee80211_sub_if_data *sdata) * This is a little bit ugly. We need to call a potentially sleeping * callback for each NAN function, so we can't hold the spinlock. */ - spin_lock_bh(&sdata->u.nan.func_lock); + spin_lock_bh(&sdata->u.nan.de.func_lock); - idr_for_each_entry(&sdata->u.nan.function_inst_ids, func, id) + idr_for_each_entry(&sdata->u.nan.de.function_inst_ids, func, id) funcs[i++] = func; - spin_unlock_bh(&sdata->u.nan.func_lock); + spin_unlock_bh(&sdata->u.nan.de.func_lock); for (i = 0; funcs[i]; i++) { res = drv_add_nan_func(sdata->local, sdata, funcs[i]); From 589c06e8fdeec592e018004cebe283370160e581 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Thu, 26 Mar 2026 12:14:35 +0200 Subject: [PATCH 1193/1561] wifi: mac80211: add NAN local schedule support A NAN local schedule consist of a list of NAN channels, and an array that maps time slots to the channel it is scheduled to (or NULL to indicate unscheduled). A NAN channel is the configuration of a channel which is used for NAN operations. It is a new type of chanctx user (before, the only user is a link). A NAN channel may not have a chanctx assigned if it is ULWed out. A NAN channel may or may not be scheduled (for example, user space may want to prepare the resources before the actual schedule is configured). Add management of the NAN local schedule. Since we introduce a new chanctx user, also adjust the different for_each_chanctx_user_* macros to visit also the NAN channels and take those into account. Co-developed-by: Avraham Stern Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260326121156.03350fd40630.Id158f815cfc9b5ab1ebdb8ee608bda426e4d7474@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 66 +++++++ net/mac80211/Makefile | 2 +- net/mac80211/cfg.c | 13 ++ net/mac80211/chan.c | 132 ++++++++++--- net/mac80211/ieee80211_i.h | 20 +- net/mac80211/iface.c | 8 + net/mac80211/nan.c | 378 +++++++++++++++++++++++++++++++++++++ net/mac80211/util.c | 28 ++- 8 files changed, 609 insertions(+), 38 deletions(-) create mode 100644 net/mac80211/nan.c diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8b513f777187..cc23f99f8318 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -365,6 +365,7 @@ struct ieee80211_vif_chanctx_switch { * @BSS_CHANGED_MLD_VALID_LINKS: MLD valid links status changed. * @BSS_CHANGED_MLD_TTLM: negotiated TID to link mapping was changed * @BSS_CHANGED_TPE: transmit power envelope changed + * @BSS_CHANGED_NAN_LOCAL_SCHED: NAN local schedule changed (NAN mode only) */ enum ieee80211_bss_change { BSS_CHANGED_ASSOC = 1<<0, @@ -402,6 +403,7 @@ enum ieee80211_bss_change { BSS_CHANGED_MLD_VALID_LINKS = BIT_ULL(33), BSS_CHANGED_MLD_TTLM = BIT_ULL(34), BSS_CHANGED_TPE = BIT_ULL(35), + BSS_CHANGED_NAN_LOCAL_SCHED = BIT_ULL(36), /* when adding here, make sure to change ieee80211_reconfig */ }; @@ -866,6 +868,28 @@ struct ieee80211_bss_conf { u8 s1g_long_beacon_period; }; +#define IEEE80211_NAN_MAX_CHANNELS 3 + +/** + * struct ieee80211_nan_channel - NAN channel information + * + * @chanreq: channel request for this NAN channel. Even though this chanreq::ap + * is irrelevant for NAN, still store it for convenience - some functions + * require it as an argument. + * @needed_rx_chains: number of RX chains needed for this NAN channel + * @chanctx_conf: chanctx_conf assigned to this NAN channel. Will be %NULL + * if the channel is ULWed. + * @channel_entry: the Channel Entry blob as defined in Wi-Fi Aware + * (TM) 4.0 specification Table 100 (Channel Entry format for the NAN + * Availability attribute). + */ +struct ieee80211_nan_channel { + struct ieee80211_chan_req chanreq; + u8 needed_rx_chains; + struct ieee80211_chanctx_conf *chanctx_conf; + u8 channel_entry[6]; +}; + /** * enum mac80211_tx_info_flags - flags to describe transmission information/status * @@ -1917,6 +1941,8 @@ enum ieee80211_offload_flags { IEEE80211_OFFLOAD_DECAP_ENABLED = BIT(2), }; +#define IEEE80211_NAN_AVAIL_BLOB_MAX_LEN 54 + /** * struct ieee80211_eml_params - EHT Operating mode notification parameters * @@ -1942,6 +1968,32 @@ struct ieee80211_eml_params { u8 emlmr_mcs_map_bw[9]; }; +/** + * struct ieee80211_nan_sched_cfg - NAN schedule configuration + * @channels: array of NAN channels. A channel entry is in use if + * channels[i].chanreq.oper.chan is not NULL. + * @schedule: NAN local schedule - mapping of each 16TU time slot to + * the NAN channel on which the radio will operate. NULL if unscheduled. + * @avail_blob: NAN Availability attribute blob. + * @avail_blob_len: length of the @avail_blob in bytes. + * @deferred: indicates that the driver should notify peers before applying the + * new NAN schedule, and apply the new schedule the second NAN Slot + * boundary after it notified the peers, as defined in Wi-Fi Aware (TM) 4.0 + * specification, section 5.2.2. + * The driver must call ieee80211_nan_sched_update_done() after the + * schedule has been applied. + * If a HW restart happened while a deferred schedule update was pending, + * mac80211 will reconfigure the deferred schedule (and wait for the driver + * to notify that the schedule has been applied). + */ +struct ieee80211_nan_sched_cfg { + struct ieee80211_nan_channel channels[IEEE80211_NAN_MAX_CHANNELS]; + struct ieee80211_nan_channel *schedule[CFG80211_NAN_SCHED_NUM_TIME_SLOTS]; + u8 avail_blob[IEEE80211_NAN_AVAIL_BLOB_MAX_LEN]; + u16 avail_blob_len; + bool deferred; +}; + /** * struct ieee80211_vif_cfg - interface configuration * @assoc: association status @@ -1970,6 +2022,7 @@ struct ieee80211_eml_params { * your driver/device needs to do. * @ap_addr: AP MLD address, or BSSID for non-MLO connections * (station mode only) + * @nan_sched: NAN schedule parameters. &struct ieee80211_nan_sched_cfg */ struct ieee80211_vif_cfg { /* association related data */ @@ -1988,6 +2041,8 @@ struct ieee80211_vif_cfg { bool s1g; bool idle; u8 ap_addr[ETH_ALEN] __aligned(2); + /* Protected by the wiphy mutex */ + struct ieee80211_nan_sched_cfg nan_sched; }; #define IEEE80211_TTLM_NUM_TIDS 8 @@ -7756,6 +7811,17 @@ void ieee80211_nan_func_match(struct ieee80211_vif *vif, struct cfg80211_nan_match_params *match, gfp_t gfp); +/** + * ieee80211_nan_sched_update_done - notify that NAN schedule update is done + * + * This function is called by the driver to notify mac80211 that the NAN + * schedule update has been applied. + * Must be called with wiphy mutex held. May sleep. + * + * @vif: &struct ieee80211_vif pointer from the add_interface callback. + */ +void ieee80211_nan_sched_update_done(struct ieee80211_vif *vif); + /** * ieee80211_calc_rx_airtime - calculate estimated transmission airtime for RX. * diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index b0e392eb7753..abf46c951299 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -36,7 +36,7 @@ mac80211-y := \ tdls.o \ ocb.o \ airtime.o \ - eht.o uhr.o + eht.o uhr.o nan.o mac80211-$(CONFIG_MAC80211_LEDS) += led.o mac80211-$(CONFIG_MAC80211_DEBUGFS) += \ diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 3a6c5fed2420..5bc867a56811 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -5594,6 +5594,18 @@ ieee80211_set_epcs(struct wiphy *wiphy, struct net_device *dev, bool enable) return ieee80211_mgd_set_epcs(sdata, enable); } +static int +ieee80211_set_local_nan_sched(struct wiphy *wiphy, + struct wireless_dev *wdev, + struct cfg80211_nan_local_sched *sched) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + + lockdep_assert_wiphy(wiphy); + + return ieee80211_nan_set_local_sched(sdata, sched); +} + const struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -5710,4 +5722,5 @@ const struct cfg80211_ops mac80211_config_ops = { .get_radio_mask = ieee80211_get_radio_mask, .assoc_ml_reconf = ieee80211_assoc_ml_reconf, .set_epcs = ieee80211_set_epcs, + .nan_set_local_sched = ieee80211_set_local_nan_sched, }; diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index dd99fdc1ea9d..50a06de928ba 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -16,6 +16,8 @@ struct ieee80211_chanctx_user_iter { struct ieee80211_chan_req *chanreq; struct ieee80211_sub_if_data *sdata; struct ieee80211_link_data *link; + struct ieee80211_nan_channel *nan_channel; + int nan_channel_next_idx; enum nl80211_iftype iftype; bool reserved, radar_required, done; enum { @@ -31,20 +33,38 @@ enum ieee80211_chanctx_iter_type { CHANCTX_ITER_ASSIGNED, }; -static void ieee80211_chanctx_user_iter_next(struct ieee80211_local *local, - struct ieee80211_chanctx *ctx, - struct ieee80211_chanctx_user_iter *iter, - enum ieee80211_chanctx_iter_type type, - bool start) +static bool +ieee80211_chanctx_user_iter_next_nan_channel(struct ieee80211_chanctx *ctx, + struct ieee80211_chanctx_user_iter *iter) { - lockdep_assert_wiphy(local->hw.wiphy); + /* Start from the next index after current position */ + for (int i = iter->nan_channel_next_idx; + i < ARRAY_SIZE(iter->sdata->vif.cfg.nan_sched.channels); i++) { + struct ieee80211_nan_channel *nan_channel = + &iter->sdata->vif.cfg.nan_sched.channels[i]; - if (start) { - memset(iter, 0, sizeof(*iter)); - goto next_interface; + if (!nan_channel->chanreq.oper.chan) + continue; + + if (nan_channel->chanctx_conf != &ctx->conf) + continue; + + iter->nan_channel = nan_channel; + iter->nan_channel_next_idx = i + 1; + iter->chanreq = &nan_channel->chanreq; + iter->link = NULL; + iter->reserved = false; + iter->radar_required = false; + return true; } + return false; +} -next_link: +static bool +ieee80211_chanctx_user_iter_next_link(struct ieee80211_chanctx *ctx, + struct ieee80211_chanctx_user_iter *iter, + enum ieee80211_chanctx_iter_type type) +{ for (int link_id = iter->link ? iter->link->link_id : 0; link_id < ARRAY_SIZE(iter->sdata->link); link_id++) { @@ -64,7 +84,7 @@ next_link: iter->reserved = false; iter->radar_required = link->radar_required; iter->chanreq = &link->conf->chanreq; - return; + return true; } fallthrough; case CHANCTX_ITER_POS_RESERVED: @@ -77,7 +97,7 @@ next_link: link->reserved_radar_required; iter->chanreq = &link->reserved; - return; + return true; } fallthrough; case CHANCTX_ITER_POS_DONE: @@ -85,6 +105,33 @@ next_link: continue; } } + return false; +} + +static void +ieee80211_chanctx_user_iter_next(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx, + struct ieee80211_chanctx_user_iter *iter, + enum ieee80211_chanctx_iter_type type, + bool start) +{ + bool found; + + lockdep_assert_wiphy(local->hw.wiphy); + + if (start) { + memset(iter, 0, sizeof(*iter)); + goto next_interface; + } + +next_user: + if (iter->iftype == NL80211_IFTYPE_NAN) + found = ieee80211_chanctx_user_iter_next_nan_channel(ctx, iter); + else + found = ieee80211_chanctx_user_iter_next_link(ctx, iter, type); + + if (found) + return; next_interface: /* next (or first) interface */ @@ -97,10 +144,18 @@ next_interface: if (iter->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) continue; + /* NAN channels don't reserve channel context */ + if (iter->sdata->vif.type == NL80211_IFTYPE_NAN && + type == CHANCTX_ITER_RESERVED) + continue; + + iter->nan_channel = NULL; iter->link = NULL; - iter->per_link = CHANCTX_ITER_POS_ASSIGNED; iter->iftype = iter->sdata->vif.type; - goto next_link; + iter->chanreq = NULL; + iter->per_link = CHANCTX_ITER_POS_ASSIGNED; + iter->nan_channel_next_idx = 0; + goto next_user; } iter->done = true; @@ -133,8 +188,8 @@ next_interface: CHANCTX_ITER_ALL, \ false)) -static int ieee80211_chanctx_num_assigned(struct ieee80211_local *local, - struct ieee80211_chanctx *ctx) +int ieee80211_chanctx_num_assigned(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx) { struct ieee80211_chanctx_user_iter iter; int num = 0; @@ -321,7 +376,7 @@ ieee80211_chanctx_non_reserved_chandef(struct ieee80211_local *local, lockdep_assert_wiphy(local->hw.wiphy); for_each_chanctx_user_assigned(local, ctx, &iter) { - if (iter.link->reserved_chanctx) + if (iter.link && iter.link->reserved_chanctx) continue; comp_def = ieee80211_chanreq_compatible(iter.chanreq, @@ -480,7 +535,6 @@ ieee80211_get_width_of_link(struct ieee80211_link_data *link) case NL80211_IFTYPE_AP_VLAN: return ieee80211_get_max_required_bw(link); case NL80211_IFTYPE_P2P_DEVICE: - case NL80211_IFTYPE_NAN: break; case NL80211_IFTYPE_MONITOR: WARN_ON_ONCE(!ieee80211_hw_check(&local->hw, @@ -495,6 +549,7 @@ ieee80211_get_width_of_link(struct ieee80211_link_data *link) case NUM_NL80211_IFTYPES: case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_P2P_GO: + case NL80211_IFTYPE_NAN: case NL80211_IFTYPE_NAN_DATA: WARN_ON_ONCE(1); break; @@ -504,6 +559,18 @@ ieee80211_get_width_of_link(struct ieee80211_link_data *link) return NL80211_CHAN_WIDTH_20_NOHT; } +static enum nl80211_chan_width +ieee80211_get_width_of_chanctx_user(struct ieee80211_chanctx_user_iter *iter) +{ + if (iter->link) + return ieee80211_get_width_of_link(iter->link); + + if (WARN_ON_ONCE(!iter->nan_channel || iter->reserved)) + return NL80211_CHAN_WIDTH_20_NOHT; + + return iter->nan_channel->chanreq.oper.width; +} + static enum nl80211_chan_width ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local, struct ieee80211_chanctx *ctx, @@ -521,7 +588,7 @@ ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local, /* When this is true we only care about the reserving links */ if (check_reserved) { for_each_chanctx_user_reserved(local, ctx, &iter) { - width = ieee80211_get_width_of_link(iter.link); + width = ieee80211_get_width_of_chanctx_user(&iter); max_bw = max(max_bw, width); } goto check_monitor; @@ -529,7 +596,7 @@ ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local, /* Consider all assigned links */ for_each_chanctx_user_assigned(local, ctx, &iter) { - width = ieee80211_get_width_of_link(iter.link); + width = ieee80211_get_width_of_chanctx_user(&iter); max_bw = max(max_bw, width); } @@ -943,7 +1010,10 @@ ieee80211_new_chanctx(struct ieee80211_local *local, kfree(ctx); return ERR_PTR(err); } - /* We ignored a driver error, see _ieee80211_set_active_links */ + /* + * We ignored a driver error, see _ieee80211_set_active_links and/or + * ieee80211_nan_set_local_sched + */ WARN_ON_ONCE(err && !local->in_reconfig); list_add_rcu(&ctx->list, &local->chanctx_list); @@ -964,9 +1034,9 @@ static void ieee80211_del_chanctx(struct ieee80211_local *local, ieee80211_remove_wbrf(local, &ctx->conf.def); } -static void ieee80211_free_chanctx(struct ieee80211_local *local, - struct ieee80211_chanctx *ctx, - bool skip_idle_recalc) +void ieee80211_free_chanctx(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx, + bool skip_idle_recalc) { lockdep_assert_wiphy(local->hw.wiphy); @@ -1161,6 +1231,7 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_OCB: + case NL80211_IFTYPE_NAN: break; default: continue; @@ -1171,6 +1242,15 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, break; } + if (iter.nan_channel) { + rx_chains_dynamic = rx_chains_static = + iter.nan_channel->needed_rx_chains; + break; + } + + if (!iter.link) + continue; + switch (iter.link->smps_mode) { default: WARN_ONCE(1, "Invalid SMPS mode %d\n", @@ -1779,7 +1859,7 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local) for_each_chanctx_user_assigned(local, ctx->replace_ctx, &iter) { n_assigned++; - if (iter.link->reserved_chanctx) { + if (iter.link && iter.link->reserved_chanctx) { n_reserved++; if (iter.link->reserved_ready) n_ready++; @@ -2035,7 +2115,7 @@ void __ieee80211_link_release_channel(struct ieee80211_link_data *link, ieee80211_vif_use_reserved_switch(local); } -static struct ieee80211_chanctx * +struct ieee80211_chanctx * ieee80211_find_or_create_chanctx(struct ieee80211_sub_if_data *sdata, const struct ieee80211_chan_req *chanreq, enum ieee80211_chanctx_mode mode, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 8d5f9a725fdf..92ea8de8a6db 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -990,6 +990,8 @@ struct ieee80211_if_mntr { * @de: Discovery Engine state (only valid if !WIPHY_NAN_FLAGS_USERSPACE_DE) * @de.func_lock: lock for @de.function_inst_ids * @de.function_inst_ids: a bitmap of available instance_id's + * @removed_channels: bitmap of channels that should be removed from the NAN + * schedule once the deferred schedule update is completed. */ struct ieee80211_if_nan { struct cfg80211_nan_conf conf; @@ -1000,6 +1002,8 @@ struct ieee80211_if_nan { spinlock_t func_lock; struct idr function_inst_ids; } de; + + DECLARE_BITMAP(removed_channels, IEEE80211_NAN_MAX_CHANNELS); }; struct ieee80211_link_data_managed { @@ -2024,6 +2028,10 @@ int ieee80211_mesh_csa_beacon(struct ieee80211_sub_if_data *sdata, int ieee80211_mesh_finish_csa(struct ieee80211_sub_if_data *sdata, u64 *changed); +/* NAN code */ +int ieee80211_nan_set_local_sched(struct ieee80211_sub_if_data *sdata, + struct cfg80211_nan_local_sched *sched); + /* scan/BSS handling */ void ieee80211_scan_work(struct wiphy *wiphy, struct wiphy_work *work); int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata, @@ -2812,7 +2820,17 @@ int ieee80211_max_num_channels(struct ieee80211_local *local, int radio_idx); u32 ieee80211_get_radio_mask(struct wiphy *wiphy, struct net_device *dev); void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local, struct ieee80211_chanctx *ctx); - +struct ieee80211_chanctx * +ieee80211_find_or_create_chanctx(struct ieee80211_sub_if_data *sdata, + const struct ieee80211_chan_req *chanreq, + enum ieee80211_chanctx_mode mode, + bool assign_on_failure, + bool *reused_ctx); +void ieee80211_free_chanctx(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx, + bool skip_idle_recalc); +int ieee80211_chanctx_num_assigned(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx); /* TDLS */ int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev, const u8 *peer, int link_id, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index f0a5a675c5a5..0f3e49cdbb39 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -886,6 +886,14 @@ static void ieee80211_teardown_sdata(struct ieee80211_sub_if_data *sdata) ieee80211_vif_clear_links(sdata); ieee80211_link_stop(&sdata->deflink); + + if (sdata->vif.type == NL80211_IFTYPE_NAN) { + struct ieee80211_nan_sched_cfg *nan_sched = + &sdata->vif.cfg.nan_sched; + + for (int i = 0; i < ARRAY_SIZE(nan_sched->channels); i++) + WARN_ON(nan_sched->channels[i].chanreq.oper.chan); + } } static void ieee80211_uninit(struct net_device *dev) diff --git a/net/mac80211/nan.c b/net/mac80211/nan.c new file mode 100644 index 000000000000..2fa55e9a9dab --- /dev/null +++ b/net/mac80211/nan.c @@ -0,0 +1,378 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * NAN mode implementation + * Copyright(c) 2025 Intel Corporation + */ +#include + +#include "ieee80211_i.h" +#include "driver-ops.h" + +static void +ieee80211_nan_init_channel(struct ieee80211_nan_channel *nan_channel, + struct cfg80211_nan_channel *cfg_nan_channel) +{ + memset(nan_channel, 0, sizeof(*nan_channel)); + + nan_channel->chanreq.oper = cfg_nan_channel->chandef; + memcpy(nan_channel->channel_entry, cfg_nan_channel->channel_entry, + sizeof(nan_channel->channel_entry)); + nan_channel->needed_rx_chains = cfg_nan_channel->rx_nss; +} + +static void +ieee80211_nan_update_channel(struct ieee80211_local *local, + struct ieee80211_nan_channel *nan_channel, + struct cfg80211_nan_channel *cfg_nan_channel, + bool deferred) +{ + struct ieee80211_chanctx_conf *conf; + bool reducing_nss; + + if (WARN_ON(!cfg80211_chandef_identical(&nan_channel->chanreq.oper, + &cfg_nan_channel->chandef))) + return; + + if (WARN_ON(memcmp(nan_channel->channel_entry, + cfg_nan_channel->channel_entry, + sizeof(nan_channel->channel_entry)))) + return; + + if (nan_channel->needed_rx_chains == cfg_nan_channel->rx_nss) + return; + + reducing_nss = nan_channel->needed_rx_chains > cfg_nan_channel->rx_nss; + nan_channel->needed_rx_chains = cfg_nan_channel->rx_nss; + + conf = nan_channel->chanctx_conf; + + /* + * If we are adding NSSs, we need to be ready before notifying the peer, + * if we are reducing NSSs, we need to wait until the peer is notified. + */ + if (!conf || (deferred && reducing_nss)) + return; + + ieee80211_recalc_smps_chanctx(local, container_of(conf, + struct ieee80211_chanctx, + conf)); +} + +static int +ieee80211_nan_use_chanctx(struct ieee80211_sub_if_data *sdata, + struct ieee80211_nan_channel *nan_channel, + bool assign_on_failure) +{ + struct ieee80211_chanctx *ctx; + bool reused_ctx; + + if (!nan_channel->chanreq.oper.chan) + return -EINVAL; + + if (ieee80211_check_combinations(sdata, &nan_channel->chanreq.oper, + IEEE80211_CHANCTX_SHARED, 0, -1)) + return -EBUSY; + + ctx = ieee80211_find_or_create_chanctx(sdata, &nan_channel->chanreq, + IEEE80211_CHANCTX_SHARED, + assign_on_failure, + &reused_ctx); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + nan_channel->chanctx_conf = &ctx->conf; + + /* + * In case an existing channel context is being used, we marked it as + * will_be_used, now that it is assigned - clear this indication + */ + if (reused_ctx) { + WARN_ON(!ctx->will_be_used); + ctx->will_be_used = false; + } + ieee80211_recalc_chanctx_min_def(sdata->local, ctx); + ieee80211_recalc_smps_chanctx(sdata->local, ctx); + + return 0; +} + +static void +ieee80211_nan_remove_channel(struct ieee80211_sub_if_data *sdata, + struct ieee80211_nan_channel *nan_channel) +{ + struct ieee80211_chanctx_conf *conf; + struct ieee80211_chanctx *ctx; + struct ieee80211_nan_sched_cfg *sched_cfg = &sdata->vif.cfg.nan_sched; + + if (WARN_ON(!nan_channel)) + return; + + lockdep_assert_wiphy(sdata->local->hw.wiphy); + + if (!nan_channel->chanreq.oper.chan) + return; + + for (int slot = 0; slot < ARRAY_SIZE(sched_cfg->schedule); slot++) + if (sched_cfg->schedule[slot] == nan_channel) + sched_cfg->schedule[slot] = NULL; + + conf = nan_channel->chanctx_conf; + + memset(nan_channel, 0, sizeof(*nan_channel)); + + /* Update the driver before (possibly) releasing the channel context */ + drv_vif_cfg_changed(sdata->local, sdata, BSS_CHANGED_NAN_LOCAL_SCHED); + + /* Channel might not have a chanctx if it was ULWed */ + if (!conf) + return; + + ctx = container_of(conf, struct ieee80211_chanctx, conf); + + if (ieee80211_chanctx_num_assigned(sdata->local, ctx) > 0) { + ieee80211_recalc_chanctx_chantype(sdata->local, ctx); + ieee80211_recalc_smps_chanctx(sdata->local, ctx); + ieee80211_recalc_chanctx_min_def(sdata->local, ctx); + } + + if (ieee80211_chanctx_refcount(sdata->local, ctx) == 0) + ieee80211_free_chanctx(sdata->local, ctx, false); +} + +static struct ieee80211_nan_channel * +ieee80211_nan_find_free_channel(struct ieee80211_nan_sched_cfg *sched_cfg) +{ + for (int i = 0; i < ARRAY_SIZE(sched_cfg->channels); i++) { + if (!sched_cfg->channels[i].chanreq.oper.chan) + return &sched_cfg->channels[i]; + } + + return NULL; +} + +int ieee80211_nan_set_local_sched(struct ieee80211_sub_if_data *sdata, + struct cfg80211_nan_local_sched *sched) +{ + struct ieee80211_nan_channel *sched_idx_to_chan[IEEE80211_NAN_MAX_CHANNELS] = {}; + struct ieee80211_nan_sched_cfg *sched_cfg = &sdata->vif.cfg.nan_sched; + struct ieee80211_nan_sched_cfg backup_sched; + int ret; + + if (sched->n_channels > IEEE80211_NAN_MAX_CHANNELS) + return -EOPNOTSUPP; + + if (sched->nan_avail_blob_len > IEEE80211_NAN_AVAIL_BLOB_MAX_LEN) + return -EINVAL; + + /* + * If a deferred schedule update is pending completion, new updates are + * not allowed. Only allow to configure an empty schedule so NAN can be + * stopped in the middle of a deferred update. This is fine because + * empty schedule means the local NAN device will not be available for + * peers anymore so there is no need to update peers about a new + * schedule. + */ + if (WARN_ON(sched_cfg->deferred && sched->n_channels)) + return -EBUSY; + + bitmap_zero(sdata->u.nan.removed_channels, IEEE80211_NAN_MAX_CHANNELS); + + memcpy(backup_sched.schedule, sched_cfg->schedule, + sizeof(backup_sched.schedule)); + memcpy(backup_sched.channels, sched_cfg->channels, + sizeof(backup_sched.channels)); + memcpy(backup_sched.avail_blob, sched_cfg->avail_blob, + sizeof(backup_sched.avail_blob)); + backup_sched.avail_blob_len = sched_cfg->avail_blob_len; + + memcpy(sched_cfg->avail_blob, sched->nan_avail_blob, + sched->nan_avail_blob_len); + sched_cfg->avail_blob_len = sched->nan_avail_blob_len; + + /* + * Remove channels that are no longer in the new schedule to free up + * resources before adding new channels. For deferred schedule, channels + * will be removed when the schedule is applied. + * Create a mapping from sched index to sched_cfg channel + */ + for (int i = 0; i < ARRAY_SIZE(sched_cfg->channels); i++) { + bool still_needed = false; + + if (!sched_cfg->channels[i].chanreq.oper.chan) + continue; + + for (int j = 0; j < sched->n_channels; j++) { + if (cfg80211_chandef_identical(&sched_cfg->channels[i].chanreq.oper, + &sched->nan_channels[j].chandef)) { + sched_idx_to_chan[j] = + &sched_cfg->channels[i]; + still_needed = true; + break; + } + } + + if (!still_needed) { + __set_bit(i, sdata->u.nan.removed_channels); + if (!sched->deferred) + ieee80211_nan_remove_channel(sdata, + &sched_cfg->channels[i]); + } + } + + for (int i = 0; i < sched->n_channels; i++) { + struct ieee80211_nan_channel *chan = sched_idx_to_chan[i]; + + if (chan) { + ieee80211_nan_update_channel(sdata->local, chan, + &sched->nan_channels[i], + sched->deferred); + } else { + chan = ieee80211_nan_find_free_channel(sched_cfg); + if (WARN_ON(!chan)) { + ret = -EINVAL; + goto err; + } + + sched_idx_to_chan[i] = chan; + ieee80211_nan_init_channel(chan, + &sched->nan_channels[i]); + + ret = ieee80211_nan_use_chanctx(sdata, chan, false); + if (ret) { + memset(chan, 0, sizeof(*chan)); + goto err; + } + } + } + + for (int s = 0; s < ARRAY_SIZE(sched_cfg->schedule); s++) { + if (sched->schedule[s] < ARRAY_SIZE(sched_idx_to_chan)) + sched_cfg->schedule[s] = + sched_idx_to_chan[sched->schedule[s]]; + else + sched_cfg->schedule[s] = NULL; + } + + sched_cfg->deferred = sched->deferred; + + drv_vif_cfg_changed(sdata->local, sdata, BSS_CHANGED_NAN_LOCAL_SCHED); + + /* + * For deferred update, don't update NDI carriers yet as the new + * schedule is not yet applied so common slots don't change. The NDI + * carrier will be updated once the driver notifies the new schedule is + * applied. + */ + if (sched_cfg->deferred) + return 0; + + bitmap_zero(sdata->u.nan.removed_channels, IEEE80211_NAN_MAX_CHANNELS); + + return 0; +err: + /* Remove newly added channels */ + for (int i = 0; i < ARRAY_SIZE(sched_cfg->channels); i++) { + struct cfg80211_chan_def *chan_def = + &sched_cfg->channels[i].chanreq.oper; + + if (!chan_def->chan) + continue; + + if (!cfg80211_chandef_identical(&backup_sched.channels[i].chanreq.oper, + chan_def)) + ieee80211_nan_remove_channel(sdata, + &sched_cfg->channels[i]); + } + + /* Re-add all backed up channels */ + for (int i = 0; i < ARRAY_SIZE(backup_sched.channels); i++) { + struct ieee80211_nan_channel *chan = &sched_cfg->channels[i]; + + *chan = backup_sched.channels[i]; + + /* + * For deferred update, no channels were removed and the channel + * context didn't change, so nothing else to do. + */ + if (!chan->chanctx_conf || sched->deferred) + continue; + + if (test_bit(i, sdata->u.nan.removed_channels)) { + /* Clear the stale chanctx pointer */ + chan->chanctx_conf = NULL; + /* + * We removed the newly added channels so we don't lack + * resources. So the only reason that this would fail + * is a FW error which we ignore. Therefore, this + * should never fail. + */ + WARN_ON(ieee80211_nan_use_chanctx(sdata, chan, true)); + } else { + struct ieee80211_chanctx_conf *conf = chan->chanctx_conf; + + /* FIXME: detect no-op? */ + /* Channel was not removed but may have been updated */ + ieee80211_recalc_smps_chanctx(sdata->local, + container_of(conf, + struct ieee80211_chanctx, + conf)); + } + } + + memcpy(sched_cfg->schedule, backup_sched.schedule, + sizeof(backup_sched.schedule)); + memcpy(sched_cfg->avail_blob, backup_sched.avail_blob, + sizeof(backup_sched.avail_blob)); + sched_cfg->avail_blob_len = backup_sched.avail_blob_len; + sched_cfg->deferred = false; + bitmap_zero(sdata->u.nan.removed_channels, IEEE80211_NAN_MAX_CHANNELS); + + drv_vif_cfg_changed(sdata->local, sdata, BSS_CHANGED_NAN_LOCAL_SCHED); + return ret; +} + +void ieee80211_nan_sched_update_done(struct ieee80211_vif *vif) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_nan_sched_cfg *sched_cfg = &vif->cfg.nan_sched; + unsigned int i; + + lockdep_assert_wiphy(sdata->local->hw.wiphy); + + if (WARN_ON(!sched_cfg->deferred)) + return; + + /* + * Clear the deferred flag before removing channels. Removing channels + * will trigger another schedule update to the driver, and there is no + * need for this update to be deferred since removed channels are not + * part of the schedule anymore, so no need to notify peers about + * removing them. + */ + sched_cfg->deferred = false; + + for (i = 0; i < ARRAY_SIZE(sched_cfg->channels); i++) { + struct ieee80211_nan_channel *chan = &sched_cfg->channels[i]; + struct ieee80211_chanctx_conf *conf = chan->chanctx_conf; + + if (!chan->chanreq.oper.chan) + continue; + + if (test_bit(i, sdata->u.nan.removed_channels)) + ieee80211_nan_remove_channel(sdata, chan); + else if (conf) + /* + * We might have called this already for some channels, + * but this knows to handle a no-op. + */ + ieee80211_recalc_smps_chanctx(sdata->local, + container_of(conf, + struct ieee80211_chanctx, + conf)); + } + + bitmap_zero(sdata->u.nan.removed_channels, IEEE80211_NAN_MAX_CHANNELS); + cfg80211_nan_sched_update_done(ieee80211_vif_to_wdev(vif), true, + GFP_KERNEL); +} +EXPORT_SYMBOL(ieee80211_nan_sched_update_done); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 36795529ff82..19ac778b704d 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1744,20 +1744,12 @@ static void ieee80211_reconfig_stations(struct ieee80211_sub_if_data *sdata) } } -static int ieee80211_reconfig_nan(struct ieee80211_sub_if_data *sdata) +static int +ieee80211_reconfig_nan_offload_de(struct ieee80211_sub_if_data *sdata) { struct cfg80211_nan_func *func, **funcs; int res, id, i = 0; - res = drv_start_nan(sdata->local, sdata, - &sdata->u.nan.conf); - if (WARN_ON(res)) - return res; - - if (sdata->local->hw.wiphy->nan_capa.flags & - WIPHY_NAN_FLAGS_USERSPACE_DE) - return 0; - funcs = kzalloc_objs(*funcs, sdata->local->hw.max_nan_de_entries + 1); if (!funcs) return -ENOMEM; @@ -1783,6 +1775,22 @@ static int ieee80211_reconfig_nan(struct ieee80211_sub_if_data *sdata) } kfree(funcs); + return res; +} + +static int ieee80211_reconfig_nan(struct ieee80211_sub_if_data *sdata) +{ + int res; + + res = drv_start_nan(sdata->local, sdata, + &sdata->u.nan.conf); + if (WARN_ON(res)) + return res; + + if (!(sdata->local->hw.wiphy->nan_capa.flags & WIPHY_NAN_FLAGS_USERSPACE_DE)) + return ieee80211_reconfig_nan_offload_de(sdata); + + drv_vif_cfg_changed(sdata->local, sdata, BSS_CHANGED_NAN_LOCAL_SCHED); return 0; } From d6c470def51cd9ccc59c882e14ace478022b5a4a Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Thu, 26 Mar 2026 12:14:36 +0200 Subject: [PATCH 1194/1561] wifi: mac80211: support open and close for NAN_DATA interfaces Support opening and closing a NAN_DATA interface. Track the NAN (NMI) interface, for convenience. Allow opening an NAN_DATA interface only if the NAN interface is running (NAN has started). When closing the NAN interface, make sure all NAN_DATA interfaces are closed first, and warn if this is not the case. Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260326121156.a19de68119e5.Ia6724dac6a0e17cb69989dd714d14f4df1c69bef@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 11 +++++++++++ net/mac80211/iface.c | 33 ++++++++++++++++++++++++++++++--- 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 92ea8de8a6db..e3a051beba6a 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1006,6 +1006,16 @@ struct ieee80211_if_nan { DECLARE_BITMAP(removed_channels, IEEE80211_NAN_MAX_CHANNELS); }; +/** + * struct ieee80211_if_nan_data - NAN data path state + * + * @nmi: pointer to the NAN management interface sdata. Used for data path, + * hence RCU. + */ +struct ieee80211_if_nan_data { + struct ieee80211_sub_if_data __rcu *nmi; +}; + struct ieee80211_link_data_managed { u8 bssid[ETH_ALEN] __aligned(2); @@ -1204,6 +1214,7 @@ struct ieee80211_sub_if_data { struct ieee80211_if_ocb ocb; struct ieee80211_if_mntr mntr; struct ieee80211_if_nan nan; + struct ieee80211_if_nan_data nan_data; } u; struct ieee80211_link_data deflink; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 0f3e49cdbb39..507c5e016ec8 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -361,6 +361,17 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata, nsdata->vif.type == NL80211_IFTYPE_OCB)) return -EBUSY; + /* + * A NAN DATA interface is correlated to the NAN + * (management) one + */ + if (iftype == NL80211_IFTYPE_NAN_DATA && + nsdata->vif.type == NL80211_IFTYPE_NAN) { + if (!nsdata->u.nan.started) + return -EINVAL; + rcu_assign_pointer(sdata->u.nan_data.nmi, nsdata); + } + /* * Allow only a single IBSS interface to be up at any * time. This is restricted because beacon distribution @@ -475,6 +486,7 @@ static int ieee80211_open(struct net_device *dev) static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_down) { struct ieee80211_local *local = sdata->local; + struct ieee80211_sub_if_data *iter; unsigned long flags; struct sk_buff_head freeq; struct sk_buff *skb, *tmp; @@ -621,6 +633,12 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do } break; case NL80211_IFTYPE_NAN: + /* Check if any open NAN_DATA interfaces */ + list_for_each_entry(iter, &local->interfaces, list) { + WARN_ON(iter->vif.type == NL80211_IFTYPE_NAN_DATA && + ieee80211_sdata_running(iter)); + } + /* clean all the functions */ if (!(local->hw.wiphy->nan_capa.flags & WIPHY_NAN_FLAGS_USERSPACE_DE)) { @@ -636,6 +654,9 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do spin_unlock_bh(&sdata->u.nan.de.func_lock); } break; + case NL80211_IFTYPE_NAN_DATA: + RCU_INIT_POINTER(sdata->u.nan_data.nmi, NULL); + fallthrough; default: wiphy_work_cancel(sdata->local->hw.wiphy, &sdata->work); /* @@ -1384,9 +1405,12 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) case NL80211_IFTYPE_P2P_DEVICE: case NL80211_IFTYPE_OCB: case NL80211_IFTYPE_NAN: - case NL80211_IFTYPE_NAN_DATA: /* no special treatment */ break; + case NL80211_IFTYPE_NAN_DATA: + if (WARN_ON(!rcu_access_pointer(sdata->u.nan_data.nmi))) + return -ENOLINK; + break; case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: case NL80211_IFTYPE_P2P_CLIENT: @@ -1404,8 +1428,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) res = drv_start(local); if (res) { /* - * no need to worry about AP_VLAN cleanup since in that - * case we can't have open_count == 0 + * no need to worry about AP_VLAN/NAN_DATA cleanup since + * in that case we can't have open_count == 0 */ return res; } @@ -1524,6 +1548,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) case NL80211_IFTYPE_AP: case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_OCB: + case NL80211_IFTYPE_NAN_DATA: netif_carrier_off(dev); break; case NL80211_IFTYPE_P2P_DEVICE: @@ -1570,6 +1595,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) err_stop: if (!local->open_count) drv_stop(local, false); + if (sdata->vif.type == NL80211_IFTYPE_NAN_DATA) + RCU_INIT_POINTER(sdata->u.nan_data.nmi, NULL); if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) list_del(&sdata->u.vlan.list); /* Might not be initialized yet, but it is harmless */ From e43ec602201c5477508855f9c159956df18d108f Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Thu, 26 Mar 2026 12:14:37 +0200 Subject: [PATCH 1195/1561] wifi: mac80211: handle reconfig for NAN DATA interfaces Make sure these interfaces are added to the driver only after the NAN one was, and after NAN operation was started. Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260326121156.b14392ce99d6.I2ba3bfcd93e47e48d7f7c74007c70cca52d46896@changeid Signed-off-by: Johannes Berg --- net/mac80211/util.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 19ac778b704d..925a09246ad9 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1780,10 +1780,11 @@ ieee80211_reconfig_nan_offload_de(struct ieee80211_sub_if_data *sdata) static int ieee80211_reconfig_nan(struct ieee80211_sub_if_data *sdata) { + struct ieee80211_local *local = sdata->local; + struct ieee80211_sub_if_data *ndi_sdata; int res; - res = drv_start_nan(sdata->local, sdata, - &sdata->u.nan.conf); + res = drv_start_nan(local, sdata, &sdata->u.nan.conf); if (WARN_ON(res)) return res; @@ -1792,6 +1793,15 @@ static int ieee80211_reconfig_nan(struct ieee80211_sub_if_data *sdata) drv_vif_cfg_changed(sdata->local, sdata, BSS_CHANGED_NAN_LOCAL_SCHED); + /* Now we can add all the NDIs to the driver */ + list_for_each_entry(ndi_sdata, &local->interfaces, list) { + if (ndi_sdata->vif.type == NL80211_IFTYPE_NAN_DATA) { + res = drv_add_interface(local, ndi_sdata); + if (WARN_ON(res)) + return res; + } + } + return 0; } @@ -1945,6 +1955,9 @@ int ieee80211_reconfig(struct ieee80211_local *local) if (sdata->vif.type == NL80211_IFTYPE_MONITOR && !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) continue; + /* These vifs can't be added before NAN was started */ + if (sdata->vif.type == NL80211_IFTYPE_NAN_DATA) + continue; if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN && ieee80211_sdata_running(sdata)) { res = drv_add_interface(local, sdata); @@ -1962,6 +1975,8 @@ int ieee80211_reconfig(struct ieee80211_local *local) if (sdata->vif.type == NL80211_IFTYPE_MONITOR && !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) continue; + if (sdata->vif.type == NL80211_IFTYPE_NAN_DATA) + continue; if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN && ieee80211_sdata_running(sdata)) drv_remove_interface(local, sdata); From 27e9b326b67440b559517977e19682461a50da2c Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Thu, 26 Mar 2026 12:14:38 +0200 Subject: [PATCH 1196/1561] wifi: mac80211: support NAN stations Add support for both NMI and NDI stations. The NDI station will be linked to the NMI station of the NAN peer for which the NDI station is added. A peer can choose to reuse its NMI address as the NDI address. Since different keys might be in use for NAN management and for data frames, we will have 2 different stations, even if they'll have the same address. Even though there are no links in NAN, sta->deflink will still be used to store the one set of capabilities and SMPS mode. Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260326121156.9fdd37b8e755.I7a7bd6e8e751cab49c329419485839afd209cfc6@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 13 ++++- net/mac80211/cfg.c | 103 ++++++++++++++++++++++++++++++++++++---- net/mac80211/he.c | 7 ++- net/mac80211/ht.c | 19 ++++++-- net/mac80211/iface.c | 10 ++-- net/mac80211/sta_info.c | 21 +++++++- net/mac80211/sta_info.h | 3 +- net/mac80211/util.c | 41 ++++++++++++++++ net/mac80211/vht.c | 16 ++++++- 9 files changed, 209 insertions(+), 24 deletions(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index cc23f99f8318..b190d9035182 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2534,11 +2534,15 @@ struct ieee80211_sta_aggregates { * @uhr_cap: UHR capabilities of this STA * @s1g_cap: S1G capabilities of this STA * @agg: per-link data for multi-link aggregation - * @bandwidth: current bandwidth the station can receive with + * @bandwidth: current bandwidth the station can receive with. + * This is the minimum between the peer's capabilities and our own + * operating channel width; Invalid for NAN since that is operating on + * multiple channels. * @rx_nss: in HT/VHT, the maximum number of spatial streams the * station can receive at the moment, changed by operating mode * notifications and capabilities. The value is only valid after - * the station moves to associated state. + * the station moves to associated state. Invalid for NAN since it + * operates on multiple configurations of rx_nss. * @txpwr: the station tx power configuration * */ @@ -2620,6 +2624,7 @@ struct ieee80211_link_sta { * @valid_links: bitmap of valid links, or 0 for non-MLO * @spp_amsdu: indicates whether the STA uses SPP A-MSDU or not. * @epp_peer: indicates that the peer is an EPP peer. + * @nmi: For NDI stations, pointer to the NMI station of the peer. */ struct ieee80211_sta { u8 addr[ETH_ALEN] __aligned(2); @@ -2648,6 +2653,8 @@ struct ieee80211_sta { struct ieee80211_link_sta deflink; struct ieee80211_link_sta __rcu *link[IEEE80211_MLD_MAX_NUM_LINKS]; + struct ieee80211_sta __rcu *nmi; + /* must be last */ u8 drv_priv[] __aligned(sizeof(void *)); }; @@ -2881,6 +2888,8 @@ struct ieee80211_txq { * station has a unique address, i.e. each station entry can be identified * by just its MAC address; this prevents, for example, the same station * from connecting to two virtual AP interfaces at the same time. + * Note that this doesn't apply for NAN, in which the peer's NMI address + * can be equal to its NDI address. * * @IEEE80211_HW_SUPPORTS_REORDERING_BUFFER: Hardware (or driver) manages the * reordering buffer internally, guaranteeing mac80211 receives frames in diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 5bc867a56811..607f3bd3f8f8 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2077,7 +2077,7 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, enum sta_link_apply_mode mode, struct link_station_parameters *params) { - struct ieee80211_supported_band *sband; + struct ieee80211_supported_band *sband = NULL; struct ieee80211_sub_if_data *sdata = sta->sdata; u32 link_id = params->link_id < 0 ? 0 : params->link_id; struct ieee80211_link_data *link = @@ -2085,6 +2085,9 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, struct link_sta_info *link_sta = rcu_dereference_protected(sta->link[link_id], lockdep_is_held(&local->hw.wiphy->mtx)); + const struct ieee80211_sta_ht_cap *own_ht_cap; + const struct ieee80211_sta_vht_cap *own_vht_cap; + const struct ieee80211_sta_he_cap *own_he_cap; bool changes = params->link_mac || params->txpwr_set || params->supported_rates_len || @@ -2114,10 +2117,27 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, if (!link || !link_sta) return -EINVAL; - sband = ieee80211_get_link_sband(link); - if (!sband) + /* + * We should not have any changes in NDI station, its capabilities are + * copied from the NMI sta + */ + if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_NAN_DATA)) return -EINVAL; + if (sdata->vif.type == NL80211_IFTYPE_NAN) { + own_ht_cap = &local->hw.wiphy->nan_capa.phy.ht; + own_vht_cap = &local->hw.wiphy->nan_capa.phy.vht; + own_he_cap = &local->hw.wiphy->nan_capa.phy.he; + } else { + sband = ieee80211_get_link_sband(link); + if (!sband) + return -EINVAL; + + own_ht_cap = &sband->ht_cap; + own_vht_cap = &sband->vht_cap; + own_he_cap = ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif); + } + if (params->link_mac) { if (mode == STA_LINK_MODE_NEW) { memcpy(link_sta->addr, params->link_mac, ETH_ALEN); @@ -2139,6 +2159,27 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, return ret; } + if (sdata->vif.type == NL80211_IFTYPE_NAN) { + static const u8 all_ofdm_rates[] = { + 0x0c, 0x12, 0x18, 0x24, 0x30, 0x48, 0x60, 0x6c + }; + + /* Set the same supported_rates for all bands */ + for (int i = 0; i < NUM_NL80211_BANDS; i++) { + struct ieee80211_supported_band *tmp = + sdata->local->hw.wiphy->bands[i]; + + if ((i != NL80211_BAND_2GHZ && i != NL80211_BAND_5GHZ) || + !tmp) + continue; + + if (!ieee80211_parse_bitrates(tmp, all_ofdm_rates, + sizeof(all_ofdm_rates), + &link_sta->pub->supp_rates[i])) + return -EINVAL; + } + } + if (params->supported_rates && params->supported_rates_len && !ieee80211_parse_bitrates(sband, params->supported_rates, @@ -2147,22 +2188,24 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, return -EINVAL; if (params->ht_capa) - ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, &sband->ht_cap, + ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, own_ht_cap, params->ht_capa, link_sta); /* VHT can override some HT caps such as the A-MSDU max length */ if (params->vht_capa) ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, - &sband->vht_cap, + own_vht_cap, params->vht_capa, NULL, link_sta); if (params->he_capa) - ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband, - (void *)params->he_capa, - params->he_capa_len, - (void *)params->he_6ghz_capa, - link_sta); + _ieee80211_he_cap_ie_to_sta_he_cap(sdata, + own_he_cap, + (void *)params->he_capa, + params->he_capa_len, + (sband && sband->band == NL80211_BAND_6GHZ) ? + (void *)params->he_6ghz_capa : NULL, + link_sta); if (params->he_capa && params->eht_capa) ieee80211_eht_cap_ie_to_sta_eht_cap(sdata, sband, @@ -2349,6 +2392,32 @@ static int sta_apply_parameters(struct ieee80211_local *local, if (params->airtime_weight) sta->airtime_weight = params->airtime_weight; + if (params->nmi_mac) { + struct ieee80211_sub_if_data *nmi = + rcu_dereference_wiphy(local->hw.wiphy, + sdata->u.nan_data.nmi); + struct sta_info *nmi_sta; + + if (WARN_ON(!nmi)) + return -EINVAL; + + nmi_sta = sta_info_get(nmi, params->nmi_mac); + if (!nmi_sta) + return -ENOENT; + rcu_assign_pointer(sta->sta.nmi, &nmi_sta->sta); + + /* For NAN_DATA stations, copy capabilities from the NMI station */ + if (!nmi_sta->deflink.pub->ht_cap.ht_supported) + return -EINVAL; + + sta->deflink.pub->ht_cap = nmi_sta->deflink.pub->ht_cap; + sta->deflink.pub->vht_cap = nmi_sta->deflink.pub->vht_cap; + sta->deflink.pub->he_cap = nmi_sta->deflink.pub->he_cap; + memcpy(&sta->deflink.pub->supp_rates, + &nmi_sta->deflink.pub->supp_rates, + sizeof(sta->deflink.pub->supp_rates)); + } + /* set the STA state after all sta info from usermode has been set */ if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) || set & BIT(NL80211_STA_FLAG_ASSOCIATED)) { @@ -2494,6 +2563,12 @@ static int ieee80211_change_station(struct wiphy *wiphy, else statype = CFG80211_STA_AP_CLIENT_UNASSOC; break; + case NL80211_IFTYPE_NAN: + statype = CFG80211_STA_NAN_MGMT; + break; + case NL80211_IFTYPE_NAN_DATA: + statype = CFG80211_STA_NAN_DATA; + break; default: return -EOPNOTSUPP; } @@ -2532,6 +2607,14 @@ static int ieee80211_change_station(struct wiphy *wiphy, } } + /* NAN capabilties should not change */ + if (statype == CFG80211_STA_NAN_DATA && + sta->deflink.pub->ht_cap.ht_supported && + (params->link_sta_params.ht_capa || + params->link_sta_params.vht_capa || + params->link_sta_params.he_capa)) + return -EINVAL; + err = sta_apply_parameters(local, sta, params); if (err) return err; diff --git a/net/mac80211/he.c b/net/mac80211/he.c index 93e0342cff4f..a3e16a5bec22 100644 --- a/net/mac80211/he.c +++ b/net/mac80211/he.c @@ -127,6 +127,10 @@ _ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, if (!he_cap_ie || !own_he_cap_ptr || !own_he_cap_ptr->has_he) return; + /* NDI station are using the capabilities from the NMI station */ + if (WARN_ON_ONCE(sdata->vif.type == NL80211_IFTYPE_NAN_DATA)) + return; + own_he_cap = *own_he_cap_ptr; /* Make sure size is OK */ @@ -156,7 +160,8 @@ _ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, he_cap->has_he = true; link_sta->cur_max_bandwidth = ieee80211_sta_cap_rx_bw(link_sta); - link_sta->pub->bandwidth = ieee80211_sta_cur_vht_bw(link_sta); + if (sdata->vif.type != NL80211_IFTYPE_NAN) + link_sta->pub->bandwidth = ieee80211_sta_cur_vht_bw(link_sta); if (he_6ghz_capa) ieee80211_update_from_he_6ghz_capa(he_6ghz_capa, link_sta); diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 410e2354f33a..97719298e038 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -154,6 +154,10 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, if (!ht_cap_ie || !own_cap_ptr->ht_supported) goto apply; + /* NDI station are using the capabilities from the NMI station */ + if (WARN_ON_ONCE(sdata->vif.type == NL80211_IFTYPE_NAN_DATA)) + return 0; + ht_cap.ht_supported = true; own_cap = *own_cap_ptr; @@ -254,10 +258,17 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, rcu_read_lock(); link_conf = rcu_dereference(sdata->vif.link_conf[link_sta->link_id]); - if (WARN_ON(!link_conf)) + if (WARN_ON(!link_conf)) { width = NL80211_CHAN_WIDTH_20_NOHT; - else + } else if (sdata->vif.type == NL80211_IFTYPE_NAN || + sdata->vif.type == NL80211_IFTYPE_NAN_DATA) { + /* In NAN, link_sta->bandwidth is invalid since NAN operates on + * multiple channels. Just take the maximum. + */ + width = NL80211_CHAN_WIDTH_320; + } else { width = link_conf->chanreq.oper.width; + } switch (width) { default: @@ -285,7 +296,9 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; if (sta->sdata->vif.type == NL80211_IFTYPE_AP || - sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { + sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN || + sta->sdata->vif.type == NL80211_IFTYPE_NAN || + sta->sdata->vif.type == NL80211_IFTYPE_NAN_DATA) { enum ieee80211_smps_mode smps_mode; switch ((ht_cap.cap & IEEE80211_HT_CAP_SM_PS) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 507c5e016ec8..f1ab85ff326d 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -535,12 +535,14 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do * (because if we remove a STA after ops->remove_interface() * the driver will have removed the vif info already!) * - * For AP_VLANs stations may exist since there's nothing else that - * would have removed them, but in other modes there shouldn't - * be any stations. + * For AP_VLANs, NAN and NAN_DATA stations may exist since there's + * nothing else that would have removed them, but in other modes there + * shouldn't be any stations. */ flushed = sta_info_flush(sdata, -1); - WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_AP_VLAN && flushed > 0); + WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_AP_VLAN && + sdata->vif.type != NL80211_IFTYPE_NAN && + sdata->vif.type != NL80211_IFTYPE_NAN_DATA && flushed > 0); /* don't count this interface for allmulti while it is down */ if (sdata->flags & IEEE80211_SDATA_ALLMULTI) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 7923ee9eafab..580cd7a68beb 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -795,6 +795,7 @@ struct sta_info *sta_info_alloc_with_link(struct ieee80211_sub_if_data *sdata, static int sta_info_insert_check(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_sta *same_addr_sta; lockdep_assert_wiphy(sdata->local->hw.wiphy); @@ -810,13 +811,18 @@ static int sta_info_insert_check(struct sta_info *sta) !is_valid_ether_addr(sta->sta.addr))) return -EINVAL; + if (!ieee80211_hw_check(&sdata->local->hw, NEEDS_UNIQUE_STA_ADDR)) + return 0; + /* The RCU read lock is required by rhashtable due to * asynchronous resize/rehash. We also require the mutex * for correctness. */ rcu_read_lock(); - if (ieee80211_hw_check(&sdata->local->hw, NEEDS_UNIQUE_STA_ADDR) && - ieee80211_find_sta_by_ifaddr(&sdata->local->hw, sta->addr, NULL)) { + same_addr_sta = ieee80211_find_sta_by_ifaddr(&sdata->local->hw, + sta->addr, NULL); + /* For NAN, a peer can re-use */ + if (same_addr_sta && same_addr_sta != rcu_access_pointer(sta->sta.nmi)) { rcu_read_unlock(); return -ENOTUNIQ; } @@ -1294,6 +1300,17 @@ static int __must_check __sta_info_destroy_part1(struct sta_info *sta) lockdep_assert_wiphy(local->hw.wiphy); + if (sdata->vif.type == NL80211_IFTYPE_NAN) { + struct sta_info *sta_iter, *tmp; + + /* Remove all NDI stations associated with this NMI STA */ + list_for_each_entry_safe(sta_iter, tmp, &local->sta_list, list) { + if (rcu_access_pointer(sta_iter->sta.nmi) != &sta->sta) + continue; + sta_info_destroy_addr(sta_iter->sdata, sta_iter->addr); + } + } + /* * Before removing the station from the driver and * rate control, it might still start new aggregation diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 58ccbea7f6f6..3e5d003bd31f 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -505,7 +505,8 @@ struct ieee80211_fragment_cache { * @status_stats.ack_signal_filled: last ACK signal validity * @status_stats.avg_ack_signal: average ACK signal * @cur_max_bandwidth: maximum bandwidth to use for TX to the station, - * taken from HT/VHT capabilities or VHT operating mode notification + * taken from HT/VHT capabilities or VHT operating mode notification. + * Invalid for NAN since that is operating on multiple bands. * @rx_omi_bw_rx: RX OMI bandwidth restriction to apply for RX * @rx_omi_bw_tx: RX OMI bandwidth restriction to apply for TX * @rx_omi_bw_staging: RX OMI bandwidth restriction to apply later diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 925a09246ad9..a352f73a7ec4 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1782,6 +1782,7 @@ static int ieee80211_reconfig_nan(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; struct ieee80211_sub_if_data *ndi_sdata; + struct sta_info *sta; int res; res = drv_start_nan(local, sdata, &sdata->u.nan.conf); @@ -1802,6 +1803,42 @@ static int ieee80211_reconfig_nan(struct ieee80211_sub_if_data *sdata) } } + /* Add NMI stations (stations on the NAN interface) */ + list_for_each_entry(sta, &local->sta_list, list) { + enum ieee80211_sta_state state; + + if (!sta->uploaded || sta->sdata != sdata) + continue; + + for (state = IEEE80211_STA_NOTEXIST; state < sta->sta_state; + state++) { + res = drv_sta_state(local, sdata, sta, state, + state + 1); + if (WARN_ON(res)) + return res; + } + } + + /* Add NDI stations (stations on NAN_DATA interfaces) */ + list_for_each_entry(sta, &local->sta_list, list) { + enum ieee80211_sta_state state; + + if (!sta->uploaded || + sta->sdata->vif.type != NL80211_IFTYPE_NAN_DATA) + continue; + + if (WARN_ON(!sta->sta.nmi)) + continue; + + for (state = IEEE80211_STA_NOTEXIST; state < sta->sta_state; + state++) { + res = drv_sta_state(local, sta->sdata, sta, state, + state + 1); + if (WARN_ON(res)) + return res; + } + } + return 0; } @@ -2060,6 +2097,10 @@ int ieee80211_reconfig(struct ieee80211_local *local) case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_MONITOR: break; + case NL80211_IFTYPE_NAN: + case NL80211_IFTYPE_NAN_DATA: + /* NAN stations are handled later */ + break; case NL80211_IFTYPE_ADHOC: if (sdata->vif.cfg.ibss_joined) WARN_ON(drv_join_ibss(local, sdata)); diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index a6570781740a..f3bb5a561a38 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -133,6 +133,10 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, if (!vht_cap_ie || !own_vht_cap->vht_supported) return; + /* NDI station are using the capabilities from the NMI station */ + if (WARN_ON_ONCE(sdata->vif.type == NL80211_IFTYPE_NAN_DATA)) + return; + if (sband) { /* Allow VHT if at least one channel on the sband supports 80 MHz */ bool have_80mhz = false; @@ -320,7 +324,8 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, IEEE80211_STA_RX_BW_160; } - link_sta->pub->bandwidth = ieee80211_sta_cur_vht_bw(link_sta); + if (sdata->vif.type != NL80211_IFTYPE_NAN) + link_sta->pub->bandwidth = ieee80211_sta_cur_vht_bw(link_sta); /* * Work around the Cisco 9115 FW 17.3 bug by taking the min of @@ -373,6 +378,10 @@ __ieee80211_sta_cap_rx_bw(struct link_sta_info *link_sta, } else { struct ieee80211_bss_conf *link_conf; + if (WARN_ON_ONCE(sdata->vif.type == NL80211_IFTYPE_NAN_DATA || + sdata->vif.type == NL80211_IFTYPE_NAN)) + return IEEE80211_STA_RX_BW_20; + rcu_read_lock(); link_conf = rcu_dereference(sdata->vif.link_conf[link_id]); band = link_conf->chanreq.oper.chan->band; @@ -518,6 +527,11 @@ _ieee80211_sta_cur_vht_bw(struct link_sta_info *link_sta, } else { struct ieee80211_bss_conf *link_conf; + /* NAN operates on multiple channels so a chandef must be given */ + if (WARN_ON_ONCE(sta->sdata->vif.type == NL80211_IFTYPE_NAN || + sta->sdata->vif.type == NL80211_IFTYPE_NAN_DATA)) + return IEEE80211_STA_RX_BW_20; + rcu_read_lock(); link_conf = rcu_dereference(sta->sdata->vif.link_conf[link_sta->link_id]); if (WARN_ON_ONCE(!link_conf)) { From 840492bf333bf3b69503a573a7ad71147a7ab67e Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Thu, 26 Mar 2026 12:14:39 +0200 Subject: [PATCH 1197/1561] wifi: mac80211: add NAN peer schedule support Peer schedules specify which channels the peer is available on and when. Add support for configuring peer NAN schedules: - build and store the schedule and maps - for each channel, make sure that it fits into the capabilities, and take the minimum between it and the local compatible nan channel. - configure the driver Note that the removal of a peer schedule should be done by the driver upon NMI station removal. Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260326121156.185ff2283fa6.I0345eb665be8ccf4a77eb1aca9a421eb8d2432e2@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 62 +++++++++- net/mac80211/cfg.c | 13 +++ net/mac80211/driver-ops.h | 21 ++++ net/mac80211/ieee80211_i.h | 3 + net/mac80211/nan.c | 226 ++++++++++++++++++++++++++++++++++++- net/mac80211/sta_info.c | 4 + net/mac80211/trace.h | 31 +++++ net/mac80211/util.c | 8 ++ 8 files changed, 365 insertions(+), 3 deletions(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index b190d9035182..40cb20d9309c 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -877,8 +877,11 @@ struct ieee80211_bss_conf { * is irrelevant for NAN, still store it for convenience - some functions * require it as an argument. * @needed_rx_chains: number of RX chains needed for this NAN channel - * @chanctx_conf: chanctx_conf assigned to this NAN channel. Will be %NULL - * if the channel is ULWed. + * @chanctx_conf: chanctx_conf assigned to this NAN channel. + * If a local channel is being ULWed (because we needed this chanctx for + * something else), the local NAN channel that used this chanctx, + * will have this pointer set to %NULL. + * A peer NAN channel should never have this pointer set to %NULL. * @channel_entry: the Channel Entry blob as defined in Wi-Fi Aware * (TM) 4.0 specification Table 100 (Channel Entry format for the NAN * Availability attribute). @@ -890,6 +893,49 @@ struct ieee80211_nan_channel { u8 channel_entry[6]; }; +/** + * struct ieee80211_nan_peer_map - NAN peer schedule map + * + * This stores a single map from a peer's schedule. Each peer can have + * multiple maps. + * + * @map_id: the map ID from the peer schedule, %CFG80211_NAN_INVALID_MAP_ID + * if unused + * @slots: mapping of time slots to channel configurations in the schedule's + * channels array + */ +struct ieee80211_nan_peer_map { + u8 map_id; + struct ieee80211_nan_channel *slots[CFG80211_NAN_SCHED_NUM_TIME_SLOTS]; +}; + +/** + * struct ieee80211_nan_peer_sched - NAN peer schedule + * + * This stores the complete schedule from a peer. Contains peer-level + * parameters and an array of schedule maps. + * + * @seq_id: the sequence ID from the peer schedule + * @committed_dw: committed DW as published by the peer + * @max_chan_switch: maximum channel switch time in microseconds + * @init_ulw: initial ULWs as published by the peer (copied) + * @ulw_size: number of bytes in @init_ulw + * @maps: array of peer schedule maps. Invalid slots have map_id set to + * %CFG80211_NAN_INVALID_MAP_ID. + * @n_channels: number of valid channel entries in @channels + * @channels: flexible array of negotiated peer channels for this schedule + */ +struct ieee80211_nan_peer_sched { + u8 seq_id; + u16 committed_dw; + u16 max_chan_switch; + const u8 *init_ulw; + u16 ulw_size; + struct ieee80211_nan_peer_map maps[CFG80211_NAN_MAX_PEER_MAPS]; + u8 n_channels; + struct ieee80211_nan_channel channels[] __counted_by(n_channels); +}; + /** * enum mac80211_tx_info_flags - flags to describe transmission information/status * @@ -2625,6 +2671,7 @@ struct ieee80211_link_sta { * @spp_amsdu: indicates whether the STA uses SPP A-MSDU or not. * @epp_peer: indicates that the peer is an EPP peer. * @nmi: For NDI stations, pointer to the NMI station of the peer. + * @nan_sched: NAN peer schedule for this station. Valid only for NMI stations. */ struct ieee80211_sta { u8 addr[ETH_ALEN] __aligned(2); @@ -2655,6 +2702,9 @@ struct ieee80211_sta { struct ieee80211_sta __rcu *nmi; + /* should only be accessed with the wiphy mutex held */ + struct ieee80211_nan_peer_sched *nan_sched; + /* must be last */ u8 drv_priv[] __aligned(sizeof(void *)); }; @@ -4556,6 +4606,12 @@ struct ieee80211_prep_tx_info { * @del_nan_func: Remove a NAN function. The driver must call * ieee80211_nan_func_terminated() with * NL80211_NAN_FUNC_TERM_REASON_USER_REQUEST reason code upon removal. + * @nan_peer_sched_changed: Notifies the driver that the peer NAN schedule + * has changed. The new schedule is available via sta->nan_sched. + * Note that the channel_entry blob might not match the actual chandef + * since the bandwidth of the chandef is the minimum of the local and peer + * bandwidth. It is the driver responsibility to remove the peer schedule + * when the NMI station is removed. * @can_aggregate_in_amsdu: Called in order to determine if HW supports * aggregating two specific frames in the same A-MSDU. The relation * between the skbs should be symmetric and transitive. Note that while @@ -4961,6 +5017,8 @@ struct ieee80211_ops { void (*del_nan_func)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u8 instance_id); + int (*nan_peer_sched_changed)(struct ieee80211_hw *hw, + struct ieee80211_sta *sta); bool (*can_aggregate_in_amsdu)(struct ieee80211_hw *hw, struct sk_buff *head, struct sk_buff *skb); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 607f3bd3f8f8..0c1439d31c93 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -5689,6 +5689,18 @@ ieee80211_set_local_nan_sched(struct wiphy *wiphy, return ieee80211_nan_set_local_sched(sdata, sched); } +static int +ieee80211_set_peer_nan_sched(struct wiphy *wiphy, + struct wireless_dev *wdev, + struct cfg80211_nan_peer_sched *sched) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + + lockdep_assert_wiphy(sdata->local->hw.wiphy); + + return ieee80211_nan_set_peer_sched(sdata, sched); +} + const struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -5806,4 +5818,5 @@ const struct cfg80211_ops mac80211_config_ops = { .assoc_ml_reconf = ieee80211_assoc_ml_reconf, .set_epcs = ieee80211_set_epcs, .nan_set_local_sched = ieee80211_set_local_nan_sched, + .nan_set_peer_sched = ieee80211_set_peer_nan_sched, }; diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 51bf3c7822a7..f1c0b87fddd5 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1793,4 +1793,25 @@ static inline int drv_set_eml_op_mode(struct ieee80211_sub_if_data *sdata, return ret; } +static inline int +drv_nan_peer_sched_changed(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct sta_info *sta) +{ + int ret; + + might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); + check_sdata_in_driver(sdata); + + if (!local->ops->nan_peer_sched_changed) + return -EOPNOTSUPP; + + trace_drv_nan_peer_sched_changed(local, sdata, &sta->sta); + ret = local->ops->nan_peer_sched_changed(&local->hw, &sta->sta); + trace_drv_return_int(local, ret); + + return ret; +} + #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index e3a051beba6a..23bf00472915 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2042,6 +2042,9 @@ int ieee80211_mesh_finish_csa(struct ieee80211_sub_if_data *sdata, /* NAN code */ int ieee80211_nan_set_local_sched(struct ieee80211_sub_if_data *sdata, struct cfg80211_nan_local_sched *sched); +int ieee80211_nan_set_peer_sched(struct ieee80211_sub_if_data *sdata, + struct cfg80211_nan_peer_sched *sched); +void ieee80211_nan_free_peer_sched(struct ieee80211_nan_peer_sched *sched); /* scan/BSS handling */ void ieee80211_scan_work(struct wiphy *wiphy, struct wiphy_work *work); diff --git a/net/mac80211/nan.c b/net/mac80211/nan.c index 2fa55e9a9dab..5e1f9bb7c49d 100644 --- a/net/mac80211/nan.c +++ b/net/mac80211/nan.c @@ -1,12 +1,13 @@ // SPDX-License-Identifier: GPL-2.0-only /* * NAN mode implementation - * Copyright(c) 2025 Intel Corporation + * Copyright(c) 2025-2026 Intel Corporation */ #include #include "ieee80211_i.h" #include "driver-ops.h" +#include "sta_info.h" static void ieee80211_nan_init_channel(struct ieee80211_nan_channel *nan_channel, @@ -96,6 +97,82 @@ ieee80211_nan_use_chanctx(struct ieee80211_sub_if_data *sdata, return 0; } +static void +ieee80211_nan_update_peer_channels(struct ieee80211_sub_if_data *sdata, + struct ieee80211_chanctx_conf *removed_conf) +{ + struct ieee80211_local *local = sdata->local; + struct sta_info *sta; + + lockdep_assert_wiphy(local->hw.wiphy); + + list_for_each_entry(sta, &local->sta_list, list) { + struct ieee80211_nan_peer_sched *peer_sched; + int write_idx = 0; + bool updated = false; + + if (sta->sdata != sdata) + continue; + + peer_sched = sta->sta.nan_sched; + if (!peer_sched) + continue; + + /* NULL out map slots for channels being removed */ + for (int i = 0; i < peer_sched->n_channels; i++) { + if (peer_sched->channels[i].chanctx_conf != removed_conf) + continue; + + for (int m = 0; m < CFG80211_NAN_MAX_PEER_MAPS; m++) { + struct ieee80211_nan_peer_map *map = + &peer_sched->maps[m]; + + if (map->map_id == CFG80211_NAN_INVALID_MAP_ID) + continue; + + for (int s = 0; s < ARRAY_SIZE(map->slots); s++) + if (map->slots[s] == &peer_sched->channels[i]) + map->slots[s] = NULL; + } + } + + /* Compact channels array, removing those with removed_conf */ + for (int i = 0; i < peer_sched->n_channels; i++) { + if (peer_sched->channels[i].chanctx_conf == removed_conf) { + updated = true; + continue; + } + + if (write_idx != i) { + /* Update map pointers before moving */ + for (int m = 0; m < CFG80211_NAN_MAX_PEER_MAPS; m++) { + struct ieee80211_nan_peer_map *map = + &peer_sched->maps[m]; + + if (map->map_id == CFG80211_NAN_INVALID_MAP_ID) + continue; + + for (int s = 0; s < ARRAY_SIZE(map->slots); s++) + if (map->slots[s] == &peer_sched->channels[i]) + map->slots[s] = &peer_sched->channels[write_idx]; + } + + peer_sched->channels[write_idx] = peer_sched->channels[i]; + } + write_idx++; + } + + /* Clear any remaining entries at the end */ + for (int i = write_idx; i < peer_sched->n_channels; i++) + memset(&peer_sched->channels[i], 0, sizeof(peer_sched->channels[i])); + + peer_sched->n_channels = write_idx; + + if (updated) + drv_nan_peer_sched_changed(local, sdata, sta); + } +} + static void ieee80211_nan_remove_channel(struct ieee80211_sub_if_data *sdata, struct ieee80211_nan_channel *nan_channel) @@ -118,6 +195,10 @@ ieee80211_nan_remove_channel(struct ieee80211_sub_if_data *sdata, conf = nan_channel->chanctx_conf; + /* If any peer nan schedule uses this chanctx, update them */ + if (conf) + ieee80211_nan_update_peer_channels(sdata, conf); + memset(nan_channel, 0, sizeof(*nan_channel)); /* Update the driver before (possibly) releasing the channel context */ @@ -376,3 +457,146 @@ void ieee80211_nan_sched_update_done(struct ieee80211_vif *vif) GFP_KERNEL); } EXPORT_SYMBOL(ieee80211_nan_sched_update_done); + +void ieee80211_nan_free_peer_sched(struct ieee80211_nan_peer_sched *sched) +{ + if (!sched) + return; + + kfree(sched->init_ulw); + kfree(sched); +} + +static int +ieee80211_nan_init_peer_channel(struct ieee80211_sub_if_data *sdata, + const struct sta_info *sta, + const struct cfg80211_nan_channel *cfg_chan, + struct ieee80211_nan_channel *new_chan) +{ + struct ieee80211_nan_sched_cfg *sched_cfg = &sdata->vif.cfg.nan_sched; + + /* Find compatible local channel */ + for (int j = 0; j < ARRAY_SIZE(sched_cfg->channels); j++) { + struct ieee80211_nan_channel *local_chan = + &sched_cfg->channels[j]; + const struct cfg80211_chan_def *compat; + + if (!local_chan->chanreq.oper.chan) + continue; + + compat = cfg80211_chandef_compatible(&local_chan->chanreq.oper, + &cfg_chan->chandef); + if (!compat) + continue; + + /* compat is the wider chandef, and we want the narrower one */ + new_chan->chanreq.oper = compat == &local_chan->chanreq.oper ? + cfg_chan->chandef : local_chan->chanreq.oper; + new_chan->needed_rx_chains = min(local_chan->needed_rx_chains, + cfg_chan->rx_nss); + new_chan->chanctx_conf = local_chan->chanctx_conf; + + break; + } + + /* + * nl80211 already validated that each peer channel is compatible + * with at least one local channel, so this should never happen. + */ + if (WARN_ON(!new_chan->chanreq.oper.chan)) + return -EINVAL; + + memcpy(new_chan->channel_entry, cfg_chan->channel_entry, + sizeof(new_chan->channel_entry)); + + return 0; +} + +static void +ieee80211_nan_init_peer_map(struct ieee80211_nan_peer_sched *peer_sched, + const struct cfg80211_nan_peer_map *cfg_map, + struct ieee80211_nan_peer_map *new_map) +{ + new_map->map_id = cfg_map->map_id; + + if (new_map->map_id == CFG80211_NAN_INVALID_MAP_ID) + return; + + /* Set up the slots array */ + for (int slot = 0; slot < ARRAY_SIZE(new_map->slots); slot++) { + u8 chan_idx = cfg_map->schedule[slot]; + + if (chan_idx < peer_sched->n_channels) + new_map->slots[slot] = &peer_sched->channels[chan_idx]; + } +} + +int ieee80211_nan_set_peer_sched(struct ieee80211_sub_if_data *sdata, + struct cfg80211_nan_peer_sched *sched) +{ + struct ieee80211_nan_peer_sched *new_sched, *old_sched, *to_free; + struct sta_info *sta; + int ret; + + lockdep_assert_wiphy(sdata->local->hw.wiphy); + + if (!sdata->u.nan.started) + return -EINVAL; + + sta = sta_info_get(sdata, sched->peer_addr); + if (!sta) + return -ENOENT; + + new_sched = kzalloc(struct_size(new_sched, channels, sched->n_channels), + GFP_KERNEL); + if (!new_sched) + return -ENOMEM; + + to_free = new_sched; + + new_sched->seq_id = sched->seq_id; + new_sched->committed_dw = sched->committed_dw; + new_sched->max_chan_switch = sched->max_chan_switch; + new_sched->n_channels = sched->n_channels; + + if (sched->ulw_size && sched->init_ulw) { + new_sched->init_ulw = kmemdup(sched->init_ulw, sched->ulw_size, + GFP_KERNEL); + if (!new_sched->init_ulw) { + ret = -ENOMEM; + goto out; + } + new_sched->ulw_size = sched->ulw_size; + } + + for (int i = 0; i < sched->n_channels; i++) { + ret = ieee80211_nan_init_peer_channel(sdata, sta, + &sched->nan_channels[i], + &new_sched->channels[i]); + if (ret) + goto out; + } + + for (int m = 0; m < ARRAY_SIZE(sched->maps); m++) + ieee80211_nan_init_peer_map(new_sched, &sched->maps[m], + &new_sched->maps[m]); + + /* Install the new schedule before calling the driver */ + old_sched = sta->sta.nan_sched; + sta->sta.nan_sched = new_sched; + + ret = drv_nan_peer_sched_changed(sdata->local, sdata, sta); + if (ret) { + /* Revert to old schedule */ + sta->sta.nan_sched = old_sched; + goto out; + } + + /* Success - free old schedule */ + to_free = old_sched; + ret = 0; + +out: + ieee80211_nan_free_peer_sched(to_free); + return ret; +} diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 580cd7a68beb..fb0cfd4d16d3 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1309,6 +1309,10 @@ static int __must_check __sta_info_destroy_part1(struct sta_info *sta) continue; sta_info_destroy_addr(sta_iter->sdata, sta_iter->addr); } + + /* Free and clear the local peer schedule */ + ieee80211_nan_free_peer_sched(sta->sta.nan_sched); + sta->sta.nan_sched = NULL; } /* diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index e5968d754f8b..71cf88039bd4 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -3366,6 +3366,37 @@ TRACE_EVENT(drv_set_eml_op_mode, ) ); +TRACE_EVENT(drv_nan_peer_sched_changed, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta), + + TP_ARGS(local, sdata, sta), + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + STA_ENTRY + __array(u8, map_ids, CFG80211_NAN_MAX_PEER_MAPS) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + STA_ASSIGN; + for (int i = 0; i < CFG80211_NAN_MAX_PEER_MAPS; i++) + __entry->map_ids[i] = sta->nan_sched ? + sta->nan_sched->maps[i].map_id : + 0xff; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT + " map_ids=[%u, %u]", + LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, + __entry->map_ids[0], __entry->map_ids[1] + ) +); + #endif /* !__MAC80211_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH diff --git a/net/mac80211/util.c b/net/mac80211/util.c index a352f73a7ec4..b093bc203c81 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1817,6 +1817,14 @@ static int ieee80211_reconfig_nan(struct ieee80211_sub_if_data *sdata) if (WARN_ON(res)) return res; } + + /* Add peer schedules for NMI stations that have them */ + if (!sta->sta.nan_sched) + continue; + + res = drv_nan_peer_sched_changed(local, sdata, sta); + if (WARN_ON(res)) + return res; } /* Add NDI stations (stations on NAN_DATA interfaces) */ From e1d5c95456a433b8898fff48c17a6150e69e9af9 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Thu, 26 Mar 2026 12:14:40 +0200 Subject: [PATCH 1198/1561] wifi: mac80211: update NAN data path state on schedule changes A carrier of an NDI interface is turned on when there is at least one NDI station that: (1) correlates to this interface (2) is authorized (3) the NAN peer to which this station belongs has at least one common slot with the local schedule. Otherwise, it is turned off. (common slots are slots where both schedules are active on compatible channels.) Implement the calculation of the carrier state and trigger it when needed. Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260326121156.98ff4115406f.Ie796487ab9eb23cda819b0afac57e7267b134911@changeid Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 10 +++- net/mac80211/ieee80211_i.h | 1 + net/mac80211/nan.c | 108 +++++++++++++++++++++++++++++++++++++ net/mac80211/sta_info.c | 4 ++ 4 files changed, 122 insertions(+), 1 deletion(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 0c1439d31c93..e47197fe6d1b 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2502,7 +2502,15 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct wireless_dev *wdev, test_sta_flag(sta, WLAN_STA_ASSOC)) rate_control_rate_init_all_links(sta); - return sta_info_insert(sta); + err = sta_info_insert(sta); + + /* + * ieee80211_nan_update_ndi_carrier was called from sta_apply_parameters, + * but then we did not have the STA in the list. + */ + if (!err && sdata->vif.type == NL80211_IFTYPE_NAN_DATA) + ieee80211_nan_update_ndi_carrier(sta->sdata); + return err; } static int ieee80211_del_station(struct wiphy *wiphy, struct wireless_dev *wdev, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 23bf00472915..2a693406294b 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2045,6 +2045,7 @@ int ieee80211_nan_set_local_sched(struct ieee80211_sub_if_data *sdata, int ieee80211_nan_set_peer_sched(struct ieee80211_sub_if_data *sdata, struct cfg80211_nan_peer_sched *sched); void ieee80211_nan_free_peer_sched(struct ieee80211_nan_peer_sched *sched); +void ieee80211_nan_update_ndi_carrier(struct ieee80211_sub_if_data *ndi_sdata); /* scan/BSS handling */ void ieee80211_scan_work(struct wiphy *wiphy, struct wiphy_work *work); diff --git a/net/mac80211/nan.c b/net/mac80211/nan.c index 5e1f9bb7c49d..4e262b624521 100644 --- a/net/mac80211/nan.c +++ b/net/mac80211/nan.c @@ -220,6 +220,23 @@ ieee80211_nan_remove_channel(struct ieee80211_sub_if_data *sdata, ieee80211_free_chanctx(sdata->local, ctx, false); } +static void +ieee80211_nan_update_all_ndi_carriers(struct ieee80211_local *local) +{ + struct ieee80211_sub_if_data *sdata; + + lockdep_assert_wiphy(local->hw.wiphy); + + /* Iterate all interfaces and update carrier for NDI interfaces */ + list_for_each_entry(sdata, &local->interfaces, list) { + if (!ieee80211_sdata_running(sdata) || + sdata->vif.type != NL80211_IFTYPE_NAN_DATA) + continue; + + ieee80211_nan_update_ndi_carrier(sdata); + } +} + static struct ieee80211_nan_channel * ieee80211_nan_find_free_channel(struct ieee80211_nan_sched_cfg *sched_cfg) { @@ -347,6 +364,7 @@ int ieee80211_nan_set_local_sched(struct ieee80211_sub_if_data *sdata, if (sched_cfg->deferred) return 0; + ieee80211_nan_update_all_ndi_carriers(sdata->local); bitmap_zero(sdata->u.nan.removed_channels, IEEE80211_NAN_MAX_CHANNELS); return 0; @@ -409,6 +427,7 @@ err: bitmap_zero(sdata->u.nan.removed_channels, IEEE80211_NAN_MAX_CHANNELS); drv_vif_cfg_changed(sdata->local, sdata, BSS_CHANGED_NAN_LOCAL_SCHED); + ieee80211_nan_update_all_ndi_carriers(sdata->local); return ret; } @@ -423,6 +442,8 @@ void ieee80211_nan_sched_update_done(struct ieee80211_vif *vif) if (WARN_ON(!sched_cfg->deferred)) return; + ieee80211_nan_update_all_ndi_carriers(sdata->local); + /* * Clear the deferred flag before removing channels. Removing channels * will trigger another schedule update to the driver, and there is no @@ -531,6 +552,91 @@ ieee80211_nan_init_peer_map(struct ieee80211_nan_peer_sched *peer_sched, } } +/* + * Check if the local schedule and a peer schedule have at least one common + * slot - a slot where both schedules are active on compatible channels. + */ +static bool +ieee80211_nan_has_common_slots(struct ieee80211_sub_if_data *sdata, + struct ieee80211_nan_peer_sched *peer_sched) +{ + for (int slot = 0; slot < CFG80211_NAN_SCHED_NUM_TIME_SLOTS; slot++) { + struct ieee80211_nan_channel *local_chan = + sdata->vif.cfg.nan_sched.schedule[slot]; + + if (!local_chan || !local_chan->chanctx_conf) + continue; + + /* Check all peer maps for this slot */ + for (int m = 0; m < CFG80211_NAN_MAX_PEER_MAPS; m++) { + struct ieee80211_nan_peer_map *map = &peer_sched->maps[m]; + struct ieee80211_nan_channel *peer_chan; + + if (map->map_id == CFG80211_NAN_INVALID_MAP_ID) + continue; + + peer_chan = map->slots[slot]; + if (!peer_chan) + continue; + + if (local_chan->chanctx_conf == peer_chan->chanctx_conf) + return true; + } + } + + return false; +} + +void ieee80211_nan_update_ndi_carrier(struct ieee80211_sub_if_data *ndi_sdata) +{ + struct ieee80211_local *local = ndi_sdata->local; + struct ieee80211_sub_if_data *nmi_sdata; + struct sta_info *sta; + + lockdep_assert_wiphy(local->hw.wiphy); + + if (WARN_ON(ndi_sdata->vif.type != NL80211_IFTYPE_NAN_DATA || + !ndi_sdata->dev) || !ieee80211_sdata_running(ndi_sdata)) + return; + + nmi_sdata = wiphy_dereference(local->hw.wiphy, ndi_sdata->u.nan_data.nmi); + if (WARN_ON(!nmi_sdata)) + return; + + list_for_each_entry(sta, &local->sta_list, list) { + struct ieee80211_sta *nmi_sta; + + if (sta->sdata != ndi_sdata || + !test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + continue; + + nmi_sta = wiphy_dereference(local->hw.wiphy, sta->sta.nmi); + if (WARN_ON(!nmi_sta) || !nmi_sta->nan_sched) + continue; + + if (ieee80211_nan_has_common_slots(nmi_sdata, nmi_sta->nan_sched)) { + netif_carrier_on(ndi_sdata->dev); + return; + } + } + + netif_carrier_off(ndi_sdata->dev); +} + +static void +ieee80211_nan_update_peer_ndis_carrier(struct ieee80211_local *local, + struct sta_info *nmi_sta) +{ + struct sta_info *sta; + + lockdep_assert_wiphy(local->hw.wiphy); + + list_for_each_entry(sta, &local->sta_list, list) { + if (rcu_access_pointer(sta->sta.nmi) == &nmi_sta->sta) + ieee80211_nan_update_ndi_carrier(sta->sdata); + } +} + int ieee80211_nan_set_peer_sched(struct ieee80211_sub_if_data *sdata, struct cfg80211_nan_peer_sched *sched) { @@ -592,6 +698,8 @@ int ieee80211_nan_set_peer_sched(struct ieee80211_sub_if_data *sdata, goto out; } + ieee80211_nan_update_peer_ndis_carrier(sdata->local, sta); + /* Success - free old schedule */ to_free = old_sched; ret = 0; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index fb0cfd4d16d3..4c31ef8817ce 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1454,6 +1454,8 @@ static int _sta_info_move_state(struct sta_info *sta, } else if (sta->sta_state == IEEE80211_STA_AUTHORIZED) { ieee80211_vif_dec_num_mcast(sta->sdata); clear_bit(WLAN_STA_AUTHORIZED, &sta->_flags); + if (sta->sdata->vif.type == NL80211_IFTYPE_NAN_DATA) + ieee80211_nan_update_ndi_carrier(sta->sdata); /* * If we have encryption offload, flush (station) queues @@ -1482,6 +1484,8 @@ static int _sta_info_move_state(struct sta_info *sta, set_bit(WLAN_STA_AUTHORIZED, &sta->_flags); ieee80211_check_fast_xmit(sta); ieee80211_check_fast_rx(sta); + if (sta->sdata->vif.type == NL80211_IFTYPE_NAN_DATA) + ieee80211_nan_update_ndi_carrier(sta->sdata); } if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN || sta->sdata->vif.type == NL80211_IFTYPE_AP) From b5e4adbd01d1c4f864941162133840a370008e2c Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Thu, 26 Mar 2026 12:14:41 +0200 Subject: [PATCH 1199/1561] wifi: mac80211: add support for TX over NAN_DATA interfaces Add support for TXing frames over NAN_DATA interfaces: - find the NDI station - populoate the addresses fields - use NUM_NL80211_BANDS for the band, similar to NAN interfaces. Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260326121156.b1d248947158.I04b27d9727f7a553fa80520cf6e532683ac03690@changeid Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index f0f23e94db04..b487d2330f25 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2543,6 +2543,13 @@ int ieee80211_lookup_ra_sta(struct ieee80211_sub_if_data *sdata, if (!sta) return -ENOLINK; break; + case NL80211_IFTYPE_NAN_DATA: + if (is_multicast_ether_addr(skb->data)) { + *sta_out = ERR_PTR(-ENOENT); + return 0; + } + sta = sta_info_get(sdata, skb->data); + break; default: return -EINVAL; } @@ -2836,18 +2843,37 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, memcpy(hdr.addr3, sdata->u.ibss.bssid, ETH_ALEN); hdrlen = 24; break; + case NL80211_IFTYPE_NAN_DATA: { + struct ieee80211_sub_if_data *nmi; + + /* DA SA Cluster ID */ + memcpy(hdr.addr1, skb->data, ETH_ALEN); + memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); + nmi = rcu_dereference(sdata->u.nan_data.nmi); + if (!nmi) { + ret = -ENOTCONN; + goto free; + } + memcpy(hdr.addr3, nmi->wdev.u.nan.cluster_id, ETH_ALEN); + hdrlen = 24; + break; + } default: ret = -EINVAL; goto free; } if (!chanctx_conf) { - if (!ieee80211_vif_is_mld(&sdata->vif)) { + if (sdata->vif.type == NL80211_IFTYPE_NAN_DATA) { + /* NAN operates on multiple bands */ + band = NUM_NL80211_BANDS; + } else if (!ieee80211_vif_is_mld(&sdata->vif)) { ret = -ENOTCONN; goto free; + } else { + /* MLD transmissions must not rely on the band */ + band = 0; } - /* MLD transmissions must not rely on the band */ - band = 0; } else { band = chanctx_conf->def.chan->band; } From 61408403e2b4a3e90b2bc6eda9a57837c4fa8ece Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Thu, 26 Mar 2026 12:14:42 +0200 Subject: [PATCH 1200/1561] wifi: mac80211: Accept frames on NAN DATA interfaces Accept frames there were received on NAN DATA interfaces: - Data frames, both multicast or unicast - Non-Public action frames, both multicast or unicast - Unicast secure management frames - FromDS and ToDS are 0. While at it, check FromDS/ToDS also for NAN management frames. Accept only data frames from devices that are part of the NAN cluster. Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260326121156.0e6f37d4a40c.Iaa84cc3d063392f0150fcdf2bf610bdb41062f70@changeid Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index dbdd67c181d8..a00b73420929 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -4469,6 +4469,9 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx) u8 *bssid = ieee80211_get_bssid(hdr, skb->len, sdata->vif.type); bool multicast = is_multicast_ether_addr(hdr->addr1) || ieee80211_is_s1g_beacon(hdr->frame_control); + static const u8 nan_network_id[ETH_ALEN] __aligned(2) = { + 0x51, 0x6F, 0x9A, 0x01, 0x00, 0x00 + }; switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: @@ -4597,6 +4600,10 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx) (ieee80211_is_auth(hdr->frame_control) && ether_addr_equal(sdata->vif.addr, hdr->addr1)); case NL80211_IFTYPE_NAN: + if (ieee80211_has_tods(hdr->frame_control) || + ieee80211_has_fromds(hdr->frame_control)) + return false; + /* Accept only frames that are addressed to the NAN cluster * (based on the Cluster ID). From these frames, accept only * action frames or authentication frames that are addressed to @@ -4608,7 +4615,35 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx) (ieee80211_is_auth(hdr->frame_control) && ether_addr_equal(sdata->vif.addr, hdr->addr1))); case NL80211_IFTYPE_NAN_DATA: - return false; + if (ieee80211_has_tods(hdr->frame_control) || + ieee80211_has_fromds(hdr->frame_control)) + return false; + + if (ieee80211_is_data(hdr->frame_control)) { + struct ieee80211_sub_if_data *nmi; + + nmi = rcu_dereference(sdata->u.nan_data.nmi); + if (!nmi) + return false; + + if (!ether_addr_equal(nmi->wdev.u.nan.cluster_id, + hdr->addr3)) + return false; + + return multicast || + ether_addr_equal(sdata->vif.addr, hdr->addr1); + } + + /* Non-public action frames (unicast or multicast) */ + if (ieee80211_is_action(hdr->frame_control) && + !ieee80211_is_public_action(hdr, skb->len) && + (ether_addr_equal(nan_network_id, hdr->addr1) || + ether_addr_equal(sdata->vif.addr, hdr->addr1))) + return true; + + /* Unicast secure management frames */ + return ether_addr_equal(sdata->vif.addr, hdr->addr1) && + ieee80211_is_unicast_robust_mgmt_frame(skb); default: break; } From 779df4461440b34278d8558c8c977f8ff1d6c18d Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Thu, 26 Mar 2026 12:14:43 +0200 Subject: [PATCH 1201/1561] wifi: mac80211: allow block ack agreements in NAN Data Allow receiving and sending Add Block Ack action frames for NAN Data Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260326121156.fe51df9d65f4.I104435f2af65e032cba168b1d842cb9610720041@changeid Signed-off-by: Johannes Berg --- net/mac80211/agg-tx.c | 3 ++- net/mac80211/rx.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 01d927b88264..4833b46770b6 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -641,7 +641,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, sdata->vif.type != NL80211_IFTYPE_MESH_POINT && sdata->vif.type != NL80211_IFTYPE_AP_VLAN && sdata->vif.type != NL80211_IFTYPE_AP && - sdata->vif.type != NL80211_IFTYPE_ADHOC) + sdata->vif.type != NL80211_IFTYPE_ADHOC && + sdata->vif.type != NL80211_IFTYPE_NAN_DATA) return -EINVAL; if (test_sta_flag(sta, WLAN_STA_BLOCK_BA)) { diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index a00b73420929..979ac26d1173 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3748,7 +3748,8 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) sdata->vif.type != NL80211_IFTYPE_MESH_POINT && sdata->vif.type != NL80211_IFTYPE_AP_VLAN && sdata->vif.type != NL80211_IFTYPE_AP && - sdata->vif.type != NL80211_IFTYPE_ADHOC) + sdata->vif.type != NL80211_IFTYPE_ADHOC && + sdata->vif.type != NL80211_IFTYPE_NAN_DATA) break; /* verify action_code is present */ From 5f6fba9a1e7b03a6369a1078d3921440fff95200 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Thu, 26 Mar 2026 12:14:44 +0200 Subject: [PATCH 1202/1561] wifi: mac80211: report and drop spurious NAN Data frames According to Wi-Fi Aware (TM) 4.0 specification 6.2.5, in case a frame is recevied from an address that doesn't belong to any active NDP, the frame should be dropped and a NAN Data Path Termination should be sent to the transmitter. Do it by dropping the frame and calling cfg80211_rx_spurious_frame in that case. Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260326121156.721b3a61c580.I19e3572508beeba143871682c80e9a56b6c1046a@changeid Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 979ac26d1173..3e5d1c47a5b0 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1589,6 +1589,25 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx) if (ieee80211_vif_is_mesh(&rx->sdata->vif)) return ieee80211_rx_mesh_check(rx); + /* + * Wi-Fi Aware (TM) 4.0 specification 6.2.5: + * For NAN_DATA, unicast data frames must have A2 (source) + * assigned to an active NDP. If not the frame must be dropped + * and NAN Data Path termination frame should be sent. Notify + * user space so it can do so. + */ + if (rx->sdata->vif.type == NL80211_IFTYPE_NAN_DATA) { + if (ieee80211_is_data(hdr->frame_control) && + !is_multicast_ether_addr(hdr->addr1) && + (!rx->sta || !test_sta_flag(rx->sta, WLAN_STA_ASSOC))) { + if (cfg80211_rx_spurious_frame(rx->sdata->dev, hdr->addr2, + rx->link_id, GFP_ATOMIC)) + return RX_DROP_U_SPURIOUS_NOTIF; + return RX_DROP_U_SPURIOUS; + } + return RX_CONTINUE; + } + if (unlikely((ieee80211_is_data(hdr->frame_control) || ieee80211_is_pspoll(hdr->frame_control)) && rx->sdata->vif.type != NL80211_IFTYPE_ADHOC && From 014eec318fc00f0b497bffa04f1a45255ba7ff5e Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Thu, 26 Mar 2026 12:14:45 +0200 Subject: [PATCH 1203/1561] wifi: mac80211: allow add_key on NAN interfaces Keys may be added to the NAN interfaces to protect NAN management frames and data, allow that. Signed-off-by: Avraham Stern Reviewed-by: Ilan Peer Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260326121156.34961ba9a0c4.I5b7c646c456d4112e5ab8663026153ace9b6b7d6@changeid Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index e47197fe6d1b..5a908eaddbf2 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -702,6 +702,8 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct wireless_dev *wdev, break; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_NAN: + case NL80211_IFTYPE_NAN_DATA: /* Keys without a station are used for TX only */ if (sta && test_sta_flag(sta, WLAN_STA_MFP)) key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT; @@ -718,13 +720,11 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct wireless_dev *wdev, case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_P2P_DEVICE: - case NL80211_IFTYPE_NAN: case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_OCB: - case NL80211_IFTYPE_NAN_DATA: /* shouldn't happen */ WARN_ON_ONCE(1); break; From dd8592fc6007a451c3e4b9025de365e39de8178a Mon Sep 17 00:00:00 2001 From: Ethan Tidmore Date: Mon, 16 Feb 2026 20:30:43 -0600 Subject: [PATCH 1204/1561] wifi: brcmfmac: Fix error pointer dereference The function brcmf_chip_add_core() can return an error pointer and is not checked. Add checks for error pointer. Detected by Smatch: drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c:1010 brcmf_chip_recognition() error: 'core' dereferencing possible ERR_PTR() drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c:1013 brcmf_chip_recognition() error: 'core' dereferencing possible ERR_PTR() drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c:1016 brcmf_chip_recognition() error: 'core' dereferencing possible ERR_PTR() drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c:1019 brcmf_chip_recognition() error: 'core' dereferencing possible ERR_PTR() drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c:1022 brcmf_chip_recognition() error: 'core' dereferencing possible ERR_PTR() Fixes: cb7cf7be9eba7 ("brcmfmac: make chip related functions host interface independent") Signed-off-by: Ethan Tidmore Acked-by: Arend van Spriel Link: https://patch.msgid.link/20260217023043.73631-1-ethantidmore06@gmail.com [add missing wifi: prefix] Signed-off-by: Johannes Berg --- .../wireless/broadcom/brcm80211/brcmfmac/chip.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c index a790f1693b82..4adc0d0e4251 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c @@ -1007,18 +1007,33 @@ static int brcmf_chip_recognition(struct brcmf_chip_priv *ci) core = brcmf_chip_add_core(ci, BCMA_CORE_CHIPCOMMON, SI_ENUM_BASE_DEFAULT, 0); + if (IS_ERR(core)) + return PTR_ERR(core); + brcmf_chip_sb_corerev(ci, core); core = brcmf_chip_add_core(ci, BCMA_CORE_SDIO_DEV, BCM4329_CORE_BUS_BASE, 0); + if (IS_ERR(core)) + return PTR_ERR(core); + brcmf_chip_sb_corerev(ci, core); core = brcmf_chip_add_core(ci, BCMA_CORE_INTERNAL_MEM, BCM4329_CORE_SOCRAM_BASE, 0); + if (IS_ERR(core)) + return PTR_ERR(core); + brcmf_chip_sb_corerev(ci, core); core = brcmf_chip_add_core(ci, BCMA_CORE_ARM_CM3, BCM4329_CORE_ARM_BASE, 0); + if (IS_ERR(core)) + return PTR_ERR(core); + brcmf_chip_sb_corerev(ci, core); core = brcmf_chip_add_core(ci, BCMA_CORE_80211, 0x18001000, 0); + if (IS_ERR(core)) + return PTR_ERR(core); + brcmf_chip_sb_corerev(ci, core); } else if (socitype == SOCI_AI) { ci->iscoreup = brcmf_chip_ai_iscoreup; From 084863593243c5dce0f2eef44e23de8c53ebf4a2 Mon Sep 17 00:00:00 2001 From: Rosen Penev Date: Thu, 19 Feb 2026 18:27:39 -0800 Subject: [PATCH 1205/1561] wifi: brcmfmac: of: defer probe for MAC address of_get_mac_address can return EPROBE_DEFER if the specific nvmem driver has not been loaded yet. Signed-off-by: Rosen Penev Acked-by: Arend van Spriel Link: https://patch.msgid.link/20260220022739.41755-1-rosenp@gmail.com Signed-off-by: Johannes Berg --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c index 1681ad00f82e..03efae36a0b2 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c @@ -128,7 +128,9 @@ int brcmf_of_probe(struct device *dev, enum brcmf_bus_type bus_type, if (err) brcmf_err("failed to get OF country code map (err=%d)\n", err); - of_get_mac_address(np, settings->mac); + err = of_get_mac_address(np, settings->mac); + if (err == -EPROBE_DEFER) + return err; if (bus_type != BRCMF_BUSTYPE_SDIO) return 0; From 49152949deeabd24db87f284f993ab89d4b8ac38 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Thu, 26 Mar 2026 12:31:58 +0000 Subject: [PATCH 1206/1561] wifi: iwlegacy: Fixup allocation failure log Fix 2 issues spotted by AI[0]: 1. Missing space after the full stop. 2. Wrong GFP flags are printed. And also switch to %pGg for the GFP flags. This produces nice readable output and decouples the format string from the size of gfp_t. [0] https://sashiko.dev/#/patchset/20260319-gfp64-v1-0-2c73b8d42b7f%40google.com Signed-off-by: Brendan Jackman Acked-by: Stanislaw Gruszka Link: https://patch.msgid.link/20260326-gfp64-v2-2-d916021cecdf@google.com [add missing wifi: prefix] Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlegacy/3945-mac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlegacy/3945-mac.c b/drivers/net/wireless/intel/iwlegacy/3945-mac.c index c148654aa953..88b31e0b9568 100644 --- a/drivers/net/wireless/intel/iwlegacy/3945-mac.c +++ b/drivers/net/wireless/intel/iwlegacy/3945-mac.c @@ -1002,9 +1002,9 @@ il3945_rx_allocate(struct il_priv *il, gfp_t priority) D_INFO("Failed to allocate SKB buffer.\n"); if (rxq->free_count <= RX_LOW_WATERMARK && net_ratelimit()) - IL_ERR("Failed to allocate SKB buffer with %0x." + IL_ERR("Failed to allocate SKB buffer with %pGg. " "Only %u free buffers remaining.\n", - priority, rxq->free_count); + &gfp_mask, rxq->free_count); /* We don't reschedule replenish work here -- we will * call the restock method and if it still needs * more buffers it will schedule replenish */ From d278bf868604af9895f4b8b4f7f4bb82e317740c Mon Sep 17 00:00:00 2001 From: Rosen Penev Date: Thu, 26 Mar 2026 20:06:16 -0700 Subject: [PATCH 1207/1561] wifi: wilc1000: use kzalloc_flex Because key is a flexible array member, kzalloc_flex can be used to handle the math properly and simplify the code slightly. Signed-off-by: Rosen Penev Link: https://patch.msgid.link/20260327030616.8774-1-rosenp@gmail.com Signed-off-by: Johannes Berg --- drivers/net/wireless/microchip/wilc1000/hif.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/microchip/wilc1000/hif.c b/drivers/net/wireless/microchip/wilc1000/hif.c index 944b2a812b63..009c4770a6f9 100644 --- a/drivers/net/wireless/microchip/wilc1000/hif.c +++ b/drivers/net/wireless/microchip/wilc1000/hif.c @@ -1123,7 +1123,7 @@ int wilc_add_ptk(struct wilc_vif *vif, const u8 *ptk, u8 ptk_key_len, wid_list[0].size = sizeof(char); wid_list[0].val = (s8 *)&cipher_mode; - key_buf = kzalloc(sizeof(*key_buf) + t_key_len, GFP_KERNEL); + key_buf = kzalloc_flex(*key_buf, key, t_key_len); if (!key_buf) return -ENOMEM; @@ -1151,7 +1151,7 @@ int wilc_add_ptk(struct wilc_vif *vif, const u8 *ptk, u8 ptk_key_len, struct wid wid; struct wilc_sta_wpa_ptk *key_buf; - key_buf = kzalloc(sizeof(*key_buf) + t_key_len, GFP_KERNEL); + key_buf = kzalloc_flex(*key_buf, key, t_key_len); if (!key_buf) return -ENOMEM; @@ -1186,7 +1186,7 @@ int wilc_add_igtk(struct wilc_vif *vif, const u8 *igtk, u8 igtk_key_len, struct wid wid; struct wilc_wpa_igtk *key_buf; - key_buf = kzalloc(sizeof(*key_buf) + t_key_len, GFP_KERNEL); + key_buf = kzalloc_flex(*key_buf, key, t_key_len); if (!key_buf) return -ENOMEM; @@ -1217,7 +1217,7 @@ int wilc_add_rx_gtk(struct wilc_vif *vif, const u8 *rx_gtk, u8 gtk_key_len, struct wilc_gtk_key *gtk_key; int t_key_len = gtk_key_len + WILC_RX_MIC_KEY_LEN + WILC_TX_MIC_KEY_LEN; - gtk_key = kzalloc(sizeof(*gtk_key) + t_key_len, GFP_KERNEL); + gtk_key = kzalloc_flex(*gtk_key, key, t_key_len); if (!gtk_key) return -ENOMEM; From 72b18625ba8e5c28acbc923822a8b9ca7c5b3931 Mon Sep 17 00:00:00 2001 From: Ronald Claveau Date: Fri, 27 Mar 2026 10:36:26 +0100 Subject: [PATCH 1208/1561] dt-bindings: net: wireless: brcm: Add compatible for bcm43752 Add bcm43752 compatible with its bcm4329 compatible fallback. Acked-by: Conor Dooley Signed-off-by: Ronald Claveau Acked-by: Arend van Spriel Link: https://patch.msgid.link/20260327-add-bcm43752-compatible-v2-1-5b28e6637101@aliel.fr Signed-off-by: Johannes Berg --- .../devicetree/bindings/net/wireless/brcm,bcm4329-fmac.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/net/wireless/brcm,bcm4329-fmac.yaml b/Documentation/devicetree/bindings/net/wireless/brcm,bcm4329-fmac.yaml index 3be757678764..81fd3e37452a 100644 --- a/Documentation/devicetree/bindings/net/wireless/brcm,bcm4329-fmac.yaml +++ b/Documentation/devicetree/bindings/net/wireless/brcm,bcm4329-fmac.yaml @@ -42,6 +42,7 @@ properties: - brcm,bcm4356-fmac - brcm,bcm4359-fmac - brcm,bcm4366-fmac + - brcm,bcm43752-fmac - cypress,cyw4373-fmac - cypress,cyw43012-fmac - infineon,cyw43439-fmac From 1c161ca67e9bbd39b5c2adc8e067affcab10e8a5 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Fri, 27 Mar 2026 12:30:07 +0000 Subject: [PATCH 1209/1561] wifi: iwlegacy: Fix GFP flags in allocation loop Do not latch these flags, they should be re-evaluated for each iteration of the loop. Concretely, rxq->free_count is incremented during the loop so the __GFP_NOWARN decision may be stale. There may be other reasons to require the re-evaluation too. Suggested-by: Stanislaw Gruszka Link: https://lore.kernel.org/all/20260327115739.GB16800@wp.pl/ Signed-off-by: Brendan Jackman Acked-by: Stanislaw Gruszka Link: https://patch.msgid.link/20260327-iwlegacy-gfp-fix-v1-1-b83e4db0bd66@google.com Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlegacy/3945-mac.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlegacy/3945-mac.c b/drivers/net/wireless/intel/iwlegacy/3945-mac.c index 88b31e0b9568..cbaf250626c5 100644 --- a/drivers/net/wireless/intel/iwlegacy/3945-mac.c +++ b/drivers/net/wireless/intel/iwlegacy/3945-mac.c @@ -979,9 +979,10 @@ il3945_rx_allocate(struct il_priv *il, gfp_t priority) struct page *page; dma_addr_t page_dma; unsigned long flags; - gfp_t gfp_mask = priority; while (1) { + gfp_t gfp_mask = priority; + spin_lock_irqsave(&rxq->lock, flags); if (list_empty(&rxq->rx_used)) { spin_unlock_irqrestore(&rxq->lock, flags); From 368f5098ed0b2eb5d06dbbe692c163c85f240a4d Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Sat, 28 Mar 2026 15:01:21 +0100 Subject: [PATCH 1210/1561] wifi: brcmfmac: silence warning for non-existent, optional firmware The driver tries to load optional firmware files, specific to the actual board compatible. These might not exist resulting in a warning like this: brcmfmac mmc2:0001:1: Direct firmware load for brcm/brcmfmac4373-sdio.tq,imx93-tqma9352-mba93xxla-mini.bin failed with error -2 Silence this by using firmware_request_nowait_nowarn() for all firmware loads which use brcmf_fw_request_done_alt_path() as callback. This one handles optional firmware files. Signed-off-by: Alexander Stein Tested-by: Christian Hewitt [arend: use nowarn api for optional firmware files] Signed-off-by: Arend van Spriel Link: https://patch.msgid.link/20260328140121.2583606-1-arend.vanspriel@broadcom.com [clean up code a bit] Signed-off-by: Johannes Berg --- .../broadcom/brcm80211/brcmfmac/firmware.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c index 4bacd83db052..22ff326f1924 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c @@ -670,6 +670,9 @@ static int brcmf_fw_request_firmware(const struct firmware **fw, } fallback: + if (cur->flags & BRCMF_FW_REQF_OPTIONAL) + return firmware_request_nowarn(fw, cur->path, fwctx->dev); + return request_firmware(fw, cur->path, fwctx->dev); } @@ -714,9 +717,10 @@ static void brcmf_fw_request_done_alt_path(const struct firmware *fw, void *ctx) if (!alt_path) goto fallback; - ret = request_firmware_nowait(THIS_MODULE, true, alt_path, - fwctx->dev, GFP_KERNEL, fwctx, - brcmf_fw_request_done_alt_path); + ret = firmware_request_nowait_nowarn(THIS_MODULE, + alt_path, fwctx->dev, + GFP_KERNEL, fwctx, + brcmf_fw_request_done_alt_path); kfree(alt_path); if (ret < 0) @@ -779,9 +783,10 @@ int brcmf_fw_get_firmwares(struct device *dev, struct brcmf_fw_request *req, fwctx->req->board_types[0]); if (alt_path) { fwctx->board_index++; - ret = request_firmware_nowait(THIS_MODULE, true, alt_path, - fwctx->dev, GFP_KERNEL, fwctx, - brcmf_fw_request_done_alt_path); + ret = firmware_request_nowait_nowarn(THIS_MODULE, + alt_path, fwctx->dev, + GFP_KERNEL, fwctx, + brcmf_fw_request_done_alt_path); kfree(alt_path); } else { ret = request_firmware_nowait(THIS_MODULE, true, first->path, From c4ed2c3f4f5e34156e607f2ea92cefad638af50e Mon Sep 17 00:00:00 2001 From: Jiajia Liu Date: Tue, 7 Apr 2026 14:32:05 +0800 Subject: [PATCH 1211/1561] wifi: mac80211: remove unused variables in minstrel_ht_alloc_sta Remove the unused variable max_rates and related code. Also remove the variable mi and pass type to kzalloc_obj instead. Signed-off-by: Jiajia Liu Link: https://patch.msgid.link/20260407063205.68471-1-liujiajia@kylinos.cn Signed-off-by: Johannes Berg --- net/mac80211/rc80211_minstrel_ht.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 62745ca00e06..b73ef3adfcc5 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -1849,20 +1849,7 @@ minstrel_ht_rate_update(void *priv, struct ieee80211_supported_band *sband, static void * minstrel_ht_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp) { - struct ieee80211_supported_band *sband; - struct minstrel_ht_sta *mi; - struct minstrel_priv *mp = priv; - struct ieee80211_hw *hw = mp->hw; - int max_rates = 0; - int i; - - for (i = 0; i < NUM_NL80211_BANDS; i++) { - sband = hw->wiphy->bands[i]; - if (sband && sband->n_bitrates > max_rates) - max_rates = sband->n_bitrates; - } - - return kzalloc_obj(*mi, gfp); + return kzalloc_obj(struct minstrel_ht_sta, gfp); } static void From b5b8e295973083abf823fb66647a7c702a8db8a7 Mon Sep 17 00:00:00 2001 From: Nicolas Escande Date: Fri, 27 Mar 2026 11:02:56 +0100 Subject: [PATCH 1212/1561] wifi: mac80211: handle VHT EXT NSS in ieee80211_determine_our_sta_mode() A station which has a NSS ratio on the number of streams it is capable of in 160MHz VHT operation is supposed to use the 'Extended NSS BW Support' as defined by section '9.4.2.156.2 VHT Capabilities Information field'. This was missing in ieee80211_determine_our_sta_mode() and so we would wrongfully downgrade our bandwidth when connecting to an AP that supported 160MHz with messages such as: [ 37.638346] wlan1: AP XX:XX:XX:XX:XX:XX changed bandwidth in assoc response, new used config is 5280.000 MHz, width 3 (5290.000/0 MHz) Fixes: 310c8387c638 ("wifi: mac80211: clean up connection process") Signed-off-by: Nicolas Escande Link: https://patch.msgid.link/20260327100256.3101348-1-nico.escande@gmail.com Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 7fc5616cb244..285caaa2997e 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -6085,7 +6085,8 @@ ieee80211_determine_our_sta_mode(struct ieee80211_sub_if_data *sdata, if (is_5ghz && !(vht_cap.cap & (IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ | - IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ))) { + IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ | + IEEE80211_VHT_CAP_EXT_NSS_BW_MASK))) { conn->bw_limit = IEEE80211_CONN_BW_LIMIT_80; mlme_link_id_dbg(sdata, link_id, "no VHT 160 MHz capability on 5 GHz, limiting to 80 MHz"); From 469d5d5a3b7a133837004a18f34c899625fd5941 Mon Sep 17 00:00:00 2001 From: Tamizh Chelvam Raja Date: Thu, 26 Mar 2026 22:17:21 +0530 Subject: [PATCH 1213/1561] wifi: mac80211: synchronize valid links for WDS AP_VLAN interfaces The current code does not provide any link-configuration support for 4-address mode WDS AP_VLAN interfaces in MLO setups, preventing MLD stations from being added correctly. Add the required handling to enable proper integration of 4-address WDS stations into an MLO environment. When a 4-address station associates with an MLO AP, compute the intersection of valid links between the master AP interface and the station's advertised capabilities. Configure the AP_VLAN interface with only these common links to ensure correct data-path operation. This update ensures AP_VLAN interfaces correctly track link-state transitions and maintain consistent addressing across all active MLO links. Co-developed-by: Muna Sinada Signed-off-by: Muna Sinada Signed-off-by: Tamizh Chelvam Raja Link: https://patch.msgid.link/20260326164723.553927-2-tamizh.raja@oss.qualcomm.com Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 67 +++++++++++++++++++++++++++++++++++++++++---- net/mac80211/chan.c | 8 ++++++ net/mac80211/link.c | 45 ++++++++++++++++++++---------- 3 files changed, 101 insertions(+), 19 deletions(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 5a908eaddbf2..6a5bf403fd85 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2527,6 +2527,65 @@ static int ieee80211_del_station(struct wiphy *wiphy, struct wireless_dev *wdev, return 0; } +static int ieee80211_set_sta_4addr(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct sta_info *sta) +{ + struct ieee80211_vif *vif = &sdata->vif; + struct wiphy *wiphy = local->hw.wiphy; + struct ieee80211_sub_if_data *master; + struct ieee80211_bss_conf *link_conf; + struct wireless_dev *wdev; + unsigned long master_iter; + int link_id; + int err; + + lockdep_assert_wiphy(local->hw.wiphy); + + if (sdata->u.vlan.sta) + return -EBUSY; + + wdev = &sdata->wdev; + master = container_of(sdata->bss, + struct ieee80211_sub_if_data, + u.ap); + + if (sta->sta.valid_links) { + u16 sta_links = sta->sta.valid_links; + u16 new_links = master->vif.valid_links & sta_links; + u16 orig_links = wdev->valid_links; + + wdev->valid_links = new_links; + + err = ieee80211_vif_set_links(sdata, new_links, 0); + if (err) { + wdev->valid_links = orig_links; + return err; + } + + master_iter = master->vif.valid_links; + + for_each_set_bit(link_id, &master_iter, + IEEE80211_MLD_MAX_NUM_LINKS) { + if (!(sta_links & BIT(link_id))) { + eth_zero_addr(wdev->links[link_id].addr); + } else { + link_conf = wiphy_dereference(wiphy, + vif->link_conf[link_id]); + + ether_addr_copy(wdev->links[link_id].addr, + link_conf->bssid); + } + } + } + + rcu_assign_pointer(sdata->u.vlan.sta, sta); + __ieee80211_check_fast_rx_iface(sdata); + drv_sta_set_4addr(local, sta->sdata, &sta->sta, true); + + return 0; +} + static int ieee80211_change_station(struct wiphy *wiphy, struct wireless_dev *wdev, const u8 *mac, struct station_parameters *params) @@ -2589,12 +2648,10 @@ static int ieee80211_change_station(struct wiphy *wiphy, vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); if (params->vlan->ieee80211_ptr->use_4addr) { - if (vlansdata->u.vlan.sta) - return -EBUSY; + err = ieee80211_set_sta_4addr(local, vlansdata, sta); + if (err) + return err; - rcu_assign_pointer(vlansdata->u.vlan.sta, sta); - __ieee80211_check_fast_rx_iface(vlansdata); - drv_sta_set_4addr(local, sta->sdata, &sta->sta, true); } if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 50a06de928ba..fda692316f08 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -1321,6 +1321,10 @@ __ieee80211_link_copy_chanctx_to_vlans(struct ieee80211_link_data *link, list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) { struct ieee80211_bss_conf *vlan_conf; + if (vlan->vif.valid_links && + !(vlan->vif.valid_links & BIT(link_id))) + continue; + vlan_conf = wiphy_dereference(local->hw.wiphy, vlan->vif.link_conf[link_id]); if (WARN_ON(!vlan_conf)) @@ -1564,6 +1568,10 @@ ieee80211_link_update_chanreq(struct ieee80211_link_data *link, list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) { struct ieee80211_bss_conf *vlan_conf; + if (vlan->vif.valid_links && + !(vlan->vif.valid_links & BIT(link_id))) + continue; + vlan_conf = wiphy_dereference(sdata->local->hw.wiphy, vlan->vif.link_conf[link_id]); if (WARN_ON(!vlan_conf)) diff --git a/net/mac80211/link.c b/net/mac80211/link.c index 03bfca27d205..93e290dd783f 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -14,27 +14,39 @@ static void ieee80211_update_apvlan_links(struct ieee80211_sub_if_data *sdata) { + unsigned long rem = ~sdata->vif.valid_links & + GENMASK(IEEE80211_MLD_MAX_NUM_LINKS - 1, 0); + struct ieee80211_local *local = sdata->local; + unsigned long add = sdata->vif.valid_links; + struct wiphy *wiphy = local->hw.wiphy; struct ieee80211_sub_if_data *vlan; struct ieee80211_link_data *link; - u16 ap_bss_links = sdata->vif.valid_links; - u16 new_links, vlan_links; - unsigned long add; + struct sta_info *sta; list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) { int link_id; - /* No support for 4addr with MLO yet */ - if (vlan->wdev.use_4addr) - return; + if (vlan->wdev.use_4addr) { + sta = wiphy_dereference(wiphy, + vlan->u.vlan.sta); + if (sta) + add = add & sta->sta.valid_links; + } - vlan_links = vlan->vif.valid_links; - - new_links = ap_bss_links; - - add = new_links & ~vlan_links; - if (!add) + if (add == vlan->vif.valid_links) continue; + for_each_set_bit(link_id, &add, IEEE80211_MLD_MAX_NUM_LINKS) { + vlan->wdev.valid_links |= BIT(link_id); + ether_addr_copy(vlan->wdev.links[link_id].addr, + sdata->wdev.links[link_id].addr); + } + + for_each_set_bit(link_id, &rem, IEEE80211_MLD_MAX_NUM_LINKS) { + vlan->wdev.valid_links &= ~BIT(link_id); + eth_zero_addr(vlan->wdev.links[link_id].addr); + } + ieee80211_vif_set_links(vlan, add, 0); for_each_set_bit(link_id, &add, IEEE80211_MLD_MAX_NUM_LINKS) { @@ -96,8 +108,13 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata, ap_bss = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap); - ap_bss_conf = sdata_dereference(ap_bss->vif.link_conf[link_id], - ap_bss); + + if (deflink) + ap_bss_conf = &ap_bss->vif.bss_conf; + else + ap_bss_conf = sdata_dereference(ap_bss->vif.link_conf[link_id], + ap_bss); + memcpy(link_conf, ap_bss_conf, sizeof(*link_conf)); } From 594be50a3f0a6b7389f40f7acbf0dd731beb5204 Mon Sep 17 00:00:00 2001 From: Tamizh Chelvam Raja Date: Thu, 26 Mar 2026 22:17:22 +0530 Subject: [PATCH 1214/1561] wifi: mac80211: use ap_addr for 4-address NULL frame destination Currently ieee80211_send_4addr_nullfunc() uses deflink.u.mgd.bssid for addr1 and addr3 fields. In MLO configurations, deflink.u.mgd.bssid represents link 0's BSSID and is not updated when link 0 is not an assoc link. This causes 4-address NULL frames to be sent to the wrong address, preventing WDS AP_VLAN interface creation on the peer AP. To fix this use sdata->vif.cfg.ap_addr instead, which contains the AP's MLD address populated during authentication/association and remains valid regardless of which links are active. This ensures 4-address NULL frames reach the correct AP, allowing proper WDS operation over MLO connections. Co-developed-by: Sathishkumar Muruganandam Signed-off-by: Sathishkumar Muruganandam Signed-off-by: Tamizh Chelvam Raja Link: https://patch.msgid.link/20260326164723.553927-3-tamizh.raja@oss.qualcomm.com Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 285caaa2997e..4a34547b39d5 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2514,9 +2514,9 @@ void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local, fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC | IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS); nullfunc->frame_control = fc; - memcpy(nullfunc->addr1, sdata->deflink.u.mgd.bssid, ETH_ALEN); + memcpy(nullfunc->addr1, sdata->vif.cfg.ap_addr, ETH_ALEN); memcpy(nullfunc->addr2, sdata->vif.addr, ETH_ALEN); - memcpy(nullfunc->addr3, sdata->deflink.u.mgd.bssid, ETH_ALEN); + memcpy(nullfunc->addr3, sdata->vif.cfg.ap_addr, ETH_ALEN); memcpy(nullfunc->addr4, sdata->vif.addr, ETH_ALEN); IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; From 915c1d23e2e3a94f432bda6fb64f47c06f840ca1 Mon Sep 17 00:00:00 2001 From: Tamizh Chelvam Raja Date: Thu, 26 Mar 2026 22:17:23 +0530 Subject: [PATCH 1215/1561] wifi: mac80211: enable MLO support for 4-address mode interfaces The current code does not support establishing MLO connections for interfaces operating in 4-address AP_VLAN mode. MLO bringup is blocked by sanity checks in cfg.c, iface.c, and mlme.c, which prevent MLD initialization when use_4addr is enabled. Remove these restrictions to allow 4-address AP_VLAN interfaces to initialize as part of an MLD and successfully participate in MLO connections. This patch series also adds the necessary changes to support WDS operation in MLO, making these modifications valid. Allow 4-address mode interfaces to: - Proceed with MLD initialization during interface setup - Add MLO links dynamically via ieee80211_add_intf_link() - Establish associations with MLO-capable access points - Support AP_VLAN interfaces with MLO parent APs Signed-off-by: Tamizh Chelvam Raja Link: https://patch.msgid.link/20260326164723.553927-4-tamizh.raja@oss.qualcomm.com Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 7 ------- net/mac80211/iface.c | 7 ------- net/mac80211/mlme.c | 4 ---- 3 files changed, 18 deletions(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 6a5bf403fd85..7b77d57c9f96 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -281,10 +281,6 @@ static int ieee80211_change_iface(struct wiphy *wiphy, if (params->use_4addr == ifmgd->use_4addr) return 0; - /* FIXME: no support for 4-addr MLO yet */ - if (ieee80211_vif_is_mld(&sdata->vif)) - return -EOPNOTSUPP; - sdata->u.mgd.use_4addr = params->use_4addr; if (!ifmgd->associated) return 0; @@ -5585,9 +5581,6 @@ static int ieee80211_add_intf_link(struct wiphy *wiphy, lockdep_assert_wiphy(sdata->local->hw.wiphy); - if (wdev->use_4addr) - return -EOPNOTSUPP; - return ieee80211_vif_set_links(sdata, wdev->valid_links, 0); } diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index f1ab85ff326d..95b779c4d627 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -409,13 +409,6 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata, nsdata->vif.type)) return -ENOTUNIQ; - /* No support for VLAN with MLO yet */ - if (iftype == NL80211_IFTYPE_AP_VLAN && - sdata->wdev.use_4addr && - nsdata->vif.type == NL80211_IFTYPE_AP && - nsdata->vif.valid_links) - return -EOPNOTSUPP; - /* * can only add VLANs to enabled APs */ diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 4a34547b39d5..160ae65a5c64 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -9859,10 +9859,6 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, for (i = 0; i < IEEE80211_MLD_MAX_NUM_LINKS; i++) size += req->links[i].elems_len; - /* FIXME: no support for 4-addr MLO yet */ - if (sdata->u.mgd.use_4addr && req->link_id >= 0) - return -EOPNOTSUPP; - assoc_data = kzalloc(size, GFP_KERNEL); if (!assoc_data) return -ENOMEM; From 2ce8a41113eda1adddc1e6dc43cf89383ec6dc22 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Fri, 3 Apr 2026 14:39:29 +0200 Subject: [PATCH 1216/1561] net: hsr: emit notification for PRP slave2 changed hw addr on port deletion On PRP protocol, when deleting the port the MAC address change notification was missing. In addition to that, make sure to only perform the MAC address change on slave2 deletion and PRP protocol as the operation isn't necessary for HSR nor slave1. Note that the eth_hw_addr_set() is correct on PRP context as the slaves are either in promiscuous mode or forward offload enabled. Reported-by: Luka Gejak Closes: https://lore.kernel.org/netdev/DHFCZEM93FTT.1RWFBIE32K7OT@linux.dev/ Signed-off-by: Fernando Fernandez Mancera Reviewed-by: Felix Maurer Link: https://patch.msgid.link/20260403123928.4249-2-fmancera@suse.de Signed-off-by: Paolo Abeni --- net/hsr/hsr_slave.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c index 44f83c8c56a7..d9af9e65f72f 100644 --- a/net/hsr/hsr_slave.c +++ b/net/hsr/hsr_slave.c @@ -243,7 +243,11 @@ void hsr_del_port(struct hsr_port *port) if (!port->hsr->fwd_offloaded) dev_set_promiscuity(port->dev, -1); netdev_upper_dev_unlink(port->dev, master->dev); - eth_hw_addr_set(port->dev, port->original_macaddress); + if (hsr->prot_version == PRP_V1 && + port->type == HSR_PT_SLAVE_B) { + eth_hw_addr_set(port->dev, port->original_macaddress); + call_netdevice_notifiers(NETDEV_CHANGEADDR, port->dev); + } } kfree_rcu(port, rcu); From ba563287beaa99c18144b2e39f63b89412abfd18 Mon Sep 17 00:00:00 2001 From: Roopni Devanathan Date: Tue, 31 Mar 2026 10:28:34 +0530 Subject: [PATCH 1217/1561] wifi: ath12k: Rename hw_link_id to radio_idx in ath12k_ah_to_ar() ath12k_ah_to_ar() is returning radio from the given hardware based on the radio index passed. But, the variable that radio index is received at is wrongly named 'hw_link_id', which points to the hardware link index that comes from the firmware. This affects readability. Resolve this by renaming 'hw_link_id' to 'radio_idx'. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.4.1-00199-QCAHKSWPL_SILICONZ-1 Signed-off-by: Roopni Devanathan Reviewed-by: Baochen Qiang Reviewed-by: Rameshkumar Sundaram Link: https://patch.msgid.link/20260331045834.1181924-1-roopni.devanathan@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/core.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index 59c193b24764..5eff86827e9c 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -1366,13 +1366,13 @@ static inline struct ath12k_hw *ath12k_hw_to_ah(struct ieee80211_hw *hw) return hw->priv; } -static inline struct ath12k *ath12k_ah_to_ar(struct ath12k_hw *ah, u8 hw_link_id) +static inline struct ath12k *ath12k_ah_to_ar(struct ath12k_hw *ah, u8 radio_idx) { - if (WARN(hw_link_id >= ah->num_radio, - "bad hw link id %d, so switch to default link\n", hw_link_id)) - hw_link_id = 0; + if (WARN(radio_idx >= ah->num_radio, + "bad radio index %d, use default radio\n", radio_idx)) + radio_idx = 0; - return &ah->radio[hw_link_id]; + return &ah->radio[radio_idx]; } static inline struct ath12k_hw *ath12k_ar_to_ah(struct ath12k *ar) From af5708ed67fc562bc45fafbd0f95789c464c0105 Mon Sep 17 00:00:00 2001 From: Harish Rachakonda Date: Thu, 26 Mar 2026 10:36:41 +0530 Subject: [PATCH 1218/1561] wifi: ath12k: Support channel change stats Add support to request channel change stats from the firmware through HTT stats type 76. These stats give channel switch details like the channel that the radio changed to, its center frequency, time taken for the switch, chainmask details, etc. Sample output: echo 76 > /sys/kernel/debug/ath12k/pci-0000\:06\:00.0/mac0/htt_stats_type cat /sys/kernel/debug/ath12k/pci-0000\:06\:00.0/mac0/htt_stats Channel Change Timings: |PRIMARY CHANNEL FREQ|BANDWIDTH CENTER FREQ|PHYMODE|TX_CHAINMASK|RX_CHAINMASK|SWITCH TIME(us)|INI(us)|TPC+CTL(us)|CAL(us)|MISC(us)|CTL(us)|SW PROFILE| | 5200| 5200| 24| 15| 15| 448850| 2410| 10546| 434593| 1071| 1100| 4| | 5240| 5240| 24| 15| 15| 450730| 4106| 10524| 434528| 1306| 1150| 4| | 5180| 5210| 26| 15| 15| 467894| 4764| 10438| 451101| 1337| 1508| 4| | 5200| 5200| 0| 15| 15| 13838| 2692| 1736| 8558| 686| 802| 6| | 5180| 5180| 0| 15| 15| 13465| 3207| 855| 8579| 578| 760| 6| | 5200| 5200| 24| 15| 15| 570321| 2441| 10439| 555661| 1574| 949| 4| Note: QCC2072 and WCN7850 firmware does not support HTT stats type 76. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.6-01181-QCAHKSWPL_SILICONZ-1 Signed-off-by: Harish Rachakonda Signed-off-by: Roopni Devanathan Reviewed-by: Rameshkumar Sundaram Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20260326050641.3066562-1-roopni.devanathan@oss.qualcomm.com Signed-off-by: Jeff Johnson --- .../wireless/ath/ath12k/debugfs_htt_stats.c | 72 +++++++++++++++++++ .../wireless/ath/ath12k/debugfs_htt_stats.h | 26 +++++++ 2 files changed, 98 insertions(+) diff --git a/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c b/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c index 7f6ca07fb335..b772181a496e 100644 --- a/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c +++ b/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c @@ -5722,6 +5722,75 @@ ath12k_htt_print_tx_hwq_stats_cmn_tlv(const void *tag_buf, u16 tag_len, stats_req->buf_len = len; } +static void +ath12k_htt_print_chan_switch_stats_tlv(const void *tag_buf, u16 tag_len, + struct debug_htt_stats_req *stats_req) +{ + const struct ath12k_htt_chan_switch_stats_tlv *sbuf = tag_buf; + u32 buf_len = ATH12K_HTT_STATS_BUF_SIZE; + u32 switch_freq, switch_profile; + u32 len = stats_req->buf_len; + u8 *buf = stats_req->buf; + u8 i; + + if (tag_len < sizeof(*sbuf)) + return; + + i = min(le32_to_cpu(sbuf->switch_count), ATH12K_HTT_CHAN_SWITCH_STATS_BUF_LEN); + if (!i) + return; + + len += scnprintf(buf + len, buf_len - len, "Channel Change Timings:\n"); + len += scnprintf(buf + len, buf_len - len, + "|%-20s|%-21s|%-7s|%-12s|%-12s|%-15s|", + "PRIMARY CHANNEL FREQ", "BANDWIDTH CENTER FREQ", "PHYMODE", + "TX_CHAINMASK", "RX_CHAINMASK", "SWITCH TIME(us)"); + len += scnprintf(buf + len, buf_len - len, + "%-7s|%-11s|%-7s|%-8s|%-7s|%-10s|\n", + "INI(us)", "TPC+CTL(us)", "CAL(us)", "MISC(us)", "CTL(us)", + "SW PROFILE"); + + /* + * sbuf->switch_count has the number of successful channel changes. The firmware + * sends the record of channel change in such a way that sbuf->chan_stats[0] will + * point to the channel change that occurred first and the recent channel change + * records will be stored in sbuf->chan_stats[9]. As and when new channel change + * occurs, sbuf->chan_stats[0] will be replaced by records from the next index, + * sbuf->chan_stats[1]. While printing the records, reverse chronological order + * is followed, i.e., the most recent channel change records are printed first + * and the oldest one, last. + */ + while (i--) { + switch_freq = le32_to_cpu(sbuf->chan_stats[i].chan_switch_freq); + switch_profile = le32_to_cpu(sbuf->chan_stats[i].chan_switch_profile); + + len += scnprintf(buf + len, buf_len - len, + "|%20u|%21u|%7u|%12u|%12u|%15u|", + u32_get_bits(switch_freq, + ATH12K_HTT_STATS_CHAN_SWITCH_BW_MHZ), + u32_get_bits(switch_freq, + ATH12K_HTT_STATS_CHAN_SWITCH_BAND_FREQ), + u32_get_bits(switch_profile, + ATH12K_HTT_STATS_CHAN_SWITCH_PHY_MODE), + u32_get_bits(switch_profile, + ATH12K_HTT_STATS_CHAN_SWITCH_TX_CHAINMASK), + u32_get_bits(switch_profile, + ATH12K_HTT_STATS_CHAN_SWITCH_RX_CHAINMASK), + le32_to_cpu(sbuf->chan_stats[i].chan_switch_time)); + len += scnprintf(buf + len, buf_len - len, + "%7u|%11u|%7u|%8u|%7u|%10u|\n", + le32_to_cpu(sbuf->chan_stats[i].ini_module_time), + le32_to_cpu(sbuf->chan_stats[i].tpc_module_time), + le32_to_cpu(sbuf->chan_stats[i].cal_module_time), + le32_to_cpu(sbuf->chan_stats[i].misc_module_time), + le32_to_cpu(sbuf->chan_stats[i].ctl_module_time), + u32_get_bits(switch_profile, + ATH12K_HTT_STATS_CHAN_SWITCH_SW_PROFILE)); + } + + stats_req->buf_len = len; +} + static int ath12k_dbg_htt_ext_stats_parse(struct ath12k_base *ab, u16 tag, u16 len, const void *tag_buf, void *user_data) @@ -6024,6 +6093,9 @@ static int ath12k_dbg_htt_ext_stats_parse(struct ath12k_base *ab, case HTT_STATS_TX_HWQ_CMN_TAG: ath12k_htt_print_tx_hwq_stats_cmn_tlv(tag_buf, len, stats_req); break; + case HTT_STATS_CHAN_SWITCH_STATS_TAG: + ath12k_htt_print_chan_switch_stats_tlv(tag_buf, len, stats_req); + break; default: break; } diff --git a/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.h b/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.h index bfabe6500d44..82ab7b9e4db9 100644 --- a/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.h +++ b/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.h @@ -164,6 +164,7 @@ enum ath12k_dbg_htt_ext_stats_type { ATH12K_DBG_HTT_PDEV_MLO_IPC_STATS = 64, ATH12K_DBG_HTT_EXT_PDEV_RTT_RESP_STATS = 65, ATH12K_DBG_HTT_EXT_PDEV_RTT_INITIATOR_STATS = 66, + ATH12K_DBG_HTT_EXT_CHAN_SWITCH_STATS = 76, /* keep this last */ ATH12K_DBG_HTT_NUM_EXT_STATS, @@ -267,6 +268,7 @@ enum ath12k_dbg_htt_tlv_tag { HTT_STATS_PDEV_RTT_HW_STATS_TAG = 196, HTT_STATS_PDEV_RTT_TBR_SELFGEN_QUEUED_STATS_TAG = 197, HTT_STATS_PDEV_RTT_TBR_CMD_RESULT_STATS_TAG = 198, + HTT_STATS_CHAN_SWITCH_STATS_TAG = 213, HTT_STATS_MAX_TAG, }; @@ -2156,4 +2158,28 @@ struct htt_tx_hwq_stats_cmn_tlv { __le32 txq_timeout; } __packed; +#define ATH12K_HTT_CHAN_SWITCH_STATS_BUF_LEN 10 + +#define ATH12K_HTT_STATS_CHAN_SWITCH_BW_MHZ GENMASK(15, 0) +#define ATH12K_HTT_STATS_CHAN_SWITCH_BAND_FREQ GENMASK(31, 16) +#define ATH12K_HTT_STATS_CHAN_SWITCH_PHY_MODE GENMASK(7, 0) +#define ATH12K_HTT_STATS_CHAN_SWITCH_TX_CHAINMASK GENMASK(15, 8) +#define ATH12K_HTT_STATS_CHAN_SWITCH_RX_CHAINMASK GENMASK(23, 16) +#define ATH12K_HTT_STATS_CHAN_SWITCH_SW_PROFILE GENMASK(31, 24) + +struct ath12k_htt_chan_switch_stats_tlv { + struct { + __le32 chan_switch_freq; + __le32 chan_switch_profile; + __le32 chan_switch_time; + __le32 cal_module_time; + __le32 ini_module_time; + __le32 tpc_module_time; + __le32 misc_module_time; + __le32 ctl_module_time; + __le32 reserved; + } chan_stats[ATH12K_HTT_CHAN_SWITCH_STATS_BUF_LEN]; + __le32 switch_count; /* shows how many channel changes have occurred */ +} __packed; + #endif From 7d7dc26f72abb7a76abb4a68ebad75d5ab7b375e Mon Sep 17 00:00:00 2001 From: Avula Sri Charan Date: Mon, 30 Mar 2026 09:37:32 +0530 Subject: [PATCH 1219/1561] wifi: ath12k: Skip adding inactive partner vdev info Currently, a vdev that is created is considered active for partner link population. In case of an MLD station, non-associated link vdevs can be created but not started. Yet, they are added as partner links. This leads to the creation of stale FW partner entries which accumulate and cause assertions. To resolve this issue, check if the vdev is started and operating on a chosen frequency, i.e., arvif->is_started, instead of checking if the vdev is created, i.e., arvif->is_created. This determines if the vdev is active or not and skips adding it as a partner link if it's inactive. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.6-01181-QCAHKSWPL_SILICONZ-1 Signed-off-by: Avula Sri Charan Signed-off-by: Roopni Devanathan Reviewed-by: Rameshkumar Sundaram Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20260330040732.1847263-1-roopni.devanathan@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 553ec28b6aaa..c1a1b220f4dd 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -11131,7 +11131,7 @@ ath12k_mac_mlo_get_vdev_args(struct ath12k_link_vif *arvif, if (arvif == arvif_p) continue; - if (!arvif_p->is_created) + if (!arvif_p->is_started) continue; link_conf = wiphy_dereference(ahvif->ah->hw->wiphy, From 0ec4b904be72f78ba6ce6bb9a8aaf2eb6b9b1004 Mon Sep 17 00:00:00 2001 From: Roopni Devanathan Date: Thu, 2 Apr 2026 10:44:02 +0530 Subject: [PATCH 1220/1561] wifi: ath12k: Create symlink for each radio in a wiphy In single-wiphy design, when more than one radio is registered as a single-wiphy in the mac80211 layer, the following warnings are seen: 1. debugfs: File 'ath12k' in directory 'phy0' already present! 2. debugfs: File 'simulate_fw_crash' in directory 'pci-0000:57:00.0' already present! debugfs: File 'device_dp_stats' in directory 'pci-01777777777777777777777:57:00.0' already present! When more than one radio is registered as a single-wiphy, symlinks for all the radios are created in the same debugfs directory: /sys/kernel/debug/ieee80211/phyX/ath12k, resulting in warning 1. When a symlink is created for the first radio, since the 'ath12k' directory is not present, it will be created and no warning will be thrown. But when symlink is created for more than one radio, since the 'ath12k' directory was already created for symlink for radio 1, a warning is thrown complaining that 'ath12k' directory is already present. To resolve warning 1, create symlink for each radio in separate debugfs directories. For the first radio, the symlink will always be the 'ath12k' directory. This ensures that the existing directory structure is retained for single-wiphy and multi-wiphy architectures. In single-wiphy architecture with multiple radios, create symlink in separate debugfs directories introduced by mac80211. Existing debugfs directory in single-wiphy architecture: /sys/kernel/debug/ieee80211/phyX/ath12k is a symlink to /sys/kernel/debug/ath12k/pci-0001:01:00.0/macY Proposed debugfs directory in single-wiphy architecture with one radio: /sys/kernel/debug/ieee80211/phyX/ath12k is a symlink to /sys/kernel/debug/ath12k/pci-0001:01:00.0/mac0 Proposed debugfs directory in single-wiphy architecture with more than one radio: /sys/kernel/debug/ieee80211/phyX/radio0/ath12k is a symlink to /sys/kernel/debug/ath12k/pci-0001:01:00.0/mac0 and /sys/kernel/debug/ieee80211/phyX/radioY/ath12k is a symlink to /sys/kernel/debug/ath12k/pci-0001:01:00.0/macY Where X is phy index and Y is radio index, seen in 'iw phyX info | grep Idx'. Two symlinks for the first radio are to ensure compatibility with the existing design. Add radio_idx inside ar, to track the radio index in probing order. API ath12k_debugfs_pdev_create() that creates SoC entries is called more than once when hardware group starts up, resulting in warning 2. To resolve this warning, remove all other calls to this API and add one inside the ath12k_core_pdev_create(). This API carries all pdev-specific initializations and can conveniently hold a call to ath12k_debugfs_pdev_create(). Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.4.1-00199-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Co-developed-by: Harshitha Prem Signed-off-by: Harshitha Prem Signed-off-by: Roopni Devanathan Reviewed-by: Baochen Qiang Reviewed-by: Rameshkumar Sundaram Link: https://patch.msgid.link/20260402051402.3903795-1-roopni.devanathan@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/core.c | 4 ++-- drivers/net/wireless/ath/ath12k/core.h | 2 ++ drivers/net/wireless/ath/ath12k/debugfs.c | 29 +++++++++++++++++++---- drivers/net/wireless/ath/ath12k/mac.c | 2 +- 4 files changed, 29 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c index c31c47fb5a73..2519e2400d58 100644 --- a/drivers/net/wireless/ath/ath12k/core.c +++ b/drivers/net/wireless/ath/ath12k/core.c @@ -835,8 +835,6 @@ static int ath12k_core_soc_create(struct ath12k_base *ab) goto err_qmi_deinit; } - ath12k_debugfs_pdev_create(ab); - return 0; err_qmi_deinit: @@ -869,6 +867,8 @@ static int ath12k_core_pdev_create(struct ath12k_base *ab) goto err_dp_pdev_free; } + ath12k_debugfs_pdev_create(ab); + return 0; err_dp_pdev_free: diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index 5eff86827e9c..46cef973db6f 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -588,6 +588,7 @@ struct ath12k_dbg_htt_stats { struct ath12k_debug { struct dentry *debugfs_pdev; struct dentry *debugfs_pdev_symlink; + struct dentry *debugfs_pdev_symlink_default; struct ath12k_dbg_htt_stats htt_stats; enum wmi_halphy_ctrl_path_stats_id tpc_stats_type; bool tpc_request; @@ -673,6 +674,7 @@ struct ath12k { u8 pdev_idx; u8 lmac_id; u8 hw_link_id; + u8 radio_idx; struct completion peer_assoc_done; struct completion peer_delete_done; diff --git a/drivers/net/wireless/ath/ath12k/debugfs.c b/drivers/net/wireless/ath/ath12k/debugfs.c index 358031fa14eb..8c81a1c22449 100644 --- a/drivers/net/wireless/ath/ath12k/debugfs.c +++ b/drivers/net/wireless/ath/ath12k/debugfs.c @@ -1473,18 +1473,35 @@ void ath12k_debugfs_register(struct ath12k *ar) { struct ath12k_base *ab = ar->ab; struct ieee80211_hw *hw = ar->ah->hw; - char pdev_name[5]; + struct ath12k_hw *ah = ath12k_hw_to_ah(hw); + struct dentry *ath12k_fs; char buf[100] = {}; + char pdev_name[5]; scnprintf(pdev_name, sizeof(pdev_name), "%s%d", "mac", ar->pdev_idx); ar->debug.debugfs_pdev = debugfs_create_dir(pdev_name, ab->debugfs_soc); /* Create a symlink under ieee80211/phy* */ - scnprintf(buf, sizeof(buf), "../../ath12k/%pd2", ar->debug.debugfs_pdev); - ar->debug.debugfs_pdev_symlink = debugfs_create_symlink("ath12k", - hw->wiphy->debugfsdir, - buf); + if (ar->radio_idx == 0) { + scnprintf(buf, sizeof(buf), "../../ath12k/%pd2", + ar->debug.debugfs_pdev); + ath12k_fs = hw->wiphy->debugfsdir; + + /* symbolic link for compatibility */ + ar->debug.debugfs_pdev_symlink_default = debugfs_create_symlink("ath12k", + ath12k_fs, + buf); + } + + if (ah->num_radio > 1) { + scnprintf(buf, sizeof(buf), "../../../ath12k/%pd2", + ar->debug.debugfs_pdev); + ath12k_fs = hw->wiphy->radio_cfg[ar->radio_idx].radio_debugfsdir; + ar->debug.debugfs_pdev_symlink = debugfs_create_symlink("ath12k", + ath12k_fs, + buf); + } if (ar->mac.sbands[NL80211_BAND_5GHZ].channels) { debugfs_create_file("dfs_simulate_radar", 0200, @@ -1513,7 +1530,9 @@ void ath12k_debugfs_unregister(struct ath12k *ar) /* Remove symlink under ieee80211/phy* */ debugfs_remove(ar->debug.debugfs_pdev_symlink); + debugfs_remove(ar->debug.debugfs_pdev_symlink_default); debugfs_remove_recursive(ar->debug.debugfs_pdev); ar->debug.debugfs_pdev_symlink = NULL; + ar->debug.debugfs_pdev_symlink_default = NULL; ar->debug.debugfs_pdev = NULL; } diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index c1a1b220f4dd..fbdfe6424fd7 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -15065,6 +15065,7 @@ static struct ath12k_hw *ath12k_mac_hw_allocate(struct ath12k_hw_group *ag, ar->hw_link_id = pdev->hw_link_id; ar->pdev = pdev; ar->pdev_idx = pdev_idx; + ar->radio_idx = i; pdev->ar = ar; ag->hw_links[ar->hw_link_id].device_id = ab->device_id; @@ -15132,7 +15133,6 @@ int ath12k_mac_allocate(struct ath12k_hw_group *ag) if (!ab) continue; - ath12k_debugfs_pdev_create(ab); ath12k_mac_set_device_defaults(ab); total_radio += ab->num_radios; } From 9a34a59c6086ae731a06b3e61b0951feef758648 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Wed, 25 Mar 2026 11:05:01 +0800 Subject: [PATCH 1221/1561] wifi: ath10k: fix station lookup failure during disconnect Recent commit [1] moved station statistics collection to an earlier stage of the disconnect flow. With this change in place, ath10k fails to resolve the station entry when handling a peer stats event triggered during disconnect, resulting in log messages such as: wlp58s0: deauthenticating from 74:1a:e0:e7:b4:c8 by local choice (Reason: 3=DEAUTH_LEAVING) ath10k_pci 0000:3a:00.0: not found station for peer stats ath10k_pci 0000:3a:00.0: failed to parse stats info tlv: -22 The failure occurs because ath10k relies on ieee80211_find_sta_by_ifaddr() for station lookup. That function uses local->sta_hash, but by the time the peer stats request is triggered during disconnect, mac80211 has already removed the station from that hash table, leading to lookup failure. Before commit [1], this issue was not visible because the transition from IEEE80211_STA_NONE to IEEE80211_STA_NOTEXIST prevented ath10k from sending a peer stats request at all: ath10k_mac_sta_get_peer_stats_info() would fail early to find the peer and skip requesting statistics. Fix this by switching the lookup path to ath10k_peer_find(), which queries ath10k's internal peer table. At the point where the firmware emits the peer stats event, the peer entry is still present in the driver's list, ensuring lookup succeeds. Tested-on: QCA6174 hw3.2 PCI WLAN.RM.4.4.1-00309-QCARMSWPZ-1 Fixes: a203dbeeca15 ("wifi: mac80211: collect station statistics earlier when disconnect") # [1] Reported-by: Paul Menzel Closes: https://lore.kernel.org/ath10k/57671b89-ec9f-4e6c-992c-45eb8e75929c@molgen.mpg.de Signed-off-by: Baochen Qiang Reviewed-by: Rameshkumar Sundaram Reviewed-by: Paul Menzel Tested-by: Paul Menzel Link: https://patch.msgid.link/20260325-ath10k-station-lookup-failure-v1-1-2e0c970f25d5@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath10k/wmi-tlv.c | 26 +++++++++++++---------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c index ec8e91707f84..01f2d1fa9d7d 100644 --- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c +++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c @@ -3,7 +3,7 @@ * Copyright (c) 2005-2011 Atheros Communications Inc. * Copyright (c) 2011-2017 Qualcomm Atheros, Inc. * Copyright (c) 2018-2019, The Linux Foundation. All rights reserved. - * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ #include "core.h" #include "debug.h" @@ -14,6 +14,7 @@ #include "wmi-tlv.h" #include "p2p.h" #include "testmode.h" +#include "txrx.h" #include /***************/ @@ -224,8 +225,9 @@ static int ath10k_wmi_tlv_parse_peer_stats_info(struct ath10k *ar, u16 tag, u16 const void *ptr, void *data) { const struct wmi_tlv_peer_stats_info *stat = ptr; - struct ieee80211_sta *sta; + u32 vdev_id = *(u32 *)data; struct ath10k_sta *arsta; + struct ath10k_peer *peer; if (tag != WMI_TLV_TAG_STRUCT_PEER_STATS_INFO) return -EPROTO; @@ -241,20 +243,20 @@ static int ath10k_wmi_tlv_parse_peer_stats_info(struct ath10k *ar, u16 tag, u16 __le32_to_cpu(stat->last_tx_rate_code), __le32_to_cpu(stat->last_tx_bitrate_kbps)); - rcu_read_lock(); - sta = ieee80211_find_sta_by_ifaddr(ar->hw, stat->peer_macaddr.addr, NULL); - if (!sta) { - rcu_read_unlock(); - ath10k_warn(ar, "not found station for peer stats\n"); + guard(spinlock_bh)(&ar->data_lock); + + peer = ath10k_peer_find(ar, vdev_id, stat->peer_macaddr.addr); + if (!peer || !peer->sta) { + ath10k_warn(ar, "not found %s with vdev id %u mac addr %pM for peer stats\n", + peer ? "sta" : "peer", vdev_id, stat->peer_macaddr.addr); return -EINVAL; } - arsta = (struct ath10k_sta *)sta->drv_priv; + arsta = (struct ath10k_sta *)peer->sta->drv_priv; arsta->rx_rate_code = __le32_to_cpu(stat->last_rx_rate_code); arsta->rx_bitrate_kbps = __le32_to_cpu(stat->last_rx_bitrate_kbps); arsta->tx_rate_code = __le32_to_cpu(stat->last_tx_rate_code); arsta->tx_bitrate_kbps = __le32_to_cpu(stat->last_tx_bitrate_kbps); - rcu_read_unlock(); return 0; } @@ -266,6 +268,7 @@ static int ath10k_wmi_tlv_op_pull_peer_stats_info(struct ath10k *ar, const struct wmi_tlv_peer_stats_info_ev *ev; const void *data; u32 num_peer_stats; + u32 vdev_id; int ret; tb = ath10k_wmi_tlv_parse_alloc(ar, skb->data, skb->len, GFP_ATOMIC); @@ -284,15 +287,16 @@ static int ath10k_wmi_tlv_op_pull_peer_stats_info(struct ath10k *ar, } num_peer_stats = __le32_to_cpu(ev->num_peers); + vdev_id = __le32_to_cpu(ev->vdev_id); ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi tlv peer stats info update peer vdev id %d peers %i more data %d\n", - __le32_to_cpu(ev->vdev_id), + vdev_id, num_peer_stats, __le32_to_cpu(ev->more_data)); ret = ath10k_wmi_tlv_iter(ar, data, ath10k_wmi_tlv_len(data), - ath10k_wmi_tlv_parse_peer_stats_info, NULL); + ath10k_wmi_tlv_parse_peer_stats_info, &vdev_id); if (ret) ath10k_warn(ar, "failed to parse stats info tlv: %d\n", ret); From 3ebaf730b5832319726e12ebe634a7679eaf2e9b Mon Sep 17 00:00:00 2001 From: Raj Kumar Bhagat Date: Tue, 7 Apr 2026 10:56:28 +0530 Subject: [PATCH 1222/1561] dt-bindings: net: wireless: add ath12k wifi device IPQ5424 Add the device-tree bindings for the ATH12K AHB wifi device IPQ5424. Signed-off-by: Raj Kumar Bhagat Acked-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20260407-ath12k-ipq5424-v5-1-8e96aa660ec4@oss.qualcomm.com Signed-off-by: Jeff Johnson --- .../devicetree/bindings/net/wireless/qcom,ipq5332-wifi.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ipq5332-wifi.yaml b/Documentation/devicetree/bindings/net/wireless/qcom,ipq5332-wifi.yaml index 363a0ecb6ad9..37d8a0da7780 100644 --- a/Documentation/devicetree/bindings/net/wireless/qcom,ipq5332-wifi.yaml +++ b/Documentation/devicetree/bindings/net/wireless/qcom,ipq5332-wifi.yaml @@ -17,6 +17,7 @@ properties: compatible: enum: - qcom,ipq5332-wifi + - qcom,ipq5424-wifi reg: maxItems: 1 From b1ad1a052beda2ac0400d6d4cc05dd2e549a6936 Mon Sep 17 00:00:00 2001 From: Saravanakumar Duraisamy Date: Tue, 7 Apr 2026 10:56:29 +0530 Subject: [PATCH 1223/1561] wifi: ath12k: Add ath12k_hw_params for IPQ5424 Add ath12k_hw_params for the ath12k AHB-based WiFi 7 device IPQ5424. The WiFi device IPQ5424 is similar to IPQ5332. Most of the hardware parameters like hw_ops, wmi_init, ring_mask, etc., are the same between IPQ5424 and IPQ5332, hence use these same parameters for IPQ5424. Some parameters are specific to IPQ5424; initially set these to 0 or NULL, and populate them in subsequent patches. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.6-01243-QCAHKSWPL_SILICONZ-1 Tested-on: IPQ5332 hw1.0 AHB WLAN.WBE.1.6-01275-QCAHKSWPL_SILICONZ-1 Tested-on: IPQ5424 hw1.0 AHB WLAN.WBE.1.6-01275-QCAHKSWPL_SILICONZ-1 Signed-off-by: Saravanakumar Duraisamy Signed-off-by: Raj Kumar Bhagat Reviewed-by: Baochen Qiang Reviewed-by: Rameshkumar Sundaram Link: https://patch.msgid.link/20260407-ath12k-ipq5424-v5-2-8e96aa660ec4@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/core.h | 1 + drivers/net/wireless/ath/ath12k/wifi7/hw.c | 79 ++++++++++++++++++++++ 2 files changed, 80 insertions(+) diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index 46cef973db6f..8be435535a4e 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -157,6 +157,7 @@ enum ath12k_hw_rev { ATH12K_HW_WCN7850_HW20, ATH12K_HW_IPQ5332_HW10, ATH12K_HW_QCC2072_HW10, + ATH12K_HW_IPQ5424_HW10, }; enum ath12k_firmware_mode { diff --git a/drivers/net/wireless/ath/ath12k/wifi7/hw.c b/drivers/net/wireless/ath/ath12k/wifi7/hw.c index ec6dba96640b..2b5d1f7e9e04 100644 --- a/drivers/net/wireless/ath/ath12k/wifi7/hw.c +++ b/drivers/net/wireless/ath/ath12k/wifi7/hw.c @@ -753,6 +753,85 @@ static const struct ath12k_hw_params ath12k_wifi7_hw_params[] = { .dp_primary_link_only = false, }, + { + .name = "ipq5424 hw1.0", + .hw_rev = ATH12K_HW_IPQ5424_HW10, + .fw = { + .dir = "IPQ5424/hw1.0", + .board_size = 256 * 1024, + .cal_offset = 128 * 1024, + .m3_loader = ath12k_m3_fw_loader_remoteproc, + .download_aux_ucode = false, + }, + .max_radios = 1, + .single_pdev_only = false, + .qmi_service_ins_id = ATH12K_QMI_WLFW_SERVICE_INS_ID_V01_IPQ5332, + .internal_sleep_clock = false, + + .hw_ops = &qcn9274_ops, + .ring_mask = &ath12k_wifi7_hw_ring_mask_ipq5332, + + .host_ce_config = ath12k_wifi7_host_ce_config_ipq5332, + .ce_count = 12, + .target_ce_config = ath12k_wifi7_target_ce_config_wlan_ipq5332, + .target_ce_count = 12, + .svc_to_ce_map = + ath12k_wifi7_target_service_to_ce_map_wlan_ipq5332, + .svc_to_ce_map_len = 18, + + .rxdma1_enable = true, + .num_rxdma_per_pdev = 1, + .num_rxdma_dst_ring = 0, + .rx_mac_buf_ring = false, + .vdev_start_delay = false, + + .interface_modes = BIT(NL80211_IFTYPE_STATION) | + BIT(NL80211_IFTYPE_AP) | + BIT(NL80211_IFTYPE_MESH_POINT), + .supports_monitor = true, + + .idle_ps = false, + .download_calib = true, + .supports_suspend = false, + .tcl_ring_retry = true, + .reoq_lut_support = false, + .supports_shadow_regs = false, + + .num_tcl_banks = 48, + .max_tx_ring = 4, + + .mhi_config = NULL, + + .wmi_init = &ath12k_wifi7_wmi_init_qcn9274, + + .qmi_cnss_feature_bitmap = BIT(CNSS_QDSS_CFG_MISS_V01), + + .rfkill_pin = 0, + .rfkill_cfg = 0, + .rfkill_on_level = 0, + + .rddm_size = 0, + + .def_num_link = 0, + .max_mlo_peer = 256, + + .otp_board_id_register = 0, + + .supports_sta_ps = false, + + .acpi_guid = NULL, + .supports_dynamic_smps_6ghz = false, + .iova_mask = 0, + .supports_aspm = false, + + .ce_ie_addr = NULL, + .ce_remap = NULL, + .bdf_addr_offset = 0x940000, + + .current_cc_support = false, + + .dp_primary_link_only = true, + }, }; /* Note: called under rcu_read_lock() */ From 74f5a619b1a6a06cc5e6246d326da5b6f2b0fcbd Mon Sep 17 00:00:00 2001 From: Raj Kumar Bhagat Date: Tue, 7 Apr 2026 10:56:30 +0530 Subject: [PATCH 1224/1561] wifi: ath12k: add ath12k_hw_version_map entry for IPQ5424 Add a new ath12k_hw_version_map entry for the AHB based WiFi 7 device IPQ5424. Reuse most of the ath12k_hw_version_map fields such as hal_ops, hal_desc_sz, tcl_to_wbm_rbm_map, and hal_params from IPQ5332. The register addresses differ on IPQ5424, hence set hw_regs temporarily to NULL and populated it in a subsequent patch. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.6-01243-QCAHKSWPL_SILICONZ-1 Tested-on: IPQ5332 hw1.0 AHB WLAN.WBE.1.6-01275-QCAHKSWPL_SILICONZ-1 Tested-on: IPQ5424 hw1.0 AHB WLAN.WBE.1.6-01275-QCAHKSWPL_SILICONZ-1 Signed-off-by: Raj Kumar Bhagat Reviewed-by: Baochen Qiang Reviewed-by: Rameshkumar Sundaram Link: https://patch.msgid.link/20260407-ath12k-ipq5424-v5-3-8e96aa660ec4@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/wifi7/hal.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/wireless/ath/ath12k/wifi7/hal.c b/drivers/net/wireless/ath/ath12k/wifi7/hal.c index bd1753ca0db6..c2cc99a83f09 100644 --- a/drivers/net/wireless/ath/ath12k/wifi7/hal.c +++ b/drivers/net/wireless/ath/ath12k/wifi7/hal.c @@ -50,6 +50,13 @@ static const struct ath12k_hw_version_map ath12k_wifi7_hw_ver_map[] = { .hal_params = &ath12k_hw_hal_params_wcn7850, .hw_regs = &qcc2072_regs, }, + [ATH12K_HW_IPQ5424_HW10] = { + .hal_ops = &hal_qcn9274_ops, + .hal_desc_sz = sizeof(struct hal_rx_desc_qcn9274_compact), + .tcl_to_wbm_rbm_map = ath12k_hal_tcl_to_wbm_rbm_map_qcn9274, + .hal_params = &ath12k_hw_hal_params_ipq5332, + .hw_regs = NULL, + }, }; int ath12k_wifi7_hal_init(struct ath12k_base *ab) From 7e2131ba332f5ae62b6302eb889feeeea56a1691 Mon Sep 17 00:00:00 2001 From: Saravanakumar Duraisamy Date: Tue, 7 Apr 2026 10:56:31 +0530 Subject: [PATCH 1225/1561] wifi: ath12k: add ath12k_hw_regs for IPQ5424 Add register addresses (ath12k_hw_regs) for ath12k AHB based WiFi 7 device IPQ5424. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.6-01243-QCAHKSWPL_SILICONZ-1 Tested-on: IPQ5332 hw1.0 AHB WLAN.WBE.1.6-01275-QCAHKSWPL_SILICONZ-1 Tested-on: IPQ5424 hw1.0 AHB WLAN.WBE.1.6-01275-QCAHKSWPL_SILICONZ-1 Signed-off-by: Saravanakumar Duraisamy Signed-off-by: Raj Kumar Bhagat Reviewed-by: Baochen Qiang Reviewed-by: Rameshkumar Sundaram Link: https://patch.msgid.link/20260407-ath12k-ipq5424-v5-4-8e96aa660ec4@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/wifi7/hal.c | 2 +- drivers/net/wireless/ath/ath12k/wifi7/hal.h | 3 + .../wireless/ath/ath12k/wifi7/hal_qcn9274.c | 88 +++++++++++++++++++ .../wireless/ath/ath12k/wifi7/hal_qcn9274.h | 1 + 4 files changed, 93 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/wifi7/hal.c b/drivers/net/wireless/ath/ath12k/wifi7/hal.c index c2cc99a83f09..a0a1902fb491 100644 --- a/drivers/net/wireless/ath/ath12k/wifi7/hal.c +++ b/drivers/net/wireless/ath/ath12k/wifi7/hal.c @@ -55,7 +55,7 @@ static const struct ath12k_hw_version_map ath12k_wifi7_hw_ver_map[] = { .hal_desc_sz = sizeof(struct hal_rx_desc_qcn9274_compact), .tcl_to_wbm_rbm_map = ath12k_hal_tcl_to_wbm_rbm_map_qcn9274, .hal_params = &ath12k_hw_hal_params_ipq5332, - .hw_regs = NULL, + .hw_regs = &ipq5424_regs, }, }; diff --git a/drivers/net/wireless/ath/ath12k/wifi7/hal.h b/drivers/net/wireless/ath/ath12k/wifi7/hal.h index 9337225a5253..3d9386198893 100644 --- a/drivers/net/wireless/ath/ath12k/wifi7/hal.h +++ b/drivers/net/wireless/ath/ath12k/wifi7/hal.h @@ -364,6 +364,9 @@ #define HAL_IPQ5332_CE_WFSS_REG_BASE 0x740000 #define HAL_IPQ5332_CE_SIZE 0x100000 +#define HAL_IPQ5424_CE_WFSS_REG_BASE 0x200000 +#define HAL_IPQ5424_CE_SIZE 0x100000 + #define HAL_RX_MAX_BA_WINDOW 256 #define HAL_DEFAULT_BE_BK_VI_REO_TIMEOUT_USEC (100 * 1000) diff --git a/drivers/net/wireless/ath/ath12k/wifi7/hal_qcn9274.c b/drivers/net/wireless/ath/ath12k/wifi7/hal_qcn9274.c index 41c918eb1767..ba9ce1e718e8 100644 --- a/drivers/net/wireless/ath/ath12k/wifi7/hal_qcn9274.c +++ b/drivers/net/wireless/ath/ath12k/wifi7/hal_qcn9274.c @@ -484,6 +484,94 @@ const struct ath12k_hw_regs ipq5332_regs = { HAL_IPQ5332_CE_WFSS_REG_BASE, }; +const struct ath12k_hw_regs ipq5424_regs = { + /* SW2TCL(x) R0 ring configuration address */ + .tcl1_ring_id = 0x00000918, + .tcl1_ring_misc = 0x00000920, + .tcl1_ring_tp_addr_lsb = 0x0000092c, + .tcl1_ring_tp_addr_msb = 0x00000930, + .tcl1_ring_consumer_int_setup_ix0 = 0x00000940, + .tcl1_ring_consumer_int_setup_ix1 = 0x00000944, + .tcl1_ring_msi1_base_lsb = 0x00000958, + .tcl1_ring_msi1_base_msb = 0x0000095c, + .tcl1_ring_base_lsb = 0x00000910, + .tcl1_ring_base_msb = 0x00000914, + .tcl1_ring_msi1_data = 0x00000960, + .tcl2_ring_base_lsb = 0x00000988, + .tcl_ring_base_lsb = 0x00000b68, + + /* TCL STATUS ring address */ + .tcl_status_ring_base_lsb = 0x00000d48, + + /* REO DEST ring address */ + .reo2_ring_base = 0x00000578, + .reo1_misc_ctrl_addr = 0x00000b9c, + .reo1_sw_cookie_cfg0 = 0x0000006c, + .reo1_sw_cookie_cfg1 = 0x00000070, + .reo1_qdesc_lut_base0 = 0x00000074, + .reo1_qdesc_lut_base1 = 0x00000078, + .reo1_ring_base_lsb = 0x00000500, + .reo1_ring_base_msb = 0x00000504, + .reo1_ring_id = 0x00000508, + .reo1_ring_misc = 0x00000510, + .reo1_ring_hp_addr_lsb = 0x00000514, + .reo1_ring_hp_addr_msb = 0x00000518, + .reo1_ring_producer_int_setup = 0x00000524, + .reo1_ring_msi1_base_lsb = 0x00000548, + .reo1_ring_msi1_base_msb = 0x0000054C, + .reo1_ring_msi1_data = 0x00000550, + .reo1_aging_thres_ix0 = 0x00000B28, + .reo1_aging_thres_ix1 = 0x00000B2C, + .reo1_aging_thres_ix2 = 0x00000B30, + .reo1_aging_thres_ix3 = 0x00000B34, + + /* REO Exception ring address */ + .reo2_sw0_ring_base = 0x000008c0, + + /* REO Reinject ring address */ + .sw2reo_ring_base = 0x00000320, + .sw2reo1_ring_base = 0x00000398, + + /* REO cmd ring address */ + .reo_cmd_ring_base = 0x000002A8, + + /* REO status ring address */ + .reo_status_ring_base = 0x00000aa0, + + /* WBM idle link ring address */ + .wbm_idle_ring_base_lsb = 0x00000d3c, + .wbm_idle_ring_misc_addr = 0x00000d4c, + .wbm_r0_idle_list_cntl_addr = 0x00000240, + .wbm_r0_idle_list_size_addr = 0x00000244, + .wbm_scattered_ring_base_lsb = 0x00000250, + .wbm_scattered_ring_base_msb = 0x00000254, + .wbm_scattered_desc_head_info_ix0 = 0x00000260, + .wbm_scattered_desc_head_info_ix1 = 0x00000264, + .wbm_scattered_desc_tail_info_ix0 = 0x00000270, + .wbm_scattered_desc_tail_info_ix1 = 0x00000274, + .wbm_scattered_desc_ptr_hp_addr = 0x0000027c, + + /* SW2WBM release ring address */ + .wbm_sw_release_ring_base_lsb = 0x0000037c, + + /* WBM2SW release ring address */ + .wbm0_release_ring_base_lsb = 0x00000e08, + .wbm1_release_ring_base_lsb = 0x00000e80, + + /* PPE release ring address */ + .ppe_rel_ring_base = 0x0000046c, + + /* CE address */ + .umac_ce0_src_reg_base = 0x00200000 - + HAL_IPQ5424_CE_WFSS_REG_BASE, + .umac_ce0_dest_reg_base = 0x00201000 - + HAL_IPQ5424_CE_WFSS_REG_BASE, + .umac_ce1_src_reg_base = 0x00202000 - + HAL_IPQ5424_CE_WFSS_REG_BASE, + .umac_ce1_dest_reg_base = 0x00203000 - + HAL_IPQ5424_CE_WFSS_REG_BASE, +}; + static inline bool ath12k_hal_rx_desc_get_first_msdu_qcn9274(struct hal_rx_desc *desc) { diff --git a/drivers/net/wireless/ath/ath12k/wifi7/hal_qcn9274.h b/drivers/net/wireless/ath/ath12k/wifi7/hal_qcn9274.h index 08c0a0469474..03cf3792d523 100644 --- a/drivers/net/wireless/ath/ath12k/wifi7/hal_qcn9274.h +++ b/drivers/net/wireless/ath/ath12k/wifi7/hal_qcn9274.h @@ -17,6 +17,7 @@ extern const struct hal_ops hal_qcn9274_ops; extern const struct ath12k_hw_regs qcn9274_v1_regs; extern const struct ath12k_hw_regs qcn9274_v2_regs; extern const struct ath12k_hw_regs ipq5332_regs; +extern const struct ath12k_hw_regs ipq5424_regs; extern const struct ath12k_hal_tcl_to_wbm_rbm_map ath12k_hal_tcl_to_wbm_rbm_map_qcn9274[DP_TCL_NUM_RING_MAX]; extern const struct ath12k_hw_hal_params ath12k_hw_hal_params_qcn9274; From 38cff745fa7c0b006f95565a2e5de9f0cac13702 Mon Sep 17 00:00:00 2001 From: Saravanakumar Duraisamy Date: Tue, 7 Apr 2026 10:56:32 +0530 Subject: [PATCH 1226/1561] wifi: ath12k: Add CE remap hardware parameters for IPQ5424 Add CE remap hardware parameters for Ath12k AHB device IPQ5424. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.6-01243-QCAHKSWPL_SILICONZ-1 Tested-on: IPQ5332 hw1.0 AHB WLAN.WBE.1.6-01275-QCAHKSWPL_SILICONZ-1 Tested-on: IPQ5424 hw1.0 AHB WLAN.WBE.1.6-01275-QCAHKSWPL_SILICONZ-1 Signed-off-by: Saravanakumar Duraisamy Signed-off-by: Raj Kumar Bhagat Reviewed-by: Baochen Qiang Reviewed-by: Rameshkumar Sundaram Link: https://patch.msgid.link/20260407-ath12k-ipq5424-v5-5-8e96aa660ec4@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/ce.h | 13 +++++++++---- drivers/net/wireless/ath/ath12k/wifi7/hw.c | 22 +++++++++++++++++----- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/ce.h b/drivers/net/wireless/ath/ath12k/ce.h index df4f2a4f8480..009cddf2d68d 100644 --- a/drivers/net/wireless/ath/ath12k/ce.h +++ b/drivers/net/wireless/ath/ath12k/ce.h @@ -38,10 +38,15 @@ #define PIPEDIR_INOUT 3 /* bidirectional */ #define PIPEDIR_INOUT_H2H 4 /* bidirectional, host to host */ -/* CE address/mask */ -#define CE_HOST_IE_ADDRESS 0x75804C -#define CE_HOST_IE_2_ADDRESS 0x758050 -#define CE_HOST_IE_3_ADDRESS CE_HOST_IE_ADDRESS +/* IPQ5332 CE address/mask */ +#define CE_HOST_IPQ5332_IE_ADDRESS 0x75804C +#define CE_HOST_IPQ5332_IE_2_ADDRESS 0x758050 +#define CE_HOST_IPQ5332_IE_3_ADDRESS CE_HOST_IPQ5332_IE_ADDRESS + +/* IPQ5424 CE address/mask */ +#define CE_HOST_IPQ5424_IE_ADDRESS 0x21804C +#define CE_HOST_IPQ5424_IE_2_ADDRESS 0x218050 +#define CE_HOST_IPQ5424_IE_3_ADDRESS CE_HOST_IPQ5424_IE_ADDRESS #define CE_HOST_IE_3_SHIFT 0xC diff --git a/drivers/net/wireless/ath/ath12k/wifi7/hw.c b/drivers/net/wireless/ath/ath12k/wifi7/hw.c index 2b5d1f7e9e04..cb3185850439 100644 --- a/drivers/net/wireless/ath/ath12k/wifi7/hw.c +++ b/drivers/net/wireless/ath/ath12k/wifi7/hw.c @@ -329,9 +329,15 @@ static const struct ath12k_hw_ring_mask ath12k_wifi7_hw_ring_mask_wcn7850 = { }; static const struct ce_ie_addr ath12k_wifi7_ce_ie_addr_ipq5332 = { - .ie1_reg_addr = CE_HOST_IE_ADDRESS - HAL_IPQ5332_CE_WFSS_REG_BASE, - .ie2_reg_addr = CE_HOST_IE_2_ADDRESS - HAL_IPQ5332_CE_WFSS_REG_BASE, - .ie3_reg_addr = CE_HOST_IE_3_ADDRESS - HAL_IPQ5332_CE_WFSS_REG_BASE, + .ie1_reg_addr = CE_HOST_IPQ5332_IE_ADDRESS - HAL_IPQ5332_CE_WFSS_REG_BASE, + .ie2_reg_addr = CE_HOST_IPQ5332_IE_2_ADDRESS - HAL_IPQ5332_CE_WFSS_REG_BASE, + .ie3_reg_addr = CE_HOST_IPQ5332_IE_3_ADDRESS - HAL_IPQ5332_CE_WFSS_REG_BASE, +}; + +static const struct ce_ie_addr ath12k_wifi7_ce_ie_addr_ipq5424 = { + .ie1_reg_addr = CE_HOST_IPQ5424_IE_ADDRESS - HAL_IPQ5424_CE_WFSS_REG_BASE, + .ie2_reg_addr = CE_HOST_IPQ5424_IE_2_ADDRESS - HAL_IPQ5424_CE_WFSS_REG_BASE, + .ie3_reg_addr = CE_HOST_IPQ5424_IE_3_ADDRESS - HAL_IPQ5424_CE_WFSS_REG_BASE, }; static const struct ce_remap ath12k_wifi7_ce_remap_ipq5332 = { @@ -340,6 +346,12 @@ static const struct ce_remap ath12k_wifi7_ce_remap_ipq5332 = { .cmem_offset = HAL_SEQ_WCSS_CMEM_OFFSET, }; +static const struct ce_remap ath12k_wifi7_ce_remap_ipq5424 = { + .base = HAL_IPQ5424_CE_WFSS_REG_BASE, + .size = HAL_IPQ5424_CE_SIZE, + .cmem_offset = HAL_SEQ_WCSS_CMEM_OFFSET, +}; + static const struct ath12k_hw_params ath12k_wifi7_hw_params[] = { { .name = "qcn9274 hw1.0", @@ -824,8 +836,8 @@ static const struct ath12k_hw_params ath12k_wifi7_hw_params[] = { .iova_mask = 0, .supports_aspm = false, - .ce_ie_addr = NULL, - .ce_remap = NULL, + .ce_ie_addr = &ath12k_wifi7_ce_ie_addr_ipq5424, + .ce_remap = &ath12k_wifi7_ce_remap_ipq5424, .bdf_addr_offset = 0x940000, .current_cc_support = false, From 8fb66931fe31094aa2e1b2a5c015050b8b4cb2ec Mon Sep 17 00:00:00 2001 From: Sowmiya Sree Elavalagan Date: Tue, 7 Apr 2026 10:56:33 +0530 Subject: [PATCH 1227/1561] wifi: ath12k: Enable IPQ5424 WiFi device support Currently, ath12k AHB (in IPQ5332) uses SCM calls to authenticate the firmware image to bring up userpd. From IPQ5424 onwards, Q6 firmware can directly communicate with the Trusted Management Engine - Lite (TME-L), eliminating the need for SCM calls for userpd bring-up. Hence, to enable IPQ5424 device support, use qcom_mdt_load_no_init() and skip the SCM call as Q6 will directly authenticate the userpd firmware. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.6-01243-QCAHKSWPL_SILICONZ-1 Tested-on: IPQ5332 hw1.0 AHB WLAN.WBE.1.6-01275-QCAHKSWPL_SILICONZ-1 Tested-on: IPQ5424 hw1.0 AHB WLAN.WBE.1.6-01275-QCAHKSWPL_SILICONZ-1 Signed-off-by: Sowmiya Sree Elavalagan Co-developed-by: Saravanakumar Duraisamy Signed-off-by: Saravanakumar Duraisamy Co-developed-by: Raj Kumar Bhagat Signed-off-by: Raj Kumar Bhagat Reviewed-by: Baochen Qiang Reviewed-by: Rameshkumar Sundaram Link: https://patch.msgid.link/20260407-ath12k-ipq5424-v5-6-8e96aa660ec4@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/ahb.c | 36 +++++++++++++-------- drivers/net/wireless/ath/ath12k/ahb.h | 1 + drivers/net/wireless/ath/ath12k/wifi7/ahb.c | 8 +++++ 3 files changed, 31 insertions(+), 14 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/ahb.c b/drivers/net/wireless/ath/ath12k/ahb.c index 9a4d34e49104..2dcf0a52e4c1 100644 --- a/drivers/net/wireless/ath/ath12k/ahb.c +++ b/drivers/net/wireless/ath/ath12k/ahb.c @@ -382,8 +382,12 @@ static int ath12k_ahb_power_up(struct ath12k_base *ab) ATH12K_AHB_UPD_SWID; /* Load FW image to a reserved memory location */ - ret = qcom_mdt_load(dev, fw, fw_name, pasid, mem_region, mem_phys, mem_size, - &mem_phys); + if (ab_ahb->scm_auth_enabled) + ret = qcom_mdt_load(dev, fw, fw_name, pasid, mem_region, + mem_phys, mem_size, &mem_phys); + else + ret = qcom_mdt_load_no_init(dev, fw, fw_name, mem_region, + mem_phys, mem_size, &mem_phys); if (ret) { ath12k_err(ab, "Failed to load MDT segments: %d\n", ret); goto err_fw; @@ -414,11 +418,13 @@ static int ath12k_ahb_power_up(struct ath12k_base *ab) goto err_fw2; } - /* Authenticate FW image using peripheral ID */ - ret = qcom_scm_pas_auth_and_reset(pasid); - if (ret) { - ath12k_err(ab, "failed to boot the remote processor %d\n", ret); - goto err_fw2; + if (ab_ahb->scm_auth_enabled) { + /* Authenticate FW image using peripheral ID */ + ret = qcom_scm_pas_auth_and_reset(pasid); + if (ret) { + ath12k_err(ab, "failed to boot the remote processor %d\n", ret); + goto err_fw2; + } } /* Instruct Q6 to spawn userPD thread */ @@ -475,13 +481,15 @@ static void ath12k_ahb_power_down(struct ath12k_base *ab, bool is_suspend) qcom_smem_state_update_bits(ab_ahb->stop_state, BIT(ab_ahb->stop_bit), 0); - pasid = (u32_encode_bits(ab_ahb->userpd_id, ATH12K_USERPD_ID_MASK)) | - ATH12K_AHB_UPD_SWID; - /* Release the firmware */ - ret = qcom_scm_pas_shutdown(pasid); - if (ret) - ath12k_err(ab, "scm pas shutdown failed for userPD%d: %d\n", - ab_ahb->userpd_id, ret); + if (ab_ahb->scm_auth_enabled) { + pasid = (u32_encode_bits(ab_ahb->userpd_id, ATH12K_USERPD_ID_MASK)) | + ATH12K_AHB_UPD_SWID; + /* Release the firmware */ + ret = qcom_scm_pas_shutdown(pasid); + if (ret) + ath12k_err(ab, "scm pas shutdown failed for userPD%d\n", + ab_ahb->userpd_id); + } } static void ath12k_ahb_init_qmi_ce_config(struct ath12k_base *ab) diff --git a/drivers/net/wireless/ath/ath12k/ahb.h b/drivers/net/wireless/ath/ath12k/ahb.h index be9e31b3682d..0fa15daaa3e6 100644 --- a/drivers/net/wireless/ath/ath12k/ahb.h +++ b/drivers/net/wireless/ath/ath12k/ahb.h @@ -68,6 +68,7 @@ struct ath12k_ahb { int userpd_irq_num[ATH12K_USERPD_MAX_IRQ]; const struct ath12k_ahb_ops *ahb_ops; const struct ath12k_ahb_device_family_ops *device_family_ops; + bool scm_auth_enabled; }; struct ath12k_ahb_driver { diff --git a/drivers/net/wireless/ath/ath12k/wifi7/ahb.c b/drivers/net/wireless/ath/ath12k/wifi7/ahb.c index a6c5f7689edd..6a8b8b2a56f9 100644 --- a/drivers/net/wireless/ath/ath12k/wifi7/ahb.c +++ b/drivers/net/wireless/ath/ath12k/wifi7/ahb.c @@ -19,6 +19,9 @@ static const struct of_device_id ath12k_wifi7_ahb_of_match[] = { { .compatible = "qcom,ipq5332-wifi", .data = (void *)ATH12K_HW_IPQ5332_HW10, }, + { .compatible = "qcom,ipq5424-wifi", + .data = (void *)ATH12K_HW_IPQ5424_HW10, + }, { } }; @@ -38,6 +41,11 @@ static int ath12k_wifi7_ahb_probe(struct platform_device *pdev) switch (hw_rev) { case ATH12K_HW_IPQ5332_HW10: ab_ahb->userpd_id = ATH12K_IPQ5332_USERPD_ID; + ab_ahb->scm_auth_enabled = true; + break; + case ATH12K_HW_IPQ5424_HW10: + ab_ahb->userpd_id = ATH12K_IPQ5332_USERPD_ID; + ab_ahb->scm_auth_enabled = false; break; default: return -EOPNOTSUPP; From e65d8b6f3092398efd7c74e722cb7a516d9a0d6d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 4 Apr 2026 16:01:03 -0700 Subject: [PATCH 1228/1561] selftests: drv-net: adjust to socat changes socat v1.8.1.0 now defaults to shut-null, it sends an extra 0-length UDP packet when sender disconnects. This breaks our tests which expect the exact packet sequence. Add shut-none which was the old default where necessary. Acked-by: Stanislav Fomichev Reviewed-by: Joe Damato Reviewed-by: Breno Leitao Link: https://patch.msgid.link/20260404230103.2719103-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh | 6 +++--- tools/testing/selftests/drivers/net/xdp.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh index 02dcdeb723be..a9a01a64b7b3 100644 --- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh +++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh @@ -251,7 +251,7 @@ function listen_port_and_save_to() { # Just wait for 3 seconds timeout 3 ip netns exec "${NAMESPACE}" \ - socat "${SOCAT_MODE}":"${PORT}",fork "${OUTPUT}" 2> /dev/null + socat "${SOCAT_MODE}":"${PORT}",fork,shut-none "${OUTPUT}" 2> /dev/null } # Only validate that the message arrived properly @@ -360,8 +360,8 @@ function check_for_taskset() { # This is necessary if running multiple tests in a row function pkill_socat() { - PROCESS_NAME4="socat UDP-LISTEN:6666,fork ${OUTPUT_FILE}" - PROCESS_NAME6="socat UDP6-LISTEN:6666,fork ${OUTPUT_FILE}" + PROCESS_NAME4="socat UDP-LISTEN:6666,fork,shut-none ${OUTPUT_FILE}" + PROCESS_NAME6="socat UDP6-LISTEN:6666,fork,shut-none ${OUTPUT_FILE}" # socat runs under timeout(1), kill it if it is still alive # do not fail if socat doesn't exist anymore set +e diff --git a/tools/testing/selftests/drivers/net/xdp.py b/tools/testing/selftests/drivers/net/xdp.py index d86446569f89..7e635487d517 100755 --- a/tools/testing/selftests/drivers/net/xdp.py +++ b/tools/testing/selftests/drivers/net/xdp.py @@ -70,7 +70,7 @@ def _exchg_udp(cfg, port, test_string): cfg.require_cmd("socat", remote=True) rx_udp_cmd = f"socat -{cfg.addr_ipver} -T 2 -u UDP-RECV:{port},reuseport STDOUT" - tx_udp_cmd = f"echo -n {test_string} | socat -t 2 -u STDIN UDP:{cfg.baddr}:{port}" + tx_udp_cmd = f"echo -n {test_string} | socat -t 2 -u STDIN UDP:{cfg.baddr}:{port},shut-none" with bkg(rx_udp_cmd, exit_wait=True) as nc: wait_port_listen(port, proto="udp") @@ -271,7 +271,7 @@ def _test_xdp_native_tx(cfg, bpf_info, payload_lens): # Writing zero bytes to stdin gets ignored by socat, # but with the shut-null flag socat generates a zero sized packet # when the socket is closed. - tx_cmd_suffix = ",shut-null" if payload_len == 0 else "" + tx_cmd_suffix = ",shut-null" if payload_len == 0 else ",shut-none" tx_udp = f"echo -n {test_string} | socat -t 2 " + \ f"-u STDIN UDP:{cfg.baddr}:{port}{tx_cmd_suffix}" From 30e02ec3b4b6bd429a4824f125eb843a291dcccf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 3 Apr 2026 22:15:39 +0000 Subject: [PATCH 1229/1561] net: qdisc_pkt_len_segs_init() cleanup Reduce indentation level by returning early if the transport header was not set. Add an unlikely() clause as this is not the common case. No functional change. Signed-off-by: Eric Dumazet Reviewed-by: Joe Damato Link: https://patch.msgid.link/20260403221540.3297753-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 66 +++++++++++++++++++++++++------------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 4519f0e59beb..3eb2f50f5165 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4104,6 +4104,7 @@ EXPORT_SYMBOL_GPL(validate_xmit_skb_list); static void qdisc_pkt_len_segs_init(struct sk_buff *skb) { struct skb_shared_info *shinfo = skb_shinfo(skb); + unsigned int hdr_len; u16 gso_segs; qdisc_skb_cb(skb)->pkt_len = skb->len; @@ -4117,44 +4118,43 @@ static void qdisc_pkt_len_segs_init(struct sk_buff *skb) /* To get more precise estimation of bytes sent on wire, * we add to pkt_len the headers size of all segments */ - if (skb_transport_header_was_set(skb)) { - unsigned int hdr_len; + if (unlikely(!skb_transport_header_was_set(skb))) + return; - /* mac layer + network layer */ - if (!skb->encapsulation) - hdr_len = skb_transport_offset(skb); - else - hdr_len = skb_inner_transport_offset(skb); + /* mac layer + network layer */ + if (!skb->encapsulation) + hdr_len = skb_transport_offset(skb); + else + hdr_len = skb_inner_transport_offset(skb); - /* + transport layer */ - if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { - const struct tcphdr *th; - struct tcphdr _tcphdr; + /* + transport layer */ + if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { + const struct tcphdr *th; + struct tcphdr _tcphdr; - th = skb_header_pointer(skb, hdr_len, - sizeof(_tcphdr), &_tcphdr); - if (likely(th)) - hdr_len += __tcp_hdrlen(th); - } else if (shinfo->gso_type & SKB_GSO_UDP_L4) { - struct udphdr _udphdr; + th = skb_header_pointer(skb, hdr_len, + sizeof(_tcphdr), &_tcphdr); + if (likely(th)) + hdr_len += __tcp_hdrlen(th); + } else if (shinfo->gso_type & SKB_GSO_UDP_L4) { + struct udphdr _udphdr; - if (skb_header_pointer(skb, hdr_len, - sizeof(_udphdr), &_udphdr)) - hdr_len += sizeof(struct udphdr); - } - - if (unlikely(shinfo->gso_type & SKB_GSO_DODGY)) { - int payload = skb->len - hdr_len; - - /* Malicious packet. */ - if (payload <= 0) - return; - gso_segs = DIV_ROUND_UP(payload, shinfo->gso_size); - shinfo->gso_segs = gso_segs; - qdisc_skb_cb(skb)->pkt_segs = gso_segs; - } - qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len; + if (skb_header_pointer(skb, hdr_len, + sizeof(_udphdr), &_udphdr)) + hdr_len += sizeof(struct udphdr); } + + if (unlikely(shinfo->gso_type & SKB_GSO_DODGY)) { + int payload = skb->len - hdr_len; + + /* Malicious packet. */ + if (payload <= 0) + return; + gso_segs = DIV_ROUND_UP(payload, shinfo->gso_size); + shinfo->gso_segs = gso_segs; + qdisc_skb_cb(skb)->pkt_segs = gso_segs; + } + qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len; } static int dev_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *q, From 7fb4c19670110f052c04e1ec1d2b953b9f4f57e4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 3 Apr 2026 22:15:40 +0000 Subject: [PATCH 1230/1561] net: pull headers in qdisc_pkt_len_segs_init() Most ndo_start_xmit() methods expects headers of gso packets to be already in skb->head. net/core/tso.c users are particularly at risk, because tso_build_hdr() does a memcpy(hdr, skb->data, hdr_len); qdisc_pkt_len_segs_init() already does a dissection of gso packets. Use pskb_may_pull() instead of skb_header_pointer() to make sure drivers do not have to reimplement this. Some malicious packets could be fed, detect them so that we can drop them sooner with a new SKB_DROP_REASON_SKB_BAD_GSO drop_reason. Fixes: e876f208af18 ("net: Add a software TSO helper API") Signed-off-by: Eric Dumazet Reviewed-by: Joe Damato Link: https://patch.msgid.link/20260403221540.3297753-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/dropreason-core.h | 3 +++ net/core/dev.c | 51 +++++++++++++++++++++-------------- 2 files changed, 34 insertions(+), 20 deletions(-) diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index de61dd5dbfd9..51855de5d208 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -74,6 +74,7 @@ FN(UNHANDLED_PROTO) \ FN(SKB_CSUM) \ FN(SKB_GSO_SEG) \ + FN(SKB_BAD_GSO) \ FN(SKB_UCOPY_FAULT) \ FN(DEV_HDR) \ FN(DEV_READY) \ @@ -392,6 +393,8 @@ enum skb_drop_reason { SKB_DROP_REASON_SKB_CSUM, /** @SKB_DROP_REASON_SKB_GSO_SEG: gso segmentation error */ SKB_DROP_REASON_SKB_GSO_SEG, + /** @SKB_DROP_REASON_SKB_BAD_GSO: malicious gso packet. */ + SKB_DROP_REASON_SKB_BAD_GSO, /** * @SKB_DROP_REASON_SKB_UCOPY_FAULT: failed to copy data from user space, * e.g., via zerocopy_sg_from_iter() or skb_orphan_frags_rx() diff --git a/net/core/dev.c b/net/core/dev.c index 3eb2f50f5165..5a31f9d2128c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4101,16 +4101,16 @@ struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *d } EXPORT_SYMBOL_GPL(validate_xmit_skb_list); -static void qdisc_pkt_len_segs_init(struct sk_buff *skb) +static enum skb_drop_reason qdisc_pkt_len_segs_init(struct sk_buff *skb) { struct skb_shared_info *shinfo = skb_shinfo(skb); - unsigned int hdr_len; + unsigned int hdr_len, tlen; u16 gso_segs; qdisc_skb_cb(skb)->pkt_len = skb->len; if (!shinfo->gso_size) { qdisc_skb_cb(skb)->pkt_segs = 1; - return; + return SKB_NOT_DROPPED_YET; } qdisc_skb_cb(skb)->pkt_segs = gso_segs = shinfo->gso_segs; @@ -4118,43 +4118,49 @@ static void qdisc_pkt_len_segs_init(struct sk_buff *skb) /* To get more precise estimation of bytes sent on wire, * we add to pkt_len the headers size of all segments */ - if (unlikely(!skb_transport_header_was_set(skb))) - return; /* mac layer + network layer */ - if (!skb->encapsulation) + if (!skb->encapsulation) { + if (unlikely(!skb_transport_header_was_set(skb))) + return SKB_NOT_DROPPED_YET; hdr_len = skb_transport_offset(skb); - else + } else { hdr_len = skb_inner_transport_offset(skb); - + } /* + transport layer */ if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { const struct tcphdr *th; - struct tcphdr _tcphdr; - th = skb_header_pointer(skb, hdr_len, - sizeof(_tcphdr), &_tcphdr); - if (likely(th)) - hdr_len += __tcp_hdrlen(th); + if (!pskb_may_pull(skb, hdr_len + sizeof(struct tcphdr))) + return SKB_DROP_REASON_SKB_BAD_GSO; + + th = (const struct tcphdr *)(skb->data + hdr_len); + tlen = __tcp_hdrlen(th); + if (tlen < sizeof(*th)) + return SKB_DROP_REASON_SKB_BAD_GSO; + hdr_len += tlen; + if (!pskb_may_pull(skb, hdr_len)) + return SKB_DROP_REASON_SKB_BAD_GSO; } else if (shinfo->gso_type & SKB_GSO_UDP_L4) { - struct udphdr _udphdr; - - if (skb_header_pointer(skb, hdr_len, - sizeof(_udphdr), &_udphdr)) - hdr_len += sizeof(struct udphdr); + if (!pskb_may_pull(skb, hdr_len + sizeof(struct udphdr))) + return SKB_DROP_REASON_SKB_BAD_GSO; + hdr_len += sizeof(struct udphdr); } + /* prior pskb_may_pull() might have changed skb->head. */ + shinfo = skb_shinfo(skb); if (unlikely(shinfo->gso_type & SKB_GSO_DODGY)) { int payload = skb->len - hdr_len; /* Malicious packet. */ if (payload <= 0) - return; + return SKB_DROP_REASON_SKB_BAD_GSO; gso_segs = DIV_ROUND_UP(payload, shinfo->gso_size); shinfo->gso_segs = gso_segs; qdisc_skb_cb(skb)->pkt_segs = gso_segs; } qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len; + return SKB_NOT_DROPPED_YET; } static int dev_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *q, @@ -4771,6 +4777,12 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) (SKBTX_SCHED_TSTAMP | SKBTX_BPF))) __skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_SCHED); + reason = qdisc_pkt_len_segs_init(skb); + if (unlikely(reason)) { + dev_core_stats_tx_dropped_inc(dev); + kfree_skb_reason(skb, reason); + return -EINVAL; + } /* Disable soft irqs for various locks below. Also * stops preemption for RCU. */ @@ -4778,7 +4790,6 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) skb_update_prio(skb); - qdisc_pkt_len_segs_init(skb); tcx_set_ingress(skb, false); #ifdef CONFIG_NET_EGRESS if (static_branch_unlikely(&egress_needed_key)) { From 6e6f2b9b3375cc0e6b8567d31ae7d3b2d910582f Mon Sep 17 00:00:00 2001 From: Sun Jian Date: Tue, 3 Mar 2026 18:15:25 +0800 Subject: [PATCH 1231/1561] netfilter: use function typedefs for __rcu NAT helper hook pointers After commit 07919126ecfc ("netfilter: annotate NAT helper hook pointers with __rcu"), sparse can warn about type/address-space mismatches when RCU-dereferencing NAT helper hook function pointers. The hooks are __rcu-annotated and accessed via rcu_dereference(), but the combination of complex function pointer declarators and the WRITE_ONCE() machinery used by RCU_INIT_POINTER()/rcu_assign_pointer() can confuse sparse and trigger false positives. Introduce typedefs for the NAT helper function types, so __rcu applies to a simple "fn_t __rcu *" pointer form. Also replace local typeof(hook) variables with "fn_t *" to avoid propagating __rcu address space into temporaries. No functional change intended. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202603022359.3dGE9fwI-lkp@intel.com/ Signed-off-by: Sun Jian Signed-off-by: Florian Westphal --- include/linux/netfilter/nf_conntrack_amanda.h | 15 +++++++++------ include/linux/netfilter/nf_conntrack_ftp.h | 17 ++++++++++------- include/linux/netfilter/nf_conntrack_irc.h | 15 +++++++++------ include/linux/netfilter/nf_conntrack_snmp.h | 11 +++++++---- include/linux/netfilter/nf_conntrack_tftp.h | 9 ++++++--- net/netfilter/nf_conntrack_amanda.c | 10 ++-------- net/netfilter/nf_conntrack_ftp.c | 10 ++-------- net/netfilter/nf_conntrack_irc.c | 10 ++-------- net/netfilter/nf_conntrack_snmp.c | 7 ++----- net/netfilter/nf_conntrack_tftp.c | 7 ++----- 10 files changed, 51 insertions(+), 60 deletions(-) diff --git a/include/linux/netfilter/nf_conntrack_amanda.h b/include/linux/netfilter/nf_conntrack_amanda.h index dfe89f38d1f7..1719987e8fd8 100644 --- a/include/linux/netfilter/nf_conntrack_amanda.h +++ b/include/linux/netfilter/nf_conntrack_amanda.h @@ -7,10 +7,13 @@ #include #include -extern unsigned int (__rcu *nf_nat_amanda_hook)(struct sk_buff *skb, - enum ip_conntrack_info ctinfo, - unsigned int protoff, - unsigned int matchoff, - unsigned int matchlen, - struct nf_conntrack_expect *exp); +typedef unsigned int +nf_nat_amanda_hook_fn(struct sk_buff *skb, + enum ip_conntrack_info ctinfo, + unsigned int protoff, + unsigned int matchoff, + unsigned int matchlen, + struct nf_conntrack_expect *exp); + +extern nf_nat_amanda_hook_fn __rcu *nf_nat_amanda_hook; #endif /* _NF_CONNTRACK_AMANDA_H */ diff --git a/include/linux/netfilter/nf_conntrack_ftp.h b/include/linux/netfilter/nf_conntrack_ftp.h index f31292642035..7b62446ccec4 100644 --- a/include/linux/netfilter/nf_conntrack_ftp.h +++ b/include/linux/netfilter/nf_conntrack_ftp.h @@ -26,11 +26,14 @@ struct nf_ct_ftp_master { /* For NAT to hook in when we find a packet which describes what other * connection we should expect. */ -extern unsigned int (__rcu *nf_nat_ftp_hook)(struct sk_buff *skb, - enum ip_conntrack_info ctinfo, - enum nf_ct_ftp_type type, - unsigned int protoff, - unsigned int matchoff, - unsigned int matchlen, - struct nf_conntrack_expect *exp); +typedef unsigned int +nf_nat_ftp_hook_fn(struct sk_buff *skb, + enum ip_conntrack_info ctinfo, + enum nf_ct_ftp_type type, + unsigned int protoff, + unsigned int matchoff, + unsigned int matchlen, + struct nf_conntrack_expect *exp); + +extern nf_nat_ftp_hook_fn __rcu *nf_nat_ftp_hook; #endif /* _NF_CONNTRACK_FTP_H */ diff --git a/include/linux/netfilter/nf_conntrack_irc.h b/include/linux/netfilter/nf_conntrack_irc.h index 4f3ca5621998..ce07250afb4e 100644 --- a/include/linux/netfilter/nf_conntrack_irc.h +++ b/include/linux/netfilter/nf_conntrack_irc.h @@ -8,11 +8,14 @@ #define IRC_PORT 6667 -extern unsigned int (__rcu *nf_nat_irc_hook)(struct sk_buff *skb, - enum ip_conntrack_info ctinfo, - unsigned int protoff, - unsigned int matchoff, - unsigned int matchlen, - struct nf_conntrack_expect *exp); +typedef unsigned int +nf_nat_irc_hook_fn(struct sk_buff *skb, + enum ip_conntrack_info ctinfo, + unsigned int protoff, + unsigned int matchoff, + unsigned int matchlen, + struct nf_conntrack_expect *exp); + +extern nf_nat_irc_hook_fn __rcu *nf_nat_irc_hook; #endif /* _NF_CONNTRACK_IRC_H */ diff --git a/include/linux/netfilter/nf_conntrack_snmp.h b/include/linux/netfilter/nf_conntrack_snmp.h index 99107e4f5234..bb39f04a9977 100644 --- a/include/linux/netfilter/nf_conntrack_snmp.h +++ b/include/linux/netfilter/nf_conntrack_snmp.h @@ -5,9 +5,12 @@ #include #include -extern int (__rcu *nf_nat_snmp_hook)(struct sk_buff *skb, - unsigned int protoff, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo); +typedef int +nf_nat_snmp_hook_fn(struct sk_buff *skb, + unsigned int protoff, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo); + +extern nf_nat_snmp_hook_fn __rcu *nf_nat_snmp_hook; #endif /* _NF_CONNTRACK_SNMP_H */ diff --git a/include/linux/netfilter/nf_conntrack_tftp.h b/include/linux/netfilter/nf_conntrack_tftp.h index 1490b68dd7d1..90b334bbce3c 100644 --- a/include/linux/netfilter/nf_conntrack_tftp.h +++ b/include/linux/netfilter/nf_conntrack_tftp.h @@ -19,8 +19,11 @@ struct tftphdr { #define TFTP_OPCODE_ACK 4 #define TFTP_OPCODE_ERROR 5 -extern unsigned int (__rcu *nf_nat_tftp_hook)(struct sk_buff *skb, - enum ip_conntrack_info ctinfo, - struct nf_conntrack_expect *exp); +typedef unsigned int +nf_nat_tftp_hook_fn(struct sk_buff *skb, + enum ip_conntrack_info ctinfo, + struct nf_conntrack_expect *exp); + +extern nf_nat_tftp_hook_fn __rcu *nf_nat_tftp_hook; #endif /* _NF_CONNTRACK_TFTP_H */ diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c index c0132559f6af..d2c09e8dd872 100644 --- a/net/netfilter/nf_conntrack_amanda.c +++ b/net/netfilter/nf_conntrack_amanda.c @@ -37,13 +37,7 @@ MODULE_PARM_DESC(master_timeout, "timeout for the master connection"); module_param(ts_algo, charp, 0400); MODULE_PARM_DESC(ts_algo, "textsearch algorithm to use (default kmp)"); -unsigned int (__rcu *nf_nat_amanda_hook)(struct sk_buff *skb, - enum ip_conntrack_info ctinfo, - unsigned int protoff, - unsigned int matchoff, - unsigned int matchlen, - struct nf_conntrack_expect *exp) - __read_mostly; +nf_nat_amanda_hook_fn __rcu *nf_nat_amanda_hook __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_amanda_hook); enum amanda_strings { @@ -98,7 +92,7 @@ static int amanda_help(struct sk_buff *skb, u_int16_t len; __be16 port; int ret = NF_ACCEPT; - typeof(nf_nat_amanda_hook) nf_nat_amanda; + nf_nat_amanda_hook_fn *nf_nat_amanda; /* Only look at packets from the Amanda server */ if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 5e00f9123c38..de83bf9e6c61 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -43,13 +43,7 @@ module_param_array(ports, ushort, &ports_c, 0400); static bool loose; module_param(loose, bool, 0600); -unsigned int (__rcu *nf_nat_ftp_hook)(struct sk_buff *skb, - enum ip_conntrack_info ctinfo, - enum nf_ct_ftp_type type, - unsigned int protoff, - unsigned int matchoff, - unsigned int matchlen, - struct nf_conntrack_expect *exp); +nf_nat_ftp_hook_fn __rcu *nf_nat_ftp_hook; EXPORT_SYMBOL_GPL(nf_nat_ftp_hook); static int try_rfc959(const char *, size_t, struct nf_conntrack_man *, @@ -385,7 +379,7 @@ static int help(struct sk_buff *skb, struct nf_conntrack_man cmd = {}; unsigned int i; int found = 0, ends_in_nl; - typeof(nf_nat_ftp_hook) nf_nat_ftp; + nf_nat_ftp_hook_fn *nf_nat_ftp; /* Until there's been traffic both ways, don't look in packets. */ if (ctinfo != IP_CT_ESTABLISHED && diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index b8e6d724acd1..522183b9a604 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -30,13 +30,7 @@ static unsigned int dcc_timeout __read_mostly = 300; static char *irc_buffer; static DEFINE_SPINLOCK(irc_buffer_lock); -unsigned int (__rcu *nf_nat_irc_hook)(struct sk_buff *skb, - enum ip_conntrack_info ctinfo, - unsigned int protoff, - unsigned int matchoff, - unsigned int matchlen, - struct nf_conntrack_expect *exp) - __read_mostly; +nf_nat_irc_hook_fn __rcu *nf_nat_irc_hook __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_irc_hook); #define HELPER_NAME "irc" @@ -122,7 +116,7 @@ static int help(struct sk_buff *skb, unsigned int protoff, __be16 port; int i, ret = NF_ACCEPT; char *addr_beg_p, *addr_end_p; - typeof(nf_nat_irc_hook) nf_nat_irc; + nf_nat_irc_hook_fn *nf_nat_irc; unsigned int datalen; /* If packet is coming from IRC server */ diff --git a/net/netfilter/nf_conntrack_snmp.c b/net/netfilter/nf_conntrack_snmp.c index 387dd6e58f88..7b7eed43c54f 100644 --- a/net/netfilter/nf_conntrack_snmp.c +++ b/net/netfilter/nf_conntrack_snmp.c @@ -25,17 +25,14 @@ static unsigned int timeout __read_mostly = 30; module_param(timeout, uint, 0400); MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds"); -int (__rcu *nf_nat_snmp_hook)(struct sk_buff *skb, - unsigned int protoff, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo); +nf_nat_snmp_hook_fn __rcu *nf_nat_snmp_hook; EXPORT_SYMBOL_GPL(nf_nat_snmp_hook); static int snmp_conntrack_help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { - typeof(nf_nat_snmp_hook) nf_nat_snmp; + nf_nat_snmp_hook_fn *nf_nat_snmp; nf_conntrack_broadcast_help(skb, ct, ctinfo, timeout); diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c index 89e9914e5d03..a2e6833a0bf7 100644 --- a/net/netfilter/nf_conntrack_tftp.c +++ b/net/netfilter/nf_conntrack_tftp.c @@ -32,10 +32,7 @@ static unsigned int ports_c; module_param_array(ports, ushort, &ports_c, 0400); MODULE_PARM_DESC(ports, "Port numbers of TFTP servers"); -unsigned int (__rcu *nf_nat_tftp_hook)(struct sk_buff *skb, - enum ip_conntrack_info ctinfo, - struct nf_conntrack_expect *exp) - __read_mostly; +nf_nat_tftp_hook_fn __rcu *nf_nat_tftp_hook __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_tftp_hook); static int tftp_help(struct sk_buff *skb, @@ -48,7 +45,7 @@ static int tftp_help(struct sk_buff *skb, struct nf_conntrack_expect *exp; struct nf_conntrack_tuple *tuple; unsigned int ret = NF_ACCEPT; - typeof(nf_nat_tftp_hook) nf_nat_tftp; + nf_nat_tftp_hook_fn *nf_nat_tftp; tfh = skb_header_pointer(skb, protoff + sizeof(struct udphdr), sizeof(_tftph), &_tftph); From 1f290c497cb644dd3b52e69b2eaa24a5ffb66094 Mon Sep 17 00:00:00 2001 From: Jelle van der Waa Date: Mon, 9 Mar 2026 21:29:33 +0100 Subject: [PATCH 1232/1561] netfilter: nf_tables: Fix typo in enum description Fix the spelling of "options". Signed-off-by: Jelle van der Waa Signed-off-by: Florian Westphal --- include/uapi/linux/netfilter/nf_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 45c71f7d21c2..dca9e72b0558 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -884,7 +884,7 @@ enum nft_exthdr_flags { * @NFT_EXTHDR_OP_TCPOPT: match against tcp options * @NFT_EXTHDR_OP_IPV4: match against ipv4 options * @NFT_EXTHDR_OP_SCTP: match against sctp chunks - * @NFT_EXTHDR_OP_DCCP: match against dccp otions + * @NFT_EXTHDR_OP_DCCP: match against dccp options */ enum nft_exthdr_op { NFT_EXTHDR_OP_IPV6, From 7970d6aaf710db166de98c5356a260089896fae5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 10 Mar 2026 16:05:51 +0100 Subject: [PATCH 1233/1561] netfilter: nf_conntrack_sip: remove net variable shadowing net is already set, derived from nf_conn. I don't see how the device could be living in a different netns than the conntrack entry. Remove the extra variable and re-use existing one. Signed-off-by: Florian Westphal --- net/netfilter/nf_conntrack_sip.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 939502ff7c87..182cfb119448 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -869,9 +869,8 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, saddr = &ct->tuplehash[!dir].tuple.src.u3; } else if (sip_external_media) { struct net_device *dev = skb_dst(skb)->dev; - struct net *net = dev_net(dev); - struct flowi fl; struct dst_entry *dst = NULL; + struct flowi fl; memset(&fl, 0, sizeof(fl)); From 606bd17ef0de1b8f4227cb070308ddfd702979dc Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 11 Mar 2026 10:53:15 +0100 Subject: [PATCH 1234/1561] netfilter: add deprecation warning for dccp support Add a deprecation warning for the xt_dccp match and the nft exthdr code. Signed-off-by: Florian Westphal --- net/netfilter/nft_exthdr.c | 3 +++ net/netfilter/xt_dccp.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 5f01269a49bd..14d4ad7f518c 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -796,6 +796,9 @@ nft_exthdr_select_ops(const struct nft_ctx *ctx, break; #ifdef CONFIG_NFT_EXTHDR_DCCP case NFT_EXTHDR_OP_DCCP: + pr_warn_once("The dccp option matching is deprecated and scheduled to be removed in 2027.\n" + "Please contact the netfilter-devel mailing list or update your nftables rules.\n"); + if (tb[NFTA_EXTHDR_DREG]) return &nft_exthdr_dccp_ops; break; diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c index 037ab93e25d0..3db81e041af9 100644 --- a/net/netfilter/xt_dccp.c +++ b/net/netfilter/xt_dccp.c @@ -159,6 +159,9 @@ static int __init dccp_mt_init(void) { int ret; + pr_warn_once("The DCCP match is deprecated and scheduled to be removed in 2027.\n" + "Please contact the netfilter-devel mailing list or update your iptables rules\n"); + /* doff is 8 bits, so the maximum option size is (4*256). Don't put * this in BSS since DaveM is worried about locked TLB's for kernel * BSS. */ From 390a57dd61af837fcf5ad0681267890bd6cdd594 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 12 Mar 2026 18:53:41 +0100 Subject: [PATCH 1235/1561] netfilter: nf_conntrack_h323: remove unreliable debug code in decode_octstr The debug code (not enabled in any build) reads up to 6 octets of the inpt buffer, but does so without bound checks. Zap this. Signed-off-by: Florian Westphal --- net/netfilter/nf_conntrack_h323_asn1.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c index 7b1497ed97d2..09e0f724644f 100644 --- a/net/netfilter/nf_conntrack_h323_asn1.c +++ b/net/netfilter/nf_conntrack_h323_asn1.c @@ -21,7 +21,6 @@ #if H323_TRACE #define TAB_SIZE 4 -#define IFTHEN(cond, act) if(cond){act;} #ifdef __KERNEL__ #define PRINT printk #else @@ -29,7 +28,6 @@ #endif #define FNAME(name) name, #else -#define IFTHEN(cond, act) #define PRINT(fmt, args...) #define FNAME(name) #endif @@ -445,11 +443,6 @@ static int decode_octstr(struct bitstr *bs, const struct field_t *f, BYTE_ALIGN(bs); if (base && (f->attr & DECODE)) { /* The IP Address */ - IFTHEN(f->lb == 4, - PRINT(" = %d.%d.%d.%d:%d", - bs->cur[0], bs->cur[1], - bs->cur[2], bs->cur[3], - bs->cur[4] * 256 + bs->cur[5])); *((unsigned int *)(base + f->offset)) = bs->cur - bs->buf; } From 66b75e6bbeeb489156a74534561bbbd360843a73 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 10 Mar 2026 00:26:46 +0100 Subject: [PATCH 1236/1561] netfilter: add more netlink-based policy range checks These spots either already check the attribute range manually before use or the consuming functions tolerate unexpected values. Nevertheless, add more range checks via netlink policy so we gain more users and avoid possible re-use in other places that might not have the required manual checks. This also improves error reporting: netlink core can generate extack errors. Signed-off-by: Florian Westphal --- net/netfilter/ipset/ip_set_core.c | 2 +- net/netfilter/nf_tables_api.c | 20 +++++++++++++++----- net/netfilter/nfnetlink_acct.c | 2 +- net/netfilter/nfnetlink_cthelper.c | 2 +- net/netfilter/nfnetlink_hook.c | 2 +- net/netfilter/nfnetlink_log.c | 4 +++- net/netfilter/nfnetlink_osf.c | 2 +- net/netfilter/nfnetlink_queue.c | 2 +- net/netfilter/nft_compat.c | 2 +- net/netfilter/nft_connlimit.c | 2 +- net/netfilter/nft_ct.c | 2 +- net/netfilter/nft_dynset.c | 3 ++- net/netfilter/nft_exthdr.c | 2 +- net/netfilter/nft_inner.c | 2 +- net/netfilter/nft_limit.c | 2 +- net/netfilter/nft_log.c | 2 +- net/netfilter/nft_osf.c | 2 +- net/netfilter/nft_payload.c | 2 +- net/netfilter/nft_queue.c | 2 +- net/netfilter/nft_quota.c | 2 +- net/netfilter/nft_synproxy.c | 4 ++-- net/netfilter/nft_tunnel.c | 4 ++-- net/netfilter/nft_xfrm.c | 4 ++-- 23 files changed, 43 insertions(+), 30 deletions(-) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index d0c9fe59c67d..c5a26236a0bb 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -985,7 +985,7 @@ static const struct nla_policy ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] = { .len = IPSET_MAXNAMELEN - 1 }, [IPSET_ATTR_TYPENAME] = { .type = NLA_NUL_STRING, .len = IPSET_MAXNAMELEN - 1}, - [IPSET_ATTR_REVISION] = { .type = NLA_U8 }, + [IPSET_ATTR_REVISION] = NLA_POLICY_MAX(NLA_U8, IPSET_REVISION_MAX), [IPSET_ATTR_FAMILY] = { .type = NLA_U8 }, [IPSET_ATTR_DATA] = { .type = NLA_NESTED }, }; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 8ed8d5384b97..8537b94653d3 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1112,7 +1112,7 @@ static __be16 nft_base_seq_be16(const struct net *net) static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = { [NFTA_TABLE_NAME] = { .type = NLA_STRING, .len = NFT_TABLE_MAXNAMELEN - 1 }, - [NFTA_TABLE_FLAGS] = { .type = NLA_U32 }, + [NFTA_TABLE_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFT_TABLE_F_MASK), [NFTA_TABLE_HANDLE] = { .type = NLA_U64 }, [NFTA_TABLE_USERDATA] = { .type = NLA_BINARY, .len = NFT_USERDATA_MAXLEN } @@ -1878,7 +1878,7 @@ static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = { [NFTA_CHAIN_TYPE] = { .type = NLA_STRING, .len = NFT_MODULE_AUTOLOAD_LIMIT }, [NFTA_CHAIN_COUNTERS] = { .type = NLA_NESTED }, - [NFTA_CHAIN_FLAGS] = { .type = NLA_U32 }, + [NFTA_CHAIN_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFT_CHAIN_FLAGS), [NFTA_CHAIN_ID] = { .type = NLA_U32 }, [NFTA_CHAIN_USERDATA] = { .type = NLA_BINARY, .len = NFT_USERDATA_MAXLEN }, @@ -4597,7 +4597,16 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { .len = NFT_TABLE_MAXNAMELEN - 1 }, [NFTA_SET_NAME] = { .type = NLA_STRING, .len = NFT_SET_MAXNAMELEN - 1 }, - [NFTA_SET_FLAGS] = { .type = NLA_U32 }, + [NFTA_SET_FLAGS] = NLA_POLICY_MASK(NLA_BE32, + NFT_SET_ANONYMOUS | + NFT_SET_CONSTANT | + NFT_SET_INTERVAL | + NFT_SET_MAP | + NFT_SET_TIMEOUT | + NFT_SET_EVAL | + NFT_SET_OBJECT | + NFT_SET_CONCAT | + NFT_SET_EXPR), [NFTA_SET_KEY_TYPE] = { .type = NLA_U32 }, [NFTA_SET_KEY_LEN] = { .type = NLA_U32 }, [NFTA_SET_DATA_TYPE] = { .type = NLA_U32 }, @@ -5929,7 +5938,8 @@ const struct nft_set_ext_type nft_set_ext_types[] = { static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = { [NFTA_SET_ELEM_KEY] = { .type = NLA_NESTED }, [NFTA_SET_ELEM_DATA] = { .type = NLA_NESTED }, - [NFTA_SET_ELEM_FLAGS] = { .type = NLA_U32 }, + [NFTA_SET_ELEM_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFT_SET_ELEM_INTERVAL_END | + NFT_SET_ELEM_CATCHALL), [NFTA_SET_ELEM_TIMEOUT] = { .type = NLA_U64 }, [NFTA_SET_ELEM_EXPIRATION] = { .type = NLA_U64 }, [NFTA_SET_ELEM_USERDATA] = { .type = NLA_BINARY, @@ -8649,7 +8659,7 @@ static const struct nla_policy nft_flowtable_policy[NFTA_FLOWTABLE_MAX + 1] = { .len = NFT_NAME_MAXLEN - 1 }, [NFTA_FLOWTABLE_HOOK] = { .type = NLA_NESTED }, [NFTA_FLOWTABLE_HANDLE] = { .type = NLA_U64 }, - [NFTA_FLOWTABLE_FLAGS] = { .type = NLA_U32 }, + [NFTA_FLOWTABLE_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFT_FLOWTABLE_MASK), }; struct nft_flowtable *nft_flowtable_lookup(const struct net *net, diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 2bfaa773d82f..8ff1e0ad5cb0 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -373,7 +373,7 @@ static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = { [NFACCT_NAME] = { .type = NLA_NUL_STRING, .len = NFACCT_NAME_MAX-1 }, [NFACCT_BYTES] = { .type = NLA_U64 }, [NFACCT_PKTS] = { .type = NLA_U64 }, - [NFACCT_FLAGS] = { .type = NLA_U32 }, + [NFACCT_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFACCT_F_QUOTA), [NFACCT_QUOTA] = { .type = NLA_U64 }, [NFACCT_FILTER] = {.type = NLA_NESTED }, }; diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index d545fa459455..0d16ad82d70c 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -165,7 +165,7 @@ nfnl_cthelper_expect_policy(struct nf_conntrack_expect_policy *expect_policy, static const struct nla_policy nfnl_cthelper_expect_policy_set[NFCTH_POLICY_SET_MAX+1] = { - [NFCTH_POLICY_SET_NUM] = { .type = NLA_U32, }, + [NFCTH_POLICY_SET_NUM] = NLA_POLICY_MAX(NLA_BE32, NF_CT_MAX_EXPECT_CLASSES), }; static int diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c index 531706982859..5623c18fcd12 100644 --- a/net/netfilter/nfnetlink_hook.c +++ b/net/netfilter/nfnetlink_hook.c @@ -24,7 +24,7 @@ #include static const struct nla_policy nfnl_hook_nla_policy[NFNLA_HOOK_MAX + 1] = { - [NFNLA_HOOK_HOOKNUM] = { .type = NLA_U32 }, + [NFNLA_HOOK_HOOKNUM] = NLA_POLICY_MAX(NLA_BE32, 255), [NFNLA_HOOK_PRIORITY] = { .type = NLA_U32 }, [NFNLA_HOOK_DEV] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 91aa210b3e53..9497ebeedd55 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -879,7 +879,9 @@ static const struct nla_policy nfula_cfg_policy[NFULA_CFG_MAX+1] = { [NFULA_CFG_TIMEOUT] = { .type = NLA_U32 }, [NFULA_CFG_QTHRESH] = { .type = NLA_U32 }, [NFULA_CFG_NLBUFSIZ] = { .type = NLA_U32 }, - [NFULA_CFG_FLAGS] = { .type = NLA_U16 }, + [NFULA_CFG_FLAGS] = NLA_POLICY_MASK(NLA_BE16, NFULNL_CFG_F_SEQ | + NFULNL_CFG_F_SEQ_GLOBAL | + NFULNL_CFG_F_CONNTRACK), }; static int nfulnl_recv_config(struct sk_buff *skb, const struct nfnl_info *info, diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c index 45d9ad231a92..d64ce21c7b55 100644 --- a/net/netfilter/nfnetlink_osf.c +++ b/net/netfilter/nfnetlink_osf.c @@ -293,7 +293,7 @@ bool nf_osf_find(const struct sk_buff *skb, EXPORT_SYMBOL_GPL(nf_osf_find); static const struct nla_policy nfnl_osf_policy[OSF_ATTR_MAX + 1] = { - [OSF_ATTR_FINGER] = { .len = sizeof(struct nf_osf_user_finger) }, + [OSF_ATTR_FINGER] = NLA_POLICY_EXACT_LEN(sizeof(struct nf_osf_user_finger)), }; static int nfnl_osf_add_callback(struct sk_buff *skb, diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 2aa2380d976a..ac0c19233681 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -1599,7 +1599,7 @@ static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = { [NFQA_CFG_PARAMS] = { .len = sizeof(struct nfqnl_msg_config_params) }, [NFQA_CFG_QUEUE_MAXLEN] = { .type = NLA_U32 }, [NFQA_CFG_MASK] = { .type = NLA_U32 }, - [NFQA_CFG_FLAGS] = { .type = NLA_U32 }, + [NFQA_CFG_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFQA_CFG_F_MAX - 1), }; static const struct nf_queue_handler nfqh = { diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 53a614a0e3cd..decc725a33c2 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -195,7 +195,7 @@ static void target_compat_from_user(struct xt_target *t, void *in, void *out) static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1] = { [NFTA_RULE_COMPAT_PROTO] = { .type = NLA_U32 }, - [NFTA_RULE_COMPAT_FLAGS] = { .type = NLA_U32 }, + [NFTA_RULE_COMPAT_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFT_RULE_COMPAT_F_MASK), }; static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv) diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c index 09ac4f77e389..46b31d78abc6 100644 --- a/net/netfilter/nft_connlimit.c +++ b/net/netfilter/nft_connlimit.c @@ -159,7 +159,7 @@ static int nft_connlimit_obj_dump(struct sk_buff *skb, static const struct nla_policy nft_connlimit_policy[NFTA_CONNLIMIT_MAX + 1] = { [NFTA_CONNLIMIT_COUNT] = { .type = NLA_U32 }, - [NFTA_CONNLIMIT_FLAGS] = { .type = NLA_U32 }, + [NFTA_CONNLIMIT_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFT_CONNLIMIT_F_INV), }; static struct nft_object_type nft_connlimit_obj_type; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index a8fcb4b6ea1a..00dabd985883 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -338,7 +338,7 @@ static void nft_ct_set_eval(const struct nft_expr *expr, static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = { [NFTA_CT_DREG] = { .type = NLA_U32 }, [NFTA_CT_KEY] = NLA_POLICY_MAX(NLA_BE32, 255), - [NFTA_CT_DIRECTION] = { .type = NLA_U8 }, + [NFTA_CT_DIRECTION] = NLA_POLICY_MAX(NLA_U8, IP_CT_DIR_REPLY), [NFTA_CT_SREG] = { .type = NLA_U32 }, }; diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 57bf94ae8724..ee9d3e7b1ecf 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -163,7 +163,8 @@ static const struct nla_policy nft_dynset_policy[NFTA_DYNSET_MAX + 1] = { [NFTA_DYNSET_SREG_DATA] = { .type = NLA_U32 }, [NFTA_DYNSET_TIMEOUT] = { .type = NLA_U64 }, [NFTA_DYNSET_EXPR] = { .type = NLA_NESTED }, - [NFTA_DYNSET_FLAGS] = { .type = NLA_U32 }, + [NFTA_DYNSET_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFT_DYNSET_F_INV | + NFT_DYNSET_F_EXPR), [NFTA_DYNSET_EXPRESSIONS] = { .type = NLA_NESTED }, }; diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 14d4ad7f518c..b997307d94f9 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -490,7 +490,7 @@ static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = { [NFTA_EXTHDR_TYPE] = { .type = NLA_U8 }, [NFTA_EXTHDR_OFFSET] = { .type = NLA_U32 }, [NFTA_EXTHDR_LEN] = NLA_POLICY_MAX(NLA_BE32, 255), - [NFTA_EXTHDR_FLAGS] = { .type = NLA_U32 }, + [NFTA_EXTHDR_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFT_EXTHDR_F_PRESENT), [NFTA_EXTHDR_OP] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_EXTHDR_SREG] = { .type = NLA_U32 }, }; diff --git a/net/netfilter/nft_inner.c b/net/netfilter/nft_inner.c index c4569d4b9228..03ffb1159fc1 100644 --- a/net/netfilter/nft_inner.c +++ b/net/netfilter/nft_inner.c @@ -321,7 +321,7 @@ err: static const struct nla_policy nft_inner_policy[NFTA_INNER_MAX + 1] = { [NFTA_INNER_NUM] = { .type = NLA_U32 }, - [NFTA_INNER_FLAGS] = { .type = NLA_U32 }, + [NFTA_INNER_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFT_INNER_MASK), [NFTA_INNER_HDRSIZE] = { .type = NLA_U32 }, [NFTA_INNER_TYPE] = { .type = NLA_U32 }, [NFTA_INNER_EXPR] = { .type = NLA_NESTED }, diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index f6830621c471..167d99b1447f 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -189,7 +189,7 @@ static const struct nla_policy nft_limit_policy[NFTA_LIMIT_MAX + 1] = { [NFTA_LIMIT_UNIT] = { .type = NLA_U64 }, [NFTA_LIMIT_BURST] = { .type = NLA_U32 }, [NFTA_LIMIT_TYPE] = { .type = NLA_U32 }, - [NFTA_LIMIT_FLAGS] = { .type = NLA_U32 }, + [NFTA_LIMIT_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFT_LIMIT_F_INV), }; static int nft_limit_pkts_init(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index da0c0d1c9cea..0d868eea6257 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -69,7 +69,7 @@ static const struct nla_policy nft_log_policy[NFTA_LOG_MAX + 1] = { [NFTA_LOG_SNAPLEN] = { .type = NLA_U32 }, [NFTA_LOG_QTHRESHOLD] = { .type = NLA_U16 }, [NFTA_LOG_LEVEL] = { .type = NLA_U32 }, - [NFTA_LOG_FLAGS] = { .type = NLA_U32 }, + [NFTA_LOG_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NF_LOG_MASK), }; static int nft_log_modprobe(struct net *net, enum nf_log_type t) diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c index 39ccd67ed265..b2f44bc6bd3f 100644 --- a/net/netfilter/nft_osf.c +++ b/net/netfilter/nft_osf.c @@ -14,7 +14,7 @@ struct nft_osf { static const struct nla_policy nft_osf_policy[NFTA_OSF_MAX + 1] = { [NFTA_OSF_DREG] = { .type = NLA_U32 }, [NFTA_OSF_TTL] = { .type = NLA_U8 }, - [NFTA_OSF_FLAGS] = { .type = NLA_U32 }, + [NFTA_OSF_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFT_OSF_F_VERSION), }; static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs, diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 973d56af03ff..91b62083d942 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -216,7 +216,7 @@ static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = { [NFTA_PAYLOAD_LEN] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_PAYLOAD_CSUM_TYPE] = { .type = NLA_U32 }, [NFTA_PAYLOAD_CSUM_OFFSET] = NLA_POLICY_MAX(NLA_BE32, 255), - [NFTA_PAYLOAD_CSUM_FLAGS] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_CSUM_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFT_PAYLOAD_L4CSUM_PSEUDOHDR), }; static int nft_payload_init(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c index 8eb13a02942e..b83d209db886 100644 --- a/net/netfilter/nft_queue.c +++ b/net/netfilter/nft_queue.c @@ -95,7 +95,7 @@ static int nft_queue_validate(const struct nft_ctx *ctx, static const struct nla_policy nft_queue_policy[NFTA_QUEUE_MAX + 1] = { [NFTA_QUEUE_NUM] = { .type = NLA_U16 }, [NFTA_QUEUE_TOTAL] = { .type = NLA_U16 }, - [NFTA_QUEUE_FLAGS] = { .type = NLA_U16 }, + [NFTA_QUEUE_FLAGS] = NLA_POLICY_MASK(NLA_BE16, NFT_QUEUE_FLAG_MASK), [NFTA_QUEUE_SREG_QNUM] = { .type = NLA_U32 }, }; diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c index 3be788e5223c..6ed7c4409706 100644 --- a/net/netfilter/nft_quota.c +++ b/net/netfilter/nft_quota.c @@ -46,7 +46,7 @@ static inline void nft_quota_do_eval(struct nft_quota *priv, static const struct nla_policy nft_quota_policy[NFTA_QUOTA_MAX + 1] = { [NFTA_QUOTA_BYTES] = { .type = NLA_U64 }, - [NFTA_QUOTA_FLAGS] = { .type = NLA_U32 }, + [NFTA_QUOTA_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFT_QUOTA_F_INV), [NFTA_QUOTA_CONSUMED] = { .type = NLA_U64 }, }; diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c index 8e452a874969..7641f249614c 100644 --- a/net/netfilter/nft_synproxy.c +++ b/net/netfilter/nft_synproxy.c @@ -17,8 +17,8 @@ struct nft_synproxy { static const struct nla_policy nft_synproxy_policy[NFTA_SYNPROXY_MAX + 1] = { [NFTA_SYNPROXY_MSS] = { .type = NLA_U16 }, - [NFTA_SYNPROXY_WSCALE] = { .type = NLA_U8 }, - [NFTA_SYNPROXY_FLAGS] = { .type = NLA_U32 }, + [NFTA_SYNPROXY_WSCALE] = NLA_POLICY_MAX(NLA_U8, TCP_MAX_WSCALE), + [NFTA_SYNPROXY_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NF_SYNPROXY_OPT_MASK), }; static void nft_synproxy_tcp_options(struct synproxy_options *opts, diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index f5cadba91417..65d06300f48a 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -68,7 +68,7 @@ static void nft_tunnel_get_eval(const struct nft_expr *expr, static const struct nla_policy nft_tunnel_policy[NFTA_TUNNEL_MAX + 1] = { [NFTA_TUNNEL_KEY] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_TUNNEL_DREG] = { .type = NLA_U32 }, - [NFTA_TUNNEL_MODE] = NLA_POLICY_MAX(NLA_BE32, 255), + [NFTA_TUNNEL_MODE] = NLA_POLICY_MAX(NLA_BE32, NFT_TUNNEL_MODE_MAX), }; static int nft_tunnel_get_init(const struct nft_ctx *ctx, @@ -408,7 +408,7 @@ static const struct nla_policy nft_tunnel_key_policy[NFTA_TUNNEL_KEY_MAX + 1] = [NFTA_TUNNEL_KEY_IP] = { .type = NLA_NESTED, }, [NFTA_TUNNEL_KEY_IP6] = { .type = NLA_NESTED, }, [NFTA_TUNNEL_KEY_ID] = { .type = NLA_U32, }, - [NFTA_TUNNEL_KEY_FLAGS] = { .type = NLA_U32, }, + [NFTA_TUNNEL_KEY_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFT_TUNNEL_F_MASK), [NFTA_TUNNEL_KEY_TOS] = { .type = NLA_U8, }, [NFTA_TUNNEL_KEY_TTL] = { .type = NLA_U8, }, [NFTA_TUNNEL_KEY_SPORT] = { .type = NLA_U16, }, diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c index 7ffe6a2690d1..6858cd2d16a4 100644 --- a/net/netfilter/nft_xfrm.c +++ b/net/netfilter/nft_xfrm.c @@ -17,8 +17,8 @@ static const struct nla_policy nft_xfrm_policy[NFTA_XFRM_MAX + 1] = { [NFTA_XFRM_KEY] = NLA_POLICY_MAX(NLA_BE32, 255), - [NFTA_XFRM_DIR] = { .type = NLA_U8 }, - [NFTA_XFRM_SPNUM] = NLA_POLICY_MAX(NLA_BE32, 255), + [NFTA_XFRM_DIR] = NLA_POLICY_MAX(NLA_U8, XFRM_POLICY_OUT), + [NFTA_XFRM_SPNUM] = NLA_POLICY_MAX(NLA_BE32, XFRM_MAX_DEPTH - 1), [NFTA_XFRM_DREG] = { .type = NLA_U32 }, }; From 8e57338c3601d0cde806bd7e70c377109106c983 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 13 Mar 2026 13:12:30 +0100 Subject: [PATCH 1237/1561] netfilter: nf_tables: add netlink policy based cap on registers Should have no effect in practice; all of these use the nft_parse_register_load/store apis which is mandatory anyway due to the need to further validate the register load/store, e.g. that the size argument doesn't result in out-of-bounds load/store. OTOH this is a simple method to reject obviously wrong input at earlier stage. Signed-off-by: Florian Westphal --- include/uapi/linux/netfilter/nf_tables.h | 4 ++++ net/netfilter/nft_bitwise.c | 6 +++--- net/netfilter/nft_byteorder.c | 4 ++-- net/netfilter/nft_cmp.c | 2 +- net/netfilter/nft_ct.c | 4 ++-- net/netfilter/nft_exthdr.c | 4 ++-- net/netfilter/nft_fib.c | 2 +- net/netfilter/nft_hash.c | 4 ++-- net/netfilter/nft_immediate.c | 2 +- net/netfilter/nft_lookup.c | 4 ++-- net/netfilter/nft_meta.c | 4 ++-- net/netfilter/nft_numgen.c | 2 +- net/netfilter/nft_objref.c | 2 +- net/netfilter/nft_osf.c | 2 +- net/netfilter/nft_payload.c | 4 ++-- net/netfilter/nft_range.c | 2 +- net/netfilter/nft_rt.c | 2 +- net/netfilter/nft_socket.c | 2 +- net/netfilter/nft_tunnel.c | 2 +- net/netfilter/nft_xfrm.c | 2 +- 20 files changed, 32 insertions(+), 28 deletions(-) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index dca9e72b0558..0b708153469c 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -46,6 +46,10 @@ enum nft_registers { }; #define NFT_REG_MAX (__NFT_REG_MAX - 1) +#ifdef __KERNEL__ +#define NFT_REG32_MAX NFT_REG32_15 +#endif + #define NFT_REG_SIZE 16 #define NFT_REG32_SIZE 4 #define NFT_REG32_COUNT (NFT_REG32_15 - NFT_REG32_00 + 1) diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index a4ff781f334d..13808e9cd999 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -125,9 +125,9 @@ void nft_bitwise_eval(const struct nft_expr *expr, } static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = { - [NFTA_BITWISE_SREG] = { .type = NLA_U32 }, - [NFTA_BITWISE_SREG2] = { .type = NLA_U32 }, - [NFTA_BITWISE_DREG] = { .type = NLA_U32 }, + [NFTA_BITWISE_SREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), + [NFTA_BITWISE_SREG2] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), + [NFTA_BITWISE_DREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), [NFTA_BITWISE_LEN] = { .type = NLA_U32 }, [NFTA_BITWISE_MASK] = { .type = NLA_NESTED }, [NFTA_BITWISE_XOR] = { .type = NLA_NESTED }, diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index 744878773dac..e00dddfa2fc0 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -87,8 +87,8 @@ void nft_byteorder_eval(const struct nft_expr *expr, } static const struct nla_policy nft_byteorder_policy[NFTA_BYTEORDER_MAX + 1] = { - [NFTA_BYTEORDER_SREG] = { .type = NLA_U32 }, - [NFTA_BYTEORDER_DREG] = { .type = NLA_U32 }, + [NFTA_BYTEORDER_SREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), + [NFTA_BYTEORDER_DREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), [NFTA_BYTEORDER_OP] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_BYTEORDER_LEN] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_BYTEORDER_SIZE] = NLA_POLICY_MAX(NLA_BE32, 255), diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index b61dc9c3383e..e085c2a00b70 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -64,7 +64,7 @@ mismatch: } static const struct nla_policy nft_cmp_policy[NFTA_CMP_MAX + 1] = { - [NFTA_CMP_SREG] = { .type = NLA_U32 }, + [NFTA_CMP_SREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), [NFTA_CMP_OP] = { .type = NLA_U32 }, [NFTA_CMP_DATA] = { .type = NLA_NESTED }, }; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 00dabd985883..afa7142c529a 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -336,10 +336,10 @@ static void nft_ct_set_eval(const struct nft_expr *expr, } static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = { - [NFTA_CT_DREG] = { .type = NLA_U32 }, + [NFTA_CT_DREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), [NFTA_CT_KEY] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_CT_DIRECTION] = NLA_POLICY_MAX(NLA_U8, IP_CT_DIR_REPLY), - [NFTA_CT_SREG] = { .type = NLA_U32 }, + [NFTA_CT_SREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), }; #ifdef CONFIG_NF_CONNTRACK_ZONES diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index b997307d94f9..0407d6f708ae 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -486,13 +486,13 @@ err: #endif static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = { - [NFTA_EXTHDR_DREG] = { .type = NLA_U32 }, + [NFTA_EXTHDR_DREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), [NFTA_EXTHDR_TYPE] = { .type = NLA_U8 }, [NFTA_EXTHDR_OFFSET] = { .type = NLA_U32 }, [NFTA_EXTHDR_LEN] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_EXTHDR_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFT_EXTHDR_F_PRESENT), [NFTA_EXTHDR_OP] = NLA_POLICY_MAX(NLA_BE32, 255), - [NFTA_EXTHDR_SREG] = { .type = NLA_U32 }, + [NFTA_EXTHDR_SREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), }; static int nft_exthdr_init(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c index f7dc0e54375f..327a5f33659c 100644 --- a/net/netfilter/nft_fib.c +++ b/net/netfilter/nft_fib.c @@ -19,7 +19,7 @@ NFTA_FIB_F_PRESENT) const struct nla_policy nft_fib_policy[NFTA_FIB_MAX + 1] = { - [NFTA_FIB_DREG] = { .type = NLA_U32 }, + [NFTA_FIB_DREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), [NFTA_FIB_RESULT] = { .type = NLA_U32 }, [NFTA_FIB_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFTA_FIB_F_ALL), diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 1cf41e0a0e0c..3bacc9b53789 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -58,8 +58,8 @@ static void nft_symhash_eval(const struct nft_expr *expr, } static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = { - [NFTA_HASH_SREG] = { .type = NLA_U32 }, - [NFTA_HASH_DREG] = { .type = NLA_U32 }, + [NFTA_HASH_SREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), + [NFTA_HASH_DREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), [NFTA_HASH_LEN] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_HASH_MODULUS] = { .type = NLA_U32 }, [NFTA_HASH_SEED] = { .type = NLA_U32 }, diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 37c29947b380..1b733c7b1b0e 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -25,7 +25,7 @@ void nft_immediate_eval(const struct nft_expr *expr, } static const struct nla_policy nft_immediate_policy[NFTA_IMMEDIATE_MAX + 1] = { - [NFTA_IMMEDIATE_DREG] = { .type = NLA_U32 }, + [NFTA_IMMEDIATE_DREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), [NFTA_IMMEDIATE_DATA] = { .type = NLA_NESTED }, }; diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index e4e619027542..9fafe5afc490 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -125,8 +125,8 @@ static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = { [NFTA_LOOKUP_SET] = { .type = NLA_STRING, .len = NFT_SET_MAXNAMELEN - 1 }, [NFTA_LOOKUP_SET_ID] = { .type = NLA_U32 }, - [NFTA_LOOKUP_SREG] = { .type = NLA_U32 }, - [NFTA_LOOKUP_DREG] = { .type = NLA_U32 }, + [NFTA_LOOKUP_SREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), + [NFTA_LOOKUP_DREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), [NFTA_LOOKUP_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFT_LOOKUP_F_INV), }; diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index d0df6cf374d1..7478063339d4 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -460,9 +460,9 @@ void nft_meta_set_eval(const struct nft_expr *expr, EXPORT_SYMBOL_GPL(nft_meta_set_eval); const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = { - [NFTA_META_DREG] = { .type = NLA_U32 }, + [NFTA_META_DREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), [NFTA_META_KEY] = NLA_POLICY_MAX(NLA_BE32, 255), - [NFTA_META_SREG] = { .type = NLA_U32 }, + [NFTA_META_SREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), }; EXPORT_SYMBOL_GPL(nft_meta_policy); diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c index 4d69b3399195..b0c802370159 100644 --- a/net/netfilter/nft_numgen.c +++ b/net/netfilter/nft_numgen.c @@ -43,7 +43,7 @@ static void nft_ng_inc_eval(const struct nft_expr *expr, } static const struct nla_policy nft_ng_policy[NFTA_NG_MAX + 1] = { - [NFTA_NG_DREG] = { .type = NLA_U32 }, + [NFTA_NG_DREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), [NFTA_NG_MODULUS] = { .type = NLA_U32 }, [NFTA_NG_TYPE] = { .type = NLA_U32 }, [NFTA_NG_OFFSET] = { .type = NLA_U32 }, diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index 633cce69568f..249ded517446 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -265,7 +265,7 @@ static const struct nla_policy nft_objref_policy[NFTA_OBJREF_MAX + 1] = { [NFTA_OBJREF_IMM_NAME] = { .type = NLA_STRING, .len = NFT_OBJ_MAXNAMELEN - 1 }, [NFTA_OBJREF_IMM_TYPE] = { .type = NLA_U32 }, - [NFTA_OBJREF_SET_SREG] = { .type = NLA_U32 }, + [NFTA_OBJREF_SET_SREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), [NFTA_OBJREF_SET_NAME] = { .type = NLA_STRING, .len = NFT_SET_MAXNAMELEN - 1 }, [NFTA_OBJREF_SET_ID] = { .type = NLA_U32 }, diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c index b2f44bc6bd3f..18003433476c 100644 --- a/net/netfilter/nft_osf.c +++ b/net/netfilter/nft_osf.c @@ -12,7 +12,7 @@ struct nft_osf { }; static const struct nla_policy nft_osf_policy[NFTA_OSF_MAX + 1] = { - [NFTA_OSF_DREG] = { .type = NLA_U32 }, + [NFTA_OSF_DREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), [NFTA_OSF_TTL] = { .type = NLA_U8 }, [NFTA_OSF_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFT_OSF_F_VERSION), }; diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 91b62083d942..3fa3c6c835be 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -209,8 +209,8 @@ err: } static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = { - [NFTA_PAYLOAD_SREG] = { .type = NLA_U32 }, - [NFTA_PAYLOAD_DREG] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_SREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), + [NFTA_PAYLOAD_DREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), [NFTA_PAYLOAD_BASE] = { .type = NLA_U32 }, [NFTA_PAYLOAD_OFFSET] = { .type = NLA_BE32 }, [NFTA_PAYLOAD_LEN] = NLA_POLICY_MAX(NLA_BE32, 255), diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c index cbb02644b836..f8a1641afccf 100644 --- a/net/netfilter/nft_range.c +++ b/net/netfilter/nft_range.c @@ -41,7 +41,7 @@ void nft_range_eval(const struct nft_expr *expr, } static const struct nla_policy nft_range_policy[NFTA_RANGE_MAX + 1] = { - [NFTA_RANGE_SREG] = { .type = NLA_U32 }, + [NFTA_RANGE_SREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), [NFTA_RANGE_OP] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_RANGE_FROM_DATA] = { .type = NLA_NESTED }, [NFTA_RANGE_TO_DATA] = { .type = NLA_NESTED }, diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c index ad527f3596c0..e23cd4759851 100644 --- a/net/netfilter/nft_rt.c +++ b/net/netfilter/nft_rt.c @@ -103,7 +103,7 @@ err: } static const struct nla_policy nft_rt_policy[NFTA_RT_MAX + 1] = { - [NFTA_RT_DREG] = { .type = NLA_U32 }, + [NFTA_RT_DREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), [NFTA_RT_KEY] = NLA_POLICY_MAX(NLA_BE32, 255), }; diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index c55a1310226a..a146a45d7531 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -163,7 +163,7 @@ out_put_sk: static const struct nla_policy nft_socket_policy[NFTA_SOCKET_MAX + 1] = { [NFTA_SOCKET_KEY] = NLA_POLICY_MAX(NLA_BE32, 255), - [NFTA_SOCKET_DREG] = { .type = NLA_U32 }, + [NFTA_SOCKET_DREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), [NFTA_SOCKET_LEVEL] = NLA_POLICY_MAX(NLA_BE32, 255), }; diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index 65d06300f48a..0b987bc2132a 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -67,7 +67,7 @@ static void nft_tunnel_get_eval(const struct nft_expr *expr, static const struct nla_policy nft_tunnel_policy[NFTA_TUNNEL_MAX + 1] = { [NFTA_TUNNEL_KEY] = NLA_POLICY_MAX(NLA_BE32, 255), - [NFTA_TUNNEL_DREG] = { .type = NLA_U32 }, + [NFTA_TUNNEL_DREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), [NFTA_TUNNEL_MODE] = NLA_POLICY_MAX(NLA_BE32, NFT_TUNNEL_MODE_MAX), }; diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c index 6858cd2d16a4..65a75d88e5f0 100644 --- a/net/netfilter/nft_xfrm.c +++ b/net/netfilter/nft_xfrm.c @@ -19,7 +19,7 @@ static const struct nla_policy nft_xfrm_policy[NFTA_XFRM_MAX + 1] = { [NFTA_XFRM_KEY] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_XFRM_DIR] = NLA_POLICY_MAX(NLA_U8, XFRM_POLICY_OUT), [NFTA_XFRM_SPNUM] = NLA_POLICY_MAX(NLA_BE32, XFRM_MAX_DEPTH - 1), - [NFTA_XFRM_DREG] = { .type = NLA_U32 }, + [NFTA_XFRM_DREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), }; struct nft_xfrm { From 04e1ca21a5e3f84595c546b481b7bc2b5c3c5fbd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 17 Mar 2026 15:50:08 +0100 Subject: [PATCH 1238/1561] netfilter: nft_set_pipapo: increment data in one step Since commit e807b13cb3e3 ("nft_set_pipapo: Generalise group size for buckets") there is no longer a need to increment the data pointer in two steps. Switch to a single invocation of NFT_PIPAPO_GROUPS_PADDED_SIZE() helper, like the avx2 implementation. [ Stefano: Improve commit message ] Reviewed-by: Stefano Brivio Signed-off-by: Florian Westphal --- net/netfilter/nft_set_pipapo.c | 4 +--- net/netfilter/nft_set_pipapo.h | 3 --- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 7fd24e0cc428..50d4a4f04309 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -452,8 +452,6 @@ static struct nft_pipapo_elem *pipapo_get_slow(const struct nft_pipapo_match *m, pipapo_and_field_buckets_4bit(f, res_map, data); NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4; - data += f->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f); - /* Now populate the bitmap for the next field, unless this is * the last field, in which case return the matched 'ext' * pointer if any. @@ -498,7 +496,7 @@ next_match: map_index = !map_index; swap(res_map, fill_map); - data += NFT_PIPAPO_GROUPS_PADDING(f); + data += NFT_PIPAPO_GROUPS_PADDED_SIZE(f); } __local_unlock_nested_bh(&scratch->bh_lock); diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h index 9aee9a9eaeb7..b82abb03576e 100644 --- a/net/netfilter/nft_set_pipapo.h +++ b/net/netfilter/nft_set_pipapo.h @@ -42,9 +42,6 @@ /* Fields are padded to 32 bits in input registers */ #define NFT_PIPAPO_GROUPS_PADDED_SIZE(f) \ (round_up((f)->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f), sizeof(u32))) -#define NFT_PIPAPO_GROUPS_PADDING(f) \ - (NFT_PIPAPO_GROUPS_PADDED_SIZE(f) - (f)->groups / \ - NFT_PIPAPO_GROUPS_PER_BYTE(f)) /* Number of buckets given by 2 ^ n, with n bucket bits */ #define NFT_PIPAPO_BUCKETS(bb) (1 << (bb)) From a3f1e6a19a5dccb7a29d941b3acee0a3974974f4 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 17 Mar 2026 15:09:18 +0100 Subject: [PATCH 1239/1561] netfilter: nft_set_pipapo_avx2: remove redundant loop in lookup_slow nft_pipapo_avx2_lookup_slow will never be used in reality, because the common sizes are handled by avx2 optimized versions. However, nft_pipapo_avx2_lookup_slow loops over the data just like the avx2 functions. However, _slow doesn't need to do that. As-is, first loop sets all the right result bits and the next iterations boil down to 'x = x & x'. Remove the loop. Reviewed-by: Stefano Brivio Signed-off-by: Florian Westphal --- net/netfilter/nft_set_pipapo_avx2.c | 32 ++++++++--------------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index 6395982e4d95..dad265807b8b 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -1041,7 +1041,6 @@ nothing: * @map: Previous match result, used as initial bitmap * @fill: Destination bitmap to be filled with current match result * @f: Field, containing lookup and mapping tables - * @offset: Ignore buckets before the given index, no bits are filled there * @pkt: Packet data, pointer to input nftables register * @first: If this is the first field, don't source previous result * @last: Last field: stop at the first match and return bit index @@ -1056,32 +1055,19 @@ nothing: static int nft_pipapo_avx2_lookup_slow(const struct nft_pipapo_match *mdata, unsigned long *map, unsigned long *fill, const struct nft_pipapo_field *f, - int offset, const u8 *pkt, + const u8 *pkt, bool first, bool last) { - unsigned long bsize = f->bsize; - int i, ret = -1, b; - if (first) pipapo_resmap_init(mdata, map); - for (i = offset; i < bsize; i++) { - if (f->bb == 8) - pipapo_and_field_buckets_8bit(f, map, pkt); - else - pipapo_and_field_buckets_4bit(f, map, pkt); - NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4; + if (f->bb == 8) + pipapo_and_field_buckets_8bit(f, map, pkt); + else + pipapo_and_field_buckets_4bit(f, map, pkt); + NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4; - b = pipapo_refill(map, bsize, f->rules, fill, f->mt, last); - - if (last) - return b; - - if (ret == -1) - ret = b / XSAVE_YMM_SIZE; - } - - return ret; + return pipapo_refill(map, f->bsize, f->rules, fill, f->mt, last); } /** @@ -1201,7 +1187,7 @@ struct nft_pipapo_elem *pipapo_get_avx2(const struct nft_pipapo_match *m, NFT_SET_PIPAPO_AVX2_LOOKUP(8, 16); } else { ret = nft_pipapo_avx2_lookup_slow(m, res, fill, f, - ret, data, + data, first, last); } } else { @@ -1217,7 +1203,7 @@ struct nft_pipapo_elem *pipapo_get_avx2(const struct nft_pipapo_match *m, NFT_SET_PIPAPO_AVX2_LOOKUP(4, 32); } else { ret = nft_pipapo_avx2_lookup_slow(m, res, fill, f, - ret, data, + data, first, last); } } From 3785091c6c16a1ce4a5e0460881fc81ed8d2c8a1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 22 Mar 2026 23:51:47 +0100 Subject: [PATCH 1240/1561] netfilter: nft_meta: add double-tagged vlan and pppoe support Currently: add rule netdev x y ip saddr 1.1.1.1 does not work with neither double-tagged vlan nor pppoe packets. This is because the network and transport header offset are not pointing to the IP and transport protocol headers in the stack. This patch expands NFT_META_PROTOCOL and NFT_META_L4PROTO to parse double-tagged vlan and pppoe packets so matching network and transport header fields becomes possible with the existing userspace generated bytecode. Note that this parser only supports double-tagged vlan which is composed of vlan offload + vlan header in the skb payload area for simplicity. NFT_META_PROTOCOL is used by bridge and netdev family as an implicit dependency in the bytecode to match on network header fields. Similarly, there is also NFT_META_L4PROTO, which is also used as an implicit dependency when matching on the transport protocol header fields. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal --- include/net/netfilter/nf_tables.h | 4 ++ include/net/netfilter/nf_tables_ipv4.h | 17 +++++--- include/net/netfilter/nf_tables_ipv6.h | 16 +++++--- net/netfilter/nf_tables_core.c | 2 +- net/netfilter/nft_meta.c | 54 +++++++++++++++++++++++++- net/netfilter/nft_payload.c | 2 +- 6 files changed, 82 insertions(+), 13 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 66cedcfa338e..2c0173d9309c 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -31,7 +31,9 @@ struct nft_pktinfo { const struct nf_hook_state *state; u8 flags; u8 tprot; + __be16 ethertype; u16 fragoff; + u16 nhoff; u16 thoff; u16 inneroff; }; @@ -83,6 +85,8 @@ static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt) { pkt->flags = 0; pkt->tprot = 0; + pkt->ethertype = pkt->skb->protocol; + pkt->nhoff = 0; pkt->thoff = 0; pkt->fragoff = 0; } diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h index fcf967286e37..e715405a73cb 100644 --- a/include/net/netfilter/nf_tables_ipv4.h +++ b/include/net/netfilter/nf_tables_ipv4.h @@ -12,16 +12,19 @@ static inline void nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt) ip = ip_hdr(pkt->skb); pkt->flags = NFT_PKTINFO_L4PROTO; pkt->tprot = ip->protocol; + pkt->ethertype = pkt->skb->protocol; + pkt->nhoff = 0; pkt->thoff = ip_hdrlen(pkt->skb); pkt->fragoff = ntohs(ip->frag_off) & IP_OFFSET; } -static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt) +static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt, + int nhoff) { struct iphdr *iph, _iph; u32 len, thoff, skb_len; - iph = skb_header_pointer(pkt->skb, skb_network_offset(pkt->skb), + iph = skb_header_pointer(pkt->skb, skb_network_offset(pkt->skb) + nhoff, sizeof(*iph), &_iph); if (!iph) return -1; @@ -31,7 +34,7 @@ static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt) len = iph_totlen(pkt->skb, iph); thoff = iph->ihl * 4; - skb_len = pkt->skb->len - skb_network_offset(pkt->skb); + skb_len = pkt->skb->len - skb_network_offset(pkt->skb) - nhoff; if (skb_len < len) return -1; @@ -42,7 +45,9 @@ static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt) pkt->flags = NFT_PKTINFO_L4PROTO; pkt->tprot = iph->protocol; - pkt->thoff = skb_network_offset(pkt->skb) + thoff; + pkt->ethertype = pkt->skb->protocol; + pkt->nhoff = nhoff; + pkt->thoff = skb_network_offset(pkt->skb) + nhoff + thoff; pkt->fragoff = ntohs(iph->frag_off) & IP_OFFSET; return 0; @@ -50,7 +55,7 @@ static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt) static inline void nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt) { - if (__nft_set_pktinfo_ipv4_validate(pkt) < 0) + if (__nft_set_pktinfo_ipv4_validate(pkt, 0) < 0) nft_set_pktinfo_unspec(pkt); } @@ -78,6 +83,8 @@ static inline int nft_set_pktinfo_ipv4_ingress(struct nft_pktinfo *pkt) } pkt->flags = NFT_PKTINFO_L4PROTO; + pkt->ethertype = pkt->skb->protocol; + pkt->nhoff = 0; pkt->tprot = iph->protocol; pkt->thoff = thoff; pkt->fragoff = ntohs(iph->frag_off) & IP_OFFSET; diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h index c53ac00bb974..d7b8c559b795 100644 --- a/include/net/netfilter/nf_tables_ipv6.h +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -20,21 +20,23 @@ static inline void nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt) pkt->flags = NFT_PKTINFO_L4PROTO; pkt->tprot = protohdr; + pkt->ethertype = pkt->skb->protocol; + pkt->nhoff = 0; pkt->thoff = thoff; pkt->fragoff = frag_off; } -static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt) +static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt, int nhoff) { #if IS_ENABLED(CONFIG_IPV6) unsigned int flags = IP6_FH_F_AUTH; struct ipv6hdr *ip6h, _ip6h; - unsigned int thoff = 0; + unsigned int thoff = nhoff; unsigned short frag_off; u32 pkt_len, skb_len; int protohdr; - ip6h = skb_header_pointer(pkt->skb, skb_network_offset(pkt->skb), + ip6h = skb_header_pointer(pkt->skb, skb_network_offset(pkt->skb) + nhoff, sizeof(*ip6h), &_ip6h); if (!ip6h) return -1; @@ -43,7 +45,7 @@ static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt) return -1; pkt_len = ipv6_payload_len(pkt->skb, ip6h); - skb_len = pkt->skb->len - skb_network_offset(pkt->skb); + skb_len = pkt->skb->len - skb_network_offset(pkt->skb) - nhoff; if (pkt_len + sizeof(*ip6h) > skb_len) return -1; @@ -53,6 +55,8 @@ static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt) pkt->flags = NFT_PKTINFO_L4PROTO; pkt->tprot = protohdr; + pkt->ethertype = pkt->skb->protocol; + pkt->nhoff = nhoff; pkt->thoff = thoff; pkt->fragoff = frag_off; @@ -64,7 +68,7 @@ static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt) static inline void nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt) { - if (__nft_set_pktinfo_ipv6_validate(pkt) < 0) + if (__nft_set_pktinfo_ipv6_validate(pkt, 0) < 0) nft_set_pktinfo_unspec(pkt); } @@ -99,6 +103,8 @@ static inline int nft_set_pktinfo_ipv6_ingress(struct nft_pktinfo *pkt) pkt->flags = NFT_PKTINFO_L4PROTO; pkt->tprot = protohdr; + pkt->ethertype = pkt->skb->protocol; + pkt->nhoff = 0; pkt->thoff = thoff; pkt->fragoff = frag_off; diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 6557a4018c09..5ddd5b6e135f 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -151,7 +151,7 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr, unsigned char *ptr; if (priv->base == NFT_PAYLOAD_NETWORK_HEADER) - ptr = skb_network_header(skb); + ptr = skb_network_header(skb) + pkt->nhoff; else { if (!(pkt->flags & NFT_PKTINFO_L4PROTO)) return false; diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 7478063339d4..5b25851381e5 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -23,6 +23,8 @@ #include /* for TCP_TIME_WAIT */ #include #include +#include +#include #include #include @@ -309,6 +311,54 @@ nft_meta_get_eval_sdifname(u32 *dest, const struct nft_pktinfo *pkt) nft_meta_store_ifname(dest, dev); } +static void nft_meta_pktinfo_may_update(struct nft_pktinfo *pkt) +{ + struct sk_buff *skb = pkt->skb; + struct vlan_ethhdr *veth; + __be16 ethertype; + int nhoff; + + /* Is this an IP packet? Then, skip. */ + if (pkt->flags) + return; + + /* ... else maybe an IP packet over PPPoE or Q-in-Q? */ + switch (skb->protocol) { + case htons(ETH_P_8021Q): + if (!pskb_may_pull(skb, skb_mac_offset(skb) + sizeof(*veth))) + return; + + veth = (struct vlan_ethhdr *)skb_mac_header(skb); + nhoff = VLAN_HLEN; + ethertype = veth->h_vlan_encapsulated_proto; + break; + case htons(ETH_P_PPP_SES): + if (!nf_flow_pppoe_proto(skb, ðertype)) + return; + + nhoff = PPPOE_SES_HLEN; + break; + default: + return; + } + + nhoff += skb_network_offset(skb); + switch (ethertype) { + case htons(ETH_P_IP): + if (__nft_set_pktinfo_ipv4_validate(pkt, nhoff)) + nft_set_pktinfo_unspec(pkt); + break; + case htons(ETH_P_IPV6): + if (__nft_set_pktinfo_ipv6_validate(pkt, nhoff)) + nft_set_pktinfo_unspec(pkt); + break; + default: + break; + } + + pkt->ethertype = ethertype; +} + void nft_meta_get_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -322,12 +372,14 @@ void nft_meta_get_eval(const struct nft_expr *expr, *dest = skb->len; break; case NFT_META_PROTOCOL: - nft_reg_store16(dest, (__force u16)skb->protocol); + nft_meta_pktinfo_may_update((struct nft_pktinfo *)pkt); + nft_reg_store16(dest, (__force u16)pkt->ethertype); break; case NFT_META_NFPROTO: nft_reg_store8(dest, nft_pf(pkt)); break; case NFT_META_L4PROTO: + nft_meta_pktinfo_may_update((struct nft_pktinfo *)pkt); if (!(pkt->flags & NFT_PKTINFO_L4PROTO)) goto err; nft_reg_store8(dest, pkt->tprot); diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 3fa3c6c835be..01e13e5255a9 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -183,7 +183,7 @@ void nft_payload_eval(const struct nft_expr *expr, offset = skb_mac_header(skb) - skb->data; break; case NFT_PAYLOAD_NETWORK_HEADER: - offset = skb_network_offset(skb); + offset = skb_network_offset(skb) + pkt->nhoff; break; case NFT_PAYLOAD_TRANSPORT_HEADER: if (!(pkt->flags & NFT_PKTINFO_L4PROTO) || pkt->fragoff) From f33fad8dbfff7c35c22abb3a7305173d005ac362 Mon Sep 17 00:00:00 2001 From: David Laight Date: Thu, 26 Mar 2026 20:18:19 +0000 Subject: [PATCH 1241/1561] netfilter: nf_conntrack_h323: Correct indentation when H323_TRACE defined The trace lines are indented using PRINT("%*.s", xx, " "). Userspace will treat this as "%*.0s" and will output no characters when 'xx' is zero, the kernel treats it as "%*s" and will output a single ' ' - which is probably what is intended. Change all the formats to "%*s" removing the default precision. This gives a single space indent when level is zero. Signed-off-by: David Laight Reviewed-by: Petr Mladek Signed-off-by: Florian Westphal --- net/netfilter/nf_conntrack_h323_asn1.c | 38 +++++++++++++------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c index 09e0f724644f..6830c9da3507 100644 --- a/net/netfilter/nf_conntrack_h323_asn1.c +++ b/net/netfilter/nf_conntrack_h323_asn1.c @@ -274,7 +274,7 @@ static unsigned int get_uint(struct bitstr *bs, int b) static int decode_nul(struct bitstr *bs, const struct field_t *f, char *base, int level) { - PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); + PRINT("%*s%s\n", level * TAB_SIZE, " ", f->name); return H323_ERROR_NONE; } @@ -282,7 +282,7 @@ static int decode_nul(struct bitstr *bs, const struct field_t *f, static int decode_bool(struct bitstr *bs, const struct field_t *f, char *base, int level) { - PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); + PRINT("%*s%s\n", level * TAB_SIZE, " ", f->name); INC_BIT(bs); if (nf_h323_error_boundary(bs, 0, 0)) @@ -295,7 +295,7 @@ static int decode_oid(struct bitstr *bs, const struct field_t *f, { int len; - PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); + PRINT("%*s%s\n", level * TAB_SIZE, " ", f->name); BYTE_ALIGN(bs); if (nf_h323_error_boundary(bs, 1, 0)) @@ -314,7 +314,7 @@ static int decode_int(struct bitstr *bs, const struct field_t *f, { unsigned int len; - PRINT("%*.s%s", level * TAB_SIZE, " ", f->name); + PRINT("%*s%s", level * TAB_SIZE, " ", f->name); switch (f->sz) { case BYTE: /* Range == 256 */ @@ -361,7 +361,7 @@ static int decode_int(struct bitstr *bs, const struct field_t *f, static int decode_enum(struct bitstr *bs, const struct field_t *f, char *base, int level) { - PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); + PRINT("%*s%s\n", level * TAB_SIZE, " ", f->name); if ((f->attr & EXT) && get_bit(bs)) { INC_BITS(bs, 7); @@ -379,7 +379,7 @@ static int decode_bitstr(struct bitstr *bs, const struct field_t *f, { unsigned int len; - PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); + PRINT("%*s%s\n", level * TAB_SIZE, " ", f->name); BYTE_ALIGN(bs); switch (f->sz) { @@ -415,7 +415,7 @@ static int decode_numstr(struct bitstr *bs, const struct field_t *f, { unsigned int len; - PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); + PRINT("%*s%s\n", level * TAB_SIZE, " ", f->name); /* 2 <= Range <= 255 */ if (nf_h323_error_boundary(bs, 0, f->sz)) @@ -435,7 +435,7 @@ static int decode_octstr(struct bitstr *bs, const struct field_t *f, { unsigned int len; - PRINT("%*.s%s", level * TAB_SIZE, " ", f->name); + PRINT("%*s%s", level * TAB_SIZE, " ", f->name); switch (f->sz) { case FIXD: /* Range == 1 */ @@ -483,7 +483,7 @@ static int decode_bmpstr(struct bitstr *bs, const struct field_t *f, { unsigned int len; - PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); + PRINT("%*s%s\n", level * TAB_SIZE, " ", f->name); switch (f->sz) { case BYTE: /* Range == 256 */ @@ -515,7 +515,7 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, const struct field_t *son; unsigned char *beg = NULL; - PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); + PRINT("%*s%s\n", level * TAB_SIZE, " ", f->name); /* Decode? */ base = (base && (f->attr & DECODE)) ? base + f->offset : NULL; @@ -537,7 +537,7 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, /* Decode the root components */ for (i = opt = 0, son = f->fields; i < f->lb; i++, son++) { if (son->attr & STOP) { - PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ", + PRINT("%*s%s\n", (level + 1) * TAB_SIZE, " ", son->name); return H323_ERROR_STOP; } @@ -555,7 +555,7 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, if (nf_h323_error_boundary(bs, len, 0)) return H323_ERROR_BOUND; if (!base || !(son->attr & DECODE)) { - PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, + PRINT("%*s%s\n", (level + 1) * TAB_SIZE, " ", son->name); bs->cur += len; continue; @@ -608,7 +608,7 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, } if (son->attr & STOP) { - PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ", + PRINT("%*s%s\n", (level + 1) * TAB_SIZE, " ", son->name); return H323_ERROR_STOP; } @@ -622,7 +622,7 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, if (nf_h323_error_boundary(bs, len, 0)) return H323_ERROR_BOUND; if (!base || !(son->attr & DECODE)) { - PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ", + PRINT("%*s%s\n", (level + 1) * TAB_SIZE, " ", son->name); bs->cur += len; continue; @@ -648,7 +648,7 @@ static int decode_seqof(struct bitstr *bs, const struct field_t *f, const struct field_t *son; unsigned char *beg = NULL; - PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); + PRINT("%*s%s\n", level * TAB_SIZE, " ", f->name); /* Decode? */ base = (base && (f->attr & DECODE)) ? base + f->offset : NULL; @@ -703,7 +703,7 @@ static int decode_seqof(struct bitstr *bs, const struct field_t *f, if (nf_h323_error_boundary(bs, len, 0)) return H323_ERROR_BOUND; if (!base || !(son->attr & DECODE)) { - PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, + PRINT("%*s%s\n", (level + 1) * TAB_SIZE, " ", son->name); bs->cur += len; continue; @@ -744,7 +744,7 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f, const struct field_t *son; unsigned char *beg = NULL; - PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); + PRINT("%*s%s\n", level * TAB_SIZE, " ", f->name); /* Decode? */ base = (base && (f->attr & DECODE)) ? base + f->offset : NULL; @@ -785,7 +785,7 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f, /* Transfer to son level */ son = &f->fields[type]; if (son->attr & STOP) { - PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ", son->name); + PRINT("%*s%s\n", (level + 1) * TAB_SIZE, " ", son->name); return H323_ERROR_STOP; } @@ -797,7 +797,7 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f, if (nf_h323_error_boundary(bs, len, 0)) return H323_ERROR_BOUND; if (!base || !(son->attr & DECODE)) { - PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ", + PRINT("%*s%s\n", (level + 1) * TAB_SIZE, " ", son->name); bs->cur += len; return H323_ERROR_NONE; From c6f85577584b5f8414141ae389e974b8ca6a698b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 30 Mar 2026 11:04:02 +0200 Subject: [PATCH 1242/1561] netfilter: nf_tables_offload: add nft_flow_action_entry_next() and use it Add a new helper function to retrieve the next action entry in flow rule, check if the maximum number of actions is reached, bail out in such case. Replace existing opencoded iteration on the action array by this helper function. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal --- include/net/netfilter/nf_tables_offload.h | 10 ++++++++++ net/netfilter/nf_dup_netdev.c | 5 ++++- net/netfilter/nft_immediate.c | 4 +++- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h index 3568b6a2f5f0..14c427891ee6 100644 --- a/include/net/netfilter/nf_tables_offload.h +++ b/include/net/netfilter/nf_tables_offload.h @@ -67,6 +67,16 @@ struct nft_flow_rule { struct flow_rule *rule; }; +static inline struct flow_action_entry * +nft_flow_action_entry_next(struct nft_offload_ctx *ctx, + struct nft_flow_rule *flow) +{ + if (unlikely(ctx->num_actions >= flow->rule->action.num_entries)) + return NULL; + + return &flow->rule->action.entries[ctx->num_actions++]; +} + void nft_flow_rule_set_addr_type(struct nft_flow_rule *flow, enum flow_dissector_key_id addr_type); diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c index fab8b9011098..e348fb90b8dc 100644 --- a/net/netfilter/nf_dup_netdev.c +++ b/net/netfilter/nf_dup_netdev.c @@ -95,7 +95,10 @@ int nft_fwd_dup_netdev_offload(struct nft_offload_ctx *ctx, if (!dev) return -EOPNOTSUPP; - entry = &flow->rule->action.entries[ctx->num_actions++]; + entry = nft_flow_action_entry_next(ctx, flow); + if (!entry) + return -E2BIG; + entry->id = id; entry->dev = dev; diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 1b733c7b1b0e..d00eb2eb30e4 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -279,7 +279,9 @@ static int nft_immediate_offload_verdict(struct nft_offload_ctx *ctx, struct flow_action_entry *entry; const struct nft_data *data; - entry = &flow->rule->action.entries[ctx->num_actions++]; + entry = nft_flow_action_entry_next(ctx, flow); + if (!entry) + return -E2BIG; data = &priv->data; switch (data->verdict.code) { From 607363a10ee60cc46ae0168349f1f8c59045c371 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 7 Apr 2026 17:11:09 +0200 Subject: [PATCH 1243/1561] wifi: at76c50x: refactor endpoint lookup Use the common USB helper for looking up bulk and interrupt endpoints instead of open coding. Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260407151111.3187826-2-johan@kernel.org Signed-off-by: Johannes Berg --- drivers/net/wireless/atmel/at76c50x-usb.c | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/drivers/net/wireless/atmel/at76c50x-usb.c b/drivers/net/wireless/atmel/at76c50x-usb.c index 44b04ea3cc8b..32e3e09e7680 100644 --- a/drivers/net/wireless/atmel/at76c50x-usb.c +++ b/drivers/net/wireless/atmel/at76c50x-usb.c @@ -2226,34 +2226,20 @@ static struct at76_priv *at76_alloc_new_device(struct usb_device *udev) static int at76_alloc_urbs(struct at76_priv *priv, struct usb_interface *interface) { - struct usb_endpoint_descriptor *endpoint, *ep_in, *ep_out; - int i; + struct usb_endpoint_descriptor *ep_in, *ep_out; int buffer_size; struct usb_host_interface *iface_desc; + int ret; at76_dbg(DBG_PROC_ENTRY, "%s: ENTER", __func__); at76_dbg(DBG_URB, "%s: NumEndpoints %d ", __func__, interface->cur_altsetting->desc.bNumEndpoints); - ep_in = NULL; - ep_out = NULL; iface_desc = interface->cur_altsetting; - for (i = 0; i < iface_desc->desc.bNumEndpoints; i++) { - endpoint = &iface_desc->endpoint[i].desc; - at76_dbg(DBG_URB, "%s: %d. endpoint: addr 0x%x attr 0x%x", - __func__, i, endpoint->bEndpointAddress, - endpoint->bmAttributes); - - if (!ep_in && usb_endpoint_is_bulk_in(endpoint)) - ep_in = endpoint; - - if (!ep_out && usb_endpoint_is_bulk_out(endpoint)) - ep_out = endpoint; - } - - if (!ep_in || !ep_out) { + ret = usb_find_common_endpoints(iface_desc, &ep_in, &ep_out, NULL, NULL); + if (ret) { dev_err(&interface->dev, "bulk endpoints missing\n"); return -ENXIO; } From c885e392aadb43227d3c677eab2941a2c31355ff Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 7 Apr 2026 17:11:10 +0200 Subject: [PATCH 1244/1561] wifi: libertas: refactor endpoint lookup Use the common USB helpers for looking up bulk and interrupt endpoints (and determining max packet size) instead of open coding. Note that the driver has an implicit max packet size check which is kept. Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260407151111.3187826-3-johan@kernel.org Signed-off-by: Johannes Berg --- .../net/wireless/marvell/libertas/if_usb.c | 39 +++++++++---------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/drivers/net/wireless/marvell/libertas/if_usb.c b/drivers/net/wireless/marvell/libertas/if_usb.c index 176f2106bab6..4fae0e335136 100644 --- a/drivers/net/wireless/marvell/libertas/if_usb.c +++ b/drivers/net/wireless/marvell/libertas/if_usb.c @@ -193,13 +193,12 @@ static void if_usb_reset_olpc_card(struct lbs_private *priv) static int if_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { + struct usb_endpoint_descriptor *ep_in, *ep_out; struct usb_device *udev; struct usb_host_interface *iface_desc; - struct usb_endpoint_descriptor *endpoint; struct lbs_private *priv; struct if_usb_card *cardp; int r = -ENOMEM; - int i; udev = interface_to_usbdev(intf); @@ -224,27 +223,27 @@ static int if_usb_probe(struct usb_interface *intf, init_usb_anchor(&cardp->rx_submitted); init_usb_anchor(&cardp->tx_submitted); - for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { - endpoint = &iface_desc->endpoint[i].desc; - if (usb_endpoint_is_bulk_in(endpoint)) { - cardp->ep_in_size = le16_to_cpu(endpoint->wMaxPacketSize); - cardp->ep_in = usb_endpoint_num(endpoint); - - lbs_deb_usbd(&udev->dev, "in_endpoint = %d\n", cardp->ep_in); - lbs_deb_usbd(&udev->dev, "Bulk in size is %d\n", cardp->ep_in_size); - - } else if (usb_endpoint_is_bulk_out(endpoint)) { - cardp->ep_out_size = le16_to_cpu(endpoint->wMaxPacketSize); - cardp->ep_out = usb_endpoint_num(endpoint); - - lbs_deb_usbd(&udev->dev, "out_endpoint = %d\n", cardp->ep_out); - lbs_deb_usbd(&udev->dev, "Bulk out size is %d\n", cardp->ep_out_size); - } - } - if (!cardp->ep_out_size || !cardp->ep_in_size) { + if (usb_find_common_endpoints_reverse(iface_desc, &ep_in, &ep_out, NULL, NULL)) { lbs_deb_usbd(&udev->dev, "Endpoints not found\n"); goto dealloc; } + + cardp->ep_in_size = usb_endpoint_maxp(ep_in); + cardp->ep_in = usb_endpoint_num(ep_in); + + lbs_deb_usbd(&udev->dev, "in_endpoint = %d\n", cardp->ep_in); + lbs_deb_usbd(&udev->dev, "Bulk in size is %d\n", cardp->ep_in_size); + + cardp->ep_out_size = usb_endpoint_maxp(ep_out); + cardp->ep_out = usb_endpoint_num(ep_out); + + lbs_deb_usbd(&udev->dev, "out_endpoint = %d\n", cardp->ep_out); + lbs_deb_usbd(&udev->dev, "Bulk out size is %d\n", cardp->ep_out_size); + + if (!cardp->ep_out_size || !cardp->ep_in_size) { + lbs_deb_usbd(&udev->dev, "Endpoints not valid\n"); + goto dealloc; + } if (!(cardp->rx_urb = usb_alloc_urb(0, GFP_KERNEL))) { lbs_deb_usbd(&udev->dev, "Rx URB allocation failed\n"); goto dealloc; From e801194b604cd9fdb24c2958892fd79e1aa3b651 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 7 Apr 2026 17:11:11 +0200 Subject: [PATCH 1245/1561] wifi: libertas_tf: refactor endpoint lookup Use the common USB helpers for looking up bulk and interrupt endpoints (and determining max packet size) instead of open coding. Note that the driver has an implicit max packet size check which is kept. Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260407151111.3187826-4-johan@kernel.org Signed-off-by: Johannes Berg --- .../net/wireless/marvell/libertas_tf/if_usb.c | 48 +++++++++---------- 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/drivers/net/wireless/marvell/libertas_tf/if_usb.c b/drivers/net/wireless/marvell/libertas_tf/if_usb.c index 07b38f2b8f58..b85c6d783bf7 100644 --- a/drivers/net/wireless/marvell/libertas_tf/if_usb.c +++ b/drivers/net/wireless/marvell/libertas_tf/if_usb.c @@ -144,12 +144,12 @@ static const struct lbtf_ops if_usb_ops = { static int if_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { + struct usb_endpoint_descriptor *ep_in, *ep_out; struct usb_device *udev; struct usb_host_interface *iface_desc; - struct usb_endpoint_descriptor *endpoint; struct lbtf_private *priv; struct if_usb_card *cardp; - int i; + int ret; lbtf_deb_enter(LBTF_DEB_USB); udev = interface_to_usbdev(intf); @@ -171,31 +171,27 @@ static int if_usb_probe(struct usb_interface *intf, udev->descriptor.bDeviceSubClass, udev->descriptor.bDeviceProtocol); - for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { - endpoint = &iface_desc->endpoint[i].desc; - if (usb_endpoint_is_bulk_in(endpoint)) { - cardp->ep_in_size = - le16_to_cpu(endpoint->wMaxPacketSize); - cardp->ep_in = usb_endpoint_num(endpoint); - - lbtf_deb_usbd(&udev->dev, "in_endpoint = %d\n", - cardp->ep_in); - lbtf_deb_usbd(&udev->dev, "Bulk in size is %d\n", - cardp->ep_in_size); - } else if (usb_endpoint_is_bulk_out(endpoint)) { - cardp->ep_out_size = - le16_to_cpu(endpoint->wMaxPacketSize); - cardp->ep_out = usb_endpoint_num(endpoint); - - lbtf_deb_usbd(&udev->dev, "out_endpoint = %d\n", - cardp->ep_out); - lbtf_deb_usbd(&udev->dev, "Bulk out size is %d\n", - cardp->ep_out_size); - } - } - if (!cardp->ep_out_size || !cardp->ep_in_size) { + ret = usb_find_common_endpoints_reverse(iface_desc, &ep_in, &ep_out, + NULL, NULL); + if (ret) { lbtf_deb_usbd(&udev->dev, "Endpoints not found\n"); - /* Endpoints not found */ + goto dealloc; + } + + cardp->ep_in_size = usb_endpoint_maxp(ep_in); + cardp->ep_in = usb_endpoint_num(ep_in); + + lbtf_deb_usbd(&udev->dev, "in_endpoint = %d\n", cardp->ep_in); + lbtf_deb_usbd(&udev->dev, "Bulk in size is %d\n", cardp->ep_in_size); + + cardp->ep_out_size = usb_endpoint_maxp(ep_out); + cardp->ep_out = usb_endpoint_num(ep_out); + + lbtf_deb_usbd(&udev->dev, "out_endpoint = %d\n", cardp->ep_out); + lbtf_deb_usbd(&udev->dev, "Bulk out size is %d\n", cardp->ep_out_size); + + if (!cardp->ep_out_size || !cardp->ep_in_size) { + lbtf_deb_usbd(&udev->dev, "Endpoints not valid\n"); goto dealloc; } From ea06baf59bd4b83c2cb13698411909e5e6be001e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 7 Apr 2026 20:06:46 -0700 Subject: [PATCH 1246/1561] wifi: ipw2x00: Rename michael_mic() to libipw_michael_mic() Rename the driver-local michael_mic() function to libipw_michael_mic() to prevent a name conflict with the common michael_mic() function. Note that this code will be superseded later when libipw starts using the common michael_mic(). This commit just prevents a bisection hazard. Signed-off-by: Eric Biggers Link: https://patch.msgid.link/20260408030651.80336-2-ebiggers@kernel.org Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/ipw2x00/libipw_crypto_tkip.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/ipw2x00/libipw_crypto_tkip.c b/drivers/net/wireless/intel/ipw2x00/libipw_crypto_tkip.c index c6b0de8d91ae..c2cd6808fd0f 100644 --- a/drivers/net/wireless/intel/ipw2x00/libipw_crypto_tkip.c +++ b/drivers/net/wireless/intel/ipw2x00/libipw_crypto_tkip.c @@ -464,7 +464,7 @@ static int libipw_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) return keyidx; } -static int michael_mic(struct crypto_shash *tfm_michael, u8 *key, u8 *hdr, +static int libipw_michael_mic(struct crypto_shash *tfm_michael, u8 *key, u8 *hdr, u8 *data, size_t data_len, u8 *mic) { SHASH_DESC_ON_STACK(desc, tfm_michael); @@ -546,7 +546,7 @@ static int libipw_michael_mic_add(struct sk_buff *skb, int hdr_len, michael_mic_hdr(skb, tkey->tx_hdr); pos = skb_put(skb, 8); - if (michael_mic(tkey->tx_tfm_michael, &tkey->key[16], tkey->tx_hdr, + if (libipw_michael_mic(tkey->tx_tfm_michael, &tkey->key[16], tkey->tx_hdr, skb->data + hdr_len, skb->len - 8 - hdr_len, pos)) return -1; @@ -584,7 +584,7 @@ static int libipw_michael_mic_verify(struct sk_buff *skb, int keyidx, return -1; michael_mic_hdr(skb, tkey->rx_hdr); - if (michael_mic(tkey->rx_tfm_michael, &tkey->key[24], tkey->rx_hdr, + if (libipw_michael_mic(tkey->rx_tfm_michael, &tkey->key[24], tkey->rx_hdr, skb->data + hdr_len, skb->len - 8 - hdr_len, mic)) return -1; if (memcmp(mic, skb->data + skb->len - 8, 8) != 0) { From 613c83766884503f0f6bfdc45964c84b5286091c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 7 Apr 2026 20:06:47 -0700 Subject: [PATCH 1247/1561] wifi: mac80211, cfg80211: Export michael_mic() and move it to cfg80211 Export michael_mic() so that the ath11k and ath12k drivers can call it. In addition, move it from mac80211 to cfg80211 so that the ipw2x00 drivers, which depend on cfg80211 but not mac80211, can also call it. Currently these drivers have their own local implementations of michael_mic() based on crypto_shash, which is redundant and inefficient. By consolidating all the Michael MIC code into cfg80211, we'll be able to remove the duplicate Michael MIC code in the crypto/ directory. Signed-off-by: Eric Biggers Link: https://patch.msgid.link/20260408030651.80336-3-ebiggers@kernel.org Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 5 +++++ net/mac80211/Makefile | 1 - net/mac80211/michael.h | 22 ------------------- net/mac80211/wpa.c | 1 - net/wireless/Makefile | 2 +- .../michael.c => wireless/michael-mic.c} | 5 ++++- 6 files changed, 10 insertions(+), 26 deletions(-) delete mode 100644 net/mac80211/michael.h rename net/{mac80211/michael.c => wireless/michael-mic.c} (96%) diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index ffa8f9f77efe..23f9df9be837 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1921,6 +1921,11 @@ enum ieee80211_radio_measurement_actioncode { #define PMK_MAX_LEN 64 #define SAE_PASSWORD_MAX_LEN 128 +#define MICHAEL_MIC_LEN 8 + +void michael_mic(const u8 *key, struct ieee80211_hdr *hdr, + const u8 *data, size_t data_len, u8 *mic); + /* Public action codes (IEEE Std 802.11-2016, 9.6.8.1, Table 9-307) */ enum ieee80211_pub_actioncode { WLAN_PUB_ACTION_20_40_BSS_COEX = 0, diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index abf46c951299..20c3135b73ea 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -18,7 +18,6 @@ mac80211-y := \ iface.o \ link.o \ rate.o \ - michael.o \ tkip.o \ aes_cmac.o \ aes_gmac.o \ diff --git a/net/mac80211/michael.h b/net/mac80211/michael.h deleted file mode 100644 index a7fdb8e84615..000000000000 --- a/net/mac80211/michael.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Michael MIC implementation - optimized for TKIP MIC operations - * Copyright 2002-2003, Instant802 Networks, Inc. - */ - -#ifndef MICHAEL_H -#define MICHAEL_H - -#include -#include - -#define MICHAEL_MIC_LEN 8 - -struct michael_mic_ctx { - u32 l, r; -}; - -void michael_mic(const u8 *key, struct ieee80211_hdr *hdr, - const u8 *data, size_t data_len, u8 *mic); - -#endif /* MICHAEL_H */ diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 64a57475ce50..724ec831a885 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -18,7 +18,6 @@ #include #include "ieee80211_i.h" -#include "michael.h" #include "tkip.h" #include "aes_ccm.h" #include "aes_cmac.h" diff --git a/net/wireless/Makefile b/net/wireless/Makefile index 62a83faf0e07..a77fd5ba6368 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -8,7 +8,7 @@ obj-$(CONFIG_WEXT_PRIV) += wext-priv.o cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o scan.o nl80211.o cfg80211-y += mlme.o ibss.o sme.o chan.o ethtool.o mesh.o ap.o trace.o ocb.o -cfg80211-y += pmsr.o +cfg80211-y += michael-mic.o pmsr.o cfg80211-$(CONFIG_OF) += of.o cfg80211-$(CONFIG_CFG80211_DEBUGFS) += debugfs.o cfg80211-$(CONFIG_CFG80211_WEXT) += wext-compat.o wext-sme.o diff --git a/net/mac80211/michael.c b/net/wireless/michael-mic.c similarity index 96% rename from net/mac80211/michael.c rename to net/wireless/michael-mic.c index 8a1afc93e749..50cdb67f0503 100644 --- a/net/mac80211/michael.c +++ b/net/wireless/michael-mic.c @@ -8,7 +8,9 @@ #include #include -#include "michael.h" +struct michael_mic_ctx { + u32 l, r; +}; static void michael_block(struct michael_mic_ctx *mctx, u32 val) { @@ -81,3 +83,4 @@ void michael_mic(const u8 *key, struct ieee80211_hdr *hdr, put_unaligned_le32(mctx.l, mic); put_unaligned_le32(mctx.r, mic + 4); } +EXPORT_SYMBOL_GPL(michael_mic); From 295e476b8217345ba25ff69d8e78c842771c31a8 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 7 Apr 2026 20:06:48 -0700 Subject: [PATCH 1248/1561] wifi: ath11k: Use michael_mic() from cfg80211 Just use the michael_mic() function from cfg80211 instead of a local implementation of it using the crypto_shash API. Note: when the kernel is booted with fips=1, crypto_alloc_shash("michael_mic", 0, 0) always returned ERR_PTR(-ENOENT), because Michael MIC is not a "FIPS allowed" algorithm. For now, just preserve that behavior exactly, to ensure that TKIP is not allowed to be used in FIPS mode. This logic actually seems to disable the entire driver in FIPS mode and not just TKIP, but that was the existing behavior. Supporting this driver in FIPS mode, if anyone actually needs it there, should be a separate commit. Signed-off-by: Eric Biggers Link: https://patch.msgid.link/20260408030651.80336-4-ebiggers@kernel.org Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath11k/Kconfig | 1 - drivers/net/wireless/ath/ath11k/dp.c | 2 - drivers/net/wireless/ath/ath11k/dp_rx.c | 60 +++---------------------- drivers/net/wireless/ath/ath11k/peer.h | 1 - 4 files changed, 7 insertions(+), 57 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/Kconfig b/drivers/net/wireless/ath/ath11k/Kconfig index 47dfd39caa89..385513cfdc30 100644 --- a/drivers/net/wireless/ath/ath11k/Kconfig +++ b/drivers/net/wireless/ath/ath11k/Kconfig @@ -2,7 +2,6 @@ config ATH11K tristate "Qualcomm Technologies 802.11ax chipset support" depends on MAC80211 && HAS_DMA - select CRYPTO_MICHAEL_MIC select ATH_COMMON select QCOM_QMI_HELPERS help diff --git a/drivers/net/wireless/ath/ath11k/dp.c b/drivers/net/wireless/ath/ath11k/dp.c index c940de285276..bbb86f165141 100644 --- a/drivers/net/wireless/ath/ath11k/dp.c +++ b/drivers/net/wireless/ath/ath11k/dp.c @@ -5,7 +5,6 @@ * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ -#include #include #include "core.h" #include "dp_tx.h" @@ -39,7 +38,6 @@ void ath11k_dp_peer_cleanup(struct ath11k *ar, int vdev_id, const u8 *addr) ath11k_peer_rx_tid_cleanup(ar, peer); peer->dp_setup_done = false; - crypto_free_shash(peer->tfm_mmic); spin_unlock_bh(&ab->base_lock); } diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c index 85defe11750d..fe79109adc70 100644 --- a/drivers/net/wireless/ath/ath11k/dp_rx.c +++ b/drivers/net/wireless/ath/ath11k/dp_rx.c @@ -4,10 +4,10 @@ * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ +#include #include #include #include -#include #include "core.h" #include "debug.h" #include "debugfs_htt_stats.h" @@ -3182,16 +3182,13 @@ static void ath11k_dp_rx_frag_timer(struct timer_list *timer) int ath11k_peer_rx_frag_setup(struct ath11k *ar, const u8 *peer_mac, int vdev_id) { struct ath11k_base *ab = ar->ab; - struct crypto_shash *tfm; struct ath11k_peer *peer; struct dp_rx_tid *rx_tid; int i; - tfm = crypto_alloc_shash("michael_mic", 0, 0); - if (IS_ERR(tfm)) { - ath11k_warn(ab, "failed to allocate michael_mic shash: %ld\n", - PTR_ERR(tfm)); - return PTR_ERR(tfm); + if (fips_enabled) { + ath11k_warn(ab, "This driver is disabled due to FIPS\n"); + return -ENOENT; } spin_lock_bh(&ab->base_lock); @@ -3200,7 +3197,6 @@ int ath11k_peer_rx_frag_setup(struct ath11k *ar, const u8 *peer_mac, int vdev_id if (!peer) { ath11k_warn(ab, "failed to find the peer to set up fragment info\n"); spin_unlock_bh(&ab->base_lock); - crypto_free_shash(tfm); return -ENOENT; } @@ -3211,54 +3207,12 @@ int ath11k_peer_rx_frag_setup(struct ath11k *ar, const u8 *peer_mac, int vdev_id skb_queue_head_init(&rx_tid->rx_frags); } - peer->tfm_mmic = tfm; peer->dp_setup_done = true; spin_unlock_bh(&ab->base_lock); return 0; } -static int ath11k_dp_rx_h_michael_mic(struct crypto_shash *tfm, u8 *key, - struct ieee80211_hdr *hdr, u8 *data, - size_t data_len, u8 *mic) -{ - SHASH_DESC_ON_STACK(desc, tfm); - u8 mic_hdr[16] = {}; - u8 tid = 0; - int ret; - - if (!tfm) - return -EINVAL; - - desc->tfm = tfm; - - ret = crypto_shash_setkey(tfm, key, 8); - if (ret) - goto out; - - ret = crypto_shash_init(desc); - if (ret) - goto out; - - /* TKIP MIC header */ - memcpy(mic_hdr, ieee80211_get_DA(hdr), ETH_ALEN); - memcpy(mic_hdr + ETH_ALEN, ieee80211_get_SA(hdr), ETH_ALEN); - if (ieee80211_is_data_qos(hdr->frame_control)) - tid = ieee80211_get_tid(hdr); - mic_hdr[12] = tid; - - ret = crypto_shash_update(desc, mic_hdr, 16); - if (ret) - goto out; - ret = crypto_shash_update(desc, data, data_len); - if (ret) - goto out; - ret = crypto_shash_final(desc, mic); -out: - shash_desc_zero(desc); - return ret; -} - static int ath11k_dp_rx_h_verify_tkip_mic(struct ath11k *ar, struct ath11k_peer *peer, struct sk_buff *msdu) { @@ -3267,7 +3221,7 @@ static int ath11k_dp_rx_h_verify_tkip_mic(struct ath11k *ar, struct ath11k_peer struct ieee80211_key_conf *key_conf; struct ieee80211_hdr *hdr; u8 mic[IEEE80211_CCMP_MIC_LEN]; - int head_len, tail_len, ret; + int head_len, tail_len; size_t data_len; u32 hdr_len, hal_rx_desc_sz = ar->ab->hw_params.hal_desc_sz; u8 *key, *data; @@ -3293,8 +3247,8 @@ static int ath11k_dp_rx_h_verify_tkip_mic(struct ath11k *ar, struct ath11k_peer data_len = msdu->len - head_len - tail_len; key = &key_conf->key[NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY]; - ret = ath11k_dp_rx_h_michael_mic(peer->tfm_mmic, key, hdr, data, data_len, mic); - if (ret || memcmp(mic, data + data_len, IEEE80211_CCMP_MIC_LEN)) + michael_mic(key, hdr, data, data_len, mic); + if (memcmp(mic, data + data_len, IEEE80211_CCMP_MIC_LEN)) goto mic_fail; return 0; diff --git a/drivers/net/wireless/ath/ath11k/peer.h b/drivers/net/wireless/ath/ath11k/peer.h index 3ad2f3355b14..f5ef1a27f8f2 100644 --- a/drivers/net/wireless/ath/ath11k/peer.h +++ b/drivers/net/wireless/ath/ath11k/peer.h @@ -29,7 +29,6 @@ struct ath11k_peer { /* Info used in MMIC verification of * RX fragments */ - struct crypto_shash *tfm_mmic; u8 mcast_keyidx; u8 ucast_keyidx; u16 sec_type; From 65abaa9e722ef29ce79dbc6034195961aa33954c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 7 Apr 2026 20:06:49 -0700 Subject: [PATCH 1249/1561] wifi: ath12k: Use michael_mic() from cfg80211 Just use the michael_mic() function from cfg80211 instead of a local implementation of it using the crypto_shash API. Note: when the kernel is booted with fips=1, crypto_alloc_shash("michael_mic", 0, 0) always returned ERR_PTR(-ENOENT), because Michael MIC is not a "FIPS allowed" algorithm. For now, just preserve that behavior exactly, to ensure that TKIP is not allowed to be used in FIPS mode. This logic actually seems to disable the entire driver in FIPS mode and not just TKIP, but that was the existing behavior. Supporting this driver in FIPS mode, if anyone actually needs it there, should be a separate commit. Signed-off-by: Eric Biggers Link: https://patch.msgid.link/20260408030651.80336-5-ebiggers@kernel.org Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath12k/Kconfig | 1 - drivers/net/wireless/ath/ath12k/dp.c | 2 - drivers/net/wireless/ath/ath12k/dp_peer.h | 1 - drivers/net/wireless/ath/ath12k/dp_rx.c | 55 ++----------------- drivers/net/wireless/ath/ath12k/dp_rx.h | 4 -- drivers/net/wireless/ath/ath12k/wifi7/dp_rx.c | 7 +-- 6 files changed, 8 insertions(+), 62 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/Kconfig b/drivers/net/wireless/ath/ath12k/Kconfig index 1ea1af1b8f6c..d39c075758bd 100644 --- a/drivers/net/wireless/ath/ath12k/Kconfig +++ b/drivers/net/wireless/ath/ath12k/Kconfig @@ -2,7 +2,6 @@ config ATH12K tristate "Qualcomm Technologies Wi-Fi 7 support (ath12k)" depends on MAC80211 && HAS_DMA && PCI - select CRYPTO_MICHAEL_MIC select QCOM_QMI_HELPERS select MHI_BUS select QRTR diff --git a/drivers/net/wireless/ath/ath12k/dp.c b/drivers/net/wireless/ath/ath12k/dp.c index 1c82d927d27b..90802ed1aa59 100644 --- a/drivers/net/wireless/ath/ath12k/dp.c +++ b/drivers/net/wireless/ath/ath12k/dp.c @@ -4,7 +4,6 @@ * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ -#include #include "core.h" #include "dp_tx.h" #include "hif.h" @@ -41,7 +40,6 @@ void ath12k_dp_peer_cleanup(struct ath12k *ar, int vdev_id, const u8 *addr) } ath12k_dp_rx_peer_tid_cleanup(ar, peer); - crypto_free_shash(peer->dp_peer->tfm_mmic); peer->dp_peer->dp_setup_done = false; spin_unlock_bh(&dp->dp_lock); } diff --git a/drivers/net/wireless/ath/ath12k/dp_peer.h b/drivers/net/wireless/ath/ath12k/dp_peer.h index 20294ff09513..113b8040010f 100644 --- a/drivers/net/wireless/ath/ath12k/dp_peer.h +++ b/drivers/net/wireless/ath/ath12k/dp_peer.h @@ -139,7 +139,6 @@ struct ath12k_dp_peer { u16 sec_type; /* Info used in MMIC verification of * RX fragments */ - struct crypto_shash *tfm_mmic; struct ieee80211_key_conf *keys[WMI_MAX_KEY_INDEX + 1]; struct ath12k_dp_link_peer __rcu *link_peers[ATH12K_NUM_MAX_LINKS]; struct ath12k_reoq_buf reoq_bufs[IEEE80211_NUM_TIDS + 1]; diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index 59088ab407d0..250459facff3 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -4,10 +4,10 @@ * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ +#include #include #include #include -#include #include "core.h" #include "debug.h" #include "hw.h" @@ -1433,29 +1433,27 @@ static void ath12k_dp_rx_frag_timer(struct timer_list *timer) int ath12k_dp_rx_peer_frag_setup(struct ath12k *ar, const u8 *peer_mac, int vdev_id) { struct ath12k_base *ab = ar->ab; - struct crypto_shash *tfm; struct ath12k_dp_link_peer *peer; struct ath12k_dp_rx_tid *rx_tid; int i; struct ath12k_dp *dp = ath12k_ab_to_dp(ab); - tfm = crypto_alloc_shash("michael_mic", 0, 0); - if (IS_ERR(tfm)) - return PTR_ERR(tfm); + if (fips_enabled) { + ath12k_warn(ab, "This driver is disabled due to FIPS\n"); + return -ENOENT; + } spin_lock_bh(&dp->dp_lock); peer = ath12k_dp_link_peer_find_by_vdev_and_addr(dp, vdev_id, peer_mac); if (!peer || !peer->dp_peer) { spin_unlock_bh(&dp->dp_lock); - crypto_free_shash(tfm); ath12k_warn(ab, "failed to find the peer to set up fragment info\n"); return -ENOENT; } if (!peer->primary_link) { spin_unlock_bh(&dp->dp_lock); - crypto_free_shash(tfm); return 0; } @@ -1466,55 +1464,12 @@ int ath12k_dp_rx_peer_frag_setup(struct ath12k *ar, const u8 *peer_mac, int vdev skb_queue_head_init(&rx_tid->rx_frags); } - peer->dp_peer->tfm_mmic = tfm; peer->dp_peer->dp_setup_done = true; spin_unlock_bh(&dp->dp_lock); return 0; } -int ath12k_dp_rx_h_michael_mic(struct crypto_shash *tfm, u8 *key, - struct ieee80211_hdr *hdr, u8 *data, - size_t data_len, u8 *mic) -{ - SHASH_DESC_ON_STACK(desc, tfm); - u8 mic_hdr[16] = {}; - u8 tid = 0; - int ret; - - if (!tfm) - return -EINVAL; - - desc->tfm = tfm; - - ret = crypto_shash_setkey(tfm, key, 8); - if (ret) - goto out; - - ret = crypto_shash_init(desc); - if (ret) - goto out; - - /* TKIP MIC header */ - memcpy(mic_hdr, ieee80211_get_DA(hdr), ETH_ALEN); - memcpy(mic_hdr + ETH_ALEN, ieee80211_get_SA(hdr), ETH_ALEN); - if (ieee80211_is_data_qos(hdr->frame_control)) - tid = ieee80211_get_tid(hdr); - mic_hdr[12] = tid; - - ret = crypto_shash_update(desc, mic_hdr, 16); - if (ret) - goto out; - ret = crypto_shash_update(desc, data, data_len); - if (ret) - goto out; - ret = crypto_shash_final(desc, mic); -out: - shash_desc_zero(desc); - return ret; -} -EXPORT_SYMBOL(ath12k_dp_rx_h_michael_mic); - void ath12k_dp_rx_h_undecap_frag(struct ath12k_pdev_dp *dp_pdev, struct sk_buff *msdu, enum hal_encrypt_type enctype, u32 flags) { diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.h b/drivers/net/wireless/ath/ath12k/dp_rx.h index bd62af0c80d4..55a31e669b3b 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.h +++ b/drivers/net/wireless/ath/ath12k/dp_rx.h @@ -6,7 +6,6 @@ #ifndef ATH12K_DP_RX_H #define ATH12K_DP_RX_H -#include #include "core.h" #include "debug.h" @@ -204,9 +203,6 @@ void ath12k_dp_rx_h_sort_frags(struct ath12k_hal *hal, struct sk_buff *cur_frag); void ath12k_dp_rx_h_undecap_frag(struct ath12k_pdev_dp *dp_pdev, struct sk_buff *msdu, enum hal_encrypt_type enctype, u32 flags); -int ath12k_dp_rx_h_michael_mic(struct crypto_shash *tfm, u8 *key, - struct ieee80211_hdr *hdr, u8 *data, - size_t data_len, u8 *mic); int ath12k_dp_rx_ampdu_start(struct ath12k *ar, struct ieee80211_ampdu_params *params, u8 link_id); diff --git a/drivers/net/wireless/ath/ath12k/wifi7/dp_rx.c b/drivers/net/wireless/ath/ath12k/wifi7/dp_rx.c index e6a934d74e85..945680b3ebdf 100644 --- a/drivers/net/wireless/ath/ath12k/wifi7/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/wifi7/dp_rx.c @@ -983,7 +983,7 @@ static int ath12k_wifi7_dp_rx_h_verify_tkip_mic(struct ath12k_pdev_dp *dp_pdev, struct ieee80211_key_conf *key_conf; struct ieee80211_hdr *hdr; u8 mic[IEEE80211_CCMP_MIC_LEN]; - int head_len, tail_len, ret; + int head_len, tail_len; size_t data_len; u32 hdr_len, hal_rx_desc_sz = hal->hal_desc_sz; u8 *key, *data; @@ -1011,9 +1011,8 @@ static int ath12k_wifi7_dp_rx_h_verify_tkip_mic(struct ath12k_pdev_dp *dp_pdev, data_len = msdu->len - head_len - tail_len; key = &key_conf->key[NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY]; - ret = ath12k_dp_rx_h_michael_mic(peer->tfm_mmic, key, hdr, data, - data_len, mic); - if (ret || memcmp(mic, data + data_len, IEEE80211_CCMP_MIC_LEN)) + michael_mic(key, hdr, data, data_len, mic); + if (memcmp(mic, data + data_len, IEEE80211_CCMP_MIC_LEN)) goto mic_fail; return 0; From 32a0e1c63cdfaa9a6f1405b552b5f9eb2be61c59 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 7 Apr 2026 20:06:50 -0700 Subject: [PATCH 1250/1561] wifi: ipw2x00: Use michael_mic() from cfg80211 Just use the michael_mic() function from cfg80211 instead of a local implementation of it using the crypto_shash API. Signed-off-by: Eric Biggers Link: https://patch.msgid.link/20260408030651.80336-6-ebiggers@kernel.org Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/ipw2x00/Kconfig | 1 - .../intel/ipw2x00/libipw_crypto_tkip.c | 120 +----------------- 2 files changed, 5 insertions(+), 116 deletions(-) diff --git a/drivers/net/wireless/intel/ipw2x00/Kconfig b/drivers/net/wireless/intel/ipw2x00/Kconfig index b92df91adb3a..b508f14542d5 100644 --- a/drivers/net/wireless/intel/ipw2x00/Kconfig +++ b/drivers/net/wireless/intel/ipw2x00/Kconfig @@ -154,7 +154,6 @@ config LIBIPW depends on PCI && CFG80211 select WIRELESS_EXT select CRYPTO - select CRYPTO_MICHAEL_MIC select CRYPTO_LIB_ARC4 select CRC32 help diff --git a/drivers/net/wireless/intel/ipw2x00/libipw_crypto_tkip.c b/drivers/net/wireless/intel/ipw2x00/libipw_crypto_tkip.c index c2cd6808fd0f..24bb28ab7a49 100644 --- a/drivers/net/wireless/intel/ipw2x00/libipw_crypto_tkip.c +++ b/drivers/net/wireless/intel/ipw2x00/libipw_crypto_tkip.c @@ -25,8 +25,6 @@ #include #include #include -#include -#include #include #include "libipw.h" @@ -57,11 +55,6 @@ struct libipw_tkip_data { struct arc4_ctx rx_ctx_arc4; struct arc4_ctx tx_ctx_arc4; - struct crypto_shash *rx_tfm_michael; - struct crypto_shash *tx_tfm_michael; - - /* scratch buffers for virt_to_page() (crypto API) */ - u8 rx_hdr[16], tx_hdr[16]; unsigned long flags; }; @@ -89,41 +82,14 @@ static void *libipw_tkip_init(int key_idx) priv = kzalloc_obj(*priv, GFP_ATOMIC); if (priv == NULL) - goto fail; + return priv; priv->key_idx = key_idx; - - priv->tx_tfm_michael = crypto_alloc_shash("michael_mic", 0, 0); - if (IS_ERR(priv->tx_tfm_michael)) { - priv->tx_tfm_michael = NULL; - goto fail; - } - - priv->rx_tfm_michael = crypto_alloc_shash("michael_mic", 0, 0); - if (IS_ERR(priv->rx_tfm_michael)) { - priv->rx_tfm_michael = NULL; - goto fail; - } - return priv; - - fail: - if (priv) { - crypto_free_shash(priv->tx_tfm_michael); - crypto_free_shash(priv->rx_tfm_michael); - kfree(priv); - } - - return NULL; } static void libipw_tkip_deinit(void *priv) { - struct libipw_tkip_data *_priv = priv; - if (_priv) { - crypto_free_shash(_priv->tx_tfm_michael); - crypto_free_shash(_priv->rx_tfm_michael); - } kfree_sensitive(priv); } @@ -464,73 +430,6 @@ static int libipw_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) return keyidx; } -static int libipw_michael_mic(struct crypto_shash *tfm_michael, u8 *key, u8 *hdr, - u8 *data, size_t data_len, u8 *mic) -{ - SHASH_DESC_ON_STACK(desc, tfm_michael); - int err; - - if (tfm_michael == NULL) { - pr_warn("%s(): tfm_michael == NULL\n", __func__); - return -1; - } - - desc->tfm = tfm_michael; - - if (crypto_shash_setkey(tfm_michael, key, 8)) - return -1; - - err = crypto_shash_init(desc); - if (err) - goto out; - err = crypto_shash_update(desc, hdr, 16); - if (err) - goto out; - err = crypto_shash_update(desc, data, data_len); - if (err) - goto out; - err = crypto_shash_final(desc, mic); - -out: - shash_desc_zero(desc); - return err; -} - -static void michael_mic_hdr(struct sk_buff *skb, u8 * hdr) -{ - struct ieee80211_hdr *hdr11; - - hdr11 = (struct ieee80211_hdr *)skb->data; - - switch (le16_to_cpu(hdr11->frame_control) & - (IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS)) { - case IEEE80211_FCTL_TODS: - memcpy(hdr, hdr11->addr3, ETH_ALEN); /* DA */ - memcpy(hdr + ETH_ALEN, hdr11->addr2, ETH_ALEN); /* SA */ - break; - case IEEE80211_FCTL_FROMDS: - memcpy(hdr, hdr11->addr1, ETH_ALEN); /* DA */ - memcpy(hdr + ETH_ALEN, hdr11->addr3, ETH_ALEN); /* SA */ - break; - case IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS: - memcpy(hdr, hdr11->addr3, ETH_ALEN); /* DA */ - memcpy(hdr + ETH_ALEN, hdr11->addr4, ETH_ALEN); /* SA */ - break; - default: - memcpy(hdr, hdr11->addr1, ETH_ALEN); /* DA */ - memcpy(hdr + ETH_ALEN, hdr11->addr2, ETH_ALEN); /* SA */ - break; - } - - if (ieee80211_is_data_qos(hdr11->frame_control)) { - hdr[12] = le16_to_cpu(*((__le16 *)ieee80211_get_qos_ctl(hdr11))) - & IEEE80211_QOS_CTL_TID_MASK; - } else - hdr[12] = 0; /* priority */ - - hdr[13] = hdr[14] = hdr[15] = 0; /* reserved */ -} - static int libipw_michael_mic_add(struct sk_buff *skb, int hdr_len, void *priv) { @@ -544,12 +443,9 @@ static int libipw_michael_mic_add(struct sk_buff *skb, int hdr_len, return -1; } - michael_mic_hdr(skb, tkey->tx_hdr); pos = skb_put(skb, 8); - if (libipw_michael_mic(tkey->tx_tfm_michael, &tkey->key[16], tkey->tx_hdr, - skb->data + hdr_len, skb->len - 8 - hdr_len, pos)) - return -1; - + michael_mic(&tkey->key[16], (struct ieee80211_hdr *)skb->data, + skb->data + hdr_len, skb->len - 8 - hdr_len, pos); return 0; } @@ -583,10 +479,8 @@ static int libipw_michael_mic_verify(struct sk_buff *skb, int keyidx, if (!tkey->key_set) return -1; - michael_mic_hdr(skb, tkey->rx_hdr); - if (libipw_michael_mic(tkey->rx_tfm_michael, &tkey->key[24], tkey->rx_hdr, - skb->data + hdr_len, skb->len - 8 - hdr_len, mic)) - return -1; + michael_mic(&tkey->key[24], (struct ieee80211_hdr *)skb->data, + skb->data + hdr_len, skb->len - 8 - hdr_len, mic); if (memcmp(mic, skb->data + skb->len - 8, 8) != 0) { struct ieee80211_hdr *hdr; hdr = (struct ieee80211_hdr *)skb->data; @@ -614,17 +508,13 @@ static int libipw_tkip_set_key(void *key, int len, u8 * seq, void *priv) { struct libipw_tkip_data *tkey = priv; int keyidx; - struct crypto_shash *tfm = tkey->tx_tfm_michael; struct arc4_ctx *tfm2 = &tkey->tx_ctx_arc4; - struct crypto_shash *tfm3 = tkey->rx_tfm_michael; struct arc4_ctx *tfm4 = &tkey->rx_ctx_arc4; keyidx = tkey->key_idx; memset(tkey, 0, sizeof(*tkey)); tkey->key_idx = keyidx; - tkey->tx_tfm_michael = tfm; tkey->tx_ctx_arc4 = *tfm2; - tkey->rx_tfm_michael = tfm3; tkey->rx_ctx_arc4 = *tfm4; if (len == TKIP_KEY_LEN) { memcpy(tkey->key, key, TKIP_KEY_LEN); From 8c6d03b7a249ffe85ba2bda09a2a7614c0ff03db Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 7 Apr 2026 20:06:51 -0700 Subject: [PATCH 1251/1561] crypto: Remove michael_mic from crypto_shash API Remove the "michael_mic" crypto_shash algorithm, since it's no longer used. Its only users were wireless drivers, which have now been converted to use the michael_mic() function instead. It makes sense that no other users ever appeared: Michael MIC is an insecure algorithm that is specific to WPA TKIP, which itself was an interim security solution to replace the broken WEP standard. Acked-by: Geert Uytterhoeven Signed-off-by: Eric Biggers Acked-by: Herbert Xu Link: https://patch.msgid.link/20260408030651.80336-7-ebiggers@kernel.org Signed-off-by: Johannes Berg --- arch/arm/configs/omap2plus_defconfig | 1 - arch/arm/configs/spitz_defconfig | 1 - arch/arm64/configs/defconfig | 1 - arch/m68k/configs/amiga_defconfig | 1 - arch/m68k/configs/apollo_defconfig | 1 - arch/m68k/configs/atari_defconfig | 1 - arch/m68k/configs/bvme6000_defconfig | 1 - arch/m68k/configs/hp300_defconfig | 1 - arch/m68k/configs/mac_defconfig | 1 - arch/m68k/configs/multi_defconfig | 1 - arch/m68k/configs/mvme147_defconfig | 1 - arch/m68k/configs/mvme16x_defconfig | 1 - arch/m68k/configs/q40_defconfig | 1 - arch/m68k/configs/sun3_defconfig | 1 - arch/m68k/configs/sun3x_defconfig | 1 - arch/mips/configs/bigsur_defconfig | 1 - arch/mips/configs/decstation_64_defconfig | 1 - arch/mips/configs/decstation_defconfig | 1 - arch/mips/configs/decstation_r4k_defconfig | 1 - arch/mips/configs/gpr_defconfig | 1 - arch/mips/configs/ip32_defconfig | 1 - arch/mips/configs/lemote2f_defconfig | 1 - arch/mips/configs/malta_qemu_32r6_defconfig | 1 - arch/mips/configs/maltaaprp_defconfig | 1 - arch/mips/configs/maltasmvp_defconfig | 1 - arch/mips/configs/maltasmvp_eva_defconfig | 1 - arch/mips/configs/maltaup_defconfig | 1 - arch/mips/configs/mtx1_defconfig | 1 - arch/mips/configs/rm200_defconfig | 1 - arch/mips/configs/sb1250_swarm_defconfig | 1 - arch/parisc/configs/generic-32bit_defconfig | 1 - arch/parisc/configs/generic-64bit_defconfig | 1 - arch/powerpc/configs/g5_defconfig | 1 - arch/powerpc/configs/linkstation_defconfig | 1 - arch/powerpc/configs/mvme5100_defconfig | 1 - arch/powerpc/configs/powernv_defconfig | 1 - arch/powerpc/configs/ppc64_defconfig | 1 - arch/powerpc/configs/ppc64e_defconfig | 1 - arch/powerpc/configs/ppc6xx_defconfig | 1 - arch/powerpc/configs/ps3_defconfig | 1 - arch/s390/configs/debug_defconfig | 1 - arch/s390/configs/defconfig | 1 - arch/sh/configs/sh2007_defconfig | 1 - arch/sh/configs/titan_defconfig | 1 - arch/sh/configs/ul2_defconfig | 1 - arch/sparc/configs/sparc32_defconfig | 1 - arch/sparc/configs/sparc64_defconfig | 1 - crypto/Kconfig | 12 -- crypto/Makefile | 1 - crypto/michael_mic.c | 176 -------------------- crypto/tcrypt.c | 4 - crypto/testmgr.c | 6 - crypto/testmgr.h | 50 ------ 53 files changed, 296 deletions(-) delete mode 100644 crypto/michael_mic.c diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 0464f6552169..ae2883d3ff0e 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -704,7 +704,6 @@ CONFIG_ROOT_NFS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_SECURITY=y -CONFIG_CRYPTO_MICHAEL_MIC=y CONFIG_CRYPTO_GHASH_ARM_CE=m CONFIG_CRYPTO_AES=m CONFIG_CRYPTO_AES_ARM_BS=m diff --git a/arch/arm/configs/spitz_defconfig b/arch/arm/configs/spitz_defconfig index c130af6d44d4..f116a01c3f5f 100644 --- a/arch/arm/configs/spitz_defconfig +++ b/arch/arm/configs/spitz_defconfig @@ -230,7 +230,6 @@ CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_WP512=m CONFIG_FONTS=y diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 0651a771f5c1..8b2343048465 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1914,7 +1914,6 @@ CONFIG_CRYPTO_USER=y CONFIG_CRYPTO_CHACHA20=m CONFIG_CRYPTO_BENCHMARK=m CONFIG_CRYPTO_ECHAINIV=y -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_GHASH_ARM64_CE=y diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index c8b936bb702f..510e16f25318 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -538,7 +538,6 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_SHA1=m CONFIG_CRYPTO_SM3_GENERIC=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index fc1792495bbc..d1d8f02b4d96 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -495,7 +495,6 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_SHA1=m CONFIG_CRYPTO_SM3_GENERIC=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index e440c596e60b..2e207af4add2 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -515,7 +515,6 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_SHA1=m CONFIG_CRYPTO_SM3_GENERIC=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 7aa352d14363..d75465289898 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -487,7 +487,6 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_SHA1=m CONFIG_CRYPTO_SM3_GENERIC=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index 0baaf2a82c61..c2011a749e10 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -497,7 +497,6 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_SHA1=m CONFIG_CRYPTO_SM3_GENERIC=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 0cbbfe5aeaec..e377443c56d4 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -514,7 +514,6 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_SHA1=m CONFIG_CRYPTO_SM3_GENERIC=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 2b96f90e1a4d..cdd0449f7141 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -601,7 +601,6 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_SHA1=m CONFIG_CRYPTO_SM3_GENERIC=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index b49264cec911..4e78ff8d793e 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -487,7 +487,6 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_SHA1=m CONFIG_CRYPTO_SM3_GENERIC=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index 96a974b0a766..9fc30a3236e1 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -488,7 +488,6 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_SHA1=m CONFIG_CRYPTO_SM3_GENERIC=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index e53361584393..c3b5fdabfe16 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -504,7 +504,6 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_SHA1=m CONFIG_CRYPTO_SM3_GENERIC=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index af89287c1093..ea40f990eb22 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -485,7 +485,6 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_SHA1=m CONFIG_CRYPTO_SM3_GENERIC=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index af210e8b77f9..e20b6a9d26cd 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -485,7 +485,6 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_SHA1=m CONFIG_CRYPTO_SM3_GENERIC=m diff --git a/arch/mips/configs/bigsur_defconfig b/arch/mips/configs/bigsur_defconfig index 349e9e0b4f54..3b64e151e187 100644 --- a/arch/mips/configs/bigsur_defconfig +++ b/arch/mips/configs/bigsur_defconfig @@ -222,7 +222,6 @@ CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_WP512=m diff --git a/arch/mips/configs/decstation_64_defconfig b/arch/mips/configs/decstation_64_defconfig index dad98c575292..7c43352fac6b 100644 --- a/arch/mips/configs/decstation_64_defconfig +++ b/arch/mips/configs/decstation_64_defconfig @@ -180,7 +180,6 @@ CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_CRC32=m CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_WP512=m diff --git a/arch/mips/configs/decstation_defconfig b/arch/mips/configs/decstation_defconfig index 4e1b51a4ad90..aee10274f048 100644 --- a/arch/mips/configs/decstation_defconfig +++ b/arch/mips/configs/decstation_defconfig @@ -175,7 +175,6 @@ CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_CRC32=m CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_WP512=m diff --git a/arch/mips/configs/decstation_r4k_defconfig b/arch/mips/configs/decstation_r4k_defconfig index 4e550dffc23d..a1698049aa7a 100644 --- a/arch/mips/configs/decstation_r4k_defconfig +++ b/arch/mips/configs/decstation_r4k_defconfig @@ -175,7 +175,6 @@ CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_CRC32=m CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_WP512=m diff --git a/arch/mips/configs/gpr_defconfig b/arch/mips/configs/gpr_defconfig index 437ef6dc0b4c..fdd28a89e336 100644 --- a/arch/mips/configs/gpr_defconfig +++ b/arch/mips/configs/gpr_defconfig @@ -275,7 +275,6 @@ CONFIG_CRYPTO_AUTHENC=m CONFIG_CRYPTO_BENCHMARK=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m diff --git a/arch/mips/configs/ip32_defconfig b/arch/mips/configs/ip32_defconfig index 7568838eb08b..68558d0d3f52 100644 --- a/arch/mips/configs/ip32_defconfig +++ b/arch/mips/configs/ip32_defconfig @@ -159,7 +159,6 @@ CONFIG_CRYPTO_PCBC=y CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_XCBC=y CONFIG_CRYPTO_MD4=y -CONFIG_CRYPTO_MICHAEL_MIC=y CONFIG_CRYPTO_SHA1=y CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_SHA512=y diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig index 8d3f20ed19b5..eb3565a3f292 100644 --- a/arch/mips/configs/lemote2f_defconfig +++ b/arch/mips/configs/lemote2f_defconfig @@ -308,7 +308,6 @@ CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_SHA1=m CONFIG_CRYPTO_WP512=m diff --git a/arch/mips/configs/malta_qemu_32r6_defconfig b/arch/mips/configs/malta_qemu_32r6_defconfig index accb471a1d93..46a69e8984c5 100644 --- a/arch/mips/configs/malta_qemu_32r6_defconfig +++ b/arch/mips/configs/malta_qemu_32r6_defconfig @@ -167,7 +167,6 @@ CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_ISO8859_1=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m diff --git a/arch/mips/configs/maltaaprp_defconfig b/arch/mips/configs/maltaaprp_defconfig index 6bda67c5f68f..74a0e5f6a886 100644 --- a/arch/mips/configs/maltaaprp_defconfig +++ b/arch/mips/configs/maltaaprp_defconfig @@ -168,7 +168,6 @@ CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_ISO8859_1=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m diff --git a/arch/mips/configs/maltasmvp_defconfig b/arch/mips/configs/maltasmvp_defconfig index e4082537f80f..873bfc59623b 100644 --- a/arch/mips/configs/maltasmvp_defconfig +++ b/arch/mips/configs/maltasmvp_defconfig @@ -169,7 +169,6 @@ CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_ISO8859_1=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m diff --git a/arch/mips/configs/maltasmvp_eva_defconfig b/arch/mips/configs/maltasmvp_eva_defconfig index 58f5af45fa98..c9230b2c4ea8 100644 --- a/arch/mips/configs/maltasmvp_eva_defconfig +++ b/arch/mips/configs/maltasmvp_eva_defconfig @@ -171,7 +171,6 @@ CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_ISO8859_1=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m diff --git a/arch/mips/configs/maltaup_defconfig b/arch/mips/configs/maltaup_defconfig index 9bfef7de0d1c..79fd3ccab339 100644 --- a/arch/mips/configs/maltaup_defconfig +++ b/arch/mips/configs/maltaup_defconfig @@ -167,7 +167,6 @@ CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_ISO8859_1=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig index 77050ae3945f..930c0178cc67 100644 --- a/arch/mips/configs/mtx1_defconfig +++ b/arch/mips/configs/mtx1_defconfig @@ -663,7 +663,6 @@ CONFIG_CRYPTO_BENCHMARK=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig index b507dc4dddd4..b1e67ff0c4f0 100644 --- a/arch/mips/configs/rm200_defconfig +++ b/arch/mips/configs/rm200_defconfig @@ -382,7 +382,6 @@ CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_XCBC=m -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m diff --git a/arch/mips/configs/sb1250_swarm_defconfig b/arch/mips/configs/sb1250_swarm_defconfig index ae2afff00e01..4a25b8d3e507 100644 --- a/arch/mips/configs/sb1250_swarm_defconfig +++ b/arch/mips/configs/sb1250_swarm_defconfig @@ -85,7 +85,6 @@ CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_BLOWFISH=m diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig index 5444ce6405f3..a2cb2a7a02db 100644 --- a/arch/parisc/configs/generic-32bit_defconfig +++ b/arch/parisc/configs/generic-32bit_defconfig @@ -259,7 +259,6 @@ CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_SHA1=y CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_DEFLATE=y diff --git a/arch/parisc/configs/generic-64bit_defconfig b/arch/parisc/configs/generic-64bit_defconfig index ce91f9d1fdbf..4dd6cf6a2cb9 100644 --- a/arch/parisc/configs/generic-64bit_defconfig +++ b/arch/parisc/configs/generic-64bit_defconfig @@ -287,7 +287,6 @@ CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_DEFLATE=m # CONFIG_CRYPTO_HW is not set CONFIG_PRINTK_TIME=y diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig index 428f17b45513..466f196ee8b2 100644 --- a/arch/powerpc/configs/g5_defconfig +++ b/arch/powerpc/configs/g5_defconfig @@ -238,7 +238,6 @@ CONFIG_BOOTX_TEXT=y CONFIG_CRYPTO_BENCHMARK=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m diff --git a/arch/powerpc/configs/linkstation_defconfig b/arch/powerpc/configs/linkstation_defconfig index b564f9e33a0d..31f84d08b6ef 100644 --- a/arch/powerpc/configs/linkstation_defconfig +++ b/arch/powerpc/configs/linkstation_defconfig @@ -129,7 +129,6 @@ CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_SHA1=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_SERPENT=m diff --git a/arch/powerpc/configs/mvme5100_defconfig b/arch/powerpc/configs/mvme5100_defconfig index fa2b3b9c5945..c82754c14e15 100644 --- a/arch/powerpc/configs/mvme5100_defconfig +++ b/arch/powerpc/configs/mvme5100_defconfig @@ -115,7 +115,6 @@ CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=20 CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_SHA1=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_DES=y diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig index 9ac746cfb4be..675462b78320 100644 --- a/arch/powerpc/configs/powernv_defconfig +++ b/arch/powerpc/configs/powernv_defconfig @@ -319,7 +319,6 @@ CONFIG_XMON=y CONFIG_CRYPTO_BENCHMARK=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 2b0720f2753b..e3e1cad668d9 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -382,7 +382,6 @@ CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_LZO=m diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig index 90247b2a0ab0..e877598fe356 100644 --- a/arch/powerpc/configs/ppc64e_defconfig +++ b/arch/powerpc/configs/ppc64e_defconfig @@ -225,7 +225,6 @@ CONFIG_CRYPTO_CCM=m CONFIG_CRYPTO_GCM=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_ANUBIS=m diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index 3c08f46f3d41..27d4350e8fdb 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -1077,7 +1077,6 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_XCBC=m -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_SHA1=y CONFIG_CRYPTO_SHA512=m diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index 0b48d2b776c4..7cfae0b7b2f3 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -146,7 +146,6 @@ CONFIG_NLS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_LZO=m CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 98fd0a2f51c6..b74f96eec465 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -794,7 +794,6 @@ CONFIG_CRYPTO_GCM=y CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3_GENERIC=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 0f4cedcab3ce..0c831481e43f 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -778,7 +778,6 @@ CONFIG_CRYPTO_GCM=y CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3_GENERIC=m diff --git a/arch/sh/configs/sh2007_defconfig b/arch/sh/configs/sh2007_defconfig index e32d2ce72699..5d9080499485 100644 --- a/arch/sh/configs/sh2007_defconfig +++ b/arch/sh/configs/sh2007_defconfig @@ -170,7 +170,6 @@ CONFIG_CRYPTO_XTS=y CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_XCBC=y CONFIG_CRYPTO_MD4=y -CONFIG_CRYPTO_MICHAEL_MIC=y CONFIG_CRYPTO_SHA1=y CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_SHA512=y diff --git a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig index 896e980d04e1..00863ecb228e 100644 --- a/arch/sh/configs/titan_defconfig +++ b/arch/sh/configs/titan_defconfig @@ -246,7 +246,6 @@ CONFIG_DEBUG_KERNEL=y CONFIG_CRYPTO_NULL=m CONFIG_CRYPTO_ECB=y CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MICHAEL_MIC=y CONFIG_CRYPTO_SHA256=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_TGR192=m diff --git a/arch/sh/configs/ul2_defconfig b/arch/sh/configs/ul2_defconfig index 0d1c858754db..00a37944b043 100644 --- a/arch/sh/configs/ul2_defconfig +++ b/arch/sh/configs/ul2_defconfig @@ -79,4 +79,3 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_932=y CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_MUST_CHECK is not set -CONFIG_CRYPTO_MICHAEL_MIC=y diff --git a/arch/sparc/configs/sparc32_defconfig b/arch/sparc/configs/sparc32_defconfig index e021ecfb5a77..48d834acafb4 100644 --- a/arch/sparc/configs/sparc32_defconfig +++ b/arch/sparc/configs/sparc32_defconfig @@ -82,7 +82,6 @@ CONFIG_CRYPTO_NULL=m CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_MD4=y -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_SHA256=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_AES=m diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig index 9f3f41246ae6..632081a262ba 100644 --- a/arch/sparc/configs/sparc64_defconfig +++ b/arch/sparc/configs/sparc64_defconfig @@ -210,7 +210,6 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_XCBC=y CONFIG_CRYPTO_MD4=y -CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_SHA256=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_TGR192=m diff --git a/crypto/Kconfig b/crypto/Kconfig index b4bb85e8e226..769aef52a785 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -916,18 +916,6 @@ config CRYPTO_MD5 help MD5 message digest algorithm (RFC1321), including HMAC support. -config CRYPTO_MICHAEL_MIC - tristate "Michael MIC" - select CRYPTO_HASH - help - Michael MIC (Message Integrity Code) (IEEE 802.11i) - - Defined by the IEEE 802.11i TKIP (Temporal Key Integrity Protocol), - known as WPA (Wif-Fi Protected Access). - - This algorithm is required for TKIP, but it should not be used for - other purposes because of the weakness of the algorithm. - config CRYPTO_RMD160 tristate "RIPEMD-160" select CRYPTO_HASH diff --git a/crypto/Makefile b/crypto/Makefile index 04e269117589..aa35ba03222f 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -150,7 +150,6 @@ obj-$(CONFIG_CRYPTO_ARIA) += aria_generic.o obj-$(CONFIG_CRYPTO_CHACHA20) += chacha.o CFLAGS_chacha.o += -DARCH=$(ARCH) obj-$(CONFIG_CRYPTO_DEFLATE) += deflate.o -obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o obj-$(CONFIG_CRYPTO_CRC32C) += crc32c-cryptoapi.o crc32c-cryptoapi-y := crc32c.o obj-$(CONFIG_CRYPTO_CRC32) += crc32-cryptoapi.o diff --git a/crypto/michael_mic.c b/crypto/michael_mic.c deleted file mode 100644 index 69ad35f524d7..000000000000 --- a/crypto/michael_mic.c +++ /dev/null @@ -1,176 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Cryptographic API - * - * Michael MIC (IEEE 802.11i/TKIP) keyed digest - * - * Copyright (c) 2004 Jouni Malinen - */ -#include -#include -#include -#include -#include -#include - - -struct michael_mic_ctx { - u32 l, r; -}; - -struct michael_mic_desc_ctx { - __le32 pending; - size_t pending_len; - - u32 l, r; -}; - -static inline u32 xswap(u32 val) -{ - return ((val & 0x00ff00ff) << 8) | ((val & 0xff00ff00) >> 8); -} - - -#define michael_block(l, r) \ -do { \ - r ^= rol32(l, 17); \ - l += r; \ - r ^= xswap(l); \ - l += r; \ - r ^= rol32(l, 3); \ - l += r; \ - r ^= ror32(l, 2); \ - l += r; \ -} while (0) - - -static int michael_init(struct shash_desc *desc) -{ - struct michael_mic_desc_ctx *mctx = shash_desc_ctx(desc); - struct michael_mic_ctx *ctx = crypto_shash_ctx(desc->tfm); - mctx->pending_len = 0; - mctx->l = ctx->l; - mctx->r = ctx->r; - - return 0; -} - - -static int michael_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - struct michael_mic_desc_ctx *mctx = shash_desc_ctx(desc); - - if (mctx->pending_len) { - int flen = 4 - mctx->pending_len; - if (flen > len) - flen = len; - memcpy((u8 *)&mctx->pending + mctx->pending_len, data, flen); - mctx->pending_len += flen; - data += flen; - len -= flen; - - if (mctx->pending_len < 4) - return 0; - - mctx->l ^= le32_to_cpu(mctx->pending); - michael_block(mctx->l, mctx->r); - mctx->pending_len = 0; - } - - while (len >= 4) { - mctx->l ^= get_unaligned_le32(data); - michael_block(mctx->l, mctx->r); - data += 4; - len -= 4; - } - - if (len > 0) { - mctx->pending_len = len; - memcpy(&mctx->pending, data, len); - } - - return 0; -} - - -static int michael_final(struct shash_desc *desc, u8 *out) -{ - struct michael_mic_desc_ctx *mctx = shash_desc_ctx(desc); - u8 *data = (u8 *)&mctx->pending; - - /* Last block and padding (0x5a, 4..7 x 0) */ - switch (mctx->pending_len) { - case 0: - mctx->l ^= 0x5a; - break; - case 1: - mctx->l ^= data[0] | 0x5a00; - break; - case 2: - mctx->l ^= data[0] | (data[1] << 8) | 0x5a0000; - break; - case 3: - mctx->l ^= data[0] | (data[1] << 8) | (data[2] << 16) | - 0x5a000000; - break; - } - michael_block(mctx->l, mctx->r); - /* l ^= 0; */ - michael_block(mctx->l, mctx->r); - - put_unaligned_le32(mctx->l, out); - put_unaligned_le32(mctx->r, out + 4); - - return 0; -} - - -static int michael_setkey(struct crypto_shash *tfm, const u8 *key, - unsigned int keylen) -{ - struct michael_mic_ctx *mctx = crypto_shash_ctx(tfm); - - if (keylen != 8) - return -EINVAL; - - mctx->l = get_unaligned_le32(key); - mctx->r = get_unaligned_le32(key + 4); - return 0; -} - -static struct shash_alg alg = { - .digestsize = 8, - .setkey = michael_setkey, - .init = michael_init, - .update = michael_update, - .final = michael_final, - .descsize = sizeof(struct michael_mic_desc_ctx), - .base = { - .cra_name = "michael_mic", - .cra_driver_name = "michael_mic-generic", - .cra_blocksize = 8, - .cra_ctxsize = sizeof(struct michael_mic_ctx), - .cra_module = THIS_MODULE, - } -}; - -static int __init michael_mic_init(void) -{ - return crypto_register_shash(&alg); -} - - -static void __exit michael_mic_exit(void) -{ - crypto_unregister_shash(&alg); -} - - -module_init(michael_mic_init); -module_exit(michael_mic_exit); - -MODULE_LICENSE("GPL v2"); -MODULE_DESCRIPTION("Michael MIC"); -MODULE_AUTHOR("Jouni Malinen "); -MODULE_ALIAS_CRYPTO("michael_mic"); diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index aded37546137..24f0ccc76796 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -1557,10 +1557,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) ret = min(ret, tcrypt_test("ecb(arc4)")); break; - case 17: - ret = min(ret, tcrypt_test("michael_mic")); - break; - case 18: ret = min(ret, tcrypt_test("crc32c")); break; diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 4985411dedae..d5c38683bf46 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -5197,12 +5197,6 @@ static const struct alg_test_desc alg_test_descs[] = { .suite = { .hash = __VECS(md5_tv_template) } - }, { - .alg = "michael_mic", - .test = alg_test_hash, - .suite = { - .hash = __VECS(michael_mic_tv_template) - } }, { .alg = "p1363(ecdsa-nist-p192)", .test = alg_test_null, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 1c69c11c0cdb..11911bff5f79 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -32808,56 +32808,6 @@ static const struct comp_testvec lzorle_decomp_tv_template[] = { }, }; -/* - * Michael MIC test vectors from IEEE 802.11i - */ -#define MICHAEL_MIC_TEST_VECTORS 6 - -static const struct hash_testvec michael_mic_tv_template[] = { - { - .key = "\x00\x00\x00\x00\x00\x00\x00\x00", - .ksize = 8, - .plaintext = zeroed_string, - .psize = 0, - .digest = "\x82\x92\x5c\x1c\xa1\xd1\x30\xb8", - }, - { - .key = "\x82\x92\x5c\x1c\xa1\xd1\x30\xb8", - .ksize = 8, - .plaintext = "M", - .psize = 1, - .digest = "\x43\x47\x21\xca\x40\x63\x9b\x3f", - }, - { - .key = "\x43\x47\x21\xca\x40\x63\x9b\x3f", - .ksize = 8, - .plaintext = "Mi", - .psize = 2, - .digest = "\xe8\xf9\xbe\xca\xe9\x7e\x5d\x29", - }, - { - .key = "\xe8\xf9\xbe\xca\xe9\x7e\x5d\x29", - .ksize = 8, - .plaintext = "Mic", - .psize = 3, - .digest = "\x90\x03\x8f\xc6\xcf\x13\xc1\xdb", - }, - { - .key = "\x90\x03\x8f\xc6\xcf\x13\xc1\xdb", - .ksize = 8, - .plaintext = "Mich", - .psize = 4, - .digest = "\xd5\x5e\x10\x05\x10\x12\x89\x86", - }, - { - .key = "\xd5\x5e\x10\x05\x10\x12\x89\x86", - .ksize = 8, - .plaintext = "Michael", - .psize = 7, - .digest = "\x0a\x94\x2b\x12\x4e\xca\xa5\x46", - } -}; - /* * CRC32 test vectors */ From 3d7640b6c371a1795e6d9580695d20caf16be9a4 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Tue, 7 Apr 2026 08:43:54 +0200 Subject: [PATCH 1252/1561] dt-bindings: wireless: ath10k: Add quirk to skip host cap QMI requests Some firmware versions do not support the host-capability QMI request. Since this request occurs before firmware and board files are loaded, the quirk cannot be expressed in the firmware itself and must be described in the device tree. Signed-off-by: Amit Pundir Co-developed-by: David Heidelberg Signed-off-by: David Heidelberg Reviewed-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20260407-skip-host-cam-qmi-req-v5-1-dfa8a05c6538@ixit.cz Signed-off-by: Jeff Johnson --- .../devicetree/bindings/net/wireless/qcom,ath10k.yaml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml b/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml index f2440d39b7eb..c21d66c7cd55 100644 --- a/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml +++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml @@ -171,6 +171,12 @@ properties: Quirk specifying that the firmware expects the 8bit version of the host capability QMI request + qcom,snoc-host-cap-skip-quirk: + type: boolean + description: + Quirk specifying that the firmware wants to skip the host + capability QMI request + qcom,xo-cal-data: $ref: /schemas/types.yaml#/definitions/uint32 description: @@ -292,6 +298,11 @@ allOf: required: - interrupts + - not: + required: + - qcom,snoc-host-cap-8bit-quirk + - qcom,snoc-host-cap-skip-quirk + examples: # SNoC - | From 6a7693873b20680a3c33bae0c9f9cb3185f64ade Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Tue, 7 Apr 2026 08:43:55 +0200 Subject: [PATCH 1253/1561] wifi: ath10k: Add device-tree quirk to skip host cap QMI requests Some firmware versions do not support the host capability QMI request. Since this request occurs before firmware-N.bin and board-M.bin are loaded, the quirk cannot be expressed in the firmware itself. The root cause is unclear, but there appears to be a generation of firmware that lacks host capability support. Without this quirk, ath10k_qmi_host_cap_send_sync() returns QMI_ERR_MALFORMED_MSG_V01 before loading the firmware. This error is not fatal - Wi-Fi services still come up successfully if the request is simply skipped. Add a device-tree quirk to skip the host capability QMI request on devices whose firmware does not support it. For example, firmware build "QC_IMAGE_VERSION_STRING=WLAN.HL.2.0.c3-00257-QCAHLSWMTPLZ-1" on Xiaomi Poco F1 phone requires this quirk. Suggested-by: Bjorn Andersson Signed-off-by: Amit Pundir Tested-by: Paul Sajna Reviewed-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Acked-by: Dmitry Baryshkov Signed-off-by: David Heidelberg Link: https://patch.msgid.link/20260407-skip-host-cam-qmi-req-v5-2-dfa8a05c6538@ixit.cz Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath10k/qmi.c | 13 ++++++++++--- drivers/net/wireless/ath/ath10k/snoc.c | 3 +++ drivers/net/wireless/ath/ath10k/snoc.h | 1 + 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/qmi.c b/drivers/net/wireless/ath/ath10k/qmi.c index eebd78e7ff6b..e7f90fd9e9b8 100644 --- a/drivers/net/wireless/ath/ath10k/qmi.c +++ b/drivers/net/wireless/ath/ath10k/qmi.c @@ -808,6 +808,7 @@ out: static void ath10k_qmi_event_server_arrive(struct ath10k_qmi *qmi) { struct ath10k *ar = qmi->ar; + struct ath10k_snoc *ar_snoc = ath10k_snoc_priv(ar); int ret; ret = ath10k_qmi_ind_register_send_sync_msg(qmi); @@ -819,9 +820,15 @@ static void ath10k_qmi_event_server_arrive(struct ath10k_qmi *qmi) return; } - ret = ath10k_qmi_host_cap_send_sync(qmi); - if (ret) - return; + /* + * Skip the host capability request for the firmware versions which + * do not support this feature. + */ + if (!test_bit(ATH10K_SNOC_FLAG_SKIP_HOST_CAP_QUIRK, &ar_snoc->flags)) { + ret = ath10k_qmi_host_cap_send_sync(qmi); + if (ret) + return; + } ret = ath10k_qmi_msa_mem_info_send_sync_msg(qmi); if (ret) diff --git a/drivers/net/wireless/ath/ath10k/snoc.c b/drivers/net/wireless/ath/ath10k/snoc.c index f72f236fb9eb..310650227578 100644 --- a/drivers/net/wireless/ath/ath10k/snoc.c +++ b/drivers/net/wireless/ath/ath10k/snoc.c @@ -1362,6 +1362,9 @@ static void ath10k_snoc_quirks_init(struct ath10k *ar) if (of_property_read_bool(dev->of_node, "qcom,snoc-host-cap-8bit-quirk")) set_bit(ATH10K_SNOC_FLAG_8BIT_HOST_CAP_QUIRK, &ar_snoc->flags); + + if (of_property_read_bool(dev->of_node, "qcom,snoc-host-cap-skip-quirk")) + set_bit(ATH10K_SNOC_FLAG_SKIP_HOST_CAP_QUIRK, &ar_snoc->flags); } int ath10k_snoc_fw_indication(struct ath10k *ar, u64 type) diff --git a/drivers/net/wireless/ath/ath10k/snoc.h b/drivers/net/wireless/ath/ath10k/snoc.h index 1ecae34687c2..46574fd8f84e 100644 --- a/drivers/net/wireless/ath/ath10k/snoc.h +++ b/drivers/net/wireless/ath/ath10k/snoc.h @@ -51,6 +51,7 @@ enum ath10k_snoc_flags { ATH10K_SNOC_FLAG_MODEM_STOPPED, ATH10K_SNOC_FLAG_RECOVERY, ATH10K_SNOC_FLAG_8BIT_HOST_CAP_QUIRK, + ATH10K_SNOC_FLAG_SKIP_HOST_CAP_QUIRK, }; struct clk_bulk_data; From bd5c24e4001d39136e1084fc47335c93e4ebbb0d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 6 Apr 2026 10:53:34 -0700 Subject: [PATCH 1254/1561] docs: netdev: improve wording of reviewer guidance Reword the reviewer guidance based on behavior we see on the list. Steer folks: - towards sending tags - away from process issues. Reviewed-by: Joe Damato Reviewed-by: Nicolai Buchwitz Link: https://patch.msgid.link/20260406175334.3153451-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/process/maintainer-netdev.rst | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Documentation/process/maintainer-netdev.rst b/Documentation/process/maintainer-netdev.rst index 3aa13bc2405d..bda93b459a05 100644 --- a/Documentation/process/maintainer-netdev.rst +++ b/Documentation/process/maintainer-netdev.rst @@ -551,10 +551,12 @@ helpful tips please see :ref:`development_advancedtopics_reviews`. It's safe to assume that netdev maintainers know the community and the level of expertise of the reviewers. The reviewers should not be concerned about -their comments impeding or derailing the patch flow. +their comments impeding or derailing the patch flow. A Reviewed-by tag +is understood to mean "I have reviewed this code to the best of my ability" +rather than "I can attest this code is correct". -Less experienced reviewers are highly encouraged to do more in-depth -review of submissions and not focus exclusively on trivial or subjective +Reviewers are highly encouraged to do more in-depth review of submissions +and not focus exclusively on process issues, trivial or subjective matters like code formatting, tags etc. Testimonials / feedback From f81f4e79b192be6c43abb256ea7da3a7dfb7899d Mon Sep 17 00:00:00 2001 From: Xiang Mei Date: Sat, 4 Apr 2026 15:04:12 -0700 Subject: [PATCH 1255/1561] bonding: remove unused bond_is_first_slave and bond_is_last_slave macros Since commit 2884bf72fb8f ("net: bonding: fix use-after-free in bond_xmit_broadcast()"), bond_is_last_slave() was only used in bond_xmit_broadcast(). After the recent fix replaced that usage with a simple index comparison, bond_is_last_slave() has no remaining callers. bond_is_first_slave() likewise has no callers. Remove both unused macros. Signed-off-by: Xiang Mei Link: https://patch.msgid.link/20260404220412.444753-1-xmei5@asu.edu Signed-off-by: Jakub Kicinski --- include/net/bonding.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/net/bonding.h b/include/net/bonding.h index d3520ecfa7f0..edd1942dcd73 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -69,9 +69,6 @@ #define bond_first_slave_rcu(bond) \ netdev_lower_get_first_private_rcu(bond->dev) -#define bond_is_first_slave(bond, pos) (pos == bond_first_slave(bond)) -#define bond_is_last_slave(bond, pos) (pos == bond_last_slave(bond)) - /** * bond_for_each_slave - iterate over all slaves * @bond: the bond holding this list From dbc2bb4e8742068d3d3dc8ebb46d874e5fd953b8 Mon Sep 17 00:00:00 2001 From: Aleksander Jan Bajkowski Date: Mon, 6 Apr 2026 22:12:12 +0200 Subject: [PATCH 1256/1561] net: phy: realtek: get rid of magic numbers in rtl8201_config_intr() Replace the magic numbers with defines. Register names were obtained from publicly available documentation[1]. This should make it clear what's going on in the code. 1. RTL8201F/RTL8201FL/RTL8201FN Rev. 1.4 Datasheet Signed-off-by: Aleksander Jan Bajkowski Reviewed-by: Daniel Golle Reviewed-by: Nicolai Buchwitz nb@tipi-net.de Link: https://patch.msgid.link/20260406201222.1043396-1-olek2@wp.pl Signed-off-by: Jakub Kicinski --- drivers/net/phy/realtek/realtek_main.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/realtek/realtek_main.c b/drivers/net/phy/realtek/realtek_main.c index 023e47ad605b..c3604e1c45d9 100644 --- a/drivers/net/phy/realtek/realtek_main.c +++ b/drivers/net/phy/realtek/realtek_main.c @@ -22,7 +22,14 @@ #include "../phylib.h" #include "realtek.h" +#define RTL8201F_IER_PAGE 0x07 #define RTL8201F_IER 0x13 +#define RTL8201F_IER_LINK BIT(13) +#define RTL8201F_IER_DUPLEX BIT(12) +#define RTL8201F_IER_ANERR BIT(11) +#define RTL8201F_IER_MASK (RTL8201F_IER_ANERR | \ + RTL8201F_IER_DUPLEX | \ + RTL8201F_IER_LINK) #define RTL8201F_ISR 0x1e #define RTL8201F_ISR_ANERR BIT(15) @@ -349,11 +356,13 @@ static int rtl8201_config_intr(struct phy_device *phydev) if (err) return err; - val = BIT(13) | BIT(12) | BIT(11); - err = phy_write_paged(phydev, 0x7, RTL8201F_IER, val); + val = RTL8201F_IER_MASK; + err = phy_write_paged(phydev, RTL8201F_IER_PAGE, + RTL8201F_IER, val); } else { val = 0; - err = phy_write_paged(phydev, 0x7, RTL8201F_IER, val); + err = phy_write_paged(phydev, RTL8201F_IER_PAGE, + RTL8201F_IER, val); if (err) return err; From ea25e03da7a79e0413f1606d4a407a97ed41628a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 7 Apr 2026 14:30:53 +0000 Subject: [PATCH 1257/1561] codel: annotate data-races in codel_dump_stats() codel_dump_stats() only runs with RTNL held, reading fields that can be changed in qdisc fast path. Add READ_ONCE()/WRITE_ONCE() annotations. Alternative would be to acquire the qdisc spinlock, but our long-term goal is to make qdisc dump operations lockless as much as we can. tc_codel_xstats fields don't need to be latched atomically, otherwise this bug would have been caught earlier. No change in kernel size: $ scripts/bloat-o-meter -t vmlinux.0 vmlinux add/remove: 0/0 grow/shrink: 1/1 up/down: 3/-1 (2) Function old new delta codel_qdisc_dequeue 2462 2465 +3 codel_dump_stats 250 249 -1 Total: Before=29739919, After=29739921, chg +0.00% Fixes: 76e3cc126bb2 ("codel: Controlled Delay AQM") Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260407143053.1570620-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/codel_impl.h | 45 +++++++++++++++++++++------------------- net/sched/sch_codel.c | 22 ++++++++++---------- 2 files changed, 35 insertions(+), 32 deletions(-) diff --git a/include/net/codel_impl.h b/include/net/codel_impl.h index b2c359c6dd1b..2c1f0ec309e9 100644 --- a/include/net/codel_impl.h +++ b/include/net/codel_impl.h @@ -120,10 +120,10 @@ static bool codel_should_drop(const struct sk_buff *skb, } skb_len = skb_len_func(skb); - vars->ldelay = now - skb_time_func(skb); + WRITE_ONCE(vars->ldelay, now - skb_time_func(skb)); if (unlikely(skb_len > stats->maxpacket)) - stats->maxpacket = skb_len; + WRITE_ONCE(stats->maxpacket, skb_len); if (codel_time_before(vars->ldelay, params->target) || *backlog <= params->mtu) { @@ -159,7 +159,7 @@ static struct sk_buff *codel_dequeue(void *ctx, if (!skb) { vars->first_above_time = 0; - vars->dropping = false; + WRITE_ONCE(vars->dropping, false); return skb; } now = codel_get_time(); @@ -168,7 +168,7 @@ static struct sk_buff *codel_dequeue(void *ctx, if (vars->dropping) { if (!drop) { /* sojourn time below target - leave dropping state */ - vars->dropping = false; + WRITE_ONCE(vars->dropping, false); } else if (codel_time_after_eq(now, vars->drop_next)) { /* It's time for the next drop. Drop the current * packet and dequeue the next. The dequeue might @@ -180,16 +180,18 @@ static struct sk_buff *codel_dequeue(void *ctx, */ while (vars->dropping && codel_time_after_eq(now, vars->drop_next)) { - vars->count++; /* dont care of possible wrap - * since there is no more divide - */ + /* dont care of possible wrap + * since there is no more divide. + */ + WRITE_ONCE(vars->count, vars->count + 1); codel_Newton_step(vars); if (params->ecn && INET_ECN_set_ce(skb)) { - stats->ecn_mark++; - vars->drop_next = + WRITE_ONCE(stats->ecn_mark, + stats->ecn_mark + 1); + WRITE_ONCE(vars->drop_next, codel_control_law(vars->drop_next, params->interval, - vars->rec_inv_sqrt); + vars->rec_inv_sqrt)); goto end; } stats->drop_len += skb_len_func(skb); @@ -202,13 +204,13 @@ static struct sk_buff *codel_dequeue(void *ctx, skb_time_func, backlog, now)) { /* leave dropping state */ - vars->dropping = false; + WRITE_ONCE(vars->dropping, false); } else { /* and schedule the next drop */ - vars->drop_next = + WRITE_ONCE(vars->drop_next, codel_control_law(vars->drop_next, params->interval, - vars->rec_inv_sqrt); + vars->rec_inv_sqrt)); } } } @@ -216,7 +218,7 @@ static struct sk_buff *codel_dequeue(void *ctx, u32 delta; if (params->ecn && INET_ECN_set_ce(skb)) { - stats->ecn_mark++; + WRITE_ONCE(stats->ecn_mark, stats->ecn_mark + 1); } else { stats->drop_len += skb_len_func(skb); drop_func(skb, ctx); @@ -227,7 +229,7 @@ static struct sk_buff *codel_dequeue(void *ctx, stats, skb_len_func, skb_time_func, backlog, now); } - vars->dropping = true; + WRITE_ONCE(vars->dropping, true); /* if min went above target close to when we last went below it * assume that the drop rate that controlled the queue on the * last cycle is a good starting point to control it now. @@ -236,19 +238,20 @@ static struct sk_buff *codel_dequeue(void *ctx, if (delta > 1 && codel_time_before(now - vars->drop_next, 16 * params->interval)) { - vars->count = delta; + WRITE_ONCE(vars->count, delta); /* we dont care if rec_inv_sqrt approximation * is not very precise : * Next Newton steps will correct it quadratically. */ codel_Newton_step(vars); } else { - vars->count = 1; + WRITE_ONCE(vars->count, 1); vars->rec_inv_sqrt = ~0U >> REC_INV_SQRT_SHIFT; } - vars->lastcount = vars->count; - vars->drop_next = codel_control_law(now, params->interval, - vars->rec_inv_sqrt); + WRITE_ONCE(vars->lastcount, vars->count); + WRITE_ONCE(vars->drop_next, + codel_control_law(now, params->interval, + vars->rec_inv_sqrt)); } end: if (skb && codel_time_after(vars->ldelay, params->ce_threshold)) { @@ -262,7 +265,7 @@ end: params->ce_threshold_selector)); } if (set_ce && INET_ECN_set_ce(skb)) - stats->ce_mark++; + WRITE_ONCE(stats->ce_mark, stats->ce_mark + 1); } return skb; } diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index dc2be90666ff..317aae0ec7bd 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -85,7 +85,7 @@ static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, return qdisc_enqueue_tail(skb, sch); } q = qdisc_priv(sch); - q->drop_overlimit++; + WRITE_ONCE(q->drop_overlimit, q->drop_overlimit + 1); return qdisc_drop_reason(skb, sch, to_free, QDISC_DROP_OVERLIMIT); } @@ -221,18 +221,18 @@ static int codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d) { const struct codel_sched_data *q = qdisc_priv(sch); struct tc_codel_xstats st = { - .maxpacket = q->stats.maxpacket, - .count = q->vars.count, - .lastcount = q->vars.lastcount, - .drop_overlimit = q->drop_overlimit, - .ldelay = codel_time_to_us(q->vars.ldelay), - .dropping = q->vars.dropping, - .ecn_mark = q->stats.ecn_mark, - .ce_mark = q->stats.ce_mark, + .maxpacket = READ_ONCE(q->stats.maxpacket), + .count = READ_ONCE(q->vars.count), + .lastcount = READ_ONCE(q->vars.lastcount), + .drop_overlimit = READ_ONCE(q->drop_overlimit), + .ldelay = codel_time_to_us(READ_ONCE(q->vars.ldelay)), + .dropping = READ_ONCE(q->vars.dropping), + .ecn_mark = READ_ONCE(q->stats.ecn_mark), + .ce_mark = READ_ONCE(q->stats.ce_mark), }; - if (q->vars.dropping) { - codel_tdiff_t delta = q->vars.drop_next - codel_get_time(); + if (st.dropping) { + codel_tdiff_t delta = READ_ONCE(q->vars.drop_next) - codel_get_time(); if (delta >= 0) st.drop_next = codel_time_to_us(delta); From 202ab599413c9a66e3e4449886b85c7b21314d50 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 7 Apr 2026 15:07:10 +0000 Subject: [PATCH 1258/1561] net: dropreason: add MACVLAN_BROADCAST_BACKLOG and IPVLAN_MULTICAST_BACKLOG ipvlan and macvlan use queues to process broadcast/multicast packets from a work queue. Under attack these queues can drop packets. Add MACVLAN_BROADCAST_BACKLOG drop_reason for macvlan broadcast queue. Add IPVLAN_MULTICAST_BACKLOG drop_reason for ipvlan multicast queue. Use different reasons as some deployments use both ipvlan and macvlan. Also change ipvlan_rcv_frame() to use SKB_DROP_REASON_DEV_READY when the device is not UP. Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260407150710.1640747-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ipvlan/ipvlan_core.c | 4 ++-- drivers/net/macvlan.c | 2 +- include/net/dropreason-core.h | 12 ++++++++++++ 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 835e048351a9..0b493a8aa338 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -337,7 +337,7 @@ static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff **pskb, */ if (local) { if (unlikely(!(dev->flags & IFF_UP))) { - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_DEV_READY); goto out; } @@ -596,7 +596,7 @@ static void ipvlan_multicast_enqueue(struct ipvl_port *port, } else { spin_unlock(&port->backlog.lock); dev_core_stats_rx_dropped_inc(skb->dev); - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_IPVLAN_MULTICAST_BACKLOG); } } diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 54c514acacc5..9f90c598649d 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -386,7 +386,7 @@ static void macvlan_broadcast_enqueue(struct macvlan_port *port, return; free_nskb: - kfree_skb(nskb); + kfree_skb_reason(nskb, SKB_DROP_REASON_MACVLAN_BROADCAST_BACKLOG); err: dev_core_stats_rx_dropped_inc(skb->dev); } diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index 51855de5d208..e0ca3904ff8e 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -69,6 +69,8 @@ FN(QDISC_DROP) \ FN(QDISC_BURST_DROP) \ FN(CPU_BACKLOG) \ + FN(MACVLAN_BROADCAST_BACKLOG) \ + FN(IPVLAN_MULTICAST_BACKLOG) \ FN(XDP) \ FN(TC_INGRESS) \ FN(UNHANDLED_PROTO) \ @@ -383,6 +385,16 @@ enum skb_drop_reason { * netdev_max_backlog in net.rst) or RPS flow limit */ SKB_DROP_REASON_CPU_BACKLOG, + /** + * @SKB_DROP_REASON_MACVLAN_BROADCAST_BACKLOG: failed to enqueue the skb + * to macvlan broadcast queue. + */ + SKB_DROP_REASON_MACVLAN_BROADCAST_BACKLOG, + /** + * @SKB_DROP_REASON_IPVLAN_MULTICAST_BACKLOG: failed to enqueue the skb + * to ipvlan multicast queue. + */ + SKB_DROP_REASON_IPVLAN_MULTICAST_BACKLOG, /** @SKB_DROP_REASON_XDP: dropped by XDP in input path */ SKB_DROP_REASON_XDP, /** @SKB_DROP_REASON_TC_INGRESS: dropped in TC ingress HOOK */ From 686a7587bd0be9407f5ea748edf3d8bb00e5bc72 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 7 Apr 2026 17:18:13 -0700 Subject: [PATCH 1259/1561] net: bcmasp: Switch to page pool for RX path This shows an improvement of 1.9% in reducing the CPU cycles and data cache misses. Signed-off-by: Florian Fainelli Reviewed-by: Justin Chen Reviewed-by: Nicolai Buchwitz Link: https://patch.msgid.link/20260408001813.635679-1-florian.fainelli@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/Kconfig | 1 + drivers/net/ethernet/broadcom/asp2/bcmasp.h | 8 +- .../net/ethernet/broadcom/asp2/bcmasp_intf.c | 125 +++++++++++++++--- .../ethernet/broadcom/asp2/bcmasp_intf_defs.h | 4 + 4 files changed, 115 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index dd164acafd01..4287edc7ddd6 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -272,6 +272,7 @@ config BCMASP depends on OF select PHYLIB select MDIO_BCM_UNIMAC + select PAGE_POOL help This configuration enables the Broadcom ASP 2.0 Ethernet controller driver which is present in Broadcom STB SoCs such as 72165. diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp.h b/drivers/net/ethernet/broadcom/asp2/bcmasp.h index 29cd87335ec8..8c8ffaeadc79 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp.h +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp.h @@ -6,6 +6,7 @@ #include #include #include +#include #define ASP_INTR2_OFFSET 0x1000 #define ASP_INTR2_STATUS 0x0 @@ -298,16 +299,19 @@ struct bcmasp_intf { void __iomem *rx_edpkt_cfg; void __iomem *rx_edpkt_dma; int rx_edpkt_index; - int rx_buf_order; struct bcmasp_desc *rx_edpkt_cpu; dma_addr_t rx_edpkt_dma_addr; dma_addr_t rx_edpkt_dma_read; dma_addr_t rx_edpkt_dma_valid; - /* RX buffer prefetcher ring*/ + /* Streaming RX data ring (RBUF_4K mode) */ void *rx_ring_cpu; dma_addr_t rx_ring_dma; dma_addr_t rx_ring_dma_valid; + int rx_buf_order; + + /* Page pool for recycling RX SKB data pages */ + struct page_pool *rx_page_pool; struct napi_struct rx_napi; struct bcmasp_res res; diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c index b368ec2fea43..ec63f50a849e 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "bcmasp.h" #include "bcmasp_intf_defs.h" @@ -482,10 +483,14 @@ static int bcmasp_rx_poll(struct napi_struct *napi, int budget) struct bcmasp_desc *desc; struct sk_buff *skb; dma_addr_t valid; + struct page *page; void *data; u64 flags; u32 len; + /* Hardware advances DMA_VALID as it writes each descriptor + * (RBUF_4K streaming mode); software chases with rx_edpkt_dma_read. + */ valid = rx_edpkt_dma_rq(intf, RX_EDPKT_DMA_VALID) + 1; if (valid == intf->rx_edpkt_dma_addr + DESC_RING_SIZE) valid = intf->rx_edpkt_dma_addr; @@ -493,12 +498,12 @@ static int bcmasp_rx_poll(struct napi_struct *napi, int budget) while ((processed < budget) && (valid != intf->rx_edpkt_dma_read)) { desc = &intf->rx_edpkt_cpu[intf->rx_edpkt_index]; - /* Ensure that descriptor has been fully written to DRAM by - * hardware before reading by the CPU + /* Ensure the descriptor has been fully written to DRAM by + * the hardware before the CPU reads it. */ rmb(); - /* Calculate virt addr by offsetting from physical addr */ + /* Locate the packet data inside the streaming ring buffer. */ data = intf->rx_ring_cpu + (DESC_ADDR(desc->buf) - intf->rx_ring_dma); @@ -524,19 +529,38 @@ static int bcmasp_rx_poll(struct napi_struct *napi, int budget) len = desc->size; - skb = napi_alloc_skb(napi, len); + /* Allocate a page pool page as the SKB data area so the + * kernel can recycle it efficiently after the packet is + * consumed, avoiding repeated slab allocations. + */ + page = page_pool_dev_alloc_pages(intf->rx_page_pool); + if (!page) { + u64_stats_update_begin(&stats->syncp); + u64_stats_inc(&stats->rx_dropped); + u64_stats_update_end(&stats->syncp); + intf->mib.alloc_rx_skb_failed++; + goto next; + } + + skb = napi_build_skb(page_address(page), PAGE_SIZE); if (!skb) { u64_stats_update_begin(&stats->syncp); u64_stats_inc(&stats->rx_dropped); u64_stats_update_end(&stats->syncp); intf->mib.alloc_rx_skb_failed++; - + page_pool_recycle_direct(intf->rx_page_pool, page); goto next; } + /* Reserve headroom then copy the full descriptor payload + * (hardware prepends a 2-byte alignment pad at the start). + */ + skb_reserve(skb, NET_SKB_PAD); skb_put(skb, len); memcpy(skb->data, data, len); + skb_mark_for_recycle(skb); + /* Skip the 2-byte hardware alignment pad. */ skb_pull(skb, 2); len -= 2; if (likely(intf->crc_fwd)) { @@ -558,6 +582,7 @@ static int bcmasp_rx_poll(struct napi_struct *napi, int budget) u64_stats_update_end(&stats->syncp); next: + /* Return this portion of the streaming ring buffer to HW. */ rx_edpkt_cfg_wq(intf, (DESC_ADDR(desc->buf) + desc->size), RX_EDPKT_RING_BUFFER_READ); @@ -661,12 +686,31 @@ static void bcmasp_adj_link(struct net_device *dev) phy_print_status(phydev); } -static int bcmasp_alloc_buffers(struct bcmasp_intf *intf) +static struct page_pool * +bcmasp_rx_page_pool_create(struct bcmasp_intf *intf) +{ + struct page_pool_params pp_params = { + .order = 0, + .flags = 0, + .pool_size = NUM_4K_BUFFERS, + .nid = NUMA_NO_NODE, + .dev = &intf->parent->pdev->dev, + .napi = &intf->rx_napi, + .netdev = intf->ndev, + .offset = 0, + .max_len = PAGE_SIZE, + }; + + return page_pool_create(&pp_params); +} + +static int bcmasp_alloc_rx_buffers(struct bcmasp_intf *intf) { struct device *kdev = &intf->parent->pdev->dev; struct page *buffer_pg; + int ret; - /* Alloc RX */ + /* Contiguous streaming ring that hardware writes packet data into. */ intf->rx_buf_order = get_order(RING_BUFFER_SIZE); buffer_pg = alloc_pages(GFP_KERNEL, intf->rx_buf_order); if (!buffer_pg) @@ -675,13 +719,55 @@ static int bcmasp_alloc_buffers(struct bcmasp_intf *intf) intf->rx_ring_cpu = page_to_virt(buffer_pg); intf->rx_ring_dma = dma_map_page(kdev, buffer_pg, 0, RING_BUFFER_SIZE, DMA_FROM_DEVICE); - if (dma_mapping_error(kdev, intf->rx_ring_dma)) - goto free_rx_buffer; + if (dma_mapping_error(kdev, intf->rx_ring_dma)) { + ret = -ENOMEM; + goto free_ring_pages; + } + + /* Page pool for SKB data areas (copy targets, not DMA buffers). */ + intf->rx_page_pool = bcmasp_rx_page_pool_create(intf); + if (IS_ERR(intf->rx_page_pool)) { + ret = PTR_ERR(intf->rx_page_pool); + intf->rx_page_pool = NULL; + goto free_ring_dma; + } + + return 0; + +free_ring_dma: + dma_unmap_page(kdev, intf->rx_ring_dma, RING_BUFFER_SIZE, + DMA_FROM_DEVICE); +free_ring_pages: + __free_pages(buffer_pg, intf->rx_buf_order); + return ret; +} + +static void bcmasp_reclaim_rx_buffers(struct bcmasp_intf *intf) +{ + struct device *kdev = &intf->parent->pdev->dev; + + page_pool_destroy(intf->rx_page_pool); + intf->rx_page_pool = NULL; + dma_unmap_page(kdev, intf->rx_ring_dma, RING_BUFFER_SIZE, + DMA_FROM_DEVICE); + __free_pages(virt_to_page(intf->rx_ring_cpu), intf->rx_buf_order); +} + +static int bcmasp_alloc_buffers(struct bcmasp_intf *intf) +{ + struct device *kdev = &intf->parent->pdev->dev; + int ret; + + /* Alloc RX */ + ret = bcmasp_alloc_rx_buffers(intf); + if (ret) + return ret; intf->rx_edpkt_cpu = dma_alloc_coherent(kdev, DESC_RING_SIZE, - &intf->rx_edpkt_dma_addr, GFP_KERNEL); + &intf->rx_edpkt_dma_addr, + GFP_KERNEL); if (!intf->rx_edpkt_cpu) - goto free_rx_buffer_dma; + goto free_rx_buffers; /* Alloc TX */ intf->tx_spb_cpu = dma_alloc_coherent(kdev, DESC_RING_SIZE, @@ -701,11 +787,8 @@ free_tx_spb_dma: free_rx_edpkt_dma: dma_free_coherent(kdev, DESC_RING_SIZE, intf->rx_edpkt_cpu, intf->rx_edpkt_dma_addr); -free_rx_buffer_dma: - dma_unmap_page(kdev, intf->rx_ring_dma, RING_BUFFER_SIZE, - DMA_FROM_DEVICE); -free_rx_buffer: - __free_pages(buffer_pg, intf->rx_buf_order); +free_rx_buffers: + bcmasp_reclaim_rx_buffers(intf); return -ENOMEM; } @@ -717,9 +800,7 @@ static void bcmasp_reclaim_free_buffers(struct bcmasp_intf *intf) /* RX buffers */ dma_free_coherent(kdev, DESC_RING_SIZE, intf->rx_edpkt_cpu, intf->rx_edpkt_dma_addr); - dma_unmap_page(kdev, intf->rx_ring_dma, RING_BUFFER_SIZE, - DMA_FROM_DEVICE); - __free_pages(virt_to_page(intf->rx_ring_cpu), intf->rx_buf_order); + bcmasp_reclaim_rx_buffers(intf); /* TX buffers */ dma_free_coherent(kdev, DESC_RING_SIZE, intf->tx_spb_cpu, @@ -738,7 +819,7 @@ static void bcmasp_init_rx(struct bcmasp_intf *intf) /* Make sure channels are disabled */ rx_edpkt_cfg_wl(intf, 0x0, RX_EDPKT_CFG_ENABLE); - /* Rx SPB */ + /* Streaming data ring: hardware writes raw packet bytes here. */ rx_edpkt_cfg_wq(intf, intf->rx_ring_dma, RX_EDPKT_RING_BUFFER_READ); rx_edpkt_cfg_wq(intf, intf->rx_ring_dma, RX_EDPKT_RING_BUFFER_WRITE); rx_edpkt_cfg_wq(intf, intf->rx_ring_dma, RX_EDPKT_RING_BUFFER_BASE); @@ -747,7 +828,9 @@ static void bcmasp_init_rx(struct bcmasp_intf *intf) rx_edpkt_cfg_wq(intf, intf->rx_ring_dma_valid, RX_EDPKT_RING_BUFFER_VALID); - /* EDPKT */ + /* EDPKT descriptor ring: hardware fills descriptors pointing into + * the streaming ring buffer above (RBUF_4K mode). + */ rx_edpkt_cfg_wl(intf, (RX_EDPKT_CFG_CFG0_RBUF_4K << RX_EDPKT_CFG_CFG0_DBUF_SHIFT) | (RX_EDPKT_CFG_CFG0_64_ALN << diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf_defs.h b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf_defs.h index af7418348e81..0318f257452a 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf_defs.h +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf_defs.h @@ -246,6 +246,10 @@ ((((intf)->channel - 6) * 0x14) + 0xa2000) #define RX_SPB_TOP_BLKOUT 0x00 +/* + * Number of 4 KB pages that make up the contiguous RBUF_4K streaming ring + * and the page pool used as copy-target SKB data areas. + */ #define NUM_4K_BUFFERS 32 #define RING_BUFFER_SIZE (PAGE_SIZE * NUM_4K_BUFFERS) From fff75dba7992d8f63d4df8796baf8114015842cc Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Tue, 7 Apr 2026 13:20:58 +0300 Subject: [PATCH 1260/1561] selftests: forwarding: lib: rewrite processing of command line arguments The piece of code which processes the command line arguments and populates NETIFS based on them is really unobvious. Rewrite it so that the intention is clear and the code is easy to follow. Suggested-by: Petr Machata Signed-off-by: Ioana Ciornei Reviewed-by: Petr Machata Link: https://patch.msgid.link/20260407102058.867279-1-ioana.ciornei@nxp.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/lib.sh | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index d8cc4c64148d..d2bdbff68075 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -467,17 +467,18 @@ if [ "${DRIVER_TEST_CONFORMANT}" = "yes" ]; then TARGETS[$remote_netif]="$REMOTE_TYPE:$REMOTE_ARGS" else count=0 + # Prime NETIFS from the command line, but retain if none given. + if [[ $# -gt 0 ]]; then + unset NETIFS + declare -A NETIFS - while [[ $# -gt 0 ]]; do - if [[ "$count" -eq "0" ]]; then - unset NETIFS - declare -A NETIFS - fi - count=$((count + 1)) - NETIFS[p$count]="$1" - TARGETS[$1]="local:" - shift - done + while [[ $# -gt 0 ]]; do + count=$((count + 1)) + NETIFS[p$count]="$1" + TARGETS[$1]="local:" + shift + done + fi fi ############################################################################## From 1a6b3965385a935ffd70275d162f68139bd86898 Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Tue, 7 Apr 2026 16:42:18 +0800 Subject: [PATCH 1261/1561] net: initialize sk_rx_queue_mapping in sk_clone() sk_clone() initializes sk_tx_queue_mapping via sk_tx_queue_clear() but does not initialize sk_rx_queue_mapping. Since this field is in the sk_dontcopy region, it is neither copied from the parent socket by sock_copy() nor zeroed by sk_prot_alloc() (called without __GFP_ZERO from sk_clone). Commit 03cfda4fa6ea ("tcp: fix another uninit-value (sk_rx_queue_mapping)") attempted to fix this by introducing sk_mark_napi_id_set() with force_set=true in tcp_child_process(). However, sk_mark_napi_id_set() -> sk_rx_queue_set() only writes when skb_rx_queue_recorded(skb) is true. If the 3-way handshake ACK arrives through a device that does not record rx_queue (e.g. loopback or veth), sk_rx_queue_mapping remains uninitialized. When a subsequent data packet arrives with a recorded rx_queue, sk_mark_napi_id() -> sk_rx_queue_update() reads the uninitialized field for comparison (force_set=false path), triggering KMSAN. This was reproduced by establishing a TCP connection over loopback (which does not call skb_record_rx_queue), then attaching a BPF TC program on lo ingress to set skb->queue_mapping on data packets: BUG: KMSAN: uninit-value in tcp_v4_do_rcv (net/ipv4/tcp_ipv4.c:1875) tcp_v4_do_rcv (net/ipv4/tcp_ipv4.c:1875) tcp_v4_rcv (net/ipv4/tcp_ipv4.c:2287) ip_protocol_deliver_rcu (net/ipv4/ip_input.c:207) ip_local_deliver_finish (net/ipv4/ip_input.c:242) ip_local_deliver (net/ipv4/ip_input.c:262) ip_rcv (net/ipv4/ip_input.c:573) __netif_receive_skb (net/core/dev.c:6294) process_backlog (net/core/dev.c:6646) __napi_poll (net/core/dev.c:7710) net_rx_action (net/core/dev.c:7929) handle_softirqs (kernel/softirq.c:623) do_softirq (kernel/softirq.c:523) __local_bh_enable_ip (kernel/softirq.c:?) __dev_queue_xmit (net/core/dev.c:?) ip_finish_output2 (net/ipv4/ip_output.c:237) ip_output (net/ipv4/ip_output.c:438) __ip_queue_xmit (net/ipv4/ip_output.c:534) __tcp_transmit_skb (net/ipv4/tcp_output.c:1693) tcp_write_xmit (net/ipv4/tcp_output.c:3064) tcp_sendmsg_locked (net/ipv4/tcp.c:?) tcp_sendmsg (net/ipv4/tcp.c:1465) inet_sendmsg (net/ipv4/af_inet.c:865) sock_write_iter (net/socket.c:1195) vfs_write (fs/read_write.c:688) ... Uninit was created at: kmem_cache_alloc_noprof (mm/slub.c:4873) sk_prot_alloc (net/core/sock.c:2239) sk_alloc (net/core/sock.c:2301) inet_create (net/ipv4/af_inet.c:334) __sock_create (net/socket.c:1605) __sys_socket (net/socket.c:1747) Fix this at the root by adding sk_rx_queue_clear() alongside sk_tx_queue_clear() in sk_clone(). Signed-off-by: Jiayuan Chen Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260407084219.95718-1-jiayuan.chen@linux.dev Signed-off-by: Jakub Kicinski --- net/core/sock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/sock.c b/net/core/sock.c index fdaf66e6dc18..e821b95e0015 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2583,6 +2583,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority, sk_set_socket(newsk, NULL); sk_tx_queue_clear(newsk); + sk_rx_queue_clear(newsk); RCU_INIT_POINTER(newsk->sk_wq, NULL); if (newsk->sk_prot->sockets_allocated) From d2000361e4ddf5047d660a902a3b0ed7520be1e5 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 7 Apr 2026 10:45:17 +0200 Subject: [PATCH 1262/1561] mptcp: better mptcp-level RTT estimator The current MPTCP-level RTT estimator has several issues. On high speed links, the MPTCP-level receive buffer auto-tuning happens with a frequency well above the TCP-level's one. That in turn can cause excessive/unneeded receive buffer increase. On such links, the initial rtt_us value is considerably higher than the actual delay, and the current mptcp_rcv_space_adjust() updates msk->rcvq_space.rtt_us with a period equal to the such field previous value. If the initial rtt_us is 40ms, its first update will happen after 40ms, even if the subflows see actual RTT orders of magnitude lower. Additionally: - setting the msk RTT to the maximum among all the subflows RTTs makes DRS constantly overshooting the rcvbuf size when a subflow has considerable higher latency than the other(s). - during unidirectional bulk transfers with multiple active subflows, the TCP-level RTT estimator occasionally sees considerably higher value than the real link delay, i.e. when the packet scheduler reacts to an incoming ACK on given subflow pushing data on a different subflow. - currently inactive but still open subflows (i.e. switched to backup mode) are always considered when computing the msk-level RTT. Address the all the issues above with a more accurate RTT estimation strategy: the MPTCP-level RTT is set to the minimum of all the subflows actually feeding data into the MPTCP receive buffer, using a small sliding window. While at it, also use EWMA to compute the msk-level scaling_ratio, to that MPTCP can avoid traversing the subflow list is mptcp_rcv_space_adjust(). Use some care to avoid updating msk and ssk level fields too often. Fixes: a6b118febbab ("mptcp: add receive buffer auto-tuning") Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260407-net-next-mptcp-reduce-rbuf-v2-1-0d1d135bf6f6@kernel.org Signed-off-by: Jakub Kicinski --- include/trace/events/mptcp.h | 2 +- net/mptcp/protocol.c | 63 ++++++++++++++++++++---------------- net/mptcp/protocol.h | 37 ++++++++++++++++++++- 3 files changed, 72 insertions(+), 30 deletions(-) diff --git a/include/trace/events/mptcp.h b/include/trace/events/mptcp.h index 269d949b2025..04521acba483 100644 --- a/include/trace/events/mptcp.h +++ b/include/trace/events/mptcp.h @@ -219,7 +219,7 @@ TRACE_EVENT(mptcp_rcvbuf_grow, __be32 *p32; __entry->time = time; - __entry->rtt_us = msk->rcvq_space.rtt_us >> 3; + __entry->rtt_us = mptcp_rtt_us_est(msk) >> 3; __entry->copied = msk->rcvq_space.copied; __entry->inq = mptcp_inq_hint(sk); __entry->space = msk->rcvq_space.space; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 2f4776a4f06a..70a090a95299 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -879,6 +879,32 @@ static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk) return moved; } +static void mptcp_rcv_rtt_update(struct mptcp_sock *msk, + struct mptcp_subflow_context *subflow) +{ + const struct tcp_sock *tp = tcp_sk(subflow->tcp_sock); + u32 rtt_us = tp->rcv_rtt_est.rtt_us; + int id; + + /* Update once per subflow per rcvwnd to avoid touching the msk + * too often. + */ + if (!rtt_us || tp->rcv_rtt_est.seq == subflow->prev_rtt_seq) + return; + + subflow->prev_rtt_seq = tp->rcv_rtt_est.seq; + + /* Pairs with READ_ONCE() in mptcp_rtt_us_est(). */ + id = msk->rcv_rtt_est.next_sample; + WRITE_ONCE(msk->rcv_rtt_est.samples[id], rtt_us); + if (++msk->rcv_rtt_est.next_sample == MPTCP_RTT_SAMPLES) + msk->rcv_rtt_est.next_sample = 0; + + /* EWMA among the incoming subflows */ + msk->scaling_ratio = ((msk->scaling_ratio << 3) - msk->scaling_ratio + + tp->scaling_ratio) >> 3; +} + void mptcp_data_ready(struct sock *sk, struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); @@ -892,6 +918,7 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk) return; mptcp_data_lock(sk); + mptcp_rcv_rtt_update(msk, subflow); if (!sock_owned_by_user(sk)) { /* Wake-up the reader only for in-sequence data */ if (move_skbs_to_msk(msk, ssk) && mptcp_epollin_ready(sk)) @@ -2095,7 +2122,6 @@ static void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk) msk->rcvspace_init = 1; msk->rcvq_space.copied = 0; - msk->rcvq_space.rtt_us = 0; /* initial rcv_space offering made to peer */ msk->rcvq_space.space = min_t(u32, tp->rcv_wnd, @@ -2106,15 +2132,15 @@ static void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk) /* receive buffer autotuning. See tcp_rcv_space_adjust for more information. * - * Only difference: Use highest rtt estimate of the subflows in use. + * Only difference: Use lowest rtt estimate of the subflows in use, see + * mptcp_rcv_rtt_update() and mptcp_rtt_us_est(). */ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) { struct mptcp_subflow_context *subflow; struct sock *sk = (struct sock *)msk; - u8 scaling_ratio = U8_MAX; - u32 time, advmss = 1; - u64 rtt_us, mstamp; + u32 time, rtt_us; + u64 mstamp; msk_owned_by_me(msk); @@ -2129,29 +2155,8 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) mstamp = mptcp_stamp(); time = tcp_stamp_us_delta(mstamp, READ_ONCE(msk->rcvq_space.time)); - rtt_us = msk->rcvq_space.rtt_us; - if (rtt_us && time < (rtt_us >> 3)) - return; - - rtt_us = 0; - mptcp_for_each_subflow(msk, subflow) { - const struct tcp_sock *tp; - u64 sf_rtt_us; - u32 sf_advmss; - - tp = tcp_sk(mptcp_subflow_tcp_sock(subflow)); - - sf_rtt_us = READ_ONCE(tp->rcv_rtt_est.rtt_us); - sf_advmss = READ_ONCE(tp->advmss); - - rtt_us = max(sf_rtt_us, rtt_us); - advmss = max(sf_advmss, advmss); - scaling_ratio = min(tp->scaling_ratio, scaling_ratio); - } - - msk->rcvq_space.rtt_us = rtt_us; - msk->scaling_ratio = scaling_ratio; - if (time < (rtt_us >> 3) || rtt_us == 0) + rtt_us = mptcp_rtt_us_est(msk); + if (rtt_us == U32_MAX || time < (rtt_us >> 3)) return; if (msk->rcvq_space.copied <= msk->rcvq_space.space) @@ -3015,6 +3020,7 @@ static void __mptcp_init_sock(struct sock *sk) msk->timer_ival = TCP_RTO_MIN; msk->scaling_ratio = TCP_DEFAULT_SCALING_RATIO; msk->backlog_len = 0; + mptcp_init_rtt_est(msk); WRITE_ONCE(msk->first, NULL); inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss; @@ -3460,6 +3466,7 @@ static int mptcp_disconnect(struct sock *sk, int flags) msk->bytes_retrans = 0; msk->rcvspace_init = 0; msk->fastclosing = 0; + mptcp_init_rtt_est(msk); /* for fallback's sake */ WRITE_ONCE(msk->ack_seq, 0); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 1208f317ac33..e1d4783db02f 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -269,6 +269,13 @@ struct mptcp_data_frag { struct page *page; }; +/* Arbitrary compromise between as low as possible to react timely to subflow + * close event and as big as possible to avoid being fouled by biased large + * samples due to peer sending data on a different subflow WRT to the incoming + * ack. + */ +#define MPTCP_RTT_SAMPLES 5 + /* MPTCP connection sock */ struct mptcp_sock { /* inet_connection_sock must be the first member */ @@ -341,11 +348,17 @@ struct mptcp_sock { */ struct mptcp_pm_data pm; struct mptcp_sched_ops *sched; + + /* Most recent rtt_us observed by in use incoming subflows. */ + struct { + u32 samples[MPTCP_RTT_SAMPLES]; + u32 next_sample; + } rcv_rtt_est; + struct { int space; /* bytes copied in last measurement window */ int copied; /* bytes copied in this measurement window */ u64 time; /* start time of measurement window */ - u64 rtt_us; /* last maximum rtt of subflows */ } rcvq_space; u8 scaling_ratio; bool allow_subflows; @@ -423,6 +436,27 @@ static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk) return msk->first_pending; } +static inline void mptcp_init_rtt_est(struct mptcp_sock *msk) +{ + int i; + + for (i = 0; i < MPTCP_RTT_SAMPLES; ++i) + msk->rcv_rtt_est.samples[i] = U32_MAX; + msk->rcv_rtt_est.next_sample = 0; + msk->scaling_ratio = TCP_DEFAULT_SCALING_RATIO; +} + +static inline u32 mptcp_rtt_us_est(const struct mptcp_sock *msk) +{ + u32 rtt_us = READ_ONCE(msk->rcv_rtt_est.samples[0]); + int i; + + /* Lockless access of collected samples. */ + for (i = 1; i < MPTCP_RTT_SAMPLES; ++i) + rtt_us = min(rtt_us, READ_ONCE(msk->rcv_rtt_est.samples[i])); + return rtt_us; +} + static inline struct mptcp_data_frag *mptcp_send_next(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); @@ -524,6 +558,7 @@ struct mptcp_subflow_context { u32 map_data_len; __wsum map_data_csum; u32 map_csum_len; + u32 prev_rtt_seq; u32 request_mptcp : 1, /* send MP_CAPABLE */ request_join : 1, /* send MP_JOIN */ request_bkup : 1, From 7272d8131a9dc9bda39ecbb639986889fee002a6 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 7 Apr 2026 10:45:18 +0200 Subject: [PATCH 1263/1561] mptcp: add receive queue awareness in tcp_rcv_space_adjust() This is the MPTCP counter-part of commit ea33537d8292 ("tcp: add receive queue awareness in tcp_rcv_space_adjust()"). Prior to this commit: ESTAB 33165568 0 192.168.255.2:5201 192.168.255.1:53380 \ skmem:(r33076416,rb33554432,t0,tb91136,f448,w0,o0,bl0,d0) After: ESTAB 3279168 0 192.168.255.2:5201 192.168.255.1]:53042 \ skmem:(r3190912,rb3719956,t0,tb91136,f1536,w0,o0,bl0,d0) Same throughput. Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260407-net-next-mptcp-reduce-rbuf-v2-2-0d1d135bf6f6@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 70a090a95299..cf5747595259 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2159,11 +2159,13 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) if (rtt_us == U32_MAX || time < (rtt_us >> 3)) return; - if (msk->rcvq_space.copied <= msk->rcvq_space.space) + copied = msk->rcvq_space.copied; + copied -= mptcp_inq_hint(sk); + if (copied <= msk->rcvq_space.space) goto new_measure; trace_mptcp_rcvbuf_grow(sk, time); - if (mptcp_rcvbuf_grow(sk, msk->rcvq_space.copied)) { + if (mptcp_rcvbuf_grow(sk, copied)) { /* Make subflows follow along. If we do not do this, we * get drops at subflow level if skbs can't be moved to * the mptcp rx queue fast enough (announced rcv_win can @@ -2177,7 +2179,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) slow = lock_sock_fast(ssk); /* subflows can be added before tcp_init_transfer() */ if (tcp_sk(ssk)->rcvq_space.space) - tcp_rcvbuf_grow(ssk, msk->rcvq_space.copied); + tcp_rcvbuf_grow(ssk, copied); unlock_sock_fast(ssk, slow); } } From b773b9935239e9bec86b96ce91b6ba2252c20b44 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 7 Apr 2026 00:21:55 +0300 Subject: [PATCH 1264/1561] net: dsa: remove struct platform_data This is not used anywhere in the kernel. Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20260406212158.721806-2-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- Documentation/networking/dsa/dsa.rst | 5 ----- include/linux/platform_data/dsa.h | 17 ----------------- 2 files changed, 22 deletions(-) diff --git a/Documentation/networking/dsa/dsa.rst b/Documentation/networking/dsa/dsa.rst index 5c79740a533b..fd3c254ced1d 100644 --- a/Documentation/networking/dsa/dsa.rst +++ b/Documentation/networking/dsa/dsa.rst @@ -383,11 +383,6 @@ DSA data structures are defined in ``include/net/dsa.h`` as well as well as various properties of its ports: names/labels, and finally a routing table indication (when cascading switches) -- ``dsa_platform_data``: platform device configuration data which can reference - a collection of dsa_chip_data structures if multiple switches are cascaded, - the conduit network device this switch tree is attached to needs to be - referenced - - ``dsa_switch_tree``: structure assigned to the conduit network device under ``dsa_ptr``, this structure references a dsa_platform_data structure as well as the tagging protocol supported by the switch tree, and which receive/transmit diff --git a/include/linux/platform_data/dsa.h b/include/linux/platform_data/dsa.h index d4d9bf2060a6..fec1ae5bddb9 100644 --- a/include/linux/platform_data/dsa.h +++ b/include/linux/platform_data/dsa.h @@ -48,21 +48,4 @@ struct dsa_chip_data { s8 rtable[DSA_MAX_SWITCHES]; }; -struct dsa_platform_data { - /* - * Reference to a Linux network interface that connects - * to the root switch chip of the tree. - */ - struct device *netdev; - struct net_device *of_netdev; - - /* - * Info structs describing each of the switch chips - * connected via this network interface. - */ - int nr_chips; - struct dsa_chip_data *chip; -}; - - #endif /* __DSA_PDATA_H */ From dc915f375e545cf72421d66ede983d88e298228f Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 7 Apr 2026 00:21:56 +0300 Subject: [PATCH 1265/1561] net: dsa: clean up struct dsa_chip_data This has accumulated some fields which are no longer parsed by the core or set by any driver. Remove them. Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20260406212158.721806-3-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- include/linux/platform_data/dsa.h | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/include/linux/platform_data/dsa.h b/include/linux/platform_data/dsa.h index fec1ae5bddb9..031f4cf83ae2 100644 --- a/include/linux/platform_data/dsa.h +++ b/include/linux/platform_data/dsa.h @@ -10,12 +10,6 @@ struct net_device; #define DSA_RTABLE_NONE -1 struct dsa_chip_data { - /* - * How to access the switch configuration registers. - */ - struct device *host_dev; - int sw_addr; - /* * Reference to network devices */ @@ -24,12 +18,6 @@ struct dsa_chip_data { /* set to size of eeprom if supported by the switch */ int eeprom_len; - /* Device tree node pointer for this specific switch chip - * used during switch setup in case additional properties - * and resources needs to be used - */ - struct device_node *of_node; - /* * The names of the switch's ports. Use "cpu" to * designate the switch port that the cpu is connected to, @@ -38,14 +26,6 @@ struct dsa_chip_data { * or any other string to indicate this is a physical port. */ char *port_names[DSA_MAX_PORTS]; - struct device_node *port_dn[DSA_MAX_PORTS]; - - /* - * An array of which element [a] indicates which port on this - * switch should be used to send packets to that are destined - * for switch a. Can be NULL if there is only one switch chip. - */ - s8 rtable[DSA_MAX_SWITCHES]; }; #endif /* __DSA_PDATA_H */ From c3b09190e658d3f1c3cd595df3a931962662f8f0 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 7 Apr 2026 00:21:57 +0300 Subject: [PATCH 1266/1561] net: dsa: remove unused platform_data definitions Pretty self-explanatory, nobody needs these. Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20260406212158.721806-4-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- include/linux/platform_data/dsa.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/linux/platform_data/dsa.h b/include/linux/platform_data/dsa.h index 031f4cf83ae2..77424bb24723 100644 --- a/include/linux/platform_data/dsa.h +++ b/include/linux/platform_data/dsa.h @@ -3,11 +3,8 @@ #define __DSA_PDATA_H struct device; -struct net_device; -#define DSA_MAX_SWITCHES 4 #define DSA_MAX_PORTS 12 -#define DSA_RTABLE_NONE -1 struct dsa_chip_data { /* From da9008674d9658de1e9f45d386ff6627313f39f7 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 7 Apr 2026 00:21:58 +0300 Subject: [PATCH 1267/1561] net: dsa: eliminate There is no reason at all to export these data types to the global include directory. Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20260406212158.721806-5-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/dsa_loop.c | 35 ++++++++++++++++++++++++++++++- include/linux/dsa/loop.h | 42 -------------------------------------- 2 files changed, 34 insertions(+), 43 deletions(-) delete mode 100644 include/linux/dsa/loop.h diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c index b41254b3ac42..7058faf23592 100644 --- a/drivers/net/dsa/dsa_loop.c +++ b/drivers/net/dsa/dsa_loop.c @@ -14,13 +14,46 @@ #include #include #include -#include +#include +#include #include #define DSA_LOOP_NUM_PORTS 6 #define DSA_LOOP_CPU_PORT (DSA_LOOP_NUM_PORTS - 1) #define NUM_FIXED_PHYS (DSA_LOOP_NUM_PORTS - 2) +struct dsa_loop_vlan { + u16 members; + u16 untagged; +}; + +struct dsa_loop_mib_entry { + char name[ETH_GSTRING_LEN]; + unsigned long val; +}; + +enum dsa_loop_mib_counters { + DSA_LOOP_PHY_READ_OK, + DSA_LOOP_PHY_READ_ERR, + DSA_LOOP_PHY_WRITE_OK, + DSA_LOOP_PHY_WRITE_ERR, + __DSA_LOOP_CNT_MAX, +}; + +struct dsa_loop_port { + struct dsa_loop_mib_entry mib[__DSA_LOOP_CNT_MAX]; + u16 pvid; + int mtu; +}; + +struct dsa_loop_priv { + struct mii_bus *bus; + unsigned int port_base; + struct dsa_loop_vlan vlans[VLAN_N_VID]; + struct net_device *netdev; + struct dsa_loop_port ports[DSA_MAX_PORTS]; +}; + struct dsa_loop_pdata { /* Must be first, such that dsa_register_switch() can access this * without gory pointer manipulations diff --git a/include/linux/dsa/loop.h b/include/linux/dsa/loop.h deleted file mode 100644 index b8fef35591aa..000000000000 --- a/include/linux/dsa/loop.h +++ /dev/null @@ -1,42 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef DSA_LOOP_H -#define DSA_LOOP_H - -#include -#include -#include -#include - -struct dsa_loop_vlan { - u16 members; - u16 untagged; -}; - -struct dsa_loop_mib_entry { - char name[ETH_GSTRING_LEN]; - unsigned long val; -}; - -enum dsa_loop_mib_counters { - DSA_LOOP_PHY_READ_OK, - DSA_LOOP_PHY_READ_ERR, - DSA_LOOP_PHY_WRITE_OK, - DSA_LOOP_PHY_WRITE_ERR, - __DSA_LOOP_CNT_MAX, -}; - -struct dsa_loop_port { - struct dsa_loop_mib_entry mib[__DSA_LOOP_CNT_MAX]; - u16 pvid; - int mtu; -}; - -struct dsa_loop_priv { - struct mii_bus *bus; - unsigned int port_base; - struct dsa_loop_vlan vlans[VLAN_N_VID]; - struct net_device *netdev; - struct dsa_loop_port ports[DSA_MAX_PORTS]; -}; - -#endif /* DSA_LOOP_H */ From 5ae4ba98d72509a4da592751be4d6b4dbfa8cac8 Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Mon, 6 Apr 2026 15:26:54 +0800 Subject: [PATCH 1268/1561] selftests/drivers/net: Add an xdp test to xdp.py In "bpf: Disallow freplace on XDP with mismatched xdp_has_frags values" [1], this XDP test is suggested to add to xdp.py. 1. Verify the failure of updating frag-capable prog with non-frag-capable prog, when the frag-capable prog attaches to mtu=9k driver. The test has been verified against Mellanox CX6 and Intel 82599ES NICs. With dropping other tests, here is the test log. # ethtool -i eth0 driver: mlx5_core version: 6.19.0-061900-generic # NETIF=eth0 python3 xdp.py TAP version 13 1..1 ok 1 xdp.test_xdp_native_update_mb_to_sb # Totals: pass:1 fail:0 xfail:0 xpass:0 skip:0 error:0 # ethtool -i eth0 driver: ixgbe version: 6.19.0-061900-generic # NETIF=eth0 python3 xdp.py TAP version 13 1..1 # CMD: ip link set dev eth0 xdpdrv obj /path/to/tools/testing/selftests/net/lib/xdp_dummy.bpf.o sec xdp.frags # EXIT: 2 # STDERR: RTNETLINK answers: Invalid argument ok 1 xdp.test_xdp_native_update_mb_to_sb # SKIP device does not support multi-buffer XDP # Totals: pass:0 fail:0 xfail:0 xpass:0 skip:1 error:0 Signed-off-by: Leon Hwang Link: https://patch.msgid.link/20260406072655.368173-1-leon.huangfu@shopee.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/xdp.py | 31 +++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/xdp.py b/tools/testing/selftests/drivers/net/xdp.py index 7e635487d517..2ad5932299e8 100755 --- a/tools/testing/selftests/drivers/net/xdp.py +++ b/tools/testing/selftests/drivers/net/xdp.py @@ -13,7 +13,7 @@ from enum import Enum from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, ksft_ne, ksft_pr from lib.py import KsftNamedVariant, ksft_variants -from lib.py import KsftFailEx, NetDrvEpEnv +from lib.py import KsftFailEx, KsftSkipEx, NetDrvEpEnv from lib.py import EthtoolFamily, NetdevFamily, NlError from lib.py import bkg, cmd, rand_port, wait_port_listen from lib.py import ip, defer @@ -693,6 +693,34 @@ def test_xdp_native_qstats(cfg, act): ksft_ge(after['tx-packets'], before['tx-packets']) +def test_xdp_native_update_mb_to_sb(cfg): + """ + Test multi-buf to single-buf replacement with jumbo MTU. + """ + obj = cfg.net_lib_dir / "xdp_dummy.bpf.o" + mtu = 9000 + + ip(f"link set dev {cfg.ifname} mtu {mtu}") + defer(ip, f"link set dev {cfg.ifname} mtu {cfg.dev['mtu']} xdpdrv off") + + attach = cmd(f"ip link set dev {cfg.ifname} xdpdrv obj {obj} sec xdp", fail=False) + if attach.ret == 0: + raise KsftSkipEx(f"device supports single-buffer XDP with mtu {mtu}") + + attach = cmd(f"ip link set dev {cfg.ifname} xdpdrv obj {obj} sec xdp.frags", fail=False) + if attach.ret != 0: + ksft_pr(attach) + raise KsftSkipEx("device does not support multi-buffer XDP") + + # Verify updating mb -> mb program works. + cmd(f"ip -force link set dev {cfg.ifname} xdpdrv obj {obj} sec xdp.frags") + + # Verify updating mb -> sb program does not work. + update = cmd(f"ip -force link set dev {cfg.ifname} xdpdrv obj {obj} sec xdp", fail=False) + if update.ret == 0: + raise KsftFailEx("device unexpectedly updates non-multi-buffer XDP") + + def main(): """ Main function to execute the XDP tests. @@ -718,6 +746,7 @@ def main(): test_xdp_native_adjst_head_grow_data, test_xdp_native_adjst_head_shrnk_data, test_xdp_native_qstats, + test_xdp_native_update_mb_to_sb, ], args=(cfg,)) ksft_exit() From 7be3163c49b24bf923d32f333096963159e03a14 Mon Sep 17 00:00:00 2001 From: Or Har-Toov Date: Tue, 7 Apr 2026 22:40:56 +0300 Subject: [PATCH 1269/1561] devlink: Refactor resource functions to be generic Currently the resource functions take devlink pointer as parameter and take the resource list from there. Allow resource functions to work with other resource lists that will be added in next patches and not only with the devlink's resource list. Signed-off-by: Or Har-Toov Reviewed-by: Shay Drori Reviewed-by: Moshe Shemesh Reviewed-by: Jiri Pirko Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260407194107.148063-2-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 2 +- net/devlink/resource.c | 118 ++++++++++++++++++++++++++--------------- 2 files changed, 75 insertions(+), 45 deletions(-) diff --git a/include/net/devlink.h b/include/net/devlink.h index 3038af6ec017..f5439d050eb0 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1885,7 +1885,7 @@ int devl_resource_register(struct devlink *devlink, u64 resource_size, u64 resource_id, u64 parent_resource_id, - const struct devlink_resource_size_params *size_params); + const struct devlink_resource_size_params *params); void devl_resources_unregister(struct devlink *devlink); void devlink_resources_unregister(struct devlink *devlink); int devl_resource_size_get(struct devlink *devlink, diff --git a/net/devlink/resource.c b/net/devlink/resource.c index 351835a710b1..ee169a467d48 100644 --- a/net/devlink/resource.c +++ b/net/devlink/resource.c @@ -36,15 +36,16 @@ struct devlink_resource { }; static struct devlink_resource * -devlink_resource_find(struct devlink *devlink, - struct devlink_resource *resource, u64 resource_id) +__devlink_resource_find(struct list_head *resource_list_head, + struct devlink_resource *resource, + u64 resource_id) { struct list_head *resource_list; if (resource) resource_list = &resource->resource_list; else - resource_list = &devlink->resource_list; + resource_list = resource_list_head; list_for_each_entry(resource, resource_list, list) { struct devlink_resource *child_resource; @@ -52,14 +53,23 @@ devlink_resource_find(struct devlink *devlink, if (resource->id == resource_id) return resource; - child_resource = devlink_resource_find(devlink, resource, - resource_id); + child_resource = __devlink_resource_find(resource_list_head, + resource, + resource_id); if (child_resource) return child_resource; } return NULL; } +static struct devlink_resource * +devlink_resource_find(struct devlink *devlink, + struct devlink_resource *resource, u64 resource_id) +{ + return __devlink_resource_find(&devlink->resource_list, + resource, resource_id); +} + static void devlink_resource_validate_children(struct devlink_resource *resource) { @@ -314,26 +324,12 @@ int devlink_resources_validate(struct devlink *devlink, return err; } -/** - * devl_resource_register - devlink resource register - * - * @devlink: devlink - * @resource_name: resource's name - * @resource_size: resource's size - * @resource_id: resource's id - * @parent_resource_id: resource's parent id - * @size_params: size parameters - * - * Generic resources should reuse the same names across drivers. - * Please see the generic resources list at: - * Documentation/networking/devlink/devlink-resource.rst - */ -int devl_resource_register(struct devlink *devlink, - const char *resource_name, - u64 resource_size, - u64 resource_id, - u64 parent_resource_id, - const struct devlink_resource_size_params *size_params) +static int +__devl_resource_register(struct devlink *devlink, + struct list_head *resource_list_head, + const char *resource_name, u64 resource_size, + u64 resource_id, u64 parent_resource_id, + const struct devlink_resource_size_params *params) { struct devlink_resource *resource; struct list_head *resource_list; @@ -343,7 +339,8 @@ int devl_resource_register(struct devlink *devlink, top_hierarchy = parent_resource_id == DEVLINK_RESOURCE_ID_PARENT_TOP; - resource = devlink_resource_find(devlink, NULL, resource_id); + resource = __devlink_resource_find(resource_list_head, NULL, + resource_id); if (resource) return -EEXIST; @@ -352,12 +349,13 @@ int devl_resource_register(struct devlink *devlink, return -ENOMEM; if (top_hierarchy) { - resource_list = &devlink->resource_list; + resource_list = resource_list_head; } else { struct devlink_resource *parent_resource; - parent_resource = devlink_resource_find(devlink, NULL, - parent_resource_id); + parent_resource = __devlink_resource_find(resource_list_head, + NULL, + parent_resource_id); if (parent_resource) { resource_list = &parent_resource->resource_list; resource->parent = parent_resource; @@ -372,23 +370,64 @@ int devl_resource_register(struct devlink *devlink, resource->size_new = resource_size; resource->id = resource_id; resource->size_valid = true; - memcpy(&resource->size_params, size_params, - sizeof(resource->size_params)); + memcpy(&resource->size_params, params, sizeof(resource->size_params)); INIT_LIST_HEAD(&resource->resource_list); list_add_tail(&resource->list, resource_list); return 0; } + +/** + * devl_resource_register - devlink resource register + * + * @devlink: devlink + * @resource_name: resource's name + * @resource_size: resource's size + * @resource_id: resource's id + * @parent_resource_id: resource's parent id + * @params: size parameters + * + * Generic resources should reuse the same names across drivers. + * Please see the generic resources list at: + * Documentation/networking/devlink/devlink-resource.rst + * + * Return: 0 on success, negative error code otherwise. + */ +int devl_resource_register(struct devlink *devlink, const char *resource_name, + u64 resource_size, u64 resource_id, + u64 parent_resource_id, + const struct devlink_resource_size_params *params) +{ + return __devl_resource_register(devlink, &devlink->resource_list, + resource_name, resource_size, + resource_id, parent_resource_id, + params); +} EXPORT_SYMBOL_GPL(devl_resource_register); -static void devlink_resource_unregister(struct devlink *devlink, - struct devlink_resource *resource) +static void devlink_resource_unregister(struct devlink_resource *resource) { struct devlink_resource *tmp, *child_resource; list_for_each_entry_safe(child_resource, tmp, &resource->resource_list, list) { - devlink_resource_unregister(devlink, child_resource); + devlink_resource_unregister(child_resource); + list_del(&child_resource->list); + kfree(child_resource); + } +} + +static void +__devl_resources_unregister(struct devlink *devlink, + struct list_head *resource_list_head) +{ + struct devlink_resource *tmp, *child_resource; + + lockdep_assert_held(&devlink->lock); + + list_for_each_entry_safe(child_resource, tmp, resource_list_head, + list) { + devlink_resource_unregister(child_resource); list_del(&child_resource->list); kfree(child_resource); } @@ -401,16 +440,7 @@ static void devlink_resource_unregister(struct devlink *devlink, */ void devl_resources_unregister(struct devlink *devlink) { - struct devlink_resource *tmp, *child_resource; - - lockdep_assert_held(&devlink->lock); - - list_for_each_entry_safe(child_resource, tmp, &devlink->resource_list, - list) { - devlink_resource_unregister(devlink, child_resource); - list_del(&child_resource->list); - kfree(child_resource); - } + __devl_resources_unregister(devlink, &devlink->resource_list); } EXPORT_SYMBOL_GPL(devl_resources_unregister); From 6f38acfed5edb398201d9ff127919745cbb331a1 Mon Sep 17 00:00:00 2001 From: Or Har-Toov Date: Tue, 7 Apr 2026 22:40:57 +0300 Subject: [PATCH 1270/1561] devlink: Add port-level resource registration infrastructure The current devlink resource infrastructure supports only device-level resources. Some hardware resources are associated with specific ports rather than the entire device, and today we have no way to show resource per-port. Add support for registering resources at the port level. Signed-off-by: Or Har-Toov Reviewed-by: Shay Drori Reviewed-by: Moshe Shemesh Reviewed-by: Jiri Pirko Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260407194107.148063-3-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 8 ++++++++ net/devlink/port.c | 2 ++ net/devlink/resource.c | 43 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 53 insertions(+) diff --git a/include/net/devlink.h b/include/net/devlink.h index f5439d050eb0..bcd31de1f890 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -129,6 +129,7 @@ struct devlink_rate { struct devlink_port { struct list_head list; struct list_head region_list; + struct list_head resource_list; struct devlink *devlink; const struct devlink_port_ops *ops; unsigned int index; @@ -1891,6 +1892,13 @@ void devlink_resources_unregister(struct devlink *devlink); int devl_resource_size_get(struct devlink *devlink, u64 resource_id, u64 *p_resource_size); +int +devl_port_resource_register(struct devlink_port *devlink_port, + const char *resource_name, + u64 resource_size, u64 resource_id, + u64 parent_resource_id, + const struct devlink_resource_size_params *params); +void devl_port_resources_unregister(struct devlink_port *devlink_port); int devl_dpipe_table_resource_set(struct devlink *devlink, const char *table_name, u64 resource_id, u64 resource_units); diff --git a/net/devlink/port.c b/net/devlink/port.c index 7fcd1d3ed44c..485029d43428 100644 --- a/net/devlink/port.c +++ b/net/devlink/port.c @@ -1025,6 +1025,7 @@ void devlink_port_init(struct devlink *devlink, return; devlink_port->devlink = devlink; INIT_LIST_HEAD(&devlink_port->region_list); + INIT_LIST_HEAD(&devlink_port->resource_list); devlink_port->initialized = true; } EXPORT_SYMBOL_GPL(devlink_port_init); @@ -1042,6 +1043,7 @@ EXPORT_SYMBOL_GPL(devlink_port_init); void devlink_port_fini(struct devlink_port *devlink_port) { WARN_ON(!list_empty(&devlink_port->region_list)); + WARN_ON(!list_empty(&devlink_port->resource_list)); } EXPORT_SYMBOL_GPL(devlink_port_fini); diff --git a/net/devlink/resource.c b/net/devlink/resource.c index ee169a467d48..f3014ec425c4 100644 --- a/net/devlink/resource.c +++ b/net/devlink/resource.c @@ -532,3 +532,46 @@ void devl_resource_occ_get_unregister(struct devlink *devlink, resource->occ_get_priv = NULL; } EXPORT_SYMBOL_GPL(devl_resource_occ_get_unregister); + +/** + * devl_port_resource_register - devlink port resource register + * + * @devlink_port: devlink port + * @resource_name: resource's name + * @resource_size: resource's size + * @resource_id: resource's id + * @parent_resource_id: resource's parent id + * @params: size parameters + * + * Generic resources should reuse the same names across drivers. + * Please see the generic resources list at: + * Documentation/networking/devlink/devlink-resource.rst + * + * Return: 0 on success, negative error code otherwise. + */ +int +devl_port_resource_register(struct devlink_port *devlink_port, + const char *resource_name, + u64 resource_size, u64 resource_id, + u64 parent_resource_id, + const struct devlink_resource_size_params *params) +{ + return __devl_resource_register(devlink_port->devlink, + &devlink_port->resource_list, + resource_name, resource_size, + resource_id, parent_resource_id, + params); +} +EXPORT_SYMBOL_GPL(devl_port_resource_register); + +/** + * devl_port_resources_unregister - unregister all devlink port resources + * + * @devlink_port: devlink port + */ +void devl_port_resources_unregister(struct devlink_port *devlink_port) +{ + __devl_resources_unregister(devlink_port->devlink, + &devlink_port->resource_list); +} +EXPORT_SYMBOL_GPL(devl_port_resources_unregister); From 4be8326d817e6d47f8446938408bb2001b799340 Mon Sep 17 00:00:00 2001 From: Or Har-Toov Date: Tue, 7 Apr 2026 22:40:58 +0300 Subject: [PATCH 1271/1561] net/mlx5: Register SF resource on PF port representor The device-level "resource show" displays max_local_SFs and max_external_SFs without indicating which port each resource belongs to. Users cannot determine the controller number and pfnum associated with each SF pool. Register max_SFs resource on the host PF representor port to expose per-port SF limits. Users can correlate the port resource with the controller number and pfnum shown in 'devlink port show'. Future patches will introduce an ECPF that manages multiple PFs, where each PF has its own SF pool. Example usage: $ devlink resource show pci/0000:03:00.0/196608 pci/0000:03:00.0/196608: name max_SFs size 20 unit entry $ devlink port show pci/0000:03:00.0/196608 pci/0000:03:00.0/196608: type eth netdev pf0hpf flavour pcipf controller 1 pfnum 0 external true splittable false function: hw_addr b8:3f:d2:e1:8f:dc roce enable max_io_eqs 120 We can create up to 20 SFs over devlink port pci/0000:03:00.0/196608, with pfnum 0 and controller 1. Signed-off-by: Or Har-Toov Reviewed-by: Shay Drori Reviewed-by: Moshe Shemesh Reviewed-by: Jiri Pirko Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260407194107.148063-4-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/devlink.h | 4 ++ .../mellanox/mlx5/core/esw/devlink_port.c | 37 +++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h index 43b9bf8829cf..4fbb3926a3e5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h @@ -14,6 +14,10 @@ enum mlx5_devlink_resource_id { MLX5_ID_RES_MAX = __MLX5_ID_RES_MAX - 1, }; +enum mlx5_devlink_port_resource_id { + MLX5_DL_PORT_RES_MAX_SFS = 1, +}; + enum mlx5_devlink_param_id { MLX5_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, MLX5_DEVLINK_PARAM_ID_FLOW_STEERING_MODE, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c index 8bffba85f21f..e1d11326af1b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c @@ -3,6 +3,7 @@ #include #include "eswitch.h" +#include "devlink.h" static void mlx5_esw_get_port_parent_id(struct mlx5_core_dev *dev, struct netdev_phys_item_id *ppid) @@ -158,6 +159,32 @@ static const struct devlink_port_ops mlx5_esw_dl_sf_port_ops = { .port_fn_max_io_eqs_set = mlx5_devlink_port_fn_max_io_eqs_set, }; +static int mlx5_esw_devlink_port_res_register(struct mlx5_eswitch *esw, + struct devlink_port *dl_port) +{ + struct devlink_resource_size_params size_params; + struct mlx5_core_dev *dev = esw->dev; + u16 max_sfs, sf_base_id; + int err; + + err = mlx5_esw_sf_max_hpf_functions(dev, &max_sfs, &sf_base_id); + if (err) + return err; + + devlink_resource_size_params_init(&size_params, max_sfs, max_sfs, 1, + DEVLINK_RESOURCE_UNIT_ENTRY); + + return devl_port_resource_register(dl_port, "max_SFs", max_sfs, + MLX5_DL_PORT_RES_MAX_SFS, + DEVLINK_RESOURCE_ID_PARENT_TOP, + &size_params); +} + +static void mlx5_esw_devlink_port_res_unregister(struct devlink_port *dl_port) +{ + devl_port_resources_unregister(dl_port); +} + int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { struct mlx5_core_dev *dev = esw->dev; @@ -189,6 +216,15 @@ int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, struct mlx if (err) goto rate_err; + if (vport_num == MLX5_VPORT_PF) { + err = mlx5_esw_devlink_port_res_register(esw, + &dl_port->dl_port); + if (err) + mlx5_core_dbg(dev, + "Failed to register port resources: %d\n", + err); + } + return 0; rate_err: @@ -203,6 +239,7 @@ void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_vport *vport) if (!vport->dl_port) return; dl_port = vport->dl_port; + mlx5_esw_devlink_port_res_unregister(&dl_port->dl_port); mlx5_esw_qos_vport_update_parent(vport, NULL, NULL); devl_rate_leaf_destroy(&dl_port->dl_port); From 085b234b28ccf13af96c2465d4b82c3bc46a7885 Mon Sep 17 00:00:00 2001 From: Or Har-Toov Date: Tue, 7 Apr 2026 22:40:59 +0300 Subject: [PATCH 1272/1561] netdevsim: Add devlink port resource registration Register port-level resources for netdevsim ports to enable testing of the port resource infrastructure. Signed-off-by: Or Har-Toov Reviewed-by: Shay Drori Reviewed-by: Moshe Shemesh Reviewed-by: Jiri Pirko Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260407194107.148063-5-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/dev.c | 23 ++++++++++++++++++++++- drivers/net/netdevsim/netdevsim.h | 4 ++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index e82de0fd3157..1e06e781c835 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -1486,9 +1486,25 @@ static int __nsim_dev_port_add(struct nsim_dev *nsim_dev, enum nsim_dev_port_typ if (err) goto err_port_free; + if (nsim_dev_port_is_pf(nsim_dev_port)) { + u64 parent_id = DEVLINK_RESOURCE_ID_PARENT_TOP; + struct devlink_resource_size_params params = { + .size_max = 100, + .size_granularity = 1, + .unit = DEVLINK_RESOURCE_UNIT_ENTRY + }; + + err = devl_port_resource_register(devlink_port, + "test_resource", 20, + NSIM_PORT_RESOURCE_TEST, + parent_id, ¶ms); + if (err) + goto err_dl_port_unregister; + } + err = nsim_dev_port_debugfs_init(nsim_dev, nsim_dev_port); if (err) - goto err_dl_port_unregister; + goto err_port_resource_unregister; nsim_dev_port->ns = nsim_create(nsim_dev, nsim_dev_port, perm_addr); if (IS_ERR(nsim_dev_port->ns)) { @@ -1511,6 +1527,9 @@ err_nsim_destroy: nsim_destroy(nsim_dev_port->ns); err_port_debugfs_exit: nsim_dev_port_debugfs_exit(nsim_dev_port); +err_port_resource_unregister: + if (nsim_dev_port_is_pf(nsim_dev_port)) + devl_port_resources_unregister(devlink_port); err_dl_port_unregister: devl_port_unregister(devlink_port); err_port_free: @@ -1527,6 +1546,8 @@ static void __nsim_dev_port_del(struct nsim_dev_port *nsim_dev_port) devl_rate_leaf_destroy(&nsim_dev_port->devlink_port); nsim_destroy(nsim_dev_port->ns); nsim_dev_port_debugfs_exit(nsim_dev_port); + if (nsim_dev_port_is_pf(nsim_dev_port)) + devl_port_resources_unregister(devlink_port); devl_port_unregister(devlink_port); kfree(nsim_dev_port); } diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index c904e14f6b3f..c7de53706ec4 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -224,6 +224,10 @@ enum nsim_resource_id { NSIM_RESOURCE_NEXTHOPS, }; +enum nsim_port_resource_id { + NSIM_PORT_RESOURCE_TEST = 1, +}; + struct nsim_dev_health { struct devlink_health_reporter *empty_reporter; struct devlink_health_reporter *dummy_reporter; From 11636b550eea9e480fe4ceec8ab7bd8f24b363da Mon Sep 17 00:00:00 2001 From: Or Har-Toov Date: Tue, 7 Apr 2026 22:41:00 +0300 Subject: [PATCH 1273/1561] devlink: Add dump support for device-level resources Add dumpit handler for resource-dump command to iterate over all devlink devices and show their resources. $ devlink resource show pci/0000:08:00.0: name local_max_SFs size 508 unit entry name external_max_SFs size 508 unit entry pci/0000:08:00.1: name local_max_SFs size 508 unit entry name external_max_SFs size 508 unit entry Signed-off-by: Or Har-Toov Reviewed-by: Shay Drori Reviewed-by: Moshe Shemesh Reviewed-by: Jiri Pirko Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260407194107.148063-6-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/devlink.yaml | 6 +- net/devlink/netlink_gen.c | 20 +++++- net/devlink/netlink_gen.h | 4 +- net/devlink/resource.c | 77 ++++++++++++++++++++++++ 4 files changed, 102 insertions(+), 5 deletions(-) diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml index b495d56b9137..c423e049c7bd 100644 --- a/Documentation/netlink/specs/devlink.yaml +++ b/Documentation/netlink/specs/devlink.yaml @@ -1764,13 +1764,17 @@ operations: - bus-name - dev-name - index - reply: + reply: &resource-dump-reply value: 36 attributes: - bus-name - dev-name - index - resource-list + dump: + request: + attributes: *dev-id-attrs + reply: *resource-dump-reply - name: reload diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c index eb35e80e01d1..a5a47a4c6de8 100644 --- a/net/devlink/netlink_gen.c +++ b/net/devlink/netlink_gen.c @@ -305,7 +305,14 @@ static const struct nla_policy devlink_resource_set_nl_policy[DEVLINK_ATTR_INDEX }; /* DEVLINK_CMD_RESOURCE_DUMP - do */ -static const struct nla_policy devlink_resource_dump_nl_policy[DEVLINK_ATTR_INDEX + 1] = { +static const struct nla_policy devlink_resource_dump_do_nl_policy[DEVLINK_ATTR_INDEX + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), +}; + +/* DEVLINK_CMD_RESOURCE_DUMP - dump */ +static const struct nla_policy devlink_resource_dump_dump_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), @@ -680,7 +687,7 @@ static const struct nla_policy devlink_notify_filter_set_nl_policy[DEVLINK_ATTR_ }; /* Ops table for devlink */ -const struct genl_split_ops devlink_nl_ops[74] = { +const struct genl_split_ops devlink_nl_ops[75] = { { .cmd = DEVLINK_CMD_GET, .validate = GENL_DONT_VALIDATE_STRICT, @@ -958,10 +965,17 @@ const struct genl_split_ops devlink_nl_ops[74] = { .pre_doit = devlink_nl_pre_doit, .doit = devlink_nl_resource_dump_doit, .post_doit = devlink_nl_post_doit, - .policy = devlink_resource_dump_nl_policy, + .policy = devlink_resource_dump_do_nl_policy, .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, + { + .cmd = DEVLINK_CMD_RESOURCE_DUMP, + .dumpit = devlink_nl_resource_dump_dumpit, + .policy = devlink_resource_dump_dump_nl_policy, + .maxattr = DEVLINK_ATTR_INDEX, + .flags = GENL_CMD_CAP_DUMP, + }, { .cmd = DEVLINK_CMD_RELOAD, .validate = GENL_DONT_VALIDATE_STRICT, diff --git a/net/devlink/netlink_gen.h b/net/devlink/netlink_gen.h index 2817d53a0eba..d79f6a0888f6 100644 --- a/net/devlink/netlink_gen.h +++ b/net/devlink/netlink_gen.h @@ -18,7 +18,7 @@ extern const struct nla_policy devlink_dl_rate_tc_bws_nl_policy[DEVLINK_RATE_TC_ extern const struct nla_policy devlink_dl_selftest_id_nl_policy[DEVLINK_ATTR_SELFTEST_ID_FLASH + 1]; /* Ops table for devlink */ -extern const struct genl_split_ops devlink_nl_ops[74]; +extern const struct genl_split_ops devlink_nl_ops[75]; int devlink_nl_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); @@ -80,6 +80,8 @@ int devlink_nl_dpipe_table_counters_set_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_resource_set_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_resource_dump_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_resource_dump_dumpit(struct sk_buff *skb, + struct netlink_callback *cb); int devlink_nl_reload_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_param_get_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_param_get_dumpit(struct sk_buff *skb, diff --git a/net/devlink/resource.c b/net/devlink/resource.c index f3014ec425c4..02fb36e25c52 100644 --- a/net/devlink/resource.c +++ b/net/devlink/resource.c @@ -223,6 +223,31 @@ nla_put_failure: return -EMSGSIZE; } +static int devlink_resource_list_fill(struct sk_buff *skb, + struct devlink *devlink, + struct list_head *resource_list_head, + int *idx) +{ + struct devlink_resource *resource; + int i = 0; + int err; + + list_for_each_entry(resource, resource_list_head, list) { + if (i < *idx) { + i++; + continue; + } + err = devlink_resource_put(devlink, skb, resource); + if (err) { + *idx = i; + return err; + } + i++; + } + *idx = 0; + return 0; +} + static int devlink_resource_fill(struct genl_info *info, enum devlink_command cmd, int flags) { @@ -302,6 +327,58 @@ int devlink_nl_resource_dump_doit(struct sk_buff *skb, struct genl_info *info) return devlink_resource_fill(info, DEVLINK_CMD_RESOURCE_DUMP, 0); } +static int +devlink_nl_resource_dump_one(struct sk_buff *skb, struct devlink *devlink, + struct netlink_callback *cb, int flags) +{ + struct devlink_nl_dump_state *state = devlink_dump_state(cb); + struct nlattr *resources_attr; + int start_idx = state->idx; + void *hdr; + int err; + + if (list_empty(&devlink->resource_list)) + return 0; + + err = -EMSGSIZE; + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + &devlink_nl_family, flags, DEVLINK_CMD_RESOURCE_DUMP); + if (!hdr) + return err; + + if (devlink_nl_put_handle(skb, devlink)) + goto nla_put_failure; + + resources_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_RESOURCE_LIST); + if (!resources_attr) + goto nla_put_failure; + + err = devlink_resource_list_fill(skb, devlink, + &devlink->resource_list, &state->idx); + if (err) { + if (state->idx == start_idx) + goto resource_list_cancel; + nla_nest_end(skb, resources_attr); + genlmsg_end(skb, hdr); + return err; + } + nla_nest_end(skb, resources_attr); + genlmsg_end(skb, hdr); + return 0; + +resource_list_cancel: + nla_nest_cancel(skb, resources_attr); +nla_put_failure: + genlmsg_cancel(skb, hdr); + return err; +} + +int devlink_nl_resource_dump_dumpit(struct sk_buff *skb, + struct netlink_callback *cb) +{ + return devlink_nl_dumpit(skb, cb, devlink_nl_resource_dump_one); +} + int devlink_resources_validate(struct devlink *devlink, struct devlink_resource *resource, struct genl_info *info) From 810b76394d69f340c0060260f7167639352b7217 Mon Sep 17 00:00:00 2001 From: Or Har-Toov Date: Tue, 7 Apr 2026 22:41:01 +0300 Subject: [PATCH 1274/1561] devlink: Include port resources in resource dump dumpit Allow querying devlink resources per-port via the resource-dump dumpit handler. Both device-level and all ports resources are included in the reply. For example: $ devlink resource show pci/0000:03:00.0: name local_max_SFs size 508 unit entry name external_max_SFs size 508 unit entry pci/0000:03:00.0/196608: name max_SFs size 20 unit entry pci/0000:03:00.1: name local_max_SFs size 508 unit entry name external_max_SFs size 508 unit entry pci/0000:03:00.1/262144: name max_SFs size 20 unit entry Signed-off-by: Or Har-Toov Reviewed-by: Moshe Shemesh Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260407194107.148063-7-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- net/devlink/devl_internal.h | 5 ++++ net/devlink/netlink.c | 2 ++ net/devlink/resource.c | 59 ++++++++++++++++++++++++++++++++----- 3 files changed, 58 insertions(+), 8 deletions(-) diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h index 7dfb7cdd2d23..e4e48ee2da5a 100644 --- a/net/devlink/devl_internal.h +++ b/net/devlink/devl_internal.h @@ -164,6 +164,11 @@ struct devlink_nl_dump_state { struct { u64 dump_ts; }; + /* DEVLINK_CMD_RESOURCE_DUMP */ + struct { + u32 index; + bool index_valid; + } port_ctx; }; }; diff --git a/net/devlink/netlink.c b/net/devlink/netlink.c index 32ddbe244cb7..ae4afc739678 100644 --- a/net/devlink/netlink.c +++ b/net/devlink/netlink.c @@ -370,6 +370,8 @@ static int devlink_nl_inst_iter_dumpit(struct sk_buff *msg, /* restart sub-object walk for the next instance */ state->idx = 0; + state->port_ctx.index = 0; + state->port_ctx.index_valid = false; } if (err != -EMSGSIZE) diff --git a/net/devlink/resource.c b/net/devlink/resource.c index 02fb36e25c52..7984eda63eb6 100644 --- a/net/devlink/resource.c +++ b/net/devlink/resource.c @@ -328,16 +328,20 @@ int devlink_nl_resource_dump_doit(struct sk_buff *skb, struct genl_info *info) } static int -devlink_nl_resource_dump_one(struct sk_buff *skb, struct devlink *devlink, - struct netlink_callback *cb, int flags) +devlink_resource_dump_fill_one(struct sk_buff *skb, struct devlink *devlink, + struct devlink_port *devlink_port, + struct netlink_callback *cb, int flags, int *idx) { - struct devlink_nl_dump_state *state = devlink_dump_state(cb); + struct list_head *resource_list; struct nlattr *resources_attr; - int start_idx = state->idx; + int start_idx = *idx; void *hdr; int err; - if (list_empty(&devlink->resource_list)) + resource_list = devlink_port ? + &devlink_port->resource_list : &devlink->resource_list; + + if (list_empty(resource_list)) return 0; err = -EMSGSIZE; @@ -348,15 +352,17 @@ devlink_nl_resource_dump_one(struct sk_buff *skb, struct devlink *devlink, if (devlink_nl_put_handle(skb, devlink)) goto nla_put_failure; + if (devlink_port && + nla_put_u32(skb, DEVLINK_ATTR_PORT_INDEX, devlink_port->index)) + goto nla_put_failure; resources_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_RESOURCE_LIST); if (!resources_attr) goto nla_put_failure; - err = devlink_resource_list_fill(skb, devlink, - &devlink->resource_list, &state->idx); + err = devlink_resource_list_fill(skb, devlink, resource_list, idx); if (err) { - if (state->idx == start_idx) + if (*idx == start_idx) goto resource_list_cancel; nla_nest_end(skb, resources_attr); genlmsg_end(skb, hdr); @@ -373,6 +379,43 @@ nla_put_failure: return err; } +static int +devlink_nl_resource_dump_one(struct sk_buff *skb, struct devlink *devlink, + struct netlink_callback *cb, int flags) +{ + struct devlink_nl_dump_state *state = devlink_dump_state(cb); + struct devlink_port *devlink_port; + unsigned long port_idx; + int err; + + if (!state->port_ctx.index_valid) { + err = devlink_resource_dump_fill_one(skb, devlink, NULL, + cb, flags, &state->idx); + if (err) + return err; + state->idx = 0; + } + + /* Check in case port was removed between dump callbacks. */ + if (state->port_ctx.index_valid && + !xa_load(&devlink->ports, state->port_ctx.index)) + state->idx = 0; + state->port_ctx.index_valid = true; + xa_for_each_start(&devlink->ports, port_idx, devlink_port, + state->port_ctx.index) { + err = devlink_resource_dump_fill_one(skb, devlink, devlink_port, + cb, flags, &state->idx); + if (err) { + state->port_ctx.index = port_idx; + return err; + } + state->idx = 0; + } + state->port_ctx.index_valid = false; + state->port_ctx.index = 0; + return 0; +} + int devlink_nl_resource_dump_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { From 7511ff14f30d264691ec493b09111404aed4aaf4 Mon Sep 17 00:00:00 2001 From: Or Har-Toov Date: Tue, 7 Apr 2026 22:41:02 +0300 Subject: [PATCH 1275/1561] devlink: Add port-specific option to resource dump doit Allow querying devlink resources per-port via the resource-dump doit handler. When a port-index attribute is provided, only that port's resources are returned. When no port-index is given, only device-level resources are returned, preserving backward compatibility. Signed-off-by: Or Har-Toov Reviewed-by: Moshe Shemesh Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260407194107.148063-8-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/devlink.yaml | 4 +++- net/devlink/netlink_gen.c | 3 ++- net/devlink/netlink_gen.h | 4 ++-- net/devlink/resource.c | 20 +++++++++++++++++--- 4 files changed, 24 insertions(+), 7 deletions(-) diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml index c423e049c7bd..34aa81ba689e 100644 --- a/Documentation/netlink/specs/devlink.yaml +++ b/Documentation/netlink/specs/devlink.yaml @@ -1757,19 +1757,21 @@ operations: attribute-set: devlink dont-validate: [strict] do: - pre: devlink-nl-pre-doit + pre: devlink-nl-pre-doit-port-optional post: devlink-nl-post-doit request: attributes: - bus-name - dev-name - index + - port-index reply: &resource-dump-reply value: 36 attributes: - bus-name - dev-name - index + - port-index - resource-list dump: request: diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c index a5a47a4c6de8..9cc372d9ee41 100644 --- a/net/devlink/netlink_gen.c +++ b/net/devlink/netlink_gen.c @@ -309,6 +309,7 @@ static const struct nla_policy devlink_resource_dump_do_nl_policy[DEVLINK_ATTR_I [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, }; /* DEVLINK_CMD_RESOURCE_DUMP - dump */ @@ -962,7 +963,7 @@ const struct genl_split_ops devlink_nl_ops[75] = { { .cmd = DEVLINK_CMD_RESOURCE_DUMP, .validate = GENL_DONT_VALIDATE_STRICT, - .pre_doit = devlink_nl_pre_doit, + .pre_doit = devlink_nl_pre_doit_port_optional, .doit = devlink_nl_resource_dump_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_resource_dump_do_nl_policy, diff --git a/net/devlink/netlink_gen.h b/net/devlink/netlink_gen.h index d79f6a0888f6..20034b0929a8 100644 --- a/net/devlink/netlink_gen.h +++ b/net/devlink/netlink_gen.h @@ -24,11 +24,11 @@ int devlink_nl_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); int devlink_nl_pre_doit_port(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); -int devlink_nl_pre_doit_dev_lock(const struct genl_split_ops *ops, - struct sk_buff *skb, struct genl_info *info); int devlink_nl_pre_doit_port_optional(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); +int devlink_nl_pre_doit_dev_lock(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info); void devlink_nl_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); diff --git a/net/devlink/resource.c b/net/devlink/resource.c index 7984eda63eb6..bf5221fb3e64 100644 --- a/net/devlink/resource.c +++ b/net/devlink/resource.c @@ -251,8 +251,10 @@ static int devlink_resource_list_fill(struct sk_buff *skb, static int devlink_resource_fill(struct genl_info *info, enum devlink_command cmd, int flags) { + struct devlink_port *devlink_port = info->user_ptr[1]; struct devlink *devlink = info->user_ptr[0]; struct devlink_resource *resource; + struct list_head *resource_list; struct nlattr *resources_attr; struct sk_buff *skb = NULL; struct nlmsghdr *nlh; @@ -261,7 +263,9 @@ static int devlink_resource_fill(struct genl_info *info, int i; int err; - resource = list_first_entry(&devlink->resource_list, + resource_list = devlink_port ? + &devlink_port->resource_list : &devlink->resource_list; + resource = list_first_entry(resource_list, struct devlink_resource, list); start_again: err = devlink_nl_msg_reply_and_new(&skb, info); @@ -277,6 +281,9 @@ start_again: if (devlink_nl_put_handle(skb, devlink)) goto nla_put_failure; + if (devlink_port && + nla_put_u32(skb, DEVLINK_ATTR_PORT_INDEX, devlink_port->index)) + goto nla_put_failure; resources_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_RESOURCE_LIST); @@ -285,7 +292,7 @@ start_again: incomplete = false; i = 0; - list_for_each_entry_from(resource, &devlink->resource_list, list) { + list_for_each_entry_from(resource, resource_list, list) { err = devlink_resource_put(devlink, skb, resource); if (err) { if (!i) @@ -319,9 +326,16 @@ err_resource_put: int devlink_nl_resource_dump_doit(struct sk_buff *skb, struct genl_info *info) { + struct devlink_port *devlink_port = info->user_ptr[1]; struct devlink *devlink = info->user_ptr[0]; + struct list_head *resource_list; - if (list_empty(&devlink->resource_list)) + if (info->attrs[DEVLINK_ATTR_PORT_INDEX] && !devlink_port) + return -ENODEV; + + resource_list = devlink_port ? + &devlink_port->resource_list : &devlink->resource_list; + if (list_empty(resource_list)) return -EOPNOTSUPP; return devlink_resource_fill(info, DEVLINK_CMD_RESOURCE_DUMP, 0); From 3961353771041908cadfdf8b773bfcb19b281b1a Mon Sep 17 00:00:00 2001 From: Or Har-Toov Date: Tue, 7 Apr 2026 22:41:03 +0300 Subject: [PATCH 1276/1561] selftest: netdevsim: Add devlink port resource doit test Tests that querying a specific port handle returns the expected resource name and size. Signed-off-by: Or Har-Toov Reviewed-by: Moshe Shemesh Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260407194107.148063-9-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../drivers/net/netdevsim/devlink.sh | 29 ++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh index 1b529ccaf050..31d1cef54898 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh @@ -5,7 +5,8 @@ lib_dir=$(dirname $0)/../../../net/forwarding ALL_TESTS="fw_flash_test params_test \ params_default_test regions_test reload_test \ - netns_reload_test resource_test dev_info_test \ + netns_reload_test resource_test \ + port_resource_doit_test dev_info_test \ empty_reporter_test dummy_reporter_test rate_test" NUM_NETIFS=0 source $lib_dir/lib.sh @@ -768,6 +769,32 @@ rate_node_del() devlink port function rate del $handle } +port_resource_doit_test() +{ + RET=0 + + local port_handle="${DL_HANDLE}/0" + local name + local size + + if ! devlink resource help 2>&1 | grep -q "PORT_INDEX"; then + echo "SKIP: devlink resource show with port not supported" + return + fi + + name=$(cmd_jq "devlink resource show $port_handle -j" \ + '.[][][].name') + [ "$name" == "test_resource" ] + check_err $? "wrong port resource name (got $name)" + + size=$(cmd_jq "devlink resource show $port_handle -j" \ + '.[][][].size') + [ "$size" == "20" ] + check_err $? "wrong port resource size (got $size)" + + log_test "port resource doit test" +} + rate_test() { RET=0 From 170e160a0e7c6396f74279d922bee90902bd16f6 Mon Sep 17 00:00:00 2001 From: Or Har-Toov Date: Tue, 7 Apr 2026 22:41:04 +0300 Subject: [PATCH 1277/1561] devlink: Document port-level resources and full dump Document the port-level resource support and the option to dump all resources, including both device-level and port-level entries. Signed-off-by: Or Har-Toov Reviewed-by: Shay Drori Reviewed-by: Moshe Shemesh Reviewed-by: Jiri Pirko Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260407194107.148063-10-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../networking/devlink/devlink-resource.rst | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/Documentation/networking/devlink/devlink-resource.rst b/Documentation/networking/devlink/devlink-resource.rst index 3d5ae51e65a2..9839c1661315 100644 --- a/Documentation/networking/devlink/devlink-resource.rst +++ b/Documentation/networking/devlink/devlink-resource.rst @@ -74,3 +74,38 @@ attribute, which represents the pending change in size. For example: Note that changes in resource size may require a device reload to properly take effect. + +Port-level Resources and Full Dump +================================== + +In addition to device-level resources, ``devlink`` also supports port-level +resources. These resources are associated with a specific devlink port rather +than the device as a whole. + +To list resources for all devlink devices and ports: + +.. code:: shell + + $ devlink resource show + pci/0000:03:00.0: + name max_local_SFs size 128 unit entry dpipe_tables none + name max_external_SFs size 128 unit entry dpipe_tables none + pci/0000:03:00.0/196608: + name max_SFs size 128 unit entry dpipe_tables none + pci/0000:03:00.0/196609: + name max_SFs size 128 unit entry dpipe_tables none + pci/0000:03:00.1: + name max_local_SFs size 128 unit entry dpipe_tables none + name max_external_SFs size 128 unit entry dpipe_tables none + pci/0000:03:00.1/196708: + name max_SFs size 128 unit entry dpipe_tables none + pci/0000:03:00.1/196709: + name max_SFs size 128 unit entry dpipe_tables none + +To show resources for a specific port: + +.. code:: shell + + $ devlink resource show pci/0000:03:00.0/196608 + pci/0000:03:00.0/196608: + name max_SFs size 128 unit entry dpipe_tables none From 1bc45341a6ea4009ee9f2fbca9096b33a9ef71a2 Mon Sep 17 00:00:00 2001 From: Or Har-Toov Date: Tue, 7 Apr 2026 22:41:05 +0300 Subject: [PATCH 1278/1561] devlink: Add resource scope filtering to resource dump Allow filtering the resource dump to device-level or port-level resources using the 'scope' option. Example - dump only device-level resources: $ devlink resource show scope dev pci/0000:03:00.0: name max_local_SFs size 128 unit entry dpipe_tables none name max_external_SFs size 128 unit entry dpipe_tables none pci/0000:03:00.1: name max_local_SFs size 128 unit entry dpipe_tables none name max_external_SFs size 128 unit entry dpipe_tables none Example - dump only port-level resources: $ devlink resource show scope port pci/0000:03:00.0/196608: name max_SFs size 128 unit entry dpipe_tables none pci/0000:03:00.0/196609: name max_SFs size 128 unit entry dpipe_tables none pci/0000:03:00.1/196708: name max_SFs size 128 unit entry dpipe_tables none pci/0000:03:00.1/196709: name max_SFs size 128 unit entry dpipe_tables none Signed-off-by: Or Har-Toov Reviewed-by: Moshe Shemesh Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260407194107.148063-11-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/devlink.yaml | 24 +++++++++++++++++++++++- include/uapi/linux/devlink.h | 11 +++++++++++ net/devlink/netlink_gen.c | 5 +++-- net/devlink/resource.c | 19 ++++++++++++++++++- 4 files changed, 55 insertions(+), 4 deletions(-) diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml index 34aa81ba689e..247b147d689f 100644 --- a/Documentation/netlink/specs/devlink.yaml +++ b/Documentation/netlink/specs/devlink.yaml @@ -157,6 +157,14 @@ definitions: entries: - name: entry + - + type: enum + name: resource-scope + entries: + - + name: dev + - + name: port - type: enum name: reload-action @@ -873,6 +881,16 @@ attribute-sets: doc: Unique devlink instance index. checks: max: u32-max + - + name: resource-scope-mask + type: u32 + enum: resource-scope + enum-as-flags: true + doc: | + Bitmask selecting which resource classes to include in a + resource-dump response. Bit 0 (dev) selects device-level + resources; bit 1 (port) selects port-level resources. + When absent all classes are returned. - name: dl-dev-stats subset-of: devlink @@ -1775,7 +1793,11 @@ operations: - resource-list dump: request: - attributes: *dev-id-attrs + attributes: + - bus-name + - dev-name + - index + - resource-scope-mask reply: *resource-dump-reply - diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 7de2d8cc862f..0b165eac7619 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -645,6 +645,7 @@ enum devlink_attr { DEVLINK_ATTR_PARAM_RESET_DEFAULT, /* flag */ DEVLINK_ATTR_INDEX, /* uint */ + DEVLINK_ATTR_RESOURCE_SCOPE_MASK, /* u32 */ /* Add new attributes above here, update the spec in * Documentation/netlink/specs/devlink.yaml and re-generate @@ -704,6 +705,16 @@ enum devlink_resource_unit { DEVLINK_RESOURCE_UNIT_ENTRY, }; +enum devlink_resource_scope { + DEVLINK_RESOURCE_SCOPE_DEV_BIT, + DEVLINK_RESOURCE_SCOPE_PORT_BIT, +}; + +#define DEVLINK_RESOURCE_SCOPE_DEV \ + _BITUL(DEVLINK_RESOURCE_SCOPE_DEV_BIT) +#define DEVLINK_RESOURCE_SCOPE_PORT \ + _BITUL(DEVLINK_RESOURCE_SCOPE_PORT_BIT) + enum devlink_port_fn_attr_cap { DEVLINK_PORT_FN_ATTR_CAP_ROCE_BIT, DEVLINK_PORT_FN_ATTR_CAP_MIGRATABLE_BIT, diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c index 9cc372d9ee41..81899786fd98 100644 --- a/net/devlink/netlink_gen.c +++ b/net/devlink/netlink_gen.c @@ -313,10 +313,11 @@ static const struct nla_policy devlink_resource_dump_do_nl_policy[DEVLINK_ATTR_I }; /* DEVLINK_CMD_RESOURCE_DUMP - dump */ -static const struct nla_policy devlink_resource_dump_dump_nl_policy[DEVLINK_ATTR_INDEX + 1] = { +static const struct nla_policy devlink_resource_dump_dump_nl_policy[DEVLINK_ATTR_RESOURCE_SCOPE_MASK + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), + [DEVLINK_ATTR_RESOURCE_SCOPE_MASK] = NLA_POLICY_MASK(NLA_U32, 0x3), }; /* DEVLINK_CMD_RELOAD - do */ @@ -974,7 +975,7 @@ const struct genl_split_ops devlink_nl_ops[75] = { .cmd = DEVLINK_CMD_RESOURCE_DUMP, .dumpit = devlink_nl_resource_dump_dumpit, .policy = devlink_resource_dump_dump_nl_policy, - .maxattr = DEVLINK_ATTR_INDEX, + .maxattr = DEVLINK_ATTR_RESOURCE_SCOPE_MASK, .flags = GENL_CMD_CAP_DUMP, }, { diff --git a/net/devlink/resource.c b/net/devlink/resource.c index bf5221fb3e64..3d2f42bc2fb5 100644 --- a/net/devlink/resource.c +++ b/net/devlink/resource.c @@ -398,11 +398,25 @@ devlink_nl_resource_dump_one(struct sk_buff *skb, struct devlink *devlink, struct netlink_callback *cb, int flags) { struct devlink_nl_dump_state *state = devlink_dump_state(cb); + const struct genl_info *info = genl_info_dump(cb); struct devlink_port *devlink_port; + struct nlattr *scope_attr = NULL; unsigned long port_idx; + u32 scope = 0; int err; - if (!state->port_ctx.index_valid) { + if (info->attrs && info->attrs[DEVLINK_ATTR_RESOURCE_SCOPE_MASK]) { + scope_attr = info->attrs[DEVLINK_ATTR_RESOURCE_SCOPE_MASK]; + scope = nla_get_u32(scope_attr); + if (!scope) { + NL_SET_ERR_MSG_ATTR(info->extack, scope_attr, + "empty resource scope selection"); + return -EINVAL; + } + } + + if (!state->port_ctx.index_valid && + (!scope || (scope & DEVLINK_RESOURCE_SCOPE_DEV))) { err = devlink_resource_dump_fill_one(skb, devlink, NULL, cb, flags, &state->idx); if (err) @@ -410,6 +424,8 @@ devlink_nl_resource_dump_one(struct sk_buff *skb, struct devlink *devlink, state->idx = 0; } + if (scope && !(scope & DEVLINK_RESOURCE_SCOPE_PORT)) + goto out; /* Check in case port was removed between dump callbacks. */ if (state->port_ctx.index_valid && !xa_load(&devlink->ports, state->port_ctx.index)) @@ -425,6 +441,7 @@ devlink_nl_resource_dump_one(struct sk_buff *skb, struct devlink *devlink, } state->idx = 0; } +out: state->port_ctx.index_valid = false; state->port_ctx.index = 0; return 0; From 2a8e91235254862aa1d99434c6105aec65cd8e42 Mon Sep 17 00:00:00 2001 From: Or Har-Toov Date: Tue, 7 Apr 2026 22:41:06 +0300 Subject: [PATCH 1279/1561] selftest: netdevsim: Add resource dump and scope filter test Add resource_dump_test() which verifies dumping resources for all devices and ports, and tests that scope=dev returns only device-level resources and scope=port returns only port resources. Skip if userspace does not support the scope parameter. Signed-off-by: Or Har-Toov Reviewed-by: Moshe Shemesh Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260407194107.148063-12-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../drivers/net/netdevsim/devlink.sh | 52 ++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh index 31d1cef54898..22a626c6cde3 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh @@ -5,7 +5,7 @@ lib_dir=$(dirname $0)/../../../net/forwarding ALL_TESTS="fw_flash_test params_test \ params_default_test regions_test reload_test \ - netns_reload_test resource_test \ + netns_reload_test resource_test resource_dump_test \ port_resource_doit_test dev_info_test \ empty_reporter_test dummy_reporter_test rate_test" NUM_NETIFS=0 @@ -483,6 +483,56 @@ resource_test() log_test "resource test" } +resource_dump_test() +{ + RET=0 + + local port_jq + local dev_jq + local dl_jq + local count + + dl_jq="with_entries(select(.key | startswith(\"$DL_HANDLE\")))" + port_jq="[.[] | $dl_jq | keys |" + port_jq+=" map(select(test(\"/.+/\"))) | length] | add" + dev_jq="[.[] | $dl_jq | keys |" + dev_jq+=" map(select(test(\"/.+/\")|not)) | length] | add" + + if ! devlink resource help 2>&1 | grep -q "scope"; then + echo "SKIP: devlink resource show not supported" + return + fi + + devlink resource show > /dev/null 2>&1 + check_err $? "Failed to dump all resources" + + count=$(cmd_jq "devlink resource show -j" "$port_jq") + [ "$count" -gt "0" ] + check_err $? "missing port resources in resource dump" + + count=$(cmd_jq "devlink resource show -j" "$dev_jq") + [ "$count" -gt "0" ] + check_err $? "missing device resources in resource dump" + + count=$(cmd_jq "devlink resource show scope dev -j" "$dev_jq") + [ "$count" -gt "0" ] + check_err $? "dev scope missing device resources" + + count=$(cmd_jq "devlink resource show scope dev -j" "$port_jq") + [ "$count" -eq "0" ] + check_err $? "dev scope returned port resources" + + count=$(cmd_jq "devlink resource show scope port -j" "$port_jq") + [ "$count" -gt "0" ] + check_err $? "port scope missing port resources" + + count=$(cmd_jq "devlink resource show scope port -j" "$dev_jq") + [ "$count" -eq "0" ] + check_err $? "port scope returned device resources" + + log_test "resource dump test" +} + info_get() { local name=$1 From 78c327c1728de377020672d429250c80a8a13723 Mon Sep 17 00:00:00 2001 From: Or Har-Toov Date: Tue, 7 Apr 2026 22:41:07 +0300 Subject: [PATCH 1280/1561] devlink: Document resource scope filtering Document the scope parameter for devlink resource show, which allows filtering the dump to device-level or port-level resources only. Signed-off-by: Or Har-Toov Reviewed-by: Moshe Shemesh Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260407194107.148063-13-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../networking/devlink/devlink-resource.rst | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/Documentation/networking/devlink/devlink-resource.rst b/Documentation/networking/devlink/devlink-resource.rst index 9839c1661315..47eec8f875b4 100644 --- a/Documentation/networking/devlink/devlink-resource.rst +++ b/Documentation/networking/devlink/devlink-resource.rst @@ -109,3 +109,38 @@ To show resources for a specific port: $ devlink resource show pci/0000:03:00.0/196608 pci/0000:03:00.0/196608: name max_SFs size 128 unit entry dpipe_tables none + +Resource Scope Filtering +======================== + +When dumping resources for all devices, ``devlink resource show`` accepts +an optional ``scope`` parameter to restrict the response to device-level +resources, port-level resources, or both (the default). + +To dump only device-level resources across all devices: + +.. code:: shell + + $ devlink resource show scope dev + pci/0000:03:00.0: + name max_local_SFs size 128 unit entry dpipe_tables none + name max_external_SFs size 128 unit entry dpipe_tables none + pci/0000:03:00.1: + name max_local_SFs size 128 unit entry dpipe_tables none + name max_external_SFs size 128 unit entry dpipe_tables none + +To dump only port-level resources across all devices: + +.. code:: shell + + $ devlink resource show scope port + pci/0000:03:00.0/196608: + name max_SFs size 128 unit entry dpipe_tables none + pci/0000:03:00.0/196609: + name max_SFs size 128 unit entry dpipe_tables none + pci/0000:03:00.1/196708: + name max_SFs size 128 unit entry dpipe_tables none + pci/0000:03:00.1/196709: + name max_SFs size 128 unit entry dpipe_tables none + +Note that port-level resources are read-only. From fa489a77e3267e05df95db96ba98e141ec07cbd9 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 8 Apr 2026 20:03:33 -0700 Subject: [PATCH 1281/1561] wifi: cfg80211: Explicitly include in michael-mic.c This happened to be included transitively via a long chain starting with , but it's less fragile to include it explicitly. Signed-off-by: Eric Biggers Link: https://patch.msgid.link/20260409030333.13024-1-ebiggers@kernel.org Signed-off-by: Johannes Berg --- net/wireless/michael-mic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/michael-mic.c b/net/wireless/michael-mic.c index 50cdb67f0503..ec5164756e0a 100644 --- a/net/wireless/michael-mic.c +++ b/net/wireless/michael-mic.c @@ -5,6 +5,7 @@ */ #include #include +#include #include #include From f9e3bd43d55f24331e5ea65f667dbb33716e7d6b Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Fri, 3 Apr 2026 12:00:27 +0300 Subject: [PATCH 1282/1561] net/mlx5: Rename MLX5_PF page counter type to MLX5_SELF The MLX5_PF enum value in mlx5_func_type is used to track firmware page allocations for the page manager function itself, which is either the ECPF on SmartNIC systems or the host PF when there is no ECPF. Rename it to MLX5_SELF to accurately reflect that this counter tracks pages allocated by the manager for its own use, regardless of whether it is a PF or ECPF. Signed-off-by: Moshe Shemesh Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260403090028.137783-2-tariqt@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c | 3 ++- include/linux/mlx5/driver.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 5ccb3ce98acb..77ffa31cc505 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -77,7 +77,8 @@ static u32 get_function(u16 func_id, bool ec_function) static u16 func_id_to_type(struct mlx5_core_dev *dev, u16 func_id, bool ec_function) { if (!func_id) - return mlx5_core_is_ecpf(dev) && !ec_function ? MLX5_HOST_PF : MLX5_PF; + return mlx5_core_is_ecpf(dev) && !ec_function ? + MLX5_HOST_PF : MLX5_SELF; if (func_id <= max(mlx5_core_max_vfs(dev), mlx5_core_max_ec_vfs(dev))) { if (ec_function) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index b8b5af78284d..10bc913591d5 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -550,7 +550,7 @@ struct mlx5_debugfs_entries { }; enum mlx5_func_type { - MLX5_PF, + MLX5_SELF, MLX5_VF, MLX5_SF, MLX5_HOST_PF, From a1bac8b70ede332a05487081c7512d2947f3a912 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Fri, 3 Apr 2026 12:00:28 +0300 Subject: [PATCH 1283/1561] net/mlx5: Add icm_mng_function_id_mode cap bit Introduce the capability bit icm_mng_function_id_mode to indicate that the device firmware uses vhca_id instead of function_id as the effective identifier for the firmware commands MANAGE_PAGES, QUERY_PAGES, and page request event. Signed-off-by: Moshe Shemesh Reviewed-by: Akiva Goldberger Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260403090028.137783-3-tariqt@nvidia.com Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 2400b4c38c77..007f5138db2b 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1654,6 +1654,11 @@ enum { MLX5_STEERING_FORMAT_CONNECTX_8 = 3, }; +enum { + MLX5_ID_MODE_FUNCTION_INDEX = 0, + MLX5_ID_MODE_FUNCTION_VHCA_ID = 1, +}; + struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_0[0x6]; u8 page_request_disable[0x1]; @@ -1916,7 +1921,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_280[0x10]; u8 max_wqe_sz_sq[0x10]; - u8 reserved_at_2a0[0x7]; + u8 icm_mng_function_id_mode[0x1]; + u8 reserved_at_2a1[0x6]; u8 mkey_pcie_tph[0x1]; u8 reserved_at_2a8[0x1]; u8 tis_tir_td_order[0x1]; From ebe5fd2ed20af5ae176dd41dd4a85a3cdc738b8a Mon Sep 17 00:00:00 2001 From: Birger Koblitz Date: Sat, 4 Apr 2026 09:57:42 +0200 Subject: [PATCH 1284/1561] r8152: Add support for 5Gbit Link Speeds and EEE The RTL8157 supports 5GBit Link speeds. Add support for this speed in the setup and setting/getting through ethtool. Also add 5GBit EEE. Add functionality for setup and ethtool get/set methods. Signed-off-by: Birger Koblitz Link: https://patch.msgid.link/20260404-rtl8157_next-v7-1-039121318f23@birger-koblitz.de Signed-off-by: Paolo Abeni --- drivers/net/usb/r8152.c | 100 ++++++++++++++++++++++++++++++++-------- 1 file changed, 82 insertions(+), 18 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 1765da5bd6cf..c81bb788ac34 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -606,6 +606,7 @@ enum spd_duplex { FORCE_100M_FULL, FORCE_1000M_FULL, NWAY_2500M_FULL, + NWAY_5000M_FULL, }; /* OCP_ALDPS_CONFIG */ @@ -727,6 +728,7 @@ enum spd_duplex { #define BP4_SUPER_ONLY 0x1578 /* RTL_VER_04 only */ enum rtl_register_content { + _5000bps = BIT(12), _2500bps = BIT(10), _1250bps = BIT(9), _500bps = BIT(8), @@ -740,6 +742,7 @@ enum rtl_register_content { }; #define is_speed_2500(_speed) (((_speed) & (_2500bps | LINK_STATUS)) == (_2500bps | LINK_STATUS)) +#define is_speed_5000(_speed) (((_speed) & (_5000bps | LINK_STATUS)) == (_5000bps | LINK_STATUS)) #define is_flow_control(_speed) (((_speed) & (_tx_flow | _rx_flow)) == (_tx_flow | _rx_flow)) #define RTL8152_MAX_TX 4 @@ -946,6 +949,7 @@ struct r8152 { unsigned int pipe_in, pipe_out, pipe_intr, pipe_ctrl_in, pipe_ctrl_out; u32 support_2500full:1; + u32 support_5000full:1; u32 lenovo_macpassthru:1; u32 dell_tb_rx_agg_bug:1; u16 ocp_base; @@ -1196,6 +1200,7 @@ enum tx_csum_stat { #define RTL_ADVERTISED_1000_HALF BIT(4) #define RTL_ADVERTISED_1000_FULL BIT(5) #define RTL_ADVERTISED_2500_FULL BIT(6) +#define RTL_ADVERTISED_5000_FULL BIT(7) /* Maximum number of multicast addresses to filter (vs. Rx-all-multicast). * The RTL chips use a 64 element hash table based on the Ethernet CRC. @@ -5421,12 +5426,23 @@ static void r8153_eee_en(struct r8152 *tp, bool enable) static void r8156_eee_en(struct r8152 *tp, bool enable) { + u16 config; + r8153_eee_en(tp, enable); + config = ocp_reg_read(tp, OCP_EEE_ADV2); + if (enable && (tp->eee_adv2 & MDIO_EEE_2_5GT)) - ocp_reg_set_bits(tp, OCP_EEE_ADV2, MDIO_EEE_2_5GT); + config |= MDIO_EEE_2_5GT; else - ocp_reg_clr_bits(tp, OCP_EEE_ADV2, MDIO_EEE_2_5GT); + config &= ~MDIO_EEE_2_5GT; + + if (enable && (tp->eee_adv2 & MDIO_EEE_5GT)) + config |= MDIO_EEE_5GT; + else + config &= ~MDIO_EEE_5GT; + + ocp_reg_write(tp, OCP_EEE_ADV2, config); } static void rtl_eee_enable(struct r8152 *tp, bool enable) @@ -6190,9 +6206,13 @@ static int rtl8152_set_speed(struct r8152 *tp, u8 autoneg, u32 speed, u8 duplex, if (tp->support_2500full) support |= RTL_ADVERTISED_2500_FULL; + + if (tp->support_5000full) + support |= RTL_ADVERTISED_5000_FULL; } - if (!(advertising & support)) + advertising &= support; + if (!advertising) return -EINVAL; orig = r8152_mdio_read(tp, MII_ADVERTISE); @@ -6235,15 +6255,20 @@ static int rtl8152_set_speed(struct r8152 *tp, u8 autoneg, u32 speed, u8 duplex, r8152_mdio_write(tp, MII_CTRL1000, new1); } - if (tp->support_2500full) { + if (tp->support_2500full || tp->support_5000full) { orig = ocp_reg_read(tp, OCP_10GBT_CTRL); - new1 = orig & ~MDIO_AN_10GBT_CTRL_ADV2_5G; + new1 = orig & ~(MDIO_AN_10GBT_CTRL_ADV2_5G | MDIO_AN_10GBT_CTRL_ADV5G); if (advertising & RTL_ADVERTISED_2500_FULL) { new1 |= MDIO_AN_10GBT_CTRL_ADV2_5G; tp->ups_info.speed_duplex = NWAY_2500M_FULL; } + if (advertising & RTL_ADVERTISED_5000_FULL) { + new1 |= MDIO_AN_10GBT_CTRL_ADV5G; + tp->ups_info.speed_duplex = NWAY_5000M_FULL; + } + if (orig != new1) ocp_reg_write(tp, OCP_10GBT_CTRL, new1); } @@ -8220,17 +8245,38 @@ int rtl8152_get_link_ksettings(struct net_device *netdev, linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, cmd->link_modes.supported, tp->support_2500full); - if (tp->support_2500full) { - linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, - cmd->link_modes.advertising, - ocp_reg_read(tp, OCP_10GBT_CTRL) & MDIO_AN_10GBT_CTRL_ADV2_5G); + linkmode_mod_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT, + cmd->link_modes.supported, tp->support_5000full); - linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, - cmd->link_modes.lp_advertising, - ocp_reg_read(tp, OCP_10GBT_STAT) & MDIO_AN_10GBT_STAT_LP2_5G); + if (tp->support_2500full || tp->support_5000full) { + u16 ocp_10gbt_ctrl = ocp_reg_read(tp, OCP_10GBT_CTRL); + u16 ocp_10gbt_stat = ocp_reg_read(tp, OCP_10GBT_STAT); - if (is_speed_2500(rtl8152_get_speed(tp))) - cmd->base.speed = SPEED_2500; + if (tp->support_2500full) { + linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, + cmd->link_modes.advertising, + ocp_10gbt_ctrl & MDIO_AN_10GBT_CTRL_ADV2_5G); + + linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, + cmd->link_modes.lp_advertising, + ocp_10gbt_stat & MDIO_AN_10GBT_STAT_LP2_5G); + + if (is_speed_2500(rtl8152_get_speed(tp))) + cmd->base.speed = SPEED_2500; + } + + if (tp->support_5000full) { + linkmode_mod_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT, + cmd->link_modes.advertising, + ocp_10gbt_ctrl & MDIO_AN_10GBT_CTRL_ADV5G); + + linkmode_mod_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT, + cmd->link_modes.lp_advertising, + ocp_10gbt_stat & MDIO_AN_10GBT_STAT_LP5G); + + if (is_speed_5000(rtl8152_get_speed(tp))) + cmd->base.speed = SPEED_5000; + } } mutex_unlock(&tp->control); @@ -8280,6 +8326,10 @@ static int rtl8152_set_link_ksettings(struct net_device *dev, cmd->link_modes.advertising)) advertising |= RTL_ADVERTISED_2500_FULL; + if (test_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT, + cmd->link_modes.advertising)) + advertising |= RTL_ADVERTISED_5000_FULL; + mutex_lock(&tp->control); ret = rtl8152_set_speed(tp, cmd->base.autoneg, cmd->base.speed, @@ -8397,7 +8447,7 @@ static int r8152_set_eee(struct r8152 *tp, struct ethtool_keee *eee) tp->eee_en = eee->eee_enabled; tp->eee_adv = val; - if (tp->support_2500full) { + if (tp->support_2500full || tp->support_5000full) { val = linkmode_to_mii_eee_cap2_t(eee->advertised); tp->eee_adv2 = val; } @@ -8421,19 +8471,28 @@ static int r8153_get_eee(struct r8152 *tp, struct ethtool_keee *eee) val = ocp_reg_read(tp, OCP_EEE_LPABLE); mii_eee_cap1_mod_linkmode_t(eee->lp_advertised, val); - if (tp->support_2500full) { - linkmode_set_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, eee->supported); - + if (tp->support_2500full || tp->support_5000full) { val = ocp_reg_read(tp, OCP_EEE_ADV2); mii_eee_cap2_mod_linkmode_adv_t(eee->advertised, val); val = ocp_reg_read(tp, OCP_EEE_LPABLE2); mii_eee_cap2_mod_linkmode_adv_t(eee->lp_advertised, val); + } + + if (tp->support_2500full) { + linkmode_set_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, eee->supported); if (speed & _2500bps) linkmode_set_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, common); } + if (tp->support_5000full) { + linkmode_set_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT, eee->supported); + + if (speed & _5000bps) + linkmode_set_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT, common); + } + eee->eee_enabled = tp->eee_en; if (speed & _1000bps) @@ -9374,6 +9433,11 @@ static int rtl8152_probe_once(struct usb_interface *intf, } else { tp->speed = SPEED_1000; } + if (tp->support_5000full && + tp->udev->speed >= USB_SPEED_SUPER) { + tp->speed = SPEED_5000; + tp->advertising |= RTL_ADVERTISED_5000_FULL; + } tp->advertising |= RTL_ADVERTISED_1000_FULL; } tp->duplex = DUPLEX_FULL; From fd3c7d080df53136c7e7e37f753fb7bd4640ca42 Mon Sep 17 00:00:00 2001 From: Birger Koblitz Date: Sat, 4 Apr 2026 09:57:43 +0200 Subject: [PATCH 1285/1561] r8152: Add support for the RTL8157 hardware The RTL8157 uses a different packet descriptor format compared to the previous generation of chips. Add support for this format by adding a descriptor format structure into the r8152 structure and corresponding desc_ops functions which abstract the vlan-tag, tx/rx len and tx/rx checksum algorithms. Also, add support for the ADV indirect access interface of the RTL8157 and PHY setup. For initialization of the RTL8157, combine the existing RTL8156B and RTL8156 init functions and add RTL8157-specific functinality in order to improve code readability and maintainability. r8156_init() is now called with RTL_VER_10 and RTL_VER_11 for the RTL8156, with RTL_VER_12, RTL_VER_13 and RTL_VER_15 for the RTL8156B and with RTL_VER_16 for the RTL8157 and checks the version for chip-specific code. Also add USB power control functions for the RTL8157. Add support for the USB device ID of Realtek RTL8157-based adapters. Detect the RTL8157 as RTL_VER_16 and set it up. Signed-off-by: Birger Koblitz Link: https://patch.msgid.link/20260404-rtl8157_next-v7-2-039121318f23@birger-koblitz.de Signed-off-by: Paolo Abeni --- drivers/net/usb/r8152.c | 913 ++++++++++++++++++++++++++++++++-------- 1 file changed, 729 insertions(+), 184 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index c81bb788ac34..7337bf1b7d6a 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -123,6 +123,7 @@ #define USB_CSR_DUMMY1 0xb464 #define USB_CSR_DUMMY2 0xb466 #define USB_DEV_STAT 0xb808 +#define USB_U2P3_V2_CTRL 0xc2c0 #define USB_CONNECT_TIMER 0xcbf8 #define USB_MSC_TIMER 0xcbfc #define USB_BURST_SIZE 0xcfc0 @@ -156,6 +157,9 @@ #define USB_U1U2_TIMER 0xd4da #define USB_FW_TASK 0xd4e8 /* RTL8153B */ #define USB_RX_AGGR_NUM 0xd4ee +#define USB_ADV_ADDR 0xd5d6 +#define USB_ADV_DATA 0xd5d8 +#define USB_ADV_CMD 0xd5dc #define USB_UPS_CTRL 0xd800 #define USB_POWER_CUT 0xd80a #define USB_MISC_0 0xd81a @@ -492,6 +496,12 @@ /* USB_RX_AGGR_NUM */ #define RX_AGGR_NUM_MASK 0x1ff +/* USB_ADV_CMD */ +#define ADV_CMD_BMU 0 +#define ADV_CMD_BUSY BIT(0) +#define ADV_CMD_WR BIT(1) +#define ADV_CMD_IP BIT(2) + /* USB_UPS_CTRL */ #define POWER_CUT 0x0100 @@ -531,11 +541,15 @@ #define CDC_ECM_EN BIT(3) #define RX_AGG_DISABLE 0x0010 #define RX_ZERO_EN 0x0080 +#define RX_DESC_16B 0x0400 /* USB_U2P3_CTRL */ #define U2P3_ENABLE 0x0001 #define RX_DETECT8 BIT(3) +/* USB_U2P3_V2_CTRL */ +#define U2P3_V2_ENABLE BIT(29) + /* USB_POWER_CUT */ #define PWR_EN 0x0001 #define PHASE2_EN 0x0008 @@ -748,8 +762,6 @@ enum rtl_register_content { #define RTL8152_MAX_TX 4 #define RTL8152_MAX_RX 10 #define INTBUFSIZE 2 -#define TX_ALIGN 4 -#define RX_ALIGN 8 #define RTL8152_RX_MAX_PENDING 4096 #define RTL8152_RXFG_HEADSZ 256 @@ -761,7 +773,6 @@ enum rtl_register_content { #define RTL8152_TX_TIMEOUT (5 * HZ) #define mtu_to_size(m) ((m) + VLAN_ETH_HLEN + ETH_FCS_LEN) #define size_to_mtu(s) ((s) - VLAN_ETH_HLEN - ETH_FCS_LEN) -#define rx_reserved_size(x) (mtu_to_size(x) + sizeof(struct rx_desc) + RX_ALIGN) /* rtl8152 flags */ enum rtl8152_flags { @@ -846,6 +857,40 @@ struct tx_desc { #define TX_VLAN_TAG BIT(16) }; +struct rx_desc_v2 { + __le32 opts1; +#define RX_LEN_MASK_2 0xfffe0000 +#define rx_v2_get_len(x) (((x) & RX_LEN_MASK_2) >> 17) +#define RX_VLAN_TAG_2 BIT(3) +#define RX_VER_MASK 0x3 + + __le32 opts2; + + __le32 opts3; +#define IPF_2 BIT(26) /* IP checksum fail */ +#define UDPF_2 BIT(25) /* UDP checksum fail */ +#define TCPF_2 BIT(24) /* TCP checksum fail */ +#define RD_IPV6_CS_2 BIT(15) +#define RD_IPV4_CS_2 BIT(14) +#define RD_UDP_CS_2 BIT(11) +#define RD_TCP_CS_2 BIT(10) + + __le32 opts4; +}; + +struct tx_desc_v2 { + __le32 opts1; + + __le32 opts2; +#define TCPHO_MAX_2 0x3ffU + + __le32 opts3; +#define tx_v2_set_len(x) ((x) << 4) + + __le32 opts4; +#define TX_SIG (0x15 << 27) +}; + struct r8152; struct rx_agg { @@ -919,6 +964,19 @@ struct r8152 { u32 ctap_short_off:1; } ups_info; + struct desc_info { + void (*vlan_tag)(void *desc, struct sk_buff *skb); + u8 align; + u8 size; + } rx_desc, tx_desc; + + struct desc_ops { + void (*tx_len)(struct r8152 *tp, void *desc, u32 len); + u32 (*rx_len)(struct r8152 *tp, void *desc); + u8 (*rx_csum)(struct r8152 *tp, void *desc); + int (*tx_csum)(struct r8152 *tp, void *desc, struct sk_buff *skb, u32 len); + } desc_ops; + #define RTL_VER_SIZE 32 struct rtl_fw { @@ -1183,6 +1241,7 @@ enum rtl_version { RTL_VER_13, RTL_VER_14, RTL_VER_15, + RTL_VER_16, RTL_VER_MAX }; @@ -1208,7 +1267,7 @@ enum tx_csum_stat { static const int multicast_filter_limit = 32; static unsigned int agg_buf_sz = 16384; -#define RTL_LIMITED_TSO_SIZE (size_to_mtu(agg_buf_sz) - sizeof(struct tx_desc)) +#define RTL_LIMITED_TSO_SIZE (size_to_mtu(agg_buf_sz) - tp->tx_desc.size) /* If register access fails then we block access and issue a reset. If this * happens too many times in a row without a successful access then we stop @@ -1619,6 +1678,122 @@ static inline int r8152_mdio_read(struct r8152 *tp, u32 reg_addr) return ocp_reg_read(tp, OCP_BASE_MII + reg_addr * 2); } +static int wait_cmd_ready(struct r8152 *tp, u16 cmd) +{ + return poll_timeout_us(u16 ocp_data = ocp_read_word(tp, MCU_TYPE_USB, cmd), + !(ocp_data & ADV_CMD_BUSY), 2000, 20000, false); +} + +static int ocp_adv_read(struct r8152 *tp, u16 cmd, u16 addr, u32 *data) +{ + int ret; + + ret = wait_cmd_ready(tp, USB_ADV_CMD); + if (ret < 0) + goto out; + + ocp_write_word(tp, MCU_TYPE_USB, USB_ADV_ADDR, addr); + + cmd |= ADV_CMD_BUSY; + ocp_write_word(tp, MCU_TYPE_USB, USB_ADV_CMD, cmd); + + ret = wait_cmd_ready(tp, USB_ADV_CMD); + if (ret < 0) + goto out; + + *data = ocp_read_dword(tp, MCU_TYPE_USB, USB_ADV_DATA); + +out: + return ret; +} + +static int ocp_adv_write(struct r8152 *tp, u16 cmd, u16 addr, u32 data) +{ + int ret; + + ret = wait_cmd_ready(tp, USB_ADV_CMD); + if (ret < 0) + goto out; + + cmd |= ADV_CMD_WR; + ocp_write_dword(tp, MCU_TYPE_USB, USB_ADV_DATA, data); + + ocp_write_word(tp, MCU_TYPE_USB, USB_ADV_ADDR, addr); + + cmd |= ADV_CMD_BUSY; + ocp_write_word(tp, MCU_TYPE_USB, USB_ADV_CMD, cmd); + +out: + return ret; +} + +static int rtl_bmu_read(struct r8152 *tp, u16 addr, u32 *data) +{ + return ocp_adv_read(tp, ADV_CMD_BMU, addr, data); +} + +static int rtl_bmu_write(struct r8152 *tp, u16 addr, u32 data) +{ + return ocp_adv_write(tp, ADV_CMD_BMU, addr, data); +} + +static int rtl_bmu_w0w1(struct r8152 *tp, u16 addr, u32 clear, u32 set) +{ + u32 bmu; + int ret; + + ret = rtl_bmu_read(tp, addr, &bmu); + if (ret < 0) + goto out; + + bmu = (bmu & ~clear) | set; + ret = rtl_bmu_write(tp, addr, bmu); + +out: + return ret; +} + +static int rtl_bmu_clr_bits(struct r8152 *tp, u16 addr, u32 clear) +{ + return rtl_bmu_w0w1(tp, addr, clear, 0); +} + +static int rtl_ip_read(struct r8152 *tp, u16 addr, u32 *data) +{ + return ocp_adv_read(tp, ADV_CMD_IP, addr, data); +} + +static int rtl_ip_write(struct r8152 *tp, u16 addr, u32 data) +{ + return ocp_adv_write(tp, ADV_CMD_IP, addr, data); +} + +static int rtl_ip_w0w1(struct r8152 *tp, u16 addr, u32 clear, u32 set) +{ + int ret; + u32 ip; + + ret = rtl_ip_read(tp, addr, &ip); + if (ret < 0) + goto out; + + ip = (ip & ~clear) | set; + ret = rtl_ip_write(tp, addr, ip); + +out: + return ret; +} + +static int rtl_ip_clr_bits(struct r8152 *tp, u16 addr, u32 clear) +{ + return rtl_ip_w0w1(tp, addr, clear, 0); +} + +static int rtl_ip_set_bits(struct r8152 *tp, u16 addr, u32 set) +{ + return rtl_ip_w0w1(tp, addr, 0, set); +} + static void sram_write(struct r8152 *tp, u16 addr, u16 data) { ocp_reg_write(tp, OCP_SRAM_ADDR, addr); @@ -2182,14 +2357,14 @@ resubmit: } } -static inline void *rx_agg_align(void *data) +static void *rx_agg_align(struct r8152 *tp, void *data) { - return (void *)ALIGN((uintptr_t)data, RX_ALIGN); + return (void *)ALIGN((uintptr_t)data, tp->rx_desc.align); } -static inline void *tx_agg_align(void *data) +static void *tx_agg_align(struct r8152 *tp, void *data) { - return (void *)ALIGN((uintptr_t)data, TX_ALIGN); + return (void *)ALIGN((uintptr_t)data, tp->tx_desc.align); } static void free_rx_agg(struct r8152 *tp, struct rx_agg *agg) @@ -2307,9 +2482,9 @@ static int alloc_all_mem(struct r8152 *tp) if (!buf) goto err1; - if (buf != tx_agg_align(buf)) { + if (buf != tx_agg_align(tp, buf)) { kfree(buf); - buf = kmalloc_node(agg_buf_sz + TX_ALIGN, GFP_KERNEL, + buf = kmalloc_node(agg_buf_sz + tp->tx_desc.align, GFP_KERNEL, node); if (!buf) goto err1; @@ -2325,7 +2500,7 @@ static int alloc_all_mem(struct r8152 *tp) tp->tx_info[i].context = tp; tp->tx_info[i].urb = urb; tp->tx_info[i].buffer = buf; - tp->tx_info[i].head = tx_agg_align(buf); + tp->tx_info[i].head = tx_agg_align(tp, buf); list_add_tail(&tp->tx_info[i].list, &tp->tx_free); } @@ -2412,8 +2587,17 @@ drop: } } -static inline void rtl_tx_vlan_tag(struct tx_desc *desc, struct sk_buff *skb) +static void r8152_tx_len(struct r8152 *tp, void *tx_desc, u32 len) { + struct tx_desc *desc = tx_desc; + + desc->opts1 |= cpu_to_le32(len); +} + +static void r8152_tx_vlan_tag(void *d, struct sk_buff *skb) +{ + struct tx_desc *desc = d; + if (skb_vlan_tag_present(skb)) { u32 opts2; @@ -2422,8 +2606,10 @@ static inline void rtl_tx_vlan_tag(struct tx_desc *desc, struct sk_buff *skb) } } -static inline void rtl_rx_vlan_tag(struct rx_desc *desc, struct sk_buff *skb) +static void r8152_rx_vlan_tag(void *d, struct sk_buff *skb) { + struct rx_desc *desc = d; + u32 opts2 = le32_to_cpu(desc->opts2); if (opts2 & RX_VLAN_TAG) @@ -2431,10 +2617,11 @@ static inline void rtl_rx_vlan_tag(struct rx_desc *desc, struct sk_buff *skb) swab16(opts2 & 0xffff)); } -static int r8152_tx_csum(struct r8152 *tp, struct tx_desc *desc, +static int r8152_tx_csum(struct r8152 *tp, void *d, struct sk_buff *skb, u32 len) { u32 mss = skb_shinfo(skb)->gso_size; + struct tx_desc *desc = d; u32 opts1, opts2 = 0; int ret = TX_CSUM_SUCCESS; @@ -2519,6 +2706,74 @@ unavailable: return ret; } +static u32 r8152_rx_len(struct r8152 *tp, void *d) +{ + struct rx_desc *desc = d; + + return le32_to_cpu(desc->opts1) & RX_LEN_MASK; +} + +static u32 r8157_rx_len(struct r8152 *tp, void *d) +{ + struct rx_desc_v2 *desc = d; + + return rx_v2_get_len(le32_to_cpu(desc->opts1)); +} + +static void r8157_rx_vlan_tag(void *desc, struct sk_buff *skb) +{ + struct rx_desc_v2 *d = desc; + u32 opts1; + + opts1 = le32_to_cpu(d->opts1); + if (opts1 & RX_VLAN_TAG_2) { + u32 opts2 = le32_to_cpu(d->opts2); + + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), + swab16((opts2 >> 16) & 0xffff)); + } +} + +static int r8157_tx_csum(struct r8152 *tp, void *tx_desc, struct sk_buff *skb, u32 len) +{ + u32 mss = skb_shinfo(skb)->gso_size; + + if (!mss && skb->ip_summed == CHECKSUM_PARTIAL) { + u32 transport_offset = (u32)skb_transport_offset(skb); + + if (transport_offset > TCPHO_MAX_2) { + netif_warn(tp, tx_err, tp->netdev, + "Invalid transport offset 0x%x\n", + transport_offset); + return TX_CSUM_NONE; + } + } + + return r8152_tx_csum(tp, tx_desc, skb, len); +} + +static void r8157_tx_len(struct r8152 *tp, void *tx_desc, u32 len) +{ + struct tx_desc_v2 *desc = tx_desc; + + desc->opts3 = cpu_to_le32(tx_v2_set_len(len)); + desc->opts4 = cpu_to_le32(TX_SIG); +} + +static int rtl_tx_csum(struct r8152 *tp, void *desc, struct sk_buff *skb, + u32 len) +{ + int ret = TX_CSUM_SUCCESS; + + WARN_ON_ONCE(len > TX_LEN_MAX); + + ret = tp->desc_ops.tx_csum(tp, desc, skb, len); + if (!ret) + tp->desc_ops.tx_len(tp, desc, len); + + return ret; +} + static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg) { struct sk_buff_head skb_head, *tx_queue = &tp->tx_queue; @@ -2535,33 +2790,33 @@ static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg) agg->skb_len = 0; remain = agg_buf_sz; - while (remain >= ETH_ZLEN + sizeof(struct tx_desc)) { - struct tx_desc *tx_desc; + while (remain >= ETH_ZLEN + tp->tx_desc.size) { struct sk_buff *skb; unsigned int len; + void *tx_desc; skb = __skb_dequeue(&skb_head); if (!skb) break; - len = skb->len + sizeof(*tx_desc); + len = skb->len + tp->tx_desc.size; if (len > remain) { __skb_queue_head(&skb_head, skb); break; } - tx_data = tx_agg_align(tx_data); - tx_desc = (struct tx_desc *)tx_data; + tx_data = tx_agg_align(tp, tx_data); + tx_desc = (void *)tx_data; - if (r8152_tx_csum(tp, tx_desc, skb, skb->len)) { + if (rtl_tx_csum(tp, tx_desc, skb, skb->len)) { r8152_csum_workaround(tp, skb, &skb_head); continue; } - rtl_tx_vlan_tag(tx_desc, skb); + tp->tx_desc.vlan_tag(tx_desc, skb); - tx_data += sizeof(*tx_desc); + tx_data += tp->tx_desc.size; len = skb->len; if (skb_copy_bits(skb, 0, tx_data, len) < 0) { @@ -2569,7 +2824,7 @@ static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg) stats->tx_dropped++; dev_kfree_skb_any(skb); - tx_data -= sizeof(*tx_desc); + tx_data -= tp->tx_desc.size; continue; } @@ -2579,7 +2834,7 @@ static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg) dev_kfree_skb_any(skb); - remain = agg_buf_sz - (int)(tx_agg_align(tx_data) - agg->head); + remain = agg_buf_sz - (int)(tx_agg_align(tp, tx_data) - agg->head); if (tp->dell_tb_rx_agg_bug) break; @@ -2617,8 +2872,9 @@ out_tx_fill: return ret; } -static u8 r8152_rx_csum(struct r8152 *tp, struct rx_desc *rx_desc) +static u8 r8152_rx_csum(struct r8152 *tp, void *d) { + struct rx_desc *rx_desc = d; u8 checksum = CHECKSUM_NONE; u32 opts2, opts3; @@ -2646,6 +2902,30 @@ return_result: return checksum; } +static u8 r8157_rx_csum(struct r8152 *tp, void *desc) +{ + struct rx_desc_v2 *d = desc; + u8 checksum = CHECKSUM_NONE; + u32 opts3; + + if (!(tp->netdev->features & NETIF_F_RXCSUM)) + goto return_result; + + opts3 = le32_to_cpu(d->opts3); + + if ((opts3 & (RD_IPV4_CS_2 | IPF_2)) == (RD_IPV4_CS_2 | IPF_2)) { + checksum = CHECKSUM_NONE; + } else if (opts3 & (RD_IPV4_CS_2 | RD_IPV6_CS_2)) { + if ((opts3 & (RD_UDP_CS_2 | UDPF_2)) == RD_UDP_CS_2) + checksum = CHECKSUM_UNNECESSARY; + else if ((opts3 & (RD_TCP_CS_2 | TCPF_2)) == RD_TCP_CS_2) + checksum = CHECKSUM_UNNECESSARY; + } + +return_result: + return checksum; +} + static inline bool rx_count_exceed(struct r8152 *tp) { return atomic_read(&tp->rx_count) > RTL8152_MAX_RX; @@ -2721,10 +3001,10 @@ static int rx_bottom(struct r8152 *tp, int budget) spin_unlock_irqrestore(&tp->rx_lock, flags); list_for_each_safe(cursor, next, &rx_queue) { - struct rx_desc *rx_desc; struct rx_agg *agg, *agg_free; int len_used = 0; struct urb *urb; + void *rx_desc; u8 *rx_data; /* A bulk transfer of USB may contain may packets, so the @@ -2747,7 +3027,7 @@ static int rx_bottom(struct r8152 *tp, int budget) rx_desc = agg->buffer; rx_data = agg->buffer; - len_used += sizeof(struct rx_desc); + len_used += tp->rx_desc.size; while (urb->actual_length > len_used) { struct net_device *netdev = tp->netdev; @@ -2758,7 +3038,7 @@ static int rx_bottom(struct r8152 *tp, int budget) WARN_ON_ONCE(skb_queue_len(&tp->rx_queue) >= 1000); - pkt_len = le32_to_cpu(rx_desc->opts1) & RX_LEN_MASK; + pkt_len = tp->desc_ops.rx_len(tp, rx_desc); if (pkt_len < ETH_ZLEN) break; @@ -2768,7 +3048,7 @@ static int rx_bottom(struct r8152 *tp, int budget) pkt_len -= ETH_FCS_LEN; len = pkt_len; - rx_data += sizeof(struct rx_desc); + rx_data += tp->rx_desc.size; if (!agg_free || tp->rx_copybreak > len) use_frags = false; @@ -2799,8 +3079,8 @@ static int rx_bottom(struct r8152 *tp, int budget) goto find_next_rx; } - skb->ip_summed = r8152_rx_csum(tp, rx_desc); - rtl_rx_vlan_tag(rx_desc, skb); + skb->ip_summed = tp->desc_ops.rx_csum(tp, rx_desc); + tp->rx_desc.vlan_tag(rx_desc, skb); if (use_frags) { if (rx_frag_head_sz) { @@ -2837,10 +3117,10 @@ static int rx_bottom(struct r8152 *tp, int budget) } find_next_rx: - rx_data = rx_agg_align(rx_data + len + ETH_FCS_LEN); - rx_desc = (struct rx_desc *)rx_data; + rx_data = rx_agg_align(tp, rx_data + len + ETH_FCS_LEN); + rx_desc = rx_data; len_used = agg_offset(agg, rx_data); - len_used += sizeof(struct rx_desc); + len_used += tp->rx_desc.size; } WARN_ON(!agg_free && page_count(agg->page) > 1); @@ -3083,13 +3363,19 @@ static netdev_features_t rtl8152_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features) { + struct r8152 *tp = netdev_priv(dev); u32 mss = skb_shinfo(skb)->gso_size; - int max_offset = mss ? GTTCPHO_MAX : TCPHO_MAX; + int max_offset; + + if (tp->version < RTL_VER_16) + max_offset = mss ? GTTCPHO_MAX : TCPHO_MAX; + else + max_offset = mss ? GTTCPHO_MAX : TCPHO_MAX_2; if ((mss || skb->ip_summed == CHECKSUM_PARTIAL) && skb_transport_offset(skb) > max_offset) features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); - else if ((skb->len + sizeof(struct tx_desc)) > agg_buf_sz) + else if ((skb->len + tp->tx_desc.size) > agg_buf_sz) features &= ~NETIF_F_GSO_MASK; return features; @@ -3127,31 +3413,26 @@ static void r8152b_reset_packet_filter(struct r8152 *tp) static void rtl8152_nic_reset(struct r8152 *tp) { - int i; - switch (tp->version) { case RTL_TEST_01: case RTL_VER_10: case RTL_VER_11: ocp_byte_clr_bits(tp, MCU_TYPE_PLA, PLA_CR, CR_TE); - - ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_BMU_RESET, - BMU_RESET_EP_IN); - + ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_BMU_RESET, BMU_RESET_EP_IN); ocp_word_set_bits(tp, MCU_TYPE_USB, USB_USB_CTRL, CDC_ECM_EN); - ocp_byte_clr_bits(tp, MCU_TYPE_PLA, PLA_CR, CR_RE); - - ocp_word_set_bits(tp, MCU_TYPE_USB, USB_BMU_RESET, - BMU_RESET_EP_IN); - + ocp_word_set_bits(tp, MCU_TYPE_USB, USB_BMU_RESET, BMU_RESET_EP_IN); ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_USB_CTRL, CDC_ECM_EN); break; + case RTL_VER_16: + ocp_byte_clr_bits(tp, MCU_TYPE_PLA, PLA_CR, CR_RE | CR_TE); + break; + default: ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CR, CR_RST); - for (i = 0; i < 1000; i++) { + for (int i = 0; i < 1000; i++) { if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) break; if (!(ocp_read_byte(tp, MCU_TYPE_PLA, PLA_CR) & CR_RST)) @@ -3164,7 +3445,7 @@ static void rtl8152_nic_reset(struct r8152 *tp) static void set_tx_qlen(struct r8152 *tp) { - tp->tx_qlen = agg_buf_sz / (mtu_to_size(tp->netdev->mtu) + sizeof(struct tx_desc)); + tp->tx_qlen = agg_buf_sz / (mtu_to_size(tp->netdev->mtu) + tp->tx_desc.size); } static inline u16 rtl8152_get_speed(struct r8152 *tp) @@ -3368,6 +3649,7 @@ static void r8153_set_rx_early_timeout(struct r8152 *tp) case RTL_VER_12: case RTL_VER_13: case RTL_VER_15: + case RTL_VER_16: ocp_write_word(tp, MCU_TYPE_USB, USB_RX_EARLY_TIMEOUT, 640 / 8); ocp_write_word(tp, MCU_TYPE_USB, USB_RX_EXTRA_AGGR_TMR, @@ -3379,9 +3661,14 @@ static void r8153_set_rx_early_timeout(struct r8152 *tp) } } +static u32 rx_reserved_size(struct r8152 *tp, u32 mtu) +{ + return mtu_to_size(mtu) + tp->rx_desc.size + tp->rx_desc.align; +} + static void r8153_set_rx_early_size(struct r8152 *tp) { - u32 ocp_data = tp->rx_buf_sz - rx_reserved_size(tp->netdev->mtu); + u32 ocp_data = tp->rx_buf_sz - rx_reserved_size(tp, tp->netdev->mtu); switch (tp->version) { case RTL_VER_03: @@ -3406,6 +3693,10 @@ static void r8153_set_rx_early_size(struct r8152 *tp) ocp_write_word(tp, MCU_TYPE_USB, USB_RX_EARLY_SIZE, ocp_data / 8); break; + case RTL_VER_16: + ocp_write_word(tp, MCU_TYPE_USB, USB_RX_EARLY_SIZE, + ocp_data / 16); + break; default: WARN_ON_ONCE(1); break; @@ -3517,6 +3808,7 @@ static void rtl_rx_vlan_en(struct r8152 *tp, bool enable) case RTL_VER_12: case RTL_VER_13: case RTL_VER_15: + case RTL_VER_16: default: if (enable) ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_RCR1, @@ -3674,6 +3966,14 @@ static void r8153_u2p3en(struct r8152 *tp, bool enable) ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_U2P3_CTRL, U2P3_ENABLE); } +static int r8157_u2p3en(struct r8152 *tp, bool enable) +{ + if (enable) + return rtl_ip_set_bits(tp, USB_U2P3_V2_CTRL, U2P3_V2_ENABLE); + else + return rtl_ip_clr_bits(tp, USB_U2P3_V2_CTRL, U2P3_V2_ENABLE); +} + static void r8153b_ups_flags(struct r8152 *tp) { u32 ups_flags = 0; @@ -4019,6 +4319,18 @@ static void r8153b_power_cut_en(struct r8152 *tp, bool enable) ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_MISC_0, PCUT_STATUS); } +static void r8157_power_cut_en(struct r8152 *tp, bool enable) +{ + if (enable) { + ocp_word_set_bits(tp, MCU_TYPE_USB, USB_POWER_CUT, PWR_EN | PHASE2_EN); + ocp_byte_set_bits(tp, MCU_TYPE_USB, USB_MISC_2, BIT(1)); + } else { + ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_POWER_CUT, PWR_EN); + ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_MISC_0, PCUT_STATUS); + ocp_byte_clr_bits(tp, MCU_TYPE_USB, USB_MISC_2, BIT(1)); + } +} + static void r8153_queue_wake(struct r8152 *tp, bool enable) { if (enable) @@ -4135,6 +4447,22 @@ static void rtl8156_runtime_enable(struct r8152 *tp, bool enable) } } +static void rtl8157_runtime_enable(struct r8152 *tp, bool enable) +{ + if (enable) { + r8153_queue_wake(tp, true); + r8153b_u1u2en(tp, false); + r8157_u2p3en(tp, false); + rtl_runtime_suspend_enable(tp, true); + } else { + r8153_queue_wake(tp, false); + rtl_runtime_suspend_enable(tp, false); + r8157_u2p3en(tp, true); + if (tp->udev->speed >= USB_SPEED_SUPER) + r8153b_u1u2en(tp, true); + } +} + static void r8153_teredo_off(struct r8152 *tp) { switch (tp->version) { @@ -4159,6 +4487,7 @@ static void r8153_teredo_off(struct r8152 *tp) case RTL_VER_13: case RTL_VER_14: case RTL_VER_15: + case RTL_VER_16: default: /* The bit 0 ~ 7 are relative with teredo settings. They are * W1C (write 1 to clear), so set all 1 to disable it. @@ -4212,6 +4541,7 @@ static void rtl_clear_bp(struct r8152 *tp, u16 type) bp_num = 8; break; case RTL_VER_14: + case RTL_VER_16: default: ocp_write_word(tp, type, USB_BP2_EN, 0); bp_num = 16; @@ -4319,6 +4649,7 @@ static bool rtl8152_is_fw_phy_speed_up_ok(struct r8152 *tp, struct fw_phy_speed_ case RTL_VER_11: case RTL_VER_12: case RTL_VER_14: + case RTL_VER_16: goto out; case RTL_VER_13: case RTL_VER_15: @@ -5480,6 +5811,7 @@ static void rtl_eee_enable(struct r8152 *tp, bool enable) case RTL_VER_12: case RTL_VER_13: case RTL_VER_15: + case RTL_VER_16: if (enable) { r8156_eee_en(tp, true); ocp_reg_write(tp, OCP_EEE_ADV, tp->eee_adv); @@ -6064,15 +6396,24 @@ static int rtl8156_enable(struct r8152 *tp) if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return -ENODEV; - r8156_fc_parameter(tp); + if (tp->version < RTL_VER_12) + r8156_fc_parameter(tp); + set_tx_qlen(tp); rtl_set_eee_plus(tp); + + if (tp->version >= RTL_VER_12 && tp->version <= RTL_VER_16) + ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_RX_AGGR_NUM, RX_AGGR_NUM_MASK); + r8153_set_rx_early_timeout(tp); r8153_set_rx_early_size(tp); speed = rtl8152_get_speed(tp); rtl_set_ifg(tp, speed); + if (tp->version >= RTL_VER_16) + return rtl_enable(tp); + if (speed & _2500bps) ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, IDLE_SPDWN_EN); @@ -6080,10 +6421,12 @@ static int rtl8156_enable(struct r8152 *tp) ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, IDLE_SPDWN_EN); - if (speed & _1000bps) - ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_TXTWSYS, 0x11); - else if (speed & _500bps) - ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_TXTWSYS, 0x3d); + if (tp->version < RTL_VER_12) { + if (speed & _1000bps) + ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_TXTWSYS, 0x11); + else if (speed & _500bps) + ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_TXTWSYS, 0x3d); + } if (tp->udev->speed == USB_SPEED_HIGH) { /* USB 0xb45e[3:0] l1_nyet_hird */ @@ -6108,45 +6451,6 @@ static void rtl8156_disable(struct r8152 *tp) rtl8153_disable(tp); } -static int rtl8156b_enable(struct r8152 *tp) -{ - u16 speed; - - if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) - return -ENODEV; - - set_tx_qlen(tp); - rtl_set_eee_plus(tp); - - ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_RX_AGGR_NUM, RX_AGGR_NUM_MASK); - - r8153_set_rx_early_timeout(tp); - r8153_set_rx_early_size(tp); - - speed = rtl8152_get_speed(tp); - rtl_set_ifg(tp, speed); - - if (speed & _2500bps) - ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, - IDLE_SPDWN_EN); - else - ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, - IDLE_SPDWN_EN); - - if (tp->udev->speed == USB_SPEED_HIGH) { - if (is_flow_control(speed)) - ocp_word_w0w1(tp, MCU_TYPE_USB, USB_L1_CTRL, 0xf, 0xf); - else - ocp_word_w0w1(tp, MCU_TYPE_USB, USB_L1_CTRL, 0xf, 0x1); - } - - ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_FW_TASK, FC_PATCH_TASK); - usleep_range(1000, 2000); - ocp_word_set_bits(tp, MCU_TYPE_USB, USB_FW_TASK, FC_PATCH_TASK); - - return rtl_enable(tp); -} - static int rtl8152_set_speed(struct r8152 *tp, u8 autoneg, u32 speed, u8 duplex, u32 advertising) { @@ -6489,7 +6793,7 @@ static void rtl8156_change_mtu(struct r8152 *tp) /* TX share fifo free credit full threshold */ ocp_write_word(tp, MCU_TYPE_PLA, PLA_TXFIFO_CTRL, 512 / 64); ocp_write_word(tp, MCU_TYPE_PLA, PLA_TXFIFO_FULL, - ALIGN(rx_max_size + sizeof(struct tx_desc), 1024) / 16); + ALIGN(rx_max_size + tp->tx_desc.size, 1024) / 16); } static void rtl8156_up(struct r8152 *tp) @@ -6498,7 +6802,8 @@ static void rtl8156_up(struct r8152 *tp) return; r8153b_u1u2en(tp, false); - r8153_u2p3en(tp, false); + if (tp->version != RTL_VER_16) + r8153_u2p3en(tp, false); r8153_aldps_en(tp, false); rxdy_gated_en(tp, true); @@ -6511,6 +6816,9 @@ static void rtl8156_up(struct r8152 *tp) ocp_byte_clr_bits(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, NOW_IS_OOB); + if (tp->version == RTL_VER_16) + ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_RCR1, BIT(3)); + ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, MCU_BORW_EN); rtl_rx_vlan_en(tp, tp->netdev->features & NETIF_F_HW_VLAN_CTAG_RX); @@ -6534,8 +6842,11 @@ static void rtl8156_up(struct r8152 *tp) ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, PLA_MCU_SPDWN_EN); - ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_SPEED_OPTION, - RG_PWRDN_EN | ALL_SPEED_OFF); + ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, PLA_MCU_SPDWN_EN); + + if (tp->version != RTL_VER_16) + ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_SPEED_OPTION, + RG_PWRDN_EN | ALL_SPEED_OFF); ocp_write_dword(tp, MCU_TYPE_USB, USB_RX_BUF_TH, 0x00600400); @@ -6545,9 +6856,10 @@ static void rtl8156_up(struct r8152 *tp) } r8153_aldps_en(tp, true); - r8153_u2p3en(tp, true); + if (tp->version != RTL_VER_16) + r8153_u2p3en(tp, true); - if (tp->udev->speed >= USB_SPEED_SUPER) + if (tp->version != RTL_VER_16 && tp->udev->speed >= USB_SPEED_SUPER) r8153b_u1u2en(tp, true); } @@ -6562,8 +6874,10 @@ static void rtl8156_down(struct r8152 *tp) PLA_MCU_SPDWN_EN); r8153b_u1u2en(tp, false); - r8153_u2p3en(tp, false); - r8153b_power_cut_en(tp, false); + if (tp->version != RTL_VER_16) { + r8153_u2p3en(tp, false); + r8153b_power_cut_en(tp, false); + } r8153_aldps_en(tp, false); ocp_byte_clr_bits(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, NOW_IS_OOB); @@ -7636,106 +7950,241 @@ static void r8156b_hw_phy_cfg(struct r8152 *tp) set_bit(PHY_RESET, &tp->flags); } -static void r8156_init(struct r8152 *tp) +static void r8157_hw_phy_cfg(struct r8152 *tp) { + u32 ocp_data; u16 data; - int i; + int ret; - if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) - return; + r8156b_wait_loading_flash(tp); - ocp_byte_clr_bits(tp, MCU_TYPE_USB, USB_ECM_OP, EN_ALL_SPEED); - - ocp_write_word(tp, MCU_TYPE_USB, USB_SPEED_OPTION, 0); - - ocp_word_set_bits(tp, MCU_TYPE_USB, USB_ECM_OPTION, BYPASS_MAC_RESET); - - r8153b_u1u2en(tp, false); - - for (i = 0; i < 500; i++) { - if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_BOOT_CTRL) & - AUTOLOAD_DONE) - break; - - msleep(20); - if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) - return; + ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_MISC_0); + if (ocp_data & PCUT_STATUS) { + ocp_data &= ~PCUT_STATUS; + ocp_write_word(tp, MCU_TYPE_USB, USB_MISC_0, ocp_data); } data = r8153_phy_status(tp, 0); - if (data == PHY_STAT_EXT_INIT) + switch (data) { + case PHY_STAT_EXT_INIT: + ocp_reg_clr_bits(tp, 0xa466, BIT(0)); ocp_reg_clr_bits(tp, 0xa468, BIT(3) | BIT(1)); + break; + case PHY_STAT_LAN_ON: + case PHY_STAT_PWRDN: + default: + break; + } - r8152_mdio_test_and_clr_bit(tp, MII_BMCR, BMCR_PDOWN); + data = r8152_mdio_read(tp, MII_BMCR); + if (data & BMCR_PDOWN) { + data &= ~BMCR_PDOWN; + r8152_mdio_write(tp, MII_BMCR, data); + } - data = r8153_phy_status(tp, PHY_STAT_LAN_ON); - WARN_ON_ONCE(data != PHY_STAT_LAN_ON); + r8153_aldps_en(tp, false); + rtl_eee_enable(tp, false); - r8153_u2p3en(tp, false); + ret = r8153_phy_status(tp, PHY_STAT_LAN_ON); + if (ret < 0) + return; + WARN_ON_ONCE(ret != PHY_STAT_LAN_ON); - /* MSC timer = 0xfff * 8ms = 32760 ms */ - ocp_write_word(tp, MCU_TYPE_USB, USB_MSC_TIMER, 0x0fff); + /* PFM mode */ + ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_PHY_PWR, PFM_PWM_SWITCH); - /* U1/U2/L1 idle timer. 500 us */ - ocp_write_word(tp, MCU_TYPE_USB, USB_U1U2_TIMER, 500); + /* Advanced Power Saving parameter */ + ocp_reg_set_bits(tp, 0xa430, BIT(0) | BIT(1)); - r8153b_power_cut_en(tp, false); - r8156_ups_en(tp, false); - r8153_queue_wake(tp, false); - rtl_runtime_suspend_enable(tp, false); + /* aldpsce force mode */ + ocp_reg_clr_bits(tp, 0xa44a, BIT(2)); - if (tp->udev->speed >= USB_SPEED_SUPER) - r8153b_u1u2en(tp, true); + switch (tp->version) { + case RTL_VER_16: + /* XG_INRX parameter */ + sram_write_w0w1(tp, 0x8183, 0xff00, 0x5900); + ocp_reg_set_bits(tp, 0xa654, BIT(11)); + ocp_reg_set_bits(tp, 0xb648, BIT(14)); + ocp_reg_clr_bits(tp, 0xad2c, BIT(15)); + ocp_reg_set_bits(tp, 0xad94, BIT(5)); + ocp_reg_set_bits(tp, 0xada0, BIT(1)); + ocp_reg_w0w1(tp, 0xae06, 0xfc00, 0x7c00); + sram2_write_w0w1(tp, 0x8647, 0xff00, 0xe600); + sram2_write_w0w1(tp, 0x8036, 0xff00, 0x3000); + sram2_write_w0w1(tp, 0x8078, 0xff00, 0x3000); - usb_enable_lpm(tp->udev); + /* green mode */ + sram2_write_w0w1(tp, 0x89e9, 0xff00, 0); + sram2_write_w0w1(tp, 0x8ffd, 0xff00, 0x0100); + sram2_write_w0w1(tp, 0x8ffe, 0xff00, 0x0200); + sram2_write_w0w1(tp, 0x8fff, 0xff00, 0x0400); - r8156_mac_clk_spd(tp, true); + /* recognize AQC/Bcom function */ + sram_write_w0w1(tp, 0x8018, 0xff00, 0x7700); + ocp_reg_write(tp, OCP_SRAM_ADDR, 0x8f9c); + ocp_reg_write(tp, OCP_SRAM_DATA, 0x0005); + ocp_reg_write(tp, OCP_SRAM_DATA, 0x0000); + ocp_reg_write(tp, OCP_SRAM_DATA, 0x00ed); + ocp_reg_write(tp, OCP_SRAM_DATA, 0x0502); + ocp_reg_write(tp, OCP_SRAM_DATA, 0x0b00); + ocp_reg_write(tp, OCP_SRAM_DATA, 0xd401); + sram_write_w0w1(tp, 0x8fa8, 0xff00, 0x2900); - ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, - PLA_MCU_SPDWN_EN); + /* RFI_corr_thd 5g */ + sram2_write_w0w1(tp, 0x814b, 0xff00, 0x1100); + sram2_write_w0w1(tp, 0x814d, 0xff00, 0x1100); + sram2_write_w0w1(tp, 0x814f, 0xff00, 0x0b00); + sram2_write_w0w1(tp, 0x8142, 0xff00, 0x0100); + sram2_write_w0w1(tp, 0x8144, 0xff00, 0x0100); + sram2_write_w0w1(tp, 0x8150, 0xff00, 0x0100); - if (rtl8152_get_speed(tp) & LINK_STATUS) - ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS, - CUR_LINK_OK | POLL_LINK_CHG); - else - ocp_word_w0w1(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS, CUR_LINK_OK, - POLL_LINK_CHG); + /* RFI_corr_thd 2p5g */ + sram2_write_w0w1(tp, 0x8118, 0xff00, 0x0700); + sram2_write_w0w1(tp, 0x811a, 0xff00, 0x0700); + sram2_write_w0w1(tp, 0x811c, 0xff00, 0x0500); + sram2_write_w0w1(tp, 0x810f, 0xff00, 0x0100); + sram2_write_w0w1(tp, 0x8111, 0xff00, 0x0100); + sram2_write_w0w1(tp, 0x811d, 0xff00, 0x0100); - set_bit(GREEN_ETHERNET, &tp->flags); + /* RFI parameter */ + ocp_reg_clr_bits(tp, 0xad1c, BIT(8)); + ocp_reg_w0w1(tp, 0xade8, 0xffc0, 0x1400); + sram2_write_w0w1(tp, 0x864b, 0xff00, 0x9d00); + sram2_write_w0w1(tp, 0x862c, 0xff00, 0x1200); + ocp_reg_write(tp, OCP_SRAM_ADDR, 0x8566); + ocp_reg_write(tp, OCP_SRAM_DATA, 0x003f); + ocp_reg_write(tp, OCP_SRAM_DATA, 0x3f02); + ocp_reg_write(tp, OCP_SRAM_DATA, 0x023c); + ocp_reg_write(tp, OCP_SRAM_DATA, 0x3b0a); + ocp_reg_write(tp, OCP_SRAM_DATA, 0x1c00); + ocp_reg_write(tp, OCP_SRAM_DATA, 0x0000); + ocp_reg_write(tp, OCP_SRAM_DATA, 0x0000); + ocp_reg_write(tp, OCP_SRAM_DATA, 0x0000); + ocp_reg_write(tp, OCP_SRAM_DATA, 0x0000); - /* rx aggregation */ - ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_USB_CTRL, - RX_AGG_DISABLE | RX_ZERO_EN); + /* RFI-color noise gen parameter 5g */ + ocp_reg_set_bits(tp, 0xad9c, BIT(5)); + sram2_write_w0w1(tp, 0x8122, 0xff00, 0x0c00); + ocp_reg_write(tp, OCP_SRAM2_ADDR, 0x82c8); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x03ed); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x03ff); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x0009); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x03fe); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x000b); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x0021); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x03f7); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x03b8); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x03e0); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x0049); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x0049); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x03e0); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x03b8); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x03f7); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x0021); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x000b); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x03fe); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x0009); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x03ff); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x03ed); - ocp_byte_set_bits(tp, MCU_TYPE_USB, USB_BMU_CONFIG, ACT_ODMA); + /* RFI-color noise gen parameter 2p5g */ + sram2_write_w0w1(tp, 0x80ef, 0xff00, 0x0c00); + ocp_reg_write(tp, OCP_SRAM2_ADDR, 0x82a0); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x000e); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x03fe); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x03ed); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x0006); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x001a); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x03f1); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x03d8); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x0023); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x0054); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x0322); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x00dd); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x03ab); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x03dc); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x0027); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x000e); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x03e5); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x03f9); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x0012); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x0001); + ocp_reg_write(tp, OCP_SRAM2_DATA, 0x03f1); - r8156_mdio_force_mode(tp); - rtl_tally_reset(tp); + /* modify thermal speed down threshold */ + ocp_reg_w0w1(tp, 0xb54c, 0xffc0, 0x3700); - tp->coalesce = 15000; /* 15 us */ + /* XG compatibility modification */ + ocp_reg_set_bits(tp, 0xb648, BIT(6)); + sram2_write_w0w1(tp, 0x8082, 0xff00, 0x5d00); + sram2_write_w0w1(tp, 0x807c, 0xff00, 0x5000); + sram2_write_w0w1(tp, 0x809d, 0xff00, 0x5000); + break; + default: + break; + } + + if (rtl_phy_patch_request(tp, true, true)) + return; + + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, EEE_SPDWN_EN); + + ocp_reg_w0w1(tp, OCP_DOWN_SPEED, EN_EEE_100 | EN_EEE_1000, EN_10M_CLKDIV); + + tp->ups_info._10m_ckdiv = true; + tp->ups_info.eee_plloff_100 = false; + tp->ups_info.eee_plloff_giga = false; + + ocp_reg_set_bits(tp, OCP_POWER_CFG, EEE_CLKDIV_EN); + tp->ups_info.eee_ckdiv = true; + + rtl_phy_patch_request(tp, false, true); + + rtl_green_en(tp, test_bit(GREEN_ETHERNET, &tp->flags)); + + ocp_reg_clr_bits(tp, 0xa428, BIT(9)); + ocp_reg_clr_bits(tp, 0xa5ea, BIT(0) | BIT(1)); + tp->ups_info.lite_mode = 0; + + if (tp->eee_en) + rtl_eee_enable(tp, true); + + r8153_aldps_en(tp, true); + r8152b_enable_fc(tp); + + set_bit(PHY_RESET, &tp->flags); } -static void r8156b_init(struct r8152 *tp) +static void r8156_init(struct r8152 *tp) { + u32 ocp_data; u16 data; int i; if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) return; + if (tp->version == RTL_VER_16) { + ocp_byte_set_bits(tp, MCU_TYPE_USB, 0xcffe, BIT(3)); + ocp_byte_clr_bits(tp, MCU_TYPE_USB, 0xd3ca, BIT(0)); + } + ocp_byte_clr_bits(tp, MCU_TYPE_USB, USB_ECM_OP, EN_ALL_SPEED); - ocp_write_word(tp, MCU_TYPE_USB, USB_SPEED_OPTION, 0); + if (tp->version != RTL_VER_16) + ocp_write_word(tp, MCU_TYPE_USB, USB_SPEED_OPTION, 0); ocp_word_set_bits(tp, MCU_TYPE_USB, USB_ECM_OPTION, BYPASS_MAC_RESET); - ocp_word_set_bits(tp, MCU_TYPE_USB, USB_U2P3_CTRL, RX_DETECT8); + if (tp->version >= RTL_VER_12 && tp->version <= RTL_VER_15) + ocp_word_set_bits(tp, MCU_TYPE_USB, USB_U2P3_CTRL, RX_DETECT8); r8153b_u1u2en(tp, false); switch (tp->version) { case RTL_VER_13: case RTL_VER_15: + case RTL_VER_16: r8156b_wait_loading_flash(tp); break; default: @@ -7755,14 +8204,22 @@ static void r8156b_init(struct r8152 *tp) data = r8153_phy_status(tp, 0); if (data == PHY_STAT_EXT_INIT) { ocp_reg_clr_bits(tp, 0xa468, BIT(3) | BIT(1)); - ocp_reg_clr_bits(tp, 0xa466, BIT(0)); + if (tp->version >= RTL_VER_12) + ocp_reg_clr_bits(tp, 0xa466, BIT(0)); } - r8152_mdio_test_and_clr_bit(tp, MII_BMCR, BMCR_PDOWN); + data = r8152_mdio_read(tp, MII_BMCR); + if (data & BMCR_PDOWN) { + data &= ~BMCR_PDOWN; + r8152_mdio_write(tp, MII_BMCR, data); + } data = r8153_phy_status(tp, PHY_STAT_LAN_ON); - r8153_u2p3en(tp, false); + if (tp->version == RTL_VER_16) + r8157_u2p3en(tp, false); + else + r8153_u2p3en(tp, false); /* MSC timer = 0xfff * 8ms = 32760 ms */ ocp_write_word(tp, MCU_TYPE_USB, USB_MSC_TIMER, 0x0fff); @@ -7770,7 +8227,11 @@ static void r8156b_init(struct r8152 *tp) /* U1/U2/L1 idle timer. 500 us */ ocp_write_word(tp, MCU_TYPE_USB, USB_U1U2_TIMER, 500); - r8153b_power_cut_en(tp, false); + if (tp->version == RTL_VER_16) + r8157_power_cut_en(tp, false); + else + r8153b_power_cut_en(tp, false); + r8156_ups_en(tp, false); r8153_queue_wake(tp, false); rtl_runtime_suspend_enable(tp, false); @@ -7780,39 +8241,53 @@ static void r8156b_init(struct r8152 *tp) usb_enable_lpm(tp->udev); - ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_RCR, SLOT_EN); + if (tp->version >= RTL_VER_12 && tp->version <= RTL_VER_15) { + ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_RCR, SLOT_EN); - ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_CPCR, FLOW_CTRL_EN); + ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_CPCR, FLOW_CTRL_EN); - /* enable fc timer and set timer to 600 ms. */ - ocp_write_word(tp, MCU_TYPE_USB, USB_FC_TIMER, - CTRL_TIMER_EN | (600 / 8)); + /* enable fc timer and set timer to 600 ms. */ + ocp_write_word(tp, MCU_TYPE_USB, USB_FC_TIMER, CTRL_TIMER_EN | (600 / 8)); - if (!(ocp_read_word(tp, MCU_TYPE_PLA, PLA_POL_GPIO_CTRL) & DACK_DET_EN)) - ocp_word_w0w1(tp, MCU_TYPE_USB, USB_FW_CTRL, AUTO_SPEEDUP, - FLOW_CTRL_PATCH_2); - else - ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_FW_CTRL, AUTO_SPEEDUP); + ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_FW_CTRL); + if (!(ocp_read_word(tp, MCU_TYPE_PLA, PLA_POL_GPIO_CTRL) & DACK_DET_EN)) + ocp_data |= FLOW_CTRL_PATCH_2; + ocp_data &= ~AUTO_SPEEDUP; + ocp_write_word(tp, MCU_TYPE_USB, USB_FW_CTRL, ocp_data); - ocp_word_set_bits(tp, MCU_TYPE_USB, USB_FW_TASK, FC_PATCH_TASK); + ocp_word_set_bits(tp, MCU_TYPE_USB, USB_FW_TASK, FC_PATCH_TASK); + } r8156_mac_clk_spd(tp, true); - ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, - PLA_MCU_SPDWN_EN); + if (tp->version != RTL_VER_16) + ocp_word_clr_bits(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, PLA_MCU_SPDWN_EN); + ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS); if (rtl8152_get_speed(tp) & LINK_STATUS) - ocp_word_set_bits(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS, - CUR_LINK_OK | POLL_LINK_CHG); + ocp_data |= CUR_LINK_OK; else - ocp_word_w0w1(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS, CUR_LINK_OK, - POLL_LINK_CHG); + ocp_data &= ~CUR_LINK_OK; + ocp_data |= POLL_LINK_CHG; + ocp_write_word(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS, ocp_data); set_bit(GREEN_ETHERNET, &tp->flags); - /* rx aggregation */ - ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_USB_CTRL, - RX_AGG_DISABLE | RX_ZERO_EN); + /* rx aggregation / 16 bytes Rx descriptor */ + if (tp->version == RTL_VER_16) + ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_USB_CTRL, RX_AGG_DISABLE | RX_DESC_16B); + else + ocp_word_clr_bits(tp, MCU_TYPE_USB, USB_USB_CTRL, RX_AGG_DISABLE | RX_ZERO_EN); + + if (tp->version < RTL_VER_12) + ocp_byte_set_bits(tp, MCU_TYPE_USB, USB_BMU_CONFIG, ACT_ODMA); + + if (tp->version == RTL_VER_16) { + /* Disable Rx Zero Len */ + rtl_bmu_clr_bits(tp, 0x2300, BIT(3)); + /* TX descriptor Signature */ + ocp_byte_clr_bits(tp, MCU_TYPE_USB, 0xd4ae, BIT(1)); + } r8156_mdio_force_mode(tp); rtl_tally_reset(tp); @@ -8962,6 +9437,11 @@ static void rtl8153_unload(struct r8152 *tp) return; r8153_power_cut_en(tp, false); + + if (tp->version >= RTL_VER_16) { + /* Disable Interrupt Mitigation */ + ocp_byte_clr_bits(tp, MCU_TYPE_USB, 0xcf04, BIT(0) | BIT(1) | BIT(2) | BIT(7)); + } } static void rtl8153b_unload(struct r8152 *tp) @@ -8972,6 +9452,38 @@ static void rtl8153b_unload(struct r8152 *tp) r8153b_power_cut_en(tp, false); } +static int r8152_desc_init(struct r8152 *tp) +{ + tp->rx_desc.size = sizeof(struct rx_desc); + tp->rx_desc.align = 8; + tp->rx_desc.vlan_tag = r8152_rx_vlan_tag; + tp->desc_ops.rx_csum = r8152_rx_csum; + tp->desc_ops.rx_len = r8152_rx_len; + tp->tx_desc.size = sizeof(struct tx_desc); + tp->tx_desc.align = 4; + tp->tx_desc.vlan_tag = r8152_tx_vlan_tag; + tp->desc_ops.tx_csum = r8152_tx_csum; + tp->desc_ops.tx_len = r8152_tx_len; + + return 0; +} + +static int r8157_desc_init(struct r8152 *tp) +{ + tp->rx_desc.size = sizeof(struct rx_desc_v2); + tp->rx_desc.align = 16; + tp->rx_desc.vlan_tag = r8157_rx_vlan_tag; + tp->desc_ops.rx_csum = r8157_rx_csum; + tp->desc_ops.rx_len = r8157_rx_len; + tp->tx_desc.size = sizeof(struct tx_desc_v2); + tp->tx_desc.align = 16; + tp->tx_desc.vlan_tag = r8152_tx_vlan_tag; + tp->desc_ops.tx_csum = r8157_tx_csum; + tp->desc_ops.tx_len = r8157_tx_len; + + return 0; +} + static int rtl_ops_init(struct r8152 *tp) { struct rtl_ops *ops = &tp->rtl_ops; @@ -8995,6 +9507,7 @@ static int rtl_ops_init(struct r8152 *tp) tp->rx_buf_sz = 16 * 1024; tp->eee_en = true; tp->eee_adv = MDIO_EEE_100TX; + r8152_desc_init(tp); break; case RTL_VER_03: @@ -9019,6 +9532,7 @@ static int rtl_ops_init(struct r8152 *tp) tp->rx_buf_sz = 32 * 1024; tp->eee_en = true; tp->eee_adv = MDIO_EEE_1000T | MDIO_EEE_100TX; + r8152_desc_init(tp); break; case RTL_VER_08: @@ -9038,6 +9552,7 @@ static int rtl_ops_init(struct r8152 *tp) tp->rx_buf_sz = 32 * 1024; tp->eee_en = true; tp->eee_adv = MDIO_EEE_1000T | MDIO_EEE_100TX; + r8152_desc_init(tp); break; case RTL_VER_11: @@ -9060,6 +9575,7 @@ static int rtl_ops_init(struct r8152 *tp) ops->change_mtu = rtl8156_change_mtu; tp->rx_buf_sz = 48 * 1024; tp->support_2500full = 1; + r8152_desc_init(tp); break; case RTL_VER_12: @@ -9070,8 +9586,8 @@ static int rtl_ops_init(struct r8152 *tp) tp->eee_en = true; tp->eee_adv = MDIO_EEE_1000T | MDIO_EEE_100TX; tp->eee_adv2 = MDIO_EEE_2_5GT; - ops->init = r8156b_init; - ops->enable = rtl8156b_enable; + ops->init = r8156_init; + ops->enable = rtl8156_enable; ops->disable = rtl8153_disable; ops->up = rtl8156_up; ops->down = rtl8156_down; @@ -9083,6 +9599,7 @@ static int rtl_ops_init(struct r8152 *tp) ops->autosuspend_en = rtl8156_runtime_enable; ops->change_mtu = rtl8156_change_mtu; tp->rx_buf_sz = 48 * 1024; + r8152_desc_init(tp); break; case RTL_VER_14: @@ -9101,6 +9618,29 @@ static int rtl_ops_init(struct r8152 *tp) tp->rx_buf_sz = 32 * 1024; tp->eee_en = true; tp->eee_adv = MDIO_EEE_1000T | MDIO_EEE_100TX; + r8152_desc_init(tp); + break; + + case RTL_VER_16: + tp->eee_en = true; + tp->eee_adv = MDIO_EEE_1000T | MDIO_EEE_100TX; + tp->eee_adv2 = MDIO_EEE_2_5GT | MDIO_EEE_5GT; + ops->init = r8156_init; + ops->enable = rtl8156_enable; + ops->disable = rtl8153_disable; + ops->up = rtl8156_up; + ops->down = rtl8156_down; + ops->unload = rtl8153_unload; + ops->eee_get = r8153_get_eee; + ops->eee_set = r8152_set_eee; + ops->in_nway = rtl8153_in_nway; + ops->hw_phy_cfg = r8157_hw_phy_cfg; + ops->autosuspend_en = rtl8157_runtime_enable; + ops->change_mtu = rtl8156_change_mtu; + tp->rx_buf_sz = 32 * 1024; + tp->support_2500full = 1; + tp->support_5000full = 1; + r8157_desc_init(tp); break; default: @@ -9253,6 +9793,9 @@ static u8 __rtl_get_hw_ver(struct usb_device *udev) case 0x7420: version = RTL_VER_15; break; + case 0x1030: + version = RTL_VER_16; + break; default: version = RTL_VER_UNKNOWN; dev_info(&udev->dev, "Unknown version 0x%04x\n", ocp_data); @@ -9404,6 +9947,7 @@ static int rtl8152_probe_once(struct usb_interface *intf, case RTL_VER_12: case RTL_VER_13: case RTL_VER_15: + case RTL_VER_16: netdev->max_mtu = size_to_mtu(16 * 1024); break; case RTL_VER_01: @@ -9563,6 +10107,7 @@ static const struct usb_device_id rtl8152_table[] = { { USB_DEVICE(VENDOR_ID_REALTEK, 0x8153) }, { USB_DEVICE(VENDOR_ID_REALTEK, 0x8155) }, { USB_DEVICE(VENDOR_ID_REALTEK, 0x8156) }, + { USB_DEVICE(VENDOR_ID_REALTEK, 0x8157) }, /* Microsoft */ { USB_DEVICE(VENDOR_ID_MICROSOFT, 0x07ab) }, From fed4626501c871890da287bec62a96e52da1af89 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 27 Mar 2026 11:45:20 +0100 Subject: [PATCH 1286/1561] can: ucan: fix devres lifetime USB drivers bind to USB interfaces and any device managed resources should have their lifetime tied to the interface rather than parent USB device. This avoids issues like memory leaks when drivers are unbound without their devices being physically disconnected (e.g. on probe deferral or configuration changes). Fix the control message buffer lifetime so that it is released on driver unbind. Fixes: 9f2d3eae88d2 ("can: ucan: add driver for Theobroma Systems UCAN devices") Cc: stable@vger.kernel.org # 4.19 Cc: Jakob Unterwurzacher Signed-off-by: Johan Hovold Link: https://patch.msgid.link/20260327104520.1310158-1-johan@kernel.org Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/ucan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/usb/ucan.c b/drivers/net/can/usb/ucan.c index 0ea0ac75e42f..ee3c1abbd063 100644 --- a/drivers/net/can/usb/ucan.c +++ b/drivers/net/can/usb/ucan.c @@ -1397,7 +1397,7 @@ static int ucan_probe(struct usb_interface *intf, */ /* Prepare Memory for control transfers */ - ctl_msg_buffer = devm_kzalloc(&udev->dev, + ctl_msg_buffer = devm_kzalloc(&intf->dev, sizeof(union ucan_ctl_payload), GFP_KERNEL); if (!ctl_msg_buffer) { From a535a9217ca3f2fccedaafb2fddb4c48f27d36dc Mon Sep 17 00:00:00 2001 From: Samuel Page Date: Wed, 8 Apr 2026 15:30:13 +0100 Subject: [PATCH 1287/1561] can: raw: fix ro->uniq use-after-free in raw_rcv() raw_release() unregisters raw CAN receive filters via can_rx_unregister(), but receiver deletion is deferred with call_rcu(). This leaves a window where raw_rcv() may still be running in an RCU read-side critical section after raw_release() frees ro->uniq, leading to a use-after-free of the percpu uniq storage. Move free_percpu(ro->uniq) out of raw_release() and into a raw-specific socket destructor. can_rx_unregister() takes an extra reference to the socket and only drops it from the RCU callback, so freeing uniq from sk_destruct ensures the percpu area is not released until the relevant callbacks have drained. Fixes: 514ac99c64b2 ("can: fix multiple delivery of a single CAN frame for overlapping CAN filters") Cc: stable@vger.kernel.org # v4.1+ Assisted-by: Bynario AI Signed-off-by: Samuel Page Link: https://patch.msgid.link/26ec626d-cae7-4418-9782-7198864d070c@bynar.io Acked-by: Oliver Hartkopp [mkl: applied manually] Signed-off-by: Marc Kleine-Budde --- net/can/raw.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/can/raw.c b/net/can/raw.c index eee244ffc31e..58a96e933deb 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -361,6 +361,14 @@ static int raw_notifier(struct notifier_block *nb, unsigned long msg, return NOTIFY_DONE; } +static void raw_sock_destruct(struct sock *sk) +{ + struct raw_sock *ro = raw_sk(sk); + + free_percpu(ro->uniq); + can_sock_destruct(sk); +} + static int raw_init(struct sock *sk) { struct raw_sock *ro = raw_sk(sk); @@ -387,6 +395,8 @@ static int raw_init(struct sock *sk) if (unlikely(!ro->uniq)) return -ENOMEM; + sk->sk_destruct = raw_sock_destruct; + /* set notifier */ spin_lock(&raw_notifier_lock); list_add_tail(&ro->notifier, &raw_notifier_list); @@ -436,7 +446,6 @@ static int raw_release(struct socket *sock) ro->bound = 0; ro->dev = NULL; ro->count = 0; - free_percpu(ro->uniq); sock_orphan(sk); sock->sk = NULL; From eb216e422044f5523da038136ce0f2abcc6a75bc Mon Sep 17 00:00:00 2001 From: Salil Mehta Date: Thu, 9 Apr 2026 01:04:30 +0100 Subject: [PATCH 1288/1561] MAINTAINERS: Remove Salil Mehta as HiSilicon HNS3/HNS Ethernet maintainer Closing this chapter and a long wonderful journey with my team, I sign off one last time with my Huawei email address. Remove my maintainer entry for the HiSilicon HNS and HNS3 10G/100G Ethernet drivers, and add a CREDITS entry for my co-authorship and maintenance contributions to these drivers. Link: https://lore.kernel.org/netdev/259cd032-2ccb-452b-8524-75bc7162e138@huawei.com/ Cc: Jian Shen Signed-off-by: Salil Mehta Acked-by: Jijie Shao Link: https://patch.msgid.link/20260409000430.7217-1-salil.mehta@huawei.com Signed-off-by: Jakub Kicinski --- CREDITS | 10 ++++++++++ MAINTAINERS | 2 -- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/CREDITS b/CREDITS index 9091bac3d2da..a03b00452a1e 100644 --- a/CREDITS +++ b/CREDITS @@ -3592,6 +3592,16 @@ E: wsalamon@tislabs.com E: wsalamon@nai.com D: portions of the Linux Security Module (LSM) framework and security modules +N: Salil Mehta +E: salil.mehta@opnsrc.net +D: Co-authored Huawei/HiSilicon Kunpeng 920 SoC HNS3 PF and VF 100G +D: Ethernet driver +D: Co-authored Huawei/HiSilicon Kunpeng 916 SoC HNS 10G Ethernet +D: driver enhancements +D: Maintained Huawei/HiSilicon HNS and HNS3 10G/100G Ethernet drivers +D: for Kunpeng 916 family, 920 family of SoCs +S: Cambridge, Cambridgeshire, United Kingdom + N: Robert Sanders E: gt8134b@prism.gatech.edu D: Dosemu diff --git a/MAINTAINERS b/MAINTAINERS index dc935838e516..3e4d0c67e58d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11539,7 +11539,6 @@ F: drivers/bus/hisi_lpc.c HISILICON NETWORK SUBSYSTEM 3 DRIVER (HNS3) M: Jian Shen -M: Salil Mehta M: Jijie Shao L: netdev@vger.kernel.org S: Maintained @@ -11554,7 +11553,6 @@ F: drivers/net/ethernet/hisilicon/hibmcge/ HISILICON NETWORK SUBSYSTEM DRIVER M: Jian Shen -M: Salil Mehta L: netdev@vger.kernel.org S: Maintained W: http://www.hisilicon.com From 7789c6bb76acf21539c2c74b0cc869bb57de99e6 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 3 Apr 2026 01:10:18 +0200 Subject: [PATCH 1289/1561] net: Add queue-create operation Add a ynl netdev family operation called queue-create that creates a new queue on a netdevice: name: queue-create attribute-set: queue flags: [admin-perm] do: request: attributes: - ifindex - type - lease reply: &queue-create-op attributes: - id This is a generic operation such that it can be extended for various use cases in future. Right now it is mandatory to specify ifindex, the queue type which is enforced to rx and a lease. The newly created queue id is returned to the caller. A queue from a virtual device can have a lease which refers to another queue from a physical device. This is useful for memory providers and AF_XDP operations which take an ifindex and queue id to allow applications to bind against virtual devices in containers. The lease couples both queues together and allows to proxy the operations from a virtual device in a container to the physical device. In future, the nested lease attribute can be lifted and made optional for other use-cases such as dynamic queue creation for physical netdevs. The lack of lease and the specification of the physical device as an ifindex will imply that we need a real queue to be allocated. Similarly, the queue type enforcement to rx can then be lifted as well to support tx. An early implementation had only driver-specific integration [0], but in order for other virtual devices to reuse, it makes sense to have this as a generic API in core net. For leasing queues, the virtual netdev must have real_num_rx_queues less than num_rx_queues at the time of calling queue-create. The queue-type must be rx as only rx queues are supported for leasing for now. We also enforce that the queue-create ifindex must point to a virtual device, and that the nested lease attribute's ifindex must point to a physical device. The nested lease attribute set contains a netns-id attribute which is optional and can specify a netns-id relative to the caller's netns. It requires cap_net_admin and if the netns-id attribute is not specified, the lease ifindex will be retrieved from the current netns. Also, it is modeled as an s32 type similarly as done elsewhere in the stack. Signed-off-by: Daniel Borkmann Co-developed-by: David Wei Signed-off-by: David Wei Acked-by: Stanislav Fomichev Reviewed-by: Nikolay Aleksandrov Link: https://bpfconf.ebpf.io/bpfconf2025/bpfconf2025_material/lsfmmbpf_2025_netkit_borkmann.pdf [0] Link: https://patch.msgid.link/20260402231031.447597-2-daniel@iogearbox.net Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/netdev.yaml | 46 +++++++++++++++++++++++++ include/uapi/linux/netdev.h | 11 ++++++ net/core/netdev-genl-gen.c | 20 +++++++++++ net/core/netdev-genl-gen.h | 2 ++ net/core/netdev-genl.c | 5 +++ tools/include/uapi/linux/netdev.h | 11 ++++++ 6 files changed, 95 insertions(+) diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 596c306ce52b..b93beb247a11 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -339,6 +339,15 @@ attribute-sets: doc: XSK information for this queue, if any. type: nest nested-attributes: xsk-info + - + name: lease + doc: | + A queue from a virtual device can have a lease which refers to + another queue from a physical device. This is useful for memory + providers and AF_XDP operations which take an ifindex and queue id + to allow applications to bind against virtual devices in containers. + type: nest + nested-attributes: lease - name: qstats doc: | @@ -537,6 +546,26 @@ attribute-sets: name: id - name: type + - + name: lease + attributes: + - + name: ifindex + doc: The netdev ifindex to lease the queue from. + type: u32 + checks: + min: 1 + - + name: queue + doc: The netdev queue to lease from. + type: nest + nested-attributes: queue-id + - + name: netns-id + doc: The network namespace id of the netdev. + type: s32 + checks: + min: 0 - name: dmabuf attributes: @@ -686,6 +715,7 @@ operations: - dmabuf - io-uring - xsk + - lease dump: request: attributes: @@ -797,6 +827,22 @@ operations: reply: attributes: - id + - + name: queue-create + doc: | + Create a new queue for the given netdevice. Whether this operation + is supported depends on the device and the driver. + attribute-set: queue + flags: [admin-perm] + do: + request: + attributes: + - ifindex + - type + - lease + reply: &queue-create-op + attributes: + - id kernel-family: headers: ["net/netdev_netlink.h"] diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index e0b579a1df4f..7df1056a35fd 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -160,6 +160,7 @@ enum { NETDEV_A_QUEUE_DMABUF, NETDEV_A_QUEUE_IO_URING, NETDEV_A_QUEUE_XSK, + NETDEV_A_QUEUE_LEASE, __NETDEV_A_QUEUE_MAX, NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) @@ -202,6 +203,15 @@ enum { NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1) }; +enum { + NETDEV_A_LEASE_IFINDEX = 1, + NETDEV_A_LEASE_QUEUE, + NETDEV_A_LEASE_NETNS_ID, + + __NETDEV_A_LEASE_MAX, + NETDEV_A_LEASE_MAX = (__NETDEV_A_LEASE_MAX - 1) +}; + enum { NETDEV_A_DMABUF_IFINDEX = 1, NETDEV_A_DMABUF_QUEUES, @@ -228,6 +238,7 @@ enum { NETDEV_CMD_BIND_RX, NETDEV_CMD_NAPI_SET, NETDEV_CMD_BIND_TX, + NETDEV_CMD_QUEUE_CREATE, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index ba673e81716f..81aecb5d3bc5 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -28,6 +28,12 @@ static const struct netlink_range_validation netdev_a_napi_defer_hard_irqs_range }; /* Common nested types */ +const struct nla_policy netdev_lease_nl_policy[NETDEV_A_LEASE_NETNS_ID + 1] = { + [NETDEV_A_LEASE_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1), + [NETDEV_A_LEASE_QUEUE] = NLA_POLICY_NESTED(netdev_queue_id_nl_policy), + [NETDEV_A_LEASE_NETNS_ID] = NLA_POLICY_MIN(NLA_S32, 0), +}; + const struct nla_policy netdev_page_pool_info_nl_policy[NETDEV_A_PAGE_POOL_IFINDEX + 1] = { [NETDEV_A_PAGE_POOL_ID] = NLA_POLICY_FULL_RANGE(NLA_UINT, &netdev_a_page_pool_id_range), [NETDEV_A_PAGE_POOL_IFINDEX] = NLA_POLICY_FULL_RANGE(NLA_U32, &netdev_a_page_pool_ifindex_range), @@ -107,6 +113,13 @@ static const struct nla_policy netdev_bind_tx_nl_policy[NETDEV_A_DMABUF_FD + 1] [NETDEV_A_DMABUF_FD] = { .type = NLA_U32, }, }; +/* NETDEV_CMD_QUEUE_CREATE - do */ +static const struct nla_policy netdev_queue_create_nl_policy[NETDEV_A_QUEUE_LEASE + 1] = { + [NETDEV_A_QUEUE_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1), + [NETDEV_A_QUEUE_TYPE] = NLA_POLICY_MAX(NLA_U32, 1), + [NETDEV_A_QUEUE_LEASE] = NLA_POLICY_NESTED(netdev_lease_nl_policy), +}; + /* Ops table for netdev */ static const struct genl_split_ops netdev_nl_ops[] = { { @@ -205,6 +218,13 @@ static const struct genl_split_ops netdev_nl_ops[] = { .maxattr = NETDEV_A_DMABUF_FD, .flags = GENL_CMD_CAP_DO, }, + { + .cmd = NETDEV_CMD_QUEUE_CREATE, + .doit = netdev_nl_queue_create_doit, + .policy = netdev_queue_create_nl_policy, + .maxattr = NETDEV_A_QUEUE_LEASE, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, }; static const struct genl_multicast_group netdev_nl_mcgrps[] = { diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h index cffc08517a41..d71b435d72c1 100644 --- a/net/core/netdev-genl-gen.h +++ b/net/core/netdev-genl-gen.h @@ -14,6 +14,7 @@ #include /* Common nested types */ +extern const struct nla_policy netdev_lease_nl_policy[NETDEV_A_LEASE_NETNS_ID + 1]; extern const struct nla_policy netdev_page_pool_info_nl_policy[NETDEV_A_PAGE_POOL_IFINDEX + 1]; extern const struct nla_policy netdev_queue_id_nl_policy[NETDEV_A_QUEUE_TYPE + 1]; @@ -36,6 +37,7 @@ int netdev_nl_qstats_get_dumpit(struct sk_buff *skb, int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info); int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info); int netdev_nl_bind_tx_doit(struct sk_buff *skb, struct genl_info *info); +int netdev_nl_queue_create_doit(struct sk_buff *skb, struct genl_info *info); enum { NETDEV_NLGRP_MGMT, diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 470fabbeacd9..aae75431858d 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -1120,6 +1120,11 @@ err_genlmsg_free: return err; } +int netdev_nl_queue_create_doit(struct sk_buff *skb, struct genl_info *info) +{ + return -EOPNOTSUPP; +} + void netdev_nl_sock_priv_init(struct netdev_nl_sock *priv) { INIT_LIST_HEAD(&priv->bindings); diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index e0b579a1df4f..7df1056a35fd 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -160,6 +160,7 @@ enum { NETDEV_A_QUEUE_DMABUF, NETDEV_A_QUEUE_IO_URING, NETDEV_A_QUEUE_XSK, + NETDEV_A_QUEUE_LEASE, __NETDEV_A_QUEUE_MAX, NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) @@ -202,6 +203,15 @@ enum { NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1) }; +enum { + NETDEV_A_LEASE_IFINDEX = 1, + NETDEV_A_LEASE_QUEUE, + NETDEV_A_LEASE_NETNS_ID, + + __NETDEV_A_LEASE_MAX, + NETDEV_A_LEASE_MAX = (__NETDEV_A_LEASE_MAX - 1) +}; + enum { NETDEV_A_DMABUF_IFINDEX = 1, NETDEV_A_DMABUF_QUEUES, @@ -228,6 +238,7 @@ enum { NETDEV_CMD_BIND_RX, NETDEV_CMD_NAPI_SET, NETDEV_CMD_BIND_TX, + NETDEV_CMD_QUEUE_CREATE, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) From d04686d9bc86432ea3008d5f358373d8466d1943 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 3 Apr 2026 01:10:19 +0200 Subject: [PATCH 1290/1561] net: Implement netdev_nl_queue_create_doit Implement netdev_nl_queue_create_doit which creates a new rx queue in a virtual netdev and then leases it to a rx queue in a physical netdev. Example with ynl client: # ynl --family netdev --output-json --do queue-create \ --json '{"ifindex": 8, "type": "rx", "lease": {"ifindex": 4, "queue": {"type": "rx", "id": 15}}}' {'id': 1} Note that the netdevice locking order is always from the virtual to the physical device. Signed-off-by: Daniel Borkmann Co-developed-by: David Wei Signed-off-by: David Wei Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20260402231031.447597-3-daniel@iogearbox.net Signed-off-by: Jakub Kicinski --- Documentation/networking/netdevices.rst | 6 + include/linux/netdevice.h | 9 +- include/net/netdev_queues.h | 19 ++- include/net/netdev_rx_queue.h | 15 ++- net/core/dev.c | 8 ++ net/core/dev.h | 5 + net/core/netdev-genl.c | 164 +++++++++++++++++++++++- net/core/netdev_queues.c | 62 +++++++++ net/core/netdev_rx_queue.c | 46 ++++++- 9 files changed, 323 insertions(+), 11 deletions(-) diff --git a/Documentation/networking/netdevices.rst b/Documentation/networking/netdevices.rst index 35704d115312..83e28b96884f 100644 --- a/Documentation/networking/netdevices.rst +++ b/Documentation/networking/netdevices.rst @@ -329,6 +329,12 @@ by setting ``request_ops_lock`` to true. Code comments and docs refer to drivers which have ops called under the instance lock as "ops locked". See also the documentation of the ``lock`` member of struct net_device. +There is also a case of taking two per-netdev locks in sequence when netdev +queues are leased, that is, the netdev-scope lock is taken for both the +virtual and the physical device. To prevent deadlocks, the virtual device's +lock must always be acquired before the physical device's (see +``netdev_nl_queue_create_doit``). + In the future, there will be an option for individual drivers to opt out of using ``rtnl_lock`` and instead perform their control operations directly under the netdev instance lock. diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e15367373f7c..e8aa9cc4075d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2561,7 +2561,14 @@ struct net_device { * Also protects some fields in: * struct napi_struct, struct netdev_queue, struct netdev_rx_queue * - * Ordering: take after rtnl_lock. + * Ordering: + * + * - take after rtnl_lock + * + * - for the case of netdev queue leasing, the netdev-scope lock is + * taken for both the virtual and the physical device; to prevent + * deadlocks, the virtual device's lock must always be acquired + * before the physical device's (see netdev_nl_queue_create_doit) */ struct mutex lock; diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index 95ed28212f4e..748b70552ed1 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -150,6 +150,11 @@ enum { * When NIC-wide config is changed the callback will * be invoked for all queues. * + * @ndo_queue_create: Create a new RX queue on a virtual device that will + * be paired with a physical device's queue via leasing. + * Return the new queue id on success, negative error + * on failure. + * * @supported_params: Bitmask of supported parameters, see QCFG_*. * * Note that @ndo_queue_mem_alloc and @ndo_queue_mem_free may be called while @@ -178,6 +183,8 @@ struct netdev_queue_mgmt_ops { struct netlink_ext_ack *extack); struct device * (*ndo_queue_get_dma_dev)(struct net_device *dev, int idx); + int (*ndo_queue_create)(struct net_device *dev, + struct netlink_ext_ack *extack); unsigned int supported_params; }; @@ -185,7 +192,7 @@ struct netdev_queue_mgmt_ops { void netdev_queue_config(struct net_device *dev, int rxq, struct netdev_queue_config *qcfg); -bool netif_rxq_has_unreadable_mp(struct net_device *dev, int idx); +bool netif_rxq_has_unreadable_mp(struct net_device *dev, unsigned int rxq_idx); /** * DOC: Lockless queue stopping / waking helpers. @@ -374,5 +381,11 @@ static inline unsigned int netif_xmit_timeout_ms(struct netdev_queue *txq) }) struct device *netdev_queue_get_dma_dev(struct net_device *dev, int idx); - -#endif +bool netdev_can_create_queue(const struct net_device *dev, + struct netlink_ext_ack *extack); +bool netdev_can_lease_queue(const struct net_device *dev, + struct netlink_ext_ack *extack); +bool netdev_queue_busy(struct net_device *dev, unsigned int idx, + enum netdev_queue_type type, + struct netlink_ext_ack *extack); +#endif /* _LINUX_NET_QUEUES_H */ diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h index 08f81329fc11..1d41c253f0a3 100644 --- a/include/net/netdev_rx_queue.h +++ b/include/net/netdev_rx_queue.h @@ -31,6 +31,14 @@ struct netdev_rx_queue { struct napi_struct *napi; struct netdev_queue_config qcfg; struct pp_memory_provider_params mp_params; + + /* If a queue is leased, then the lease pointer is always + * valid. From the physical device it points to the virtual + * queue, and from the virtual device it points to the + * physical queue. + */ + struct netdev_rx_queue *lease; + netdevice_tracker lease_tracker; } ____cacheline_aligned_in_smp; /* @@ -60,5 +68,8 @@ get_netdev_rx_queue_index(struct netdev_rx_queue *queue) } int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq); - -#endif +void netdev_rx_queue_lease(struct netdev_rx_queue *rxq_dst, + struct netdev_rx_queue *rxq_src); +void netdev_rx_queue_unlease(struct netdev_rx_queue *rxq_dst, + struct netdev_rx_queue *rxq_src); +#endif /* _LINUX_NETDEV_RX_QUEUE_H */ diff --git a/net/core/dev.c b/net/core/dev.c index 5a31f9d2128c..cc7bcac892af 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1121,6 +1121,14 @@ netdev_get_by_index_lock_ops_compat(struct net *net, int ifindex) return __netdev_put_lock_ops_compat(dev, net); } +struct net_device * +netdev_put_lock(struct net_device *dev, struct net *net, + netdevice_tracker *tracker) +{ + netdev_tracker_free(dev, tracker); + return __netdev_put_lock(dev, net); +} + struct net_device * netdev_xa_find_lock(struct net *net, struct net_device *dev, unsigned long *index) diff --git a/net/core/dev.h b/net/core/dev.h index 781619e76b3e..6516ce2b5517 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -31,6 +31,8 @@ netdev_napi_by_id_lock(struct net *net, unsigned int napi_id); struct net_device *dev_get_by_napi_id(unsigned int napi_id); struct net_device *__netdev_put_lock(struct net_device *dev, struct net *net); +struct net_device *netdev_put_lock(struct net_device *dev, struct net *net, + netdevice_tracker *tracker); struct net_device * netdev_xa_find_lock(struct net *net, struct net_device *dev, unsigned long *index); @@ -96,6 +98,9 @@ int netdev_queue_config_validate(struct net_device *dev, int rxq_idx, struct netdev_queue_config *qcfg, struct netlink_ext_ack *extack); +bool netif_rxq_has_mp(struct net_device *dev, unsigned int rxq_idx); +bool netif_rxq_is_leased(struct net_device *dev, unsigned int rxq_idx); + /* netdev management, shared between various uAPI entry points */ struct netdev_name_node { struct hlist_node hlist; diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index aae75431858d..5d5e5b9a8af0 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -1122,7 +1122,169 @@ err_genlmsg_free: int netdev_nl_queue_create_doit(struct sk_buff *skb, struct genl_info *info) { - return -EOPNOTSUPP; + const int qmaxtype = ARRAY_SIZE(netdev_queue_id_nl_policy) - 1; + const int lmaxtype = ARRAY_SIZE(netdev_lease_nl_policy) - 1; + int err, ifindex, ifindex_lease, queue_id, queue_id_lease; + struct nlattr *qtb[ARRAY_SIZE(netdev_queue_id_nl_policy)]; + struct nlattr *ltb[ARRAY_SIZE(netdev_lease_nl_policy)]; + struct netdev_rx_queue *rxq, *rxq_lease; + struct net_device *dev, *dev_lease; + netdevice_tracker dev_tracker; + s32 netns_lease = -1; + struct nlattr *nest; + struct sk_buff *rsp; + struct net *net; + void *hdr; + + if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_IFINDEX) || + GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_TYPE) || + GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_LEASE)) + return -EINVAL; + if (nla_get_u32(info->attrs[NETDEV_A_QUEUE_TYPE]) != + NETDEV_QUEUE_TYPE_RX) { + NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_QUEUE_TYPE]); + return -EINVAL; + } + + ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]); + + nest = info->attrs[NETDEV_A_QUEUE_LEASE]; + err = nla_parse_nested(ltb, lmaxtype, nest, + netdev_lease_nl_policy, info->extack); + if (err < 0) + return err; + if (NL_REQ_ATTR_CHECK(info->extack, nest, ltb, NETDEV_A_LEASE_IFINDEX) || + NL_REQ_ATTR_CHECK(info->extack, nest, ltb, NETDEV_A_LEASE_QUEUE)) + return -EINVAL; + if (ltb[NETDEV_A_LEASE_NETNS_ID]) { + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + netns_lease = nla_get_s32(ltb[NETDEV_A_LEASE_NETNS_ID]); + } + + ifindex_lease = nla_get_u32(ltb[NETDEV_A_LEASE_IFINDEX]); + + nest = ltb[NETDEV_A_LEASE_QUEUE]; + err = nla_parse_nested(qtb, qmaxtype, nest, + netdev_queue_id_nl_policy, info->extack); + if (err < 0) + return err; + if (NL_REQ_ATTR_CHECK(info->extack, nest, qtb, NETDEV_A_QUEUE_ID) || + NL_REQ_ATTR_CHECK(info->extack, nest, qtb, NETDEV_A_QUEUE_TYPE)) + return -EINVAL; + if (nla_get_u32(qtb[NETDEV_A_QUEUE_TYPE]) != NETDEV_QUEUE_TYPE_RX) { + NL_SET_BAD_ATTR(info->extack, qtb[NETDEV_A_QUEUE_TYPE]); + return -EINVAL; + } + + queue_id_lease = nla_get_u32(qtb[NETDEV_A_QUEUE_ID]); + + rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!rsp) + return -ENOMEM; + + hdr = genlmsg_iput(rsp, info); + if (!hdr) { + err = -EMSGSIZE; + goto err_genlmsg_free; + } + + /* Locking order is always from the virtual to the physical device + * since this is also the same order when applications open the + * memory provider later on. + */ + dev = netdev_get_by_index_lock(genl_info_net(info), ifindex); + if (!dev) { + err = -ENODEV; + goto err_genlmsg_free; + } + if (!netdev_can_create_queue(dev, info->extack)) { + err = -EINVAL; + goto err_unlock_dev; + } + + net = genl_info_net(info); + if (netns_lease >= 0) { + net = get_net_ns_by_id(net, netns_lease); + if (!net) { + err = -ENONET; + goto err_unlock_dev; + } + } + + dev_lease = netdev_get_by_index(net, ifindex_lease, &dev_tracker, + GFP_KERNEL); + if (!dev_lease) { + err = -ENODEV; + goto err_put_netns; + } + if (!netdev_can_lease_queue(dev_lease, info->extack)) { + netdev_put(dev_lease, &dev_tracker); + err = -EINVAL; + goto err_put_netns; + } + + dev_lease = netdev_put_lock(dev_lease, net, &dev_tracker); + if (!dev_lease) { + err = -ENODEV; + goto err_put_netns; + } + if (queue_id_lease >= dev_lease->real_num_rx_queues) { + err = -ERANGE; + NL_SET_BAD_ATTR(info->extack, qtb[NETDEV_A_QUEUE_ID]); + goto err_unlock_dev_lease; + } + if (netdev_queue_busy(dev_lease, queue_id_lease, NETDEV_QUEUE_TYPE_RX, + info->extack)) { + err = -EBUSY; + goto err_unlock_dev_lease; + } + + rxq_lease = __netif_get_rx_queue(dev_lease, queue_id_lease); + rxq = __netif_get_rx_queue(dev, dev->real_num_rx_queues - 1); + + /* Leasing queues from different physical devices is currently + * not supported. Capabilities such as XDP features and DMA + * device may differ between physical devices, and computing + * a correct intersection for the virtual device is not yet + * implemented. + */ + if (rxq->lease && rxq->lease->dev != dev_lease) { + err = -EOPNOTSUPP; + NL_SET_ERR_MSG(info->extack, + "Leasing queues from different devices not supported"); + goto err_unlock_dev_lease; + } + + queue_id = dev->queue_mgmt_ops->ndo_queue_create(dev, info->extack); + if (queue_id < 0) { + err = queue_id; + goto err_unlock_dev_lease; + } + rxq = __netif_get_rx_queue(dev, queue_id); + + netdev_rx_queue_lease(rxq, rxq_lease); + + nla_put_u32(rsp, NETDEV_A_QUEUE_ID, queue_id); + genlmsg_end(rsp, hdr); + + netdev_unlock(dev_lease); + netdev_unlock(dev); + if (netns_lease >= 0) + put_net(net); + + return genlmsg_reply(rsp, info); + +err_unlock_dev_lease: + netdev_unlock(dev_lease); +err_put_netns: + if (netns_lease >= 0) + put_net(net); +err_unlock_dev: + netdev_unlock(dev); +err_genlmsg_free: + nlmsg_free(rsp); + return err; } void netdev_nl_sock_priv_init(struct netdev_nl_sock *priv) diff --git a/net/core/netdev_queues.c b/net/core/netdev_queues.c index 251f27a8307f..177401828e79 100644 --- a/net/core/netdev_queues.c +++ b/net/core/netdev_queues.c @@ -1,6 +1,10 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include +#include + +#include "dev.h" /** * netdev_queue_get_dma_dev() - get dma device for zero-copy operations @@ -25,3 +29,61 @@ struct device *netdev_queue_get_dma_dev(struct net_device *dev, int idx) return dma_dev && dma_dev->dma_mask ? dma_dev : NULL; } +bool netdev_can_create_queue(const struct net_device *dev, + struct netlink_ext_ack *extack) +{ + if (dev->dev.parent) { + NL_SET_ERR_MSG(extack, "Device is not a virtual device"); + return false; + } + if (!dev->queue_mgmt_ops || + !dev->queue_mgmt_ops->ndo_queue_create) { + NL_SET_ERR_MSG(extack, "Device does not support queue creation"); + return false; + } + if (dev->real_num_rx_queues < 1 || + dev->real_num_tx_queues < 1) { + NL_SET_ERR_MSG(extack, "Device must have at least one real queue"); + return false; + } + return true; +} + +bool netdev_can_lease_queue(const struct net_device *dev, + struct netlink_ext_ack *extack) +{ + if (!dev->dev.parent) { + NL_SET_ERR_MSG(extack, "Lease device is a virtual device"); + return false; + } + if (!netif_device_present(dev)) { + NL_SET_ERR_MSG(extack, "Lease device has been removed from the system"); + return false; + } + if (!dev->queue_mgmt_ops) { + NL_SET_ERR_MSG(extack, "Lease device does not support queue management operations"); + return false; + } + return true; +} + +bool netdev_queue_busy(struct net_device *dev, unsigned int idx, + enum netdev_queue_type type, + struct netlink_ext_ack *extack) +{ + if (xsk_get_pool_from_qid(dev, idx)) { + NL_SET_ERR_MSG(extack, "Device queue in use by AF_XDP"); + return true; + } + if (type == NETDEV_QUEUE_TYPE_TX) + return false; + if (netif_rxq_is_leased(dev, idx)) { + NL_SET_ERR_MSG(extack, "Device queue in use due to queue leasing"); + return true; + } + if (netif_rxq_has_mp(dev, idx)) { + NL_SET_ERR_MSG(extack, "Device queue in use by memory provider"); + return true; + } + return false; +} diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c index 668a90658f25..a1f23c2c96d4 100644 --- a/net/core/netdev_rx_queue.c +++ b/net/core/netdev_rx_queue.c @@ -10,15 +10,53 @@ #include "dev.h" #include "page_pool_priv.h" -/* See also page_pool_is_unreadable() */ -bool netif_rxq_has_unreadable_mp(struct net_device *dev, int idx) +void netdev_rx_queue_lease(struct netdev_rx_queue *rxq_dst, + struct netdev_rx_queue *rxq_src) { - struct netdev_rx_queue *rxq = __netif_get_rx_queue(dev, idx); + netdev_assert_locked(rxq_src->dev); + netdev_assert_locked(rxq_dst->dev); - return !!rxq->mp_params.mp_ops; + netdev_hold(rxq_src->dev, &rxq_src->lease_tracker, GFP_KERNEL); + + WRITE_ONCE(rxq_src->lease, rxq_dst); + WRITE_ONCE(rxq_dst->lease, rxq_src); +} + +void netdev_rx_queue_unlease(struct netdev_rx_queue *rxq_dst, + struct netdev_rx_queue *rxq_src) +{ + netdev_assert_locked(rxq_dst->dev); + netdev_assert_locked(rxq_src->dev); + + WRITE_ONCE(rxq_src->lease, NULL); + WRITE_ONCE(rxq_dst->lease, NULL); + + netdev_put(rxq_src->dev, &rxq_src->lease_tracker); +} + +bool netif_rxq_is_leased(struct net_device *dev, unsigned int rxq_idx) +{ + if (rxq_idx < dev->real_num_rx_queues) + return READ_ONCE(__netif_get_rx_queue(dev, rxq_idx)->lease); + return false; +} + +/* See also page_pool_is_unreadable() */ +bool netif_rxq_has_unreadable_mp(struct net_device *dev, unsigned int rxq_idx) +{ + if (rxq_idx < dev->real_num_rx_queues) + return __netif_get_rx_queue(dev, rxq_idx)->mp_params.mp_ops; + return false; } EXPORT_SYMBOL(netif_rxq_has_unreadable_mp); +bool netif_rxq_has_mp(struct net_device *dev, unsigned int rxq_idx) +{ + if (rxq_idx < dev->real_num_rx_queues) + return __netif_get_rx_queue(dev, rxq_idx)->mp_params.mp_priv; + return false; +} + static int netdev_rx_queue_reconfig(struct net_device *dev, unsigned int rxq_idx, struct netdev_queue_config *qcfg_old, From 21d58b35e500ae099188c1be8398442733bc0d89 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 3 Apr 2026 01:10:20 +0200 Subject: [PATCH 1291/1561] net: Add lease info to queue-get response Populate nested lease info to the queue-get response that returns the ifindex, queue id with type and optionally netns id if the device resides in a different netns. Example with ynl client when using AF_XDP via queue leasing: # ip a [...] 4: enp10s0f0np0: mtu 1500 xdp/id:24 qdisc mq state UP group default qlen 1000 link/ether e8:eb:d3:a3:43:f6 brd ff:ff:ff:ff:ff:ff inet 10.0.0.2/24 scope global enp10s0f0np0 valid_lft forever preferred_lft forever inet6 fe80::eaeb:d3ff:fea3:43f6/64 scope link proto kernel_ll valid_lft forever preferred_lft forever [...] # ethtool -i enp10s0f0np0 driver: mlx5_core [...] # ynl --family netdev --output-json --do queue-get \ --json '{"ifindex": 4, "id": 15, "type": "rx"}' {'id': 15, 'ifindex': 4, 'lease': {'ifindex': 8, 'netns-id': 0, 'queue': {'id': 1, 'type': 'rx'}}, 'napi-id': 8227, 'type': 'rx', 'xsk': {}} # ip netns list foo (id: 0) # ip netns exec foo ip a [...] 8: nk@NONE: mtu 1500 qdisc noqueue state UP group default qlen 1000 link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff inet6 fe80::200:ff:fe00:0/64 scope link proto kernel_ll valid_lft forever preferred_lft forever [...] # ip netns exec foo ethtool -i nk driver: netkit [...] # ip netns exec foo ls /sys/class/net/nk/queues/ rx-0 rx-1 tx-0 # ip netns exec foo ynl --family netdev --output-json --do queue-get \ --json '{"ifindex": 8, "id": 1, "type": "rx"}' {"id": 1, "type": "rx", "ifindex": 8, "xsk": {}} Note that the caller of netdev_nl_queue_fill_one() holds the netdevice lock. For the queue-get we do not lock both devices. When queues get {un,}leased, both devices are locked, thus if __netif_get_rx_queue_lease() returns a lease pointer, it points to a valid device. The netns-id is fetched via peernet2id_alloc() similarly as done in OVS. Signed-off-by: Daniel Borkmann Co-developed-by: David Wei Signed-off-by: David Wei Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20260402231031.447597-4-daniel@iogearbox.net Signed-off-by: Jakub Kicinski --- include/net/netdev_rx_queue.h | 14 ++++++++ net/core/netdev-genl.c | 66 ++++++++++++++++++++++++++++++++--- net/core/netdev_rx_queue.c | 54 ++++++++++++++++++++++++++++ 3 files changed, 130 insertions(+), 4 deletions(-) diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h index 1d41c253f0a3..7e98c679ea84 100644 --- a/include/net/netdev_rx_queue.h +++ b/include/net/netdev_rx_queue.h @@ -67,6 +67,20 @@ get_netdev_rx_queue_index(struct netdev_rx_queue *queue) return index; } +enum netif_lease_dir { + NETIF_VIRT_TO_PHYS, + NETIF_PHYS_TO_VIRT, +}; + +struct netdev_rx_queue * +__netif_get_rx_queue_lease(struct net_device **dev, unsigned int *rxq, + enum netif_lease_dir dir); + +struct netdev_rx_queue * +netif_get_rx_queue_lease_locked(struct net_device **dev, unsigned int *rxq); +void netif_put_rx_queue_lease_locked(struct net_device *orig_dev, + struct net_device *dev); + int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq); void netdev_rx_queue_lease(struct netdev_rx_queue *rxq_dst, struct netdev_rx_queue *rxq_src); diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 5d5e5b9a8af0..515832854251 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -386,12 +386,63 @@ static int nla_put_napi_id(struct sk_buff *skb, const struct napi_struct *napi) return 0; } +static int +netdev_nl_queue_fill_lease(struct sk_buff *rsp, struct net_device *netdev, + u32 q_idx, u32 q_type) +{ + struct net_device *orig_netdev = netdev; + struct nlattr *nest_lease, *nest_queue; + struct netdev_rx_queue *rxq; + struct net *net, *peer_net; + + rxq = __netif_get_rx_queue_lease(&netdev, &q_idx, + NETIF_PHYS_TO_VIRT); + if (!rxq || orig_netdev == netdev) + return 0; + + nest_lease = nla_nest_start(rsp, NETDEV_A_QUEUE_LEASE); + if (!nest_lease) + goto nla_put_failure; + + nest_queue = nla_nest_start(rsp, NETDEV_A_LEASE_QUEUE); + if (!nest_queue) + goto nla_put_failure; + if (nla_put_u32(rsp, NETDEV_A_QUEUE_ID, q_idx)) + goto nla_put_failure; + if (nla_put_u32(rsp, NETDEV_A_QUEUE_TYPE, q_type)) + goto nla_put_failure; + nla_nest_end(rsp, nest_queue); + + if (nla_put_u32(rsp, NETDEV_A_LEASE_IFINDEX, + READ_ONCE(netdev->ifindex))) + goto nla_put_failure; + + rcu_read_lock(); + peer_net = dev_net_rcu(netdev); + net = dev_net_rcu(orig_netdev); + if (!net_eq(net, peer_net)) { + s32 id = peernet2id_alloc(net, peer_net, GFP_ATOMIC); + + if (nla_put_s32(rsp, NETDEV_A_LEASE_NETNS_ID, id)) + goto nla_put_failure_unlock; + } + rcu_read_unlock(); + nla_nest_end(rsp, nest_lease); + return 0; + +nla_put_failure_unlock: + rcu_read_unlock(); +nla_put_failure: + return -ENOMEM; +} + static int netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev, u32 q_idx, u32 q_type, const struct genl_info *info) { struct pp_memory_provider_params *params; - struct netdev_rx_queue *rxq; + struct net_device *orig_netdev = netdev; + struct netdev_rx_queue *rxq, *rxq_lease; struct netdev_queue *txq; void *hdr; @@ -409,17 +460,22 @@ netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev, rxq = __netif_get_rx_queue(netdev, q_idx); if (nla_put_napi_id(rsp, rxq->napi)) goto nla_put_failure; + if (netdev_nl_queue_fill_lease(rsp, netdev, q_idx, q_type)) + goto nla_put_failure; + rxq_lease = netif_get_rx_queue_lease_locked(&netdev, &q_idx); + if (rxq_lease) + rxq = rxq_lease; params = &rxq->mp_params; if (params->mp_ops && params->mp_ops->nl_fill(params->mp_priv, rsp, rxq)) - goto nla_put_failure; + goto nla_put_failure_lease; #ifdef CONFIG_XDP_SOCKETS if (rxq->pool) if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK)) - goto nla_put_failure; + goto nla_put_failure_lease; #endif - + netif_put_rx_queue_lease_locked(orig_netdev, netdev); break; case NETDEV_QUEUE_TYPE_TX: txq = netdev_get_tx_queue(netdev, q_idx); @@ -437,6 +493,8 @@ netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev, return 0; +nla_put_failure_lease: + netif_put_rx_queue_lease_locked(orig_netdev, netdev); nla_put_failure: genlmsg_cancel(rsp, hdr); return -EMSGSIZE; diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c index a1f23c2c96d4..a4d8cad6db74 100644 --- a/net/core/netdev_rx_queue.c +++ b/net/core/netdev_rx_queue.c @@ -41,6 +41,60 @@ bool netif_rxq_is_leased(struct net_device *dev, unsigned int rxq_idx) return false; } +/* Virtual devices eligible for leasing have no dev->dev.parent, while + * physical devices always have one. Use this to enforce the correct + * lease traversal direction. + */ +static bool netif_lease_dir_ok(const struct net_device *dev, + enum netif_lease_dir dir) +{ + if (dir == NETIF_VIRT_TO_PHYS && !dev->dev.parent) + return true; + if (dir == NETIF_PHYS_TO_VIRT && dev->dev.parent) + return true; + return false; +} + +struct netdev_rx_queue * +__netif_get_rx_queue_lease(struct net_device **dev, unsigned int *rxq_idx, + enum netif_lease_dir dir) +{ + struct net_device *orig_dev = *dev; + struct netdev_rx_queue *rxq = __netif_get_rx_queue(orig_dev, *rxq_idx); + + if (rxq->lease) { + if (!netif_lease_dir_ok(orig_dev, dir)) + return NULL; + rxq = rxq->lease; + *rxq_idx = get_netdev_rx_queue_index(rxq); + *dev = rxq->dev; + } + return rxq; +} + +struct netdev_rx_queue * +netif_get_rx_queue_lease_locked(struct net_device **dev, unsigned int *rxq_idx) +{ + struct net_device *orig_dev = *dev; + struct netdev_rx_queue *rxq; + + /* Locking order is always from the virtual to the physical device + * see netdev_nl_queue_create_doit(). + */ + netdev_ops_assert_locked(orig_dev); + rxq = __netif_get_rx_queue_lease(dev, rxq_idx, NETIF_VIRT_TO_PHYS); + if (rxq && orig_dev != *dev) + netdev_lock(*dev); + return rxq; +} + +void netif_put_rx_queue_lease_locked(struct net_device *orig_dev, + struct net_device *dev) +{ + if (orig_dev != dev) + netdev_unlock(dev); +} + /* See also page_pool_is_unreadable() */ bool netif_rxq_has_unreadable_mp(struct net_device *dev, unsigned int rxq_idx) { From 22fdf28f7c03d3c130103ee77382c53d293f1732 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 3 Apr 2026 01:10:21 +0200 Subject: [PATCH 1292/1561] net, ethtool: Disallow leased real rxqs to be resized Similar to AF_XDP, do not allow queues in a physical netdev to be resized by ethtool -L when they are leased. Cover channel resize paths (both netlink and ioctl) to reject resizing when the queues would be affected. Given we need to have different checks for RX vs TX, detangle the code into a two-loop version rather than the range of new_combined + min(new_rx, new_tx) to old_combined + max(old_rx, old_tx). Signed-off-by: Daniel Borkmann Co-developed-by: David Wei Signed-off-by: David Wei Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20260402231031.447597-5-daniel@iogearbox.net Signed-off-by: Jakub Kicinski --- net/ethtool/channels.c | 28 +++++++++++++++++----------- net/ethtool/ioctl.c | 21 ++++++++++++--------- 2 files changed, 29 insertions(+), 20 deletions(-) diff --git a/net/ethtool/channels.c b/net/ethtool/channels.c index 45232cf1c144..64ef8cff2005 100644 --- a/net/ethtool/channels.c +++ b/net/ethtool/channels.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only -#include +#include #include "common.h" #include "netlink.h" @@ -109,7 +109,7 @@ ethnl_set_channels_validate(struct ethnl_req_info *req_info, static int ethnl_set_channels(struct ethnl_req_info *req_info, struct genl_info *info) { - unsigned int from_channel, old_total, i; + unsigned int old_combined, old_rx, old_tx, i; bool mod = false, mod_combined = false; struct net_device *dev = req_info->dev; struct ethtool_channels channels = {}; @@ -118,8 +118,9 @@ ethnl_set_channels(struct ethnl_req_info *req_info, struct genl_info *info) int ret; dev->ethtool_ops->get_channels(dev, &channels); - old_total = channels.combined_count + - max(channels.rx_count, channels.tx_count); + old_combined = channels.combined_count; + old_rx = channels.rx_count; + old_tx = channels.tx_count; ethnl_update_u32(&channels.rx_count, tb[ETHTOOL_A_CHANNELS_RX_COUNT], &mod); @@ -169,14 +170,19 @@ ethnl_set_channels(struct ethnl_req_info *req_info, struct genl_info *info) if (ret) return ret; - /* Disabling channels, query zero-copy AF_XDP sockets */ - from_channel = channels.combined_count + - min(channels.rx_count, channels.tx_count); - for (i = from_channel; i < old_total; i++) - if (xsk_get_pool_from_qid(dev, i)) { - GENL_SET_ERR_MSG(info, "requested channel counts are too low for existing zerocopy AF_XDP sockets"); + /* ensure channels are not busy at the moment */ + for (i = channels.combined_count + channels.rx_count; + i < old_combined + old_rx; i++) { + if (netdev_queue_busy(dev, i, NETDEV_QUEUE_TYPE_RX, + info->extack)) return -EINVAL; - } + } + for (i = channels.combined_count + channels.tx_count; + i < old_combined + old_tx; i++) { + if (netdev_queue_busy(dev, i, NETDEV_QUEUE_TYPE_TX, + info->extack)) + return -EINVAL; + } ret = dev->ethtool_ops->set_channels(dev, &channels); return ret < 0 ? ret : 1; diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 3c713a91ad0d..bd97f9b9bf18 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -27,12 +27,12 @@ #include #include #include +#include #include #include -#include #include #include -#include +#include #include "common.h" @@ -2250,7 +2250,6 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev, void __user *useraddr) { struct ethtool_channels channels, curr = { .cmd = ETHTOOL_GCHANNELS }; - u16 from_channel, to_channel; unsigned int i; int ret; @@ -2284,13 +2283,17 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev, if (ret) return ret; - /* Disabling channels, query zero-copy AF_XDP sockets */ - from_channel = channels.combined_count + - min(channels.rx_count, channels.tx_count); - to_channel = curr.combined_count + max(curr.rx_count, curr.tx_count); - for (i = from_channel; i < to_channel; i++) - if (xsk_get_pool_from_qid(dev, i)) + /* Disabling channels, query busy queues (AF_XDP, queue leasing) */ + for (i = channels.combined_count + channels.rx_count; + i < curr.combined_count + curr.rx_count; i++) { + if (netdev_queue_busy(dev, i, NETDEV_QUEUE_TYPE_RX, NULL)) return -EINVAL; + } + for (i = channels.combined_count + channels.tx_count; + i < curr.combined_count + curr.tx_count; i++) { + if (netdev_queue_busy(dev, i, NETDEV_QUEUE_TYPE_TX, NULL)) + return -EINVAL; + } ret = dev->ethtool_ops->set_channels(dev, &channels); if (!ret) From 1e91c98bc9a8ef8198e73151b2a118cd3748925d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 3 Apr 2026 01:10:22 +0200 Subject: [PATCH 1293/1561] net: Slightly simplify net_mp_{open,close}_rxq net_mp_open_rxq is currently not used in the tree as all callers are using __net_mp_open_rxq directly, and net_mp_close_rxq is only used once while all other locations use __net_mp_close_rxq. Consolidate into a single API, netif_mp_{open,close}_rxq, using the netif_ prefix to indicate that the caller is responsible for locking. Signed-off-by: Daniel Borkmann Co-developed-by: David Wei Signed-off-by: David Wei Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20260402231031.447597-6-daniel@iogearbox.net Signed-off-by: Jakub Kicinski --- include/net/page_pool/memory_provider.h | 8 ++------ io_uring/zcrx.c | 9 ++++++--- net/core/devmem.c | 6 +++--- net/core/netdev_rx_queue.c | 23 ++--------------------- 4 files changed, 13 insertions(+), 33 deletions(-) diff --git a/include/net/page_pool/memory_provider.h b/include/net/page_pool/memory_provider.h index ada4f968960a..255ce4cfd975 100644 --- a/include/net/page_pool/memory_provider.h +++ b/include/net/page_pool/memory_provider.h @@ -23,14 +23,10 @@ bool net_mp_niov_set_dma_addr(struct net_iov *niov, dma_addr_t addr); void net_mp_niov_set_page_pool(struct page_pool *pool, struct net_iov *niov); void net_mp_niov_clear_page_pool(struct net_iov *niov); -int net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx, - struct pp_memory_provider_params *p); -int __net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, +int netif_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, const struct pp_memory_provider_params *p, struct netlink_ext_ack *extack); -void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx, - struct pp_memory_provider_params *old_p); -void __net_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx, +void netif_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx, const struct pp_memory_provider_params *old_p); /** diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c index 62d693287457..d3ec63c83d0c 100644 --- a/io_uring/zcrx.c +++ b/io_uring/zcrx.c @@ -552,8 +552,11 @@ static void io_close_queue(struct io_zcrx_ifq *ifq) } if (netdev) { - if (ifq->if_rxq != -1) - net_mp_close_rxq(netdev, ifq->if_rxq, &p); + if (ifq->if_rxq != -1) { + netdev_lock(netdev); + netif_mp_close_rxq(netdev, ifq->if_rxq, &p); + netdev_unlock(netdev); + } netdev_put(netdev, &netdev_tracker); } ifq->if_rxq = -1; @@ -841,7 +844,7 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx, mp_param.rx_page_size = 1U << ifq->niov_shift; mp_param.mp_ops = &io_uring_pp_zc_ops; mp_param.mp_priv = ifq; - ret = __net_mp_open_rxq(ifq->netdev, reg.if_rxq, &mp_param, NULL); + ret = netif_mp_open_rxq(ifq->netdev, reg.if_rxq, &mp_param, NULL); if (ret) goto netdev_put_unlock; netdev_unlock(ifq->netdev); diff --git a/net/core/devmem.c b/net/core/devmem.c index 69d79aee07ef..cde4c89bc146 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -145,7 +145,7 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding) rxq_idx = get_netdev_rx_queue_index(rxq); - __net_mp_close_rxq(binding->dev, rxq_idx, &mp_params); + netif_mp_close_rxq(binding->dev, rxq_idx, &mp_params); } percpu_ref_kill(&binding->ref); @@ -163,7 +163,7 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, u32 xa_idx; int err; - err = __net_mp_open_rxq(dev, rxq_idx, &mp_params, extack); + err = netif_mp_open_rxq(dev, rxq_idx, &mp_params, extack); if (err) return err; @@ -176,7 +176,7 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, return 0; err_close_rxq: - __net_mp_close_rxq(dev, rxq_idx, &mp_params); + netif_mp_close_rxq(dev, rxq_idx, &mp_params); return err; } diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c index a4d8cad6db74..06ac3bd5507f 100644 --- a/net/core/netdev_rx_queue.c +++ b/net/core/netdev_rx_queue.c @@ -200,7 +200,7 @@ int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx) } EXPORT_SYMBOL_NS_GPL(netdev_rx_queue_restart, "NETDEV_INTERNAL"); -int __net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, +int netif_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, const struct pp_memory_provider_params *p, struct netlink_ext_ack *extack) { @@ -264,18 +264,7 @@ err_clear_mp: return ret; } -int net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, - struct pp_memory_provider_params *p) -{ - int ret; - - netdev_lock(dev); - ret = __net_mp_open_rxq(dev, rxq_idx, p, NULL); - netdev_unlock(dev); - return ret; -} - -void __net_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx, +void netif_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx, const struct pp_memory_provider_params *old_p) { struct netdev_queue_config qcfg[2]; @@ -305,11 +294,3 @@ void __net_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx, err = netdev_rx_queue_reconfig(dev, ifq_idx, &qcfg[0], &qcfg[1]); WARN_ON(err && err != -ENETDOWN); } - -void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx, - struct pp_memory_provider_params *old_p) -{ - netdev_lock(dev); - __net_mp_close_rxq(dev, ifq_idx, old_p); - netdev_unlock(dev); -} From 5602ad61ebee99c83081fba1aaf5814736edc3e7 Mon Sep 17 00:00:00 2001 From: David Wei Date: Fri, 3 Apr 2026 01:10:23 +0200 Subject: [PATCH 1294/1561] net: Proxy netif_mp_{open,close}_rxq for leased queues When a process in a container wants to setup a memory provider, it will use the virtual netdev and a leased rxq, and call netif_mp_{open,close}_rxq to try and restart the queue. At this point, proxy the queue restart on the real rxq in the physical netdev. For memory providers (io_uring zero-copy rx and devmem), it causes the real rxq in the physical netdev to be filled from a memory provider that has DMA mapped memory from a process within a container. Signed-off-by: David Wei Co-developed-by: Daniel Borkmann Signed-off-by: Daniel Borkmann Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20260402231031.447597-7-daniel@iogearbox.net Signed-off-by: Jakub Kicinski --- net/core/dev.c | 4 +- net/core/dev.h | 7 +++ net/core/netdev_rx_queue.c | 104 +++++++++++++++++++++++++++++++------ 3 files changed, 95 insertions(+), 20 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index cc7bcac892af..2df8a2a5ecf5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -12350,10 +12350,8 @@ static void dev_memory_provider_uninstall(struct net_device *dev) for (i = 0; i < dev->real_num_rx_queues; i++) { struct netdev_rx_queue *rxq = &dev->_rx[i]; - struct pp_memory_provider_params *p = &rxq->mp_params; - if (p->mp_ops && p->mp_ops->uninstall) - p->mp_ops->uninstall(rxq->mp_params.mp_priv, rxq); + __netif_mp_uninstall_rxq(rxq, &rxq->mp_params); } } diff --git a/net/core/dev.h b/net/core/dev.h index 6516ce2b5517..95edb2d4eff8 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -12,6 +12,7 @@ struct net; struct netlink_ext_ack; struct netdev_queue_config; struct cpumask; +struct pp_memory_provider_params; /* Random bits of netdevice that don't need to be exposed */ #define FLOW_LIMIT_HISTORY (1 << 7) /* must be ^2 and !overflow buckets */ @@ -101,6 +102,12 @@ int netdev_queue_config_validate(struct net_device *dev, int rxq_idx, bool netif_rxq_has_mp(struct net_device *dev, unsigned int rxq_idx); bool netif_rxq_is_leased(struct net_device *dev, unsigned int rxq_idx); +void __netif_mp_uninstall_rxq(struct netdev_rx_queue *rxq, + const struct pp_memory_provider_params *p); + +void netif_rxq_cleanup_unlease(struct netdev_rx_queue *phys_rxq, + struct netdev_rx_queue *virt_rxq); + /* netdev management, shared between various uAPI entry points */ struct netdev_name_node { struct hlist_node hlist; diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c index 06ac3bd5507f..1d6e7e47bf0a 100644 --- a/net/core/netdev_rx_queue.c +++ b/net/core/netdev_rx_queue.c @@ -28,6 +28,8 @@ void netdev_rx_queue_unlease(struct netdev_rx_queue *rxq_dst, netdev_assert_locked(rxq_dst->dev); netdev_assert_locked(rxq_src->dev); + netif_rxq_cleanup_unlease(rxq_src, rxq_dst); + WRITE_ONCE(rxq_src->lease, NULL); WRITE_ONCE(rxq_dst->lease, NULL); @@ -200,24 +202,15 @@ int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx) } EXPORT_SYMBOL_NS_GPL(netdev_rx_queue_restart, "NETDEV_INTERNAL"); -int netif_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, - const struct pp_memory_provider_params *p, - struct netlink_ext_ack *extack) +static int __netif_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, + const struct pp_memory_provider_params *p, + struct netlink_ext_ack *extack) { const struct netdev_queue_mgmt_ops *qops = dev->queue_mgmt_ops; struct netdev_queue_config qcfg[2]; struct netdev_rx_queue *rxq; int ret; - if (!netdev_need_ops_lock(dev)) - return -EOPNOTSUPP; - - if (rxq_idx >= dev->real_num_rx_queues) { - NL_SET_ERR_MSG(extack, "rx queue index out of range"); - return -ERANGE; - } - rxq_idx = array_index_nospec(rxq_idx, dev->real_num_rx_queues); - if (dev->cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) { NL_SET_ERR_MSG(extack, "tcp-data-split is disabled"); return -EINVAL; @@ -264,16 +257,48 @@ err_clear_mp: return ret; } -void netif_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx, - const struct pp_memory_provider_params *old_p) +int netif_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, + const struct pp_memory_provider_params *p, + struct netlink_ext_ack *extack) +{ + struct net_device *orig_dev = dev; + int ret; + + if (!netdev_need_ops_lock(dev)) + return -EOPNOTSUPP; + + if (rxq_idx >= dev->real_num_rx_queues) { + NL_SET_ERR_MSG(extack, "rx queue index out of range"); + return -ERANGE; + } + rxq_idx = array_index_nospec(rxq_idx, dev->real_num_rx_queues); + + if (!netif_rxq_is_leased(dev, rxq_idx)) + return __netif_mp_open_rxq(dev, rxq_idx, p, extack); + + if (!netif_get_rx_queue_lease_locked(&dev, &rxq_idx)) { + NL_SET_ERR_MSG(extack, "rx queue leased to a virtual netdev"); + return -EBUSY; + } + if (!dev->dev.parent) { + NL_SET_ERR_MSG(extack, "rx queue belongs to a virtual netdev"); + ret = -EOPNOTSUPP; + goto out; + } + + ret = __netif_mp_open_rxq(dev, rxq_idx, p, extack); +out: + netif_put_rx_queue_lease_locked(orig_dev, dev); + return ret; +} + +static void __netif_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx, + const struct pp_memory_provider_params *old_p) { struct netdev_queue_config qcfg[2]; struct netdev_rx_queue *rxq; int err; - if (WARN_ON_ONCE(ifq_idx >= dev->real_num_rx_queues)) - return; - rxq = __netif_get_rx_queue(dev, ifq_idx); /* Callers holding a netdev ref may get here after we already @@ -294,3 +319,48 @@ void netif_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx, err = netdev_rx_queue_reconfig(dev, ifq_idx, &qcfg[0], &qcfg[1]); WARN_ON(err && err != -ENETDOWN); } + +void netif_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx, + const struct pp_memory_provider_params *old_p) +{ + struct net_device *orig_dev = dev; + + if (WARN_ON_ONCE(ifq_idx >= dev->real_num_rx_queues)) + return; + if (!netif_rxq_is_leased(dev, ifq_idx)) + return __netif_mp_close_rxq(dev, ifq_idx, old_p); + + if (WARN_ON_ONCE(!netif_get_rx_queue_lease_locked(&dev, &ifq_idx))) + return; + + __netif_mp_close_rxq(dev, ifq_idx, old_p); + netif_put_rx_queue_lease_locked(orig_dev, dev); +} + +void __netif_mp_uninstall_rxq(struct netdev_rx_queue *rxq, + const struct pp_memory_provider_params *p) +{ + if (p->mp_ops && p->mp_ops->uninstall) + p->mp_ops->uninstall(p->mp_priv, rxq); +} + +/* Clean up memory provider state when a queue lease is torn down. If + * a memory provider was installed on the physical queue via the lease, + * close it now. The memory provider is a property of the queue itself, + * and it was _guaranteed_ to be installed on the physical queue via + * the lease redirection. The extra __netif_mp_close_rxq is needed + * since the physical queue can outlive the virtual queue in the lease + * case, so it needs to be reconfigured to clear the memory provider. + */ +void netif_rxq_cleanup_unlease(struct netdev_rx_queue *phys_rxq, + struct netdev_rx_queue *virt_rxq) +{ + struct pp_memory_provider_params *p = &phys_rxq->mp_params; + unsigned int ifq_idx = get_netdev_rx_queue_index(phys_rxq); + + if (!p->mp_ops) + return; + + __netif_mp_uninstall_rxq(virt_rxq, p); + __netif_mp_close_rxq(phys_rxq->dev, ifq_idx, p); +} From 222b5566a02dbf136291376e4aa1806213fe9fa2 Mon Sep 17 00:00:00 2001 From: David Wei Date: Fri, 3 Apr 2026 01:10:24 +0200 Subject: [PATCH 1295/1561] net: Proxy netdev_queue_get_dma_dev for leased queues Extend netdev_queue_get_dma_dev to return the physical device of the real rxq for DMA in case the queue was leased. This allows memory providers like io_uring zero-copy or devmem to bind to the physically leased rxq via virtual devices such as netkit. Signed-off-by: David Wei Co-developed-by: Daniel Borkmann Signed-off-by: Daniel Borkmann Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20260402231031.447597-8-daniel@iogearbox.net Signed-off-by: Jakub Kicinski --- include/net/netdev_queues.h | 4 +++- io_uring/zcrx.c | 3 ++- net/core/netdev-genl.c | 5 +++-- net/core/netdev_queues.c | 45 ++++++++++++++++++++++++++++--------- 4 files changed, 42 insertions(+), 15 deletions(-) diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index 748b70552ed1..70c9fe9e83cc 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -380,7 +380,9 @@ static inline unsigned int netif_xmit_timeout_ms(struct netdev_queue *txq) get_desc, start_thrs); \ }) -struct device *netdev_queue_get_dma_dev(struct net_device *dev, int idx); +struct device *netdev_queue_get_dma_dev(struct net_device *dev, + unsigned int idx, + enum netdev_queue_type type); bool netdev_can_create_queue(const struct net_device *dev, struct netlink_ext_ack *extack); bool netdev_can_lease_queue(const struct net_device *dev, diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c index d3ec63c83d0c..f4a7809ba0c2 100644 --- a/io_uring/zcrx.c +++ b/io_uring/zcrx.c @@ -829,7 +829,8 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx, } netdev_hold(ifq->netdev, &ifq->netdev_tracker, GFP_KERNEL); - ifq->dev = netdev_queue_get_dma_dev(ifq->netdev, reg.if_rxq); + ifq->dev = netdev_queue_get_dma_dev(ifq->netdev, reg.if_rxq, + NETDEV_QUEUE_TYPE_RX); if (!ifq->dev) { ret = -EOPNOTSUPP; goto netdev_put_unlock; diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 515832854251..056460d01940 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -976,7 +976,8 @@ netdev_nl_get_dma_dev(struct net_device *netdev, unsigned long *rxq_bitmap, for_each_set_bit(rxq_idx, rxq_bitmap, netdev->real_num_rx_queues) { struct device *rxq_dma_dev; - rxq_dma_dev = netdev_queue_get_dma_dev(netdev, rxq_idx); + rxq_dma_dev = netdev_queue_get_dma_dev(netdev, rxq_idx, + NETDEV_QUEUE_TYPE_RX); if (dma_dev && rxq_dma_dev != dma_dev) { NL_SET_ERR_MSG_FMT(extack, "DMA device mismatch between queue %u and %u (multi-PF device?)", rxq_idx, prev_rxq_idx); @@ -1153,7 +1154,7 @@ int netdev_nl_bind_tx_doit(struct sk_buff *skb, struct genl_info *info) goto err_unlock_netdev; } - dma_dev = netdev_queue_get_dma_dev(netdev, 0); + dma_dev = netdev_queue_get_dma_dev(netdev, 0, NETDEV_QUEUE_TYPE_TX); binding = net_devmem_bind_dmabuf(netdev, dma_dev, DMA_TO_DEVICE, dmabuf_fd, priv, info->extack); if (IS_ERR(binding)) { diff --git a/net/core/netdev_queues.c b/net/core/netdev_queues.c index 177401828e79..265161e12a9c 100644 --- a/net/core/netdev_queues.c +++ b/net/core/netdev_queues.c @@ -6,17 +6,8 @@ #include "dev.h" -/** - * netdev_queue_get_dma_dev() - get dma device for zero-copy operations - * @dev: net_device - * @idx: queue index - * - * Get dma device for zero-copy operations to be used for this queue. - * When such device is not available or valid, the function will return NULL. - * - * Return: Device or NULL on error - */ -struct device *netdev_queue_get_dma_dev(struct net_device *dev, int idx) +static struct device * +__netdev_queue_get_dma_dev(struct net_device *dev, unsigned int idx) { const struct netdev_queue_mgmt_ops *queue_ops = dev->queue_mgmt_ops; struct device *dma_dev; @@ -29,6 +20,38 @@ struct device *netdev_queue_get_dma_dev(struct net_device *dev, int idx) return dma_dev && dma_dev->dma_mask ? dma_dev : NULL; } +/** + * netdev_queue_get_dma_dev() - get dma device for zero-copy operations + * @dev: net_device + * @idx: queue index + * @type: queue type (RX or TX) + * + * Get dma device for zero-copy operations to be used for this queue. If + * the queue is an RX queue leased from a physical queue, we retrieve the + * physical queue's dma device. When the dma device is not available or + * valid, the function will return NULL. + * + * Return: Device or NULL on error + */ +struct device *netdev_queue_get_dma_dev(struct net_device *dev, + unsigned int idx, + enum netdev_queue_type type) +{ + struct net_device *orig_dev = dev; + struct device *dma_dev; + + /* Only RX side supports queue leasing today. */ + if (type != NETDEV_QUEUE_TYPE_RX || !netif_rxq_is_leased(dev, idx)) + return __netdev_queue_get_dma_dev(dev, idx); + + if (!netif_get_rx_queue_lease_locked(&dev, &idx)) + return NULL; + + dma_dev = __netdev_queue_get_dma_dev(dev, idx); + netif_put_rx_queue_lease_locked(orig_dev, dev); + return dma_dev; +} + bool netdev_can_create_queue(const struct net_device *dev, struct netlink_ext_ack *extack) { From 9368397fb92ac95a0495cd73b5e3194ade6b883d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 3 Apr 2026 01:10:25 +0200 Subject: [PATCH 1296/1561] xsk: Extend xsk_rcv_check validation xsk_rcv_check tests for inbound packets to see whether they match the bound AF_XDP socket. Refactor the test into a small helper xsk_dev_queue_valid and move the validation against xs->dev and xs->queue_id there. The fast-path case stays in place and allows for quick return in xsk_dev_queue_valid. If it fails, the validation is extended to check whether the AF_XDP socket is bound against a leased queue, and if so, the test is redone. Signed-off-by: Daniel Borkmann Co-developed-by: David Wei Signed-off-by: David Wei Acked-by: Stanislav Fomichev Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20260402231031.447597-9-daniel@iogearbox.net Signed-off-by: Jakub Kicinski --- net/xdp/xsk.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 6149f6a79897..d638d7dbd7ed 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -330,14 +330,37 @@ static bool xsk_is_bound(struct xdp_sock *xs) return false; } +static bool xsk_dev_queue_valid(const struct xdp_sock *xs, + const struct xdp_rxq_info *info) +{ + struct net_device *dev = xs->dev; + u32 queue_index = xs->queue_id; + struct netdev_rx_queue *rxq; + + if (info->dev == dev && + info->queue_index == queue_index) + return true; + + if (queue_index < dev->real_num_rx_queues) { + rxq = READ_ONCE(__netif_get_rx_queue(dev, queue_index)->lease); + if (!rxq) + return false; + + dev = rxq->dev; + queue_index = get_netdev_rx_queue_index(rxq); + + return info->dev == dev && + info->queue_index == queue_index; + } + return false; +} + static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) { if (!xsk_is_bound(xs)) return -ENXIO; - - if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index) + if (!xsk_dev_queue_valid(xs, xdp->rxq)) return -EINVAL; - if (len > xsk_pool_get_rx_frame_size(xs->pool) && !xs->sg) { xs->rx_dropped++; return -ENOSPC; From 910f636db958b65c03eb2ea6f2f93c8d426c6066 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 3 Apr 2026 01:10:26 +0200 Subject: [PATCH 1297/1561] xsk: Proxy pool management for leased queues Similarly to the netif_mp_{open,close}_rxq handling for leased queues, proxy the xsk_{reg,clear}_pool_at_qid via netif_get_rx_queue_lease_locked such that in case a virtual netdev picked a leased rxq, the request gets through to the real rxq in the physical netdev. The proxying is only relevant for queue_id < dev->real_num_rx_queues since right now it's only supported for rxqs. Signed-off-by: Daniel Borkmann Co-developed-by: David Wei Signed-off-by: David Wei Acked-by: Stanislav Fomichev Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20260402231031.447597-10-daniel@iogearbox.net Signed-off-by: Jakub Kicinski --- net/xdp/xsk.c | 47 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index d638d7dbd7ed..fe1c7899455e 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -23,6 +23,8 @@ #include #include #include + +#include #include #include #include @@ -117,10 +119,18 @@ EXPORT_SYMBOL(xsk_get_pool_from_qid); void xsk_clear_pool_at_qid(struct net_device *dev, u16 queue_id) { - if (queue_id < dev->num_rx_queues) - dev->_rx[queue_id].pool = NULL; - if (queue_id < dev->num_tx_queues) - dev->_tx[queue_id].pool = NULL; + struct net_device *orig_dev = dev; + unsigned int id = queue_id; + + if (id < dev->real_num_rx_queues) + WARN_ON_ONCE(!netif_get_rx_queue_lease_locked(&dev, &id)); + + if (id < dev->num_rx_queues) + dev->_rx[id].pool = NULL; + if (id < dev->num_tx_queues) + dev->_tx[id].pool = NULL; + + netif_put_rx_queue_lease_locked(orig_dev, dev); } /* The buffer pool is stored both in the _rx struct and the _tx struct as we do @@ -130,17 +140,30 @@ void xsk_clear_pool_at_qid(struct net_device *dev, u16 queue_id) int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool, u16 queue_id) { - if (queue_id >= max_t(unsigned int, - dev->real_num_rx_queues, - dev->real_num_tx_queues)) + struct net_device *orig_dev = dev; + unsigned int id = queue_id; + int ret = 0; + + if (id >= max(dev->real_num_rx_queues, + dev->real_num_tx_queues)) return -EINVAL; - if (queue_id < dev->real_num_rx_queues) - dev->_rx[queue_id].pool = pool; - if (queue_id < dev->real_num_tx_queues) - dev->_tx[queue_id].pool = pool; + if (id < dev->real_num_rx_queues) { + if (!netif_get_rx_queue_lease_locked(&dev, &id)) + return -EBUSY; + if (xsk_get_pool_from_qid(dev, id)) { + ret = -EBUSY; + goto out; + } + } - return 0; + if (id < dev->real_num_rx_queues) + dev->_rx[id].pool = pool; + if (id < dev->real_num_tx_queues) + dev->_tx[id].pool = pool; +out: + netif_put_rx_queue_lease_locked(orig_dev, dev); + return ret; } static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff_xsk *xskb, u32 len, From 48103896053828a8b4d25839a39aa8514071914a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 3 Apr 2026 01:10:27 +0200 Subject: [PATCH 1298/1561] netkit: Add single device mode for netkit Add a single device mode for netkit instead of netkit pairs. The primary target for the paired devices is to connect network namespaces, of course, and support has been implemented in projects like Cilium [0]. For the rxq leasing the plan is to support two main scenarios related to single device mode: * For the use-case of io_uring zero-copy, the control plane can either set up a netkit pair where the peer device can perform rxq leasing which is then tied to the lifetime of the peer device, or the control plane can use a regular netkit pair to connect the hostns to a Pod/container and dynamically add/remove rxq leasing through a single device without having to interrupt the device pair. In the case of io_uring, the memory pool is used as skb non-linear pages, and thus the skb will go its way through the regular stack into netkit. Things like the netkit policy when no BPF is attached or skb scrubbing etc apply as-is in case the paired devices are used, or if the backend memory is tied to the single device and traffic goes through a paired device. * For the use-case of AF_XDP, the control plane needs to use netkit in the single device mode. The single device mode currently enforces only a pass policy when no BPF is attached, and does not yet support BPF link attachments for AF_XDP. skbs sent to that device get dropped at the moment. Given AF_XDP operates at a lower layer of the stack tying this to the netkit pair did not make sense. In future, the plan is to allow BPF at the XDP layer which can: i) process traffic coming from the AF_XDP application (e.g. QEMU with AF_XDP backend) to filter egress traffic or to push selected egress traffic up to the single netkit device to the local stack (e.g. DHCP requests), and ii) vice-versa skbs sent to the single netkit into the AF_XDP application (e.g. DHCP replies). Also, the control-plane can dynamically manage rxq leasing for the single netkit device without having to interrupt (e.g. down/up cycle) the main netkit pair for the Pod which has traffic going in and out. Signed-off-by: Daniel Borkmann Co-developed-by: David Wei Signed-off-by: David Wei Reviewed-by: Jordan Rife Reviewed-by: Nikolay Aleksandrov Link: https://docs.cilium.io/en/stable/operations/performance/tuning/#netkit-device-mode [0] Link: https://patch.msgid.link/20260402231031.447597-11-daniel@iogearbox.net Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/rt-link.yaml | 11 ++ drivers/net/netkit.c | 131 ++++++++++++++--------- include/uapi/linux/if_link.h | 6 ++ 3 files changed, 99 insertions(+), 49 deletions(-) diff --git a/Documentation/netlink/specs/rt-link.yaml b/Documentation/netlink/specs/rt-link.yaml index df4b56beb818..fcb5aaf0926f 100644 --- a/Documentation/netlink/specs/rt-link.yaml +++ b/Documentation/netlink/specs/rt-link.yaml @@ -825,6 +825,13 @@ definitions: entries: - name: none - name: default + - + name: netkit-pairing + type: enum + enum-name: netkit-pairing + entries: + - name: pair + - name: single - name: ovpn-mode enum-name: ovpn-mode @@ -2299,6 +2306,10 @@ attribute-sets: - name: tailroom type: u16 + - + name: pairing + type: u32 + enum: netkit-pairing - name: linkinfo-ovpn-attrs name-prefix: ifla-ovpn- diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c index 5c0e01396e06..96c098a6db0d 100644 --- a/drivers/net/netkit.c +++ b/drivers/net/netkit.c @@ -26,6 +26,7 @@ struct netkit { __cacheline_group_begin(netkit_slowpath); enum netkit_mode mode; + enum netkit_pairing pair; bool primary; u32 headroom; __cacheline_group_end(netkit_slowpath); @@ -135,6 +136,10 @@ static int netkit_open(struct net_device *dev) struct netkit *nk = netkit_priv(dev); struct net_device *peer = rtnl_dereference(nk->peer); + if (nk->pair == NETKIT_DEVICE_SINGLE) { + netif_carrier_on(dev); + return 0; + } if (!peer) return -ENOTCONN; if (peer->flags & IFF_UP) { @@ -194,16 +199,17 @@ static void netkit_set_headroom(struct net_device *dev, int headroom) rcu_read_lock(); peer = rcu_dereference(nk->peer); - if (unlikely(!peer)) - goto out; + if (!peer) { + nk->headroom = headroom; + dev->needed_headroom = headroom; + } else { + nk2 = netkit_priv(peer); + nk->headroom = headroom; + headroom = max(nk->headroom, nk2->headroom); - nk2 = netkit_priv(peer); - nk->headroom = headroom; - headroom = max(nk->headroom, nk2->headroom); - - peer->needed_headroom = headroom; - dev->needed_headroom = headroom; -out: + peer->needed_headroom = headroom; + dev->needed_headroom = headroom; + } rcu_read_unlock(); } @@ -335,15 +341,17 @@ static int netkit_new_link(struct net_device *dev, enum netkit_scrub scrub_prim = NETKIT_SCRUB_DEFAULT; enum netkit_scrub scrub_peer = NETKIT_SCRUB_DEFAULT; struct nlattr *peer_tb[IFLA_MAX + 1], **tbp, *attr; + enum netkit_pairing pair = NETKIT_DEVICE_PAIR; enum netkit_action policy_prim = NETKIT_PASS; enum netkit_action policy_peer = NETKIT_PASS; + bool seen_peer = false, seen_scrub = false; struct nlattr **data = params->data; enum netkit_mode mode = NETKIT_L3; unsigned char ifname_assign_type; struct nlattr **tb = params->tb; u16 headroom = 0, tailroom = 0; struct ifinfomsg *ifmp = NULL; - struct net_device *peer; + struct net_device *peer = NULL; char ifname[IFNAMSIZ]; struct netkit *nk; int err; @@ -380,6 +388,13 @@ static int netkit_new_link(struct net_device *dev, headroom = nla_get_u16(data[IFLA_NETKIT_HEADROOM]); if (data[IFLA_NETKIT_TAILROOM]) tailroom = nla_get_u16(data[IFLA_NETKIT_TAILROOM]); + if (data[IFLA_NETKIT_PAIRING]) + pair = nla_get_u32(data[IFLA_NETKIT_PAIRING]); + + seen_scrub = data[IFLA_NETKIT_SCRUB]; + seen_peer = data[IFLA_NETKIT_PEER_INFO] || + data[IFLA_NETKIT_PEER_SCRUB] || + data[IFLA_NETKIT_PEER_POLICY]; } if (ifmp && tbp[IFLA_IFNAME]) { @@ -392,45 +407,47 @@ static int netkit_new_link(struct net_device *dev, if (mode != NETKIT_L2 && (tb[IFLA_ADDRESS] || tbp[IFLA_ADDRESS])) return -EOPNOTSUPP; + if (pair == NETKIT_DEVICE_SINGLE && + (tb != tbp || seen_peer || seen_scrub || + policy_prim != NETKIT_PASS)) + return -EOPNOTSUPP; - peer = rtnl_create_link(peer_net, ifname, ifname_assign_type, - &netkit_link_ops, tbp, extack); - if (IS_ERR(peer)) - return PTR_ERR(peer); + if (pair == NETKIT_DEVICE_PAIR) { + peer = rtnl_create_link(peer_net, ifname, ifname_assign_type, + &netkit_link_ops, tbp, extack); + if (IS_ERR(peer)) + return PTR_ERR(peer); - netif_inherit_tso_max(peer, dev); - if (headroom) { - peer->needed_headroom = headroom; - dev->needed_headroom = headroom; + netif_inherit_tso_max(peer, dev); + if (headroom) + peer->needed_headroom = headroom; + if (tailroom) + peer->needed_tailroom = tailroom; + if (mode == NETKIT_L2 && !(ifmp && tbp[IFLA_ADDRESS])) + eth_hw_addr_random(peer); + if (ifmp && dev->ifindex) + peer->ifindex = ifmp->ifi_index; + + nk = netkit_priv(peer); + nk->primary = false; + nk->policy = policy_peer; + nk->scrub = scrub_peer; + nk->mode = mode; + nk->pair = pair; + nk->headroom = headroom; + bpf_mprog_bundle_init(&nk->bundle); + + err = register_netdevice(peer); + if (err < 0) + goto err_register_peer; + netif_carrier_off(peer); + if (mode == NETKIT_L2) + dev_change_flags(peer, peer->flags & ~IFF_NOARP, NULL); + + err = rtnl_configure_link(peer, NULL, 0, NULL); + if (err < 0) + goto err_configure_peer; } - if (tailroom) { - peer->needed_tailroom = tailroom; - dev->needed_tailroom = tailroom; - } - - if (mode == NETKIT_L2 && !(ifmp && tbp[IFLA_ADDRESS])) - eth_hw_addr_random(peer); - if (ifmp && dev->ifindex) - peer->ifindex = ifmp->ifi_index; - - nk = netkit_priv(peer); - nk->primary = false; - nk->policy = policy_peer; - nk->scrub = scrub_peer; - nk->mode = mode; - nk->headroom = headroom; - bpf_mprog_bundle_init(&nk->bundle); - - err = register_netdevice(peer); - if (err < 0) - goto err_register_peer; - netif_carrier_off(peer); - if (mode == NETKIT_L2) - dev_change_flags(peer, peer->flags & ~IFF_NOARP, NULL); - - err = rtnl_configure_link(peer, NULL, 0, NULL); - if (err < 0) - goto err_configure_peer; if (mode == NETKIT_L2 && !tb[IFLA_ADDRESS]) eth_hw_addr_random(dev); @@ -438,12 +455,17 @@ static int netkit_new_link(struct net_device *dev, nla_strscpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ); else strscpy(dev->name, "nk%d", IFNAMSIZ); + if (headroom) + dev->needed_headroom = headroom; + if (tailroom) + dev->needed_tailroom = tailroom; nk = netkit_priv(dev); nk->primary = true; nk->policy = policy_prim; nk->scrub = scrub_prim; nk->mode = mode; + nk->pair = pair; nk->headroom = headroom; bpf_mprog_bundle_init(&nk->bundle); @@ -455,10 +477,12 @@ static int netkit_new_link(struct net_device *dev, dev_change_flags(dev, dev->flags & ~IFF_NOARP, NULL); rcu_assign_pointer(netkit_priv(dev)->peer, peer); - rcu_assign_pointer(netkit_priv(peer)->peer, dev); + if (peer) + rcu_assign_pointer(netkit_priv(peer)->peer, dev); return 0; err_configure_peer: - unregister_netdevice(peer); + if (peer) + unregister_netdevice(peer); return err; err_register_peer: free_netdev(peer); @@ -518,6 +542,8 @@ static struct net_device *netkit_dev_fetch(struct net *net, u32 ifindex, u32 whi nk = netkit_priv(dev); if (!nk->primary) return ERR_PTR(-EACCES); + if (nk->pair == NETKIT_DEVICE_SINGLE) + return ERR_PTR(-EOPNOTSUPP); if (which == BPF_NETKIT_PEER) { dev = rcu_dereference_rtnl(nk->peer); if (!dev) @@ -879,6 +905,7 @@ static int netkit_change_link(struct net_device *dev, struct nlattr *tb[], { IFLA_NETKIT_PEER_INFO, "peer info" }, { IFLA_NETKIT_HEADROOM, "headroom" }, { IFLA_NETKIT_TAILROOM, "tailroom" }, + { IFLA_NETKIT_PAIRING, "pairing" }, }; if (!nk->primary) { @@ -898,9 +925,11 @@ static int netkit_change_link(struct net_device *dev, struct nlattr *tb[], } if (data[IFLA_NETKIT_POLICY]) { + err = -EOPNOTSUPP; attr = data[IFLA_NETKIT_POLICY]; policy = nla_get_u32(attr); - err = netkit_check_policy(policy, attr, extack); + if (nk->pair == NETKIT_DEVICE_PAIR) + err = netkit_check_policy(policy, attr, extack); if (err) return err; WRITE_ONCE(nk->policy, policy); @@ -931,6 +960,7 @@ static size_t netkit_get_size(const struct net_device *dev) nla_total_size(sizeof(u8)) + /* IFLA_NETKIT_PRIMARY */ nla_total_size(sizeof(u16)) + /* IFLA_NETKIT_HEADROOM */ nla_total_size(sizeof(u16)) + /* IFLA_NETKIT_TAILROOM */ + nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_PAIRING */ 0; } @@ -951,6 +981,8 @@ static int netkit_fill_info(struct sk_buff *skb, const struct net_device *dev) return -EMSGSIZE; if (nla_put_u16(skb, IFLA_NETKIT_TAILROOM, dev->needed_tailroom)) return -EMSGSIZE; + if (nla_put_u32(skb, IFLA_NETKIT_PAIRING, nk->pair)) + return -EMSGSIZE; if (peer) { nk = netkit_priv(peer); @@ -972,6 +1004,7 @@ static const struct nla_policy netkit_policy[IFLA_NETKIT_MAX + 1] = { [IFLA_NETKIT_TAILROOM] = { .type = NLA_U16 }, [IFLA_NETKIT_SCRUB] = NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT), [IFLA_NETKIT_PEER_SCRUB] = NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT), + [IFLA_NETKIT_PAIRING] = NLA_POLICY_MAX(NLA_U32, NETKIT_DEVICE_SINGLE), [IFLA_NETKIT_PRIMARY] = { .type = NLA_REJECT, .reject_message = "Primary attribute is read-only" }, }; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 83a96c56b8ca..280bb1780512 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1296,6 +1296,11 @@ enum netkit_mode { NETKIT_L3, }; +enum netkit_pairing { + NETKIT_DEVICE_PAIR, + NETKIT_DEVICE_SINGLE, +}; + /* NETKIT_SCRUB_NONE leaves clearing skb->{mark,priority} up to * the BPF program if attached. This also means the latter can * consume the two fields if they were populated earlier. @@ -1320,6 +1325,7 @@ enum { IFLA_NETKIT_PEER_SCRUB, IFLA_NETKIT_HEADROOM, IFLA_NETKIT_TAILROOM, + IFLA_NETKIT_PAIRING, __IFLA_NETKIT_MAX, }; #define IFLA_NETKIT_MAX (__IFLA_NETKIT_MAX - 1) From b789acc0695cc273736ada06aac5f3c830af39e1 Mon Sep 17 00:00:00 2001 From: David Wei Date: Fri, 3 Apr 2026 01:10:28 +0200 Subject: [PATCH 1299/1561] netkit: Implement rtnl_link_ops->alloc and ndo_queue_create Implement rtnl_link_ops->alloc that allows the number of rx queues to be set when netkit is created. By default, netkit has only a single rxq (and single txq). The number of queues is deliberately not allowed to be changed via ethtool -L and is fixed for the lifetime of a netkit instance. For netkit device creation, numrxqueues with larger than one rxq can be specified. These rxqs are leasable to real rxqs in physical netdevs: ip link add type netkit peer numrxqueues 64 # for device pair ip link add numrxqueues 64 type netkit single # for single device The limit of numrxqueues for netkit is currently set to 1024, which allows leasing multiple real rxqs from physical netdevs. The implementation of ndo_queue_create() adds a new rxq during the queue lease operation. We allow to create queues either in single device mode or for the case of dual device mode for the netkit peer device which gets placed into the target network namespace. For dual device mode the lease against the primary device does not make sense for the targeted use cases, and therefore gets rejected. We also need to add a lockdep class for netkit, such that lockdep does not trip over us, similarly done as in commit 0bef512012b1 ("net: add netdev_lockdep_set_classes() to virtual drivers"). This is also the last missing bit to netkit for supporting io_uring with zero-copy mode [0]. Up until this point it was not possible to consume the latter out of containers or Kubernetes Pods where applications are in their own network namespace. io_uring example with eth0 being a physical device with 16 queues where netkit is bound to the last queue, iou-zcrx.c is binary from selftests; ethtool configuration (tcp-data-split, hds_thresh, RSS, flow steering) is done on the physical device by the control plane; here, flow steering to that queue is based on the service VIP:port of the server utilizing io_uring: # ethtool -X eth0 start 0 equal 15 # ethtool -X eth0 start 15 equal 1 context new # ethtool --config-ntuple eth0 flow-type tcp4 dst-ip 1.2.3.4 dst-port 5000 action 15 # ip netns add foo # ip link add type netkit peer numrxqueues 2 # ynl --family netdev --output-json --do queue-create \ --json "{"ifindex": $(ifindex nk0), "type": "rx", \ "lease": { "ifindex": $(ifindex eth0), \ "queue": { "type": "rx", "id": 15 } } }" {'id': 1} # ip link set nk0 netns foo # ip link set nk1 up # ip netns exec foo ip link set lo up # ip netns exec foo ip link set nk0 up # ip netns exec foo ip addr add 1.2.3.4/32 dev nk0 [ ... setup routing etc to get external traffic into the netns ... ] # ip netns exec foo ./iou-zcrx -s -p 5000 -i nk0 -q 1 Remote io_uring client: # ./iou-zcrx -c -h 1.2.3.4 -p 5000 -l 12840 -z 65536 We have tested the above against a Broadcom BCM957504 (bnxt_en) 100G NIC, supporting TCP header/data split. Similarly, this also works for devmem which we tested using ncdevmem: # ip netns exec foo ./ncdevmem -s 1.2.3.4 -l -p 5000 -f nk0 -t 1 -q 1 And on the remote client: # ./ncdevmem -s 1.2.3.4 -p 5000 -f eth0 For Cilium, the plan is to open up support for the various memory providers for regular Kubernetes Pods when Cilium is configured with netkit datapath mode. Signed-off-by: David Wei Co-developed-by: Daniel Borkmann Signed-off-by: Daniel Borkmann Reviewed-by: Nikolay Aleksandrov Link: https://kernel-recipes.org/en/2024/schedule/efficient-zero-copy-networking-using-io_uring [0] Link: https://patch.msgid.link/20260402231031.447597-12-daniel@iogearbox.net Signed-off-by: Jakub Kicinski --- drivers/net/netkit.c | 126 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 114 insertions(+), 12 deletions(-) diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c index 96c098a6db0d..b22bd0b6508a 100644 --- a/drivers/net/netkit.c +++ b/drivers/net/netkit.c @@ -9,11 +9,20 @@ #include #include +#include +#include +#include #include #include #include -#define DRV_NAME "netkit" +#define NETKIT_DRV_NAME "netkit" + +#define NETKIT_NUM_RX_QUEUES_MAX 1024 +#define NETKIT_NUM_TX_QUEUES_MAX 1 + +#define NETKIT_NUM_RX_QUEUES_REAL 1 +#define NETKIT_NUM_TX_QUEUES_REAL 1 struct netkit { __cacheline_group_begin(netkit_fastpath); @@ -37,6 +46,8 @@ struct netkit_link { struct net_device *dev; }; +static struct rtnl_link_ops netkit_link_ops; + static __always_inline int netkit_run(const struct bpf_mprog_entry *entry, struct sk_buff *skb, enum netkit_action ret) @@ -225,9 +236,16 @@ static void netkit_get_stats(struct net_device *dev, stats->tx_dropped = DEV_STATS_READ(dev, tx_dropped); } +static int netkit_init(struct net_device *dev) +{ + netdev_lockdep_set_classes(dev); + return 0; +} + static void netkit_uninit(struct net_device *dev); static const struct net_device_ops netkit_netdev_ops = { + .ndo_init = netkit_init, .ndo_open = netkit_open, .ndo_stop = netkit_close, .ndo_start_xmit = netkit_xmit, @@ -244,13 +262,96 @@ static const struct net_device_ops netkit_netdev_ops = { static void netkit_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { - strscpy(info->driver, DRV_NAME, sizeof(info->driver)); + strscpy(info->driver, NETKIT_DRV_NAME, sizeof(info->driver)); } static const struct ethtool_ops netkit_ethtool_ops = { .get_drvinfo = netkit_get_drvinfo, }; +static int netkit_queue_create(struct net_device *dev, + struct netlink_ext_ack *extack) +{ + struct netkit *nk = netkit_priv(dev); + u32 rxq_count_old, rxq_count_new; + int err; + + rxq_count_old = dev->real_num_rx_queues; + rxq_count_new = rxq_count_old + 1; + + /* In paired mode, only the non-primary (peer) device can + * create leased queues since the primary is the management + * side. In single device mode, leasing is always allowed. + */ + if (nk->pair == NETKIT_DEVICE_PAIR && nk->primary) { + NL_SET_ERR_MSG(extack, + "netkit can only lease against the peer device"); + return -EOPNOTSUPP; + } + + err = netif_set_real_num_rx_queues(dev, rxq_count_new); + if (err) { + if (rxq_count_new > dev->num_rx_queues) + NL_SET_ERR_MSG(extack, + "netkit maximum queue limit reached"); + else + NL_SET_ERR_MSG_FMT(extack, + "netkit cannot create more queues err=%d", err); + return err; + } + + return rxq_count_old; +} + +static const struct netdev_queue_mgmt_ops netkit_queue_mgmt_ops = { + .ndo_queue_create = netkit_queue_create, +}; + +static struct net_device *netkit_alloc(struct nlattr *tb[], + const char *ifname, + unsigned char name_assign_type, + unsigned int num_tx_queues, + unsigned int num_rx_queues) +{ + const struct rtnl_link_ops *ops = &netkit_link_ops; + struct net_device *dev; + + if (num_tx_queues > NETKIT_NUM_TX_QUEUES_MAX || + num_rx_queues > NETKIT_NUM_RX_QUEUES_MAX) + return ERR_PTR(-EOPNOTSUPP); + + dev = alloc_netdev_mqs(ops->priv_size, ifname, + name_assign_type, ops->setup, + num_tx_queues, num_rx_queues); + if (dev) { + dev->real_num_tx_queues = NETKIT_NUM_TX_QUEUES_REAL; + dev->real_num_rx_queues = NETKIT_NUM_RX_QUEUES_REAL; + } + return dev; +} + +static void netkit_queue_unlease(struct net_device *dev) +{ + struct netdev_rx_queue *rxq, *rxq_lease; + struct net_device *dev_lease; + int i; + + if (dev->real_num_rx_queues == 1) + return; + + netdev_lock(dev); + for (i = 1; i < dev->real_num_rx_queues; i++) { + rxq = __netif_get_rx_queue(dev, i); + rxq_lease = rxq->lease; + dev_lease = rxq_lease->dev; + + netdev_lock(dev_lease); + netdev_rx_queue_unlease(rxq, rxq_lease); + netdev_unlock(dev_lease); + } + netdev_unlock(dev); +} + static void netkit_setup(struct net_device *dev) { static const netdev_features_t netkit_features_hw_vlan = @@ -281,8 +382,9 @@ static void netkit_setup(struct net_device *dev) dev->priv_flags |= IFF_DISABLE_NETPOLL; dev->lltx = true; - dev->ethtool_ops = &netkit_ethtool_ops; - dev->netdev_ops = &netkit_netdev_ops; + dev->netdev_ops = &netkit_netdev_ops; + dev->ethtool_ops = &netkit_ethtool_ops; + dev->queue_mgmt_ops = &netkit_queue_mgmt_ops; dev->features |= netkit_features; dev->hw_features = netkit_features; @@ -331,8 +433,6 @@ static int netkit_validate(struct nlattr *tb[], struct nlattr *data[], return 0; } -static struct rtnl_link_ops netkit_link_ops; - static int netkit_new_link(struct net_device *dev, struct rtnl_newlink_params *params, struct netlink_ext_ack *extack) @@ -870,6 +970,7 @@ static void netkit_release_all(struct net_device *dev) static void netkit_uninit(struct net_device *dev) { netkit_release_all(dev); + netkit_queue_unlease(dev); } static void netkit_del_link(struct net_device *dev, struct list_head *head) @@ -1010,8 +1111,9 @@ static const struct nla_policy netkit_policy[IFLA_NETKIT_MAX + 1] = { }; static struct rtnl_link_ops netkit_link_ops = { - .kind = DRV_NAME, + .kind = NETKIT_DRV_NAME, .priv_size = sizeof(struct netkit), + .alloc = netkit_alloc, .setup = netkit_setup, .newlink = netkit_new_link, .dellink = netkit_del_link, @@ -1025,7 +1127,7 @@ static struct rtnl_link_ops netkit_link_ops = { .maxtype = IFLA_NETKIT_MAX, }; -static __init int netkit_init(void) +static __init int netkit_mod_init(void) { BUILD_BUG_ON((int)NETKIT_NEXT != (int)TCX_NEXT || (int)NETKIT_PASS != (int)TCX_PASS || @@ -1035,16 +1137,16 @@ static __init int netkit_init(void) return rtnl_link_register(&netkit_link_ops); } -static __exit void netkit_exit(void) +static __exit void netkit_mod_exit(void) { rtnl_link_unregister(&netkit_link_ops); } -module_init(netkit_init); -module_exit(netkit_exit); +module_init(netkit_mod_init); +module_exit(netkit_mod_exit); MODULE_DESCRIPTION("BPF-programmable network device"); MODULE_AUTHOR("Daniel Borkmann "); MODULE_AUTHOR("Nikolay Aleksandrov "); MODULE_LICENSE("GPL"); -MODULE_ALIAS_RTNL_LINK(DRV_NAME); +MODULE_ALIAS_RTNL_LINK(NETKIT_DRV_NAME); From 25444470570b44da61366e307b3e54be653bf595 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 3 Apr 2026 01:10:29 +0200 Subject: [PATCH 1300/1561] netkit: Add netkit notifier to check for unregistering devices Add a netdevice notifier in netkit to watch for NETDEV_UNREGISTER events. If the target device is indeed NETREG_UNREGISTERING and previously leased a queue to a netkit device, then collect the related netkit devices and batch-unregister_netdevice_many() them. If this were not done, then the netkit device would hold a reference on the physical device preventing it from going away. However, in case of both io_uring zero-copy as well as AF_XDP this situation is handled gracefully and the allocated resources are torn down. In the case where mentioned infra is used through netkit, the applications have a reference on netkit, and netkit in turn holds a reference on the physical device. In order to have netkit release the reference on the physical device, we need such watcher to then unregister the netkit ones. This is generally quite similar to the dependency handling in case of tunnels (e.g. vxlan bound to a underlying netdev) where the tunnel device gets removed along with the physical device. # ip a [...] 4: enp10s0f0np0: mtu 1500 qdisc mq state DOWN group default qlen 1000 link/ether e8:eb:d3:a3:43:f6 brd ff:ff:ff:ff:ff:ff inet 10.0.0.2/24 scope global enp10s0f0np0 valid_lft forever preferred_lft forever [...] 8: nk@NONE: mtu 1500 qdisc noop state DOWN group default qlen 1000 link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff [...] # rmmod mlx5_ib # rmmod mlx5_core [...] [ 309.261822] mlx5_core 0000:0a:00.0 mlx5_0: Port: 1 Link DOWN [ 344.235236] mlx5_core 0000:0a:00.1: E-Switch: Unload vfs: mode(LEGACY), nvfs(0), necvfs(0), active vports(0) [ 344.246948] mlx5_core 0000:0a:00.1: E-Switch: Disable: mode(LEGACY), nvfs(0), necvfs(0), active vports(0) [ 344.463754] mlx5_core 0000:0a:00.1: E-Switch: Disable: mode(LEGACY), nvfs(0), necvfs(0), active vports(0) [ 344.770155] mlx5_core 0000:0a:00.1: E-Switch: cleanup [...] # ip a [...] [ both enp10s0f0np0 and nk gone ] [...] Signed-off-by: Daniel Borkmann Co-developed-by: David Wei Signed-off-by: David Wei Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20260402231031.447597-13-daniel@iogearbox.net Signed-off-by: Jakub Kicinski --- drivers/net/netkit.c | 69 +++++++++++++++++++++++++++++++++++++-- include/linux/netdevice.h | 2 ++ net/core/dev.c | 6 ++++ 3 files changed, 75 insertions(+), 2 deletions(-) diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c index b22bd0b6508a..1ec21aef348f 100644 --- a/drivers/net/netkit.c +++ b/drivers/net/netkit.c @@ -983,7 +983,15 @@ static void netkit_del_link(struct net_device *dev, struct list_head *head) if (peer) { nk = netkit_priv(peer); RCU_INIT_POINTER(nk->peer, NULL); - unregister_netdevice_queue(peer, head); + /* Guard against the peer already being in an unregister + * list (e.g. same-namespace teardown where the peer is + * in the caller's dev_kill_list). list_move_tail() on an + * already-queued device would otherwise corrupt that + * list's iteration. This situation can occur via netkit + * notifier, hence guard against this scenario. + */ + if (!unregister_netdevice_queued(peer)) + unregister_netdevice_queue(peer, head); } } @@ -1051,6 +1059,50 @@ static int netkit_change_link(struct net_device *dev, struct nlattr *tb[], return 0; } +static void netkit_check_lease_unregister(struct net_device *dev) +{ + LIST_HEAD(list_kill); + u32 q_idx; + + if (READ_ONCE(dev->reg_state) != NETREG_UNREGISTERING || + !dev->dev.parent) + return; + + netdev_lock_ops(dev); + for (q_idx = 0; q_idx < dev->real_num_rx_queues; q_idx++) { + struct net_device *tmp = dev; + struct netdev_rx_queue *rxq; + u32 tmp_q_idx = q_idx; + + rxq = __netif_get_rx_queue_lease(&tmp, &tmp_q_idx, + NETIF_PHYS_TO_VIRT); + if (rxq && tmp != dev && + tmp->netdev_ops == &netkit_netdev_ops) { + /* A single phys device can have multiple queues leased + * to one netkit device. We can only queue that netkit + * device once to the list_kill. Queues of that phys + * device can be leased with different individual netkit + * devices, hence we batch via list_kill. + */ + if (unregister_netdevice_queued(tmp)) + continue; + netkit_del_link(tmp, &list_kill); + } + } + netdev_unlock_ops(dev); + unregister_netdevice_many(&list_kill); +} + +static int netkit_notifier(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + + if (event == NETDEV_UNREGISTER) + netkit_check_lease_unregister(dev); + return NOTIFY_DONE; +} + static size_t netkit_get_size(const struct net_device *dev) { return nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_POLICY */ @@ -1127,18 +1179,31 @@ static struct rtnl_link_ops netkit_link_ops = { .maxtype = IFLA_NETKIT_MAX, }; +static struct notifier_block netkit_netdev_notifier = { + .notifier_call = netkit_notifier, +}; + static __init int netkit_mod_init(void) { + int ret; + BUILD_BUG_ON((int)NETKIT_NEXT != (int)TCX_NEXT || (int)NETKIT_PASS != (int)TCX_PASS || (int)NETKIT_DROP != (int)TCX_DROP || (int)NETKIT_REDIRECT != (int)TCX_REDIRECT); - return rtnl_link_register(&netkit_link_ops); + ret = rtnl_link_register(&netkit_link_ops); + if (ret) + return ret; + ret = register_netdevice_notifier(&netkit_netdev_notifier); + if (ret) + rtnl_link_unregister(&netkit_link_ops); + return ret; } static __exit void netkit_mod_exit(void) { + unregister_netdevice_notifier(&netkit_netdev_notifier); rtnl_link_unregister(&netkit_link_ops); } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e8aa9cc4075d..47417b2d48a4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3420,6 +3420,8 @@ static inline int dev_direct_xmit(struct sk_buff *skb, u16 queue_id) int register_netdevice(struct net_device *dev); void unregister_netdevice_queue(struct net_device *dev, struct list_head *head); void unregister_netdevice_many(struct list_head *head); +bool unregister_netdevice_queued(const struct net_device *dev); + static inline void unregister_netdevice(struct net_device *dev) { unregister_netdevice_queue(dev, NULL); diff --git a/net/core/dev.c b/net/core/dev.c index 2df8a2a5ecf5..e7bc95cbd1fa 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -12384,6 +12384,12 @@ static void netif_close_many_and_unlock_cond(struct list_head *close_head) #endif } +bool unregister_netdevice_queued(const struct net_device *dev) +{ + ASSERT_RTNL(); + return !list_empty(&dev->unreg_list); +} + void unregister_netdevice_many_notify(struct list_head *head, u32 portid, const struct nlmsghdr *nlh) { From a14fd6474883871f0cb348db7b58688d9953c178 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 3 Apr 2026 01:10:30 +0200 Subject: [PATCH 1301/1561] netkit: Add xsk support for af_xdp applications Enable support for AF_XDP applications to operate on a netkit device. The goal is that AF_XDP applications can natively consume AF_XDP from network namespaces. The use-case from Cilium side is to support Kubernetes KubeVirt VMs through QEMU's AF_XDP backend. KubeVirt is a virtual machine management add-on for Kubernetes which aims to provide a common ground for virtualization. KubeVirt spawns the VMs inside Kubernetes Pods which reside in their own network namespace just like regular Pods. Raw QEMU AF_XDP backend example with eth0 being a physical device with 16 queues where netkit is bound to the last queue (for multi-queue RSS context can be used if supported by the driver): # ethtool -X eth0 start 0 equal 15 # ethtool -X eth0 start 15 equal 1 context new # ethtool --config-ntuple eth0 flow-type ether \ src 00:00:00:00:00:00 \ src-mask ff:ff:ff:ff:ff:ff \ dst $mac dst-mask 00:00:00:00:00:00 \ proto 0 proto-mask 0xffff action 15 [ ... setup BPF/XDP prog on eth0 to steer into shared xsk map ... ] # ip netns add foo # ip link add numrxqueues 2 nk type netkit single # ynl --family netdev --output-json --do queue-create \ --json "{"ifindex": $(ifindex nk), "type": "rx", \ "lease": { "ifindex": $(ifindex eth0), \ "queue": { "type": "rx", "id": 15 } } }" {'id': 1} # ip link set nk netns foo # ip netns exec foo ip link set lo up # ip netns exec foo ip link set nk up # ip netns exec foo qemu-system-x86_64 \ -kernel $kernel \ -drive file=${image_name},index=0,media=disk,format=raw \ -append "root=/dev/sda rw console=ttyS0" \ -cpu host \ -m $memory \ -enable-kvm \ -device virtio-net-pci,netdev=net0,mac=$mac \ -netdev af-xdp,ifname=nk,id=net0,mode=native,queues=1,start-queue=1,inhibit=on,map-path=$dir/xsks_map \ -nographic We have tested the above against a dual-port Nvidia ConnectX-6 (mlx5) 100G NIC with successful network connectivity out of QEMU. An earlier iteration of this work was presented at LSF/MM/BPF [0] and more recently at LPC [1]. For getting to a first starting point to connect all things with KubeVirt, bind mounting the xsk map from Cilium into the VM launcher Pod which acts as a regular Kubernetes Pod while not perfect, is not a big problem given its out of reach from the application sitting inside the VM (and some of the control plane aspects are baked in the launcher Pod already), so the isolation barrier is still the VM. Eventually the goal is to have a XDP/XSK redirect extension where there is no need to have the xsk map, and the BPF program can just derive the target xsk through the queue where traffic was received on. The exposure through netkit is because Cilium should not act as a proxy handing out xsk sockets. Existing applications expect a netdev from kernel side and should not need to rewrite just to implement against a CNI's protocol. Also, all the memory should not be accounted against Cilium but rather the application Pod itself which is consuming AF_XDP. Further, on up/downgrades we expect the data plane to being completely decoupled from the control plane; if Cilium would own the sockets that would be disruptive. Another use-case which opens up and is regularly asked from users would be to have DPDK applications on top of AF_XDP in regular Kubernetes Pods. Signed-off-by: Daniel Borkmann Co-developed-by: David Wei Signed-off-by: David Wei Reviewed-by: Nikolay Aleksandrov Link: https://bpfconf.ebpf.io/bpfconf2025/bpfconf2025_material/lsfmmbpf_2025_netkit_borkmann.pdf [0] Link: https://lpc.events/event/19/contributions/2275/ [1] Link: https://patch.msgid.link/20260402231031.447597-14-daniel@iogearbox.net Signed-off-by: Jakub Kicinski --- drivers/net/netkit.c | 86 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c index 1ec21aef348f..5619209329d5 100644 --- a/drivers/net/netkit.c +++ b/drivers/net/netkit.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -236,6 +237,86 @@ static void netkit_get_stats(struct net_device *dev, stats->tx_dropped = DEV_STATS_READ(dev, tx_dropped); } +static bool netkit_xsk_supported_at_phys(const struct net_device *dev) +{ + if (!dev->netdev_ops->ndo_bpf || + !dev->netdev_ops->ndo_xdp_xmit || + !dev->netdev_ops->ndo_xsk_wakeup) + return false; + return true; +} + +static int netkit_xsk(struct net_device *dev, struct netdev_bpf *xdp) +{ + struct netkit *nk = netkit_priv(dev); + struct netdev_bpf xdp_lower; + struct netdev_rx_queue *rxq; + struct net_device *phys; + bool create = false; + int ret = -EBUSY; + + switch (xdp->command) { + case XDP_SETUP_XSK_POOL: + if (nk->pair == NETKIT_DEVICE_PAIR) + return -EOPNOTSUPP; + if (xdp->xsk.queue_id >= dev->real_num_rx_queues) + return -EINVAL; + + rxq = __netif_get_rx_queue(dev, xdp->xsk.queue_id); + if (!rxq->lease) + return -EOPNOTSUPP; + + phys = rxq->lease->dev; + if (!netkit_xsk_supported_at_phys(phys)) + return -EOPNOTSUPP; + + create = xdp->xsk.pool; + memcpy(&xdp_lower, xdp, sizeof(xdp_lower)); + xdp_lower.xsk.queue_id = get_netdev_rx_queue_index(rxq->lease); + break; + case XDP_SETUP_PROG: + return -EOPNOTSUPP; + default: + return -EINVAL; + } + + netdev_lock(phys); + if (create && + (phys->xdp_features & NETDEV_XDP_ACT_XSK) != NETDEV_XDP_ACT_XSK) { + ret = -EOPNOTSUPP; + goto out; + } + if (!create || !dev_get_min_mp_channel_count(phys)) + ret = phys->netdev_ops->ndo_bpf(phys, &xdp_lower); +out: + netdev_unlock(phys); + return ret; +} + +static int netkit_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) +{ + struct netdev_rx_queue *rxq, *rxq_lease; + struct net_device *phys; + + if (queue_id >= dev->real_num_rx_queues) + return -EINVAL; + + rxq = __netif_get_rx_queue(dev, queue_id); + rxq_lease = READ_ONCE(rxq->lease); + if (unlikely(!rxq_lease)) + return -EOPNOTSUPP; + + /* netkit_xsk already validated full xsk support, hence it's + * fine to call into ndo_xsk_wakeup right away given this + * was a prerequisite to get here in the first place. The + * phys xsk support cannot change without tearing down the + * device (which clears the lease first). + */ + phys = rxq_lease->dev; + return phys->netdev_ops->ndo_xsk_wakeup(phys, + get_netdev_rx_queue_index(rxq_lease), flags); +} + static int netkit_init(struct net_device *dev) { netdev_lockdep_set_classes(dev); @@ -256,6 +337,8 @@ static const struct net_device_ops netkit_netdev_ops = { .ndo_get_peer_dev = netkit_peer_dev, .ndo_get_stats64 = netkit_get_stats, .ndo_uninit = netkit_uninit, + .ndo_bpf = netkit_xsk, + .ndo_xsk_wakeup = netkit_xsk_wakeup, .ndo_features_check = passthru_features_check, }; @@ -569,6 +652,9 @@ static int netkit_new_link(struct net_device *dev, nk->headroom = headroom; bpf_mprog_bundle_init(&nk->bundle); + if (pair == NETKIT_DEVICE_SINGLE) + xdp_set_features_flag(dev, NETDEV_XDP_ACT_XSK); + err = register_netdevice(dev); if (err < 0) goto err_configure_peer; From 65d657d806848add1e1f0632562d7f47d5d5c188 Mon Sep 17 00:00:00 2001 From: David Wei Date: Fri, 3 Apr 2026 01:10:31 +0200 Subject: [PATCH 1302/1561] selftests/net: Add queue leasing tests with netkit Add extensive selftests for netkit queue leasing, using io_uring zero copy test binary inside of a netns with netkit. This checks that memory providers can be bound against virtual queues in a netkit within a netns that are leasing from a physical netdev in the default netns. Also add various test cases around corner cases for the queue creation itself as well as queue info dumping and teardown in case of netkit in device pair and single mode. Signed-off-by: David Wei Co-developed-by: Daniel Borkmann Signed-off-by: Daniel Borkmann Link: https://patch.msgid.link/20260402231031.447597-15-daniel@iogearbox.net Signed-off-by: Jakub Kicinski --- .../testing/selftests/drivers/net/hw/Makefile | 1 + .../drivers/net/hw/lib/py/__init__.py | 4 +- .../selftests/drivers/net/hw/nk_qlease.py | 1407 +++++++++++++++++ 3 files changed, 1410 insertions(+), 2 deletions(-) create mode 100755 tools/testing/selftests/drivers/net/hw/nk_qlease.py diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile index deeca3f8d080..28d245e11bc4 100644 --- a/tools/testing/selftests/drivers/net/hw/Makefile +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -35,6 +35,7 @@ TEST_PROGS = \ loopback.sh \ nic_timestamp.py \ nk_netns.py \ + nk_qlease.py \ pp_alloc_fail.py \ rss_api.py \ rss_ctx.py \ diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py index df4da5078c48..84a4dab6c649 100644 --- a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py @@ -20,7 +20,7 @@ try: # Import one by one to avoid pylint false positives from net.lib.py import NetNS, NetNSEnter, NetdevSimDev from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \ - NlError, RtnlFamily, DevlinkFamily, PSPFamily + NlError, RtnlFamily, DevlinkFamily, PSPFamily, Netlink from net.lib.py import CmdExitFailure from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \ fd_read_timeout, ip, rand_port, rand_ports, wait_port_listen, \ @@ -36,7 +36,7 @@ try: __all__ = ["NetNS", "NetNSEnter", "NetdevSimDev", "EthtoolFamily", "NetdevFamily", "NetshaperFamily", - "NlError", "RtnlFamily", "DevlinkFamily", "PSPFamily", + "NlError", "RtnlFamily", "DevlinkFamily", "PSPFamily", "Netlink", "CmdExitFailure", "bkg", "cmd", "bpftool", "bpftrace", "defer", "ethtool", "fd_read_timeout", "ip", "rand_port", "rand_ports", diff --git a/tools/testing/selftests/drivers/net/hw/nk_qlease.py b/tools/testing/selftests/drivers/net/hw/nk_qlease.py new file mode 100755 index 000000000000..2bc5ffe96c7d --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/nk_qlease.py @@ -0,0 +1,1407 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import errno +import re +import time +import threading +from os import path +from lib.py import ( + ksft_run, + ksft_exit, + ksft_eq, + ksft_ne, + ksft_in, + ksft_not_in, + ksft_raises, +) +from lib.py import ( + NetDrvContEnv, + NetNS, + NetNSEnter, + EthtoolFamily, + NetdevFamily, + RtnlFamily, + NetdevSimDev, +) +from lib.py import ( + NlError, + Netlink, + bkg, + cmd, + defer, + ethtool, + ip, + rand_port, + wait_port_listen, +) +from lib.py import KsftSkipEx, CmdExitFailure + + +def set_flow_rule(cfg): + output = ethtool( + f"-N {cfg.ifname} flow-type tcp6 dst-port {cfg.port} action {cfg.src_queue}" + ).stdout + values = re.search(r"ID (\d+)", output).group(1) + return int(values) + + +def create_netkit(rxqueues): + all_links = ip("-d link show", json=True) + old_idxs = { + link["ifindex"] + for link in all_links + if link.get("linkinfo", {}).get("info_kind") == "netkit" + } + + rtnl = RtnlFamily() + rtnl.newlink( + { + "linkinfo": { + "kind": "netkit", + "data": { + "mode": "l2", + "policy": "forward", + "peer-policy": "forward", + }, + }, + "num-rx-queues": rxqueues, + }, + flags=[Netlink.NLM_F_CREATE, Netlink.NLM_F_EXCL], + ) + + all_links = ip("-d link show", json=True) + nk_links = [ + link + for link in all_links + if link.get("linkinfo", {}).get("info_kind") == "netkit" + and link["ifindex"] not in old_idxs + ] + nk_links.sort(key=lambda x: x["ifindex"]) + return ( + nk_links[1]["ifname"], + nk_links[1]["ifindex"], + nk_links[0]["ifname"], + nk_links[0]["ifindex"], + ) + + +def create_netkit_single(rxqueues): + rtnl = RtnlFamily() + rtnl.newlink( + { + "linkinfo": { + "kind": "netkit", + "data": { + "mode": "l2", + "pairing": "single", + }, + }, + "num-rx-queues": rxqueues, + }, + flags=[Netlink.NLM_F_CREATE, Netlink.NLM_F_EXCL], + ) + + all_links = ip("-d link show", json=True) + nk_links = [ + link + for link in all_links + if link.get("linkinfo", {}).get("info_kind") == "netkit" + and "UP" not in link.get("flags", []) + ] + return nk_links[0]["ifname"], nk_links[0]["ifindex"] + + +def test_remove_phys(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + src_queue = 1 + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + result = netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + nk_queue_id = result["id"] + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + ksft_eq(queue_info["lease"]["ifindex"], nk_guest_idx) + ksft_eq(queue_info["lease"]["queue"]["id"], nk_queue_id) + + nsimdev.remove() + time.sleep(0.1) + ret = cmd(f"ip link show dev {nk_host}", fail=False) + ksft_ne(ret.ret, 0) + + +def test_double_lease(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=3) + defer(cmd, f"ip link del dev {nk_host}") + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + src_queue = 1 + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + result = netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(result["id"], 1) + + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EBUSY) + + +def test_virtual_lessor(netns) -> None: + nk_host_a, _, nk_guest_a, nk_guest_a_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host_a}") + ip(f"link set dev {nk_host_a} up") + ip(f"link set dev {nk_guest_a} up") + + nk_host_b, _, nk_guest_b, nk_guest_b_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host_b}") + + ip(f"link set dev {nk_guest_b} netns {netns.name}") + ip(f"link set dev {nk_host_b} up") + ip(f"link set dev {nk_guest_b} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nk_guest_b_idx, + "type": "rx", + "lease": { + "ifindex": nk_guest_a_idx, + "queue": {"id": 0, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + + +def test_phys_lessee(_netns) -> None: + nsimdev_a = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev_a.remove) + nsim_a = nsimdev_a.nsims[0] + ip(f"link set dev {nsim_a.ifname} up") + + nsimdev_b = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev_b.remove) + nsim_b = nsimdev_b.nsims[0] + ip(f"link set dev {nsim_b.ifname} up") + + netdevnl = NetdevFamily() + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nsim_a.ifindex, + "type": "rx", + "lease": { + "ifindex": nsim_b.ifindex, + "queue": {"id": 0, "type": "rx"}, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + + +def test_different_lessors(netns) -> None: + nsimdev_a = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev_a.remove) + nsim_a = nsimdev_a.nsims[0] + ip(f"link set dev {nsim_a.ifname} up") + + nsimdev_b = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev_b.remove) + nsim_b = nsimdev_b.nsims[0] + ip(f"link set dev {nsim_b.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=3) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim_a.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim_b.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EOPNOTSUPP) + + +def test_queue_out_of_range(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 2, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.ERANGE) + + +def test_resize_leased(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + ethnl = EthtoolFamily() + with ksft_raises(NlError) as e: + ethnl.channels_set({"header": {"dev-index": nsim.ifindex}, "combined-count": 1}) + ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + + +def test_self_lease(_netns) -> None: + nk_host, _, _, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + netdevnl = NetdevFamily() + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nk_guest_idx, + "queue": {"id": 0, "type": "rx"}, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + + +def test_veth_queue_create(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + ip("link add veth0 type veth peer name veth1") + defer(cmd, "ip link del dev veth0", fail=False) + + all_links = ip("-d link show", json=True) + veth_peer = [ + link + for link in all_links + if link.get("ifname") == "veth1" + ] + veth_peer_idx = veth_peer[0]["ifindex"] + + ip(f"link set dev veth1 netns {netns.name}") + ip("link set dev veth0 up") + ip("link set dev veth1 up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": veth_peer_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + + +def test_create_tx_type(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "tx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + + +def test_create_primary(_netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, nk_host_idx, _, _ = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_host} up") + + netdevnl = NetdevFamily() + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nk_host_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EOPNOTSUPP) + + +def test_create_limit(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=1) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + + +def test_link_flap_phys(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}") + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + src_queue = 1 + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + result = netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + nk_queue_id = result["id"] + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + ksft_eq(queue_info["lease"]["queue"]["id"], nk_queue_id) + + # Link flap the physical device + ip(f"link set dev {nsim.ifname} down") + ip(f"link set dev {nsim.ifname} up") + + # Verify lease survives the flap + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + ksft_eq(queue_info["lease"]["queue"]["id"], nk_queue_id) + + +def test_queue_get_virtual(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}") + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + src_queue = 1 + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + result = netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + nk_queue_id = result["id"] + + # queue-get on virtual device's leased queue should not show lease + # info (lease info is only shown from the physical device's side) + queue_info = netdevnl.queue_get( + {"ifindex": nk_guest_idx, "id": nk_queue_id, "type": "rx"} + ) + ksft_eq(queue_info["id"], nk_queue_id) + ksft_eq(queue_info["ifindex"], nk_guest_idx) + ksft_not_in("lease", queue_info) + + # Default queue (not leased) also has no lease info + queue_info = netdevnl.queue_get( + {"ifindex": nk_guest_idx, "id": 0, "type": "rx"} + ) + ksft_not_in("lease", queue_info) + + +def test_remove_virt_first(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + src_queue = 1 + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + result = netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(result["id"], 1) + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + ksft_eq(queue_info["lease"]["queue"]["id"], result["id"]) + + # Delete netkit (virtual device removed first, physical stays) + cmd(f"ip link del dev {nk_host}") + + # Verify lease is cleaned up on physical device + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_not_in("lease", queue_info) + + +def test_multiple_leases(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=3) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=4) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + r1 = netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + r2 = netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 2, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + ksft_eq(r1["id"], 1) + ksft_eq(r2["id"], 2) + + # Verify both leases visible on physical device + netdevnl = NetdevFamily() + q1 = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 1, "type": "rx"} + ) + q2 = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 2, "type": "rx"} + ) + ksft_in("lease", q1) + ksft_in("lease", q2) + ksft_eq(q1["lease"]["ifindex"], nk_guest_idx) + ksft_eq(q2["lease"]["ifindex"], nk_guest_idx) + ksft_eq(q1["lease"]["queue"]["id"], r1["id"]) + ksft_eq(q2["lease"]["queue"]["id"], r2["id"]) + + +def test_lease_queue_tx_type(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "tx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + + +def test_invalid_netns(netns) -> None: + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": 1, + "queue": {"id": 0, "type": "rx"}, + "netns-id": 999, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.ENONET) + + +def test_invalid_phys_ifindex(netns) -> None: + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": 99999, + "queue": {"id": 0, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.ENODEV) + + +def test_multi_netkit_remove_phys(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=3) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + # Create two netkit pairs, each leasing a different physical queue + nk_host_a, _, nk_guest_a, nk_guest_a_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host_a}", fail=False) + + nk_host_b, _, nk_guest_b, nk_guest_b_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host_b}", fail=False) + + ip(f"link set dev {nk_guest_a} netns {netns.name}") + ip(f"link set dev {nk_host_a} up") + ip(f"link set dev {nk_guest_a} up", ns=netns) + + ip(f"link set dev {nk_guest_b} netns {netns.name}") + ip(f"link set dev {nk_host_b} up") + ip(f"link set dev {nk_guest_b} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + netdevnl.queue_create( + { + "ifindex": nk_guest_a_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + netdevnl.queue_create( + { + "ifindex": nk_guest_b_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 2, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + # Removing the physical device should take down both netkit pairs + nsimdev.remove() + time.sleep(0.1) + ret = cmd(f"ip link show dev {nk_host_a}", fail=False) + ksft_ne(ret.ret, 0) + ret = cmd(f"ip link show dev {nk_host_b}", fail=False) + ksft_ne(ret.ret, 0) + + +def test_single_remove_phys(_netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_name, nk_idx = create_netkit_single(rxqueues=2) + defer(cmd, f"ip link del dev {nk_name}", fail=False) + + ip(f"link set dev {nk_name} up") + + netdevnl = NetdevFamily() + netdevnl.queue_create( + { + "ifindex": nk_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + }, + } + ) + + # Removing the physical device should take down the single netkit device + nsimdev.remove() + time.sleep(0.1) + ret = cmd(f"ip link show dev {nk_name}", fail=False) + ksft_ne(ret.ret, 0) + + +def test_link_flap_virt(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}") + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + src_queue = 1 + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + result = netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + nk_queue_id = result["id"] + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + ksft_eq(queue_info["lease"]["queue"]["id"], nk_queue_id) + + # Link flap the virtual (netkit) device + ip(f"link set dev {nk_guest} down", ns=netns) + ip(f"link set dev {nk_guest} up", ns=netns) + + # Verify lease survives the virtual device flap + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + ksft_eq(queue_info["lease"]["queue"]["id"], nk_queue_id) + + +def test_phys_queue_no_lease(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}") + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + # Physical queue 0 (not leased) should have no lease info + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 0, "type": "rx"} + ) + ksft_not_in("lease", queue_info) + + # Physical queue 1 (leased) should have lease info + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 1, "type": "rx"} + ) + ksft_in("lease", queue_info) + + +def test_same_ns_lease(_netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_name, nk_idx = create_netkit_single(rxqueues=2) + defer(cmd, f"ip link del dev {nk_name}", fail=False) + + ip(f"link set dev {nk_name} up") + + netdevnl = NetdevFamily() + result = netdevnl.queue_create( + { + "ifindex": nk_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + }, + } + ) + ksft_eq(result["id"], 1) + + # Same namespace: lease info should NOT have netns-id + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 1, "type": "rx"} + ) + ksft_in("lease", queue_info) + ksft_eq(queue_info["lease"]["ifindex"], nk_idx) + ksft_eq(queue_info["lease"]["queue"]["id"], result["id"]) + ksft_not_in("netns-id", queue_info["lease"]) + + +def test_resize_after_unlease(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + # Resize should fail while lease is active + ethnl = EthtoolFamily() + with ksft_raises(NlError) as e: + ethnl.channels_set({"header": {"dev-index": nsim.ifindex}, "combined-count": 1}) + ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + + # Delete netkit, clearing the lease + cmd(f"ip link del dev {nk_host}") + + # Resize should now succeed + ethnl.channels_set({"header": {"dev-index": nsim.ifindex}, "combined-count": 1}) + + +def test_lease_queue_zero(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + result = netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 0, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(result["id"], 1) + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 0, "type": "rx"} + ) + ksft_in("lease", queue_info) + ksft_eq(queue_info["lease"]["queue"]["id"], result["id"]) + + +def test_release_and_reuse(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + src_queue = 1 + + # First lease + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + + # Delete netkit, freeing the lease + cmd(f"ip link del dev {nk_host}") + + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_not_in("lease", queue_info) + + # Re-create netkit and lease the same physical queue again + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + result = netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(result["id"], 1) + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + ksft_eq(queue_info["lease"]["queue"]["id"], result["id"]) + + +def test_iou_zcrx(cfg) -> None: + cfg.require_ipver("6") + ethnl = EthtoolFamily() + + rings = ethnl.rings_get({"header": {"dev-index": cfg.ifindex}}) + rx_rings = rings["rx"] + hds_thresh = rings.get("hds-thresh", 0) + + ethnl.rings_set( + { + "header": {"dev-index": cfg.ifindex}, + "tcp-data-split": "enabled", + "hds-thresh": 0, + "rx": 64, + } + ) + defer( + ethnl.rings_set, + { + "header": {"dev-index": cfg.ifindex}, + "tcp-data-split": "unknown", + "hds-thresh": hds_thresh, + "rx": rx_rings, + }, + ) + + ethtool(f"-X {cfg.ifname} equal {cfg.src_queue}") + defer(ethtool, f"-X {cfg.ifname} default") + + flow_rule_id = set_flow_rule(cfg) + defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}") + + rx_cmd = f"ip netns exec {cfg.netns.name} {cfg.bin_local} -s -p {cfg.port} -i {cfg._nk_guest_ifname} -q {cfg.nk_queue}" + tx_cmd = f"{cfg.bin_remote} -c -h {cfg.nk_guest_ipv6} -p {cfg.port} -l 12840" + with bkg(rx_cmd, exit_wait=True): + wait_port_listen(cfg.port, proto="tcp", ns=cfg.netns) + cmd(tx_cmd, host=cfg.remote) + + +def test_attrs(cfg) -> None: + cfg.require_ipver("6") + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": cfg.ifindex, "id": cfg.src_queue, "type": "rx"} + ) + + ksft_eq(queue_info["id"], cfg.src_queue) + ksft_eq(queue_info["type"], "rx") + ksft_eq(queue_info["ifindex"], cfg.ifindex) + + ksft_in("lease", queue_info) + lease = queue_info["lease"] + ksft_eq(lease["ifindex"], cfg.nk_guest_ifindex) + ksft_eq(lease["queue"]["id"], cfg.nk_queue) + ksft_eq(lease["queue"]["type"], "rx") + ksft_in("netns-id", lease) + + +def test_attach_xdp_with_mp(cfg) -> None: + cfg.require_ipver("6") + ethnl = EthtoolFamily() + + rings = ethnl.rings_get({"header": {"dev-index": cfg.ifindex}}) + rx_rings = rings["rx"] + hds_thresh = rings.get("hds-thresh", 0) + + ethnl.rings_set( + { + "header": {"dev-index": cfg.ifindex}, + "tcp-data-split": "enabled", + "hds-thresh": 0, + "rx": 64, + } + ) + defer( + ethnl.rings_set, + { + "header": {"dev-index": cfg.ifindex}, + "tcp-data-split": "unknown", + "hds-thresh": hds_thresh, + "rx": rx_rings, + }, + ) + + ethtool(f"-X {cfg.ifname} equal {cfg.src_queue}") + defer(ethtool, f"-X {cfg.ifname} default") + + netdevnl = NetdevFamily() + + rx_cmd = f"ip netns exec {cfg.netns.name} {cfg.bin_local} -s -p {cfg.port} -i {cfg._nk_guest_ifname} -q {cfg.nk_queue}" + with bkg(rx_cmd): + wait_port_listen(cfg.port, proto="tcp", ns=cfg.netns) + + time.sleep(0.1) + queue_info = netdevnl.queue_get( + {"ifindex": cfg.ifindex, "id": cfg.src_queue, "type": "rx"} + ) + ksft_in("io-uring", queue_info) + + prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" + with ksft_raises(CmdExitFailure): + ip(f"link set dev {cfg.ifname} xdp obj {prog} sec xdp.frags") + + time.sleep(0.1) + queue_info = netdevnl.queue_get( + {"ifindex": cfg.ifindex, "id": cfg.src_queue, "type": "rx"} + ) + ksft_not_in("io-uring", queue_info) + + +def test_destroy(cfg) -> None: + cfg.require_ipver("6") + ethnl = EthtoolFamily() + + rings = ethnl.rings_get({"header": {"dev-index": cfg.ifindex}}) + rx_rings = rings["rx"] + hds_thresh = rings.get("hds-thresh", 0) + + ethnl.rings_set( + { + "header": {"dev-index": cfg.ifindex}, + "tcp-data-split": "enabled", + "hds-thresh": 0, + "rx": 64, + } + ) + defer( + ethnl.rings_set, + { + "header": {"dev-index": cfg.ifindex}, + "tcp-data-split": "unknown", + "hds-thresh": hds_thresh, + "rx": rx_rings, + }, + ) + + ethtool(f"-X {cfg.ifname} equal {cfg.src_queue}") + defer(ethtool, f"-X {cfg.ifname} default") + + rx_cmd = f"ip netns exec {cfg.netns.name} {cfg.bin_local} -s -p {cfg.port} -i {cfg._nk_guest_ifname} -q {cfg.nk_queue}" + rx_proc = cmd(rx_cmd, background=True) + wait_port_listen(cfg.port, proto="tcp", ns=cfg.netns) + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": cfg.ifindex, "id": cfg.src_queue, "type": "rx"} + ) + ksft_in("io-uring", queue_info) + + # ip link del will wait for all refs to drop first, but iou-zcrx is holding + # onto a ref. Terminate iou-zcrx async via a thread after a delay. + kill_timer = threading.Timer(1, rx_proc.proc.terminate) + kill_timer.start() + + ip(f"link del dev {cfg._nk_host_ifname}") + kill_timer.join() + cfg._nk_host_ifname = None + cfg._nk_guest_ifname = None + + queue_info = netdevnl.queue_get( + {"ifindex": cfg.ifindex, "id": cfg.src_queue, "type": "rx"} + ) + ksft_not_in("io-uring", queue_info) + + cmd(f"tc filter del dev {cfg.ifname} ingress pref {cfg._bpf_prog_pref}") + cfg._tc_attached = False + + flow_rule_id = set_flow_rule(cfg) + defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}") + + rx_cmd = f"{cfg.bin_local} -s -p {cfg.port} -i {cfg.ifname} -q {cfg.src_queue}" + tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {cfg.port} -l 12840" + with bkg(rx_cmd, exit_wait=True): + wait_port_listen(cfg.port, proto="tcp") + cmd(tx_cmd, host=cfg.remote) + # Short delay since iou cleanup is async and takes a bit of time. + time.sleep(0.1) + queue_info = netdevnl.queue_get( + {"ifindex": cfg.ifindex, "id": cfg.src_queue, "type": "rx"} + ) + ksft_not_in("io-uring", queue_info) + + +def main() -> None: + netns = NetNS() + cmd("ip netns attach init 1") + ip("netns set init 0", ns=netns) + ip("link set lo up", ns=netns) + + ksft_run( + [ + test_remove_phys, + test_double_lease, + test_virtual_lessor, + test_phys_lessee, + test_different_lessors, + test_queue_out_of_range, + test_resize_leased, + test_self_lease, + test_create_tx_type, + test_create_primary, + test_create_limit, + test_link_flap_phys, + test_queue_get_virtual, + test_remove_virt_first, + test_multiple_leases, + test_lease_queue_tx_type, + test_invalid_netns, + test_invalid_phys_ifindex, + test_multi_netkit_remove_phys, + test_single_remove_phys, + test_link_flap_virt, + test_phys_queue_no_lease, + test_same_ns_lease, + test_resize_after_unlease, + test_lease_queue_zero, + test_release_and_reuse, + test_veth_queue_create, + ], + args=(netns,), + ) + + cmd("ip netns del init", fail=False) + del netns + + with NetDrvContEnv(__file__, rxqueues=2) as cfg: + cfg.bin_local = path.abspath( + path.dirname(__file__) + "/../../../drivers/net/hw/iou-zcrx" + ) + cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) + cfg.port = rand_port() + + ethnl = EthtoolFamily() + channels = ethnl.channels_get({"header": {"dev-index": cfg.ifindex}}) + channels = channels["combined-count"] + if channels < 2: + raise KsftSkipEx("Test requires NETIF with at least 2 combined channels") + + cfg.src_queue = channels - 1 + + with NetNSEnter(str(cfg.netns)): + netdevnl = NetdevFamily() + bind_result = netdevnl.queue_create( + { + "ifindex": cfg.nk_guest_ifindex, + "type": "rx", + "lease": { + "ifindex": cfg.ifindex, + "queue": {"id": cfg.src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + cfg.nk_queue = bind_result["id"] + + # test_destroy must be last because it destroys the netkit devices + ksft_run( + [test_iou_zcrx, test_attrs, test_attach_xdp_with_mp, test_destroy], + args=(cfg,), + ) + ksft_exit() + + +if __name__ == "__main__": + main() From 581d28606cdd51c5da06330e8fb97476503cd74d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 8 Apr 2026 15:12:51 -0700 Subject: [PATCH 1303/1561] net: remove the netif_get_rx_queue_lease_locked() helpers The netif_get_rx_queue_lease_locked() API hides the locking and the descend onto the leased queue. Making the code harder to follow (at least to me). Remove the API and open code the descend a bit. Most of the code now looks like: if (!leased) return __helper(x); hw_rxq = .. netdev_lock(hw_rxq->dev); ret = __helper(x); netdev_unlock(hw_rxq->dev); return ret; Of course if we have more code paths that need the wrapping we may need to revisit. For now, IMHO, having to know what netif_get_rx_queue_lease_locked() does is not worth the 20LoC it saves. Link: https://patch.msgid.link/20260408151251.72bd2482@kernel.org Signed-off-by: Jakub Kicinski --- include/net/netdev_rx_queue.h | 5 --- net/core/dev.h | 1 + net/core/netdev-genl.c | 59 +++++++++++++++++---------- net/core/netdev_queues.c | 16 +++++--- net/core/netdev_rx_queue.c | 48 +++++++--------------- net/xdp/xsk.c | 77 ++++++++++++++++++++++------------- 6 files changed, 113 insertions(+), 93 deletions(-) diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h index 7e98c679ea84..9415a94d333d 100644 --- a/include/net/netdev_rx_queue.h +++ b/include/net/netdev_rx_queue.h @@ -76,11 +76,6 @@ struct netdev_rx_queue * __netif_get_rx_queue_lease(struct net_device **dev, unsigned int *rxq, enum netif_lease_dir dir); -struct netdev_rx_queue * -netif_get_rx_queue_lease_locked(struct net_device **dev, unsigned int *rxq); -void netif_put_rx_queue_lease_locked(struct net_device *orig_dev, - struct net_device *dev); - int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq); void netdev_rx_queue_lease(struct netdev_rx_queue *rxq_dst, struct netdev_rx_queue *rxq_src); diff --git a/net/core/dev.h b/net/core/dev.h index 95edb2d4eff8..376bac4a82da 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -101,6 +101,7 @@ int netdev_queue_config_validate(struct net_device *dev, int rxq_idx, bool netif_rxq_has_mp(struct net_device *dev, unsigned int rxq_idx); bool netif_rxq_is_leased(struct net_device *dev, unsigned int rxq_idx); +bool netif_is_queue_leasee(const struct net_device *dev); void __netif_mp_uninstall_rxq(struct netdev_rx_queue *rxq, const struct pp_memory_provider_params *p); diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 056460d01940..b8f6076d8007 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -395,8 +395,7 @@ netdev_nl_queue_fill_lease(struct sk_buff *rsp, struct net_device *netdev, struct netdev_rx_queue *rxq; struct net *net, *peer_net; - rxq = __netif_get_rx_queue_lease(&netdev, &q_idx, - NETIF_PHYS_TO_VIRT); + rxq = __netif_get_rx_queue_lease(&netdev, &q_idx, NETIF_PHYS_TO_VIRT); if (!rxq || orig_netdev == netdev) return 0; @@ -436,13 +435,45 @@ nla_put_failure: return -ENOMEM; } +static int +__netdev_nl_queue_fill_mp(struct sk_buff *rsp, struct netdev_rx_queue *rxq) +{ + struct pp_memory_provider_params *params = &rxq->mp_params; + + if (params->mp_ops && + params->mp_ops->nl_fill(params->mp_priv, rsp, rxq)) + return -EMSGSIZE; + +#ifdef CONFIG_XDP_SOCKETS + if (rxq->pool) + if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK)) + return -EMSGSIZE; +#endif + return 0; +} + +static int +netdev_nl_queue_fill_mp(struct sk_buff *rsp, struct net_device *netdev, + struct netdev_rx_queue *rxq) +{ + struct netdev_rx_queue *hw_rxq; + int ret; + + hw_rxq = rxq->lease; + if (!hw_rxq || !netif_is_queue_leasee(netdev)) + return __netdev_nl_queue_fill_mp(rsp, rxq); + + netdev_lock(hw_rxq->dev); + ret = __netdev_nl_queue_fill_mp(rsp, hw_rxq); + netdev_unlock(hw_rxq->dev); + return ret; +} + static int netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev, u32 q_idx, u32 q_type, const struct genl_info *info) { - struct pp_memory_provider_params *params; - struct net_device *orig_netdev = netdev; - struct netdev_rx_queue *rxq, *rxq_lease; + struct netdev_rx_queue *rxq; struct netdev_queue *txq; void *hdr; @@ -462,20 +493,8 @@ netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev, goto nla_put_failure; if (netdev_nl_queue_fill_lease(rsp, netdev, q_idx, q_type)) goto nla_put_failure; - - rxq_lease = netif_get_rx_queue_lease_locked(&netdev, &q_idx); - if (rxq_lease) - rxq = rxq_lease; - params = &rxq->mp_params; - if (params->mp_ops && - params->mp_ops->nl_fill(params->mp_priv, rsp, rxq)) - goto nla_put_failure_lease; -#ifdef CONFIG_XDP_SOCKETS - if (rxq->pool) - if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK)) - goto nla_put_failure_lease; -#endif - netif_put_rx_queue_lease_locked(orig_netdev, netdev); + if (netdev_nl_queue_fill_mp(rsp, netdev, rxq)) + goto nla_put_failure; break; case NETDEV_QUEUE_TYPE_TX: txq = netdev_get_tx_queue(netdev, q_idx); @@ -493,8 +512,6 @@ netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev, return 0; -nla_put_failure_lease: - netif_put_rx_queue_lease_locked(orig_netdev, netdev); nla_put_failure: genlmsg_cancel(rsp, hdr); return -EMSGSIZE; diff --git a/net/core/netdev_queues.c b/net/core/netdev_queues.c index 265161e12a9c..73fb28087a93 100644 --- a/net/core/netdev_queues.c +++ b/net/core/netdev_queues.c @@ -37,18 +37,24 @@ struct device *netdev_queue_get_dma_dev(struct net_device *dev, unsigned int idx, enum netdev_queue_type type) { - struct net_device *orig_dev = dev; + struct netdev_rx_queue *hw_rxq; struct device *dma_dev; + netdev_ops_assert_locked(dev); + /* Only RX side supports queue leasing today. */ if (type != NETDEV_QUEUE_TYPE_RX || !netif_rxq_is_leased(dev, idx)) return __netdev_queue_get_dma_dev(dev, idx); - - if (!netif_get_rx_queue_lease_locked(&dev, &idx)) + if (!netif_is_queue_leasee(dev)) return NULL; - dma_dev = __netdev_queue_get_dma_dev(dev, idx); - netif_put_rx_queue_lease_locked(orig_dev, dev); + hw_rxq = __netif_get_rx_queue(dev, idx)->lease; + + netdev_lock(hw_rxq->dev); + idx = get_netdev_rx_queue_index(hw_rxq); + dma_dev = __netdev_queue_get_dma_dev(hw_rxq->dev, idx); + netdev_unlock(hw_rxq->dev); + return dma_dev; } diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c index 469319451ba2..8771e06a0afe 100644 --- a/net/core/netdev_rx_queue.c +++ b/net/core/netdev_rx_queue.c @@ -57,6 +57,11 @@ static bool netif_lease_dir_ok(const struct net_device *dev, return false; } +bool netif_is_queue_leasee(const struct net_device *dev) +{ + return netif_lease_dir_ok(dev, NETIF_VIRT_TO_PHYS); +} + struct netdev_rx_queue * __netif_get_rx_queue_lease(struct net_device **dev, unsigned int *rxq_idx, enum netif_lease_dir dir) @@ -74,29 +79,6 @@ __netif_get_rx_queue_lease(struct net_device **dev, unsigned int *rxq_idx, return rxq; } -struct netdev_rx_queue * -netif_get_rx_queue_lease_locked(struct net_device **dev, unsigned int *rxq_idx) -{ - struct net_device *orig_dev = *dev; - struct netdev_rx_queue *rxq; - - /* Locking order is always from the virtual to the physical device - * see netdev_nl_queue_create_doit(). - */ - netdev_ops_assert_locked(orig_dev); - rxq = __netif_get_rx_queue_lease(dev, rxq_idx, NETIF_VIRT_TO_PHYS); - if (rxq && orig_dev != *dev) - netdev_lock(*dev); - return rxq; -} - -void netif_put_rx_queue_lease_locked(struct net_device *orig_dev, - struct net_device *dev) -{ - if (orig_dev != dev) - netdev_unlock(dev); -} - /* See also page_pool_is_unreadable() */ bool netif_rxq_has_unreadable_mp(struct net_device *dev, unsigned int rxq_idx) { @@ -264,7 +246,6 @@ int netif_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, const struct pp_memory_provider_params *p, struct netlink_ext_ack *extack) { - struct net_device *orig_dev = dev; int ret; if (!netdev_need_ops_lock(dev)) @@ -279,19 +260,18 @@ int netif_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, if (!netif_rxq_is_leased(dev, rxq_idx)) return __netif_mp_open_rxq(dev, rxq_idx, p, extack); - if (!netif_get_rx_queue_lease_locked(&dev, &rxq_idx)) { + if (!__netif_get_rx_queue_lease(&dev, &rxq_idx, NETIF_VIRT_TO_PHYS)) { NL_SET_ERR_MSG(extack, "rx queue leased to a virtual netdev"); return -EBUSY; } if (!dev->dev.parent) { NL_SET_ERR_MSG(extack, "rx queue belongs to a virtual netdev"); - ret = -EOPNOTSUPP; - goto out; + return -EOPNOTSUPP; } + netdev_lock(dev); ret = __netif_mp_open_rxq(dev, rxq_idx, p, extack); -out: - netif_put_rx_queue_lease_locked(orig_dev, dev); + netdev_unlock(dev); return ret; } @@ -326,18 +306,18 @@ static void __netif_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx, void netif_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx, const struct pp_memory_provider_params *old_p) { - struct net_device *orig_dev = dev; - if (WARN_ON_ONCE(ifq_idx >= dev->real_num_rx_queues)) return; if (!netif_rxq_is_leased(dev, ifq_idx)) return __netif_mp_close_rxq(dev, ifq_idx, old_p); - if (WARN_ON_ONCE(!netif_get_rx_queue_lease_locked(&dev, &ifq_idx))) + if (!__netif_get_rx_queue_lease(&dev, &ifq_idx, NETIF_VIRT_TO_PHYS)) { + WARN_ON_ONCE(1); return; - + } + netdev_lock(dev); __netif_mp_close_rxq(dev, ifq_idx, old_p); - netif_put_rx_queue_lease_locked(orig_dev, dev); + netdev_unlock(dev); } void __netif_mp_uninstall_rxq(struct netdev_rx_queue *rxq, diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 60be6561f486..145876089b4b 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -31,6 +31,8 @@ #include #include +#include "../core/dev.h" + #include "xsk_queue.h" #include "xdp_umem.h" #include "xsk.h" @@ -117,20 +119,42 @@ struct xsk_buff_pool *xsk_get_pool_from_qid(struct net_device *dev, } EXPORT_SYMBOL(xsk_get_pool_from_qid); +static void __xsk_clear_pool_at_qid(struct net_device *dev, u16 queue_id) +{ + if (queue_id < dev->num_rx_queues) + dev->_rx[queue_id].pool = NULL; + if (queue_id < dev->num_tx_queues) + dev->_tx[queue_id].pool = NULL; +} + void xsk_clear_pool_at_qid(struct net_device *dev, u16 queue_id) { - struct net_device *orig_dev = dev; - unsigned int id = queue_id; + struct netdev_rx_queue *hw_rxq; - if (id < dev->real_num_rx_queues) - WARN_ON_ONCE(!netif_get_rx_queue_lease_locked(&dev, &id)); + if (!netif_rxq_is_leased(dev, queue_id)) + return __xsk_clear_pool_at_qid(dev, queue_id); + WARN_ON_ONCE(!netif_is_queue_leasee(dev)); - if (id < dev->num_rx_queues) - dev->_rx[id].pool = NULL; - if (id < dev->num_tx_queues) - dev->_tx[id].pool = NULL; + hw_rxq = __netif_get_rx_queue(dev, queue_id)->lease; - netif_put_rx_queue_lease_locked(orig_dev, dev); + netdev_lock(hw_rxq->dev); + queue_id = get_netdev_rx_queue_index(hw_rxq); + __xsk_clear_pool_at_qid(hw_rxq->dev, queue_id); + netdev_unlock(hw_rxq->dev); +} + +static int __xsk_reg_pool_at_qid(struct net_device *dev, + struct xsk_buff_pool *pool, u16 queue_id) +{ + if (xsk_get_pool_from_qid(dev, queue_id)) + return -EBUSY; + + if (queue_id < dev->real_num_rx_queues) + dev->_rx[queue_id].pool = pool; + if (queue_id < dev->real_num_tx_queues) + dev->_tx[queue_id].pool = pool; + + return 0; } /* The buffer pool is stored both in the _rx struct and the _tx struct as we do @@ -140,29 +164,26 @@ void xsk_clear_pool_at_qid(struct net_device *dev, u16 queue_id) int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool, u16 queue_id) { - struct net_device *orig_dev = dev; - unsigned int id = queue_id; - int ret = 0; + struct netdev_rx_queue *hw_rxq; + int ret; - if (id >= max(dev->real_num_rx_queues, - dev->real_num_tx_queues)) + if (queue_id >= max(dev->real_num_rx_queues, + dev->real_num_tx_queues)) return -EINVAL; - if (id < dev->real_num_rx_queues) { - if (!netif_get_rx_queue_lease_locked(&dev, &id)) - return -EBUSY; - if (xsk_get_pool_from_qid(dev, id)) { - ret = -EBUSY; - goto out; - } - } + if (queue_id >= dev->real_num_rx_queues || + !netif_rxq_is_leased(dev, queue_id)) + return __xsk_reg_pool_at_qid(dev, pool, queue_id); + if (!netif_is_queue_leasee(dev)) + return -EBUSY; + + hw_rxq = __netif_get_rx_queue(dev, queue_id)->lease; + + netdev_lock(hw_rxq->dev); + queue_id = get_netdev_rx_queue_index(hw_rxq); + ret = __xsk_reg_pool_at_qid(hw_rxq->dev, pool, queue_id); + netdev_unlock(hw_rxq->dev); - if (id < dev->real_num_rx_queues) - dev->_rx[id].pool = pool; - if (id < dev->real_num_tx_queues) - dev->_tx[id].pool = pool; -out: - netif_put_rx_queue_lease_locked(orig_dev, dev); return ret; } From 9addea5d44b69d377ba97a36f7a19e1097969e18 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Tue, 7 Apr 2026 16:07:58 +0100 Subject: [PATCH 1304/1561] net: use get_random_u{16,32,64}() where appropriate Use the typed random integer helpers instead of get_random_bytes() when filling a single integer variable. The helpers return the value directly, require no pointer or size argument, and better express intent. Skipped sites writing into __be16 (netdevsim) and __le64 (ceph) fields where a direct assignment would trigger sparse endianness warnings. Signed-off-by: David Carlier Reviewed-by: Matthieu Baerts (NGI0) Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260407150758.5889-1-devnexen@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/psample.c | 4 ++-- net/core/net_namespace.c | 2 +- net/mac80211/mesh_plink.c | 2 +- net/mptcp/subflow.c | 4 ++-- net/openvswitch/flow_table.c | 2 +- net/sctp/sm_make_chunk.c | 4 ++-- net/tipc/node.c | 2 +- 7 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/netdevsim/psample.c b/drivers/net/netdevsim/psample.c index 47d24bc64ee4..717d157c3ae2 100644 --- a/drivers/net/netdevsim/psample.c +++ b/drivers/net/netdevsim/psample.c @@ -94,7 +94,7 @@ static void nsim_dev_psample_md_prepare(const struct nsim_dev_psample *psample, if (psample->out_tc_occ_max) { u64 out_tc_occ; - get_random_bytes(&out_tc_occ, sizeof(u64)); + out_tc_occ = get_random_u64(); md->out_tc_occ = out_tc_occ & (psample->out_tc_occ_max - 1); md->out_tc_occ_valid = 1; } @@ -102,7 +102,7 @@ static void nsim_dev_psample_md_prepare(const struct nsim_dev_psample *psample, if (psample->latency_max) { u64 latency; - get_random_bytes(&latency, sizeof(u64)); + latency = get_random_u64(); md->latency = latency & (psample->latency_max - 1); md->latency_valid = 1; } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 1057d16d5dd2..deb8b2ec5674 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -411,7 +411,7 @@ static __net_init int preinit_net(struct net *net, struct user_namespace *user_n ref_tracker_dir_init(&net->refcnt_tracker, 128, "net_refcnt"); ref_tracker_dir_init(&net->notrefcnt_tracker, 128, "net_notrefcnt"); - get_random_bytes(&net->hash_mix, sizeof(u32)); + net->hash_mix = get_random_u32(); net->dev_base_seq = 1; net->user_ns = user_ns; diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 803106fc3134..7cbab90c8784 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -712,7 +712,7 @@ void mesh_plink_timer(struct timer_list *t) "Mesh plink for %pM (retry, timeout): %d %d\n", sta->sta.addr, sta->mesh->plink_retries, sta->mesh->plink_timeout); - get_random_bytes(&rand, sizeof(u32)); + rand = get_random_u32(); sta->mesh->plink_timeout = sta->mesh->plink_timeout + rand % sta->mesh->plink_timeout; ++sta->mesh->plink_retries; diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index c57ed27a5fb0..e2cb9d23e4a0 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -72,7 +72,7 @@ static void subflow_req_create_thmac(struct mptcp_subflow_request_sock *subflow_ struct mptcp_sock *msk = subflow_req->msk; u8 hmac[SHA256_DIGEST_SIZE]; - get_random_bytes(&subflow_req->local_nonce, sizeof(u32)); + subflow_req->local_nonce = get_random_u32(); subflow_generate_hmac(READ_ONCE(msk->local_key), READ_ONCE(msk->remote_key), @@ -1639,7 +1639,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_pm_local *local, ssk = sf->sk; subflow = mptcp_subflow_ctx(ssk); do { - get_random_bytes(&subflow->local_nonce, sizeof(u32)); + subflow->local_nonce = get_random_u32(); } while (!subflow->local_nonce); /* if 'IPADDRANY', the ID will be set later, after the routing */ diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 61c6a5f77c2e..67d5b8c0fe79 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -167,7 +167,7 @@ static struct table_instance *table_instance_alloc(int new_size) ti->n_buckets = new_size; ti->node_ver = 0; - get_random_bytes(&ti->hash_seed, sizeof(u32)); + ti->hash_seed = get_random_u32(); return ti; } diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 2c0017d058d4..de86ac088289 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2727,7 +2727,7 @@ __u32 sctp_generate_tag(const struct sctp_endpoint *ep) __u32 x; do { - get_random_bytes(&x, sizeof(__u32)); + x = get_random_u32(); } while (x == 0); return x; @@ -2738,7 +2738,7 @@ __u32 sctp_generate_tsn(const struct sctp_endpoint *ep) { __u32 retval; - get_random_bytes(&retval, sizeof(__u32)); + retval = get_random_u32(); return retval; } diff --git a/net/tipc/node.c b/net/tipc/node.c index af442a5ef8f3..97aa970a0d83 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1275,7 +1275,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, goto exit; if_name = strchr(b->name, ':') + 1; - get_random_bytes(&session, sizeof(u16)); + session = get_random_u16(); if (!tipc_link_create(net, if_name, b->identity, b->tolerance, b->net_plane, b->mtu, b->priority, b->min_win, b->max_win, session, From 8e6405f8218b3f412d36b772318e94d589513eba Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 8 Apr 2026 16:36:49 +0200 Subject: [PATCH 1305/1561] ipv6: move IFA_F_PERMANENT percpu allocation in process scope Observed at boot time: CPU: 43 UID: 0 PID: 3595 Comm: (t-daemon) Not tainted 6.12.0 #1 Call Trace: dump_stack_lvl+0x4e/0x70 pcpu_alloc_noprof.cold+0x1f/0x4b fib_nh_common_init+0x4c/0x110 fib6_nh_init+0x387/0x740 ip6_route_info_create+0x46d/0x640 addrconf_f6i_alloc+0x13b/0x180 addrconf_permanent_addr+0xd0/0x220 addrconf_notify+0x93/0x540 notifier_call_chain+0x5a/0xd0 __dev_notify_flags+0x5c/0xf0 dev_change_flags+0x54/0x70 do_setlink+0x36c/0xce0 rtnl_setlink+0x11f/0x1d0 rtnetlink_rcv_msg+0x142/0x3f0 netlink_rcv_skb+0x50/0x100 netlink_unicast+0x242/0x390 netlink_sendmsg+0x21b/0x470 __sys_sendto+0x1dc/0x1f0 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x7d/0x160 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x7f5c3852f127 Code: 0c 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 80 3d 85 ef 0c 00 00 41 89 ca 74 10 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 71 c3 55 48 83 ec 30 44 89 4c 24 2c 4c 89 44 RSP: 002b:00007ffe86caf4c8 EFLAGS: 00000202 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000556c5cd93210 RCX: 00007f5c3852f127 RDX: 0000000000000020 RSI: 0000556c5cd938b0 RDI: 0000000000000003 RBP: 00007ffe86caf5a0 R08: 00007ffe86caf4e0 R09: 0000000000000080 R10: 0000000000000000 R11: 0000000000000202 R12: 0000556c5cd932d0 R13: 00000000021d05d1 R14: 00000000021d05d1 R15: 0000000000000001 IFA_F_PERMANENT addresses require the allocation of a bunch of percpu pointers, currently in atomic scope. Similar to commit 51454ea42c1a ("ipv6: fix locking issues with loops over idev->addr_list"), move fixup_permanent_addr() outside the &idev->lock scope, and do the allocations with GFP_KERNEL. With such change fixup_permanent_addr() is invoked with the BH enabled, and the ifp lock acquired there needs the BH variant. Note that we don't need to acquire a reference to the permanent addresses before releasing the mentioned write lock, because addrconf_permanent_addr() runs under RTNL and ifa removal always happens under RTNL, too. Also the PERMANENT flag is constant in the relevant scope, as it can be cleared only by inet6_addr_modify() under the RTNL lock. Reviewed-by: David Ahern Signed-off-by: Paolo Abeni Link: https://patch.msgid.link/46a7a030727e236af2dc7752994cd4f04f4a91d2.1775658924.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index dd0b4d80e0f8..77c77e843c96 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3585,15 +3585,15 @@ static int fixup_permanent_addr(struct net *net, struct fib6_info *f6i, *prev; f6i = addrconf_f6i_alloc(net, idev, &ifp->addr, false, - GFP_ATOMIC, NULL); + GFP_KERNEL, NULL); if (IS_ERR(f6i)) return PTR_ERR(f6i); /* ifp->rt can be accessed outside of rtnl */ - spin_lock(&ifp->lock); + spin_lock_bh(&ifp->lock); prev = ifp->rt; ifp->rt = f6i; - spin_unlock(&ifp->lock); + spin_unlock_bh(&ifp->lock); fib6_info_release(prev); } @@ -3601,7 +3601,7 @@ static int fixup_permanent_addr(struct net *net, if (!(ifp->flags & IFA_F_NOPREFIXROUTE)) { addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->rt_priority, idev->dev, 0, 0, - GFP_ATOMIC); + GFP_KERNEL); } if (ifp->state == INET6_IFADDR_STATE_PREDAD) @@ -3612,29 +3612,36 @@ static int fixup_permanent_addr(struct net *net, static void addrconf_permanent_addr(struct net *net, struct net_device *dev) { - struct inet6_ifaddr *ifp, *tmp; + struct inet6_ifaddr *ifp; + LIST_HEAD(tmp_addr_list); struct inet6_dev *idev; + /* Mutual exclusion with other if_list_aux users. */ + ASSERT_RTNL(); + idev = __in6_dev_get(dev); if (!idev) return; write_lock_bh(&idev->lock); + list_for_each_entry(ifp, &idev->addr_list, if_list) { + if (ifp->flags & IFA_F_PERMANENT) + list_add_tail(&ifp->if_list_aux, &tmp_addr_list); + } + write_unlock_bh(&idev->lock); - list_for_each_entry_safe(ifp, tmp, &idev->addr_list, if_list) { - if ((ifp->flags & IFA_F_PERMANENT) && - fixup_permanent_addr(net, idev, ifp) < 0) { - write_unlock_bh(&idev->lock); + while (!list_empty(&tmp_addr_list)) { + ifp = list_first_entry(&tmp_addr_list, + struct inet6_ifaddr, if_list_aux); + list_del(&ifp->if_list_aux); + if (fixup_permanent_addr(net, idev, ifp) < 0) { net_info_ratelimited("%s: Failed to add prefix route for address %pI6c; dropping\n", idev->dev->name, &ifp->addr); in6_ifa_hold(ifp); ipv6_del_addr(ifp); - write_lock_bh(&idev->lock); } } - - write_unlock_bh(&idev->lock); } static int addrconf_notify(struct notifier_block *this, unsigned long event, From e1ab601bb23006e2710359c0fba27342f8887aec Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Wed, 8 Apr 2026 14:52:37 +0300 Subject: [PATCH 1306/1561] selftests: Migrate nsim-only MACsec tests to Python Move MACsec offload API and ethtool feature tests from tools/testing/selftests/drivers/net/netdevsim/macsec-offload.sh to tools/testing/selftests/drivers/net/macsec.py using the NetDrvEnv framework so tests can run against both netdevsim (default) and real hardware (NETIF=ethX). As some real hardware requires MACsec to use encryption, add that to the tests. Netdevsim-specific limit checks (max SecY, max RX SC) were moved into separate test cases to avoid failures on real hardware. Signed-off-by: Cosmin Ratiu Reviewed-by: Sabrina Dubroca Link: https://patch.msgid.link/20260408115240.1636047-2-cratiu@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/Makefile | 1 + tools/testing/selftests/drivers/net/config | 1 + tools/testing/selftests/drivers/net/macsec.py | 202 ++++++++++++++++++ .../selftests/drivers/net/netdevsim/Makefile | 1 - .../drivers/net/netdevsim/macsec-offload.sh | 117 ---------- 5 files changed, 204 insertions(+), 118 deletions(-) create mode 100755 tools/testing/selftests/drivers/net/macsec.py delete mode 100755 tools/testing/selftests/drivers/net/netdevsim/macsec-offload.sh diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index 8154d6d429d3..5e045dde0273 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -13,6 +13,7 @@ TEST_GEN_FILES := \ TEST_PROGS := \ gro.py \ hds.py \ + macsec.py \ napi_id.py \ napi_threaded.py \ netpoll_basic.py \ diff --git a/tools/testing/selftests/drivers/net/config b/tools/testing/selftests/drivers/net/config index 77ccf83d87e0..d4b31a317c09 100644 --- a/tools/testing/selftests/drivers/net/config +++ b/tools/testing/selftests/drivers/net/config @@ -3,6 +3,7 @@ CONFIG_DEBUG_INFO_BTF=y CONFIG_DEBUG_INFO_BTF_MODULES=n CONFIG_INET_PSP=y CONFIG_IPV6=y +CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y CONFIG_NETCONSOLE_EXTENDED_LOG=y diff --git a/tools/testing/selftests/drivers/net/macsec.py b/tools/testing/selftests/drivers/net/macsec.py new file mode 100755 index 000000000000..5220c0656c0c --- /dev/null +++ b/tools/testing/selftests/drivers/net/macsec.py @@ -0,0 +1,202 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +"""MACsec tests.""" + +import os + +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_raises +from lib.py import CmdExitFailure, KsftSkipEx +from lib.py import NetDrvEpEnv +from lib.py import cmd, ip, defer, ethtool + +# Unique prefix per run to avoid collisions in the shared netns. +# Keep it short: IFNAMSIZ is 16 (incl. NUL), and VLAN names append ".". +MACSEC_PFX = f"ms{os.getpid()}_" + + +def _macsec_name(idx=0): + return f"{MACSEC_PFX}{idx}" + + +def _get_macsec_offload(dev): + """Returns macsec offload mode string from ip -d link show.""" + info = ip(f"-d link show dev {dev}", json=True)[0] + return info.get("linkinfo", {}).get("info_data", {}).get("offload") + + +def _get_features(dev): + """Returns ethtool features dict for a device.""" + return ethtool(f"-k {dev}", json=True)[0] + + +def _require_ip_macsec(cfg): + """SKIP if iproute2 on local or remote lacks 'ip macsec' support.""" + for host in [None, cfg.remote]: + out = cmd("ip macsec help", fail=False, host=host) + if "Usage" not in out.stdout + out.stderr: + where = "remote" if host else "local" + raise KsftSkipEx(f"iproute2 too old on {where}," + " missing macsec support") + + +def _require_ip_macsec_offload(): + """SKIP if local iproute2 doesn't understand 'ip macsec offload'.""" + out = cmd("ip macsec help", fail=False) + if "offload" not in out.stdout + out.stderr: + raise KsftSkipEx("iproute2 too old, missing macsec offload") + + +def _require_macsec_offload(cfg): + """SKIP if local device doesn't support macsec-hw-offload.""" + _require_ip_macsec_offload() + try: + feat = ethtool(f"-k {cfg.ifname}", json=True)[0] + except (CmdExitFailure, IndexError) as e: + raise KsftSkipEx( + f"can't query features: {e}") from e + if not feat.get("macsec-hw-offload", {}).get("active"): + raise KsftSkipEx("macsec-hw-offload not supported") + + +def test_offload_api(cfg) -> None: + """MACsec offload API: create SecY, add SA/rx, toggle offload.""" + + _require_macsec_offload(cfg) + ms0 = _macsec_name(0) + ms1 = _macsec_name(1) + ms2 = _macsec_name(2) + + # Create 3 SecY with offload + ip(f"link add link {cfg.ifname} {ms0} type macsec " + f"port 4 encrypt on offload mac") + defer(ip, f"link del {ms0}") + + ip(f"link add link {cfg.ifname} {ms1} type macsec " + f"address aa:bb:cc:dd:ee:ff port 5 encrypt on offload mac") + defer(ip, f"link del {ms1}") + + ip(f"link add link {cfg.ifname} {ms2} type macsec " + f"sci abbacdde01020304 encrypt on offload mac") + defer(ip, f"link del {ms2}") + + # Add TX SA + ip(f"macsec add {ms0} tx sa 0 pn 1024 on " + "key 01 12345678901234567890123456789012") + + # Add RX SC + SA + ip(f"macsec add {ms0} rx port 1234 address 1c:ed:de:ad:be:ef") + ip(f"macsec add {ms0} rx port 1234 address 1c:ed:de:ad:be:ef " + "sa 0 pn 1 on key 00 0123456789abcdef0123456789abcdef") + + # Can't disable offload when SAs are configured + with ksft_raises(CmdExitFailure): + ip(f"link set {ms0} type macsec offload off") + with ksft_raises(CmdExitFailure): + ip(f"macsec offload {ms0} off") + + # Toggle offload via rtnetlink on SA-free device + ip(f"link set {ms2} type macsec offload off") + ip(f"link set {ms2} type macsec encrypt on offload mac") + + # Toggle offload via genetlink + ip(f"macsec offload {ms2} off") + ip(f"macsec offload {ms2} mac") + + +def test_max_secy(cfg) -> None: + """nsim-only test for max number of SecYs.""" + + cfg.require_nsim() + _require_ip_macsec_offload() + ms0 = _macsec_name(0) + ms1 = _macsec_name(1) + ms2 = _macsec_name(2) + ms3 = _macsec_name(3) + + ip(f"link add link {cfg.ifname} {ms0} type macsec " + f"port 4 encrypt on offload mac") + defer(ip, f"link del {ms0}") + + ip(f"link add link {cfg.ifname} {ms1} type macsec " + f"address aa:bb:cc:dd:ee:ff port 5 encrypt on offload mac") + defer(ip, f"link del {ms1}") + + ip(f"link add link {cfg.ifname} {ms2} type macsec " + f"sci abbacdde01020304 encrypt on offload mac") + defer(ip, f"link del {ms2}") + with ksft_raises(CmdExitFailure): + ip(f"link add link {cfg.ifname} {ms3} " + f"type macsec port 8 encrypt on offload mac") + + +def test_max_sc(cfg) -> None: + """nsim-only test for max number of SCs.""" + + cfg.require_nsim() + _require_ip_macsec_offload() + ms0 = _macsec_name(0) + + ip(f"link add link {cfg.ifname} {ms0} type macsec " + f"port 4 encrypt on offload mac") + defer(ip, f"link del {ms0}") + ip(f"macsec add {ms0} rx port 1234 address 1c:ed:de:ad:be:ef") + with ksft_raises(CmdExitFailure): + ip(f"macsec add {ms0} rx port 1235 address 1c:ed:de:ad:be:ef") + + +def test_offload_state(cfg) -> None: + """Offload state reflects configuration changes.""" + + _require_macsec_offload(cfg) + ms0 = _macsec_name(0) + + # Create with offload on + ip(f"link add link {cfg.ifname} {ms0} type macsec " + f"encrypt on offload mac") + cleanup = defer(ip, f"link del {ms0}") + + ksft_eq(_get_macsec_offload(ms0), "mac", + "created with offload: should be mac") + feats_on_1 = _get_features(ms0) + + ip(f"link set {ms0} type macsec offload off") + ksft_eq(_get_macsec_offload(ms0), "off", + "offload disabled: should be off") + feats_off_1 = _get_features(ms0) + + ip(f"link set {ms0} type macsec encrypt on offload mac") + ksft_eq(_get_macsec_offload(ms0), "mac", + "offload re-enabled: should be mac") + ksft_eq(_get_features(ms0), feats_on_1, + "features should match first offload-on snapshot") + + # Delete and recreate without offload + cleanup.exec() + ip(f"link add link {cfg.ifname} {ms0} type macsec") + defer(ip, f"link del {ms0}") + ksft_eq(_get_macsec_offload(ms0), "off", + "created without offload: should be off") + ksft_eq(_get_features(ms0), feats_off_1, + "features should match first offload-off snapshot") + + ip(f"link set {ms0} type macsec encrypt on offload mac") + ksft_eq(_get_macsec_offload(ms0), "mac", + "offload enabled after create: should be mac") + ksft_eq(_get_features(ms0), feats_on_1, + "features should match first offload-on snapshot") + + +def main() -> None: + """Main program.""" + with NetDrvEpEnv(__file__) as cfg: + ksft_run([test_offload_api, + test_max_secy, + test_max_sc, + test_offload_state, + ], args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/netdevsim/Makefile b/tools/testing/selftests/drivers/net/netdevsim/Makefile index 1a228c5430f5..9808c2fbae9e 100644 --- a/tools/testing/selftests/drivers/net/netdevsim/Makefile +++ b/tools/testing/selftests/drivers/net/netdevsim/Makefile @@ -11,7 +11,6 @@ TEST_PROGS := \ fib.sh \ fib_notifications.sh \ hw_stats_l3.sh \ - macsec-offload.sh \ nexthop.sh \ peer.sh \ psample.sh \ diff --git a/tools/testing/selftests/drivers/net/netdevsim/macsec-offload.sh b/tools/testing/selftests/drivers/net/netdevsim/macsec-offload.sh deleted file mode 100755 index 98033e6667d2..000000000000 --- a/tools/testing/selftests/drivers/net/netdevsim/macsec-offload.sh +++ /dev/null @@ -1,117 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0-only - -source ethtool-common.sh - -NSIM_NETDEV=$(make_netdev) -MACSEC_NETDEV=macsec_nsim - -set -o pipefail - -if ! ethtool -k $NSIM_NETDEV | grep -q 'macsec-hw-offload: on'; then - echo "SKIP: netdevsim doesn't support MACsec offload" - exit 4 -fi - -if ! ip link add link $NSIM_NETDEV $MACSEC_NETDEV type macsec offload mac 2>/dev/null; then - echo "SKIP: couldn't create macsec device" - exit 4 -fi -ip link del $MACSEC_NETDEV - -# -# test macsec offload API -# - -ip link add link $NSIM_NETDEV "${MACSEC_NETDEV}" type macsec port 4 offload mac -check $? - -ip link add link $NSIM_NETDEV "${MACSEC_NETDEV}2" type macsec address "aa:bb:cc:dd:ee:ff" port 5 offload mac -check $? - -ip link add link $NSIM_NETDEV "${MACSEC_NETDEV}3" type macsec sci abbacdde01020304 offload mac -check $? - -ip link add link $NSIM_NETDEV "${MACSEC_NETDEV}4" type macsec port 8 offload mac 2> /dev/null -check $? '' '' 1 - -ip macsec add "${MACSEC_NETDEV}" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012 -check $? - -ip macsec add "${MACSEC_NETDEV}" rx port 1234 address "1c:ed:de:ad:be:ef" -check $? - -ip macsec add "${MACSEC_NETDEV}" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on \ - key 00 0123456789abcdef0123456789abcdef -check $? - -ip macsec add "${MACSEC_NETDEV}" rx port 1235 address "1c:ed:de:ad:be:ef" 2> /dev/null -check $? '' '' 1 - -# can't disable macsec offload when SAs are configured -ip link set "${MACSEC_NETDEV}" type macsec offload off 2> /dev/null -check $? '' '' 1 - -ip macsec offload "${MACSEC_NETDEV}" off 2> /dev/null -check $? '' '' 1 - -# toggle macsec offload via rtnetlink -ip link set "${MACSEC_NETDEV}2" type macsec offload off -check $? - -ip link set "${MACSEC_NETDEV}2" type macsec offload mac -check $? - -# toggle macsec offload via genetlink -ip macsec offload "${MACSEC_NETDEV}2" off -check $? - -ip macsec offload "${MACSEC_NETDEV}2" mac -check $? - -for dev in ${MACSEC_NETDEV}{,2,3} ; do - ip link del $dev - check $? -done - - -# -# test ethtool features when toggling offload -# - -ip link add link $NSIM_NETDEV $MACSEC_NETDEV type macsec offload mac -TMP_FEATS_ON_1="$(ethtool -k $MACSEC_NETDEV)" - -ip link set $MACSEC_NETDEV type macsec offload off -TMP_FEATS_OFF_1="$(ethtool -k $MACSEC_NETDEV)" - -ip link set $MACSEC_NETDEV type macsec offload mac -TMP_FEATS_ON_2="$(ethtool -k $MACSEC_NETDEV)" - -[ "$TMP_FEATS_ON_1" = "$TMP_FEATS_ON_2" ] -check $? - -ip link del $MACSEC_NETDEV - -ip link add link $NSIM_NETDEV $MACSEC_NETDEV type macsec -check $? - -TMP_FEATS_OFF_2="$(ethtool -k $MACSEC_NETDEV)" -[ "$TMP_FEATS_OFF_1" = "$TMP_FEATS_OFF_2" ] -check $? - -ip link set $MACSEC_NETDEV type macsec offload mac -check $? - -TMP_FEATS_ON_3="$(ethtool -k $MACSEC_NETDEV)" -[ "$TMP_FEATS_ON_1" = "$TMP_FEATS_ON_3" ] -check $? - - -if [ $num_errors -eq 0 ]; then - echo "PASSED all $((num_passes)) checks" - exit 0 -else - echo "FAILED $num_errors/$((num_errors+num_passes)) checks" - exit 1 -fi From c89f194b6b8eddc905425a4ad702db803f50af47 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Wed, 8 Apr 2026 14:52:38 +0300 Subject: [PATCH 1307/1561] nsim: Add support for VLAN filters Add support for storing the list of VLANs in nsim devices, together with ops for adding/removing them and a debug file to show them. This will be used in upcoming tests. Signed-off-by: Cosmin Ratiu Reviewed-by: Sabrina Dubroca Link: https://patch.msgid.link/20260408115240.1636047-3-cratiu@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/netdev.c | 65 ++++++++++++++++++++++++++++++- drivers/net/netdevsim/netdevsim.h | 8 ++++ 2 files changed, 71 insertions(+), 2 deletions(-) diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 3645ebde049a..19b0ff183c45 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -605,6 +605,36 @@ static int nsim_stop(struct net_device *dev) return 0; } +static int nsim_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) +{ + struct netdevsim *ns = netdev_priv(dev); + + if (vid >= VLAN_N_VID) + return -EINVAL; + + if (proto == htons(ETH_P_8021Q)) + WARN_ON_ONCE(test_and_set_bit(vid, ns->vlan.ctag)); + else if (proto == htons(ETH_P_8021AD)) + WARN_ON_ONCE(test_and_set_bit(vid, ns->vlan.stag)); + + return 0; +} + +static int nsim_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) +{ + struct netdevsim *ns = netdev_priv(dev); + + if (vid >= VLAN_N_VID) + return -EINVAL; + + if (proto == htons(ETH_P_8021Q)) + WARN_ON_ONCE(!test_and_clear_bit(vid, ns->vlan.ctag)); + else if (proto == htons(ETH_P_8021AD)) + WARN_ON_ONCE(!test_and_clear_bit(vid, ns->vlan.stag)); + + return 0; +} + static int nsim_shaper_set(struct net_shaper_binding *binding, const struct net_shaper *shaper, struct netlink_ext_ack *extack) @@ -662,6 +692,8 @@ static const struct net_device_ops nsim_netdev_ops = { .ndo_bpf = nsim_bpf, .ndo_open = nsim_open, .ndo_stop = nsim_stop, + .ndo_vlan_rx_add_vid = nsim_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = nsim_vlan_rx_kill_vid, .net_shaper_ops = &nsim_shaper_ops, }; @@ -673,6 +705,8 @@ static const struct net_device_ops nsim_vf_netdev_ops = { .ndo_change_mtu = nsim_change_mtu, .ndo_setup_tc = nsim_setup_tc, .ndo_set_features = nsim_set_features, + .ndo_vlan_rx_add_vid = nsim_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = nsim_vlan_rx_kill_vid, }; /* We don't have true per-queue stats, yet, so do some random fakery here. @@ -970,6 +1004,20 @@ static const struct file_operations nsim_pp_hold_fops = { .owner = THIS_MODULE, }; +static int nsim_vlan_show(struct seq_file *s, void *data) +{ + struct netdevsim *ns = s->private; + int vid; + + for_each_set_bit(vid, ns->vlan.ctag, VLAN_N_VID) + seq_printf(s, "ctag %d\n", vid); + for_each_set_bit(vid, ns->vlan.stag, VLAN_N_VID) + seq_printf(s, "stag %d\n", vid); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(nsim_vlan); + static void nsim_setup(struct net_device *dev) { ether_setup(dev); @@ -982,14 +1030,18 @@ static void nsim_setup(struct net_device *dev) NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | NETIF_F_LRO | - NETIF_F_TSO; + NETIF_F_TSO | + NETIF_F_HW_VLAN_CTAG_FILTER | + NETIF_F_HW_VLAN_STAG_FILTER; dev->hw_features |= NETIF_F_HW_TC | NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | NETIF_F_LRO | NETIF_F_TSO | - NETIF_F_LOOPBACK; + NETIF_F_LOOPBACK | + NETIF_F_HW_VLAN_CTAG_FILTER | + NETIF_F_HW_VLAN_STAG_FILTER; dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS; dev->max_mtu = ETH_MAX_MTU; dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_HW_OFFLOAD; @@ -1156,6 +1208,8 @@ struct netdevsim *nsim_create(struct nsim_dev *nsim_dev, ns->qr_dfs = debugfs_create_file("queue_reset", 0200, nsim_dev_port->ddir, ns, &nsim_qreset_fops); + ns->vlan_dfs = debugfs_create_file("vlan", 0400, nsim_dev_port->ddir, + ns, &nsim_vlan_fops); return ns; err_free_netdev: @@ -1167,7 +1221,9 @@ void nsim_destroy(struct netdevsim *ns) { struct net_device *dev = ns->netdev; struct netdevsim *peer; + u16 vid; + debugfs_remove(ns->vlan_dfs); debugfs_remove(ns->qr_dfs); debugfs_remove(ns->pp_dfs); @@ -1193,6 +1249,11 @@ void nsim_destroy(struct netdevsim *ns) if (nsim_dev_port_is_pf(ns->nsim_dev_port)) nsim_exit_netdevsim(ns); + for_each_set_bit(vid, ns->vlan.ctag, VLAN_N_VID) + WARN_ON_ONCE(1); + for_each_set_bit(vid, ns->vlan.stag, VLAN_N_VID) + WARN_ON_ONCE(1); + /* Put this intentionally late to exercise the orphaning path */ if (ns->page) { page_pool_put_full_page(pp_page_to_nmdesc(ns->page)->pp, diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index f767fc8a7505..f844c27ca78b 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -75,6 +76,11 @@ struct nsim_macsec { u8 nsim_secy_count; }; +struct nsim_vlan { + DECLARE_BITMAP(ctag, VLAN_N_VID); + DECLARE_BITMAP(stag, VLAN_N_VID); +}; + struct nsim_ethtool_pauseparam { bool rx; bool tx; @@ -135,6 +141,7 @@ struct netdevsim { bool bpf_map_accept; struct nsim_ipsec ipsec; struct nsim_macsec macsec; + struct nsim_vlan vlan; struct { u32 inject_error; u32 __ports[2][NSIM_UDP_TUNNEL_N_PORTS]; @@ -146,6 +153,7 @@ struct netdevsim { struct page *page; struct dentry *pp_dfs; struct dentry *qr_dfs; + struct dentry *vlan_dfs; struct nsim_ethtool ethtool; struct netdevsim __rcu *peer; From 26555673bc7888b80d5867618525eb04d4216a24 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Wed, 8 Apr 2026 14:52:39 +0300 Subject: [PATCH 1308/1561] selftests: Add MACsec VLAN propagation traffic test Add VLAN filter propagation tests through offloaded MACsec devices via actual traffic. The tests create MACsec tunnels with matching SAs on both endpoints, stack VLANs on top, and verify connectivity with ping. Covered: - Offloaded MACsec with VLAN (filters propagate to HW) - Software MACsec with VLAN (no HW filter propagation) - Offload on/off toggle and verifying traffic still works On netdevsim this makes use of the VLAN filter debugfs file to actually validate that filters are applied/removed correctly. On real hardware the traffic should validate actual VLAN filter propagation. Signed-off-by: Cosmin Ratiu Reviewed-by: Sabrina Dubroca Link: https://patch.msgid.link/20260408115240.1636047-4-cratiu@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/config | 1 + .../selftests/drivers/net/lib/py/env.py | 9 ++ tools/testing/selftests/drivers/net/macsec.py | 141 ++++++++++++++++++ 3 files changed, 151 insertions(+) diff --git a/tools/testing/selftests/drivers/net/config b/tools/testing/selftests/drivers/net/config index d4b31a317c09..fd16994366f4 100644 --- a/tools/testing/selftests/drivers/net/config +++ b/tools/testing/selftests/drivers/net/config @@ -8,4 +8,5 @@ CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y CONFIG_NETCONSOLE_EXTENDED_LOG=y CONFIG_NETDEVSIM=m +CONFIG_VLAN_8021Q=m CONFIG_XDP_SOCKETS=y diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py index 41cc248ac848..b80666277e36 100644 --- a/tools/testing/selftests/drivers/net/lib/py/env.py +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -255,6 +255,15 @@ class NetDrvEpEnv(NetDrvEnvBase): if nsim_test is False and self._ns is not None: raise KsftXfailEx("Test does not work on netdevsim") + def get_local_nsim_dev(self): + """Returns the local netdevsim device or None. + Using this method is discouraged, as it makes tests nsim-specific. + Standard interfaces available on all HW should ideally be used. + This method is intended for the few cases where nsim-specific + assertions need to be verified which cannot be verified otherwise. + """ + return self._ns + def _require_cmd(self, comm, key, host=None): cached = self._required_cmd.get(comm, {}) if cached.get(key) is None: diff --git a/tools/testing/selftests/drivers/net/macsec.py b/tools/testing/selftests/drivers/net/macsec.py index 5220c0656c0c..9a83d9542e04 100755 --- a/tools/testing/selftests/drivers/net/macsec.py +++ b/tools/testing/selftests/drivers/net/macsec.py @@ -6,10 +6,14 @@ import os from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_raises +from lib.py import ksft_variants, KsftNamedVariant from lib.py import CmdExitFailure, KsftSkipEx from lib.py import NetDrvEpEnv from lib.py import cmd, ip, defer, ethtool +MACSEC_KEY = "12345678901234567890123456789012" +MACSEC_VLAN_VID = 10 + # Unique prefix per run to avoid collisions in the shared netns. # Keep it short: IFNAMSIZ is 16 (incl. NUL), and VLAN names append ".". MACSEC_PFX = f"ms{os.getpid()}_" @@ -59,6 +63,78 @@ def _require_macsec_offload(cfg): raise KsftSkipEx("macsec-hw-offload not supported") +def _get_mac(ifname, host=None): + """Gets MAC address of an interface.""" + dev = ip(f"link show dev {ifname}", json=True, host=host) + return dev[0]["address"] + + +def _setup_macsec_sa(cfg, name): + """Adds matching TX/RX SAs on both ends.""" + local_mac = _get_mac(name) + remote_mac = _get_mac(name, host=cfg.remote) + + ip(f"macsec add {name} tx sa 0 pn 1 on key 01 {MACSEC_KEY}") + ip(f"macsec add {name} rx port 1 address {remote_mac}") + ip(f"macsec add {name} rx port 1 address {remote_mac} " + f"sa 0 pn 1 on key 02 {MACSEC_KEY}") + + ip(f"macsec add {name} tx sa 0 pn 1 on key 02 {MACSEC_KEY}", + host=cfg.remote) + ip(f"macsec add {name} rx port 1 address {local_mac}", host=cfg.remote) + ip(f"macsec add {name} rx port 1 address {local_mac} " + f"sa 0 pn 1 on key 01 {MACSEC_KEY}", host=cfg.remote) + + +def _setup_macsec_devs(cfg, name, offload): + """Creates macsec devices on both ends. + + Only the local device gets HW offload; the remote always uses software + MACsec since it may not support offload at all. + """ + offload_arg = "mac" if offload else "off" + + ip(f"link add link {cfg.ifname} {name} " + f"type macsec encrypt on offload {offload_arg}") + defer(ip, f"link del {name}") + ip(f"link add link {cfg.remote_ifname} {name} " + f"type macsec encrypt on", host=cfg.remote) + defer(ip, f"link del {name}", host=cfg.remote) + + +def _set_offload(name, offload): + """Sets offload on the local macsec device only.""" + offload_arg = "mac" if offload else "off" + + ip(f"link set {name} type macsec encrypt on offload {offload_arg}") + + +def _setup_vlans(cfg, name, vid): + """Adds VLANs on top of existing macsec devs.""" + vlan_name = f"{name}.{vid}" + + ip(f"link add link {name} {vlan_name} type vlan id {vid}") + defer(ip, f"link del {vlan_name}") + ip(f"link add link {name} {vlan_name} type vlan id {vid}", host=cfg.remote) + defer(ip, f"link del {vlan_name}", host=cfg.remote) + + +def _setup_vlan_ips(cfg, name, vid): + """Adds VLANs and IPs and brings up the macsec + VLAN devices.""" + local_ip = "198.51.100.1" + remote_ip = "198.51.100.2" + vlan_name = f"{name}.{vid}" + + ip(f"addr add {local_ip}/24 dev {vlan_name}") + ip(f"addr add {remote_ip}/24 dev {vlan_name}", host=cfg.remote) + ip(f"link set {name} up") + ip(f"link set {name} up", host=cfg.remote) + ip(f"link set {vlan_name} up") + ip(f"link set {vlan_name} up", host=cfg.remote) + + return vlan_name, remote_ip + + def test_offload_api(cfg) -> None: """MACsec offload API: create SecY, add SA/rx, toggle offload.""" @@ -187,6 +263,69 @@ def test_offload_state(cfg) -> None: "features should match first offload-on snapshot") +def _check_nsim_vid(cfg, vid, expected) -> None: + """Checks if a VLAN is present. Only works on netdevsim.""" + + nsim = cfg.get_local_nsim_dev() + if not nsim: + return + + vlan_path = os.path.join(nsim.nsims[0].dfs_dir, "vlan") + with open(vlan_path, encoding="utf-8") as f: + vids = f.read() + found = f"ctag {vid}\n" in vids + ksft_eq(found, expected, + f"VLAN {vid} {'expected' if expected else 'not expected'}" + f" in debugfs") + + +@ksft_variants([ + KsftNamedVariant("offloaded", True), + KsftNamedVariant("software", False), +]) +def test_vlan(cfg, offload) -> None: + """Ping through VLAN-over-macsec.""" + + _require_ip_macsec(cfg) + if offload: + _require_macsec_offload(cfg) + else: + _require_ip_macsec_offload() + name = _macsec_name() + _setup_macsec_devs(cfg, name, offload=offload) + _setup_macsec_sa(cfg, name) + _setup_vlans(cfg, name, MACSEC_VLAN_VID) + vlan_name, remote_ip = _setup_vlan_ips(cfg, name, MACSEC_VLAN_VID) + _check_nsim_vid(cfg, MACSEC_VLAN_VID, offload) + # nsim doesn't handle the data path for offloaded macsec, so skip + # the ping when offloaded on nsim. + if not offload or not cfg.get_local_nsim_dev(): + cmd(f"ping -I {vlan_name} -c 1 -W 5 {remote_ip}") + + +@ksft_variants([ + KsftNamedVariant("on_to_off", True), + KsftNamedVariant("off_to_on", False), +]) +def test_vlan_toggle(cfg, offload) -> None: + """Toggle offload: VLAN filters propagate/remove correctly.""" + + _require_ip_macsec(cfg) + _require_macsec_offload(cfg) + name = _macsec_name() + _setup_macsec_devs(cfg, name, offload=offload) + _setup_vlans(cfg, name, MACSEC_VLAN_VID) + _check_nsim_vid(cfg, MACSEC_VLAN_VID, offload) + _set_offload(name, offload=not offload) + _check_nsim_vid(cfg, MACSEC_VLAN_VID, not offload) + vlan_name, remote_ip = _setup_vlan_ips(cfg, name, MACSEC_VLAN_VID) + _setup_macsec_sa(cfg, name) + # nsim doesn't handle the data path for offloaded macsec, so skip + # the ping when the final state is offloaded on nsim. + if offload or not cfg.get_local_nsim_dev(): + cmd(f"ping -I {vlan_name} -c 1 -W 5 {remote_ip}") + + def main() -> None: """Main program.""" with NetDrvEpEnv(__file__) as cfg: @@ -194,6 +333,8 @@ def main() -> None: test_max_secy, test_max_sc, test_offload_state, + test_vlan, + test_vlan_toggle, ], args=(cfg,)) ksft_exit() From a363b1c8be879c79a688eaf93ba01b63f8b0e63c Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Wed, 8 Apr 2026 14:52:40 +0300 Subject: [PATCH 1309/1561] macsec: Support VLAN-filtering lower devices VLAN-filtering is done through two netdev features (NETIF_F_HW_VLAN_CTAG_FILTER and NETIF_F_HW_VLAN_STAG_FILTER) and two netdev ops (ndo_vlan_rx_add_vid and ndo_vlan_rx_kill_vid). Implement these and advertise the features if the lower device supports them. This allows proper VLAN filtering to work on top of MACsec devices, when the lower device is capable of VLAN filtering. As a concrete example, having this chain of interfaces now works: vlan_filtering_capable_dev(1) -> macsec_dev(2) -> macsec_vlan_dev(3) Before the mentioned commit this used to accidentally work because the MACsec device (and thus the lower device) was put in promiscuous mode and the VLAN filter was not used. But after commit [1] correctly made the macsec driver expose the IFF_UNICAST_FLT flag, promiscuous mode was no longer used and VLAN filters on dev 1 kicked in. Without support in dev 2 for propagating VLAN filters down, the register_vlan_dev -> vlan_vid_add -> __vlan_vid_add -> vlan_add_rx_filter_info call from dev 3 is silently eaten (because vlan_hw_filter_capable returns false and vlan_add_rx_filter_info silently succeeds). For MACsec, VLAN filters are only relevant for offload, otherwise the VLANs are encrypted and the lower devices don't care about them. So VLAN filters are only passed on to lower devices in offload mode. Flipping between offload modes now needs to offload/unoffload the filters with vlan_{get,drop}_rx_*_filter_info(). To avoid the back-and-forth filter updating during rollback, the setting of macsec->offload is moved after the add/del secy ops. This is safe since none of the code called from those requires macsec->offload. In case adding the filters fails, the added ones are rolled back and an error is returned to the operation toggling the offload state. Fixes: 0349659fd72f ("macsec: set IFF_UNICAST_FLT priv flag") Signed-off-by: Cosmin Ratiu Reviewed-by: Sabrina Dubroca Link: https://patch.msgid.link/20260408115240.1636047-5-cratiu@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/macsec.c | 71 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 63 insertions(+), 8 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index f6cad0746a02..6147ee8b1d78 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -2584,7 +2584,9 @@ static void macsec_inherit_tso_max(struct net_device *dev) netif_inherit_tso_max(dev, macsec->real_dev); } -static int macsec_update_offload(struct net_device *dev, enum macsec_offload offload) +static int macsec_update_offload(struct net_device *dev, + enum macsec_offload offload, + struct netlink_ext_ack *extack) { enum macsec_offload prev_offload; const struct macsec_ops *ops; @@ -2616,14 +2618,35 @@ static int macsec_update_offload(struct net_device *dev, enum macsec_offload off if (!ops) return -EOPNOTSUPP; - macsec->offload = offload; - ctx.secy = &macsec->secy; ret = offload == MACSEC_OFFLOAD_OFF ? macsec_offload(ops->mdo_del_secy, &ctx) : macsec_offload(ops->mdo_add_secy, &ctx); - if (ret) { - macsec->offload = prev_offload; + if (ret) return ret; + + /* Remove VLAN filters when disabling offload. */ + if (offload == MACSEC_OFFLOAD_OFF) { + vlan_drop_rx_ctag_filter_info(dev); + vlan_drop_rx_stag_filter_info(dev); + } + macsec->offload = offload; + /* Add VLAN filters when enabling offload. */ + if (prev_offload == MACSEC_OFFLOAD_OFF) { + ret = vlan_get_rx_ctag_filter_info(dev); + if (ret) { + NL_SET_ERR_MSG_FMT(extack, + "adding ctag VLAN filters failed, err %d", + ret); + goto rollback_offload; + } + ret = vlan_get_rx_stag_filter_info(dev); + if (ret) { + NL_SET_ERR_MSG_FMT(extack, + "adding stag VLAN filters failed, err %d", + ret); + vlan_drop_rx_ctag_filter_info(dev); + goto rollback_offload; + } } macsec_set_head_tail_room(dev); @@ -2633,6 +2656,12 @@ static int macsec_update_offload(struct net_device *dev, enum macsec_offload off netdev_update_features(dev); + return 0; + +rollback_offload: + macsec->offload = prev_offload; + macsec_offload(ops->mdo_del_secy, &ctx); + return ret; } @@ -2673,7 +2702,7 @@ static int macsec_upd_offload(struct sk_buff *skb, struct genl_info *info) offload = nla_get_u8(tb_offload[MACSEC_OFFLOAD_ATTR_TYPE]); if (macsec->offload != offload) - ret = macsec_update_offload(dev, offload); + ret = macsec_update_offload(dev, offload, info->extack); out: rtnl_unlock(); return ret; @@ -3486,7 +3515,8 @@ static netdev_tx_t macsec_start_xmit(struct sk_buff *skb, } #define MACSEC_FEATURES \ - (NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST) + (NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ + NETIF_F_HW_VLAN_STAG_FILTER | NETIF_F_HW_VLAN_CTAG_FILTER) #define MACSEC_OFFLOAD_FEATURES \ (MACSEC_FEATURES | NETIF_F_GSO_SOFTWARE | NETIF_F_SOFT_FEATURES | \ @@ -3707,6 +3737,29 @@ restore_old_addr: return err; } +static int macsec_vlan_rx_add_vid(struct net_device *dev, + __be16 proto, u16 vid) +{ + struct macsec_dev *macsec = netdev_priv(dev); + + if (!macsec_is_offloaded(macsec)) + return 0; + + return vlan_vid_add(macsec->real_dev, proto, vid); +} + +static int macsec_vlan_rx_kill_vid(struct net_device *dev, + __be16 proto, u16 vid) +{ + struct macsec_dev *macsec = netdev_priv(dev); + + if (!macsec_is_offloaded(macsec)) + return 0; + + vlan_vid_del(macsec->real_dev, proto, vid); + return 0; +} + static int macsec_change_mtu(struct net_device *dev, int new_mtu) { struct macsec_dev *macsec = macsec_priv(dev); @@ -3748,6 +3801,8 @@ static const struct net_device_ops macsec_netdev_ops = { .ndo_set_rx_mode = macsec_dev_set_rx_mode, .ndo_change_rx_flags = macsec_dev_change_rx_flags, .ndo_set_mac_address = macsec_set_mac_address, + .ndo_vlan_rx_add_vid = macsec_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = macsec_vlan_rx_kill_vid, .ndo_start_xmit = macsec_start_xmit, .ndo_get_stats64 = macsec_get_stats64, .ndo_get_iflink = macsec_get_iflink, @@ -3912,7 +3967,7 @@ static int macsec_changelink(struct net_device *dev, struct nlattr *tb[], offload = nla_get_u8(data[IFLA_MACSEC_OFFLOAD]); if (macsec->offload != offload) { macsec_offload_state_change = true; - ret = macsec_update_offload(dev, offload); + ret = macsec_update_offload(dev, offload, extack); if (ret) goto cleanup; } From 4de7a8acd18e2b71591e286678a8ed711e258090 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 6 Apr 2026 01:29:56 +0200 Subject: [PATCH 1310/1561] dt-bindings: net: realtek,rtl82xx: Keep property list sorted Sort the documented properties alphabetically, no functional change. Acked-by: Rob Herring (Arm) Signed-off-by: Marek Vasut Link: https://patch.msgid.link/20260405233008.148974-1-marek.vasut@mailbox.org Signed-off-by: Jakub Kicinski --- .../devicetree/bindings/net/realtek,rtl82xx.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/devicetree/bindings/net/realtek,rtl82xx.yaml b/Documentation/devicetree/bindings/net/realtek,rtl82xx.yaml index 2b5697bd7c5d..eafcc2f3e3d6 100644 --- a/Documentation/devicetree/bindings/net/realtek,rtl82xx.yaml +++ b/Documentation/devicetree/bindings/net/realtek,rtl82xx.yaml @@ -40,16 +40,16 @@ properties: leds: true - realtek,clkout-disable: - type: boolean - description: - Disable CLKOUT clock, CLKOUT clock default is enabled after hardware reset. - realtek,aldps-enable: type: boolean description: Enable ALDPS mode, ALDPS mode default is disabled after hardware reset. + realtek,clkout-disable: + type: boolean + description: + Disable CLKOUT clock, CLKOUT clock default is enabled after hardware reset. + wakeup-source: type: boolean description: From bfb859a5cb4941db06b37fd8bbd8e6d8a0dd5dcf Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 6 Apr 2026 01:29:57 +0200 Subject: [PATCH 1311/1561] dt-bindings: net: realtek,rtl82xx: Document realtek,*-ssc-enable property Document support for spread spectrum clocking (SSC) on RTL8211F(D)(I)-CG, RTL8211FS(I)(-VS)-CG, RTL8211FG(I)(-VS)-CG PHYs. Introduce DT properties 'realtek,clkout-ssc-enable', 'realtek,rxc-ssc-enable' and 'realtek,sysclk-ssc-enable' which control CLKOUT, RXC and SYSCLK SSC spread spectrum clocking enablement on these signals. These clock are not exposed via the clock API, therefore assigned-clock-sscs property does not apply. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Marek Vasut Link: https://patch.msgid.link/20260405233008.148974-2-marek.vasut@mailbox.org Signed-off-by: Jakub Kicinski --- .../devicetree/bindings/net/realtek,rtl82xx.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Documentation/devicetree/bindings/net/realtek,rtl82xx.yaml b/Documentation/devicetree/bindings/net/realtek,rtl82xx.yaml index eafcc2f3e3d6..45033c31a2d5 100644 --- a/Documentation/devicetree/bindings/net/realtek,rtl82xx.yaml +++ b/Documentation/devicetree/bindings/net/realtek,rtl82xx.yaml @@ -50,6 +50,21 @@ properties: description: Disable CLKOUT clock, CLKOUT clock default is enabled after hardware reset. + realtek,clkout-ssc-enable: + type: boolean + description: + Enable CLKOUT SSC mode, CLKOUT SSC mode default is disabled after hardware reset. + + realtek,rxc-ssc-enable: + type: boolean + description: + Enable RXC SSC mode, RXC SSC mode default is disabled after hardware reset. + + realtek,sysclk-ssc-enable: + type: boolean + description: + Enable SYSCLK SSC mode, SYSCLK SSC mode default is disabled after hardware reset. + wakeup-source: type: boolean description: From 84c5a3f00084ffd741a4c3261a58dd10cd5aceaf Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 6 Apr 2026 01:29:58 +0200 Subject: [PATCH 1312/1561] net: phy: realtek: Add property to enable SSC Add support for spread spectrum clocking (SSC) on RTL8211F(D)(I)-CG, RTL8211FS(I)(-VS)-CG, RTL8211FG(I)(-VS)-CG PHYs. The implementation follows EMI improvement application note Rev. 1.2 for these PHYs. The current implementation enables SSC for both RXC and SYSCLK clock signals. Introduce DT properties 'realtek,clkout-ssc-enable', 'realtek,rxc-ssc-enable' and 'realtek,sysclk-ssc-enable' which control CLKOUT, RXC and SYSCLK SSC spread spectrum clocking enablement on these signals. Signed-off-by: Marek Vasut Link: https://patch.msgid.link/20260405233008.148974-3-marek.vasut@mailbox.org Signed-off-by: Jakub Kicinski --- drivers/net/phy/realtek/realtek_main.c | 127 +++++++++++++++++++++++++ 1 file changed, 127 insertions(+) diff --git a/drivers/net/phy/realtek/realtek_main.c b/drivers/net/phy/realtek/realtek_main.c index c3604e1c45d9..bdb1221023fb 100644 --- a/drivers/net/phy/realtek/realtek_main.c +++ b/drivers/net/phy/realtek/realtek_main.c @@ -82,10 +82,18 @@ #define RTL8211F_PHYCR2 0x19 #define RTL8211F_CLKOUT_EN BIT(0) +#define RTL8211F_SYSCLK_SSC_EN BIT(3) #define RTL8211F_PHYCR2_PHY_EEE_ENABLE BIT(5) +#define RTL8211F_CLKOUT_SSC_EN BIT(7) +#define RTL8211F_CLKOUT_SSC_CAP GENMASK(13, 12) #define RTL8211F_INSR 0x1d +/* RTL8211F SSC settings */ +#define RTL8211F_SSC_PAGE 0xc44 +#define RTL8211F_SSC_RXC 0x13 +#define RTL8211F_SSC_SYSCLK 0x17 + /* RTL8211F LED configuration */ #define RTL8211F_LEDCR_PAGE 0xd04 #define RTL8211F_LEDCR 0x10 @@ -222,6 +230,9 @@ MODULE_LICENSE("GPL"); struct rtl821x_priv { bool enable_aldps; bool disable_clk_out; + bool enable_clkout_ssc; + bool enable_rxc_ssc; + bool enable_sysclk_ssc; struct clk *clk; /* rtl8211f */ u16 iner; @@ -285,6 +296,12 @@ static int rtl821x_probe(struct phy_device *phydev) "realtek,aldps-enable"); priv->disable_clk_out = of_property_read_bool(dev->of_node, "realtek,clkout-disable"); + priv->enable_clkout_ssc = of_property_read_bool(dev->of_node, + "realtek,clkout-ssc-enable"); + priv->enable_rxc_ssc = of_property_read_bool(dev->of_node, + "realtek,rxc-ssc-enable"); + priv->enable_sysclk_ssc = of_property_read_bool(dev->of_node, + "realtek,sysclk-ssc-enable"); phydev->priv = priv; @@ -716,6 +733,104 @@ static int rtl8211f_config_phy_eee(struct phy_device *phydev) RTL8211F_PHYCR2_PHY_EEE_ENABLE, 0); } +static int rtl8211f_config_clkout_ssc(struct phy_device *phydev) +{ + struct rtl821x_priv *priv = phydev->priv; + struct device *dev = &phydev->mdio.dev; + int ret; + + /* The value is preserved if the device tree property is absent */ + if (!priv->enable_clkout_ssc) + return 0; + + /* RTL8211FVD has PHYCR2 register, but configuration of CLKOUT SSC + * is not currently supported by this driver due to different bit + * layout. + */ + if (phydev->drv->phy_id == RTL_8211FVD_PHYID) + return 0; + + /* Unnamed registers from EMI improvement parameters application note 1.2 */ + ret = phy_write_paged(phydev, 0xd09, 0x10, 0xcf00); + if (ret < 0) { + dev_err(dev, "CLKOUT SSC initialization failed: %pe\n", ERR_PTR(ret)); + return ret; + } + + /* Enable CLKOUT SSC and CLKOUT SSC Capability using PHYCR2 + * bits 7, 12, 13. This matches the register 25 write 0x38C3 + * from the EMI improvement parameters application note 1.2 + * section 2.3, without affecting unrelated bits. + */ + ret = phy_set_bits(phydev, RTL8211F_PHYCR2, + RTL8211F_CLKOUT_SSC_CAP | RTL8211F_CLKOUT_SSC_EN); + if (ret < 0) { + dev_err(dev, "CLKOUT SSC enable failed: %pe\n", ERR_PTR(ret)); + return ret; + } + + return 0; +} + +static int rtl8211f_config_rxc_ssc(struct phy_device *phydev) +{ + struct rtl821x_priv *priv = phydev->priv; + struct device *dev = &phydev->mdio.dev; + int ret; + + /* The value is preserved if the device tree property is absent */ + if (!priv->enable_rxc_ssc) + return 0; + + /* RTL8211FVD has PHYCR2 register, but configuration of RXC SSC + * is not currently supported by this driver due to different bit + * layout. + */ + if (phydev->drv->phy_id == RTL_8211FVD_PHYID) + return 0; + + ret = phy_write_paged(phydev, RTL8211F_SSC_PAGE, RTL8211F_SSC_RXC, 0x5f00); + if (ret < 0) { + dev_err(dev, "RXC SSC configuration failed: %pe\n", ERR_PTR(ret)); + return ret; + } + + return 0; +} + +static int rtl8211f_config_sysclk_ssc(struct phy_device *phydev) +{ + struct rtl821x_priv *priv = phydev->priv; + struct device *dev = &phydev->mdio.dev; + int ret; + + /* The value is preserved if the device tree property is absent */ + if (!priv->enable_sysclk_ssc) + return 0; + + /* RTL8211FVD has PHYCR2 register, but configuration of SYSCLK SSC + * is not currently supported by this driver due to different bit + * layout. + */ + if (phydev->drv->phy_id == RTL_8211FVD_PHYID) + return 0; + + ret = phy_write_paged(phydev, RTL8211F_SSC_PAGE, RTL8211F_SSC_SYSCLK, 0x4f00); + if (ret < 0) { + dev_err(dev, "SYSCLK SSC configuration failed: %pe\n", ERR_PTR(ret)); + return ret; + } + + /* Enable SSC */ + ret = phy_set_bits(phydev, RTL8211F_PHYCR2, RTL8211F_SYSCLK_SSC_EN); + if (ret < 0) { + dev_err(dev, "SYSCLK SSC enable failed: %pe\n", ERR_PTR(ret)); + return ret; + } + + return 0; +} + static int rtl8211f_config_init(struct phy_device *phydev) { struct device *dev = &phydev->mdio.dev; @@ -732,6 +847,18 @@ static int rtl8211f_config_init(struct phy_device *phydev) if (ret) return ret; + ret = rtl8211f_config_rxc_ssc(phydev); + if (ret) + return ret; + + ret = rtl8211f_config_sysclk_ssc(phydev); + if (ret) + return ret; + + ret = rtl8211f_config_clkout_ssc(phydev); + if (ret) + return ret; + ret = rtl8211f_config_clk_out(phydev); if (ret) { dev_err(dev, "clkout configuration failed: %pe\n", From 5ecbebc9483c61280153ebdceda5f4db7ae63d18 Mon Sep 17 00:00:00 2001 From: Qingfang Deng Date: Tue, 7 Apr 2026 17:40:56 +0800 Subject: [PATCH 1313/1561] ppp: consolidate refcount decrements ppp_destroy_{channel,interface} are always called after refcount_dec_and_test(). To reduce boilerplate code, consolidate the decrements by moving them into the two functions. To reflect this change in semantics, rename the functions to ppp_release_*. Signed-off-by: Qingfang Deng Link: https://patch.msgid.link/20260407094058.257246-1-qingfang.deng@linux.dev Signed-off-by: Jakub Kicinski --- drivers/net/ppp/ppp_generic.c | 61 ++++++++++++++++------------------- 1 file changed, 28 insertions(+), 33 deletions(-) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index cb29a6968c63..b097d1b38ac9 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -286,12 +286,12 @@ static struct compressor *find_compressor(int type); static void ppp_get_stats(struct ppp *ppp, struct ppp_stats *st); static int ppp_create_interface(struct net *net, struct file *file, int *unit); static void init_ppp_file(struct ppp_file *pf, int kind); -static void ppp_destroy_interface(struct ppp *ppp); +static void ppp_release_interface(struct ppp *ppp); static struct ppp *ppp_find_unit(struct ppp_net *pn, int unit); static struct channel *ppp_find_channel(struct ppp_net *pn, int unit); static int ppp_connect_channel(struct channel *pch, int unit); static int ppp_disconnect_channel(struct channel *pch); -static void ppp_destroy_channel(struct channel *pch); +static void ppp_release_channel(struct channel *pch); static int unit_get(struct idr *p, void *ptr, int min); static int unit_set(struct idr *p, void *ptr, int n); static void unit_put(struct idr *p, int n); @@ -407,22 +407,18 @@ static int ppp_release(struct inode *unused, struct file *file) if (pf) { file->private_data = NULL; - if (pf->kind == INTERFACE) { + switch (pf->kind) { + case INTERFACE: ppp = PF_TO_PPP(pf); rtnl_lock(); if (file == ppp->owner) unregister_netdevice(ppp->dev); rtnl_unlock(); - } - if (refcount_dec_and_test(&pf->refcnt)) { - switch (pf->kind) { - case INTERFACE: - ppp_destroy_interface(PF_TO_PPP(pf)); - break; - case CHANNEL: - ppp_destroy_channel(PF_TO_CHANNEL(pf)); - break; - } + ppp_release_interface(ppp); + break; + case CHANNEL: + ppp_release_channel(PF_TO_CHANNEL(pf)); + break; } } return 0; @@ -675,8 +671,7 @@ err_unset: synchronize_rcu(); if (pchb) - if (refcount_dec_and_test(&pchb->file.refcnt)) - ppp_destroy_channel(pchb); + ppp_release_channel(pchb); return -EALREADY; } @@ -708,11 +703,9 @@ static int ppp_unbridge_channels(struct channel *pch) synchronize_rcu(); if (pchbb == pch) - if (refcount_dec_and_test(&pch->file.refcnt)) - ppp_destroy_channel(pch); + ppp_release_channel(pch); - if (refcount_dec_and_test(&pchb->file.refcnt)) - ppp_destroy_channel(pchb); + ppp_release_channel(pchb); return 0; } @@ -786,8 +779,7 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) break; err = ppp_bridge_channels(pch, pchb); /* Drop earlier refcount now bridge establishment is complete */ - if (refcount_dec_and_test(&pchb->file.refcnt)) - ppp_destroy_channel(pchb); + ppp_release_channel(pchb); break; case PPPIOCUNBRIDGECHAN: @@ -1584,8 +1576,7 @@ static void ppp_dev_priv_destructor(struct net_device *dev) struct ppp *ppp; ppp = netdev_priv(dev); - if (refcount_dec_and_test(&ppp->file.refcnt)) - ppp_destroy_interface(ppp); + ppp_release_interface(ppp); } static int ppp_fill_forward_path(struct net_device_path_ctx *ctx, @@ -3022,8 +3013,7 @@ ppp_unregister_channel(struct ppp_channel *chan) pch->file.dead = 1; wake_up_interruptible(&pch->file.rwait); - if (refcount_dec_and_test(&pch->file.refcnt)) - ppp_destroy_channel(pch); + ppp_release_channel(pch); } /* @@ -3404,12 +3394,14 @@ init_ppp_file(struct ppp_file *pf, int kind) } /* - * Free the memory used by a ppp unit. This is only called once - * there are no channels connected to the unit and no file structs - * that reference the unit. + * Drop a reference to a ppp unit and free its memory if the refcount reaches + * zero. */ -static void ppp_destroy_interface(struct ppp *ppp) +static void ppp_release_interface(struct ppp *ppp) { + if (!refcount_dec_and_test(&ppp->file.refcnt)) + return; + atomic_dec(&ppp_unit_count); if (!ppp->file.dead || ppp->n_channels) { @@ -3561,18 +3553,21 @@ ppp_disconnect_channel(struct channel *pch) wake_up_interruptible(&ppp->file.rwait); ppp_unlock(ppp); synchronize_net(); - if (refcount_dec_and_test(&ppp->file.refcnt)) - ppp_destroy_interface(ppp); + ppp_release_interface(ppp); err = 0; } return err; } /* - * Free up the resources used by a ppp channel. + * Drop a reference to a ppp channel and free its memory if the refcount reaches + * zero. */ -static void ppp_destroy_channel(struct channel *pch) +static void ppp_release_channel(struct channel *pch) { + if (!refcount_dec_and_test(&pch->file.refcnt)) + return; + put_net_track(pch->chan_net, &pch->ns_tracker); pch->chan_net = NULL; From a17d3c3d0cb2827eaa87c43e748095e21f8cc1ab Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Tue, 7 Apr 2026 08:45:39 +0800 Subject: [PATCH 1314/1561] net: macb: Use napi_schedule_irqoff() in IRQ handler For non-PREEMPT_RT kernels, the IRQ handler runs with interrupts disabled, allowing the use of napi_schedule_irqoff() to save a pair of local_irq_{save,restore} operations. For PREEMPT_RT kernels, napi_schedule_irqoff() behaves identically to napi_schedule(). Signed-off-by: Kevin Hao Link: https://patch.msgid.link/20260407-macb-napi-irqoff-v1-1-61bec60047d7@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index d9716c56f705..a12aa21244e8 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -2149,7 +2149,7 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id) */ queue_writel(queue, IDR, bp->rx_intr_mask); macb_queue_isr_clear(bp, queue, MACB_BIT(RCOMP)); - napi_schedule(&queue->napi_rx); + napi_schedule_irqoff(&queue->napi_rx); } if (status & (MACB_BIT(TCOMP) | @@ -2162,7 +2162,7 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id) wmb(); // ensure softirq can see update } - napi_schedule(&queue->napi_tx); + napi_schedule_irqoff(&queue->napi_tx); } if (unlikely(status & MACB_INT_MISC_FLAGS)) From 57f3f53d2c9c5a9e133596e2f7bc1c50688a6d38 Mon Sep 17 00:00:00 2001 From: Justin Chen Date: Mon, 6 Apr 2026 10:57:54 -0700 Subject: [PATCH 1315/1561] net: bcmgenet: fix off-by-one in bcmgenet_put_txcb The write_ptr points to the next open tx_cb. We want to return the tx_cb that gets rewinded, so we must rewind the pointer first then return the tx_cb that it points to. That way the txcb can be correctly cleaned up. Fixes: 876dbadd53a7 ("net: bcmgenet: Fix unmapping of fragments in bcmgenet_xmit()") Signed-off-by: Justin Chen Reviewed-by: Nicolai Buchwitz Link: https://patch.msgid.link/20260406175756.134567-2-justin.chen@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 482a31e7b72b..0f6e4baba25b 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1819,15 +1819,15 @@ static struct enet_cb *bcmgenet_put_txcb(struct bcmgenet_priv *priv, { struct enet_cb *tx_cb_ptr; - tx_cb_ptr = ring->cbs; - tx_cb_ptr += ring->write_ptr - ring->cb_ptr; - /* Rewinding local write pointer */ if (ring->write_ptr == ring->cb_ptr) ring->write_ptr = ring->end_ptr; else ring->write_ptr--; + tx_cb_ptr = ring->cbs; + tx_cb_ptr += ring->write_ptr - ring->cb_ptr; + return tx_cb_ptr; } From 3f3168300efb839028328d720ab3962f91d6a0d0 Mon Sep 17 00:00:00 2001 From: Justin Chen Date: Mon, 6 Apr 2026 10:57:55 -0700 Subject: [PATCH 1316/1561] net: bcmgenet: fix leaking free_bds While reclaiming the tx queue we fast forward the write pointer to drop any data in flight. These dropped frames are not added back to the pool of free bds. We also need to tell the netdev that we are dropping said data. Fixes: f1bacae8b655 ("net: bcmgenet: support reclaiming unsent Tx packets") Signed-off-by: Justin Chen Reviewed-by: Florian Fainelli Reviewed-by: Nicolai Buchwitz Tested-by: Nicolai Buchwitz Link: https://patch.msgid.link/20260406175756.134567-3-justin.chen@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 0f6e4baba25b..e89126a0c20e 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1985,6 +1985,7 @@ static unsigned int bcmgenet_tx_reclaim(struct net_device *dev, drop = (ring->prod_index - ring->c_index) & DMA_C_INDEX_MASK; released += drop; ring->prod_index = ring->c_index & DMA_C_INDEX_MASK; + ring->free_bds += drop; while (drop--) { cb_ptr = bcmgenet_put_txcb(priv, ring); skb = cb_ptr->skb; @@ -1996,6 +1997,7 @@ static unsigned int bcmgenet_tx_reclaim(struct net_device *dev, } if (skb) dev_consume_skb_any(skb); + netdev_tx_reset_queue(netdev_get_tx_queue(dev, ring->index)); bcmgenet_tdma_ring_writel(priv, ring->index, ring->prod_index, TDMA_PROD_INDEX); wr_ptr = ring->write_ptr * WORDS_PER_BD(priv); From 5393b2b5bee2ac51a0043dc7f4ac3475f053d08d Mon Sep 17 00:00:00 2001 From: Justin Chen Date: Mon, 6 Apr 2026 10:57:56 -0700 Subject: [PATCH 1317/1561] net: bcmgenet: fix racing timeout handler The bcmgenet_timeout handler tries to take down all tx queues when a single queue times out. This is over zealous and causes many race conditions with queues that are still chugging along. Instead lets only restart the timed out queue. Fixes: 13ea657806cf ("net: bcmgenet: improve TX timeout") Signed-off-by: Justin Chen Reviewed-by: Florian Fainelli Reviewed-by: Nicolai Buchwitz Tested-by: Nicolai Buchwitz Link: https://patch.msgid.link/20260406175756.134567-4-justin.chen@broadcom.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/broadcom/genet/bcmgenet.c | 22 ++++++++----------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index e89126a0c20e..54f71b1e85fc 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -3477,27 +3477,23 @@ static void bcmgenet_dump_tx_queue(struct bcmgenet_tx_ring *ring) static void bcmgenet_timeout(struct net_device *dev, unsigned int txqueue) { struct bcmgenet_priv *priv = netdev_priv(dev); - u32 int1_enable = 0; - unsigned int q; + struct bcmgenet_tx_ring *ring = &priv->tx_rings[txqueue]; + struct netdev_queue *txq = netdev_get_tx_queue(dev, txqueue); netif_dbg(priv, tx_err, dev, "bcmgenet_timeout\n"); - for (q = 0; q <= priv->hw_params->tx_queues; q++) - bcmgenet_dump_tx_queue(&priv->tx_rings[q]); + bcmgenet_dump_tx_queue(ring); - bcmgenet_tx_reclaim_all(dev); + bcmgenet_tx_reclaim(dev, ring, true); - for (q = 0; q <= priv->hw_params->tx_queues; q++) - int1_enable |= (1 << q); + /* Re-enable the TX interrupt for this ring */ + bcmgenet_intrl2_1_writel(priv, 1 << txqueue, INTRL2_CPU_MASK_CLEAR); - /* Re-enable TX interrupts if disabled */ - bcmgenet_intrl2_1_writel(priv, int1_enable, INTRL2_CPU_MASK_CLEAR); + txq_trans_cond_update(txq); - netif_trans_update(dev); + BCMGENET_STATS64_INC((&ring->stats64), errors); - BCMGENET_STATS64_INC((&priv->tx_rings[txqueue].stats64), errors); - - netif_tx_wake_all_queues(dev); + netif_tx_wake_queue(txq); } #define MAX_MDF_FILTER 17 From e159f05e12cc1111a3103b99375ddf0dfd0e7d63 Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Tue, 7 Apr 2026 17:40:41 +0800 Subject: [PATCH 1318/1561] net: txgbe: fix RTNL assertion warning when remove module For the copper NIC with external PHY, the driver called phylink_connect_phy() during probe and phylink_disconnect_phy() during remove. It caused an RTNL assertion warning in phylink_disconnect_phy() upon module remove. To fix this, add rtnl_lock() and rtnl_unlock() around the phylink_disconnect_phy() in remove function. ------------[ cut here ]------------ RTNL: assertion failed at drivers/net/phy/phylink.c (2351) WARNING: drivers/net/phy/phylink.c:2351 at phylink_disconnect_phy+0xd8/0xf0 [phylink], CPU#0: rmmod/4464 Modules linked in: ... CPU: 0 UID: 0 PID: 4464 Comm: rmmod Kdump: loaded Not tainted 7.0.0-rc4+ Hardware name: Micro-Star International Co., Ltd. MS-7E16/X670E GAMING PLUS WIFI (MS-7E16), BIOS 1.90 12/31/2024 RIP: 0010:phylink_disconnect_phy+0xe4/0xf0 [phylink] Code: 5b 41 5c 41 5d 41 5e 41 5f 5d 31 c0 31 d2 31 f6 31 ff e9 3a 38 8f e7 48 8d 3d 48 87 e2 ff ba 2f 09 00 00 48 c7 c6 c1 22 24 c0 <67> 48 0f b9 3a e9 34 ff ff ff 66 90 90 90 90 90 90 90 90 90 90 90 RSP: 0018:ffffce7288363ac0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff89654b2a1a00 RCX: 0000000000000000 RDX: 000000000000092f RSI: ffffffffc02422c1 RDI: ffffffffc0239020 RBP: ffffce7288363ae8 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8964c4022000 R13: ffff89654fce3028 R14: ffff89654ebb4000 R15: ffffffffc0226348 FS: 0000795e80d93780(0000) GS:ffff896c52857000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00005b528b592000 CR3: 0000000170d0f000 CR4: 0000000000f50ef0 PKRU: 55555554 Call Trace: txgbe_remove_phy+0xbb/0xd0 [txgbe] txgbe_remove+0x4c/0xb0 [txgbe] pci_device_remove+0x41/0xb0 device_remove+0x43/0x80 device_release_driver_internal+0x206/0x270 driver_detach+0x4a/0xa0 bus_remove_driver+0x83/0x120 driver_unregister+0x2f/0x60 pci_unregister_driver+0x40/0x90 txgbe_driver_exit+0x10/0x850 [txgbe] __do_sys_delete_module.isra.0+0x1c3/0x2f0 __x64_sys_delete_module+0x12/0x20 x64_sys_call+0x20c3/0x2390 do_syscall_64+0x11c/0x1500 ? srso_alias_return_thunk+0x5/0xfbef5 ? do_syscall_64+0x15a/0x1500 ? srso_alias_return_thunk+0x5/0xfbef5 ? do_fault+0x312/0x580 ? srso_alias_return_thunk+0x5/0xfbef5 ? __handle_mm_fault+0x9d5/0x1040 ? srso_alias_return_thunk+0x5/0xfbef5 ? count_memcg_events+0x101/0x1d0 ? srso_alias_return_thunk+0x5/0xfbef5 ? handle_mm_fault+0x1e8/0x2f0 ? srso_alias_return_thunk+0x5/0xfbef5 ? do_user_addr_fault+0x2f8/0x820 ? srso_alias_return_thunk+0x5/0xfbef5 ? irqentry_exit+0xb2/0x600 ? srso_alias_return_thunk+0x5/0xfbef5 ? exc_page_fault+0x92/0x1c0 entry_SYSCALL_64_after_hwframe+0x76/0x7e Fixes: 02b2a6f91b90 ("net: txgbe: support copper NIC with external PHY") Cc: stable@vger.kernel.org Signed-off-by: Jiawen Wu Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/8B47A5872884147D+20260407094041.4646-1-jiawenwu@trustnetic.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c index 8ea7aa07ae4e..dc9f24314658 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c @@ -657,7 +657,9 @@ void txgbe_remove_phy(struct txgbe *txgbe) return; case wx_mac_sp: if (txgbe->wx->media_type == wx_media_copper) { + rtnl_lock(); phylink_disconnect_phy(txgbe->wx->phylink); + rtnl_unlock(); phylink_destroy(txgbe->wx->phylink); return; } From 4ae0604a0673e11e2075b178387151fcad5111b5 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 7 Apr 2026 08:48:04 +0200 Subject: [PATCH 1319/1561] net: airoha: Add dma_rmb() and READ_ONCE() in airoha_qdma_rx_process() Add missing dma_rmb() in airoha_qdma_rx_process routine to make sure the DMA read operations are completed when the NIC reports the processing on the current descriptor is done. Moreover, add missing READ_ONCE() in airoha_qdma_rx_process() for DMA descriptor control fields in order to avoid any compiler reordering. Fixes: 23020f0493270 ("net: airoha: Introduce ethernet support for EN7581 SoC") Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260407-airoha_qdma_rx_process-fix-reordering-v3-1-91c36e9da31f@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 91cb63a32d99..9285a68f435f 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -584,7 +584,7 @@ static int airoha_qdma_fill_rx_queue(struct airoha_queue *q) static int airoha_qdma_get_gdm_port(struct airoha_eth *eth, struct airoha_qdma_desc *desc) { - u32 port, sport, msg1 = le32_to_cpu(desc->msg1); + u32 port, sport, msg1 = le32_to_cpu(READ_ONCE(desc->msg1)); sport = FIELD_GET(QDMA_ETH_RXMSG_SPORT_MASK, msg1); switch (sport) { @@ -612,21 +612,24 @@ static int airoha_qdma_rx_process(struct airoha_queue *q, int budget) while (done < budget) { struct airoha_queue_entry *e = &q->entry[q->tail]; struct airoha_qdma_desc *desc = &q->desc[q->tail]; - u32 hash, reason, msg1 = le32_to_cpu(desc->msg1); - struct page *page = virt_to_head_page(e->buf); - u32 desc_ctrl = le32_to_cpu(desc->ctrl); + u32 hash, reason, msg1, desc_ctrl; struct airoha_gdm_port *port; int data_len, len, p; + struct page *page; + desc_ctrl = le32_to_cpu(READ_ONCE(desc->ctrl)); if (!(desc_ctrl & QDMA_DESC_DONE_MASK)) break; + dma_rmb(); + q->tail = (q->tail + 1) % q->ndesc; q->queued--; dma_sync_single_for_cpu(eth->dev, e->dma_addr, SKB_WITH_OVERHEAD(q->buf_size), dir); + page = virt_to_head_page(e->buf); len = FIELD_GET(QDMA_DESC_LEN_MASK, desc_ctrl); data_len = q->skb ? q->buf_size : SKB_WITH_OVERHEAD(q->buf_size); @@ -670,8 +673,8 @@ static int airoha_qdma_rx_process(struct airoha_queue *q, int budget) * DMA descriptor. Report DSA tag to the DSA stack * via skb dst info. */ - u32 sptag = FIELD_GET(QDMA_ETH_RXMSG_SPTAG, - le32_to_cpu(desc->msg0)); + u32 msg0 = le32_to_cpu(READ_ONCE(desc->msg0)); + u32 sptag = FIELD_GET(QDMA_ETH_RXMSG_SPTAG, msg0); if (sptag < ARRAY_SIZE(port->dsa_meta) && port->dsa_meta[sptag]) @@ -679,6 +682,7 @@ static int airoha_qdma_rx_process(struct airoha_queue *q, int budget) &port->dsa_meta[sptag]->dst); } + msg1 = le32_to_cpu(READ_ONCE(desc->msg1)); hash = FIELD_GET(AIROHA_RXD4_FOE_ENTRY, msg1); if (hash != AIROHA_RXD4_FOE_ENTRY) skb_set_hash(q->skb, jhash_1word(hash, 0), From 12ff2a4aee6c86746623d5aed24389dbf6dffded Mon Sep 17 00:00:00 2001 From: Mohsin Bashir Date: Tue, 7 Apr 2026 17:24:15 -0700 Subject: [PATCH 1320/1561] eth: fbnic: Use wake instead of start fbnic_up() calls netif_tx_start_all_queues(), which only clears __QUEUE_STATE_DRV_XOFF. If qdisc backlog has accumulated on any TX queue before the reconfiguration (e.g. ring resize via ethtool -G), start does not call __netif_schedule() to kick the qdisc, so the pending backlog is never drained and the queue stalls. Switch to netif_tx_wake_all_queues(), which clears DRV_XOFF and also calls __netif_schedule() on every queue, ensuring any backlog that built up before the down/up cycle is promptly dequeued. Fixes: bc6107771bb4 ("eth: fbnic: Allocate a netdevice and napi vectors with queues") Signed-off-by: Mohsin Bashir Link: https://patch.msgid.link/20260408002415.2963915-1-mohsin.bashr@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/meta/fbnic/fbnic_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c index 3fa9d1910daa..8f331358c972 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c @@ -139,7 +139,7 @@ void fbnic_up(struct fbnic_net *fbn) /* Enable Tx/Rx processing */ fbnic_napi_enable(fbn); - netif_tx_start_all_queues(fbn->netdev); + netif_tx_wake_all_queues(fbn->netdev); fbnic_service_task_start(fbn); From 3c6132ccc58e8adf044166728e783f55dc323ef9 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Wed, 8 Apr 2026 11:20:51 +0800 Subject: [PATCH 1321/1561] ipv6: sit: remove redundant ret = 0 assignment The variable ret is assigned a value at all places where it is used; There is no need to assign a value when it is initially defined. Signed-off-by: Yue Haibing Link: https://patch.msgid.link/20260408032051.3096449-1-yuehaibing@huawei.com Signed-off-by: Jakub Kicinski --- net/ipv6/sit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index ef2e5111fb3a..201347b4e127 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -309,7 +309,7 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ip_tunnel_prl __u struct ip_tunnel_prl kprl, *kp; struct ip_tunnel_prl_entry *prl; unsigned int cmax, c = 0, ca, len; - int ret = 0; + int ret; if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) return -EINVAL; From 3d2c3d2eea9acdbee5b5742d15d021069b49d3f9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 8 Apr 2026 15:19:52 -0700 Subject: [PATCH 1322/1561] selftests: net: py: explicitly forbid multiple ksft_run() calls People (do people still write code or is it all AI?) seem to not get that ksft_run() can only be called once. If we call it multiple times KTAP parsers will likely cut off after the first batch has finished. Link: https://patch.msgid.link/20260408221952.819822-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/lib/py/ksft.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py index 7b8af463e35d..7083c99c9444 100644 --- a/tools/testing/selftests/net/lib/py/ksft.py +++ b/tools/testing/selftests/net/lib/py/ksft.py @@ -341,10 +341,13 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()): totals = {"pass": 0, "fail": 0, "skip": 0, "xfail": 0} + global KSFT_RESULT + if KSFT_RESULT is not None: + raise RuntimeError("ksft_run() can't be called multiple times.") + print("TAP version 13", flush=True) print("1.." + str(len(test_cases)), flush=True) - global KSFT_RESULT cnt = 0 stop = False for func, args, name in test_cases: From 42f9b4c6ef19e71d2c7d9bfd3c5037d4fe434ad7 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 8 Apr 2026 15:19:05 +0800 Subject: [PATCH 1323/1561] tools: ynl: tests: fix leading space on Makefile target The ../generated/protos.a rule had a spurious leading space before the target name. In make, target rules must start at column 0; only recipe lines are indented with a tab. The extra space caused make to misparse the rule. Remove the leading space to match the style of the adjacent ../lib/ynl.a rule. Fixes: e0aa0c61758f ("tools: ynl: move samples to tests") Signed-off-by: Hangbin Liu Link: https://patch.msgid.link/20260408-ynl_makefile-v1-1-f9624acc2ad9@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/tests/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/net/ynl/tests/Makefile b/tools/net/ynl/tests/Makefile index 2a02958c7039..9215e84cca05 100644 --- a/tools/net/ynl/tests/Makefile +++ b/tools/net/ynl/tests/Makefile @@ -50,7 +50,7 @@ all: $(TEST_GEN_PROGS) $(TEST_GEN_FILES) ../lib/ynl.a: @$(MAKE) -C ../lib - ../generated/protos.a: +../generated/protos.a: @$(MAKE) -C ../generated $(TEST_GEN_PROGS) $(TEST_GEN_FILES): %: %.c ../lib/ynl.a ../generated/protos.a From 22e620fe8455870ee1aa26aca6f2b5277a57431f Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 4 Apr 2026 18:34:37 +0300 Subject: [PATCH 1324/1561] ipvs: show the current conn_tab size to users As conn_tab is per-net, better to show the current hash table size to users instead of the ip_vs_conn_tab_size (max). Signed-off-by: Julian Anastasov Signed-off-by: Florian Westphal --- net/netfilter/ipvs/ip_vs_ctl.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index a1f070cb76c3..1322dd54ed7c 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -281,6 +281,20 @@ unlock: mutex_unlock(&ipvs->est_mutex); } +static int get_conn_tab_size(struct netns_ipvs *ipvs) +{ + const struct ip_vs_rht *t; + int size = 0; + + rcu_read_lock(); + t = rcu_dereference(ipvs->conn_tab); + if (t) + size = t->size; + rcu_read_unlock(); + + return size; +} + int ip_vs_use_count_inc(void) { @@ -2741,10 +2755,13 @@ static void ip_vs_info_seq_stop(struct seq_file *seq, void *v) static int ip_vs_info_seq_show(struct seq_file *seq, void *v) { + struct net *net = seq_file_net(seq); + struct netns_ipvs *ipvs = net_ipvs(net); + if (v == SEQ_START_TOKEN) { seq_printf(seq, "IP Virtual Server version %d.%d.%d (size=%d)\n", - NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size); + NVERSION(IP_VS_VERSION_CODE), get_conn_tab_size(ipvs)); seq_puts(seq, "Prot LocalAddress:Port Scheduler Flags\n"); seq_puts(seq, @@ -3425,7 +3442,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) char buf[64]; sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)", - NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size); + NVERSION(IP_VS_VERSION_CODE), get_conn_tab_size(ipvs)); if (copy_to_user(user, buf, strlen(buf)+1) != 0) { ret = -EFAULT; goto out; @@ -3437,8 +3454,9 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) case IP_VS_SO_GET_INFO: { struct ip_vs_getinfo info; + info.version = IP_VS_VERSION_CODE; - info.size = ip_vs_conn_tab_size; + info.size = get_conn_tab_size(ipvs); info.num_services = atomic_read(&ipvs->num_services[IP_VS_AF_INET]); if (copy_to_user(user, &info, sizeof(info)) != 0) @@ -4447,7 +4465,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) if (nla_put_u32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE) || nla_put_u32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE, - ip_vs_conn_tab_size)) + get_conn_tab_size(ipvs))) goto nla_put_failure; break; } From 9a9ccef907a7a8722ed27013c925baf68b7c0506 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 4 Apr 2026 18:34:38 +0300 Subject: [PATCH 1325/1561] ipvs: add ip_vs_status info Add /proc/net/ip_vs_status to show current state of IPVS. The motivation for this new /proc interface is to provide the output for the users to help them decide when to tune the load factor for hash tables, which is possible with the new sysctl knobs coming in followup patch. The output also includes information for the kthreads used for stats. Signed-off-by: Julian Anastasov Signed-off-by: Florian Westphal --- net/netfilter/ipvs/ip_vs_ctl.c | 145 +++++++++++++++++++++++++++++++++ 1 file changed, 145 insertions(+) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 1322dd54ed7c..fb1df61edfdd 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2924,6 +2924,144 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) return 0; } + +static int ip_vs_status_show(struct seq_file *seq, void *v) +{ + struct net *net = seq_file_single_net(seq); + struct netns_ipvs *ipvs = net_ipvs(net); + unsigned int resched_score = 0; + struct ip_vs_conn_hnode *hn; + struct hlist_bl_head *head; + struct ip_vs_service *svc; + struct ip_vs_rht *t, *pt; + struct hlist_bl_node *e; + int old_gen, new_gen; + u32 counts[8]; + u32 bucket; + int count; + u32 sum1; + u32 sum; + int i; + + rcu_read_lock(); + + t = rcu_dereference(ipvs->conn_tab); + + seq_printf(seq, "Conns:\t%d\n", atomic_read(&ipvs->conn_count)); + seq_printf(seq, "Conn buckets:\t%d (%d bits, lfactor %d)\n", + t ? t->size : 0, t ? t->bits : 0, t ? t->lfactor : 0); + + if (!atomic_read(&ipvs->conn_count)) + goto after_conns; + old_gen = atomic_read(&ipvs->conn_tab_changes); + +repeat_conn: + smp_rmb(); /* ipvs->conn_tab and conn_tab_changes */ + memset(counts, 0, sizeof(counts)); + ip_vs_rht_for_each_table_rcu(ipvs->conn_tab, t, pt) { + for (bucket = 0; bucket < t->size; bucket++) { + DECLARE_IP_VS_RHT_WALK_BUCKET_RCU(); + + count = 0; + resched_score++; + ip_vs_rht_walk_bucket_rcu(t, bucket, head) { + count = 0; + hlist_bl_for_each_entry_rcu(hn, e, head, node) + count++; + } + resched_score += count; + if (resched_score >= 100) { + resched_score = 0; + cond_resched_rcu(); + new_gen = atomic_read(&ipvs->conn_tab_changes); + /* New table installed ? */ + if (old_gen != new_gen) { + old_gen = new_gen; + goto repeat_conn; + } + } + counts[min(count, (int)ARRAY_SIZE(counts) - 1)]++; + } + } + for (sum = 0, i = 0; i < ARRAY_SIZE(counts); i++) + sum += counts[i]; + sum1 = sum - counts[0]; + seq_printf(seq, "Conn buckets empty:\t%u (%lu%%)\n", + counts[0], (unsigned long)counts[0] * 100 / max(sum, 1U)); + for (i = 1; i < ARRAY_SIZE(counts); i++) { + if (!counts[i]) + continue; + seq_printf(seq, "Conn buckets len-%d:\t%u (%lu%%)\n", + i, counts[i], + (unsigned long)counts[i] * 100 / max(sum1, 1U)); + } + +after_conns: + t = rcu_dereference(ipvs->svc_table); + + count = ip_vs_get_num_services(ipvs); + seq_printf(seq, "Services:\t%d\n", count); + seq_printf(seq, "Service buckets:\t%d (%d bits, lfactor %d)\n", + t ? t->size : 0, t ? t->bits : 0, t ? t->lfactor : 0); + + if (!count) + goto after_svc; + old_gen = atomic_read(&ipvs->svc_table_changes); + +repeat_svc: + smp_rmb(); /* ipvs->svc_table and svc_table_changes */ + memset(counts, 0, sizeof(counts)); + ip_vs_rht_for_each_table_rcu(ipvs->svc_table, t, pt) { + for (bucket = 0; bucket < t->size; bucket++) { + DECLARE_IP_VS_RHT_WALK_BUCKET_RCU(); + + count = 0; + resched_score++; + ip_vs_rht_walk_bucket_rcu(t, bucket, head) { + count = 0; + hlist_bl_for_each_entry_rcu(svc, e, head, + s_list) + count++; + } + resched_score += count; + if (resched_score >= 100) { + resched_score = 0; + cond_resched_rcu(); + new_gen = atomic_read(&ipvs->svc_table_changes); + /* New table installed ? */ + if (old_gen != new_gen) { + old_gen = new_gen; + goto repeat_svc; + } + } + counts[min(count, (int)ARRAY_SIZE(counts) - 1)]++; + } + } + for (sum = 0, i = 0; i < ARRAY_SIZE(counts); i++) + sum += counts[i]; + sum1 = sum - counts[0]; + seq_printf(seq, "Service buckets empty:\t%u (%lu%%)\n", + counts[0], (unsigned long)counts[0] * 100 / max(sum, 1U)); + for (i = 1; i < ARRAY_SIZE(counts); i++) { + if (!counts[i]) + continue; + seq_printf(seq, "Service buckets len-%d:\t%u (%lu%%)\n", + i, counts[i], + (unsigned long)counts[i] * 100 / max(sum1, 1U)); + } + +after_svc: + seq_printf(seq, "Stats thread slots:\t%d (max %lu)\n", + ipvs->est_kt_count, ipvs->est_max_threads); + seq_printf(seq, "Stats chain max len:\t%d\n", ipvs->est_chain_max); + seq_printf(seq, "Stats thread ests:\t%d\n", + ipvs->est_chain_max * IPVS_EST_CHAIN_FACTOR * + IPVS_EST_NTICKS); + + rcu_read_unlock(); + return 0; +} + #endif /* @@ -4825,6 +4963,9 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) ipvs->net->proc_net, ip_vs_stats_percpu_show, NULL)) goto err_percpu; + if (!proc_create_net_single("ip_vs_status", 0, ipvs->net->proc_net, + ip_vs_status_show, NULL)) + goto err_status; #endif ret = ip_vs_control_net_init_sysctl(ipvs); @@ -4835,6 +4976,9 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) err: #ifdef CONFIG_PROC_FS + remove_proc_entry("ip_vs_status", ipvs->net->proc_net); + +err_status: remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net); err_percpu: @@ -4860,6 +5004,7 @@ void __net_exit ip_vs_control_net_cleanup(struct netns_ipvs *ipvs) ip_vs_control_net_cleanup_sysctl(ipvs); cancel_delayed_work_sync(&ipvs->est_reload_work); #ifdef CONFIG_PROC_FS + remove_proc_entry("ip_vs_status", ipvs->net->proc_net); remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net); remove_proc_entry("ip_vs_stats", ipvs->net->proc_net); remove_proc_entry("ip_vs", ipvs->net->proc_net); From 8d7de5477e47525c870b599fb2de06ef8af63466 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 4 Apr 2026 18:34:39 +0300 Subject: [PATCH 1326/1561] ipvs: add conn_lfactor and svc_lfactor sysctl vars Allow the default load factor for the connection and service tables to be configured. Signed-off-by: Julian Anastasov Signed-off-by: Florian Westphal --- Documentation/networking/ipvs-sysctl.rst | 37 ++++++++++++ net/netfilter/ipvs/ip_vs_ctl.c | 76 ++++++++++++++++++++++++ 2 files changed, 113 insertions(+) diff --git a/Documentation/networking/ipvs-sysctl.rst b/Documentation/networking/ipvs-sysctl.rst index 3fb5fa142eef..a556439f8be7 100644 --- a/Documentation/networking/ipvs-sysctl.rst +++ b/Documentation/networking/ipvs-sysctl.rst @@ -29,6 +29,33 @@ backup_only - BOOLEAN If set, disable the director function while the server is in backup mode to avoid packet loops for DR/TUN methods. +conn_lfactor - INTEGER + Possible values: -8 (larger table) .. 8 (smaller table) + + Default: -4 + + Controls the sizing of the connection hash table based on the + load factor (number of connections per table buckets): + + 2^conn_lfactor = nodes / buckets + + As result, the table grows if load increases and shrinks when + load decreases in the range of 2^8 - 2^conn_tab_bits (module + parameter). + The value is a shift count where negative values select + buckets = (connection hash nodes << -value) while positive + values select buckets = (connection hash nodes >> value). The + negative values reduce the collisions and reduce the time for + lookups but increase the table size. Positive values will + tolerate load above 100% when using smaller table is + preferred with the cost of more collisions. If using NAT + connections consider decreasing the value with one because + they add two nodes in the hash table. + + Example: + -4: grow if load goes above 6% (buckets = nodes * 16) + 2: grow if load goes above 400% (buckets = nodes / 4) + conn_reuse_mode - INTEGER 1 - default @@ -219,6 +246,16 @@ secure_tcp - INTEGER The value definition is the same as that of drop_entry and drop_packet. +svc_lfactor - INTEGER + Possible values: -8 (larger table) .. 8 (smaller table) + + Default: -3 + + Controls the sizing of the service hash table based on the + load factor (number of services per table buckets). The table + will grow and shrink in the range of 2^4 - 2^20. + See conn_lfactor for explanation. + sync_threshold - vector of 2 INTEGERs: sync_threshold, sync_period default 3 50 diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index fb1df61edfdd..6632daa87ded 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2445,6 +2445,60 @@ static int ipvs_proc_run_estimation(const struct ctl_table *table, int write, return ret; } +static int ipvs_proc_conn_lfactor(const struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct netns_ipvs *ipvs = table->extra2; + int *valp = table->data; + int val = *valp; + int ret; + + struct ctl_table tmp_table = { + .data = &val, + .maxlen = sizeof(int), + }; + + ret = proc_dointvec(&tmp_table, write, buffer, lenp, ppos); + if (write && ret >= 0) { + if (val < -8 || val > 8) { + ret = -EINVAL; + } else { + *valp = val; + if (rcu_access_pointer(ipvs->conn_tab)) + mod_delayed_work(system_unbound_wq, + &ipvs->conn_resize_work, 0); + } + } + return ret; +} + +static int ipvs_proc_svc_lfactor(const struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct netns_ipvs *ipvs = table->extra2; + int *valp = table->data; + int val = *valp; + int ret; + + struct ctl_table tmp_table = { + .data = &val, + .maxlen = sizeof(int), + }; + + ret = proc_dointvec(&tmp_table, write, buffer, lenp, ppos); + if (write && ret >= 0) { + if (val < -8 || val > 8) { + ret = -EINVAL; + } else { + *valp = val; + if (rcu_access_pointer(ipvs->svc_table)) + mod_delayed_work(system_unbound_wq, + &ipvs->svc_resize_work, 0); + } + } + return ret; +} + /* * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/) * Do not change order or insert new entries without @@ -2633,6 +2687,18 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = ipvs_proc_est_nice, }, + { + .procname = "conn_lfactor", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = ipvs_proc_conn_lfactor, + }, + { + .procname = "svc_lfactor", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = ipvs_proc_svc_lfactor, + }, #ifdef CONFIG_IP_VS_DEBUG { .procname = "debug_level", @@ -4853,6 +4919,16 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) tbl[idx].extra2 = ipvs; tbl[idx++].data = &ipvs->sysctl_est_nice; + if (unpriv) + tbl[idx].mode = 0444; + tbl[idx].extra2 = ipvs; + tbl[idx++].data = &ipvs->sysctl_conn_lfactor; + + if (unpriv) + tbl[idx].mode = 0444; + tbl[idx].extra2 = ipvs; + tbl[idx++].data = &ipvs->sysctl_svc_lfactor; + #ifdef CONFIG_IP_VS_DEBUG /* Global sysctls must be ro in non-init netns */ if (!net_eq(net, &init_net)) From 8df772afc9d016b597d22a1431e7011b90ce1fb3 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 28 Mar 2026 23:00:31 +0100 Subject: [PATCH 1327/1561] netfilter: x_physdev: reject empty or not-nul terminated device names Reject names that lack a \0 character and reject the empty string as well. iptables allows this but it fails to re-parse iptables-save output that contain such rules. Signed-off-by: Florian Westphal --- net/netfilter/xt_physdev.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index 343e65f377d4..53997771013f 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -107,6 +107,28 @@ static int physdev_mt_check(const struct xt_mtchk_param *par) return -EINVAL; } +#define X(memb) strnlen(info->memb, sizeof(info->memb)) >= sizeof(info->memb) + if (info->bitmask & XT_PHYSDEV_OP_IN) { + if (info->physindev[0] == '\0') + return -EINVAL; + if (X(physindev)) + return -ENAMETOOLONG; + } + + if (info->bitmask & XT_PHYSDEV_OP_OUT) { + if (info->physoutdev[0] == '\0') + return -EINVAL; + + if (X(physoutdev)) + return -ENAMETOOLONG; + } + + if (X(in_mask)) + return -ENAMETOOLONG; + if (X(out_mask)) + return -ENAMETOOLONG; +#undef X + if (!brnf_probed) { brnf_probed = true; request_module("br_netfilter"); From 74feb7d373b32a63d7986a2caf9689c860c9a761 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 4 Apr 2026 12:09:05 +0200 Subject: [PATCH 1328/1561] netfilter: nfnetlink: prefer skb_mac_header helpers This adds implicit DEBUG_WARN_ON_ONCE for debug configurations. No other changes intended. Signed-off-by: Florian Westphal --- net/netfilter/nfnetlink_log.c | 19 ++++++++++--------- net/netfilter/nfnetlink_queue.c | 25 ++++++++++++------------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index b2c24cb919d4..2439cbbd5b26 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -401,7 +401,7 @@ nfulnl_timer(struct timer_list *t) static u32 nfulnl_get_bridge_size(const struct sk_buff *skb) { - u32 size = 0; + u32 mac_len, size = 0; if (!skb_mac_header_was_set(skb)) return 0; @@ -412,14 +412,17 @@ static u32 nfulnl_get_bridge_size(const struct sk_buff *skb) size += nla_total_size(sizeof(u16)); /* tag */ } - if (skb->network_header > skb->mac_header) - size += nla_total_size(skb->network_header - skb->mac_header); + mac_len = skb_mac_header_len(skb); + if (mac_len > 0) + size += nla_total_size(mac_len); return size; } static int nfulnl_put_bridge(struct nfulnl_instance *inst, const struct sk_buff *skb) { + u32 mac_len; + if (!skb_mac_header_was_set(skb)) return 0; @@ -437,12 +440,10 @@ static int nfulnl_put_bridge(struct nfulnl_instance *inst, const struct sk_buff nla_nest_end(inst->skb, nest); } - if (skb->mac_header < skb->network_header) { - int len = (int)(skb->network_header - skb->mac_header); - - if (nla_put(inst->skb, NFULA_L2HDR, len, skb_mac_header(skb))) - goto nla_put_failure; - } + mac_len = skb_mac_header_len(skb); + if (mac_len > 0 && + nla_put(inst->skb, NFULA_L2HDR, mac_len, skb_mac_header(skb))) + goto nla_put_failure; return 0; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index c7ee6f6ff725..58304fd1f70f 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -579,6 +579,7 @@ static u32 nfqnl_get_bridge_size(struct nf_queue_entry *entry) { struct sk_buff *entskb = entry->skb; u32 nlalen = 0; + u32 mac_len; if (entry->state.pf != PF_BRIDGE || !skb_mac_header_was_set(entskb)) return 0; @@ -587,9 +588,9 @@ static u32 nfqnl_get_bridge_size(struct nf_queue_entry *entry) nlalen += nla_total_size(nla_total_size(sizeof(__be16)) + nla_total_size(sizeof(__be16))); - if (entskb->network_header > entskb->mac_header) - nlalen += nla_total_size((entskb->network_header - - entskb->mac_header)); + mac_len = skb_mac_header_len(entskb); + if (mac_len > 0) + nlalen += nla_total_size(mac_len); return nlalen; } @@ -597,6 +598,7 @@ static u32 nfqnl_get_bridge_size(struct nf_queue_entry *entry) static int nfqnl_put_bridge(struct nf_queue_entry *entry, struct sk_buff *skb) { struct sk_buff *entskb = entry->skb; + u32 mac_len; if (entry->state.pf != PF_BRIDGE || !skb_mac_header_was_set(entskb)) return 0; @@ -615,12 +617,10 @@ static int nfqnl_put_bridge(struct nf_queue_entry *entry, struct sk_buff *skb) nla_nest_end(skb, nest); } - if (entskb->mac_header < entskb->network_header) { - int len = (int)(entskb->network_header - entskb->mac_header); - - if (nla_put(skb, NFQA_L2HDR, len, skb_mac_header(entskb))) - goto nla_put_failure; - } + mac_len = skb_mac_header_len(entskb); + if (mac_len > 0 && + nla_put(skb, NFQA_L2HDR, mac_len, skb_mac_header(entskb))) + goto nla_put_failure; return 0; @@ -1004,13 +1004,13 @@ nf_queue_entry_dup(struct nf_queue_entry *e) static void nf_bridge_adjust_skb_data(struct sk_buff *skb) { if (nf_bridge_info_get(skb)) - __skb_push(skb, skb->network_header - skb->mac_header); + __skb_push(skb, skb_mac_header_len(skb)); } static void nf_bridge_adjust_segmented_data(struct sk_buff *skb) { if (nf_bridge_info_get(skb)) - __skb_pull(skb, skb->network_header - skb->mac_header); + __skb_pull(skb, skb_mac_header_len(skb)); } #else #define nf_bridge_adjust_skb_data(s) do {} while (0) @@ -1469,8 +1469,7 @@ static int nfqa_parse_bridge(struct nf_queue_entry *entry, } if (nfqa[NFQA_L2HDR]) { - int mac_header_len = entry->skb->network_header - - entry->skb->mac_header; + u32 mac_header_len = skb_mac_header_len(entry->skb); if (mac_header_len != nla_len(nfqa[NFQA_L2HDR])) return -EINVAL; From 24bd5c2679caf8a228d90cafa221da4b47fd6642 Mon Sep 17 00:00:00 2001 From: Marino Dzalto Date: Fri, 3 Apr 2026 22:59:07 +0200 Subject: [PATCH 1329/1561] netfilter: xt_HL: add pr_fmt and checkentry validation Add pr_fmt to prefix log messages with the module name for easier debugging in dmesg. Add checkentry functions for IPv4 (ttl_mt_check) and IPv6 (hl_mt6_check) to validate the match mode at rule registration time, rejecting invalid modes with -EINVAL. The evaluation function returns false in case the mode is unknown, so this is a cleanup, not a bug fix. Signed-off-by: Marino Dzalto Signed-off-by: Florian Westphal --- net/netfilter/xt_hl.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/net/netfilter/xt_hl.c b/net/netfilter/xt_hl.c index c1a70f8f0441..4a12a757ecbf 100644 --- a/net/netfilter/xt_hl.c +++ b/net/netfilter/xt_hl.c @@ -6,6 +6,7 @@ * Hop Limit matching module * (C) 2001-2002 Maciej Soltysiak */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include @@ -22,6 +23,18 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_ttl"); MODULE_ALIAS("ip6t_hl"); +static int ttl_mt_check(const struct xt_mtchk_param *par) +{ + const struct ipt_ttl_info *info = par->matchinfo; + + if (info->mode > IPT_TTL_GT) { + pr_err("Unknown TTL match mode: %d\n", info->mode); + return -EINVAL; + } + + return 0; +} + static bool ttl_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct ipt_ttl_info *info = par->matchinfo; @@ -41,6 +54,18 @@ static bool ttl_mt(const struct sk_buff *skb, struct xt_action_param *par) return false; } +static int hl_mt6_check(const struct xt_mtchk_param *par) +{ + const struct ip6t_hl_info *info = par->matchinfo; + + if (info->mode > IP6T_HL_GT) { + pr_err("Unknown Hop Limit match mode: %d\n", info->mode); + return -EINVAL; + } + + return 0; +} + static bool hl_mt6(const struct sk_buff *skb, struct xt_action_param *par) { const struct ip6t_hl_info *info = par->matchinfo; @@ -65,6 +90,7 @@ static struct xt_match hl_mt_reg[] __read_mostly = { .name = "ttl", .revision = 0, .family = NFPROTO_IPV4, + .checkentry = ttl_mt_check, .match = ttl_mt, .matchsize = sizeof(struct ipt_ttl_info), .me = THIS_MODULE, @@ -73,6 +99,7 @@ static struct xt_match hl_mt_reg[] __read_mostly = { .name = "hl", .revision = 0, .family = NFPROTO_IPV6, + .checkentry = hl_mt6_check, .match = hl_mt6, .matchsize = sizeof(struct ip6t_hl_info), .me = THIS_MODULE, From 542be3fa5aff54210a02954c38f07e53ea9bdafd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 4 Apr 2026 12:12:59 +0200 Subject: [PATCH 1330/1561] netfilter: xt_socket: enable defrag after all other checks Originally this did not matter because defrag was enabled once per netns and only disabled again on netns dismantle. When this got changed I should have adjusted checkentry to not leave defrag enabled on error. Fixes: de8c12110a13 ("netfilter: disable defrag once its no longer needed") Signed-off-by: Florian Westphal --- net/netfilter/xt_socket.c | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 76e01f292aaf..811e53bee408 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -168,52 +168,41 @@ static int socket_mt_enable_defrag(struct net *net, int family) static int socket_mt_v1_check(const struct xt_mtchk_param *par) { const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo; - int err; - - err = socket_mt_enable_defrag(par->net, par->family); - if (err) - return err; if (info->flags & ~XT_SOCKET_FLAGS_V1) { pr_info_ratelimited("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V1); return -EINVAL; } - return 0; + + return socket_mt_enable_defrag(par->net, par->family); } static int socket_mt_v2_check(const struct xt_mtchk_param *par) { const struct xt_socket_mtinfo2 *info = (struct xt_socket_mtinfo2 *) par->matchinfo; - int err; - - err = socket_mt_enable_defrag(par->net, par->family); - if (err) - return err; if (info->flags & ~XT_SOCKET_FLAGS_V2) { pr_info_ratelimited("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V2); return -EINVAL; } - return 0; + + return socket_mt_enable_defrag(par->net, par->family); } static int socket_mt_v3_check(const struct xt_mtchk_param *par) { const struct xt_socket_mtinfo3 *info = (struct xt_socket_mtinfo3 *)par->matchinfo; - int err; - err = socket_mt_enable_defrag(par->net, par->family); - if (err) - return err; if (info->flags & ~XT_SOCKET_FLAGS_V3) { pr_info_ratelimited("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V3); return -EINVAL; } - return 0; + + return socket_mt_enable_defrag(par->net, par->family); } static void socket_mt_destroy(const struct xt_mtdtor_param *par) From 84dee05d9d61884ee0986f5b4f3d69886f7dfeb0 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Mon, 30 Mar 2026 17:19:34 +0200 Subject: [PATCH 1331/1561] netfilter: conntrack: remove UDP-Lite conntrack support UDP-Lite (RFC 3828) socket support was recently retired from the core networking stack. As a follow-up of that, drop the connection tracker and NAT support for UDP-Lite in Netfilter. This patch removes CONFIG_NF_CT_PROTO_UDPLITE and scrubs UDP-Lite awareness from the conntrack core, NAT core, nft_ct, and ctnetlink. Please note that stateless packet inspection, matching, ipsets or logging support for IPPROTO_UDPLITE is preserved. As conntrack no longer extracts UDP-Lite ports or tracks its L4 state, when performing NAT the UDP-Lite checksum cannot be updated anymore. That is an expected and acceptable consequence of removing UDP-Lite conntrack module. Signed-off-by: Fernando Fernandez Mancera Signed-off-by: Florian Westphal --- .../net/netfilter/ipv4/nf_conntrack_ipv4.h | 3 - include/net/netfilter/nf_conntrack_l4proto.h | 7 -- net/netfilter/Kconfig | 11 -- net/netfilter/nf_conntrack_core.c | 8 -- net/netfilter/nf_conntrack_proto.c | 3 - net/netfilter/nf_conntrack_proto_udp.c | 108 ------------------ net/netfilter/nf_conntrack_standalone.c | 2 - net/netfilter/nf_nat_core.c | 6 - net/netfilter/nf_nat_proto.c | 20 ---- net/netfilter/nfnetlink_cttimeout.c | 1 - net/netfilter/nft_ct.c | 1 - 11 files changed, 170 deletions(-) diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h index 8d65ffbf57de..b39417ad955e 100644 --- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h +++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h @@ -16,9 +16,6 @@ extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp; #ifdef CONFIG_NF_CT_PROTO_SCTP extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp; #endif -#ifdef CONFIG_NF_CT_PROTO_UDPLITE -extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite; -#endif #ifdef CONFIG_NF_CT_PROTO_GRE extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre; #endif diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index cd5020835a6d..fde2427ceb8f 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -107,11 +107,6 @@ int nf_conntrack_udp_packet(struct nf_conn *ct, unsigned int dataoff, enum ip_conntrack_info ctinfo, const struct nf_hook_state *state); -int nf_conntrack_udplite_packet(struct nf_conn *ct, - struct sk_buff *skb, - unsigned int dataoff, - enum ip_conntrack_info ctinfo, - const struct nf_hook_state *state); int nf_conntrack_tcp_packet(struct nf_conn *ct, struct sk_buff *skb, unsigned int dataoff, @@ -139,8 +134,6 @@ void nf_conntrack_icmpv6_init_net(struct net *net); /* Existing built-in generic protocol */ extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic; -#define MAX_NF_CT_PROTO IPPROTO_UDPLITE - const struct nf_conntrack_l4proto *nf_ct_l4proto_find(u8 l4proto); /* Generic netlink helpers */ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index f3ea0cb26f36..682c675125fc 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -209,17 +209,6 @@ config NF_CT_PROTO_SCTP If unsure, say Y. -config NF_CT_PROTO_UDPLITE - bool 'UDP-Lite protocol connection tracking support' - depends on NETFILTER_ADVANCED - default y - help - With this option enabled, the layer 3 independent connection - tracking code will be able to do state tracking on UDP-Lite - connections. - - If unsure, say Y. - config NF_CONNTRACK_AMANDA tristate "Amanda backup protocol support" depends on NETFILTER_ADVANCED diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 27ce5fda8993..b08189226320 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -323,9 +323,6 @@ nf_ct_get_tuple(const struct sk_buff *skb, #endif case IPPROTO_TCP: case IPPROTO_UDP: -#ifdef CONFIG_NF_CT_PROTO_UDPLITE - case IPPROTO_UDPLITE: -#endif #ifdef CONFIG_NF_CT_PROTO_SCTP case IPPROTO_SCTP: #endif @@ -1987,11 +1984,6 @@ static int nf_conntrack_handle_packet(struct nf_conn *ct, case IPPROTO_ICMPV6: return nf_conntrack_icmpv6_packet(ct, skb, ctinfo, state); #endif -#ifdef CONFIG_NF_CT_PROTO_UDPLITE - case IPPROTO_UDPLITE: - return nf_conntrack_udplite_packet(ct, skb, dataoff, - ctinfo, state); -#endif #ifdef CONFIG_NF_CT_PROTO_SCTP case IPPROTO_SCTP: return nf_conntrack_sctp_packet(ct, skb, dataoff, diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index bc1d96686b9c..50ddd3d613e1 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -103,9 +103,6 @@ const struct nf_conntrack_l4proto *nf_ct_l4proto_find(u8 l4proto) #ifdef CONFIG_NF_CT_PROTO_SCTP case IPPROTO_SCTP: return &nf_conntrack_l4proto_sctp; #endif -#ifdef CONFIG_NF_CT_PROTO_UDPLITE - case IPPROTO_UDPLITE: return &nf_conntrack_l4proto_udplite; -#endif #ifdef CONFIG_NF_CT_PROTO_GRE case IPPROTO_GRE: return &nf_conntrack_l4proto_gre; #endif diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 0030fbe8885c..cc9b7e5e1935 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -129,91 +129,6 @@ int nf_conntrack_udp_packet(struct nf_conn *ct, return NF_ACCEPT; } -#ifdef CONFIG_NF_CT_PROTO_UDPLITE -static void udplite_error_log(const struct sk_buff *skb, - const struct nf_hook_state *state, - const char *msg) -{ - nf_l4proto_log_invalid(skb, state, IPPROTO_UDPLITE, "%s", msg); -} - -static bool udplite_error(struct sk_buff *skb, - unsigned int dataoff, - const struct nf_hook_state *state) -{ - unsigned int udplen = skb->len - dataoff; - const struct udphdr *hdr; - struct udphdr _hdr; - unsigned int cscov; - - /* Header is too small? */ - hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); - if (!hdr) { - udplite_error_log(skb, state, "short packet"); - return true; - } - - cscov = ntohs(hdr->len); - if (cscov == 0) { - cscov = udplen; - } else if (cscov < sizeof(*hdr) || cscov > udplen) { - udplite_error_log(skb, state, "invalid checksum coverage"); - return true; - } - - /* UDPLITE mandates checksums */ - if (!hdr->check) { - udplite_error_log(skb, state, "checksum missing"); - return true; - } - - /* Checksum invalid? Ignore. */ - if (state->hook == NF_INET_PRE_ROUTING && - state->net->ct.sysctl_checksum && - nf_checksum_partial(skb, state->hook, dataoff, cscov, IPPROTO_UDP, - state->pf)) { - udplite_error_log(skb, state, "bad checksum"); - return true; - } - - return false; -} - -/* Returns verdict for packet, and may modify conntracktype */ -int nf_conntrack_udplite_packet(struct nf_conn *ct, - struct sk_buff *skb, - unsigned int dataoff, - enum ip_conntrack_info ctinfo, - const struct nf_hook_state *state) -{ - unsigned int *timeouts; - - if (udplite_error(skb, dataoff, state)) - return -NF_ACCEPT; - - timeouts = nf_ct_timeout_lookup(ct); - if (!timeouts) - timeouts = udp_get_timeouts(nf_ct_net(ct)); - - /* If we've seen traffic both ways, this is some kind of UDP - stream. Extend timeout. */ - if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { - nf_ct_refresh_acct(ct, ctinfo, skb, - timeouts[UDP_CT_REPLIED]); - - if (unlikely((ct->status & IPS_NAT_CLASH))) - return NF_ACCEPT; - - /* Also, more likely to be important, and not a probe */ - if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_ASSURED, ct); - } else { - nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[UDP_CT_UNREPLIED]); - } - return NF_ACCEPT; -} -#endif - #ifdef CONFIG_NF_CONNTRACK_TIMEOUT #include @@ -299,26 +214,3 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp = }, #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ }; - -#ifdef CONFIG_NF_CT_PROTO_UDPLITE -const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite = -{ - .l4proto = IPPROTO_UDPLITE, - .allow_clash = true, -#if IS_ENABLED(CONFIG_NF_CT_NETLINK) - .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, - .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, - .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, - .nla_policy = nf_ct_port_nla_policy, -#endif -#ifdef CONFIG_NF_CONNTRACK_TIMEOUT - .ctnl_timeout = { - .nlattr_to_obj = udp_timeout_nlattr_to_obj, - .obj_to_nlattr = udp_timeout_obj_to_nlattr, - .nlattr_max = CTA_TIMEOUT_UDP_MAX, - .obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX, - .nla_policy = udp_timeout_nla_policy, - }, -#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ -}; -#endif diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 207b240b14e5..be2953c7d702 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -61,7 +61,6 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, ntohs(tuple->src.u.tcp.port), ntohs(tuple->dst.u.tcp.port)); break; - case IPPROTO_UDPLITE: case IPPROTO_UDP: seq_printf(s, "sport=%hu dport=%hu ", ntohs(tuple->src.u.udp.port), @@ -277,7 +276,6 @@ static const char* l4proto_name(u16 proto) case IPPROTO_UDP: return "udp"; case IPPROTO_GRE: return "gre"; case IPPROTO_SCTP: return "sctp"; - case IPPROTO_UDPLITE: return "udplite"; case IPPROTO_ICMPV6: return "icmpv6"; } diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 3b5434e4ec9c..83b2b5e9759a 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -68,7 +68,6 @@ static void nf_nat_ipv4_decode_session(struct sk_buff *skb, fl4->daddr = t->dst.u3.ip; if (t->dst.protonum == IPPROTO_TCP || t->dst.protonum == IPPROTO_UDP || - t->dst.protonum == IPPROTO_UDPLITE || t->dst.protonum == IPPROTO_SCTP) fl4->fl4_dport = t->dst.u.all; } @@ -79,7 +78,6 @@ static void nf_nat_ipv4_decode_session(struct sk_buff *skb, fl4->saddr = t->src.u3.ip; if (t->dst.protonum == IPPROTO_TCP || t->dst.protonum == IPPROTO_UDP || - t->dst.protonum == IPPROTO_UDPLITE || t->dst.protonum == IPPROTO_SCTP) fl4->fl4_sport = t->src.u.all; } @@ -99,7 +97,6 @@ static void nf_nat_ipv6_decode_session(struct sk_buff *skb, fl6->daddr = t->dst.u3.in6; if (t->dst.protonum == IPPROTO_TCP || t->dst.protonum == IPPROTO_UDP || - t->dst.protonum == IPPROTO_UDPLITE || t->dst.protonum == IPPROTO_SCTP) fl6->fl6_dport = t->dst.u.all; } @@ -110,7 +107,6 @@ static void nf_nat_ipv6_decode_session(struct sk_buff *skb, fl6->saddr = t->src.u3.in6; if (t->dst.protonum == IPPROTO_TCP || t->dst.protonum == IPPROTO_UDP || - t->dst.protonum == IPPROTO_UDPLITE || t->dst.protonum == IPPROTO_SCTP) fl6->fl6_sport = t->src.u.all; } @@ -415,7 +411,6 @@ static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple, case IPPROTO_GRE: /* all fall though */ case IPPROTO_TCP: case IPPROTO_UDP: - case IPPROTO_UDPLITE: case IPPROTO_SCTP: if (maniptype == NF_NAT_MANIP_SRC) port = tuple->src.u.all; @@ -612,7 +607,6 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple, goto find_free_id; #endif case IPPROTO_UDP: - case IPPROTO_UDPLITE: case IPPROTO_TCP: case IPPROTO_SCTP: if (maniptype == NF_NAT_MANIP_SRC) diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c index 97c0f841fc96..07f51fe75fbe 100644 --- a/net/netfilter/nf_nat_proto.c +++ b/net/netfilter/nf_nat_proto.c @@ -79,23 +79,6 @@ static bool udp_manip_pkt(struct sk_buff *skb, return true; } -static bool udplite_manip_pkt(struct sk_buff *skb, - unsigned int iphdroff, unsigned int hdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype) -{ -#ifdef CONFIG_NF_CT_PROTO_UDPLITE - struct udphdr *hdr; - - if (skb_ensure_writable(skb, hdroff + sizeof(*hdr))) - return false; - - hdr = (struct udphdr *)(skb->data + hdroff); - __udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, true); -#endif - return true; -} - static bool sctp_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, unsigned int hdroff, @@ -287,9 +270,6 @@ static bool l4proto_manip_pkt(struct sk_buff *skb, case IPPROTO_UDP: return udp_manip_pkt(skb, iphdroff, hdroff, tuple, maniptype); - case IPPROTO_UDPLITE: - return udplite_manip_pkt(skb, iphdroff, hdroff, - tuple, maniptype); case IPPROTO_SCTP: return sctp_manip_pkt(skb, iphdroff, hdroff, tuple, maniptype); diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index fd8652aa7e88..dca6826af7de 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -457,7 +457,6 @@ static int cttimeout_default_get(struct sk_buff *skb, timeouts = nf_tcp_pernet(info->net)->timeouts; break; case IPPROTO_UDP: - case IPPROTO_UDPLITE: timeouts = nf_udp_pernet(info->net)->timeouts; break; case IPPROTO_ICMPV6: diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 425525b90ac9..60ee8d932fcb 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -1252,7 +1252,6 @@ static int nft_ct_expect_obj_init(const struct nft_ctx *ctx, switch (priv->l4proto) { case IPPROTO_TCP: case IPPROTO_UDP: - case IPPROTO_UDPLITE: case IPPROTO_DCCP: case IPPROTO_SCTP: break; From f30e5a7291a879deeeb6b9ba92b12c9be1ee5f29 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 9 Apr 2026 16:34:43 -0600 Subject: [PATCH 1332/1561] netfilter: x_tables: Avoid a couple -Wflex-array-member-not-at-end warnings -Wflex-array-member-not-at-end was introduced in GCC-14, and we are getting ready to enable it, globally. Use the TRAILING_OVERLAP() helper to fix the following warnings: 1 net/netfilter/x_tables.c:816:39: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] 1 net/netfilter/x_tables.c:811:39: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] This helper creates a union between a flexible-array member (FAM) and a set of members that would otherwise follow it. This overlays the trailing members onto the FAM while preserving the original memory layout. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Florian Westphal --- net/netfilter/x_tables.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index b39017c80548..9f837fb5ceb4 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -819,13 +819,17 @@ EXPORT_SYMBOL_GPL(xt_compat_match_to_user); /* non-compat version may have padding after verdict */ struct compat_xt_standard_target { - struct compat_xt_entry_target t; - compat_uint_t verdict; + /* Must be last as it ends in a flexible-array member. */ + TRAILING_OVERLAP(struct compat_xt_entry_target, t, data, + compat_uint_t verdict; + ); }; struct compat_xt_error_target { - struct compat_xt_entry_target t; - char errorname[XT_FUNCTION_MAXNAMELEN]; + /* Must be last as it ends in a flexible-array member. */ + TRAILING_OVERLAP(struct compat_xt_entry_target, t, data, + char errorname[XT_FUNCTION_MAXNAMELEN]; + ); }; int xt_compat_check_entry_offsets(const void *base, const char *elems, From 1dfd95bdf4d18d263aa8fad06bfb9f4d9c992b18 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 9 Apr 2026 13:30:41 +0200 Subject: [PATCH 1333/1561] netfilter: nft_fwd_netdev: check ttl/hl before forwarding Drop packets if their ttl/hl is too small for forwarding. Fixes: d32de98ea70f ("netfilter: nft_fwd_netdev: allow to forward packets via neighbour layer") Signed-off-by: Florian Westphal --- net/netfilter/nft_fwd_netdev.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index ad48dcd45abe..4bce36c3a6a0 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -116,6 +116,11 @@ static void nft_fwd_neigh_eval(const struct nft_expr *expr, goto out; } iph = ip_hdr(skb); + if (iph->ttl <= 1) { + verdict = NF_DROP; + goto out; + } + ip_decrease_ttl(iph); neigh_table = NEIGH_ARP_TABLE; break; @@ -132,6 +137,11 @@ static void nft_fwd_neigh_eval(const struct nft_expr *expr, goto out; } ip6h = ipv6_hdr(skb); + if (ip6h->hop_limit <= 1) { + verdict = NF_DROP; + goto out; + } + ip6h->hop_limit--; neigh_table = NEIGH_ND_TABLE; break; From 62443dc21114c0bbc476fa62973db89743f2f137 Mon Sep 17 00:00:00 2001 From: Zhengchuan Liang Date: Sat, 4 Apr 2026 17:39:48 +0800 Subject: [PATCH 1334/1561] netfilter: require Ethernet MAC header before using eth_hdr() `ip6t_eui64`, `xt_mac`, the `bitmap:ip,mac`, `hash:ip,mac`, and `hash:mac` ipset types, and `nf_log_syslog` access `eth_hdr(skb)` after either assuming that the skb is associated with an Ethernet device or checking only that the `ETH_HLEN` bytes at `skb_mac_header(skb)` lie between `skb->head` and `skb->data`. Make these paths first verify that the skb is associated with an Ethernet device, that the MAC header was set, and that it spans at least a full Ethernet header before accessing `eth_hdr(skb)`. Suggested-by: Florian Westphal Tested-by: Ren Wei Signed-off-by: Zhengchuan Liang Signed-off-by: Ren Wei Signed-off-by: Florian Westphal --- net/ipv6/netfilter/ip6t_eui64.c | 7 +++++-- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 5 +++-- net/netfilter/ipset/ip_set_hash_ipmac.c | 9 +++++---- net/netfilter/ipset/ip_set_hash_mac.c | 5 +++-- net/netfilter/nf_log_syslog.c | 8 +++++++- net/netfilter/xt_mac.c | 4 +--- 6 files changed, 24 insertions(+), 14 deletions(-) diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c index da69a27e8332..bbb684f9964c 100644 --- a/net/ipv6/netfilter/ip6t_eui64.c +++ b/net/ipv6/netfilter/ip6t_eui64.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -21,8 +22,10 @@ eui64_mt6(const struct sk_buff *skb, struct xt_action_param *par) { unsigned char eui64[8]; - if (!(skb_mac_header(skb) >= skb->head && - skb_mac_header(skb) + ETH_HLEN <= skb->data)) { + if (!skb->dev || skb->dev->type != ARPHRD_ETHER) + return false; + + if (!skb_mac_header_was_set(skb) || skb_mac_header_len(skb) < ETH_HLEN) { par->hotdrop = true; return false; } diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 2c625e0f49ec..752f59ef8744 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -220,8 +221,8 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb, return -IPSET_ERR_BITMAP_RANGE; /* Backward compatibility: we don't check the second flag */ - if (skb_mac_header(skb) < skb->head || - (skb_mac_header(skb) + ETH_HLEN) > skb->data) + if (!skb->dev || skb->dev->type != ARPHRD_ETHER || + !skb_mac_header_was_set(skb) || skb_mac_header_len(skb) < ETH_HLEN) return -EINVAL; e.id = ip_to_id(map, ip); diff --git a/net/netfilter/ipset/ip_set_hash_ipmac.c b/net/netfilter/ipset/ip_set_hash_ipmac.c index 467c59a83c0a..b9a2681e2488 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmac.c +++ b/net/netfilter/ipset/ip_set_hash_ipmac.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -89,8 +90,8 @@ hash_ipmac4_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_ipmac4_elem e = { .ip = 0, { .foo[0] = 0, .foo[1] = 0 } }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); - if (skb_mac_header(skb) < skb->head || - (skb_mac_header(skb) + ETH_HLEN) > skb->data) + if (!skb->dev || skb->dev->type != ARPHRD_ETHER || + !skb_mac_header_was_set(skb) || skb_mac_header_len(skb) < ETH_HLEN) return -EINVAL; if (opt->flags & IPSET_DIM_TWO_SRC) @@ -205,8 +206,8 @@ hash_ipmac6_kadt(struct ip_set *set, const struct sk_buff *skb, }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); - if (skb_mac_header(skb) < skb->head || - (skb_mac_header(skb) + ETH_HLEN) > skb->data) + if (!skb->dev || skb->dev->type != ARPHRD_ETHER || + !skb_mac_header_was_set(skb) || skb_mac_header_len(skb) < ETH_HLEN) return -EINVAL; if (opt->flags & IPSET_DIM_TWO_SRC) diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c index 718814730acf..41a122591fe2 100644 --- a/net/netfilter/ipset/ip_set_hash_mac.c +++ b/net/netfilter/ipset/ip_set_hash_mac.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -77,8 +78,8 @@ hash_mac4_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_mac4_elem e = { { .foo[0] = 0, .foo[1] = 0 } }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); - if (skb_mac_header(skb) < skb->head || - (skb_mac_header(skb) + ETH_HLEN) > skb->data) + if (!skb->dev || skb->dev->type != ARPHRD_ETHER || + !skb_mac_header_was_set(skb) || skb_mac_header_len(skb) < ETH_HLEN) return -EINVAL; if (opt->flags & IPSET_DIM_ONE_SRC) diff --git a/net/netfilter/nf_log_syslog.c b/net/netfilter/nf_log_syslog.c index 0507d67cad27..7a8952b049d1 100644 --- a/net/netfilter/nf_log_syslog.c +++ b/net/netfilter/nf_log_syslog.c @@ -78,7 +78,10 @@ dump_arp_packet(struct nf_log_buf *m, else logflags = NF_LOG_DEFAULT_MASK; - if (logflags & NF_LOG_MACDECODE) { + if ((logflags & NF_LOG_MACDECODE) && + skb->dev && skb->dev->type == ARPHRD_ETHER && + skb_mac_header_was_set(skb) && + skb_mac_header_len(skb) >= ETH_HLEN) { nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ", eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest); nf_log_dump_vlan(m, skb); @@ -797,6 +800,9 @@ static void dump_mac_header(struct nf_log_buf *m, switch (dev->type) { case ARPHRD_ETHER: + if (!skb_mac_header_was_set(skb) || skb_mac_header_len(skb) < ETH_HLEN) + return; + nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ", eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest); nf_log_dump_vlan(m, skb); diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c index 81649da57ba5..4798cd2ca26e 100644 --- a/net/netfilter/xt_mac.c +++ b/net/netfilter/xt_mac.c @@ -29,9 +29,7 @@ static bool mac_mt(const struct sk_buff *skb, struct xt_action_param *par) if (skb->dev == NULL || skb->dev->type != ARPHRD_ETHER) return false; - if (skb_mac_header(skb) < skb->head) - return false; - if (skb_mac_header(skb) + ETH_HLEN > skb->data) + if (!skb_mac_header_was_set(skb) || skb_mac_header_len(skb) < ETH_HLEN) return false; ret = ether_addr_equal(eth_hdr(skb)->h_source, info->srcaddr); ret ^= info->invert; From 1f2ac009d3e06380400618e777c858e582872efa Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Fri, 30 Jan 2026 14:13:56 +0800 Subject: [PATCH 1335/1561] Bluetooth: btusb: MT7922: Add VID/PID 0489/e174 Add VID 0489 & PID e174 for MediaTek MT7922 USB Bluetooth chip. The information in /sys/kernel/debug/usb/devices about the Bluetooth device is listed as the below. T: Bus=06 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0489 ProdID=e174 Rev= 1.00 S: Manufacturer=MediaTek Inc. S: Product=Wireless_Device S: SerialNumber=000000000 C:* #Ifs= 3 Cfg#= 1 Atr=e0 MxPwr=100mA A: FirstIf#= 0 IfCount= 3 Cls=e0(wlcon) Sub=01 Prot=01 I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=125us E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms I: If#= 1 Alt= 6 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 63 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 63 Ivl=1ms I: If#= 2 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=8a(I) Atr=03(Int.) MxPS= 64 Ivl=125us E: Ad=0a(O) Atr=03(Int.) MxPS= 64 Ivl=125us I:* If#= 2 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=8a(I) Atr=03(Int.) MxPS= 512 Ivl=125us E: Ad=0a(O) Atr=03(Int.) MxPS= 512 Ivl=125us Signed-off-by: Chris Lu Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btusb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 5c535f3ab722..aeba026bdb42 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -703,6 +703,8 @@ static const struct usb_device_id quirks_table[] = { BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0489, 0xe170), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x0489, 0xe174), .driver_info = BTUSB_MEDIATEK | + BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x04ca, 0x3804), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x04ca, 0x38e4), .driver_info = BTUSB_MEDIATEK | From edef6576853e51faa11bb286884c362ff7fc83a0 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Tue, 3 Feb 2026 21:57:21 +0200 Subject: [PATCH 1336/1561] Bluetooth: hci_core: Rate limit the logging of invalid ISO handle Some controller firmwares (eg for MT7925) continuously send invalid ISO packet, which result to "ISO unknown handle" error spam in logs. It's not important to show all of them to the user. Rate limit these ISO error messages, similarly as we do for SCO. Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 01f8ceeb1c0c..c46c1236ebfa 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3917,8 +3917,8 @@ static void hci_isodata_packet(struct hci_dev *hdev, struct sk_buff *skb) err = iso_recv(hdev, handle, skb, flags); if (err == -ENOENT) - bt_dev_err(hdev, "ISO packet for unknown connection handle %d", - handle); + bt_dev_err_ratelimited(hdev, "ISO packet for unknown connection handle %d", + handle); else if (err) bt_dev_dbg(hdev, "ISO packet recv for handle %d failed: %d", handle, err); From a80b51f066063e399a50c1e1ced008734a6ec800 Mon Sep 17 00:00:00 2001 From: Dongyang Jin Date: Tue, 3 Feb 2026 15:10:48 +0800 Subject: [PATCH 1337/1561] Bluetooth: btbcm: remove done label in btbcm_patchram There is no point in having the label since all it does is return the value in the 'err' variable. Instead make every goto return directly and remove the label. Signed-off-by: Dongyang Jin Reviewed-by: Paul Menzel Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btbcm.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c index d33cc70eec66..dccfbeee4721 100644 --- a/drivers/bluetooth/btbcm.c +++ b/drivers/bluetooth/btbcm.c @@ -223,7 +223,7 @@ int btbcm_patchram(struct hci_dev *hdev, const struct firmware *fw) err = PTR_ERR(skb); bt_dev_err(hdev, "BCM: Download Minidrv command failed (%d)", err); - goto done; + return err; } kfree_skb(skb); @@ -242,8 +242,7 @@ int btbcm_patchram(struct hci_dev *hdev, const struct firmware *fw) if (fw_size < cmd->plen) { bt_dev_err(hdev, "BCM: Patch is corrupted"); - err = -EINVAL; - goto done; + return -EINVAL; } cmd_param = fw_ptr; @@ -258,7 +257,7 @@ int btbcm_patchram(struct hci_dev *hdev, const struct firmware *fw) err = PTR_ERR(skb); bt_dev_err(hdev, "BCM: Patch command %04x failed (%d)", opcode, err); - goto done; + return err; } kfree_skb(skb); } @@ -266,8 +265,7 @@ int btbcm_patchram(struct hci_dev *hdev, const struct firmware *fw) /* 250 msec delay after Launch Ram completes */ msleep(250); -done: - return err; + return 0; } EXPORT_SYMBOL(btbcm_patchram); From 54f1f020e9f4a087779cc4d96a7c86f47d0c6797 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Tue, 3 Feb 2026 14:25:08 +0800 Subject: [PATCH 1338/1561] Bluetooth: btmtk: improve mt79xx firmware setup retry flow If retries are exhausted, driver should not do futher operation. During mt79xx firmware download process, if the retry count reaches0, driver will return an -EIO error and release the firmware resources. Signed-off-by: Chris Lu Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btmtk.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/bluetooth/btmtk.c b/drivers/bluetooth/btmtk.c index fa7533578f85..0ada5a12130d 100644 --- a/drivers/bluetooth/btmtk.c +++ b/drivers/bluetooth/btmtk.c @@ -205,6 +205,12 @@ int btmtk_setup_firmware_79xx(struct hci_dev *hdev, const char *fwname, } } + /* If retry exhausted goto err_release_fw */ + if (retry == 0) { + err = -EIO; + goto err_release_fw; + } + fw_ptr += section_offset; wmt_params.op = BTMTK_WMT_PATCH_DWNLD; wmt_params.status = NULL; From b27a306e9f206aaf10f22b0c6338d7b97799c7b7 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Tue, 3 Feb 2026 14:25:09 +0800 Subject: [PATCH 1339/1561] Bluetooth: btmtk: add status check in mt79xx firmware setup To prevent abnormal controller states, it is necessary to check status in another part of the mt79xx firmware setup. During this process, receiving the 'BTMTK_WMT_PATCH_PROGRESS' status is unexpected. If this occurs, it should be treated as an error, and driver must be prevented from continuing execution. Signed-off-by: Chris Lu Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btmtk.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/bluetooth/btmtk.c b/drivers/bluetooth/btmtk.c index 0ada5a12130d..5c62b802a199 100644 --- a/drivers/bluetooth/btmtk.c +++ b/drivers/bluetooth/btmtk.c @@ -213,7 +213,6 @@ int btmtk_setup_firmware_79xx(struct hci_dev *hdev, const char *fwname, fw_ptr += section_offset; wmt_params.op = BTMTK_WMT_PATCH_DWNLD; - wmt_params.status = NULL; while (dl_size > 0) { dlen = min_t(int, 250, dl_size); @@ -231,7 +230,14 @@ int btmtk_setup_firmware_79xx(struct hci_dev *hdev, const char *fwname, wmt_params.data = fw_ptr; err = wmt_cmd_sync(hdev, &wmt_params); - if (err < 0) { + /* Status BTMTK_WMT_PATCH_PROGRESS indicates firmware is + * in process of being downloaded, which is not expected to + * occur here. + */ + if (status == BTMTK_WMT_PATCH_PROGRESS) { + err = -EIO; + goto err_release_fw; + } else if (err < 0) { bt_dev_err(hdev, "Failed to send wmt patch dwnld (%d)", err); goto err_release_fw; From 679621a767bfa0a2cb77fbd8b664412e9e3f89cf Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Tue, 3 Feb 2026 14:25:10 +0800 Subject: [PATCH 1340/1561] Bluetooth: btmtk: Add reset mechanism if downloading firmware failed Add a new flag 'BTMTK_FIRMWARE_DL_RETRY'. If an error occurs during mt79xx firmware download process, this flag will be set and cleared after a reset. If the flag is already set and firmware still cannot be loaded successfully after a reset, no further reset attempts will be made. In other words, if there is a problem during firmware download, only one reset will be attempted. Signed-off-by: Chris Lu Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btmtk.c | 6 ++++++ drivers/bluetooth/btmtk.h | 1 + 2 files changed, 7 insertions(+) diff --git a/drivers/bluetooth/btmtk.c b/drivers/bluetooth/btmtk.c index 5c62b802a199..e4cee251852e 100644 --- a/drivers/bluetooth/btmtk.c +++ b/drivers/bluetooth/btmtk.c @@ -1344,6 +1344,9 @@ int btmtk_usb_setup(struct hci_dev *hdev) err = btmtk_setup_firmware_79xx(hdev, fw_bin_name, btmtk_usb_hci_wmt_sync); if (err < 0) { + /* retry once if setup firmware error */ + if (!test_and_set_bit(BTMTK_FIRMWARE_DL_RETRY, &btmtk_data->flags)) + btmtk_reset_sync(hdev); bt_dev_err(hdev, "Failed to set up firmware (%d)", err); return err; } @@ -1371,6 +1374,9 @@ int btmtk_usb_setup(struct hci_dev *hdev) hci_set_msft_opcode(hdev, 0xFD30); hci_set_aosp_capable(hdev); + /* Clear BTMTK_FIRMWARE_DL_RETRY if setup successfully */ + test_and_clear_bit(BTMTK_FIRMWARE_DL_RETRY, &btmtk_data->flags); + /* Set up ISO interface after protocol enabled */ if (test_bit(BTMTK_ISOPKT_OVER_INTR, &btmtk_data->flags)) { if (!btmtk_usb_isointf_init(hdev)) diff --git a/drivers/bluetooth/btmtk.h b/drivers/bluetooth/btmtk.h index 5df7c3296624..b9df2b8f0627 100644 --- a/drivers/bluetooth/btmtk.h +++ b/drivers/bluetooth/btmtk.h @@ -147,6 +147,7 @@ enum { BTMTK_HW_RESET_ACTIVE, BTMTK_ISOPKT_OVER_INTR, BTMTK_ISOPKT_RUNNING, + BTMTK_FIRMWARE_DL_RETRY, }; typedef int (*btmtk_reset_sync_func_t)(struct hci_dev *, void *); From f29bc37dfc4ad7570d78f8cd482d4b83c3caffdf Mon Sep 17 00:00:00 2001 From: Vivek Sahu Date: Tue, 10 Feb 2026 17:31:01 +0530 Subject: [PATCH 1341/1561] Bluetooth: qca: Refactor code on the basis of chipset names Whenever new chipset support is added to the driver code, we ended up adding chipset name to the last of the switch case arising code readability issue because of improper sorting of the chipset names in various places of the code. Refactor code such a way that new chipset can be added easily in the code without compromising code readability. Signed-off-by: Vivek Sahu Reviewed-by: Dmitry Baryshkov Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btqca.c | 37 +++++++++++++++++++------------------ drivers/bluetooth/hci_qca.c | 30 +++++++++++++++--------------- 2 files changed, 34 insertions(+), 33 deletions(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index 3b0626920193..dda76365726f 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -801,6 +801,14 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, snprintf(config.fwname, sizeof(config.fwname), "qca/%s", rampatch_name); } else { switch (soc_type) { + case QCA_QCA2066: + snprintf(config.fwname, sizeof(config.fwname), + "qca/hpbtfw%02x.tlv", rom_ver); + break; + case QCA_QCA6390: + snprintf(config.fwname, sizeof(config.fwname), + "qca/htbtfw%02x.tlv", rom_ver); + break; case QCA_WCN3950: snprintf(config.fwname, sizeof(config.fwname), "qca/cmbtfw%02x.tlv", rom_ver); @@ -815,14 +823,6 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, snprintf(config.fwname, sizeof(config.fwname), "qca/apbtfw%02x.tlv", rom_ver); break; - case QCA_QCA2066: - snprintf(config.fwname, sizeof(config.fwname), - "qca/hpbtfw%02x.tlv", rom_ver); - break; - case QCA_QCA6390: - snprintf(config.fwname, sizeof(config.fwname), - "qca/htbtfw%02x.tlv", rom_ver); - break; case QCA_WCN6750: /* Choose mbn file by default.If mbn file is not found * then choose tlv file @@ -892,6 +892,16 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, } } else { switch (soc_type) { + case QCA_QCA2066: + qca_get_nvm_name_by_board(config.fwname, + sizeof(config.fwname), + "hpnv", soc_type, ver, + rom_ver, boardid); + break; + case QCA_QCA6390: + snprintf(config.fwname, sizeof(config.fwname), + "qca/htnv%02x.bin", rom_ver); + break; case QCA_WCN3950: if (le32_to_cpu(ver.soc_id) == QCA_WCN3950_SOC_ID_T) variant = "t"; @@ -914,15 +924,6 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, snprintf(config.fwname, sizeof(config.fwname), "qca/apnv%02x.bin", rom_ver); break; - case QCA_QCA2066: - qca_get_nvm_name_by_board(config.fwname, - sizeof(config.fwname), "hpnv", soc_type, ver, - rom_ver, boardid); - break; - case QCA_QCA6390: - snprintf(config.fwname, sizeof(config.fwname), - "qca/htnv%02x.bin", rom_ver); - break; case QCA_WCN6750: snprintf(config.fwname, sizeof(config.fwname), "qca/msnv%02x.bin", rom_ver); @@ -956,9 +957,9 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, } switch (soc_type) { - case QCA_WCN3991: case QCA_QCA2066: case QCA_QCA6390: + case QCA_WCN3991: case QCA_WCN6750: case QCA_WCN6855: case QCA_WCN7850: diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index bb9f002aa85e..1086909691c6 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1850,6 +1850,7 @@ static int qca_power_on(struct hci_dev *hdev) return 0; switch (soc_type) { + case QCA_QCA6390: case QCA_WCN3950: case QCA_WCN3988: case QCA_WCN3990: @@ -1858,7 +1859,6 @@ static int qca_power_on(struct hci_dev *hdev) case QCA_WCN6750: case QCA_WCN6855: case QCA_WCN7850: - case QCA_QCA6390: ret = qca_regulator_init(hu); break; @@ -2096,6 +2096,18 @@ static const struct hci_uart_proto qca_proto = { .dequeue = qca_dequeue, }; +static const struct qca_device_data qca_soc_data_qca2066 __maybe_unused = { + .soc_type = QCA_QCA2066, + .num_vregs = 0, + .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES | + QCA_CAP_HFP_HW_OFFLOAD, +}; + +static const struct qca_device_data qca_soc_data_qca6390 __maybe_unused = { + .soc_type = QCA_QCA6390, + .num_vregs = 0, +}; + static const struct qca_device_data qca_soc_data_wcn3950 __maybe_unused = { .soc_type = QCA_WCN3950, .vregs = (struct qca_vreg []) { @@ -2152,18 +2164,6 @@ static const struct qca_device_data qca_soc_data_wcn3998 __maybe_unused = { .num_vregs = 4, }; -static const struct qca_device_data qca_soc_data_qca2066 __maybe_unused = { - .soc_type = QCA_QCA2066, - .num_vregs = 0, - .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES | - QCA_CAP_HFP_HW_OFFLOAD, -}; - -static const struct qca_device_data qca_soc_data_qca6390 __maybe_unused = { - .soc_type = QCA_QCA6390, - .num_vregs = 0, -}; - static const struct qca_device_data qca_soc_data_wcn6750 __maybe_unused = { .soc_type = QCA_WCN6750, .vregs = (struct qca_vreg []) { @@ -2402,6 +2402,7 @@ static int qca_serdev_probe(struct serdev_device *serdev) qcadev->btsoc_type = QCA_ROME; switch (qcadev->btsoc_type) { + case QCA_QCA6390: case QCA_WCN3950: case QCA_WCN3988: case QCA_WCN3990: @@ -2410,7 +2411,6 @@ static int qca_serdev_probe(struct serdev_device *serdev) case QCA_WCN6750: case QCA_WCN6855: case QCA_WCN7850: - case QCA_QCA6390: qcadev->bt_power = devm_kzalloc(&serdev->dev, sizeof(struct qca_power), GFP_KERNEL); @@ -2422,9 +2422,9 @@ static int qca_serdev_probe(struct serdev_device *serdev) } switch (qcadev->btsoc_type) { + case QCA_WCN6750: case QCA_WCN6855: case QCA_WCN7850: - case QCA_WCN6750: if (!device_property_present(&serdev->dev, "enable-gpios")) { /* * Backward compatibility with old DT sources. If the From 04c217a7fc8f23a1c99b014cb6a89cf77ac7a012 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 9 Feb 2026 18:11:48 +0100 Subject: [PATCH 1342/1561] Bluetooth: btbcm: Add entry for BCM4343A2 UART Bluetooth This patch adds the device ID for the BCM4343A2 module, found e.g. in the muRata 1YN WiFi+BT combined device. The required firmware file is named 'BCM4343A2.hcd'. Signed-off-by: Marek Vasut Reviewed-by: Paul Menzel Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btbcm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c index dccfbeee4721..463d59890bef 100644 --- a/drivers/bluetooth/btbcm.c +++ b/drivers/bluetooth/btbcm.c @@ -505,6 +505,7 @@ static const struct bcm_subver_table bcm_uart_subver_table[] = { { 0x6119, "BCM4345C0" }, /* 003.001.025 */ { 0x6606, "BCM4345C5" }, /* 003.006.006 */ { 0x230f, "BCM4356A2" }, /* 001.003.015 */ + { 0x2310, "BCM4343A2" }, /* 001.003.016 */ { 0x220e, "BCM20702A1" }, /* 001.002.014 */ { 0x420d, "BCM4349B1" }, /* 002.002.013 */ { 0x420e, "BCM4349B1" }, /* 002.002.014 */ From 7e2e1e5859359c62b4b8f8780be2c8dea1708529 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 12 Feb 2026 15:17:20 +0100 Subject: [PATCH 1343/1561] Bluetooth: hci_qca: Fix confusing shutdown() and power_off() naming The function called qca_power_off() is actually the hci_dev shutdown handler, rename it to qca_hci_shutdown() to make this clear. While the qca_power_shutdown() function is actually the counter-part of qca_power_on() rename it to qca_power_off() to make this clear. Signed-off-by: Hans de Goede Reviewed-by: Paul Menzel Reviewed-by: Bartosz Golaszewski Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_qca.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 1086909691c6..e66d1736acf6 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -236,8 +236,7 @@ struct qca_serdev { static int qca_regulator_enable(struct qca_serdev *qcadev); static void qca_regulator_disable(struct qca_serdev *qcadev); -static void qca_power_shutdown(struct hci_uart *hu); -static int qca_power_off(struct hci_dev *hdev); +static void qca_power_off(struct hci_uart *hu); static void qca_controller_memdump(struct work_struct *work); static void qca_dmp_hdr(struct hci_dev *hdev, struct sk_buff *skb); @@ -2047,7 +2046,7 @@ retry: out: if (ret) { - qca_power_shutdown(hu); + qca_power_off(hu); if (retries < MAX_INIT_RETRIES) { bt_dev_warn(hdev, "Retry BT power ON:%d", retries); @@ -2211,7 +2210,7 @@ static const struct qca_device_data qca_soc_data_wcn7850 __maybe_unused = { QCA_CAP_HFP_HW_OFFLOAD, }; -static void qca_power_shutdown(struct hci_uart *hu) +static void qca_power_off(struct hci_uart *hu) { struct qca_serdev *qcadev; struct qca_data *qca = hu->priv; @@ -2272,7 +2271,7 @@ static void qca_power_shutdown(struct hci_uart *hu) set_bit(QCA_BT_OFF, &qca->flags); } -static int qca_power_off(struct hci_dev *hdev) +static int qca_hci_shutdown(struct hci_dev *hdev) { struct hci_uart *hu = hci_get_drvdata(hdev); struct qca_data *qca = hu->priv; @@ -2291,7 +2290,7 @@ static int qca_power_off(struct hci_dev *hdev) usleep_range(8000, 10000); } - qca_power_shutdown(hu); + qca_power_off(hu); return 0; } @@ -2530,7 +2529,7 @@ static int qca_serdev_probe(struct serdev_device *serdev) if (power_ctrl_enabled) { hci_set_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_SETUP); - hdev->shutdown = qca_power_off; + hdev->shutdown = qca_hci_shutdown; } if (data) { @@ -2565,7 +2564,7 @@ static void qca_serdev_remove(struct serdev_device *serdev) case QCA_WCN6855: case QCA_WCN7850: if (power->vregs_on) - qca_power_shutdown(&qcadev->serdev_hu); + qca_power_off(&qcadev->serdev_hu); break; default: break; From 9ff5ff0b9175c37f1fc2ad4e94fa23c7bbc9504f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 12 Feb 2026 15:17:21 +0100 Subject: [PATCH 1344/1561] Bluetooth: hci_qca: Fix BT not getting powered-off on rmmod The BT core skips calling the hci_dev's shutdown method when the HCI is unregistered. This means that qca_power_off() was not getting called leaving BT powered on. This causes regulators / pwrseq providers to not get disabled which also causes problem when re-loading the module because regulators and pwrseq providers have an enablecount which now has never dropped to 0, causing the BT to not get properly reset between rmmod and re-load which causes initialization failure on the re-load. Fix this by calling qca_power_off() from qca_close() when BT has not already been powered off through a qca_hci_shutdown() call. hci_ldisc.c will call qca_close() after freeing the hdev, so this means that qca_power_off() can now no longer deref hu->hdev, change the logging in qca_power_off() to no longer use hu->hdev. Signed-off-by: Hans de Goede Reviewed-by: Bartosz Golaszewski Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_qca.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index e66d1736acf6..d6e78201a675 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -722,6 +722,10 @@ static int qca_close(struct hci_uart *hu) BT_DBG("hu %p qca close", hu); + /* BT core skips qca_hci_shutdown() which calls qca_power_off() on rmmod */ + if (!test_bit(QCA_BT_OFF, &qca->flags)) + qca_power_off(hu); + serial_clock_vote(HCI_IBS_VOTE_STATS_UPDATE, hu); skb_queue_purge(&qca->tx_wait_q); @@ -2260,7 +2264,7 @@ static void qca_power_off(struct hci_uart *hu) qca_regulator_disable(qcadev); if (qcadev->sw_ctrl) { sw_ctrl_state = gpiod_get_value_cansleep(qcadev->sw_ctrl); - bt_dev_dbg(hu->hdev, "SW_CTRL is %d", sw_ctrl_state); + BT_DBG("SW_CTRL is %d", sw_ctrl_state); } break; From c0ad33d273ef1097b28421c3e8d19f52553f26e8 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Fri, 20 Feb 2026 09:00:59 +0100 Subject: [PATCH 1345/1561] Bluetooth: btintel_pcie: Replace snprintf("%s") with strscpy Replace snprintf("%s", ...) with the faster and more direct strscpy(). Signed-off-by: Thorsten Blum Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel_pcie.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index 37b744e35bc4..eda243988cfd 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -268,7 +269,7 @@ static inline void btintel_pcie_dump_debug_registers(struct hci_dev *hdev) if (!skb) return; - snprintf(buf, sizeof(buf), "%s", "---- Dump of debug registers ---"); + strscpy(buf, "---- Dump of debug registers ---"); bt_dev_dbg(hdev, "%s", buf); skb_put_data(skb, buf, strlen(buf)); From 67377cd38b89ce782ccdb83bda3f65a2def843cd Mon Sep 17 00:00:00 2001 From: Dylan Eray Date: Thu, 19 Feb 2026 20:32:09 +0100 Subject: [PATCH 1346/1561] Bluetooth: btusb: Add Lite-On 04ca:3807 for MediaTek MT7921 Add USB device ID (04ca:3807) for a Lite-On Wireless_Device containing a MediaTek MT7921 (MT7920) Bluetooth chipset found in Acer laptops. Without this entry, btusb binds via the generic USB class-based wildcard match but never sets the BTUSB_MEDIATEK flag. This means btmtk never triggers firmware loading, and the driver sends a raw HCI Reset that the uninitialized chip cannot respond to, resulting in: Bluetooth: hci0: Opcode 0x0c03 failed: -110 The information in /sys/kernel/debug/usb/devices about the Bluetooth device is listed as the below: T: Bus=03 Lev=01 Prnt=01 Port=09 Cnt=01 Dev#=5 Spd=480 MxCh=0 P: Vendor=04ca ProdID=3807 Rev=1.00 S: Manufacturer=MediaTek Inc. S: Product=Wireless_Device S: SerialNumber=000000000 C: #Ifs= 3 Cfg#= 1 Atr=e0 MxPwr=100mA I: If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb I: If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb I: If#= 2 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=(none) Reviewed-by: Paul Menzel Signed-off-by: Dylan Eray Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btusb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index aeba026bdb42..d07db8e3a79d 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -707,6 +707,8 @@ static const struct usb_device_id quirks_table[] = { BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x04ca, 0x3804), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x04ca, 0x3807), .driver_info = BTUSB_MEDIATEK | + BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x04ca, 0x38e4), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x13d3, 0x3568), .driver_info = BTUSB_MEDIATEK | From 9a9d21f780805fd6f950e930eeae437748d72793 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Tue, 24 Feb 2026 00:33:42 +0100 Subject: [PATCH 1347/1561] Bluetooth: btintel_pcie: Use struct_size to improve hci_drv_read_info Use struct_size(), which provides additional compile-time checks for structures with flexible array members (e.g., __must_be_array()), to determine the allocation size for a new 'struct hci_drv_rp_read_info'. Signed-off-by: Thorsten Blum Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel_pcie.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index eda243988cfd..473b2115e637 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -2373,7 +2374,7 @@ static int btintel_pcie_hci_drv_read_info(struct hci_dev *hdev, void *data, u16 opcode, num_supported_commands = ARRAY_SIZE(btintel_pcie_hci_drv_supported_commands); - rp_size = sizeof(*rp) + num_supported_commands * 2; + rp_size = struct_size(rp, supported_commands, num_supported_commands); rp = kmalloc(rp_size, GFP_KERNEL); if (!rp) From cde32a92d4562b686f730fc08d4d558ecc99d516 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Tue, 24 Feb 2026 00:13:18 -0600 Subject: [PATCH 1348/1561] mmc: sdio: add MediaTek MT7902 SDIO device ID Add SDIO device ID (0x790a) for MediaTek MT7902 to sdio_ids.h. Acked-by: Ulf Hansson Signed-off-by: Sean Wang Signed-off-by: Luiz Augusto von Dentz --- include/linux/mmc/sdio_ids.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index 673cbdf43453..dce89c110691 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -111,6 +111,7 @@ #define SDIO_VENDOR_ID_MEDIATEK 0x037a #define SDIO_DEVICE_ID_MEDIATEK_MT7663 0x7663 #define SDIO_DEVICE_ID_MEDIATEK_MT7668 0x7668 +#define SDIO_DEVICE_ID_MEDIATEK_MT7902 0x790a #define SDIO_DEVICE_ID_MEDIATEK_MT7961 0x7961 #define SDIO_VENDOR_ID_MICROCHIP_WILC 0x0296 From aab25984e55972e53f3e58821cb85a7101876056 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Tue, 24 Feb 2026 00:13:19 -0600 Subject: [PATCH 1349/1561] Bluetooth: btmtk: add MT7902 MCU support Add MT7902 device ID and firmware filename to enable MCU firmware loading. Signed-off-by: Sean Wang Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btmtk.c | 1 + drivers/bluetooth/btmtk.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/bluetooth/btmtk.c b/drivers/bluetooth/btmtk.c index e4cee251852e..55516b4602db 100644 --- a/drivers/bluetooth/btmtk.c +++ b/drivers/bluetooth/btmtk.c @@ -1338,6 +1338,7 @@ int btmtk_usb_setup(struct hci_dev *hdev) case 0x7922: case 0x7925: case 0x7961: + case 0x7902: btmtk_fw_get_filename(fw_bin_name, sizeof(fw_bin_name), dev_id, fw_version, fw_flavor); diff --git a/drivers/bluetooth/btmtk.h b/drivers/bluetooth/btmtk.h index b9df2b8f0627..adaf385626ee 100644 --- a/drivers/bluetooth/btmtk.h +++ b/drivers/bluetooth/btmtk.h @@ -5,6 +5,7 @@ #define FIRMWARE_MT7663 "mediatek/mt7663pr2h.bin" #define FIRMWARE_MT7668 "mediatek/mt7668pr2h.bin" #define FIRMWARE_MT7922 "mediatek/BT_RAM_CODE_MT7922_1_1_hdr.bin" +#define FIRMWARE_MT7902 "mediatek/BT_RAM_CODE_MT7902_1_1_hdr.bin" #define FIRMWARE_MT7961 "mediatek/BT_RAM_CODE_MT7961_1_2_hdr.bin" #define FIRMWARE_MT7925 "mediatek/mt7925/BT_RAM_CODE_MT7925_1_1_hdr.bin" From 51c4173b89fe7399bad1381016096cc154588660 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Tue, 24 Feb 2026 00:13:20 -0600 Subject: [PATCH 1350/1561] Bluetooth: btusb: Add new VID/PID 13d3/3579 for MT7902 Add VID 13d3 & PID 3579 for MediaTek MT7902 USB Bluetooth chip. The information in /sys/kernel/debug/usb/devices about the Bluetooth device is listed as the below. T: Bus=01 Lev=01 Prnt=01 Port=09 Cnt=04 Dev#= 7 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=13d3 ProdID=3579 Rev= 1.00 S: Manufacturer=MediaTek Inc. S: Product=Wireless_Device S: SerialNumber=000000000 C:* #Ifs= 3 Cfg#= 1 Atr=e0 MxPwr=100mA A: FirstIf#= 0 IfCount= 3 Cls=e0(wlcon) Sub=01 Prot=01 I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=125us E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms I: If#= 1 Alt= 6 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 63 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 63 Ivl=1ms I: If#= 2 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=8a(I) Atr=03(Int.) MxPS= 64 Ivl=125us E: Ad=0a(O) Atr=03(Int.) MxPS= 64 Ivl=125us I:* If#= 2 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=8a(I) Atr=03(Int.) MxPS= 512 Ivl=125us E: Ad=0a(O) Atr=03(Int.) MxPS= 512 Ivl=125us Signed-off-by: Sean Wang Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btusb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index d07db8e3a79d..1acc07d0dc06 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -671,7 +671,9 @@ static const struct usb_device_id quirks_table[] = { BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x13d3, 0x3606), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH }, - + /* MediaTek MT7902 Bluetooth devices */ + { USB_DEVICE(0x13d3, 0x3579), .driver_info = BTUSB_MEDIATEK | + BTUSB_WIDEBAND_SPEECH }, /* MediaTek MT7922 Bluetooth devices */ { USB_DEVICE(0x13d3, 0x3585), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH }, From cdb7f671bca136c6f4e2deb6163c4f5d482f5eae Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Tue, 24 Feb 2026 00:13:21 -0600 Subject: [PATCH 1351/1561] Bluetooth: btusb: Add new VID/PID 13d3/3580 for MT7902 Add VID 13d3 & PID 3580 for MediaTek MT7902 USB Bluetooth chip. The information in /sys/kernel/debug/usb/devices about the Bluetooth device is listed as the below. T: Bus=03 Lev=01 Prnt=01 Port=02 Cnt=01 Dev#= 2 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=13d3 ProdID=3580 Rev= 1.00 S: Manufacturer=MediaTek Inc. S: Product=Wireless_Device S: SerialNumber=000000000 C:* #Ifs= 3 Cfg#= 1 Atr=e0 MxPwr=100mA A: FirstIf#= 0 IfCount= 3 Cls=e0(wlcon) Sub=01 Prot=01 I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=125us E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms I: If#= 1 Alt= 6 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 63 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 63 Ivl=1ms I: If#= 2 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=8a(I) Atr=03(Int.) MxPS= 64 Ivl=125us E: Ad=0a(O) Atr=03(Int.) MxPS= 64 Ivl=125us I:* If#= 2 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=8a(I) Atr=03(Int.) MxPS= 512 Ivl=125us E: Ad=0a(O) Atr=03(Int.) MxPS= 512 Ivl=125us Co-developed-by: Kush Kulshrestha Signed-off-by: Kush Kulshrestha Signed-off-by: Sean Wang Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btusb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 1acc07d0dc06..dbcef629dea4 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -674,6 +674,8 @@ static const struct usb_device_id quirks_table[] = { /* MediaTek MT7902 Bluetooth devices */ { USB_DEVICE(0x13d3, 0x3579), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x13d3, 0x3580), .driver_info = BTUSB_MEDIATEK | + BTUSB_WIDEBAND_SPEECH }, /* MediaTek MT7922 Bluetooth devices */ { USB_DEVICE(0x13d3, 0x3585), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH }, From 7968f6d34e0551a166e293a8d35cef605afde3ec Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Tue, 24 Feb 2026 00:13:22 -0600 Subject: [PATCH 1352/1561] Bluetooth: btusb: Add new VID/PID 13d3/3594 for MT7902 Add VID 13d3 & PID 3594 for MediaTek MT7902 USB Bluetooth chip. The information in /sys/kernel/debug/usb/devices about the Bluetooth device is listed as the below. T: Bus=03 Lev=01 Prnt=01 Port=02 Cnt=01 Dev#= 3 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=13d3 ProdID=3594 Rev= 1.00 S: Manufacturer=MediaTek Inc. S: Product=Wireless_Device S: SerialNumber=000000000 C:* #Ifs= 3 Cfg#= 1 Atr=e0 MxPwr=100mA A: FirstIf#= 0 IfCount= 3 Cls=e0(wlcon) Sub=01 Prot=01 I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=125us E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms I: If#= 1 Alt= 6 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 63 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 63 Ivl=1ms I:* If#= 2 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=(none) E: Ad=8a(I) Atr=03(Int.) MxPS= 64 Ivl=125us E: Ad=0a(O) Atr=03(Int.) MxPS= 64 Ivl=125us I: If#= 2 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=(none) E: Ad=8a(I) Atr=03(Int.) MxPS= 512 Ivl=125us E: Ad=0a(O) Atr=03(Int.) MxPS= 512 Ivl=125us Co-developed-by: Kush Kulshrestha Signed-off-by: Kush Kulshrestha Signed-off-by: Sean Wang Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btusb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index dbcef629dea4..f55da2d61e35 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -676,6 +676,8 @@ static const struct usb_device_id quirks_table[] = { BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x13d3, 0x3580), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x13d3, 0x3594), .driver_info = BTUSB_MEDIATEK | + BTUSB_WIDEBAND_SPEECH }, /* MediaTek MT7922 Bluetooth devices */ { USB_DEVICE(0x13d3, 0x3585), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH }, From 6724fb35cdd6186bd68f8de84210fd40099fff86 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Tue, 24 Feb 2026 00:13:23 -0600 Subject: [PATCH 1353/1561] Bluetooth: btusb: Add new VID/PID 13d3/3596 for MT7902 Add VID 13d3 & PID 3596 for MediaTek MT7902 USB Bluetooth chip. The information in /sys/kernel/debug/usb/devices about the Bluetooth device is listed as the below. T: Bus=07 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=13d3 ProdID=3596 Rev= 1.00 S: Manufacturer=MediaTek Inc. S: Product=Wireless_Device S: SerialNumber=000000000 C:* #Ifs= 3 Cfg#= 1 Atr=e0 MxPwr=100mA A: FirstIf#= 0 IfCount= 3 Cls=e0(wlcon) Sub=01 Prot=01 I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=125us E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms I: If#= 1 Alt= 6 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 63 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 63 Ivl=1ms I:* If#= 2 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=(none) E: Ad=8a(I) Atr=03(Int.) MxPS= 64 Ivl=125us E: Ad=0a(O) Atr=03(Int.) MxPS= 64 Ivl=125us I: If#= 2 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=(none) E: Ad=8a(I) Atr=03(Int.) MxPS= 512 Ivl=125us E: Ad=0a(O) Atr=03(Int.) MxPS= 512 Ivl=125us Co-developed-by: Kush Kulshrestha Signed-off-by: Kush Kulshrestha Signed-off-by: Sean Wang Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btusb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index f55da2d61e35..e32ab3ea2a59 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -678,6 +678,8 @@ static const struct usb_device_id quirks_table[] = { BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x13d3, 0x3594), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x13d3, 0x3596), .driver_info = BTUSB_MEDIATEK | + BTUSB_WIDEBAND_SPEECH }, /* MediaTek MT7922 Bluetooth devices */ { USB_DEVICE(0x13d3, 0x3585), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH }, From 22fd19bf249a4fc80ec57ebe09c924469f53222b Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Tue, 24 Feb 2026 00:13:24 -0600 Subject: [PATCH 1354/1561] Bluetooth: btusb: Add new VID/PID 0e8d/1ede for MT7902 Add VID 0e8d & PID 1ede for MediaTek MT7902 USB Bluetooth chip. The information in /sys/kernel/debug/usb/devices about the Bluetooth device is listed as the below. T: Bus=01 Lev=01 Prnt=01 Port=05 Cnt=02 Dev#= 4 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0e8d ProdID=1ede Rev= 1.00 S: Manufacturer=MediaTek Inc. S: Product=Wireless_Device S: SerialNumber=000000000 C:* #Ifs= 3 Cfg#= 1 Atr=e0 MxPwr=100mA A: FirstIf#= 0 IfCount= 3 Cls=e0(wlcon) Sub=01 Prot=01 I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=125us E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms I: If#= 1 Alt= 6 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 63 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 63 Ivl=1ms I: If#= 2 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=8a(I) Atr=03(Int.) MxPS= 64 Ivl=125us E: Ad=0a(O) Atr=03(Int.) MxPS= 64 Ivl=125us I:* If#= 2 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=8a(I) Atr=03(Int.) MxPS= 512 Ivl=125us E: Ad=0a(O) Atr=03(Int.) MxPS= 512 Ivl=125us Co-developed-by: Bitterblue Smith Signed-off-by: Bitterblue Smith Signed-off-by: Sean Wang Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btusb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index e32ab3ea2a59..71f4af23b556 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -672,6 +672,8 @@ static const struct usb_device_id quirks_table[] = { { USB_DEVICE(0x13d3, 0x3606), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH }, /* MediaTek MT7902 Bluetooth devices */ + { USB_DEVICE(0x0e8d, 0x1ede), .driver_info = BTUSB_MEDIATEK | + BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x13d3, 0x3579), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x13d3, 0x3580), .driver_info = BTUSB_MEDIATEK | From 7f2c3c49ba0d3fead557a8026a021ebe23f919d6 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Tue, 24 Feb 2026 00:13:25 -0600 Subject: [PATCH 1355/1561] Bluetooth: btmtk: add MT7902 SDIO support Add MT7902 Bluetooth SDIO support by introducing chip data and registering the device ID. Runtime PM is not yet supported by the driver, but normal operation is unaffected. Signed-off-by: Sean Wang Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btmtksdio.c | 42 ++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/drivers/bluetooth/btmtksdio.c b/drivers/bluetooth/btmtksdio.c index e986e5af51ae..042064464d34 100644 --- a/drivers/bluetooth/btmtksdio.c +++ b/drivers/bluetooth/btmtksdio.c @@ -42,24 +42,35 @@ struct btmtksdio_data { const char *fwname; u16 chipid; bool lp_mbox_supported; + bool pm_runtime_supported; }; static const struct btmtksdio_data mt7663_data = { .fwname = FIRMWARE_MT7663, .chipid = 0x7663, .lp_mbox_supported = false, + .pm_runtime_supported = true, }; static const struct btmtksdio_data mt7668_data = { .fwname = FIRMWARE_MT7668, .chipid = 0x7668, .lp_mbox_supported = false, + .pm_runtime_supported = true, }; static const struct btmtksdio_data mt7921_data = { .fwname = FIRMWARE_MT7961, .chipid = 0x7921, .lp_mbox_supported = true, + .pm_runtime_supported = true, +}; + +static const struct btmtksdio_data mt7902_data = { + .fwname = FIRMWARE_MT7902, + .chipid = 0x7902, + .lp_mbox_supported = false, + .pm_runtime_supported = false, }; static const struct sdio_device_id btmtksdio_table[] = { @@ -69,6 +80,8 @@ static const struct sdio_device_id btmtksdio_table[] = { .driver_data = (kernel_ulong_t)&mt7668_data }, {SDIO_DEVICE(SDIO_VENDOR_ID_MEDIATEK, SDIO_DEVICE_ID_MEDIATEK_MT7961), .driver_data = (kernel_ulong_t)&mt7921_data }, + {SDIO_DEVICE(SDIO_VENDOR_ID_MEDIATEK, SDIO_DEVICE_ID_MEDIATEK_MT7902), + .driver_data = (kernel_ulong_t)&mt7902_data }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(sdio, btmtksdio_table); @@ -1090,6 +1103,7 @@ static int btmtksdio_setup(struct hci_dev *hdev) set_bit(BTMTKSDIO_HW_TX_READY, &bdev->tx_state); switch (bdev->data->chipid) { + case 0x7902: case 0x7921: if (test_bit(BTMTKSDIO_HW_RESET_ACTIVE, &bdev->tx_state)) { err = btmtksdio_mtk_reg_read(hdev, MT7921_DLSTATUS, @@ -1167,22 +1181,24 @@ static int btmtksdio_setup(struct hci_dev *hdev) delta = ktime_sub(rettime, calltime); duration = (unsigned long long)ktime_to_ns(delta) >> 10; - pm_runtime_set_autosuspend_delay(bdev->dev, - MTKBTSDIO_AUTOSUSPEND_DELAY); - pm_runtime_use_autosuspend(bdev->dev); + if (bdev->data->pm_runtime_supported) { + pm_runtime_set_autosuspend_delay(bdev->dev, + MTKBTSDIO_AUTOSUSPEND_DELAY); + pm_runtime_use_autosuspend(bdev->dev); - err = pm_runtime_set_active(bdev->dev); - if (err < 0) - return err; + err = pm_runtime_set_active(bdev->dev); + if (err < 0) + return err; - /* Default forbid runtime auto suspend, that can be allowed by - * enable_autosuspend flag or the PM runtime entry under sysfs. - */ - pm_runtime_forbid(bdev->dev); - pm_runtime_enable(bdev->dev); + /* Default forbid runtime auto suspend, that can be allowed by + * enable_autosuspend flag or the PM runtime entry under sysfs. + */ + pm_runtime_forbid(bdev->dev); + pm_runtime_enable(bdev->dev); - if (enable_autosuspend) - pm_runtime_allow(bdev->dev); + if (enable_autosuspend) + pm_runtime_allow(bdev->dev); + } bt_dev_info(hdev, "Device setup in %llu usecs", duration); From 728a3d128325bad286b1e4f191026e8de8d12a85 Mon Sep 17 00:00:00 2001 From: Christian Eggers Date: Wed, 25 Feb 2026 18:07:26 +0100 Subject: [PATCH 1356/1561] Bluetooth: L2CAP: CoC: Disconnect if received packet size exceeds MPS Core 6.0, Vol 3, Part A, 3.4.3: "... If the payload size of any K-frame exceeds the receiver's MPS, the receiver shall disconnect the channel..." This fixes L2CAP/LE/CFC/BV-27-C (running together with 'l2test -r -P 0x0027 -V le_public -I 100'). Signed-off-by: Christian Eggers Signed-off-by: Luiz Augusto von Dentz Tested-by: Christian Eggers --- net/bluetooth/l2cap_core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 95c65fece39b..9916ae6abef0 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -6705,6 +6705,13 @@ static int l2cap_ecred_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) return -ENOBUFS; } + if (skb->len > chan->mps) { + BT_ERR("Too big LE L2CAP MPS: len %u > %u", skb->len, + chan->mps); + l2cap_send_disconn_req(chan, ECONNRESET); + return -ENOBUFS; + } + chan->rx_credits--; BT_DBG("chan %p: rx_credits %u -> %u", chan, chan->rx_credits + 1, chan->rx_credits); From 21b51648f3f48d3623d3a1c434356c96e8fc15c1 Mon Sep 17 00:00:00 2001 From: Kiran K Date: Sat, 28 Feb 2026 14:42:31 +0530 Subject: [PATCH 1357/1561] Bluetooth: btintel: Add support for hybrid signature for ScP2 onwards If FW image has hybrid signature (ECDSA and LMS) then send CSS header, ECDSA public key, ECDSA signature, LMS public key, LMS signature and command buffer to device. Signed-off-by: Kiran K Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel.c | 80 +++++++++++++++++++++++++++++++++---- 1 file changed, 72 insertions(+), 8 deletions(-) diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index ab146894ba4e..39e16dcca0ce 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -35,6 +35,19 @@ enum { DSM_SET_RESET_METHOD = 3, }; +/* Hybrid ECDSA + LMS */ +#define BTINTEL_RSA_HEADER_VER 0x00010000 +#define BTINTEL_ECDSA_HEADER_VER 0x00020000 +#define BTINTEL_HYBRID_HEADER_VER 0x00069700 +#define BTINTEL_ECDSA_OFFSET 128 +#define BTINTEL_CSS_HEADER_SIZE 128 +#define BTINTEL_ECDSA_PUB_KEY_SIZE 96 +#define BTINTEL_ECDSA_SIG_SIZE 96 +#define BTINTEL_LMS_OFFSET 320 +#define BTINTEL_LMS_PUB_KEY_SIZE 52 +#define BTINTEL_LMS_SIG_SIZE 1744 +#define BTINTEL_CMD_BUFFER_OFFSET 2116 + #define BTINTEL_BT_DOMAIN 0x12 #define BTINTEL_SAR_LEGACY 0 #define BTINTEL_SAR_INC_PWR 1 @@ -510,8 +523,8 @@ int btintel_version_info_tlv(struct hci_dev *hdev, return -EINVAL; } - /* Secure boot engine type should be either 1 (ECDSA) or 0 (RSA) */ - if (version->sbe_type > 0x01) { + /* Secure boot engine type can be 0 (RSA), 1 (ECDSA), 2 (LMS), 3 (ECDSA + LMS) */ + if (version->sbe_type > 0x03) { bt_dev_err(hdev, "Unsupported Intel secure boot engine type (0x%x)", version->sbe_type); return -EINVAL; @@ -1030,6 +1043,48 @@ static int btintel_sfi_ecdsa_header_secure_send(struct hci_dev *hdev, return 0; } +static int btintel_sfi_hybrid_header_secure_send(struct hci_dev *hdev, + const struct firmware *fw) +{ + int err; + + err = btintel_secure_send(hdev, 0x00, BTINTEL_CSS_HEADER_SIZE, fw->data); + if (err < 0) { + bt_dev_err(hdev, "Failed to send firmware CSS header (%d)", err); + return err; + } + + err = btintel_secure_send(hdev, 0x03, BTINTEL_ECDSA_PUB_KEY_SIZE, + fw->data + BTINTEL_ECDSA_OFFSET); + if (err < 0) { + bt_dev_err(hdev, "Failed to send firmware ECDSA pkey (%d)", err); + return err; + } + + err = btintel_secure_send(hdev, 0x02, BTINTEL_ECDSA_SIG_SIZE, + fw->data + BTINTEL_ECDSA_OFFSET + BTINTEL_ECDSA_PUB_KEY_SIZE); + if (err < 0) { + bt_dev_err(hdev, "Failed to send firmware ECDSA signature (%d)", err); + return err; + } + + err = btintel_secure_send(hdev, 0x05, BTINTEL_LMS_PUB_KEY_SIZE, + fw->data + BTINTEL_LMS_OFFSET); + if (err < 0) { + bt_dev_err(hdev, "Failed to send firmware LMS pkey (%d)", err); + return err; + } + + err = btintel_secure_send(hdev, 0x04, BTINTEL_LMS_SIG_SIZE, + fw->data + BTINTEL_LMS_OFFSET + BTINTEL_LMS_PUB_KEY_SIZE); + if (err < 0) { + bt_dev_err(hdev, "Failed to send firmware LMS signature (%d)", err); + return err; + } + + return 0; +} + static int btintel_download_firmware_payload(struct hci_dev *hdev, const struct firmware *fw, size_t offset) @@ -1203,11 +1258,12 @@ static int btintel_download_fw_tlv(struct hci_dev *hdev, * Command Buffer. * * CSS Header byte positions 0x08 to 0x0B represent the CSS Header - * version: RSA(0x00010000) , ECDSA (0x00020000) + * version: RSA(0x00010000) , ECDSA (0x00020000) , HYBRID (0x00069700) */ css_header_ver = get_unaligned_le32(fw->data + CSS_HEADER_OFFSET); - if (css_header_ver != 0x00010000) { - bt_dev_err(hdev, "Invalid CSS Header version"); + if (css_header_ver != BTINTEL_RSA_HEADER_VER && + css_header_ver != BTINTEL_HYBRID_HEADER_VER) { + bt_dev_err(hdev, "Invalid CSS Header version: 0x%8.8x", css_header_ver); return -EINVAL; } @@ -1225,15 +1281,15 @@ static int btintel_download_fw_tlv(struct hci_dev *hdev, err = btintel_download_firmware_payload(hdev, fw, RSA_HEADER_LEN); if (err) return err; - } else if (hw_variant >= 0x17) { + } else if (hw_variant >= 0x17 && css_header_ver == BTINTEL_RSA_HEADER_VER) { /* Check if CSS header for ECDSA follows the RSA header */ if (fw->data[ECDSA_OFFSET] != 0x06) return -EINVAL; /* Check if the CSS Header version is ECDSA(0x00020000) */ css_header_ver = get_unaligned_le32(fw->data + ECDSA_OFFSET + CSS_HEADER_OFFSET); - if (css_header_ver != 0x00020000) { - bt_dev_err(hdev, "Invalid CSS Header version"); + if (css_header_ver != BTINTEL_ECDSA_HEADER_VER) { + bt_dev_err(hdev, "Invalid CSS Header version: 0x%8.8x", css_header_ver); return -EINVAL; } @@ -1256,6 +1312,14 @@ static int btintel_download_fw_tlv(struct hci_dev *hdev, if (err) return err; } + } else if (hw_variant >= 0x20 && css_header_ver == BTINTEL_HYBRID_HEADER_VER) { + err = btintel_sfi_hybrid_header_secure_send(hdev, fw); + if (err) + return err; + + err = btintel_download_firmware_payload(hdev, fw, BTINTEL_CMD_BUFFER_OFFSET); + if (err) + return err; } return 0; } From c239ad6e531e00b474c8d30afaf320993bb53023 Mon Sep 17 00:00:00 2001 From: Kiran K Date: Sat, 28 Feb 2026 14:42:32 +0530 Subject: [PATCH 1358/1561] Bluetooth: btintel: Replace CNVi id with hardware variant Use hardware variant instead of CNVi to send dsbr command. Signed-off-by: Kiran K Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel.c | 18 +++++++++--------- drivers/bluetooth/btintel.h | 7 +++++++ 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index 39e16dcca0ce..7313bd101588 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -2818,31 +2818,31 @@ static int btintel_set_dsbr(struct hci_dev *hdev, struct intel_version_tlv *ver) struct btintel_dsbr_cmd cmd; struct sk_buff *skb; - u32 dsbr, cnvi; - u8 status; + u32 dsbr; + u8 status, hw_variant; int err; - cnvi = ver->cnvi_top & 0xfff; + hw_variant = INTEL_HW_VARIANT(ver->cnvi_bt); /* DSBR command needs to be sent for, * 1. BlazarI or BlazarIW + B0 step product in IML image. * 2. Gale Peak2 or BlazarU in OP image. * 3. Scorpious Peak in IML image. */ - switch (cnvi) { - case BTINTEL_CNVI_BLAZARI: - case BTINTEL_CNVI_BLAZARIW: + switch (hw_variant) { + case BTINTEL_HWID_BZRI: + case BTINTEL_HWID_BZRIW: if (ver->img_type == BTINTEL_IMG_IML && INTEL_CNVX_TOP_STEP(ver->cnvi_top) == 0x01) break; return 0; - case BTINTEL_CNVI_GAP: - case BTINTEL_CNVI_BLAZARU: + case BTINTEL_HWID_GAP: + case BTINTEL_HWID_BZRU: if (ver->img_type == BTINTEL_IMG_OP && hdev->bus == HCI_USB) break; return 0; - case BTINTEL_CNVI_SCP: + case BTINTEL_HWID_SCP: if (ver->img_type == BTINTEL_IMG_IML) break; return 0; diff --git a/drivers/bluetooth/btintel.h b/drivers/bluetooth/btintel.h index 431998049e68..b7ff183f8886 100644 --- a/drivers/bluetooth/btintel.h +++ b/drivers/bluetooth/btintel.h @@ -69,6 +69,13 @@ struct intel_tlv { #define BTINTEL_FWID_MAXLEN 64 +/* CNVi Hardware variant */ +#define BTINTEL_HWID_GAP 0x1c /* Gale Peak2 - Meteor Lake */ +#define BTINTEL_HWID_BZRI 0x1e /* BlazarI - Lunar Lake */ +#define BTINTEL_HWID_BZRU 0x1d /* BlazarU - Meteor Lake */ +#define BTINTEL_HWID_SCP 0x1f /* Scorpius Peak - Panther Lake */ +#define BTINTEL_HWID_BZRIW 0x22 /* BlazarIW - Wildcat Lake */ + struct intel_version_tlv { u32 cnvi_top; u32 cnvr_top; From 3b74115bb35285f56b5d4cc5a7a90c5825663b9e Mon Sep 17 00:00:00 2001 From: Kiran K Date: Sat, 28 Feb 2026 14:42:33 +0530 Subject: [PATCH 1359/1561] Bluetooth: btintel: Add support for Scorpious Peak2 support Add support for Intel Bluetooth Scorpious Peak2 core. Signed-off-by: Kiran K Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel.c | 3 +++ drivers/bluetooth/btintel_pcie.c | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index 7313bd101588..db6951b96d1c 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -502,6 +502,7 @@ int btintel_version_info_tlv(struct hci_dev *hdev, case 0x1d: /* BlazarU (BzrU) */ case 0x1e: /* BlazarI (Bzr) */ case 0x1f: /* Scorpious Peak */ + case 0x20: /* Scorpious Peak2 */ case 0x22: /* BlazarIW (BzrIW) */ break; default: @@ -3323,6 +3324,7 @@ void btintel_set_msft_opcode(struct hci_dev *hdev, u8 hw_variant) case 0x1d: case 0x1e: case 0x1f: + case 0x20: case 0x22: hci_set_msft_opcode(hdev, 0xFC1E); break; @@ -3664,6 +3666,7 @@ static int btintel_setup_combined(struct hci_dev *hdev) case 0x1d: case 0x1e: case 0x1f: + case 0x20: case 0x22: /* Display version information of TLV type */ btintel_version_info_tlv(hdev, &ver_tlv); diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index 473b2115e637..0ce47b88db6e 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -2095,6 +2095,7 @@ static int btintel_pcie_setup_internal(struct hci_dev *hdev) switch (INTEL_HW_VARIANT(ver_tlv.cnvi_bt)) { case 0x1e: /* BzrI */ case 0x1f: /* ScP */ + case 0x20: /* ScP2 */ case 0x22: /* BzrIW */ /* Display version information of TLV type */ btintel_version_info_tlv(hdev, &ver_tlv); From 140f6afd3dd75a02d067b39806a998e5effb56b5 Mon Sep 17 00:00:00 2001 From: Kiran K Date: Sat, 28 Feb 2026 14:42:34 +0530 Subject: [PATCH 1360/1561] Bluetooth: btintel: Add DSBR support for ScP2 onwards Add DSBR support for Scorpious Peak2 cores onwards. Refer commit eb9e749c0182 ("Bluetooth: btintel: Allow configuring drive strength of BRI") for details about DSBR. Signed-off-by: Kiran K Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel.c | 5 +++++ drivers/bluetooth/btintel.h | 1 + 2 files changed, 6 insertions(+) diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index db6951b96d1c..77930bc225f5 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -2828,6 +2828,7 @@ static int btintel_set_dsbr(struct hci_dev *hdev, struct intel_version_tlv *ver) * 1. BlazarI or BlazarIW + B0 step product in IML image. * 2. Gale Peak2 or BlazarU in OP image. * 3. Scorpious Peak in IML image. + * 4. Scorpious Peak2 onwards + PCIe transport in IML image. */ switch (hw_variant) { @@ -2848,6 +2849,10 @@ static int btintel_set_dsbr(struct hci_dev *hdev, struct intel_version_tlv *ver) break; return 0; default: + /* Scorpius Peak2 onwards */ + if (hw_variant >= BTINTEL_HWID_SCP2 && hdev->bus == HCI_PCI + && ver->img_type == BTINTEL_IMG_IML) + break; return 0; } diff --git a/drivers/bluetooth/btintel.h b/drivers/bluetooth/btintel.h index b7ff183f8886..f16f852b83b8 100644 --- a/drivers/bluetooth/btintel.h +++ b/drivers/bluetooth/btintel.h @@ -74,6 +74,7 @@ struct intel_tlv { #define BTINTEL_HWID_BZRI 0x1e /* BlazarI - Lunar Lake */ #define BTINTEL_HWID_BZRU 0x1d /* BlazarU - Meteor Lake */ #define BTINTEL_HWID_SCP 0x1f /* Scorpius Peak - Panther Lake */ +#define BTINTEL_HWID_SCP2 0x20 /* Scorpius Peak2 - Nova Lake */ #define BTINTEL_HWID_BZRIW 0x22 /* BlazarIW - Wildcat Lake */ struct intel_version_tlv { From e64621ef1500f9c0c26d0b680223d4f3819cf24d Mon Sep 17 00:00:00 2001 From: Kiran K Date: Sat, 28 Feb 2026 14:42:35 +0530 Subject: [PATCH 1361/1561] Bluetooth: btintel_pcie: Add support for exception dump for ScP2 Add device coredump support for Scorpious Peak2 product. Signed-off-by: Kiran K Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel.h | 11 ++++++----- drivers/bluetooth/btintel_pcie.c | 7 +++++++ 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/bluetooth/btintel.h b/drivers/bluetooth/btintel.h index f16f852b83b8..b5c00be8277a 100644 --- a/drivers/bluetooth/btintel.h +++ b/drivers/bluetooth/btintel.h @@ -54,11 +54,12 @@ struct intel_tlv { #define BTINTEL_HCI_OP_RESET 0xfc01 -#define BTINTEL_CNVI_BLAZARI 0x900 -#define BTINTEL_CNVI_BLAZARIW 0x901 -#define BTINTEL_CNVI_GAP 0x910 -#define BTINTEL_CNVI_BLAZARU 0x930 -#define BTINTEL_CNVI_SCP 0xA00 +#define BTINTEL_CNVI_BLAZARI 0x900 /* BlazarI - Lunar Lake */ +#define BTINTEL_CNVI_BLAZARIW 0x901 /* BlazarIW - Wildcat Lake */ +#define BTINTEL_CNVI_GAP 0x910 /* Gale Peak2 - Meteor Lake */ +#define BTINTEL_CNVI_BLAZARU 0x930 /* BlazarU - Meteor Lake */ +#define BTINTEL_CNVI_SCP 0xA00 /* Scorpius Peak - Panther Lake */ +#define BTINTEL_CNVI_SCP2 0xA10 /* Scorpius Peak2 - Nova Lake */ /* CNVR */ #define BTINTEL_CNVR_FMP2 0x910 diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index 0ce47b88db6e..0febdbd0d86e 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -74,6 +74,9 @@ struct btintel_pcie_dev_recovery { #define BTINTEL_PCIE_SCP_HWEXP_SIZE 4096 #define BTINTEL_PCIE_SCP_HWEXP_DMP_ADDR 0xB030F800 +#define BTINTEL_PCIE_SCP2_HWEXP_SIZE 4096 +#define BTINTEL_PCIE_SCP2_HWEXP_DMP_ADDR 0xB031D000 + #define BTINTEL_PCIE_MAGIC_NUM 0xA5A5A5A5 #define BTINTEL_PCIE_TRIGGER_REASON_USER_TRIGGER 0x17A2 @@ -1231,6 +1234,10 @@ static void btintel_pcie_read_hwexp(struct btintel_pcie_data *data) len = BTINTEL_PCIE_SCP_HWEXP_SIZE; addr = BTINTEL_PCIE_SCP_HWEXP_DMP_ADDR; break; + case BTINTEL_CNVI_SCP2: + len = BTINTEL_PCIE_SCP2_HWEXP_SIZE; + addr = BTINTEL_PCIE_SCP2_HWEXP_DMP_ADDR; + break; default: bt_dev_err(data->hdev, "Unsupported cnvi 0x%8.8x", data->dmp_hdr.cnvi_top); return; From 9c52085e10af716812cfc8de79bc4f22747b3c19 Mon Sep 17 00:00:00 2001 From: Kiran K Date: Sat, 28 Feb 2026 14:42:36 +0530 Subject: [PATCH 1362/1561] Bluetooth: btintel: Add support for Scorpious Peak2F support Add support for Intel Bluetooth Scorpious Peak2F core. Signed-off-by: Kiran K Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel.c | 3 +++ drivers/bluetooth/btintel_pcie.c | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index 77930bc225f5..dcaaa4ca02b9 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -503,6 +503,7 @@ int btintel_version_info_tlv(struct hci_dev *hdev, case 0x1e: /* BlazarI (Bzr) */ case 0x1f: /* Scorpious Peak */ case 0x20: /* Scorpious Peak2 */ + case 0x21: /* Scorpious Peak2 F */ case 0x22: /* BlazarIW (BzrIW) */ break; default: @@ -3330,6 +3331,7 @@ void btintel_set_msft_opcode(struct hci_dev *hdev, u8 hw_variant) case 0x1e: case 0x1f: case 0x20: + case 0x21: case 0x22: hci_set_msft_opcode(hdev, 0xFC1E); break; @@ -3672,6 +3674,7 @@ static int btintel_setup_combined(struct hci_dev *hdev) case 0x1e: case 0x1f: case 0x20: + case 0x21: case 0x22: /* Display version information of TLV type */ btintel_version_info_tlv(hdev, &ver_tlv); diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index 0febdbd0d86e..78209e8768bb 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -2103,6 +2103,7 @@ static int btintel_pcie_setup_internal(struct hci_dev *hdev) case 0x1e: /* BzrI */ case 0x1f: /* ScP */ case 0x20: /* ScP2 */ + case 0x21: /* ScP2 F */ case 0x22: /* BzrIW */ /* Display version information of TLV type */ btintel_version_info_tlv(hdev, &ver_tlv); From 68a1729ae5043d900e040565f3db87f06ce64a76 Mon Sep 17 00:00:00 2001 From: Kiran K Date: Sat, 28 Feb 2026 14:42:37 +0530 Subject: [PATCH 1363/1561] Bluetooth: btintel_pcie: Add support for exception dump for ScP2F Add device coredump support for Scorpious Peak2F product. Signed-off-by: Kiran K Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel.h | 1 + drivers/bluetooth/btintel_pcie.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/bluetooth/btintel.h b/drivers/bluetooth/btintel.h index b5c00be8277a..0e9ca99aaaae 100644 --- a/drivers/bluetooth/btintel.h +++ b/drivers/bluetooth/btintel.h @@ -60,6 +60,7 @@ struct intel_tlv { #define BTINTEL_CNVI_BLAZARU 0x930 /* BlazarU - Meteor Lake */ #define BTINTEL_CNVI_SCP 0xA00 /* Scorpius Peak - Panther Lake */ #define BTINTEL_CNVI_SCP2 0xA10 /* Scorpius Peak2 - Nova Lake */ +#define BTINTEL_CNVI_SCP2F 0xA20 /* Scorpius Peak2F - Nova Lake */ /* CNVR */ #define BTINTEL_CNVR_FMP2 0x910 diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index 78209e8768bb..a907b493d153 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -1235,6 +1235,7 @@ static void btintel_pcie_read_hwexp(struct btintel_pcie_data *data) addr = BTINTEL_PCIE_SCP_HWEXP_DMP_ADDR; break; case BTINTEL_CNVI_SCP2: + case BTINTEL_CNVI_SCP2F: len = BTINTEL_PCIE_SCP2_HWEXP_SIZE; addr = BTINTEL_PCIE_SCP2_HWEXP_DMP_ADDR; break; From 0e77b6ad7ae41894742b0e3ba0a80506e4ad1b5e Mon Sep 17 00:00:00 2001 From: Kiran K Date: Sat, 28 Feb 2026 14:42:38 +0530 Subject: [PATCH 1364/1561] Bluetooth: btintel_pcie: Add device id of Scorpius Peak2, Nova Lake-PCD-H sudo lspci -v -k -d 8086:d346 00:14.7 Bluetooth: Intel Corporation Device d346 Subsystem: Intel Corporation Device 0011 Flags: bus master, fast devsel, latency 0, IRQ 16, IOMMU group 14 Memory at b018378000 (64-bit, non-prefetchable) [size=16K] Capabilities: [c8] Power Management version 3 Capabilities: [d0] MSI: Enable- Count=1/1 Maskable- 64bit+ Capabilities: [40] Express Root Complex Integrated Endpoint, MSI 00 Capabilities: [80] MSI-X: Enable+ Count=32 Masked- Capabilities: [100] Latency Tolerance Reporting Kernel driver in use: btintel_pcie Kernel modules: btintel_pcie Signed-off-by: Kiran K Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel_pcie.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index a907b493d153..785b624aaf5c 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -47,6 +47,8 @@ static const struct pci_device_id btintel_pcie_table[] = { { BTINTEL_PCI_DEVICE(0xE376, PCI_ANY_ID) }, /* Scorpious, Panther Lake-H404 */ { BTINTEL_PCI_DEVICE(0xE476, PCI_ANY_ID) }, + /* Scorpious2, Nova Lake-PCD-H */ + { BTINTEL_PCI_DEVICE(0xD346, PCI_ANY_ID) }, { 0 } }; MODULE_DEVICE_TABLE(pci, btintel_pcie_table); From 5741118b5da398f3864be04044de975aa154af1d Mon Sep 17 00:00:00 2001 From: Kiran K Date: Sat, 28 Feb 2026 14:42:39 +0530 Subject: [PATCH 1365/1561] Bluetooth: btintel_pcie: Add device id of Scorpious2, Nova Lake-PCD-S sudo lspci -v -k -d 8086:6e74 80:14.7 Bluetooth: Intel Corporation Device 6e74 (rev 10) Subsystem: Intel Corporation Device 0011 Flags: bus master, fast devsel, latency 0, IRQ 16 Memory at 200002a8000 (64-bit, non-prefetchable) [size=16K] Capabilities: [c8] Power Management version 3 Capabilities: [d0] MSI: Enable- Count=1/1 Maskable- 64bit+ Capabilities: [40] Express Root Complex Integrated Endpoint, MSI 00 Capabilities: [80] MSI-X: Enable+ Count=32 Masked- Capabilities: [100] Latency Tolerance Reporting Kernel driver in use: btintel_pcie Kernel modules: btintel_pcie Signed-off-by: Kiran K Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel_pcie.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index 785b624aaf5c..a817a571f720 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -49,6 +49,8 @@ static const struct pci_device_id btintel_pcie_table[] = { { BTINTEL_PCI_DEVICE(0xE476, PCI_ANY_ID) }, /* Scorpious2, Nova Lake-PCD-H */ { BTINTEL_PCI_DEVICE(0xD346, PCI_ANY_ID) }, + /* Scorpious2, Nova Lake-PCD-S */ + { BTINTEL_PCI_DEVICE(0x6E74, PCI_ANY_ID) }, { 0 } }; MODULE_DEVICE_TABLE(pci, btintel_pcie_table); From 15a315e4172b1dfd78f1d1ed62808a2c968d5a7b Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 2 Mar 2026 15:21:21 -0500 Subject: [PATCH 1366/1561] Bluetooth: btintel_pci: Fix btintel_pcie_read_hwexp code style Use proper alignment for break under a switch. Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel_pcie.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index a817a571f720..05f82bc3f0d7 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -1233,16 +1233,16 @@ static void btintel_pcie_read_hwexp(struct btintel_pcie_data *data) return; len = BTINTEL_PCIE_BLZR_HWEXP_SIZE; /* exception data length */ addr = BTINTEL_PCIE_BLZR_HWEXP_DMP_ADDR; - break; + break; case BTINTEL_CNVI_SCP: len = BTINTEL_PCIE_SCP_HWEXP_SIZE; addr = BTINTEL_PCIE_SCP_HWEXP_DMP_ADDR; - break; + break; case BTINTEL_CNVI_SCP2: case BTINTEL_CNVI_SCP2F: len = BTINTEL_PCIE_SCP2_HWEXP_SIZE; addr = BTINTEL_PCIE_SCP2_HWEXP_DMP_ADDR; - break; + break; default: bt_dev_err(data->hdev, "Unsupported cnvi 0x%8.8x", data->dmp_hdr.cnvi_top); return; From 9f168e4de5fd43766f6d49b393f445be805c1e05 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Wed, 11 Mar 2026 01:02:58 +0200 Subject: [PATCH 1367/1561] Bluetooth: qca: enable pwrseq support for WCN39xx devices The WCN39xx family of WiFi/BT chips incorporates a simple PMU, spreading voltages over internal rails. Implement support for using powersequencer for this family of QCA devices in addition to using regulators. Reviewed-by: Bartosz Golaszewski Signed-off-by: Dmitry Baryshkov Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_qca.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index d6e78201a675..c17a462aef55 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -2241,6 +2241,18 @@ static void qca_power_off(struct hci_uart *hu) qcadev = serdev_device_get_drvdata(hu->serdev); power = qcadev->bt_power; + switch (soc_type) { + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + host_set_baudrate(hu, 2400); + qca_send_power_pulse(hu, false); + break; + default: + break; + } + if (power && power->pwrseq) { pwrseq_power_off(power->pwrseq); set_bit(QCA_BT_OFF, &qca->flags); @@ -2252,8 +2264,6 @@ static void qca_power_off(struct hci_uart *hu) case QCA_WCN3990: case QCA_WCN3991: case QCA_WCN3998: - host_set_baudrate(hu, 2400); - qca_send_power_pulse(hu, false); qca_regulator_disable(qcadev); break; @@ -2425,6 +2435,11 @@ static int qca_serdev_probe(struct serdev_device *serdev) } switch (qcadev->btsoc_type) { + case QCA_WCN3950: + case QCA_WCN3988: + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: case QCA_WCN6750: case QCA_WCN6855: case QCA_WCN7850: @@ -2449,12 +2464,7 @@ static int qca_serdev_probe(struct serdev_device *serdev) else break; } - fallthrough; - case QCA_WCN3950: - case QCA_WCN3988: - case QCA_WCN3990: - case QCA_WCN3991: - case QCA_WCN3998: + qcadev->bt_power->dev = &serdev->dev; err = qca_init_regulators(qcadev->bt_power, data->vregs, data->num_vregs); From bf9a38803b2626b01cc769aaf13485d8650f576f Mon Sep 17 00:00:00 2001 From: Mashiro Chen Date: Wed, 8 Apr 2026 01:31:01 +0800 Subject: [PATCH 1368/1561] net: hamradio: 6pack: fix uninit-value in sixpack_receive_buf sixpack_receive_buf() does not properly skip bytes with TTY error flags. The while loop iterates through the flags buffer but never advances the data pointer (cp), and passes the original count (including error bytes) to sixpack_decode(). This causes sixpack_decode() to process bytes that should have been skipped due to TTY errors. The TTY layer does not guarantee that cp[i] holds a meaningful value when fp[i] is set, so passing those positions to sixpack_decode() results in KMSAN reporting an uninit-value read. Fix this by processing bytes one at a time, advancing cp on each iteration, and only passing valid (non-error) bytes to sixpack_decode(). This matches the pattern used by slip_receive_buf() and mkiss_receive_buf() for the same purpose. Reported-by: syzbot+ecdb8c9878a81eb21e54@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=ecdb8c9878a81eb21e54 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Mashiro Chen Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260407173101.107352-1-mashiro.chen@mailbox.org Signed-off-by: Jakub Kicinski --- drivers/net/hamradio/6pack.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 885992951e8a..c8b2dc5c1bec 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -391,7 +391,6 @@ static void sixpack_receive_buf(struct tty_struct *tty, const u8 *cp, const u8 *fp, size_t count) { struct sixpack *sp; - size_t count1; if (!count) return; @@ -401,16 +400,16 @@ static void sixpack_receive_buf(struct tty_struct *tty, const u8 *cp, return; /* Read the characters out of the buffer */ - count1 = count; - while (count) { - count--; + while (count--) { if (fp && *fp++) { if (!test_and_set_bit(SIXPF_ERROR, &sp->flags)) sp->dev->stats.rx_errors++; + cp++; continue; } + sixpack_decode(sp, cp, 1); + cp++; } - sixpack_decode(sp, cp, count1); tty_unthrottle(tty); } From 18589df9344c459f062ad74a5890e87ad6c0f1a6 Mon Sep 17 00:00:00 2001 From: Dimitri Daskalakis Date: Tue, 7 Apr 2026 09:49:53 -0700 Subject: [PATCH 1369/1561] selftests: drv-net: Add ntuple (NFC) flow steering test Add a test for ethtool NFC (ntuple) flow steering rules. The test creates an ntuple rule matching on various flow fields and verifies that traffic is steered to the correct queue. The test forces all traffic to queue 0 via the indirection table, then installs an ntuple rule to steer select traffic to a specific queue. The test then verifies the expected number of packets is received on the queue. This test has variants for TCP/UDP over IPv4/IPv6, with rules matching the source IP. Additional match fields will be added in the next commit. TAP version 13 1..4 ok 1 ntuple.queue.tcp4.src_ip ok 2 ntuple.queue.udp4.src_ip ok 3 ntuple.queue.tcp6.src_ip ok 4 ntuple.queue.udp6.src_ip # Totals: pass:4 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Dimitri Daskalakis Link: https://patch.msgid.link/20260407164954.2977820-2-dimitri.daskalakis1@gmail.com Signed-off-by: Jakub Kicinski --- .../testing/selftests/drivers/net/hw/Makefile | 1 + .../selftests/drivers/net/hw/ntuple.py | 145 ++++++++++++++++++ 2 files changed, 146 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/hw/ntuple.py diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile index 28d245e11bc4..d84e955ac87b 100644 --- a/tools/testing/selftests/drivers/net/hw/Makefile +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -36,6 +36,7 @@ TEST_PROGS = \ nic_timestamp.py \ nk_netns.py \ nk_qlease.py \ + ntuple.py \ pp_alloc_fail.py \ rss_api.py \ rss_ctx.py \ diff --git a/tools/testing/selftests/drivers/net/hw/ntuple.py b/tools/testing/selftests/drivers/net/hw/ntuple.py new file mode 100755 index 000000000000..e0d20e8893db --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/ntuple.py @@ -0,0 +1,145 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +"""Test ethtool NFC (ntuple) flow steering rules.""" + +import random +from enum import Enum, auto +from lib.py import ksft_run, ksft_exit +from lib.py import ksft_eq, ksft_ge +from lib.py import ksft_variants, KsftNamedVariant +from lib.py import EthtoolFamily, NetDrvEpEnv, NetdevFamily +from lib.py import KsftSkipEx +from lib.py import cmd, ethtool, defer, rand_port, bkg, wait_port_listen + + +class NtupleField(Enum): + SRC_IP = auto() + + +def _require_ntuple(cfg): + features = ethtool(f"-k {cfg.ifname}", json=True)[0] + if not features["ntuple-filters"]["active"]: + raise KsftSkipEx("Ntuple filters not enabled on the device: " + str(features["ntuple-filters"])) + + +def _get_rx_cnts(cfg, prev=None): + """Get Rx packet counts for all queues, as a simple list of integers + if @prev is specified the prev counts will be subtracted""" + cfg.wait_hw_stats_settle() + data = cfg.netdevnl.qstats_get({"ifindex": cfg.ifindex, "scope": ["queue"]}, dump=True) + data = [x for x in data if x['queue-type'] == "rx"] + max_q = max([x["queue-id"] for x in data]) + queue_stats = [0] * (max_q + 1) + for q in data: + queue_stats[q["queue-id"]] = q["rx-packets"] + if prev and q["queue-id"] < len(prev): + queue_stats[q["queue-id"]] -= prev[q["queue-id"]] + return queue_stats + + +def _ntuple_rule_add(cfg, flow_spec): + """Install an NFC rule via ethtool.""" + + output = ethtool(f"-N {cfg.ifname} {flow_spec}").stdout + rule_id = int(output.split()[-1]) + defer(ethtool, f"-N {cfg.ifname} delete {rule_id}") + + +def _setup_isolated_queue(cfg): + """Default all traffic to queue 0, and pick a random queue to + steer NFC traffic to.""" + + channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + ch_max = channels['combined-max'] + qcnt = channels['combined-count'] + + if ch_max < 2: + raise KsftSkipEx(f"Need at least 2 combined channels, max is {ch_max}") + + desired_queues = min(ch_max, 4) + if qcnt >= desired_queues: + desired_queues = qcnt + else: + ethtool(f"-L {cfg.ifname} combined {desired_queues}") + defer(ethtool, f"-L {cfg.ifname} combined {qcnt}") + + ethtool(f"-X {cfg.ifname} equal 1") + defer(ethtool, f"-X {cfg.ifname} default") + + return random.randint(1, desired_queues - 1) + + +def _send_traffic(cfg, ipver, proto, dst_port, pkt_cnt=40): + """Generate traffic with the desired flow signature.""" + + cfg.require_cmd("socat", remote=True) + + socat_proto = proto.upper() + dst_addr = f"[{cfg.addr_v['6']}]" if ipver == '6' else cfg.addr_v['4'] + + extra_opts = ",nodelay" if proto == "tcp" else ",shut-null" + + listen_cmd = (f"socat -{ipver} -t 2 -u " + f"{socat_proto}-LISTEN:{dst_port},reuseport /dev/null") + with bkg(listen_cmd, exit_wait=True): + wait_port_listen(dst_port, proto=proto) + send_cmd = f""" + bash -c 'for i in $(seq {pkt_cnt}); do echo msg; sleep 0.02; done' | + socat -{ipver} -u - \ + {socat_proto}:{dst_addr}:{dst_port},reuseaddr{extra_opts} + """ + cmd(send_cmd, shell=True, host=cfg.remote) + + +def _add_ntuple_rule_and_send_traffic(cfg, ipver, proto, fields, test_queue): + dst_port = rand_port() + flow_parts = [f"flow-type {proto}{ipver}"] + + for field in fields: + if field == NtupleField.SRC_IP: + flow_parts.append(f"src-ip {cfg.remote_addr_v[ipver]}") + + flow_parts.append(f"action {test_queue}") + _ntuple_rule_add(cfg, " ".join(flow_parts)) + _send_traffic(cfg, ipver, proto, dst_port=dst_port) + + +def _ntuple_variants(): + for ipver in ["4", "6"]: + for proto in ["tcp", "udp"]: + for fields in [[NtupleField.SRC_IP]]: + name = ".".join(f.name.lower() for f in fields) + yield KsftNamedVariant(f"{proto}{ipver}.{name}", + ipver, proto, fields) + + +@ksft_variants(_ntuple_variants()) +def queue(cfg, ipver, proto, fields): + """Test that an NFC rule steers traffic to the correct queue.""" + + cfg.require_ipver(ipver) + _require_ntuple(cfg) + + test_queue = _setup_isolated_queue(cfg) + + cnts = _get_rx_cnts(cfg) + _add_ntuple_rule_and_send_traffic(cfg, ipver, proto, fields, test_queue) + cnts = _get_rx_cnts(cfg, prev=cnts) + + ksft_ge(cnts[test_queue], 40, f"Traffic on test queue {test_queue}: {cnts}") + sum_idle = sum(cnts) - cnts[0] - cnts[test_queue] + ksft_eq(sum_idle, 0, f"Traffic on idle queues: {cnts}") + + +def main() -> None: + """Ksft boilerplate main.""" + + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + cfg.ethnl = EthtoolFamily() + cfg.netdevnl = NetdevFamily() + ksft_run([queue], args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() From a66374a3eb0244f3c66b06d77fd5a1253e97ff27 Mon Sep 17 00:00:00 2001 From: Dimitri Daskalakis Date: Tue, 7 Apr 2026 09:49:54 -0700 Subject: [PATCH 1370/1561] selftests: drv-net: ntuple: Add dst-ip, src-port, dst-port fields Extend the ntuple flow steering test to cover dst-ip, src-port, and dst-port fields. The test supports arbitrary combinations of the fields, for now we test src_ip/dst_ip, and src_ip/dst_ip/src_port/dst_port. The tests currently match full fields, but we can consider adding support for masked fields in the future. TAP version 13 1..24 ok 1 ntuple.queue.tcp4.src_ip ok 2 ntuple.queue.tcp4.dst_ip ok 3 ntuple.queue.tcp4.src_port ok 4 ntuple.queue.tcp4.dst_port ok 5 ntuple.queue.tcp4.src_ip.dst_ip ok 6 ntuple.queue.tcp4.src_ip.dst_ip.src_port.dst_port ok 7 ntuple.queue.udp4.src_ip ok 8 ntuple.queue.udp4.dst_ip ok 9 ntuple.queue.udp4.src_port ok 10 ntuple.queue.udp4.dst_port ok 11 ntuple.queue.udp4.src_ip.dst_ip ok 12 ntuple.queue.udp4.src_ip.dst_ip.src_port.dst_port ok 13 ntuple.queue.tcp6.src_ip ok 14 ntuple.queue.tcp6.dst_ip ok 15 ntuple.queue.tcp6.src_port ok 16 ntuple.queue.tcp6.dst_port ok 17 ntuple.queue.tcp6.src_ip.dst_ip ok 18 ntuple.queue.tcp6.src_ip.dst_ip.src_port.dst_port ok 19 ntuple.queue.udp6.src_ip ok 20 ntuple.queue.udp6.dst_ip ok 21 ntuple.queue.udp6.src_port ok 22 ntuple.queue.udp6.dst_port ok 23 ntuple.queue.udp6.src_ip.dst_ip ok 24 ntuple.queue.udp6.src_ip.dst_ip.src_port.dst_port # Totals: pass:24 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Dimitri Daskalakis Link: https://patch.msgid.link/20260407164954.2977820-3-dimitri.daskalakis1@gmail.com Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/hw/ntuple.py | 29 +++++++++++++++---- 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/drivers/net/hw/ntuple.py b/tools/testing/selftests/drivers/net/hw/ntuple.py index e0d20e8893db..232733142c02 100755 --- a/tools/testing/selftests/drivers/net/hw/ntuple.py +++ b/tools/testing/selftests/drivers/net/hw/ntuple.py @@ -9,11 +9,14 @@ from lib.py import ksft_eq, ksft_ge from lib.py import ksft_variants, KsftNamedVariant from lib.py import EthtoolFamily, NetDrvEpEnv, NetdevFamily from lib.py import KsftSkipEx -from lib.py import cmd, ethtool, defer, rand_port, bkg, wait_port_listen +from lib.py import cmd, ethtool, defer, rand_ports, bkg, wait_port_listen class NtupleField(Enum): SRC_IP = auto() + DST_IP = auto() + SRC_PORT = auto() + DST_PORT = auto() def _require_ntuple(cfg): @@ -69,7 +72,7 @@ def _setup_isolated_queue(cfg): return random.randint(1, desired_queues - 1) -def _send_traffic(cfg, ipver, proto, dst_port, pkt_cnt=40): +def _send_traffic(cfg, ipver, proto, dst_port, src_port, pkt_cnt=40): """Generate traffic with the desired flow signature.""" cfg.require_cmd("socat", remote=True) @@ -86,28 +89,42 @@ def _send_traffic(cfg, ipver, proto, dst_port, pkt_cnt=40): send_cmd = f""" bash -c 'for i in $(seq {pkt_cnt}); do echo msg; sleep 0.02; done' | socat -{ipver} -u - \ - {socat_proto}:{dst_addr}:{dst_port},reuseaddr{extra_opts} + {socat_proto}:{dst_addr}:{dst_port},sourceport={src_port},reuseaddr{extra_opts} """ cmd(send_cmd, shell=True, host=cfg.remote) def _add_ntuple_rule_and_send_traffic(cfg, ipver, proto, fields, test_queue): - dst_port = rand_port() + ports = rand_ports(2) + src_port = ports[0] + dst_port = ports[1] flow_parts = [f"flow-type {proto}{ipver}"] for field in fields: if field == NtupleField.SRC_IP: flow_parts.append(f"src-ip {cfg.remote_addr_v[ipver]}") + elif field == NtupleField.DST_IP: + flow_parts.append(f"dst-ip {cfg.addr_v[ipver]}") + elif field == NtupleField.SRC_PORT: + flow_parts.append(f"src-port {src_port}") + elif field == NtupleField.DST_PORT: + flow_parts.append(f"dst-port {dst_port}") flow_parts.append(f"action {test_queue}") _ntuple_rule_add(cfg, " ".join(flow_parts)) - _send_traffic(cfg, ipver, proto, dst_port=dst_port) + _send_traffic(cfg, ipver, proto, dst_port=dst_port, src_port=src_port) def _ntuple_variants(): for ipver in ["4", "6"]: for proto in ["tcp", "udp"]: - for fields in [[NtupleField.SRC_IP]]: + for fields in [[NtupleField.SRC_IP], + [NtupleField.DST_IP], + [NtupleField.SRC_PORT], + [NtupleField.DST_PORT], + [NtupleField.SRC_IP, NtupleField.DST_IP], + [NtupleField.SRC_IP, NtupleField.DST_IP, + NtupleField.SRC_PORT, NtupleField.DST_PORT]]: name = ".".join(f.name.lower() for f in fields) yield KsftNamedVariant(f"{proto}{ipver}.{name}", ipver, proto, fields) From 54fc83a1728535831df0f251e155d05574918115 Mon Sep 17 00:00:00 2001 From: Andy Roulin Date: Sun, 5 Apr 2026 13:52:22 -0700 Subject: [PATCH 1371/1561] net: bridge: add stp_mode attribute for STP mode selection The bridge-stp usermode helper is currently restricted to the initial network namespace, preventing userspace STP daemons (e.g. mstpd) from operating on bridges in other network namespaces. Since commit ff62198553e4 ("bridge: Only call /sbin/bridge-stp for the initial network namespace"), bridges in non-init namespaces silently fall back to kernel STP with no way to use userspace STP. Add a new bridge attribute IFLA_BR_STP_MODE that allows explicit per-bridge control over STP mode selection: BR_STP_MODE_AUTO (default) - Existing behavior: invoke the /sbin/bridge-stp helper in init_net only; fall back to kernel STP if it fails or in non-init namespaces. BR_STP_MODE_USER - Directly enable userspace STP (BR_USER_STP) without invoking the helper. Works in any network namespace. Userspace is responsible for ensuring an STP daemon manages the bridge. BR_STP_MODE_KERNEL - Directly enable kernel STP (BR_KERNEL_STP) without invoking the helper. The mode can only be changed while STP is disabled, or set to the same value (-EBUSY otherwise). IFLA_BR_STP_MODE is processed before IFLA_BR_STP_STATE in br_changelink(), so both can be set atomically in a single netlink message. The mode can also be changed in the same message that disables STP. The stp_mode struct field is u8 since all possible values fit, while NLA_U32 is used for the netlink attribute since it occupies the same space in the netlink message as NLA_U8. A new stp_helper_active boolean tracks whether the /sbin/bridge-stp helper was invoked during br_stp_start(), so that br_stp_stop() only calls the helper for stop when it was called for start. This avoids calling the helper asymmetrically when stp_mode changes between start and stop. Suggested-by: Ido Schimmel Assisted-by: Claude:claude-opus-4-6 Reviewed-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Signed-off-by: Andy Roulin Link: https://patch.msgid.link/20260405205224.3163000-2-aroulin@nvidia.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/rt-link.yaml | 12 ++++++++ include/uapi/linux/if_link.h | 39 ++++++++++++++++++++++++ net/bridge/br_device.c | 1 + net/bridge/br_netlink.c | 24 ++++++++++++++- net/bridge/br_private.h | 2 ++ net/bridge/br_stp_if.c | 19 +++++++----- 6 files changed, 89 insertions(+), 8 deletions(-) diff --git a/Documentation/netlink/specs/rt-link.yaml b/Documentation/netlink/specs/rt-link.yaml index fcb5aaf0926f..f23aa5f229c5 100644 --- a/Documentation/netlink/specs/rt-link.yaml +++ b/Documentation/netlink/specs/rt-link.yaml @@ -840,6 +840,14 @@ definitions: entries: - p2p - mp + - + name: br-stp-mode + type: enum + enum-name: br-stp-mode + entries: + - auto + - user + - kernel attribute-sets: - @@ -1550,6 +1558,10 @@ attribute-sets: - name: fdb-max-learned type: u32 + - + name: stp-mode + type: u32 + enum: br-stp-mode - name: linkinfo-brport-attrs name-prefix: ifla-brport- diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 280bb1780512..79ce4bc24cba 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -744,6 +744,11 @@ enum in6_addr_gen_mode { * @IFLA_BR_FDB_MAX_LEARNED * Set the number of max dynamically learned FDB entries for the current * bridge. + * + * @IFLA_BR_STP_MODE + * Set the STP mode for the bridge, which controls how the bridge + * selects between userspace and kernel STP. The valid values are + * documented below in the ``BR_STP_MODE_*`` constants. */ enum { IFLA_BR_UNSPEC, @@ -796,11 +801,45 @@ enum { IFLA_BR_MCAST_QUERIER_STATE, IFLA_BR_FDB_N_LEARNED, IFLA_BR_FDB_MAX_LEARNED, + IFLA_BR_STP_MODE, __IFLA_BR_MAX, }; #define IFLA_BR_MAX (__IFLA_BR_MAX - 1) +/** + * DOC: Bridge STP mode values + * + * @BR_STP_MODE_AUTO + * Default. The kernel invokes the ``/sbin/bridge-stp`` helper to hand + * the bridge to a userspace STP daemon (e.g. mstpd). Only attempted in + * the initial network namespace; in other namespaces this falls back to + * kernel STP. + * + * @BR_STP_MODE_USER + * Directly enable userspace STP (``BR_USER_STP``) without invoking the + * ``/sbin/bridge-stp`` helper. Works in any network namespace. + * Userspace is responsible for ensuring an STP daemon manages the + * bridge. + * + * @BR_STP_MODE_KERNEL + * Directly enable kernel STP (``BR_KERNEL_STP``) without invoking the + * helper. + * + * The mode controls how the bridge selects between userspace and kernel + * STP when STP is enabled via ``IFLA_BR_STP_STATE``. It can only be + * changed while STP is disabled (``IFLA_BR_STP_STATE`` == 0), returns + * ``-EBUSY`` otherwise. The default value is ``BR_STP_MODE_AUTO``. + */ +enum br_stp_mode { + BR_STP_MODE_AUTO, + BR_STP_MODE_USER, + BR_STP_MODE_KERNEL, + __BR_STP_MODE_MAX +}; + +#define BR_STP_MODE_MAX (__BR_STP_MODE_MAX - 1) + struct ifla_bridge_id { __u8 prio[2]; __u8 addr[6]; /* ETH_ALEN */ diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index f7502e62dd35..a35ceae0a6f2 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -518,6 +518,7 @@ void br_dev_setup(struct net_device *dev) ether_addr_copy(br->group_addr, eth_stp_addr); br->stp_enabled = BR_NO_STP; + br->stp_mode = BR_STP_MODE_AUTO; br->group_fwd_mask = BR_GROUPFWD_DEFAULT; br->group_fwd_mask_required = BR_GROUPFWD_DEFAULT; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 0264730938f4..6fd5386a1d64 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1270,6 +1270,9 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = { NLA_POLICY_EXACT_LEN(sizeof(struct br_boolopt_multi)), [IFLA_BR_FDB_N_LEARNED] = { .type = NLA_REJECT }, [IFLA_BR_FDB_MAX_LEARNED] = { .type = NLA_U32 }, + [IFLA_BR_STP_MODE] = NLA_POLICY_RANGE(NLA_U32, + BR_STP_MODE_AUTO, + BR_STP_MODE_MAX), }; static int br_changelink(struct net_device *brdev, struct nlattr *tb[], @@ -1306,6 +1309,23 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], return err; } + if (data[IFLA_BR_STP_MODE]) { + u32 mode = nla_get_u32(data[IFLA_BR_STP_MODE]); + + if (mode != br->stp_mode) { + bool stp_off = br->stp_enabled == BR_NO_STP || + (data[IFLA_BR_STP_STATE] && + !nla_get_u32(data[IFLA_BR_STP_STATE])); + + if (!stp_off) { + NL_SET_ERR_MSG_MOD(extack, + "Can't change STP mode while STP is enabled"); + return -EBUSY; + } + } + br->stp_mode = mode; + } + if (data[IFLA_BR_STP_STATE]) { u32 stp_enabled = nla_get_u32(data[IFLA_BR_STP_STATE]); @@ -1634,6 +1654,7 @@ static size_t br_get_size(const struct net_device *brdev) nla_total_size(sizeof(u8)) + /* IFLA_BR_NF_CALL_ARPTABLES */ #endif nla_total_size(sizeof(struct br_boolopt_multi)) + /* IFLA_BR_MULTI_BOOLOPT */ + nla_total_size(sizeof(u32)) + /* IFLA_BR_STP_MODE */ 0; } @@ -1686,7 +1707,8 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) nla_put(skb, IFLA_BR_MULTI_BOOLOPT, sizeof(bm), &bm) || nla_put_u32(skb, IFLA_BR_FDB_N_LEARNED, atomic_read(&br->fdb_n_learned)) || - nla_put_u32(skb, IFLA_BR_FDB_MAX_LEARNED, br->fdb_max_learned)) + nla_put_u32(skb, IFLA_BR_FDB_MAX_LEARNED, br->fdb_max_learned) || + nla_put_u32(skb, IFLA_BR_STP_MODE, br->stp_mode)) return -EMSGSIZE; #ifdef CONFIG_BRIDGE_VLAN_FILTERING diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 6dbca845e625..361a9b84451e 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -523,6 +523,8 @@ struct net_bridge { unsigned char topology_change; unsigned char topology_change_detected; u16 root_port; + u8 stp_mode; + bool stp_helper_active; unsigned long max_age; unsigned long hello_time; unsigned long forward_delay; diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index cc4b27ff1b08..28c1d3f7e22f 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -149,7 +149,9 @@ static void br_stp_start(struct net_bridge *br) { int err = -ENOENT; - if (net_eq(dev_net(br->dev), &init_net)) + /* AUTO mode: try bridge-stp helper in init_net only */ + if (br->stp_mode == BR_STP_MODE_AUTO && + net_eq(dev_net(br->dev), &init_net)) err = br_stp_call_user(br, "start"); if (err && err != -ENOENT) @@ -162,8 +164,9 @@ static void br_stp_start(struct net_bridge *br) else if (br->bridge_forward_delay > BR_MAX_FORWARD_DELAY) __br_set_forward_delay(br, BR_MAX_FORWARD_DELAY); - if (!err) { + if (br->stp_mode == BR_STP_MODE_USER || !err) { br->stp_enabled = BR_USER_STP; + br->stp_helper_active = !err; br_debug(br, "userspace STP started\n"); } else { br->stp_enabled = BR_KERNEL_STP; @@ -180,12 +183,14 @@ static void br_stp_start(struct net_bridge *br) static void br_stp_stop(struct net_bridge *br) { - int err; - if (br->stp_enabled == BR_USER_STP) { - err = br_stp_call_user(br, "stop"); - if (err) - br_err(br, "failed to stop userspace STP (%d)\n", err); + if (br->stp_helper_active) { + int err = br_stp_call_user(br, "stop"); + + if (err) + br_err(br, "failed to stop userspace STP (%d)\n", err); + br->stp_helper_active = false; + } /* To start timers on any ports left in blocking */ spin_lock_bh(&br->lock); From c4f2aab121cdc8400dcd5ec3cc0aa0fb3c06694f Mon Sep 17 00:00:00 2001 From: Andy Roulin Date: Sun, 5 Apr 2026 13:52:23 -0700 Subject: [PATCH 1372/1561] docs: net: bridge: document stp_mode attribute Add documentation for the IFLA_BR_STP_MODE bridge attribute in the "User space STP helper" section of the bridge documentation. Reference the BR_STP_MODE_* values via kernel-doc and describe the use case for network namespace environments. Reviewed-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Signed-off-by: Andy Roulin Link: https://patch.msgid.link/20260405205224.3163000-3-aroulin@nvidia.com Signed-off-by: Jakub Kicinski --- Documentation/networking/bridge.rst | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/Documentation/networking/bridge.rst b/Documentation/networking/bridge.rst index ef8b73e157b2..c1e6ea52c9e5 100644 --- a/Documentation/networking/bridge.rst +++ b/Documentation/networking/bridge.rst @@ -148,6 +148,28 @@ called by the kernel when STP is enabled/disabled on a bridge stp_state <0|1>``). The kernel enables user_stp mode if that command returns 0, or enables kernel_stp mode if that command returns any other value. +STP mode selection +------------------ + +The ``IFLA_BR_STP_MODE`` bridge attribute allows explicit control over how +STP operates when enabled, bypassing the ``/sbin/bridge-stp`` helper +entirely for the ``user`` and ``kernel`` modes. + +.. kernel-doc:: include/uapi/linux/if_link.h + :doc: Bridge STP mode values + +The default mode is ``BR_STP_MODE_AUTO``, which preserves the traditional +behavior of invoking the ``/sbin/bridge-stp`` helper. The ``user`` and +``kernel`` modes are particularly useful in network namespace environments +where the helper mechanism is not available, as ``call_usermodehelper()`` +is restricted to the initial network namespace. + +Example:: + + ip link set dev br0 type bridge stp_mode user stp_state 1 + +The mode can only be changed while STP is disabled. + VLAN ==== From 20ae6d76e381eb520d0d1db526a41b22390816f6 Mon Sep 17 00:00:00 2001 From: Andy Roulin Date: Sun, 5 Apr 2026 13:52:24 -0700 Subject: [PATCH 1373/1561] selftests: net: add bridge STP mode selection test Add a selftest for the IFLA_BR_STP_MODE bridge attribute that verifies: 1. stp_mode defaults to auto on new bridges 2. stp_mode can be toggled between user, kernel, and auto 3. Changing stp_mode while STP is active is rejected with -EBUSY 4. Re-setting the same stp_mode while STP is active succeeds 5. stp_mode user in a network namespace yields userspace STP (stp_state=2) 6. stp_mode kernel forces kernel STP (stp_state=1) 7. stp_mode auto in a netns preserves traditional fallback to kernel STP 8. stp_mode and stp_state can be set atomically in a single message 9. stp_mode persists across STP disable/enable cycles Test 5 is the key use case: it demonstrates that userspace STP can now be enabled in non-init network namespaces by setting stp_mode to user before enabling STP. Test 8 verifies the atomic usage pattern where both attributes are set in a single netlink message, which is supported because br_changelink() processes IFLA_BR_STP_MODE before IFLA_BR_STP_STATE. The test gracefully skips if the installed iproute2 does not support the stp_mode attribute. Assisted-by: Claude:claude-opus-4-6 Reviewed-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Signed-off-by: Andy Roulin Link: https://patch.msgid.link/20260405205224.3163000-4-aroulin@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 1 + .../testing/selftests/net/bridge_stp_mode.sh | 288 ++++++++++++++++++ 2 files changed, 289 insertions(+) create mode 100755 tools/testing/selftests/net/bridge_stp_mode.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index cab74ebdaced..231245a95879 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -15,6 +15,7 @@ TEST_PROGS := \ big_tcp.sh \ bind_bhash.sh \ bpf_offload.py \ + bridge_stp_mode.sh \ bridge_vlan_dump.sh \ broadcast_ether_dst.sh \ broadcast_pmtu.sh \ diff --git a/tools/testing/selftests/net/bridge_stp_mode.sh b/tools/testing/selftests/net/bridge_stp_mode.sh new file mode 100755 index 000000000000..0c81fd029d79 --- /dev/null +++ b/tools/testing/selftests/net/bridge_stp_mode.sh @@ -0,0 +1,288 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# shellcheck disable=SC2034,SC2154,SC2317,SC2329 +# +# Test for bridge STP mode selection (IFLA_BR_STP_MODE). +# +# Verifies that: +# - stp_mode defaults to auto on new bridges +# - stp_mode can be toggled between user, kernel, and auto +# - stp_mode change is rejected while STP is active (-EBUSY) +# - stp_mode user in a netns yields userspace STP (stp_state=2) +# - stp_mode kernel forces kernel STP (stp_state=1) +# - stp_mode auto preserves traditional fallback to kernel STP +# - stp_mode and stp_state can be set atomically in one message +# - stp_mode persists across STP disable/enable cycles + +source lib.sh + +require_command jq + +ALL_TESTS=" + test_default_auto + test_set_modes + test_reject_change_while_stp_active + test_idempotent_mode_while_stp_active + test_user_mode_in_netns + test_kernel_mode + test_auto_mode + test_atomic_mode_and_state + test_mode_persistence +" + +bridge_info_get() +{ + ip -n "$NS1" -d -j link show "$1" | \ + jq -r ".[0].linkinfo.info_data.$2" +} + +check_stp_mode() +{ + local br=$1; shift + local expected=$1; shift + local msg=$1; shift + local val + + val=$(bridge_info_get "$br" stp_mode) + [ "$val" = "$expected" ] + check_err $? "$msg: expected $expected, got $val" +} + +check_stp_state() +{ + local br=$1; shift + local expected=$1; shift + local msg=$1; shift + local val + + val=$(bridge_info_get "$br" stp_state) + [ "$val" = "$expected" ] + check_err $? "$msg: expected $expected, got $val" +} + +# Create a bridge in NS1, bring it up, and defer its deletion. +bridge_create() +{ + ip -n "$NS1" link add "$1" type bridge + ip -n "$NS1" link set "$1" up + defer ip -n "$NS1" link del "$1" +} + +setup_prepare() +{ + setup_ns NS1 +} + +cleanup() +{ + defer_scopes_cleanup + cleanup_all_ns +} + +# Check that stp_mode defaults to auto when creating a bridge. +test_default_auto() +{ + RET=0 + + ip -n "$NS1" link add br-test type bridge + defer ip -n "$NS1" link del br-test + + check_stp_mode br-test auto "stp_mode default" + + log_test "stp_mode defaults to auto" +} + +# Test setting stp_mode to user, kernel, and back to auto. +test_set_modes() +{ + RET=0 + + ip -n "$NS1" link add br-test type bridge + defer ip -n "$NS1" link del br-test + + ip -n "$NS1" link set dev br-test type bridge stp_mode user + check_err $? "Failed to set stp_mode to user" + check_stp_mode br-test user "after set user" + + ip -n "$NS1" link set dev br-test type bridge stp_mode kernel + check_err $? "Failed to set stp_mode to kernel" + check_stp_mode br-test kernel "after set kernel" + + ip -n "$NS1" link set dev br-test type bridge stp_mode auto + check_err $? "Failed to set stp_mode to auto" + check_stp_mode br-test auto "after set auto" + + log_test "stp_mode set user/kernel/auto" +} + +# Verify that stp_mode cannot be changed while STP is active. +test_reject_change_while_stp_active() +{ + RET=0 + + bridge_create br-test + + ip -n "$NS1" link set dev br-test type bridge stp_mode kernel + check_err $? "Failed to set stp_mode to kernel" + + ip -n "$NS1" link set dev br-test type bridge stp_state 1 + check_err $? "Failed to enable STP" + + # Changing stp_mode while STP is active should fail. + ip -n "$NS1" link set dev br-test type bridge stp_mode auto 2>/dev/null + check_fail $? "Changing stp_mode should fail while STP is active" + + check_stp_mode br-test kernel "mode unchanged after rejected change" + + # Disable STP, then change should succeed. + ip -n "$NS1" link set dev br-test type bridge stp_state 0 + check_err $? "Failed to disable STP" + + ip -n "$NS1" link set dev br-test type bridge stp_mode auto + check_err $? "Changing stp_mode should succeed after STP is disabled" + + log_test "reject stp_mode change while STP is active" +} + +# Verify that re-setting the same stp_mode while STP is active succeeds. +test_idempotent_mode_while_stp_active() +{ + RET=0 + + bridge_create br-test + + ip -n "$NS1" link set dev br-test type bridge stp_mode user stp_state 1 + check_err $? "Failed to enable STP with user mode" + + # Re-setting the same mode while STP is active should succeed. + ip -n "$NS1" link set dev br-test type bridge stp_mode user + check_err $? "Idempotent stp_mode set should succeed while STP is active" + + check_stp_state br-test 2 "stp_state after idempotent set" + + # Changing mode while disabling STP in the same message should succeed. + ip -n "$NS1" link set dev br-test type bridge stp_mode auto stp_state 0 + check_err $? "Mode change with simultaneous STP disable should succeed" + + check_stp_mode br-test auto "mode changed after disable+change" + check_stp_state br-test 0 "stp_state after disable+change" + + log_test "idempotent and simultaneous mode change while STP active" +} + +# Test that stp_mode user in a non-init netns yields userspace STP +# (stp_state == 2). This is the key use case: userspace STP without +# needing /sbin/bridge-stp or being in init_net. +test_user_mode_in_netns() +{ + RET=0 + + bridge_create br-test + + ip -n "$NS1" link set dev br-test type bridge stp_mode user + check_err $? "Failed to set stp_mode to user" + + ip -n "$NS1" link set dev br-test type bridge stp_state 1 + check_err $? "Failed to enable STP" + + check_stp_state br-test 2 "stp_state with user mode" + + log_test "stp_mode user in netns yields userspace STP" +} + +# Test that stp_mode kernel forces kernel STP (stp_state == 1) +# regardless of whether /sbin/bridge-stp exists. +test_kernel_mode() +{ + RET=0 + + bridge_create br-test + + ip -n "$NS1" link set dev br-test type bridge stp_mode kernel + check_err $? "Failed to set stp_mode to kernel" + + ip -n "$NS1" link set dev br-test type bridge stp_state 1 + check_err $? "Failed to enable STP" + + check_stp_state br-test 1 "stp_state with kernel mode" + + log_test "stp_mode kernel forces kernel STP" +} + +# Test that stp_mode auto preserves traditional behavior: in a netns +# (non-init_net), bridge-stp is not called and STP falls back to +# kernel mode (stp_state == 1). +test_auto_mode() +{ + RET=0 + + bridge_create br-test + + # Auto mode is the default; enable STP in a netns. + ip -n "$NS1" link set dev br-test type bridge stp_state 1 + check_err $? "Failed to enable STP" + + # In a netns with auto mode, bridge-stp is skipped (init_net only), + # so STP should fall back to kernel mode (stp_state == 1). + check_stp_state br-test 1 "stp_state with auto mode in netns" + + log_test "stp_mode auto preserves traditional behavior" +} + +# Test that stp_mode and stp_state can be set in a single netlink +# message. This is the intended atomic usage pattern. +test_atomic_mode_and_state() +{ + RET=0 + + bridge_create br-test + + # Set both stp_mode and stp_state in one command. + ip -n "$NS1" link set dev br-test type bridge stp_mode user stp_state 1 + check_err $? "Failed to set stp_mode user and stp_state 1 atomically" + + check_stp_state br-test 2 "stp_state after atomic set" + + log_test "atomic stp_mode user + stp_state 1 in single message" +} + +# Test that stp_mode persists across STP disable/enable cycles. +test_mode_persistence() +{ + RET=0 + + bridge_create br-test + + # Set user mode and enable STP. + ip -n "$NS1" link set dev br-test type bridge stp_mode user + ip -n "$NS1" link set dev br-test type bridge stp_state 1 + check_err $? "Failed to enable STP with user mode" + + # Disable STP. + ip -n "$NS1" link set dev br-test type bridge stp_state 0 + check_err $? "Failed to disable STP" + + # Verify mode is still user. + check_stp_mode br-test user "stp_mode after STP disable" + + # Re-enable STP -- should use user mode again. + ip -n "$NS1" link set dev br-test type bridge stp_state 1 + check_err $? "Failed to re-enable STP" + + check_stp_state br-test 2 "stp_state after re-enable" + + log_test "stp_mode persists across STP disable/enable cycles" +} + +# Check iproute2 support before setting up resources. +if ! ip link add type bridge help 2>&1 | grep -q "stp_mode"; then + echo "SKIP: iproute2 too old, missing stp_mode support" + exit "$ksft_skip" +fi + +trap cleanup EXIT + +setup_prepare +tests_run + +exit "$EXIT_STATUS" From 3a4056ec7ec8f71ae9722f86d3cfbc4589deeac4 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Tue, 7 Apr 2026 18:30:27 +0100 Subject: [PATCH 1374/1561] net: dsa: mxl862xx: reject DSA_PORT_TYPE_DSA DSA links aren't supported by the mxl862xx driver. Instead of returning early from .port_setup when called for DSA_PORT_TYPE_DSA ports rather return -EOPNOTSUPP and show an error message. The desired side-effect is that the framework will switch the port to DSA_PORT_TYPE_UNUSED, so we can stop caring about DSA_PORT_TYPE_DSA in all other places. Signed-off-by: Daniel Golle Link: https://patch.msgid.link/b686f3a22d8a6e7d470e7aa98da811a996a229b9.1775581804.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mxl862xx/mxl862xx.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/mxl862xx/mxl862xx.c b/drivers/net/dsa/mxl862xx/mxl862xx.c index e4e16c720763..9a9714c4859b 100644 --- a/drivers/net/dsa/mxl862xx/mxl862xx.c +++ b/drivers/net/dsa/mxl862xx/mxl862xx.c @@ -544,10 +544,14 @@ static int mxl862xx_port_setup(struct dsa_switch *ds, int port) mxl862xx_port_fast_age(ds, port); - if (dsa_port_is_unused(dp) || - dsa_port_is_dsa(dp)) + if (dsa_port_is_unused(dp)) return 0; + if (dsa_port_is_dsa(dp)) { + dev_err(ds->dev, "port %d: DSA links not supported\n", port); + return -EOPNOTSUPP; + } + ret = mxl862xx_configure_sp_tag_proto(ds, port, is_cpu_port); if (ret) return ret; @@ -591,7 +595,7 @@ static void mxl862xx_port_teardown(struct dsa_switch *ds, int port) struct mxl862xx_priv *priv = ds->priv; struct dsa_port *dp = dsa_to_port(ds, port); - if (dsa_port_is_unused(dp) || dsa_port_is_dsa(dp)) + if (dsa_port_is_unused(dp)) return; /* Prevent deferred host_flood_work from acting on stale state. From 71934b9e6f36b1786bd969c0e1d2de8f9bd65f0f Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Tue, 7 Apr 2026 18:30:35 +0100 Subject: [PATCH 1375/1561] net: dsa: mxl862xx: don't skip early bridge port configuration mxl862xx_bridge_port_set() is currently guarded by the mxl8622_port->setup_done flag, as the early call to mxl862xx_bridge_port_set() from mxl862xx_port_stp_state_set() would otherwise cause a NULL-pointer dereference on unused ports which don't have dp->cpu_dp despite not being a CPU port. Using the setup_done flag (which is never set for unused ports), however, also prevents mxl862xx_bridge_port_set() from configuring user ports' single-port bridges early, which was unintended. Fix this by returning early from mxl862xx_bridge_port_set() in case dsa_port_is_unused(). Fixes: 340bdf984613c ("net: dsa: mxl862xx: implement bridge offloading") Signed-off-by: Daniel Golle Link: https://patch.msgid.link/15962aac29ebe0a6eb77565451acff880c41ef33.1775581804.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mxl862xx/mxl862xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/mxl862xx/mxl862xx.c b/drivers/net/dsa/mxl862xx/mxl862xx.c index 9a9714c4859b..f65525aff5e5 100644 --- a/drivers/net/dsa/mxl862xx/mxl862xx.c +++ b/drivers/net/dsa/mxl862xx/mxl862xx.c @@ -278,7 +278,7 @@ static int mxl862xx_set_bridge_port(struct dsa_switch *ds, int port) bool enable; int i, idx; - if (!p->setup_done) + if (dsa_port_is_unused(dp)) return 0; if (dsa_port_is_cpu(dp)) { From d587f9b6dcc98c1e8aeb5c189a7bfac60d6d29ac Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Tue, 7 Apr 2026 18:31:01 +0100 Subject: [PATCH 1376/1561] net: dsa: mxl862xx: implement VLAN functionality Add VLAN support using both the Extended VLAN (EVLAN) engine and the VLAN Filter (VF) engine in a hybrid architecture that allows a higher number of VIDs than either engine could achieve alone. The VLAN Filter engine handles per-port VID membership checks with discard-unmatched semantics. The Extended VLAN engine handles PVID insertion on ingress (via fixed catchall rules) and tag stripping on egress (2 rules per untagged VID). Tagged-only VIDs need no EVLAN egress rules at all, so they consume only a VF entry. Both engines draw from shared 1024-entry hardware pools. The VF pool is divided equally among user ports for VID membership, while the EVLAN pool is partitioned into small fixed-size ingress blocks (7 entries of catchall rules per port) and fixed-size egress blocks for tag stripping. With 5 user ports this yields up to 204 VIDs per port (limited by VF), of which up to 98 can be untagged (limited by EVLAN egress budget). With 9 user ports the numbers are 113 total and 53 untagged. Wire up .port_vlan_add, .port_vlan_del, and .port_vlan_filtering. Reprogram all EVLAN rules when the PVID or filtering mode changes. Detach blocks from the bridge port before freeing them on bridge leave to satisfy the firmware's internal refcount. Future optimizations could increase VID capacity by dynamically sizing the egress EVLAN blocks based on actual per-port untagged VID counts rather than worst-case pre-allocation, or by sharing EVLAN egress and VLAN Filter blocks across ports with identical VID sets. Signed-off-by: Daniel Golle Link: https://patch.msgid.link/9be29637675342b109a85fa08f5378800d9f7b78.1775581804.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mxl862xx/mxl862xx-api.h | 329 ++++++++++ drivers/net/dsa/mxl862xx/mxl862xx-cmd.h | 12 + drivers/net/dsa/mxl862xx/mxl862xx.c | 781 +++++++++++++++++++++++- drivers/net/dsa/mxl862xx/mxl862xx.h | 103 +++- 4 files changed, 1211 insertions(+), 14 deletions(-) diff --git a/drivers/net/dsa/mxl862xx/mxl862xx-api.h b/drivers/net/dsa/mxl862xx/mxl862xx-api.h index 8677763544d7..c902e90397e5 100644 --- a/drivers/net/dsa/mxl862xx/mxl862xx-api.h +++ b/drivers/net/dsa/mxl862xx/mxl862xx-api.h @@ -731,6 +731,335 @@ struct mxl862xx_cfg { u8 pause_mac_src[ETH_ALEN]; } __packed; +/** + * enum mxl862xx_extended_vlan_filter_type - Extended VLAN filter tag type + * @MXL862XX_EXTENDEDVLAN_FILTER_TYPE_NORMAL: Normal tagged + * @MXL862XX_EXTENDEDVLAN_FILTER_TYPE_NO_FILTER: No filter (wildcard) + * @MXL862XX_EXTENDEDVLAN_FILTER_TYPE_DEFAULT: Default entry + * @MXL862XX_EXTENDEDVLAN_FILTER_TYPE_NO_TAG: Untagged + */ +enum mxl862xx_extended_vlan_filter_type { + MXL862XX_EXTENDEDVLAN_FILTER_TYPE_NORMAL = 0, + MXL862XX_EXTENDEDVLAN_FILTER_TYPE_NO_FILTER = 1, + MXL862XX_EXTENDEDVLAN_FILTER_TYPE_DEFAULT = 2, + MXL862XX_EXTENDEDVLAN_FILTER_TYPE_NO_TAG = 3, +}; + +/** + * enum mxl862xx_extended_vlan_filter_tpid - Extended VLAN filter TPID + * @MXL862XX_EXTENDEDVLAN_FILTER_TPID_NO_FILTER: No TPID filter + * @MXL862XX_EXTENDEDVLAN_FILTER_TPID_8021Q: 802.1Q TPID + * @MXL862XX_EXTENDEDVLAN_FILTER_TPID_VTETYPE: VLAN type extension + */ +enum mxl862xx_extended_vlan_filter_tpid { + MXL862XX_EXTENDEDVLAN_FILTER_TPID_NO_FILTER = 0, + MXL862XX_EXTENDEDVLAN_FILTER_TPID_8021Q = 1, + MXL862XX_EXTENDEDVLAN_FILTER_TPID_VTETYPE = 2, +}; + +/** + * enum mxl862xx_extended_vlan_filter_dei - Extended VLAN filter DEI + * @MXL862XX_EXTENDEDVLAN_FILTER_DEI_NO_FILTER: No DEI filter + * @MXL862XX_EXTENDEDVLAN_FILTER_DEI_0: DEI = 0 + * @MXL862XX_EXTENDEDVLAN_FILTER_DEI_1: DEI = 1 + */ +enum mxl862xx_extended_vlan_filter_dei { + MXL862XX_EXTENDEDVLAN_FILTER_DEI_NO_FILTER = 0, + MXL862XX_EXTENDEDVLAN_FILTER_DEI_0 = 1, + MXL862XX_EXTENDEDVLAN_FILTER_DEI_1 = 2, +}; + +/** + * enum mxl862xx_extended_vlan_treatment_remove_tag - Tag removal action + * @MXL862XX_EXTENDEDVLAN_TREATMENT_NOT_REMOVE_TAG: Do not remove tag + * @MXL862XX_EXTENDEDVLAN_TREATMENT_REMOVE_1_TAG: Remove one tag + * @MXL862XX_EXTENDEDVLAN_TREATMENT_REMOVE_2_TAG: Remove two tags + * @MXL862XX_EXTENDEDVLAN_TREATMENT_DISCARD_UPSTREAM: Discard frame + */ +enum mxl862xx_extended_vlan_treatment_remove_tag { + MXL862XX_EXTENDEDVLAN_TREATMENT_NOT_REMOVE_TAG = 0, + MXL862XX_EXTENDEDVLAN_TREATMENT_REMOVE_1_TAG = 1, + MXL862XX_EXTENDEDVLAN_TREATMENT_REMOVE_2_TAG = 2, + MXL862XX_EXTENDEDVLAN_TREATMENT_DISCARD_UPSTREAM = 3, +}; + +/** + * enum mxl862xx_extended_vlan_treatment_priority - Treatment priority mode + * @MXL862XX_EXTENDEDVLAN_TREATMENT_PRIORITY_VAL: Use explicit value + * @MXL862XX_EXTENDEDVLAN_TREATMENT_INNER_PRIORITY: Copy from inner tag + * @MXL862XX_EXTENDEDVLAN_TREATMENT_OUTER_PRIORITY: Copy from outer tag + * @MXL862XX_EXTENDEDVLAN_TREATMENT_DSCP: Derive from DSCP + */ +enum mxl862xx_extended_vlan_treatment_priority { + MXL862XX_EXTENDEDVLAN_TREATMENT_PRIORITY_VAL = 0, + MXL862XX_EXTENDEDVLAN_TREATMENT_INNER_PRIORITY = 1, + MXL862XX_EXTENDEDVLAN_TREATMENT_OUTER_PRIORITY = 2, + MXL862XX_EXTENDEDVLAN_TREATMENT_DSCP = 3, +}; + +/** + * enum mxl862xx_extended_vlan_treatment_vid - Treatment VID mode + * @MXL862XX_EXTENDEDVLAN_TREATMENT_VID_VAL: Use explicit VID value + * @MXL862XX_EXTENDEDVLAN_TREATMENT_INNER_VID: Copy from inner tag + * @MXL862XX_EXTENDEDVLAN_TREATMENT_OUTER_VID: Copy from outer tag + */ +enum mxl862xx_extended_vlan_treatment_vid { + MXL862XX_EXTENDEDVLAN_TREATMENT_VID_VAL = 0, + MXL862XX_EXTENDEDVLAN_TREATMENT_INNER_VID = 1, + MXL862XX_EXTENDEDVLAN_TREATMENT_OUTER_VID = 2, +}; + +/** + * enum mxl862xx_extended_vlan_treatment_tpid - Treatment TPID mode + * @MXL862XX_EXTENDEDVLAN_TREATMENT_INNER_TPID: Copy from inner tag + * @MXL862XX_EXTENDEDVLAN_TREATMENT_OUTER_TPID: Copy from outer tag + * @MXL862XX_EXTENDEDVLAN_TREATMENT_VTETYPE: Use VLAN type extension + * @MXL862XX_EXTENDEDVLAN_TREATMENT_8021Q: Use 802.1Q TPID + */ +enum mxl862xx_extended_vlan_treatment_tpid { + MXL862XX_EXTENDEDVLAN_TREATMENT_INNER_TPID = 0, + MXL862XX_EXTENDEDVLAN_TREATMENT_OUTER_TPID = 1, + MXL862XX_EXTENDEDVLAN_TREATMENT_VTETYPE = 2, + MXL862XX_EXTENDEDVLAN_TREATMENT_8021Q = 3, +}; + +/** + * enum mxl862xx_extended_vlan_treatment_dei - Treatment DEI mode + * @MXL862XX_EXTENDEDVLAN_TREATMENT_INNER_DEI: Copy from inner tag + * @MXL862XX_EXTENDEDVLAN_TREATMENT_OUTER_DEI: Copy from outer tag + * @MXL862XX_EXTENDEDVLAN_TREATMENT_DEI_0: Set DEI to 0 + * @MXL862XX_EXTENDEDVLAN_TREATMENT_DEI_1: Set DEI to 1 + */ +enum mxl862xx_extended_vlan_treatment_dei { + MXL862XX_EXTENDEDVLAN_TREATMENT_INNER_DEI = 0, + MXL862XX_EXTENDEDVLAN_TREATMENT_OUTER_DEI = 1, + MXL862XX_EXTENDEDVLAN_TREATMENT_DEI_0 = 2, + MXL862XX_EXTENDEDVLAN_TREATMENT_DEI_1 = 3, +}; + +/** + * enum mxl862xx_extended_vlan_4_tpid_mode - 4-TPID mode selector + * @MXL862XX_EXTENDEDVLAN_TPID_VTETYPE_1: VLAN TPID type 1 + * @MXL862XX_EXTENDEDVLAN_TPID_VTETYPE_2: VLAN TPID type 2 + * @MXL862XX_EXTENDEDVLAN_TPID_VTETYPE_3: VLAN TPID type 3 + * @MXL862XX_EXTENDEDVLAN_TPID_VTETYPE_4: VLAN TPID type 4 + */ +enum mxl862xx_extended_vlan_4_tpid_mode { + MXL862XX_EXTENDEDVLAN_TPID_VTETYPE_1 = 0, + MXL862XX_EXTENDEDVLAN_TPID_VTETYPE_2 = 1, + MXL862XX_EXTENDEDVLAN_TPID_VTETYPE_3 = 2, + MXL862XX_EXTENDEDVLAN_TPID_VTETYPE_4 = 3, +}; + +/** + * enum mxl862xx_extended_vlan_filter_ethertype - Filter EtherType match + * @MXL862XX_EXTENDEDVLAN_FILTER_ETHERTYPE_NO_FILTER: No filter + * @MXL862XX_EXTENDEDVLAN_FILTER_ETHERTYPE_IPOE: IPoE + * @MXL862XX_EXTENDEDVLAN_FILTER_ETHERTYPE_PPPOE: PPPoE + * @MXL862XX_EXTENDEDVLAN_FILTER_ETHERTYPE_ARP: ARP + * @MXL862XX_EXTENDEDVLAN_FILTER_ETHERTYPE_IPV6IPOE: IPv6 IPoE + * @MXL862XX_EXTENDEDVLAN_FILTER_ETHERTYPE_EAPOL: EAPOL + * @MXL862XX_EXTENDEDVLAN_FILTER_ETHERTYPE_DHCPV4: DHCPv4 + * @MXL862XX_EXTENDEDVLAN_FILTER_ETHERTYPE_DHCPV6: DHCPv6 + */ +enum mxl862xx_extended_vlan_filter_ethertype { + MXL862XX_EXTENDEDVLAN_FILTER_ETHERTYPE_NO_FILTER = 0, + MXL862XX_EXTENDEDVLAN_FILTER_ETHERTYPE_IPOE = 1, + MXL862XX_EXTENDEDVLAN_FILTER_ETHERTYPE_PPPOE = 2, + MXL862XX_EXTENDEDVLAN_FILTER_ETHERTYPE_ARP = 3, + MXL862XX_EXTENDEDVLAN_FILTER_ETHERTYPE_IPV6IPOE = 4, + MXL862XX_EXTENDEDVLAN_FILTER_ETHERTYPE_EAPOL = 5, + MXL862XX_EXTENDEDVLAN_FILTER_ETHERTYPE_DHCPV4 = 6, + MXL862XX_EXTENDEDVLAN_FILTER_ETHERTYPE_DHCPV6 = 7, +}; + +/** + * struct mxl862xx_extendedvlan_filter_vlan - Per-tag filter in Extended VLAN + * @type: Tag presence/type match (see &enum mxl862xx_extended_vlan_filter_type) + * @priority_enable: Enable PCP value matching + * @priority_val: PCP value to match + * @vid_enable: Enable VID matching + * @vid_val: VID value to match + * @tpid: TPID match mode (see &enum mxl862xx_extended_vlan_filter_tpid) + * @dei: DEI match mode (see &enum mxl862xx_extended_vlan_filter_dei) + */ +struct mxl862xx_extendedvlan_filter_vlan { + __le32 type; + u8 priority_enable; + __le32 priority_val; + u8 vid_enable; + __le32 vid_val; + __le32 tpid; + __le32 dei; +} __packed; + +/** + * struct mxl862xx_extendedvlan_filter - Extended VLAN filter configuration + * @original_packet_filter_mode: If true, filter on original (pre-treatment) + * packet + * @filter_4_tpid_mode: 4-TPID mode (see &enum mxl862xx_extended_vlan_4_tpid_mode) + * @outer_vlan: Outer VLAN tag filter + * @inner_vlan: Inner VLAN tag filter + * @ether_type: EtherType filter (see + * &enum mxl862xx_extended_vlan_filter_ethertype) + */ +struct mxl862xx_extendedvlan_filter { + u8 original_packet_filter_mode; + __le32 filter_4_tpid_mode; + struct mxl862xx_extendedvlan_filter_vlan outer_vlan; + struct mxl862xx_extendedvlan_filter_vlan inner_vlan; + __le32 ether_type; +} __packed; + +/** + * struct mxl862xx_extendedvlan_treatment_vlan - Per-tag treatment in + * Extended VLAN + * @priority_mode: Priority assignment mode + * (see &enum mxl862xx_extended_vlan_treatment_priority) + * @priority_val: Priority value (when mode is VAL) + * @vid_mode: VID assignment mode + * (see &enum mxl862xx_extended_vlan_treatment_vid) + * @vid_val: VID value (when mode is VAL) + * @tpid: TPID assignment mode + * (see &enum mxl862xx_extended_vlan_treatment_tpid) + * @dei: DEI assignment mode + * (see &enum mxl862xx_extended_vlan_treatment_dei) + */ +struct mxl862xx_extendedvlan_treatment_vlan { + __le32 priority_mode; + __le32 priority_val; + __le32 vid_mode; + __le32 vid_val; + __le32 tpid; + __le32 dei; +} __packed; + +/** + * struct mxl862xx_extendedvlan_treatment - Extended VLAN treatment + * @remove_tag: Tag removal action + * (see &enum mxl862xx_extended_vlan_treatment_remove_tag) + * @treatment_4_tpid_mode: 4-TPID treatment mode + * @add_outer_vlan: Add outer VLAN tag + * @outer_vlan: Outer VLAN tag treatment parameters + * @add_inner_vlan: Add inner VLAN tag + * @inner_vlan: Inner VLAN tag treatment parameters + * @reassign_bridge_port: Reassign to different bridge port + * @new_bridge_port_id: New bridge port ID + * @new_dscp_enable: Enable new DSCP assignment + * @new_dscp: New DSCP value + * @new_traffic_class_enable: Enable new traffic class assignment + * @new_traffic_class: New traffic class value + * @new_meter_enable: Enable new metering + * @s_new_traffic_meter_id: New traffic meter ID + * @dscp2pcp_map: DSCP to PCP mapping table (64 entries) + * @loopback_enable: Enable loopback + * @da_sa_swap_enable: Enable DA/SA swap + * @mirror_enable: Enable mirroring + */ +struct mxl862xx_extendedvlan_treatment { + __le32 remove_tag; + __le32 treatment_4_tpid_mode; + u8 add_outer_vlan; + struct mxl862xx_extendedvlan_treatment_vlan outer_vlan; + u8 add_inner_vlan; + struct mxl862xx_extendedvlan_treatment_vlan inner_vlan; + u8 reassign_bridge_port; + __le16 new_bridge_port_id; + u8 new_dscp_enable; + __le16 new_dscp; + u8 new_traffic_class_enable; + u8 new_traffic_class; + u8 new_meter_enable; + __le16 s_new_traffic_meter_id; + u8 dscp2pcp_map[64]; + u8 loopback_enable; + u8 da_sa_swap_enable; + u8 mirror_enable; +} __packed; + +/** + * struct mxl862xx_extendedvlan_alloc - Extended VLAN block allocation + * @number_of_entries: Number of entries to allocate (input) / allocated + * (output) + * @extended_vlan_block_id: Block ID assigned by firmware (output on alloc, + * input on free) + * + * Used with %MXL862XX_EXTENDEDVLAN_ALLOC and %MXL862XX_EXTENDEDVLAN_FREE. + */ +struct mxl862xx_extendedvlan_alloc { + __le16 number_of_entries; + __le16 extended_vlan_block_id; +} __packed; + +/** + * struct mxl862xx_extendedvlan_config - Extended VLAN entry configuration + * @extended_vlan_block_id: Block ID from allocation + * @entry_index: Entry index within the block + * @filter: Filter (match) configuration + * @treatment: Treatment (action) configuration + * + * Used with %MXL862XX_EXTENDEDVLAN_SET and %MXL862XX_EXTENDEDVLAN_GET. + */ +struct mxl862xx_extendedvlan_config { + __le16 extended_vlan_block_id; + __le16 entry_index; + struct mxl862xx_extendedvlan_filter filter; + struct mxl862xx_extendedvlan_treatment treatment; +} __packed; + +/** + * enum mxl862xx_vlan_filter_tci_mask - VLAN Filter TCI mask + * @MXL862XX_VLAN_FILTER_TCI_MASK_VID: TCI mask for VLAN ID + * @MXL862XX_VLAN_FILTER_TCI_MASK_PCP: TCI mask for VLAN PCP + * @MXL862XX_VLAN_FILTER_TCI_MASK_TCI: TCI mask for VLAN TCI + */ +enum mxl862xx_vlan_filter_tci_mask { + MXL862XX_VLAN_FILTER_TCI_MASK_VID = 0, + MXL862XX_VLAN_FILTER_TCI_MASK_PCP = 1, + MXL862XX_VLAN_FILTER_TCI_MASK_TCI = 2, +}; + +/** + * struct mxl862xx_vlanfilter_alloc - VLAN Filter block allocation + * @number_of_entries: Number of entries to allocate (input) / allocated + * (output) + * @vlan_filter_block_id: Block ID assigned by firmware (output on alloc, + * input on free) + * @discard_untagged: Discard untagged packets + * @discard_unmatched_tagged: Discard tagged packets that do not match any + * entry in the block + * @use_default_port_vid: Use default port VLAN ID for filtering + * + * Used with %MXL862XX_VLANFILTER_ALLOC and %MXL862XX_VLANFILTER_FREE. + */ +struct mxl862xx_vlanfilter_alloc { + __le16 number_of_entries; + __le16 vlan_filter_block_id; + u8 discard_untagged; + u8 discard_unmatched_tagged; + u8 use_default_port_vid; +} __packed; + +/** + * struct mxl862xx_vlanfilter_config - VLAN Filter entry configuration + * @vlan_filter_block_id: Block ID from allocation + * @entry_index: Entry index within the block + * @vlan_filter_mask: TCI field(s) to match (see + * &enum mxl862xx_vlan_filter_tci_mask) + * @val: TCI value(s) to match (VID, PCP, or full TCI depending on mask) + * @discard_matched: When true, discard frames matching this entry; + * when false, allow them + * + * Used with %MXL862XX_VLANFILTER_SET and %MXL862XX_VLANFILTER_GET. + */ +struct mxl862xx_vlanfilter_config { + __le16 vlan_filter_block_id; + __le16 entry_index; + __le32 vlan_filter_mask; /* enum mxl862xx_vlan_filter_tci_mask */ + __le32 val; + u8 discard_matched; +} __packed; + /** * enum mxl862xx_ss_sp_tag_mask - Special tag valid field indicator bits * @MXL862XX_SS_SP_TAG_MASK_RX: valid RX special tag mode diff --git a/drivers/net/dsa/mxl862xx/mxl862xx-cmd.h b/drivers/net/dsa/mxl862xx/mxl862xx-cmd.h index 9f6c5bf9fdf2..45df37cde40d 100644 --- a/drivers/net/dsa/mxl862xx/mxl862xx-cmd.h +++ b/drivers/net/dsa/mxl862xx/mxl862xx-cmd.h @@ -17,6 +17,8 @@ #define MXL862XX_CTP_MAGIC 0x500 #define MXL862XX_QOS_MAGIC 0x600 #define MXL862XX_SWMAC_MAGIC 0xa00 +#define MXL862XX_EXTVLAN_MAGIC 0xb00 +#define MXL862XX_VLANFILTER_MAGIC 0xc00 #define MXL862XX_STP_MAGIC 0xf00 #define MXL862XX_SS_MAGIC 0x1600 #define GPY_GPY2XX_MAGIC 0x1800 @@ -47,6 +49,16 @@ #define MXL862XX_MAC_TABLEENTRYREMOVE (MXL862XX_SWMAC_MAGIC + 0x5) #define MXL862XX_MAC_TABLECLEARCOND (MXL862XX_SWMAC_MAGIC + 0x8) +#define MXL862XX_EXTENDEDVLAN_ALLOC (MXL862XX_EXTVLAN_MAGIC + 0x1) +#define MXL862XX_EXTENDEDVLAN_SET (MXL862XX_EXTVLAN_MAGIC + 0x2) +#define MXL862XX_EXTENDEDVLAN_GET (MXL862XX_EXTVLAN_MAGIC + 0x3) +#define MXL862XX_EXTENDEDVLAN_FREE (MXL862XX_EXTVLAN_MAGIC + 0x4) + +#define MXL862XX_VLANFILTER_ALLOC (MXL862XX_VLANFILTER_MAGIC + 0x1) +#define MXL862XX_VLANFILTER_SET (MXL862XX_VLANFILTER_MAGIC + 0x2) +#define MXL862XX_VLANFILTER_GET (MXL862XX_VLANFILTER_MAGIC + 0x3) +#define MXL862XX_VLANFILTER_FREE (MXL862XX_VLANFILTER_MAGIC + 0x4) + #define MXL862XX_SS_SPTAG_SET (MXL862XX_SS_MAGIC + 0x2) #define MXL862XX_STP_PORTCFGSET (MXL862XX_STP_MAGIC + 0x2) diff --git a/drivers/net/dsa/mxl862xx/mxl862xx.c b/drivers/net/dsa/mxl862xx/mxl862xx.c index f65525aff5e5..fca9a3e36bb6 100644 --- a/drivers/net/dsa/mxl862xx/mxl862xx.c +++ b/drivers/net/dsa/mxl862xx/mxl862xx.c @@ -50,6 +50,85 @@ static const int mxl862xx_flood_meters[] = { MXL862XX_BRIDGE_PORT_EGRESS_METER_BROADCAST, }; +enum mxl862xx_evlan_action { + EVLAN_ACCEPT, /* pass-through, no tag removal */ + EVLAN_STRIP_IF_UNTAGGED, /* remove 1 tag if entry's untagged flag set */ + EVLAN_PVID_OR_DISCARD, /* insert PVID tag or discard if no PVID */ + EVLAN_STRIP1_AND_PVID_OR_DISCARD,/* strip 1 tag + insert PVID, or discard */ +}; + +struct mxl862xx_evlan_rule_desc { + u8 outer_type; /* enum mxl862xx_extended_vlan_filter_type */ + u8 inner_type; /* enum mxl862xx_extended_vlan_filter_type */ + u8 outer_tpid; /* enum mxl862xx_extended_vlan_filter_tpid */ + u8 inner_tpid; /* enum mxl862xx_extended_vlan_filter_tpid */ + bool match_vid; /* true: match on VID from the vid parameter */ + u8 action; /* enum mxl862xx_evlan_action */ +}; + +/* Shorthand constants for readability */ +#define FT_NORMAL MXL862XX_EXTENDEDVLAN_FILTER_TYPE_NORMAL +#define FT_NO_FILTER MXL862XX_EXTENDEDVLAN_FILTER_TYPE_NO_FILTER +#define FT_DEFAULT MXL862XX_EXTENDEDVLAN_FILTER_TYPE_DEFAULT +#define FT_NO_TAG MXL862XX_EXTENDEDVLAN_FILTER_TYPE_NO_TAG +#define TP_NONE MXL862XX_EXTENDEDVLAN_FILTER_TPID_NO_FILTER +#define TP_8021Q MXL862XX_EXTENDEDVLAN_FILTER_TPID_8021Q + +/* + * VLAN-aware ingress: 7 final catchall rules. + * + * VLAN Filter handles VID membership for tagged frames, so the + * Extended VLAN ingress block only needs to handle: + * - Priority-tagged (VID=0): strip + insert PVID + * - Untagged: insert PVID or discard + * - Standard 802.1Q VID>0: pass through (VF handles membership) + * - Non-8021Q TPID (0x88A8 etc.): treat as untagged + * + * Rule ordering is critical: the EVLAN engine scans entries in + * ascending index order and stops at the first match. + * + * The 802.1Q ACCEPT rules (indices 3--4) must appear BEFORE the + * NO_FILTER catchalls (indices 5--6). NO_FILTER matches any tag + * regardless of TPID, so without the ACCEPT guard, it would also + * catch standard 802.1Q VID>0 frames and corrupt them. With the + * guard, 802.1Q VID>0 frames match the ACCEPT rules first and + * pass through untouched; only non-8021Q TPID frames pass through + * to the NO_FILTER catchalls. + */ +static const struct mxl862xx_evlan_rule_desc ingress_aware_final[] = { + /* 802.1p / priority-tagged (VID 0): strip + PVID */ + { FT_NORMAL, FT_NORMAL, TP_8021Q, TP_8021Q, true, EVLAN_STRIP1_AND_PVID_OR_DISCARD }, + { FT_NORMAL, FT_NO_TAG, TP_8021Q, TP_NONE, true, EVLAN_STRIP1_AND_PVID_OR_DISCARD }, + /* Untagged: PVID insertion or discard */ + { FT_NO_TAG, FT_NO_TAG, TP_NONE, TP_NONE, false, EVLAN_PVID_OR_DISCARD }, + /* 802.1Q VID>0: accept - VF handles membership. + * match_vid=false means any VID; VID=0 is already caught above. + */ + { FT_NORMAL, FT_NORMAL, TP_8021Q, TP_8021Q, false, EVLAN_ACCEPT }, + { FT_NORMAL, FT_NO_TAG, TP_8021Q, TP_NONE, false, EVLAN_ACCEPT }, + /* Non-8021Q TPID (0x88A8 etc.): treat as untagged - strip + PVID */ + { FT_NO_FILTER, FT_NO_FILTER, TP_NONE, TP_NONE, false, EVLAN_STRIP1_AND_PVID_OR_DISCARD }, + { FT_NO_FILTER, FT_NO_TAG, TP_NONE, TP_NONE, false, EVLAN_STRIP1_AND_PVID_OR_DISCARD }, +}; + +/* + * VID-specific accept rules (VLAN-aware, standard tag, 2 per VID). + * Outer tag carries the VLAN; inner may or may not be present. + */ +static const struct mxl862xx_evlan_rule_desc vid_accept_standard[] = { + { FT_NORMAL, FT_NORMAL, TP_8021Q, TP_8021Q, true, EVLAN_STRIP_IF_UNTAGGED }, + { FT_NORMAL, FT_NO_TAG, TP_8021Q, TP_NONE, true, EVLAN_STRIP_IF_UNTAGGED }, +}; + +/* + * Egress tag-stripping rules for VLAN-unaware mode (2 per untagged VID). + * The HW sees the MxL tag as outer; the real VLAN tag, if any, is inner. + */ +static const struct mxl862xx_evlan_rule_desc vid_accept_egress_unaware[] = { + { FT_NO_FILTER, FT_NORMAL, TP_NONE, TP_8021Q, true, EVLAN_STRIP_IF_UNTAGGED }, + { FT_NO_FILTER, FT_NO_TAG, TP_NONE, TP_NONE, false, EVLAN_STRIP_IF_UNTAGGED }, +}; + static enum dsa_tag_protocol mxl862xx_get_tag_protocol(struct dsa_switch *ds, int port, enum dsa_tag_protocol m) @@ -275,6 +354,7 @@ static int mxl862xx_set_bridge_port(struct dsa_switch *ds, int port) struct mxl862xx_port *p = &priv->ports[port]; struct dsa_port *member_dp; u16 bridge_id; + u16 vf_scan; bool enable; int i, idx; @@ -312,9 +392,69 @@ static int mxl862xx_set_bridge_port(struct dsa_switch *ds, int port) br_port_cfg.mask = cpu_to_le32(MXL862XX_BRIDGE_PORT_CONFIG_MASK_BRIDGE_ID | MXL862XX_BRIDGE_PORT_CONFIG_MASK_BRIDGE_PORT_MAP | MXL862XX_BRIDGE_PORT_CONFIG_MASK_MC_SRC_MAC_LEARNING | - MXL862XX_BRIDGE_PORT_CONFIG_MASK_EGRESS_SUB_METER); + MXL862XX_BRIDGE_PORT_CONFIG_MASK_EGRESS_SUB_METER | + MXL862XX_BRIDGE_PORT_CONFIG_MASK_INGRESS_VLAN | + MXL862XX_BRIDGE_PORT_CONFIG_MASK_EGRESS_VLAN | + MXL862XX_BRIDGE_PORT_CONFIG_MASK_INGRESS_VLAN_FILTER | + MXL862XX_BRIDGE_PORT_CONFIG_MASK_EGRESS_VLAN_FILTER1 | + MXL862XX_BRIDGE_PORT_CONFIG_MASK_VLAN_BASED_MAC_LEARNING); br_port_cfg.src_mac_learning_disable = !p->learning; + /* Extended VLAN block assignments. + * Ingress: block_size is sent as-is (all entries are finals). + * Egress: n_active narrows the scan window to only the + * entries actually written by evlan_program_egress. + */ + br_port_cfg.ingress_extended_vlan_enable = p->ingress_evlan.in_use; + br_port_cfg.ingress_extended_vlan_block_id = + cpu_to_le16(p->ingress_evlan.block_id); + br_port_cfg.ingress_extended_vlan_block_size = + cpu_to_le16(p->ingress_evlan.block_size); + br_port_cfg.egress_extended_vlan_enable = p->egress_evlan.in_use; + br_port_cfg.egress_extended_vlan_block_id = + cpu_to_le16(p->egress_evlan.block_id); + br_port_cfg.egress_extended_vlan_block_size = + cpu_to_le16(p->egress_evlan.n_active); + + /* VLAN Filter block assignments (per-port). + * The block_size sent to the firmware narrows the HW scan + * window to [block_id, block_id + active_count), relying on + * discard_unmatched_tagged for frames outside that range. + * When active_count=0, send 1 to scan only the DISCARD + * sentinel at index 0 (block_size=0 would disable narrowing + * and scan the entire allocated block). + * + * The bridge check ensures VF is disabled when the port + * leaves the bridge, without needing to prematurely clear + * vlan_filtering (which the DSA framework handles later via + * port_vlan_filtering). + */ + if (p->vf.allocated && p->vlan_filtering && + dsa_port_bridge_dev_get(dp)) { + vf_scan = max_t(u16, p->vf.active_count, 1); + br_port_cfg.ingress_vlan_filter_enable = 1; + br_port_cfg.ingress_vlan_filter_block_id = + cpu_to_le16(p->vf.block_id); + br_port_cfg.ingress_vlan_filter_block_size = + cpu_to_le16(vf_scan); + + br_port_cfg.egress_vlan_filter1enable = 1; + br_port_cfg.egress_vlan_filter1block_id = + cpu_to_le16(p->vf.block_id); + br_port_cfg.egress_vlan_filter1block_size = + cpu_to_le16(vf_scan); + } else { + br_port_cfg.ingress_vlan_filter_enable = 0; + br_port_cfg.egress_vlan_filter1enable = 0; + } + + /* IVL when VLAN-aware: include VID in FDB lookup keys so that + * learned entries are per-VID. In VLAN-unaware mode, SVL is + * used (VID excluded from key). + */ + br_port_cfg.vlan_src_mac_vid_enable = p->vlan_filtering; + br_port_cfg.vlan_dst_mac_vid_enable = p->vlan_filtering; + for (i = 0; i < ARRAY_SIZE(mxl862xx_flood_meters); i++) { idx = mxl862xx_flood_meters[i]; enable = !!(p->flood_block & BIT(idx)); @@ -343,6 +483,72 @@ static int mxl862xx_sync_bridge_members(struct dsa_switch *ds, return ret; } +static int mxl862xx_evlan_block_alloc(struct mxl862xx_priv *priv, + struct mxl862xx_evlan_block *blk) +{ + struct mxl862xx_extendedvlan_alloc param = {}; + int ret; + + param.number_of_entries = cpu_to_le16(blk->block_size); + + ret = MXL862XX_API_READ(priv, MXL862XX_EXTENDEDVLAN_ALLOC, param); + if (ret) + return ret; + + blk->block_id = le16_to_cpu(param.extended_vlan_block_id); + blk->allocated = true; + + return 0; +} + +static int mxl862xx_vf_block_alloc(struct mxl862xx_priv *priv, + u16 size, u16 *block_id) +{ + struct mxl862xx_vlanfilter_alloc param = {}; + int ret; + + param.number_of_entries = cpu_to_le16(size); + param.discard_untagged = 0; + param.discard_unmatched_tagged = 1; + + ret = MXL862XX_API_READ(priv, MXL862XX_VLANFILTER_ALLOC, param); + if (ret) + return ret; + + *block_id = le16_to_cpu(param.vlan_filter_block_id); + return 0; +} + +static int mxl862xx_vf_entry_discard(struct mxl862xx_priv *priv, + u16 block_id, u16 index) +{ + struct mxl862xx_vlanfilter_config cfg = {}; + + cfg.vlan_filter_block_id = cpu_to_le16(block_id); + cfg.entry_index = cpu_to_le16(index); + cfg.vlan_filter_mask = cpu_to_le32(MXL862XX_VLAN_FILTER_TCI_MASK_VID); + cfg.val = cpu_to_le32(0); + cfg.discard_matched = 1; + + return MXL862XX_API_WRITE(priv, MXL862XX_VLANFILTER_SET, cfg); +} + +static int mxl862xx_vf_alloc(struct mxl862xx_priv *priv, + struct mxl862xx_vf_block *vf) +{ + int ret; + + ret = mxl862xx_vf_block_alloc(priv, vf->block_size, &vf->block_id); + if (ret) + return ret; + + vf->allocated = true; + vf->active_count = 0; + + /* Sentinel: block VID-0 when scan window covers only index 0 */ + return mxl862xx_vf_entry_discard(priv, vf->block_id, 0); +} + static int mxl862xx_allocate_bridge(struct mxl862xx_priv *priv) { struct mxl862xx_bridge_alloc br_alloc = {}; @@ -378,6 +584,9 @@ static void mxl862xx_free_bridge(struct dsa_switch *ds, static int mxl862xx_setup(struct dsa_switch *ds) { struct mxl862xx_priv *priv = ds->priv; + int n_user_ports = 0, max_vlans; + int ingress_finals, vid_rules; + struct dsa_port *dp; int ret; ret = mxl862xx_reset(priv); @@ -388,6 +597,50 @@ static int mxl862xx_setup(struct dsa_switch *ds) if (ret) return ret; + /* Calculate Extended VLAN block sizes. + * With VLAN Filter handling VID membership checks: + * Ingress: only final catchall rules (PVID insertion, 802.1Q + * accept, non-8021Q TPID handling, discard). + * Block sized to exactly fit the finals -- no per-VID + * ingress EVLAN rules are needed. (7 entries.) + * Egress: 2 rules per VID that needs tag stripping (untagged VIDs). + * No egress final catchalls -- VLAN Filter does the discard. + * CPU: EVLAN is left disabled on CPU ports -- frames pass + * through without EVLAN processing. + * + * Total EVLAN budget: + * n_user_ports * (ingress + egress) <= 1024. + * Ingress blocks are small (7 entries), so almost all capacity + * goes to egress VID rules. + */ + dsa_switch_for_each_user_port(dp, ds) + n_user_ports++; + + if (n_user_ports) { + ingress_finals = ARRAY_SIZE(ingress_aware_final); + vid_rules = ARRAY_SIZE(vid_accept_standard); + + /* Ingress block: fixed at finals count (7 entries) */ + priv->evlan_ingress_size = ingress_finals; + + /* Egress block: remaining budget divided equally among + * user ports. Each untagged VID needs vid_rules (2) + * EVLAN entries for tag stripping. Tagged-only VIDs + * need no EVLAN rules at all. + */ + max_vlans = (MXL862XX_TOTAL_EVLAN_ENTRIES - + n_user_ports * ingress_finals) / + (n_user_ports * vid_rules); + priv->evlan_egress_size = vid_rules * max_vlans; + + /* VLAN Filter block: one per user port. The 1024-entry + * table is divided equally among user ports. Each port + * gets its own VF block for per-port VID membership -- + * discard_unmatched_tagged handles the rest. + */ + priv->vf_block_size = MXL862XX_TOTAL_VF_ENTRIES / n_user_ports; + } + ret = mxl862xx_setup_drop_meter(ds); if (ret) return ret; @@ -469,12 +722,509 @@ static int mxl862xx_configure_sp_tag_proto(struct dsa_switch *ds, int port, return MXL862XX_API_WRITE(ds->priv, MXL862XX_SS_SPTAG_SET, tag); } +static int mxl862xx_evlan_write_rule(struct mxl862xx_priv *priv, + u16 block_id, u16 entry_index, + const struct mxl862xx_evlan_rule_desc *desc, + u16 vid, bool untagged, u16 pvid) +{ + struct mxl862xx_extendedvlan_config cfg = {}; + struct mxl862xx_extendedvlan_filter_vlan *fv; + + cfg.extended_vlan_block_id = cpu_to_le16(block_id); + cfg.entry_index = cpu_to_le16(entry_index); + + /* Populate filter */ + cfg.filter.outer_vlan.type = cpu_to_le32(desc->outer_type); + cfg.filter.inner_vlan.type = cpu_to_le32(desc->inner_type); + cfg.filter.outer_vlan.tpid = cpu_to_le32(desc->outer_tpid); + cfg.filter.inner_vlan.tpid = cpu_to_le32(desc->inner_tpid); + + if (desc->match_vid) { + /* For egress unaware: outer=NO_FILTER, match on inner tag */ + if (desc->outer_type == FT_NO_FILTER) + fv = &cfg.filter.inner_vlan; + else + fv = &cfg.filter.outer_vlan; + + fv->vid_enable = 1; + fv->vid_val = cpu_to_le32(vid); + } + + /* Populate treatment based on action */ + switch (desc->action) { + case EVLAN_ACCEPT: + cfg.treatment.remove_tag = + cpu_to_le32(MXL862XX_EXTENDEDVLAN_TREATMENT_NOT_REMOVE_TAG); + break; + + case EVLAN_STRIP_IF_UNTAGGED: + cfg.treatment.remove_tag = cpu_to_le32(untagged ? + MXL862XX_EXTENDEDVLAN_TREATMENT_REMOVE_1_TAG : + MXL862XX_EXTENDEDVLAN_TREATMENT_NOT_REMOVE_TAG); + break; + + case EVLAN_PVID_OR_DISCARD: + if (pvid) { + cfg.treatment.remove_tag = + cpu_to_le32(MXL862XX_EXTENDEDVLAN_TREATMENT_NOT_REMOVE_TAG); + cfg.treatment.add_outer_vlan = 1; + cfg.treatment.outer_vlan.vid_mode = + cpu_to_le32(MXL862XX_EXTENDEDVLAN_TREATMENT_VID_VAL); + cfg.treatment.outer_vlan.vid_val = cpu_to_le32(pvid); + cfg.treatment.outer_vlan.tpid = + cpu_to_le32(MXL862XX_EXTENDEDVLAN_TREATMENT_8021Q); + } else { + cfg.treatment.remove_tag = + cpu_to_le32(MXL862XX_EXTENDEDVLAN_TREATMENT_DISCARD_UPSTREAM); + } + break; + + case EVLAN_STRIP1_AND_PVID_OR_DISCARD: + if (pvid) { + cfg.treatment.remove_tag = + cpu_to_le32(MXL862XX_EXTENDEDVLAN_TREATMENT_REMOVE_1_TAG); + cfg.treatment.add_outer_vlan = 1; + cfg.treatment.outer_vlan.vid_mode = + cpu_to_le32(MXL862XX_EXTENDEDVLAN_TREATMENT_VID_VAL); + cfg.treatment.outer_vlan.vid_val = cpu_to_le32(pvid); + cfg.treatment.outer_vlan.tpid = + cpu_to_le32(MXL862XX_EXTENDEDVLAN_TREATMENT_8021Q); + } else { + cfg.treatment.remove_tag = + cpu_to_le32(MXL862XX_EXTENDEDVLAN_TREATMENT_DISCARD_UPSTREAM); + } + break; + } + + return MXL862XX_API_WRITE(priv, MXL862XX_EXTENDEDVLAN_SET, cfg); +} + +static int mxl862xx_evlan_deactivate_entry(struct mxl862xx_priv *priv, + u16 block_id, u16 entry_index) +{ + struct mxl862xx_extendedvlan_config cfg = {}; + + cfg.extended_vlan_block_id = cpu_to_le16(block_id); + cfg.entry_index = cpu_to_le16(entry_index); + + /* Use an unreachable filter (DEFAULT+DEFAULT) with DISCARD treatment. + * A zeroed entry would have NORMAL+NORMAL filter which matches + * real double-tagged traffic and passes it through. + */ + cfg.filter.outer_vlan.type = + cpu_to_le32(MXL862XX_EXTENDEDVLAN_FILTER_TYPE_DEFAULT); + cfg.filter.inner_vlan.type = + cpu_to_le32(MXL862XX_EXTENDEDVLAN_FILTER_TYPE_DEFAULT); + cfg.treatment.remove_tag = + cpu_to_le32(MXL862XX_EXTENDEDVLAN_TREATMENT_DISCARD_UPSTREAM); + + return MXL862XX_API_WRITE(priv, MXL862XX_EXTENDEDVLAN_SET, cfg); +} + +static int mxl862xx_evlan_write_final_rules(struct mxl862xx_priv *priv, + struct mxl862xx_evlan_block *blk, + const struct mxl862xx_evlan_rule_desc *rules, + int n_rules, u16 pvid) +{ + u16 start_idx = blk->block_size - n_rules; + int i, ret; + + for (i = 0; i < n_rules; i++) { + ret = mxl862xx_evlan_write_rule(priv, blk->block_id, + start_idx + i, &rules[i], + 0, false, pvid); + if (ret) + return ret; + } + + return 0; +} + +static int mxl862xx_vf_entry_set(struct mxl862xx_priv *priv, + u16 block_id, u16 index, u16 vid) +{ + struct mxl862xx_vlanfilter_config cfg = {}; + + cfg.vlan_filter_block_id = cpu_to_le16(block_id); + cfg.entry_index = cpu_to_le16(index); + cfg.vlan_filter_mask = cpu_to_le32(MXL862XX_VLAN_FILTER_TCI_MASK_VID); + cfg.val = cpu_to_le32(vid); + cfg.discard_matched = 0; + + return MXL862XX_API_WRITE(priv, MXL862XX_VLANFILTER_SET, cfg); +} + +static struct mxl862xx_vf_vid *mxl862xx_vf_find_vid(struct mxl862xx_vf_block *vf, + u16 vid) +{ + struct mxl862xx_vf_vid *ve; + + list_for_each_entry(ve, &vf->vids, list) + if (ve->vid == vid) + return ve; + + return NULL; +} + +static int mxl862xx_vf_add_vid(struct mxl862xx_priv *priv, + struct mxl862xx_vf_block *vf, + u16 vid, bool untagged) +{ + struct mxl862xx_vf_vid *ve; + int ret; + + ve = mxl862xx_vf_find_vid(vf, vid); + if (ve) { + ve->untagged = untagged; + return 0; + } + + if (vf->active_count >= vf->block_size) + return -ENOSPC; + + ve = kzalloc_obj(*ve); + if (!ve) + return -ENOMEM; + + ve->vid = vid; + ve->index = vf->active_count; + ve->untagged = untagged; + + ret = mxl862xx_vf_entry_set(priv, vf->block_id, ve->index, vid); + if (ret) { + kfree(ve); + return ret; + } + + list_add_tail(&ve->list, &vf->vids); + vf->active_count++; + + return 0; +} + +static int mxl862xx_vf_del_vid(struct mxl862xx_priv *priv, + struct mxl862xx_vf_block *vf, u16 vid) +{ + struct mxl862xx_vf_vid *ve, *last_ve; + u16 gap, last; + int ret; + + ve = mxl862xx_vf_find_vid(vf, vid); + if (!ve) + return 0; + + if (!vf->allocated) { + /* Software-only state -- just remove the tracking entry */ + list_del(&ve->list); + kfree(ve); + vf->active_count--; + return 0; + } + + gap = ve->index; + last = vf->active_count - 1; + + if (vf->active_count == 1) { + /* Last VID -- restore DISCARD sentinel at index 0 */ + ret = mxl862xx_vf_entry_discard(priv, vf->block_id, 0); + if (ret) + return ret; + } else if (gap < last) { + /* Swap: move the last ALLOW entry into the gap */ + list_for_each_entry(last_ve, &vf->vids, list) + if (last_ve->index == last) + break; + + if (WARN_ON(list_entry_is_head(last_ve, &vf->vids, list))) + return -EINVAL; + + ret = mxl862xx_vf_entry_set(priv, vf->block_id, + gap, last_ve->vid); + if (ret) + return ret; + + last_ve->index = gap; + } + + list_del(&ve->list); + kfree(ve); + vf->active_count--; + + return 0; +} + +static int mxl862xx_evlan_program_ingress(struct mxl862xx_priv *priv, int port) +{ + struct mxl862xx_port *p = &priv->ports[port]; + struct mxl862xx_evlan_block *blk = &p->ingress_evlan; + + if (!p->vlan_filtering) + return 0; + + blk->in_use = true; + blk->n_active = blk->block_size; + + return mxl862xx_evlan_write_final_rules(priv, blk, + ingress_aware_final, + ARRAY_SIZE(ingress_aware_final), + p->pvid); +} + +static int mxl862xx_evlan_program_egress(struct mxl862xx_priv *priv, int port) +{ + struct mxl862xx_port *p = &priv->ports[port]; + struct mxl862xx_evlan_block *blk = &p->egress_evlan; + const struct mxl862xx_evlan_rule_desc *vid_rules; + struct mxl862xx_vf_vid *vfv; + u16 old_active = blk->n_active; + u16 idx = 0, i; + int n_vid, ret; + + if (p->vlan_filtering) { + vid_rules = vid_accept_standard; + n_vid = ARRAY_SIZE(vid_accept_standard); + } else { + vid_rules = vid_accept_egress_unaware; + n_vid = ARRAY_SIZE(vid_accept_egress_unaware); + } + + list_for_each_entry(vfv, &p->vf.vids, list) { + if (!vfv->untagged) + continue; + + if (idx + n_vid > blk->block_size) + return -ENOSPC; + + ret = mxl862xx_evlan_write_rule(priv, blk->block_id, + idx++, &vid_rules[0], + vfv->vid, vfv->untagged, + p->pvid); + if (ret) + return ret; + + if (n_vid > 1) { + ret = mxl862xx_evlan_write_rule(priv, blk->block_id, + idx++, &vid_rules[1], + vfv->vid, + vfv->untagged, + p->pvid); + if (ret) + return ret; + } + } + + /* Deactivate stale entries that are no longer needed. + * This closes the brief window between writing the new rules + * and set_bridge_port narrowing the scan window. + */ + for (i = idx; i < old_active; i++) { + ret = mxl862xx_evlan_deactivate_entry(priv, + blk->block_id, + i); + if (ret) + return ret; + } + + blk->n_active = idx; + blk->in_use = idx > 0; + + return 0; +} + +static int mxl862xx_port_vlan_filtering(struct dsa_switch *ds, int port, + bool vlan_filtering, + struct netlink_ext_ack *extack) +{ + struct mxl862xx_priv *priv = ds->priv; + struct mxl862xx_port *p = &priv->ports[port]; + bool old_vlan_filtering = p->vlan_filtering; + bool old_in_use = p->ingress_evlan.in_use; + bool changed = (p->vlan_filtering != vlan_filtering); + int ret; + + p->vlan_filtering = vlan_filtering; + + if (changed) { + /* When leaving VLAN-aware mode, release the ingress HW + * block. The firmware passes frames through unchanged + * when no ingress EVLAN block is assigned, so the block + * is unnecessary in unaware mode. + */ + if (!vlan_filtering) + p->ingress_evlan.in_use = false; + + ret = mxl862xx_evlan_program_ingress(priv, port); + if (ret) + goto err_restore; + + ret = mxl862xx_evlan_program_egress(priv, port); + if (ret) + goto err_restore; + } + + return mxl862xx_set_bridge_port(ds, port); + + /* No HW rollback -- restoring SW state is sufficient for a correct retry. */ +err_restore: + p->vlan_filtering = old_vlan_filtering; + p->ingress_evlan.in_use = old_in_use; + return ret; +} + +static int mxl862xx_port_vlan_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan, + struct netlink_ext_ack *extack) +{ + struct mxl862xx_priv *priv = ds->priv; + struct mxl862xx_port *p = &priv->ports[port]; + bool untagged = !!(vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED); + u16 vid = vlan->vid; + u16 old_pvid = p->pvid; + bool pvid_changed = false; + int ret; + + /* CPU port is VLAN-transparent: the SP tag handles port + * identification and the host-side DSA tagger manages VLAN + * delivery. Egress EVLAN catchalls are set up once in + * setup_cpu_bridge; no per-VID VF/EVLAN programming needed. + */ + if (dsa_is_cpu_port(ds, port)) + return 0; + + /* Update PVID tracking */ + if (vlan->flags & BRIDGE_VLAN_INFO_PVID) { + if (p->pvid != vid) { + p->pvid = vid; + pvid_changed = true; + } + } else if (p->pvid == vid) { + p->pvid = 0; + pvid_changed = true; + } + + /* Add/update VID in this port's VLAN Filter block. + * VF must be updated before programming egress EVLAN because + * evlan_program_egress walks the VF VID list. + */ + ret = mxl862xx_vf_add_vid(priv, &p->vf, vid, untagged); + if (ret) + goto err_pvid; + + /* Reprogram ingress finals if PVID changed */ + if (pvid_changed) { + ret = mxl862xx_evlan_program_ingress(priv, port); + if (ret) + goto err_rollback; + } + + /* Reprogram egress tag-stripping rules (walks VF VID list) */ + ret = mxl862xx_evlan_program_egress(priv, port); + if (ret) + goto err_rollback; + + /* Apply VLAN block IDs and MAC learning flags to bridge port */ + ret = mxl862xx_set_bridge_port(ds, port); + if (ret) + goto err_rollback; + + return 0; + +err_rollback: + /* Best-effort: undo VF add and restore consistent hardware state. + * A retry of port_vlan_add will converge since vf_add_vid is + * idempotent. + */ + p->pvid = old_pvid; + mxl862xx_vf_del_vid(priv, &p->vf, vid); + mxl862xx_evlan_program_ingress(priv, port); + mxl862xx_evlan_program_egress(priv, port); + mxl862xx_set_bridge_port(ds, port); + return ret; +err_pvid: + p->pvid = old_pvid; + return ret; +} + +static int mxl862xx_port_vlan_del(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan) +{ + struct mxl862xx_priv *priv = ds->priv; + struct mxl862xx_port *p = &priv->ports[port]; + struct mxl862xx_vf_vid *ve; + bool pvid_changed = false; + u16 vid = vlan->vid; + bool old_untagged; + u16 old_pvid; + int ret; + + if (dsa_is_cpu_port(ds, port)) + return 0; + + ve = mxl862xx_vf_find_vid(&p->vf, vid); + if (!ve) + return 0; + old_untagged = ve->untagged; + old_pvid = p->pvid; + + /* Clear PVID if we're deleting it */ + if (p->pvid == vid) { + p->pvid = 0; + pvid_changed = true; + } + + /* Remove VID from this port's VLAN Filter block. + * Must happen before egress reprogram so the VID is no + * longer in the list that evlan_program_egress walks. + */ + ret = mxl862xx_vf_del_vid(priv, &p->vf, vid); + if (ret) + goto err_pvid; + + /* Reprogram egress tag-stripping rules (VID is now gone) */ + ret = mxl862xx_evlan_program_egress(priv, port); + if (ret) + goto err_rollback; + + /* If PVID changed, reprogram ingress finals */ + if (pvid_changed) { + ret = mxl862xx_evlan_program_ingress(priv, port); + if (ret) + goto err_rollback; + } + + ret = mxl862xx_set_bridge_port(ds, port); + if (ret) + goto err_rollback; + + return 0; + +err_rollback: + /* Best-effort: re-add the VID and restore consistent hardware + * state. A retry of port_vlan_del will converge. + */ + p->pvid = old_pvid; + mxl862xx_vf_add_vid(priv, &p->vf, vid, old_untagged); + mxl862xx_evlan_program_egress(priv, port); + mxl862xx_evlan_program_ingress(priv, port); + mxl862xx_set_bridge_port(ds, port); + return ret; +err_pvid: + p->pvid = old_pvid; + return ret; +} + static int mxl862xx_setup_cpu_bridge(struct dsa_switch *ds, int port) { struct mxl862xx_priv *priv = ds->priv; + struct mxl862xx_port *p = &priv->ports[port]; - priv->ports[port].fid = MXL862XX_DEFAULT_BRIDGE; - priv->ports[port].learning = true; + p->fid = MXL862XX_DEFAULT_BRIDGE; + p->learning = true; + + /* EVLAN is left disabled on CPU ports -- frames pass through + * without EVLAN processing. Only the portmap and bridge + * assignment need to be configured. + */ return mxl862xx_set_bridge_port(ds, port); } @@ -510,6 +1260,8 @@ static int mxl862xx_port_bridge_join(struct dsa_switch *ds, int port, static void mxl862xx_port_bridge_leave(struct dsa_switch *ds, int port, const struct dsa_bridge bridge) { + struct mxl862xx_priv *priv = ds->priv; + struct mxl862xx_port *p = &priv->ports[port]; int err; err = mxl862xx_sync_bridge_members(ds, &bridge); @@ -521,6 +1273,10 @@ static void mxl862xx_port_bridge_leave(struct dsa_switch *ds, int port, /* Revert leaving port, omitted by the sync above, to its * single-port bridge */ + p->pvid = 0; + p->ingress_evlan.in_use = false; + p->egress_evlan.in_use = false; + err = mxl862xx_set_bridge_port(ds, port); if (err) dev_err(ds->dev, @@ -585,6 +1341,22 @@ static int mxl862xx_port_setup(struct dsa_switch *ds, int port) if (ret) return ret; + priv->ports[port].ingress_evlan.block_size = priv->evlan_ingress_size; + ret = mxl862xx_evlan_block_alloc(priv, &priv->ports[port].ingress_evlan); + if (ret) + return ret; + + priv->ports[port].egress_evlan.block_size = priv->evlan_egress_size; + ret = mxl862xx_evlan_block_alloc(priv, &priv->ports[port].egress_evlan); + if (ret) + return ret; + + priv->ports[port].vf.block_size = priv->vf_block_size; + INIT_LIST_HEAD(&priv->ports[port].vf.vids); + ret = mxl862xx_vf_alloc(priv, &priv->ports[port].vf); + if (ret) + return ret; + priv->ports[port].setup_done = true; return 0; @@ -983,6 +1755,9 @@ static const struct dsa_switch_ops mxl862xx_switch_ops = { .port_fdb_dump = mxl862xx_port_fdb_dump, .port_mdb_add = mxl862xx_port_mdb_add, .port_mdb_del = mxl862xx_port_mdb_del, + .port_vlan_filtering = mxl862xx_port_vlan_filtering, + .port_vlan_add = mxl862xx_port_vlan_add, + .port_vlan_del = mxl862xx_port_vlan_del, }; static void mxl862xx_phylink_mac_config(struct phylink_config *config, diff --git a/drivers/net/dsa/mxl862xx/mxl862xx.h b/drivers/net/dsa/mxl862xx/mxl862xx.h index 8d03dd24faee..a010cf6b961a 100644 --- a/drivers/net/dsa/mxl862xx/mxl862xx.h +++ b/drivers/net/dsa/mxl862xx/mxl862xx.h @@ -13,6 +13,8 @@ struct mxl862xx_priv; #define MXL862XX_DEFAULT_BRIDGE 0 #define MXL862XX_MAX_BRIDGES 48 #define MXL862XX_MAX_BRIDGE_PORTS 128 +#define MXL862XX_TOTAL_EVLAN_ENTRIES 1024 +#define MXL862XX_TOTAL_VF_ENTRIES 1024 /* Number of __le16 words in a firmware portmap (128-bit bitmap). */ #define MXL862XX_FW_PORTMAP_WORDS (MXL862XX_MAX_BRIDGE_PORTS / 16) @@ -54,6 +56,66 @@ static inline bool mxl862xx_fw_portmap_is_empty(const __le16 *map) return true; } +/** + * struct mxl862xx_vf_vid - Per-VID entry within a VLAN Filter block + * @list: Linked into &mxl862xx_vf_block.vids + * @vid: VLAN ID + * @index: Entry index within the VLAN Filter HW block + * @untagged: Strip tag on egress for this VID (drives EVLAN tag-stripping) + */ +struct mxl862xx_vf_vid { + struct list_head list; + u16 vid; + u16 index; + bool untagged; +}; + +/** + * struct mxl862xx_vf_block - Per-port VLAN Filter block + * @allocated: Whether the HW block has been allocated via VLANFILTER_ALLOC + * @block_id: HW VLAN Filter block ID from VLANFILTER_ALLOC + * @block_size: Total entries allocated in this block + * @active_count: Number of ALLOW entries at indices [0, active_count). + * The bridge port config sends max(active_count, 1) as + * block_size to narrow the HW scan window. + * discard_unmatched_tagged handles frames outside this range. + * @vids: List of &mxl862xx_vf_vid entries programmed in this block + */ +struct mxl862xx_vf_block { + bool allocated; + u16 block_id; + u16 block_size; + u16 active_count; + struct list_head vids; +}; + +/** + * struct mxl862xx_evlan_block - Per-port per-direction extended VLAN block + * @allocated: Whether the HW block has been allocated via EXTENDEDVLAN_ALLOC. + * Guards alloc/free idempotency--the block_id is only valid + * while allocated is true. + * @in_use: Whether the EVLAN engine should be enabled for this block + * on the bridge port (sent as the enable flag in + * set_bridge_port). Can be false while allocated is still + * true -- e.g. when all egress VIDs are removed (idx == 0 in + * evlan_program_egress) the block stays allocated for + * potential reuse, but the engine is disabled so an empty + * rule set does not discard all traffic. + * @block_id: HW block ID from EXTENDEDVLAN_ALLOC + * @block_size: Total entries allocated + * @n_active: Number of HW entries currently written. The bridge port + * config sends this as the egress scan window, so entries + * beyond n_active are never scanned. Always equals + * block_size for ingress blocks (fixed catchall rules). + */ +struct mxl862xx_evlan_block { + bool allocated; + bool in_use; + u16 block_id; + u16 block_size; + u16 n_active; +}; + /** * struct mxl862xx_port - per-port state tracked by the driver * @priv: back-pointer to switch private data; needed by @@ -68,6 +130,11 @@ static inline bool mxl862xx_fw_portmap_is_empty(const __le16 *map) * @setup_done: set at end of port_setup, cleared at start of * port_teardown; guards deferred work against * acting on torn-down state + * @pvid: port VLAN ID (native VLAN) assigned to untagged traffic + * @vlan_filtering: true when VLAN filtering is enabled on this port + * @vf: per-port VLAN Filter block state + * @ingress_evlan: ingress extended VLAN block state + * @egress_evlan: egress extended VLAN block state * @host_flood_uc: desired host unicast flood state (true = flood); * updated atomically by port_set_host_flood, consumed * by the deferred host_flood_work @@ -85,6 +152,11 @@ struct mxl862xx_port { unsigned long flood_block; bool learning; bool setup_done; + u16 pvid; + bool vlan_filtering; + struct mxl862xx_vf_block vf; + struct mxl862xx_evlan_block ingress_evlan; + struct mxl862xx_evlan_block egress_evlan; bool host_flood_uc; bool host_flood_mc; struct work_struct host_flood_work; @@ -92,17 +164,23 @@ struct mxl862xx_port { /** * struct mxl862xx_priv - driver private data for an MxL862xx switch - * @ds: pointer to the DSA switch instance - * @mdiodev: MDIO device used to communicate with the switch firmware - * @crc_err_work: deferred work for shutting down all ports on MDIO CRC errors - * @crc_err: set atomically before CRC-triggered shutdown, cleared after - * @drop_meter: index of the single shared zero-rate firmware meter used - * to unconditionally drop traffic (used to block flooding) - * @ports: per-port state, indexed by switch port number - * @bridges: maps DSA bridge number to firmware bridge ID; - * zero means no firmware bridge allocated for that - * DSA bridge number. Indexed by dsa_bridge.num - * (0 .. ds->max_num_bridges). + * @ds: pointer to the DSA switch instance + * @mdiodev: MDIO device used to communicate with the switch firmware + * @crc_err_work: deferred work for shutting down all ports on MDIO CRC + * errors + * @crc_err: set atomically before CRC-triggered shutdown, cleared + * after + * @drop_meter: index of the single shared zero-rate firmware meter + * used to unconditionally drop traffic (used to block + * flooding) + * @ports: per-port state, indexed by switch port number + * @bridges: maps DSA bridge number to firmware bridge ID; + * zero means no firmware bridge allocated for that + * DSA bridge number. Indexed by dsa_bridge.num + * (0 .. ds->max_num_bridges). + * @evlan_ingress_size: per-port ingress Extended VLAN block size + * @evlan_egress_size: per-port egress Extended VLAN block size + * @vf_block_size: per-port VLAN Filter block size */ struct mxl862xx_priv { struct dsa_switch *ds; @@ -112,6 +190,9 @@ struct mxl862xx_priv { u16 drop_meter; struct mxl862xx_port ports[MXL862XX_MAX_PORTS]; u16 bridges[MXL862XX_MAX_BRIDGES + 1]; + u16 evlan_ingress_size; + u16 evlan_egress_size; + u16 vf_block_size; }; #endif /* __MXL862XX_H */ From 02f72964395911e7a09bb2ea2fe6f79eda4ea2c2 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 8 Apr 2026 12:20:09 +0200 Subject: [PATCH 1377/1561] net: airoha: Fix FE_PSE_BUF_SET configuration if PPE2 is available airoha_fe_set routine is used to set specified bits to 1 in the selected register. In the FE_PSE_BUF_SET case this can due to a overestimation of the required buffers for I/O queues since we can miss to set some bits of PSE_ALLRSV_MASK subfield to 0. Fix the issue relying on airoha_fe_rmw routine instead. Fixes: 8e38e08f2c560 ("net: airoha: fix PSE memory configuration in airoha_fe_pse_ports_init()") Tested-by: Xuegang Lu Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260408-airoha-reg_fe_pse_buf_set-v1-1-0c4fa8f4d1d9@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 9285a68f435f..c14cdce588a7 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -293,16 +293,18 @@ static void airoha_fe_pse_ports_init(struct airoha_eth *eth) [FE_PSE_PORT_GDM4] = 2, [FE_PSE_PORT_CDM5] = 2, }; - u32 all_rsv; int q; - all_rsv = airoha_fe_get_pse_all_rsv(eth); if (airoha_ppe_is_enabled(eth, 1)) { + u32 all_rsv; + /* hw misses PPE2 oq rsv */ + all_rsv = airoha_fe_get_pse_all_rsv(eth); all_rsv += PSE_RSV_PAGES * pse_port_num_queues[FE_PSE_PORT_PPE2]; + airoha_fe_rmw(eth, REG_FE_PSE_BUF_SET, PSE_ALLRSV_MASK, + FIELD_PREP(PSE_ALLRSV_MASK, all_rsv)); } - airoha_fe_set(eth, REG_FE_PSE_BUF_SET, all_rsv); /* CMD1 */ for (q = 0; q < pse_port_num_queues[FE_PSE_PORT_CDM1]; q++) From 3f3a2aefbc661b837c8e344f944982d61c2ae037 Mon Sep 17 00:00:00 2001 From: Aleksandr Loktionov Date: Thu, 9 Apr 2026 11:30:20 +0200 Subject: [PATCH 1378/1561] iavf: fix kernel-doc comment style in iavf_ethtool.c iavf_ethtool.c contains 31 kernel-doc comment blocks using the legacy `**/` terminator instead of the correct single `*/`. Two function headers also use a colon separator (`iavf_get_channels:`, `iavf_set_channels:`) instead of the ` - ` dash required by kernel-doc. Additionally several comments embed their return-value descriptions in the body paragraph, producing `scripts/kernel-doc -Wreturn` warnings. Void functions that incorrectly say "Returns ..." are also rephrased. Fix all issues across the full file: - Replace every `**/` terminator with `*/`. - Change `function_name:` doc headers to `function_name -`. - Move inline "Returns ..." sentences into dedicated `Return:` sections for non-void functions (iavf_get_msglevel, iavf_get_rxnfc, iavf_set_channels, iavf_get_rxfh_key_size, iavf_get_rxfh_indir_size, iavf_get_rxfh, iavf_set_rxfh). - Rephrase body descriptions in void functions that incorrectly said "Returns ..." (iavf_get_drvinfo, iavf_get_ringparam, iavf_get_coalesce). - Remove boilerplate body text for iavf_get_rxfh_key_size and iavf_get_rxfh_indir_size; the `Return:` line now conveys the same information without the vague "Returns the table size." sentence. Suggested-by: Anthony L. Nguyen Suggested-by: Leszek Pepiak Signed-off-by: Aleksandr Loktionov Reviewed-by: Breno Leitao Reviewed-by: Joe Damato Link: https://patch.msgid.link/20260409093020.3808687-1-aleksandr.loktionov@intel.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/intel/iavf/iavf_ethtool.c | 103 +++++++++--------- 1 file changed, 53 insertions(+), 50 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index 1cd1f3f2930a..a615d599b88e 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -32,7 +32,7 @@ * statistics array. Thus, every statistic string in an array should have the * same type and number of format specifiers, to be formatted by variadic * arguments to the iavf_add_stat_string() helper function. - **/ + */ struct iavf_stats { char stat_string[ETH_GSTRING_LEN]; int sizeof_stat; @@ -116,7 +116,7 @@ iavf_add_one_ethtool_stat(u64 *data, void *pointer, * the next empty location for successive calls to __iavf_add_ethtool_stats. * If pointer is null, set the data values to zero and update the pointer to * skip these stats. - **/ + */ static void __iavf_add_ethtool_stats(u64 **data, void *pointer, const struct iavf_stats stats[], @@ -140,7 +140,7 @@ __iavf_add_ethtool_stats(u64 **data, void *pointer, * * The parameter @stats is evaluated twice, so parameters with side effects * should be avoided. - **/ + */ #define iavf_add_ethtool_stats(data, pointer, stats) \ __iavf_add_ethtool_stats(data, pointer, stats, ARRAY_SIZE(stats)) @@ -157,7 +157,7 @@ __iavf_add_ethtool_stats(u64 **data, void *pointer, * buffer and update the data pointer when finished. * * This function expects to be called while under rcu_read_lock(). - **/ + */ static void iavf_add_queue_stats(u64 **data, struct iavf_ring *ring) { @@ -189,7 +189,7 @@ iavf_add_queue_stats(u64 **data, struct iavf_ring *ring) * * Format and copy the strings described by stats into the buffer pointed at * by p. - **/ + */ static void __iavf_add_stat_strings(u8 **p, const struct iavf_stats stats[], const unsigned int size, ...) { @@ -216,7 +216,7 @@ static void __iavf_add_stat_strings(u8 **p, const struct iavf_stats stats[], * The parameter @stats is evaluated twice, so parameters with side effects * should be avoided. Additionally, stats must be an array such that * ARRAY_SIZE can be called on it. - **/ + */ #define iavf_add_stat_strings(p, stats, ...) \ __iavf_add_stat_strings(p, stats, ARRAY_SIZE(stats), ## __VA_ARGS__) @@ -249,7 +249,7 @@ static const struct iavf_stats iavf_gstrings_stats[] = { * * Reports speed/duplex settings. Because this is a VF, we don't know what * kind of link we really have, so we fake it. - **/ + */ static int iavf_get_link_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *cmd) { @@ -308,7 +308,7 @@ static int iavf_get_link_ksettings(struct net_device *netdev, * @sset: id of string set * * Reports size of various string tables. - **/ + */ static int iavf_get_sset_count(struct net_device *netdev, int sset) { /* Report the maximum number queues, even if not every queue is @@ -331,7 +331,7 @@ static int iavf_get_sset_count(struct net_device *netdev, int sset) * @data: pointer to data buffer * * All statistics are added to the data buffer as an array of u64. - **/ + */ static void iavf_get_ethtool_stats(struct net_device *netdev, struct ethtool_stats *stats, u64 *data) { @@ -367,7 +367,7 @@ static void iavf_get_ethtool_stats(struct net_device *netdev, * @data: buffer for string data * * Builds the statistics string table - **/ + */ static void iavf_get_stat_strings(struct net_device *netdev, u8 *data) { unsigned int i; @@ -392,7 +392,7 @@ static void iavf_get_stat_strings(struct net_device *netdev, u8 *data) * @data: buffer for string data * * Builds string tables for various string sets - **/ + */ static void iavf_get_strings(struct net_device *netdev, u32 sset, u8 *data) { switch (sset) { @@ -408,8 +408,8 @@ static void iavf_get_strings(struct net_device *netdev, u32 sset, u8 *data) * iavf_get_msglevel - Get debug message level * @netdev: network interface device structure * - * Returns current debug message level. - **/ + * Return: current debug message level. + */ static u32 iavf_get_msglevel(struct net_device *netdev) { struct iavf_adapter *adapter = netdev_priv(netdev); @@ -424,7 +424,7 @@ static u32 iavf_get_msglevel(struct net_device *netdev) * * Set current debug message level. Higher values cause the driver to * be noisier. - **/ + */ static void iavf_set_msglevel(struct net_device *netdev, u32 data) { struct iavf_adapter *adapter = netdev_priv(netdev); @@ -439,8 +439,8 @@ static void iavf_set_msglevel(struct net_device *netdev, u32 data) * @netdev: network interface device structure * @drvinfo: ethool driver info structure * - * Returns information about the driver and device for display to the user. - **/ + * Fills @drvinfo with information about the driver and device. + */ static void iavf_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) { @@ -458,9 +458,9 @@ static void iavf_get_drvinfo(struct net_device *netdev, * @kernel_ring: ethtool extenal ringparam structure * @extack: netlink extended ACK report struct * - * Returns current ring parameters. TX and RX rings are reported separately, - * but the number of rings is not reported. - **/ + * Fills @ring with current ring parameters. TX and RX rings are reported + * separately, but the number of rings is not reported. + */ static void iavf_get_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, struct kernel_ethtool_ringparam *kernel_ring, @@ -483,7 +483,7 @@ static void iavf_get_ringparam(struct net_device *netdev, * * Sets ring parameters. TX and RX rings are controlled separately, but the * number of rings is not specified, so all rings get the same settings. - **/ + */ static int iavf_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, struct kernel_ethtool_ringparam *kernel_ring, @@ -551,7 +551,7 @@ static int iavf_set_ringparam(struct net_device *netdev, * Gets the per-queue settings for coalescence. Specifically Rx and Tx usecs * are per queue. If queue is <0 then we default to queue 0 as the * representative value. - **/ + */ static int __iavf_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, int queue) { @@ -588,11 +588,11 @@ static int __iavf_get_coalesce(struct net_device *netdev, * @kernel_coal: ethtool CQE mode setting structure * @extack: extack for reporting error messages * - * Returns current coalescing settings. This is referred to elsewhere in the - * driver as Interrupt Throttle Rate, as this is how the hardware describes - * this functionality. Note that if per-queue settings have been modified this - * only represents the settings of queue 0. - **/ + * Fills @ec with current coalescing settings. This is referred to elsewhere + * in the driver as Interrupt Throttle Rate, as this is how the hardware + * describes this functionality. Note that if per-queue settings have been + * modified this only represents the settings of queue 0. + */ static int iavf_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, struct kernel_ethtool_coalesce *kernel_coal, @@ -608,7 +608,7 @@ static int iavf_get_coalesce(struct net_device *netdev, * @queue: the queue to read * * Read specific queue's coalesce settings. - **/ + */ static int iavf_get_per_queue_coalesce(struct net_device *netdev, u32 queue, struct ethtool_coalesce *ec) { @@ -622,7 +622,7 @@ static int iavf_get_per_queue_coalesce(struct net_device *netdev, u32 queue, * @queue: the queue to modify * * Change the ITR settings for a specific queue. - **/ + */ static int iavf_set_itr_per_queue(struct iavf_adapter *adapter, struct ethtool_coalesce *ec, int queue) { @@ -680,7 +680,7 @@ static int iavf_set_itr_per_queue(struct iavf_adapter *adapter, * @queue: the queue to change * * Sets the coalesce settings for a particular queue. - **/ + */ static int __iavf_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, int queue) { @@ -722,7 +722,7 @@ static int __iavf_set_coalesce(struct net_device *netdev, * @extack: extack for reporting error messages * * Change current coalescing settings for every queue. - **/ + */ static int iavf_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, struct kernel_ethtool_coalesce *kernel_coal, @@ -1639,7 +1639,7 @@ static int iavf_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd) * @netdev: network interface device structure * * Return: number of RX rings. - **/ + */ static u32 iavf_get_rx_ring_count(struct net_device *netdev) { struct iavf_adapter *adapter = netdev_priv(netdev); @@ -1653,8 +1653,8 @@ static u32 iavf_get_rx_ring_count(struct net_device *netdev) * @cmd: ethtool rxnfc command * @rule_locs: pointer to store rule locations * - * Returns Success if the command is supported. - **/ + * Return: 0 on success, -EOPNOTSUPP if the command is not supported. + */ static int iavf_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, u32 *rule_locs) { @@ -1684,13 +1684,13 @@ static int iavf_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, return ret; } /** - * iavf_get_channels: get the number of channels supported by the device + * iavf_get_channels - get the number of channels supported by the device * @netdev: network interface device structure * @ch: channel information structure * * For the purposes of our device, we only use combined channels, i.e. a tx/rx * queue pair. Report one extra channel to match our "other" MSI-X vector. - **/ + */ static void iavf_get_channels(struct net_device *netdev, struct ethtool_channels *ch) { @@ -1706,14 +1706,15 @@ static void iavf_get_channels(struct net_device *netdev, } /** - * iavf_set_channels: set the new channel count + * iavf_set_channels - set the new channel count * @netdev: network interface device structure * @ch: channel information structure * - * Negotiate a new number of channels with the PF then do a reset. During - * reset we'll realloc queues and fix the RSS table. Returns 0 on success, - * negative on failure. - **/ + * Negotiate a new number of channels with the PF then do a reset. During + * reset we'll realloc queues and fix the RSS table. + * + * Return: 0 on success, negative on failure. + */ static int iavf_set_channels(struct net_device *netdev, struct ethtool_channels *ch) { @@ -1750,8 +1751,8 @@ static int iavf_set_channels(struct net_device *netdev, * iavf_get_rxfh_key_size - get the RSS hash key size * @netdev: network interface device structure * - * Returns the table size. - **/ + * Return: the RSS hash key size. + */ static u32 iavf_get_rxfh_key_size(struct net_device *netdev) { struct iavf_adapter *adapter = netdev_priv(netdev); @@ -1763,8 +1764,8 @@ static u32 iavf_get_rxfh_key_size(struct net_device *netdev) * iavf_get_rxfh_indir_size - get the rx flow hash indirection table size * @netdev: network interface device structure * - * Returns the table size. - **/ + * Return: the indirection table size. + */ static u32 iavf_get_rxfh_indir_size(struct net_device *netdev) { struct iavf_adapter *adapter = netdev_priv(netdev); @@ -1777,8 +1778,10 @@ static u32 iavf_get_rxfh_indir_size(struct net_device *netdev) * @netdev: network interface device structure * @rxfh: pointer to param struct (indir, key, hfunc) * - * Reads the indirection table directly from the hardware. Always returns 0. - **/ + * Reads the indirection table directly from the hardware. + * + * Return: 0 always. + */ static int iavf_get_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh) { @@ -1806,9 +1809,9 @@ static int iavf_get_rxfh(struct net_device *netdev, * @rxfh: pointer to param struct (indir, key, hfunc) * @extack: extended ACK from the Netlink message * - * Returns -EINVAL if the table specifies an invalid queue id, otherwise - * returns 0 after programming the table. - **/ + * Return: 0 on success, -EOPNOTSUPP if the hash function is not supported, + * -EINVAL if the table specifies an invalid queue id. + */ static int iavf_set_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh, struct netlink_ext_ack *extack) @@ -1885,7 +1888,7 @@ static const struct ethtool_ops iavf_ethtool_ops = { * * Sets ethtool ops struct in our netdev so that ethtool can call * our functions. - **/ + */ void iavf_set_ethtool_ops(struct net_device *netdev) { netdev->ethtool_ops = &iavf_ethtool_ops; From 3c8c39768b10867e4f630080785b602245f01760 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Wed, 8 Apr 2026 12:27:12 +0200 Subject: [PATCH 1379/1561] dpll: zl3073x: clean up esync get/set and use zl3073x_out_is_ndiv() Return -EOPNOTSUPP early in esync_get callbacks when esync is not supported instead of conditionally populating the range at the end. This simplifies the control flow by removing the finish label/goto in the output variant and the conditional range assignment in both input and output variants. Replace open-coded N-div signal format switch statements with zl3073x_out_is_ndiv() helper in esync_get, esync_set and frequency_set callbacks. Reviewed-by: Petr Oros Reviewed-by: Prathosh Satish Signed-off-by: Ivan Vecera Link: https://patch.msgid.link/20260408102716.443099-2-ivecera@redhat.com Signed-off-by: Jakub Kicinski --- drivers/dpll/zl3073x/dpll.c | 64 ++++++++++++------------------------- 1 file changed, 20 insertions(+), 44 deletions(-) diff --git a/drivers/dpll/zl3073x/dpll.c b/drivers/dpll/zl3073x/dpll.c index d788ca45a17e..445f4bccb9aa 100644 --- a/drivers/dpll/zl3073x/dpll.c +++ b/drivers/dpll/zl3073x/dpll.c @@ -133,6 +133,12 @@ zl3073x_dpll_input_pin_esync_get(const struct dpll_pin *dpll_pin, ref_id = zl3073x_input_pin_ref_get(pin->id); ref = zl3073x_ref_state_get(zldev, ref_id); + if (!pin->esync_control || zl3073x_ref_freq_get(ref) <= 1) + return -EOPNOTSUPP; + + esync->range = esync_freq_ranges; + esync->range_num = ARRAY_SIZE(esync_freq_ranges); + switch (FIELD_GET(ZL_REF_SYNC_CTRL_MODE, ref->sync_ctrl)) { case ZL_REF_SYNC_CTRL_MODE_50_50_ESYNC_25_75: esync->freq = ref->esync_n_div == ZL_REF_ESYNC_DIV_1HZ ? 1 : 0; @@ -144,17 +150,6 @@ zl3073x_dpll_input_pin_esync_get(const struct dpll_pin *dpll_pin, break; } - /* If the pin supports esync control expose its range but only - * if the current reference frequency is > 1 Hz. - */ - if (pin->esync_control && zl3073x_ref_freq_get(ref) > 1) { - esync->range = esync_freq_ranges; - esync->range_num = ARRAY_SIZE(esync_freq_ranges); - } else { - esync->range = NULL; - esync->range_num = 0; - } - return 0; } @@ -599,8 +594,8 @@ zl3073x_dpll_output_pin_esync_get(const struct dpll_pin *dpll_pin, struct zl3073x_dpll_pin *pin = pin_priv; const struct zl3073x_synth *synth; const struct zl3073x_out *out; + u32 synth_freq, out_freq; u8 clock_type, out_id; - u32 synth_freq; out_id = zl3073x_output_pin_out_get(pin->id); out = zl3073x_out_state_get(zldev, out_id); @@ -609,17 +604,19 @@ zl3073x_dpll_output_pin_esync_get(const struct dpll_pin *dpll_pin, * for N-division is also used for the esync divider so both cannot * be used. */ - switch (zl3073x_out_signal_format_get(out)) { - case ZL_OUTPUT_MODE_SIGNAL_FORMAT_2_NDIV: - case ZL_OUTPUT_MODE_SIGNAL_FORMAT_2_NDIV_INV: + if (zl3073x_out_is_ndiv(out)) return -EOPNOTSUPP; - default: - break; - } /* Get attached synth frequency */ synth = zl3073x_synth_state_get(zldev, zl3073x_out_synth_get(out)); synth_freq = zl3073x_synth_freq_get(synth); + out_freq = synth_freq / out->div; + + if (!pin->esync_control || out_freq <= 1) + return -EOPNOTSUPP; + + esync->range = esync_freq_ranges; + esync->range_num = ARRAY_SIZE(esync_freq_ranges); clock_type = FIELD_GET(ZL_OUTPUT_MODE_CLOCK_TYPE, out->mode); if (clock_type != ZL_OUTPUT_MODE_CLOCK_TYPE_ESYNC) { @@ -627,11 +624,11 @@ zl3073x_dpll_output_pin_esync_get(const struct dpll_pin *dpll_pin, esync->freq = 0; esync->pulse = 0; - goto finish; + return 0; } /* Compute esync frequency */ - esync->freq = synth_freq / out->div / out->esync_n_period; + esync->freq = out_freq / out->esync_n_period; /* By comparing the esync_pulse_width to the half of the pulse width * the esync pulse percentage can be determined. @@ -640,18 +637,6 @@ zl3073x_dpll_output_pin_esync_get(const struct dpll_pin *dpll_pin, */ esync->pulse = (50 * out->esync_n_width) / out->div; -finish: - /* Set supported esync ranges if the pin supports esync control and - * if the output frequency is > 1 Hz. - */ - if (pin->esync_control && (synth_freq / out->div) > 1) { - esync->range = esync_freq_ranges; - esync->range_num = ARRAY_SIZE(esync_freq_ranges); - } else { - esync->range = NULL; - esync->range_num = 0; - } - return 0; } @@ -677,13 +662,8 @@ zl3073x_dpll_output_pin_esync_set(const struct dpll_pin *dpll_pin, * for N-division is also used for the esync divider so both cannot * be used. */ - switch (zl3073x_out_signal_format_get(&out)) { - case ZL_OUTPUT_MODE_SIGNAL_FORMAT_2_NDIV: - case ZL_OUTPUT_MODE_SIGNAL_FORMAT_2_NDIV_INV: + if (zl3073x_out_is_ndiv(&out)) return -EOPNOTSUPP; - default: - break; - } /* Select clock type */ if (freq) @@ -745,9 +725,9 @@ zl3073x_dpll_output_pin_frequency_set(const struct dpll_pin *dpll_pin, struct zl3073x_dev *zldev = zldpll->dev; struct zl3073x_dpll_pin *pin = pin_priv; const struct zl3073x_synth *synth; - u8 out_id, signal_format; u32 new_div, synth_freq; struct zl3073x_out out; + u8 out_id; out_id = zl3073x_output_pin_out_get(pin->id); out = *zl3073x_out_state_get(zldev, out_id); @@ -757,12 +737,8 @@ zl3073x_dpll_output_pin_frequency_set(const struct dpll_pin *dpll_pin, synth_freq = zl3073x_synth_freq_get(synth); new_div = synth_freq / (u32)frequency; - /* Get used signal format for the given output */ - signal_format = zl3073x_out_signal_format_get(&out); - /* Check signal format */ - if (signal_format != ZL_OUTPUT_MODE_SIGNAL_FORMAT_2_NDIV && - signal_format != ZL_OUTPUT_MODE_SIGNAL_FORMAT_2_NDIV_INV) { + if (!zl3073x_out_is_ndiv(&out)) { /* For non N-divided signal formats the frequency is computed * as division of synth frequency and output divisor. */ From 737cb6195c40acf67c876f509e209158436cf287 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Wed, 8 Apr 2026 12:27:13 +0200 Subject: [PATCH 1380/1561] dpll: zl3073x: use FIELD_MODIFY() for clear-and-set patterns Replace open-coded clear-and-set bitfield operations with FIELD_MODIFY(). Reviewed-by: Petr Oros Reviewed-by: Prathosh Satish Signed-off-by: Ivan Vecera Link: https://patch.msgid.link/20260408102716.443099-3-ivecera@redhat.com Signed-off-by: Jakub Kicinski --- drivers/dpll/zl3073x/chan.h | 17 ++++++----------- drivers/dpll/zl3073x/core.c | 3 +-- drivers/dpll/zl3073x/flash.c | 3 +-- 3 files changed, 8 insertions(+), 15 deletions(-) diff --git a/drivers/dpll/zl3073x/chan.h b/drivers/dpll/zl3073x/chan.h index e0f02d343208..481da2133202 100644 --- a/drivers/dpll/zl3073x/chan.h +++ b/drivers/dpll/zl3073x/chan.h @@ -66,8 +66,7 @@ static inline u8 zl3073x_chan_ref_get(const struct zl3073x_chan *chan) */ static inline void zl3073x_chan_mode_set(struct zl3073x_chan *chan, u8 mode) { - chan->mode_refsel &= ~ZL_DPLL_MODE_REFSEL_MODE; - chan->mode_refsel |= FIELD_PREP(ZL_DPLL_MODE_REFSEL_MODE, mode); + FIELD_MODIFY(ZL_DPLL_MODE_REFSEL_MODE, &chan->mode_refsel, mode); } /** @@ -77,8 +76,7 @@ static inline void zl3073x_chan_mode_set(struct zl3073x_chan *chan, u8 mode) */ static inline void zl3073x_chan_ref_set(struct zl3073x_chan *chan, u8 ref) { - chan->mode_refsel &= ~ZL_DPLL_MODE_REFSEL_REF; - chan->mode_refsel |= FIELD_PREP(ZL_DPLL_MODE_REFSEL_REF, ref); + FIELD_MODIFY(ZL_DPLL_MODE_REFSEL_REF, &chan->mode_refsel, ref); } /** @@ -110,13 +108,10 @@ zl3073x_chan_ref_prio_set(struct zl3073x_chan *chan, u8 ref, u8 prio) { u8 *val = &chan->ref_prio[ref / 2]; - if (!(ref & 1)) { - *val &= ~ZL_DPLL_REF_PRIO_REF_P; - *val |= FIELD_PREP(ZL_DPLL_REF_PRIO_REF_P, prio); - } else { - *val &= ~ZL_DPLL_REF_PRIO_REF_N; - *val |= FIELD_PREP(ZL_DPLL_REF_PRIO_REF_N, prio); - } + if (!(ref & 1)) + FIELD_MODIFY(ZL_DPLL_REF_PRIO_REF_P, val, prio); + else + FIELD_MODIFY(ZL_DPLL_REF_PRIO_REF_N, val, prio); } /** diff --git a/drivers/dpll/zl3073x/core.c b/drivers/dpll/zl3073x/core.c index cb47a5db061a..5f1e70f3e40a 100644 --- a/drivers/dpll/zl3073x/core.c +++ b/drivers/dpll/zl3073x/core.c @@ -805,8 +805,7 @@ int zl3073x_dev_phase_avg_factor_set(struct zl3073x_dev *zldev, u8 factor) value = (factor + 1) & 0x0f; /* Update phase measurement control register */ - dpll_meas_ctrl &= ~ZL_DPLL_MEAS_CTRL_AVG_FACTOR; - dpll_meas_ctrl |= FIELD_PREP(ZL_DPLL_MEAS_CTRL_AVG_FACTOR, value); + FIELD_MODIFY(ZL_DPLL_MEAS_CTRL_AVG_FACTOR, &dpll_meas_ctrl, value); rc = zl3073x_write_u8(zldev, ZL_REG_DPLL_MEAS_CTRL, dpll_meas_ctrl); if (rc) return rc; diff --git a/drivers/dpll/zl3073x/flash.c b/drivers/dpll/zl3073x/flash.c index 83452a77e3e9..f85535c8ad24 100644 --- a/drivers/dpll/zl3073x/flash.c +++ b/drivers/dpll/zl3073x/flash.c @@ -194,8 +194,7 @@ zl3073x_flash_cmd_wait(struct zl3073x_dev *zldev, u32 operation, if (rc) return rc; - value &= ~ZL_WRITE_FLASH_OP; - value |= FIELD_PREP(ZL_WRITE_FLASH_OP, operation); + FIELD_MODIFY(ZL_WRITE_FLASH_OP, &value, operation); rc = zl3073x_write_u8(zldev, ZL_REG_WRITE_FLASH, value); if (rc) From 63009eb92b0f379afddbba8dfdf8df087f6d5b62 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Wed, 8 Apr 2026 12:27:14 +0200 Subject: [PATCH 1381/1561] dpll: zl3073x: add ref sync and output clock type helpers Add ZL_REF_SYNC_CTRL_MODE_REFSYNC_PAIR and ZL_REF_SYNC_CTRL_PAIR register definitions. Add inline helpers to get and set the sync control mode and sync pair fields of the reference sync control register: zl3073x_ref_sync_mode_get/set() - ZL_REF_SYNC_CTRL_MODE field zl3073x_ref_sync_pair_get/set() - ZL_REF_SYNC_CTRL_PAIR field Add inline helpers to get and set the clock type field of the output mode register: zl3073x_out_clock_type_get/set() - ZL_OUTPUT_MODE_CLOCK_TYPE field Convert existing esync callbacks to use the new helpers. Reviewed-by: Petr Oros Reviewed-by: Prathosh Satish Signed-off-by: Ivan Vecera Link: https://patch.msgid.link/20260408102716.443099-4-ivecera@redhat.com Signed-off-by: Jakub Kicinski --- drivers/dpll/zl3073x/dpll.c | 26 +++++++++------------ drivers/dpll/zl3073x/out.h | 22 ++++++++++++++++++ drivers/dpll/zl3073x/ref.h | 46 +++++++++++++++++++++++++++++++++++++ drivers/dpll/zl3073x/regs.h | 2 ++ 4 files changed, 81 insertions(+), 15 deletions(-) diff --git a/drivers/dpll/zl3073x/dpll.c b/drivers/dpll/zl3073x/dpll.c index 445f4bccb9aa..dc649cf103cb 100644 --- a/drivers/dpll/zl3073x/dpll.c +++ b/drivers/dpll/zl3073x/dpll.c @@ -139,7 +139,7 @@ zl3073x_dpll_input_pin_esync_get(const struct dpll_pin *dpll_pin, esync->range = esync_freq_ranges; esync->range_num = ARRAY_SIZE(esync_freq_ranges); - switch (FIELD_GET(ZL_REF_SYNC_CTRL_MODE, ref->sync_ctrl)) { + switch (zl3073x_ref_sync_mode_get(ref)) { case ZL_REF_SYNC_CTRL_MODE_50_50_ESYNC_25_75: esync->freq = ref->esync_n_div == ZL_REF_ESYNC_DIV_1HZ ? 1 : 0; esync->pulse = 25; @@ -175,8 +175,7 @@ zl3073x_dpll_input_pin_esync_set(const struct dpll_pin *dpll_pin, else sync_mode = ZL_REF_SYNC_CTRL_MODE_50_50_ESYNC_25_75; - ref.sync_ctrl &= ~ZL_REF_SYNC_CTRL_MODE; - ref.sync_ctrl |= FIELD_PREP(ZL_REF_SYNC_CTRL_MODE, sync_mode); + zl3073x_ref_sync_mode_set(&ref, sync_mode); if (freq) { /* 1 Hz is only supported frequency now */ @@ -595,7 +594,7 @@ zl3073x_dpll_output_pin_esync_get(const struct dpll_pin *dpll_pin, const struct zl3073x_synth *synth; const struct zl3073x_out *out; u32 synth_freq, out_freq; - u8 clock_type, out_id; + u8 out_id; out_id = zl3073x_output_pin_out_get(pin->id); out = zl3073x_out_state_get(zldev, out_id); @@ -618,8 +617,7 @@ zl3073x_dpll_output_pin_esync_get(const struct dpll_pin *dpll_pin, esync->range = esync_freq_ranges; esync->range_num = ARRAY_SIZE(esync_freq_ranges); - clock_type = FIELD_GET(ZL_OUTPUT_MODE_CLOCK_TYPE, out->mode); - if (clock_type != ZL_OUTPUT_MODE_CLOCK_TYPE_ESYNC) { + if (zl3073x_out_clock_type_get(out) != ZL_OUTPUT_MODE_CLOCK_TYPE_ESYNC) { /* No need to read esync data if it is not enabled */ esync->freq = 0; esync->pulse = 0; @@ -652,8 +650,8 @@ zl3073x_dpll_output_pin_esync_set(const struct dpll_pin *dpll_pin, struct zl3073x_dpll_pin *pin = pin_priv; const struct zl3073x_synth *synth; struct zl3073x_out out; - u8 clock_type, out_id; u32 synth_freq; + u8 out_id; out_id = zl3073x_output_pin_out_get(pin->id); out = *zl3073x_out_state_get(zldev, out_id); @@ -665,15 +663,13 @@ zl3073x_dpll_output_pin_esync_set(const struct dpll_pin *dpll_pin, if (zl3073x_out_is_ndiv(&out)) return -EOPNOTSUPP; - /* Select clock type */ - if (freq) - clock_type = ZL_OUTPUT_MODE_CLOCK_TYPE_ESYNC; - else - clock_type = ZL_OUTPUT_MODE_CLOCK_TYPE_NORMAL; - /* Update clock type in output mode */ - out.mode &= ~ZL_OUTPUT_MODE_CLOCK_TYPE; - out.mode |= FIELD_PREP(ZL_OUTPUT_MODE_CLOCK_TYPE, clock_type); + if (freq) + zl3073x_out_clock_type_set(&out, + ZL_OUTPUT_MODE_CLOCK_TYPE_ESYNC); + else + zl3073x_out_clock_type_set(&out, + ZL_OUTPUT_MODE_CLOCK_TYPE_NORMAL); /* If esync is being disabled just write mailbox and finish */ if (!freq) diff --git a/drivers/dpll/zl3073x/out.h b/drivers/dpll/zl3073x/out.h index edf40432bba5..660889c57bff 100644 --- a/drivers/dpll/zl3073x/out.h +++ b/drivers/dpll/zl3073x/out.h @@ -42,6 +42,28 @@ const struct zl3073x_out *zl3073x_out_state_get(struct zl3073x_dev *zldev, int zl3073x_out_state_set(struct zl3073x_dev *zldev, u8 index, const struct zl3073x_out *out); +/** + * zl3073x_out_clock_type_get - get output clock type + * @out: pointer to out state + * + * Return: clock type of given output (ZL_OUTPUT_MODE_CLOCK_TYPE_*) + */ +static inline u8 zl3073x_out_clock_type_get(const struct zl3073x_out *out) +{ + return FIELD_GET(ZL_OUTPUT_MODE_CLOCK_TYPE, out->mode); +} + +/** + * zl3073x_out_clock_type_set - set output clock type + * @out: pointer to out state + * @type: clock type (ZL_OUTPUT_MODE_CLOCK_TYPE_*) + */ +static inline void +zl3073x_out_clock_type_set(struct zl3073x_out *out, u8 type) +{ + FIELD_MODIFY(ZL_OUTPUT_MODE_CLOCK_TYPE, &out->mode, type); +} + /** * zl3073x_out_signal_format_get - get output signal format * @out: pointer to out state diff --git a/drivers/dpll/zl3073x/ref.h b/drivers/dpll/zl3073x/ref.h index be16be20dbc7..55e80e4f0873 100644 --- a/drivers/dpll/zl3073x/ref.h +++ b/drivers/dpll/zl3073x/ref.h @@ -120,6 +120,52 @@ zl3073x_ref_freq_set(struct zl3073x_ref *ref, u32 freq) return 0; } +/** + * zl3073x_ref_sync_mode_get - get sync control mode + * @ref: pointer to ref state + * + * Return: sync control mode (ZL_REF_SYNC_CTRL_MODE_*) + */ +static inline u8 +zl3073x_ref_sync_mode_get(const struct zl3073x_ref *ref) +{ + return FIELD_GET(ZL_REF_SYNC_CTRL_MODE, ref->sync_ctrl); +} + +/** + * zl3073x_ref_sync_mode_set - set sync control mode + * @ref: pointer to ref state + * @mode: sync control mode (ZL_REF_SYNC_CTRL_MODE_*) + */ +static inline void +zl3073x_ref_sync_mode_set(struct zl3073x_ref *ref, u8 mode) +{ + FIELD_MODIFY(ZL_REF_SYNC_CTRL_MODE, &ref->sync_ctrl, mode); +} + +/** + * zl3073x_ref_sync_pair_get - get sync pair reference index + * @ref: pointer to ref state + * + * Return: paired reference index + */ +static inline u8 +zl3073x_ref_sync_pair_get(const struct zl3073x_ref *ref) +{ + return FIELD_GET(ZL_REF_SYNC_CTRL_PAIR, ref->sync_ctrl); +} + +/** + * zl3073x_ref_sync_pair_set - set sync pair reference index + * @ref: pointer to ref state + * @pair: paired reference index + */ +static inline void +zl3073x_ref_sync_pair_set(struct zl3073x_ref *ref, u8 pair) +{ + FIELD_MODIFY(ZL_REF_SYNC_CTRL_PAIR, &ref->sync_ctrl, pair); +} + /** * zl3073x_ref_is_diff - check if the given input reference is differential * @ref: pointer to ref state diff --git a/drivers/dpll/zl3073x/regs.h b/drivers/dpll/zl3073x/regs.h index 5ae50cb761a9..d425dc67250f 100644 --- a/drivers/dpll/zl3073x/regs.h +++ b/drivers/dpll/zl3073x/regs.h @@ -213,7 +213,9 @@ #define ZL_REG_REF_SYNC_CTRL ZL_REG(10, 0x2e, 1) #define ZL_REF_SYNC_CTRL_MODE GENMASK(2, 0) #define ZL_REF_SYNC_CTRL_MODE_REFSYNC_PAIR_OFF 0 +#define ZL_REF_SYNC_CTRL_MODE_REFSYNC_PAIR 1 #define ZL_REF_SYNC_CTRL_MODE_50_50_ESYNC_25_75 2 +#define ZL_REF_SYNC_CTRL_PAIR GENMASK(7, 4) #define ZL_REG_REF_ESYNC_DIV ZL_REG(10, 0x30, 4) #define ZL_REF_ESYNC_DIV_1HZ 0 From a1a702090def20ab0fea13700128861b70d91bc5 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Wed, 8 Apr 2026 12:27:15 +0200 Subject: [PATCH 1382/1561] dt-bindings: dpll: add ref-sync-sources property Add ref-sync-sources phandle-array property to the dpll-pin schema allowing board designers to declare which input pins can serve as sync sources in a Reference-Sync pair. A Ref-Sync pair consists of a clock reference and a low-frequency sync signal where the DPLL locks to the clock but phase-aligns to the sync reference. Update both examples in the Microchip ZL3073x binding to demonstrate the new property with a 1 PPS sync source paired to a clock source. Reviewed-by: Petr Oros Reviewed-by: Prathosh Satish Reviewed-by: Rob Herring (Arm) Signed-off-by: Ivan Vecera Link: https://patch.msgid.link/20260408102716.443099-5-ivecera@redhat.com Signed-off-by: Jakub Kicinski --- .../devicetree/bindings/dpll/dpll-pin.yaml | 13 ++++++++ .../bindings/dpll/microchip,zl30731.yaml | 30 ++++++++++++++----- 2 files changed, 36 insertions(+), 7 deletions(-) diff --git a/Documentation/devicetree/bindings/dpll/dpll-pin.yaml b/Documentation/devicetree/bindings/dpll/dpll-pin.yaml index 51db93b77306..1287a472f08f 100644 --- a/Documentation/devicetree/bindings/dpll/dpll-pin.yaml +++ b/Documentation/devicetree/bindings/dpll/dpll-pin.yaml @@ -36,6 +36,19 @@ properties: description: String exposed as the pin board label $ref: /schemas/types.yaml#/definitions/string + ref-sync-sources: + description: | + List of phandles to input pins that can serve as the sync source + in a Reference-Sync pair with this pin acting as the clock source. + A Ref-Sync pair consists of a clock reference and a low-frequency + sync signal. The DPLL locks to the clock reference but + phase-aligns to the sync reference. + Only valid for input pins. Each referenced pin must be a + different input pin on the same device. + $ref: /schemas/types.yaml#/definitions/phandle-array + items: + maxItems: 1 + supported-frequencies-hz: description: List of supported frequencies for this pin, expressed in Hz. diff --git a/Documentation/devicetree/bindings/dpll/microchip,zl30731.yaml b/Documentation/devicetree/bindings/dpll/microchip,zl30731.yaml index 17747f754b84..fa5a8f8e390c 100644 --- a/Documentation/devicetree/bindings/dpll/microchip,zl30731.yaml +++ b/Documentation/devicetree/bindings/dpll/microchip,zl30731.yaml @@ -52,11 +52,19 @@ examples: #address-cells = <1>; #size-cells = <0>; - pin@0 { /* REF0P */ + sync0: pin@0 { /* REF0P - 1 PPS sync source */ reg = <0>; connection-type = "ext"; - label = "Input 0"; - supported-frequencies-hz = /bits/ 64 <1 1000>; + label = "SMA1"; + supported-frequencies-hz = /bits/ 64 <1>; + }; + + pin@1 { /* REF0N - clock source, can pair with sync0 */ + reg = <1>; + connection-type = "ext"; + label = "SMA2"; + supported-frequencies-hz = /bits/ 64 <10000 10000000>; + ref-sync-sources = <&sync0>; }; }; @@ -90,11 +98,19 @@ examples: #address-cells = <1>; #size-cells = <0>; - pin@0 { /* REF0P */ + sync1: pin@0 { /* REF0P - 1 PPS sync source */ reg = <0>; - connection-type = "ext"; - label = "Input 0"; - supported-frequencies-hz = /bits/ 64 <1 1000>; + connection-type = "gnss"; + label = "GNSS_1PPS_IN"; + supported-frequencies-hz = /bits/ 64 <1>; + }; + + pin@1 { /* REF0N - clock source */ + reg = <1>; + connection-type = "gnss"; + label = "GNSS_10M_IN"; + supported-frequencies-hz = /bits/ 64 <10000000>; + ref-sync-sources = <&sync1>; }; }; From 14f269ae699869ddaca7c29c9c6c52288e3bfb73 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Wed, 8 Apr 2026 12:27:16 +0200 Subject: [PATCH 1383/1561] dpll: zl3073x: add ref-sync pair support Add support for ref-sync pair registration using the 'ref-sync-sources' phandle property from device tree. A ref-sync pair consists of a clock reference and a low-frequency sync signal where the DPLL locks to the clock reference but phase-aligns to the sync reference. The implementation: - Stores fwnode handle in zl3073x_dpll_pin during pin registration - Adds ref_sync_get/set callbacks to read and write the sync control mode and pair registers - Validates ref-sync frequency constraints: sync signal must be 8 kHz or less, clock reference must be 1 kHz or more and higher than sync - Excludes sync source from automatic reference selection by setting its priority to NONE on connect; on disconnect the priority is left as NONE and the user must explicitly make the pin selectable again - Iterates ref-sync-sources phandles to register declared pairings via dpll_pin_ref_sync_pair_add() Reviewed-by: Petr Oros Reviewed-by: Prathosh Satish Signed-off-by: Ivan Vecera Link: https://patch.msgid.link/20260408102716.443099-6-ivecera@redhat.com Signed-off-by: Jakub Kicinski --- drivers/dpll/zl3073x/dpll.c | 207 +++++++++++++++++++++++++++++++++++- 1 file changed, 206 insertions(+), 1 deletion(-) diff --git a/drivers/dpll/zl3073x/dpll.c b/drivers/dpll/zl3073x/dpll.c index dc649cf103cb..c95e93ef3ab0 100644 --- a/drivers/dpll/zl3073x/dpll.c +++ b/drivers/dpll/zl3073x/dpll.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -30,6 +31,7 @@ * @dpll: DPLL the pin is registered to * @dpll_pin: pointer to registered dpll_pin * @tracker: tracking object for the acquired reference + * @fwnode: firmware node handle * @label: package label * @dir: pin direction * @id: pin id @@ -46,6 +48,7 @@ struct zl3073x_dpll_pin { struct zl3073x_dpll *dpll; struct dpll_pin *dpll_pin; dpll_tracker tracker; + struct fwnode_handle *fwnode; char label[8]; enum dpll_pin_direction dir; u8 id; @@ -186,6 +189,109 @@ zl3073x_dpll_input_pin_esync_set(const struct dpll_pin *dpll_pin, return zl3073x_ref_state_set(zldev, ref_id, &ref); } +static int +zl3073x_dpll_input_pin_ref_sync_get(const struct dpll_pin *dpll_pin, + void *pin_priv, + const struct dpll_pin *ref_sync_pin, + void *ref_sync_pin_priv, + enum dpll_pin_state *state, + struct netlink_ext_ack *extack) +{ + struct zl3073x_dpll_pin *sync_pin = ref_sync_pin_priv; + struct zl3073x_dpll_pin *pin = pin_priv; + struct zl3073x_dpll *zldpll = pin->dpll; + struct zl3073x_dev *zldev = zldpll->dev; + const struct zl3073x_ref *ref; + u8 ref_id, mode, pair; + + ref_id = zl3073x_input_pin_ref_get(pin->id); + ref = zl3073x_ref_state_get(zldev, ref_id); + mode = zl3073x_ref_sync_mode_get(ref); + pair = zl3073x_ref_sync_pair_get(ref); + + if (mode == ZL_REF_SYNC_CTRL_MODE_REFSYNC_PAIR && + pair == zl3073x_input_pin_ref_get(sync_pin->id)) + *state = DPLL_PIN_STATE_CONNECTED; + else + *state = DPLL_PIN_STATE_DISCONNECTED; + + return 0; +} + +static int +zl3073x_dpll_input_pin_ref_sync_set(const struct dpll_pin *dpll_pin, + void *pin_priv, + const struct dpll_pin *ref_sync_pin, + void *ref_sync_pin_priv, + const enum dpll_pin_state state, + struct netlink_ext_ack *extack) +{ + struct zl3073x_dpll_pin *sync_pin = ref_sync_pin_priv; + struct zl3073x_dpll_pin *pin = pin_priv; + struct zl3073x_dpll *zldpll = pin->dpll; + struct zl3073x_dev *zldev = zldpll->dev; + u8 mode, ref_id, sync_ref_id; + struct zl3073x_chan chan; + struct zl3073x_ref ref; + int rc; + + ref_id = zl3073x_input_pin_ref_get(pin->id); + sync_ref_id = zl3073x_input_pin_ref_get(sync_pin->id); + ref = *zl3073x_ref_state_get(zldev, ref_id); + + if (state == DPLL_PIN_STATE_CONNECTED) { + const struct zl3073x_ref *sync_ref; + u32 ref_freq, sync_freq; + + sync_ref = zl3073x_ref_state_get(zldev, sync_ref_id); + ref_freq = zl3073x_ref_freq_get(&ref); + sync_freq = zl3073x_ref_freq_get(sync_ref); + + /* Sync signal must be 8 kHz or less and clock reference + * must be 1 kHz or more and higher than the sync signal. + */ + if (sync_freq > 8000) { + NL_SET_ERR_MSG(extack, + "sync frequency must be 8 kHz or less"); + return -EINVAL; + } + if (ref_freq < 1000) { + NL_SET_ERR_MSG(extack, + "clock frequency must be 1 kHz or more"); + return -EINVAL; + } + if (ref_freq <= sync_freq) { + NL_SET_ERR_MSG(extack, + "clock frequency must be higher than sync frequency"); + return -EINVAL; + } + + zl3073x_ref_sync_pair_set(&ref, sync_ref_id); + mode = ZL_REF_SYNC_CTRL_MODE_REFSYNC_PAIR; + } else { + mode = ZL_REF_SYNC_CTRL_MODE_REFSYNC_PAIR_OFF; + } + + zl3073x_ref_sync_mode_set(&ref, mode); + + rc = zl3073x_ref_state_set(zldev, ref_id, &ref); + if (rc) + return rc; + + /* Exclude sync source from automatic reference selection by setting + * its priority to NONE. On disconnect the priority is left as NONE + * and the user must explicitly make the pin selectable again. + */ + if (state == DPLL_PIN_STATE_CONNECTED) { + chan = *zl3073x_chan_state_get(zldev, zldpll->id); + zl3073x_chan_ref_prio_set(&chan, sync_ref_id, + ZL_DPLL_REF_PRIO_NONE); + return zl3073x_chan_state_set(zldev, zldpll->id, &chan); + } + + return 0; +} + static int zl3073x_dpll_input_pin_ffo_get(const struct dpll_pin *dpll_pin, void *pin_priv, const struct dpll_device *dpll, void *dpll_priv, @@ -1147,6 +1253,8 @@ static const struct dpll_pin_ops zl3073x_dpll_input_pin_ops = { .phase_adjust_set = zl3073x_dpll_input_pin_phase_adjust_set, .prio_get = zl3073x_dpll_input_pin_prio_get, .prio_set = zl3073x_dpll_input_pin_prio_set, + .ref_sync_get = zl3073x_dpll_input_pin_ref_sync_get, + .ref_sync_set = zl3073x_dpll_input_pin_ref_sync_set, .state_on_dpll_get = zl3073x_dpll_input_pin_state_on_dpll_get, .state_on_dpll_set = zl3073x_dpll_input_pin_state_on_dpll_set, }; @@ -1239,8 +1347,11 @@ zl3073x_dpll_pin_register(struct zl3073x_dpll_pin *pin, u32 index) if (IS_ERR(props)) return PTR_ERR(props); - /* Save package label, esync capability and phase adjust granularity */ + /* Save package label, fwnode, esync capability and phase adjust + * granularity. + */ strscpy(pin->label, props->package_label); + pin->fwnode = fwnode_handle_get(props->fwnode); pin->esync_control = props->esync_control; pin->phase_gran = props->dpll_props.phase_gran; @@ -1285,6 +1396,8 @@ err_register: dpll_pin_put(pin->dpll_pin, &pin->tracker); pin->dpll_pin = NULL; err_pin_get: + fwnode_handle_put(pin->fwnode); + pin->fwnode = NULL; zl3073x_pin_props_put(props); return rc; @@ -1314,6 +1427,9 @@ zl3073x_dpll_pin_unregister(struct zl3073x_dpll_pin *pin) dpll_pin_put(pin->dpll_pin, &pin->tracker); pin->dpll_pin = NULL; + + fwnode_handle_put(pin->fwnode); + pin->fwnode = NULL; } /** @@ -1827,6 +1943,88 @@ zl3073x_dpll_free(struct zl3073x_dpll *zldpll) kfree(zldpll); } +/** + * zl3073x_dpll_ref_sync_pair_register - register ref_sync pairs for a pin + * @pin: pointer to zl3073x_dpll_pin structure + * + * Iterates 'ref-sync-sources' phandles in the pin's firmware node and + * registers each declared pairing. + * + * Return: 0 on success, <0 on error + */ +static int +zl3073x_dpll_ref_sync_pair_register(struct zl3073x_dpll_pin *pin) +{ + struct zl3073x_dev *zldev = pin->dpll->dev; + struct fwnode_handle *fwnode; + struct dpll_pin *sync_pin; + dpll_tracker tracker; + int n, rc; + + for (n = 0; ; n++) { + /* Get n'th ref-sync source */ + fwnode = fwnode_find_reference(pin->fwnode, "ref-sync-sources", + n); + if (IS_ERR(fwnode)) { + rc = PTR_ERR(fwnode); + break; + } + + /* Find associated dpll pin */ + sync_pin = fwnode_dpll_pin_find(fwnode, &tracker); + fwnode_handle_put(fwnode); + if (!sync_pin) { + dev_warn(zldev->dev, "%s: ref-sync source %d not found", + pin->label, n); + continue; + } + + /* Register new ref-sync pair */ + rc = dpll_pin_ref_sync_pair_add(pin->dpll_pin, sync_pin); + dpll_pin_put(sync_pin, &tracker); + + /* -EBUSY means pairing already exists from another DPLL's + * registration. + */ + if (rc && rc != -EBUSY) { + dev_err(zldev->dev, + "%s: failed to add ref-sync source %d: %pe", + pin->label, n, ERR_PTR(rc)); + break; + } + } + + return rc != -ENOENT ? rc : 0; +} + +/** + * zl3073x_dpll_ref_sync_pairs_register - register ref_sync pairs for a DPLL + * @zldpll: pointer to zl3073x_dpll structure + * + * Iterates all registered input pins of the given DPLL and establishes + * ref_sync pairings declared by 'ref-sync-sources' phandles in the + * device tree. + * + * Return: 0 on success, <0 on error + */ +static int +zl3073x_dpll_ref_sync_pairs_register(struct zl3073x_dpll *zldpll) +{ + struct zl3073x_dpll_pin *pin; + int rc; + + list_for_each_entry(pin, &zldpll->pins, list) { + if (!zl3073x_dpll_is_input_pin(pin) || !pin->fwnode) + continue; + + rc = zl3073x_dpll_ref_sync_pair_register(pin); + if (rc) + return rc; + } + + return 0; +} + /** * zl3073x_dpll_register - register DPLL device and all its pins * @zldpll: pointer to zl3073x_dpll structure @@ -1850,6 +2048,13 @@ zl3073x_dpll_register(struct zl3073x_dpll *zldpll) return rc; } + rc = zl3073x_dpll_ref_sync_pairs_register(zldpll); + if (rc) { + zl3073x_dpll_pins_unregister(zldpll); + zl3073x_dpll_device_unregister(zldpll); + return rc; + } + return 0; } From f757a2da6df52299606512b0920eba728d642543 Mon Sep 17 00:00:00 2001 From: Nora Schiffer Date: Tue, 7 Apr 2026 12:48:01 +0200 Subject: [PATCH 1384/1561] dt-bindings: net: ti: k3-am654-cpsw-nuss: Add ti,j722s-cpsw-nuss compatible The J722S CPSW3G is mostly identical to the AM64's, but additionally supports SGMII. The AM64 compatible ti,am642-cpsw-nuss is used as a fallback. Signed-off-by: Nora Schiffer Acked-by: Krzysztof Kozlowski Link: https://patch.msgid.link/191e9f7e3a6c14eabe891a98c5fb646766479c0a.1775558273.git.nora.schiffer@ew.tq-group.com Signed-off-by: Jakub Kicinski --- .../bindings/net/ti,k3-am654-cpsw-nuss.yaml | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml index a959c1d7e643..c409c6310ed4 100644 --- a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml +++ b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml @@ -53,13 +53,18 @@ properties: "#size-cells": true compatible: - enum: - - ti,am642-cpsw-nuss - - ti,am654-cpsw-nuss - - ti,j7200-cpswxg-nuss - - ti,j721e-cpsw-nuss - - ti,j721e-cpswxg-nuss - - ti,j784s4-cpswxg-nuss + oneOf: + - enum: + - ti,am642-cpsw-nuss + - ti,am654-cpsw-nuss + - ti,j7200-cpswxg-nuss + - ti,j721e-cpsw-nuss + - ti,j721e-cpswxg-nuss + - ti,j784s4-cpswxg-nuss + - items: + - enum: + - ti,j722s-cpsw-nuss + - const: ti,am642-cpsw-nuss reg: maxItems: 1 From 436e9e48ca5141658d65f1190fccbc60a490c84b Mon Sep 17 00:00:00 2001 From: Nora Schiffer Date: Tue, 7 Apr 2026 12:48:02 +0200 Subject: [PATCH 1385/1561] net: ethernet: ti: am65-cpsw: add support for J722S SoC family The J722S CPSW3G is mostly identical to the AM64's, but additionally supports SGMII. Signed-off-by: Nora Schiffer Link: https://patch.msgid.link/6118e81358d47f455e4c1dbddf3ece6f3329e184.1775558273.git.nora.schiffer@ew.tq-group.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index d9400599e80a..7ac75fc8cdcf 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -3468,6 +3468,13 @@ static const struct am65_cpsw_pdata am64x_cpswxg_pdata = { .fdqring_mode = K3_RINGACC_RING_MODE_RING, }; +static const struct am65_cpsw_pdata j722s_cpswxg_pdata = { + .quirks = AM64_CPSW_QUIRK_DMA_RX_TDOWN_IRQ, + .ale_dev_id = "am64-cpswxg", + .fdqring_mode = K3_RINGACC_RING_MODE_RING, + .extra_modes = BIT(PHY_INTERFACE_MODE_SGMII), +}; + static const struct am65_cpsw_pdata j7200_cpswxg_pdata = { .quirks = 0, .ale_dev_id = "am64-cpswxg", @@ -3495,6 +3502,7 @@ static const struct of_device_id am65_cpsw_nuss_of_mtable[] = { { .compatible = "ti,am654-cpsw-nuss", .data = &am65x_sr1_0}, { .compatible = "ti,j721e-cpsw-nuss", .data = &j721e_pdata}, { .compatible = "ti,am642-cpsw-nuss", .data = &am64x_cpswxg_pdata}, + { .compatible = "ti,j722s-cpsw-nuss", .data = &j722s_cpswxg_pdata}, { .compatible = "ti,j7200-cpswxg-nuss", .data = &j7200_cpswxg_pdata}, { .compatible = "ti,j721e-cpswxg-nuss", .data = &j721e_cpswxg_pdata}, { .compatible = "ti,j784s4-cpswxg-nuss", .data = &j784s4_cpswxg_pdata}, From 4d19654dacef1ce6e29769c0c863df13de0d5be3 Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Tue, 7 Apr 2026 10:56:08 +0800 Subject: [PATCH 1386/1561] net: ngbe: remove netdev->ethtool->wol_enabled setting netdev->ethtool->wol_enabled is set in ethtool core code, so remove the redundant setting in ngbe_set_wol(). Signed-off-by: Jiawen Wu Reviewed-by: Breno Leitao Reviewed-by: Joe Damato Link: https://patch.msgid.link/20260407025616.33652-2-jiawenwu@trustnetic.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c index fc04040957bf..40779fee0fdf 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c @@ -37,9 +37,8 @@ static int ngbe_set_wol(struct net_device *netdev, wx->wol = 0; if (wol->wolopts & WAKE_MAGIC) wx->wol = WX_PSR_WKUP_CTL_MAG; - netdev->ethtool->wol_enabled = !!(wx->wol); wr32(wx, WX_PSR_WKUP_CTL, wx->wol); - device_set_wakeup_enable(&pdev->dev, netdev->ethtool->wol_enabled); + device_set_wakeup_enable(&pdev->dev, !!(wx->wol)); return 0; } From 752157d9eded7f9213d102048388a78753b84617 Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Tue, 7 Apr 2026 10:56:09 +0800 Subject: [PATCH 1387/1561] net: ngbe: move the WOL functions to libwx Remove duplicate-defined register macros, move the WOL implementation to the library module. So that the WOL functions can be reused in txgbe later. Signed-off-by: Jiawen Wu Link: https://patch.msgid.link/20260407025616.33652-3-jiawenwu@trustnetic.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/wangxun/libwx/wx_ethtool.c | 33 +++++++++++++++++ .../net/ethernet/wangxun/libwx/wx_ethtool.h | 4 +++ .../net/ethernet/wangxun/ngbe/ngbe_ethtool.c | 35 ++----------------- drivers/net/ethernet/wangxun/ngbe/ngbe_main.c | 12 +++---- drivers/net/ethernet/wangxun/ngbe/ngbe_type.h | 14 -------- 5 files changed, 45 insertions(+), 53 deletions(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c b/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c index f362e51c73ee..2de1170db8c7 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c @@ -262,6 +262,39 @@ int wx_set_link_ksettings(struct net_device *netdev, } EXPORT_SYMBOL(wx_set_link_ksettings); +void wx_get_wol(struct net_device *netdev, + struct ethtool_wolinfo *wol) +{ + struct wx *wx = netdev_priv(netdev); + + if (!wx->wol_hw_supported) + return; + wol->supported = WAKE_MAGIC; + wol->wolopts = 0; + if (wx->wol & WX_PSR_WKUP_CTL_MAG) + wol->wolopts |= WAKE_MAGIC; +} +EXPORT_SYMBOL(wx_get_wol); + +int wx_set_wol(struct net_device *netdev, + struct ethtool_wolinfo *wol) +{ + struct wx *wx = netdev_priv(netdev); + struct pci_dev *pdev = wx->pdev; + + if (!wx->wol_hw_supported) + return -EOPNOTSUPP; + + wx->wol = 0; + if (wol->wolopts & WAKE_MAGIC) + wx->wol = WX_PSR_WKUP_CTL_MAG; + wr32(wx, WX_PSR_WKUP_CTL, wx->wol); + device_set_wakeup_enable(&pdev->dev, !!(wx->wol)); + + return 0; +} +EXPORT_SYMBOL(wx_set_wol); + void wx_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) { diff --git a/drivers/net/ethernet/wangxun/libwx/wx_ethtool.h b/drivers/net/ethernet/wangxun/libwx/wx_ethtool.h index 727093970462..5b187d1587b1 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_ethtool.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_ethtool.h @@ -18,6 +18,10 @@ int wx_get_link_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *cmd); int wx_set_link_ksettings(struct net_device *netdev, const struct ethtool_link_ksettings *cmd); +void wx_get_wol(struct net_device *netdev, + struct ethtool_wolinfo *wol); +int wx_set_wol(struct net_device *netdev, + struct ethtool_wolinfo *wol); void wx_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause); int wx_set_pauseparam(struct net_device *netdev, diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c index 40779fee0fdf..2b6356622a13 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c @@ -12,37 +12,6 @@ #include "ngbe_ethtool.h" #include "ngbe_type.h" -static void ngbe_get_wol(struct net_device *netdev, - struct ethtool_wolinfo *wol) -{ - struct wx *wx = netdev_priv(netdev); - - if (!wx->wol_hw_supported) - return; - wol->supported = WAKE_MAGIC; - wol->wolopts = 0; - if (wx->wol & WX_PSR_WKUP_CTL_MAG) - wol->wolopts |= WAKE_MAGIC; -} - -static int ngbe_set_wol(struct net_device *netdev, - struct ethtool_wolinfo *wol) -{ - struct wx *wx = netdev_priv(netdev); - struct pci_dev *pdev = wx->pdev; - - if (!wx->wol_hw_supported) - return -EOPNOTSUPP; - - wx->wol = 0; - if (wol->wolopts & WAKE_MAGIC) - wx->wol = WX_PSR_WKUP_CTL_MAG; - wr32(wx, WX_PSR_WKUP_CTL, wx->wol); - device_set_wakeup_enable(&pdev->dev, !!(wx->wol)); - - return 0; -} - static int ngbe_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, struct kernel_ethtool_ringparam *kernel_ring, @@ -121,8 +90,8 @@ static const struct ethtool_ops ngbe_ethtool_ops = { .get_link_ksettings = wx_get_link_ksettings, .set_link_ksettings = wx_set_link_ksettings, .nway_reset = wx_nway_reset, - .get_wol = ngbe_get_wol, - .set_wol = ngbe_set_wol, + .get_wol = wx_get_wol, + .set_wol = wx_set_wol, .get_sset_count = wx_get_sset_count, .get_strings = wx_get_strings, .get_ethtool_stats = wx_get_ethtool_stats, diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c index 58488e138beb..e28ddf684a06 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c @@ -58,14 +58,14 @@ static void ngbe_init_type_code(struct wx *wx) wx->mac.type = wx_mac_em; type_mask = (u16)(wx->subsystem_device_id & NGBE_OEM_MASK); ncsi_mask = wx->subsystem_device_id & NGBE_NCSI_MASK; - wol_mask = wx->subsystem_device_id & NGBE_WOL_MASK; + wol_mask = wx->subsystem_device_id & WX_WOL_MASK; val = rd32(wx, WX_CFG_PORT_ST); wx->mac_type = (val & BIT(7)) >> 7 ? em_mac_type_rgmii : em_mac_type_mdi; - wx->wol_hw_supported = (wol_mask == NGBE_WOL_SUP) ? 1 : 0; + wx->wol_hw_supported = (wol_mask == WX_WOL_SUP) ? 1 : 0; wx->ncsi_enabled = (ncsi_mask == NGBE_NCSI_MASK || type_mask == NGBE_SUBID_OCP_CARD) ? 1 : 0; @@ -520,9 +520,9 @@ static void ngbe_dev_shutdown(struct pci_dev *pdev, bool *enable_wake) if (wufc) { wx_set_rx_mode(netdev); wx_configure_rx(wx); - wr32(wx, NGBE_PSR_WKUP_CTL, wufc); + wr32(wx, WX_PSR_WKUP_CTL, wufc); } else { - wr32(wx, NGBE_PSR_WKUP_CTL, 0); + wr32(wx, WX_PSR_WKUP_CTL, 0); } pci_wake_from_d3(pdev, !!wufc); *enable_wake = !!wufc; @@ -742,10 +742,10 @@ static int ngbe_probe(struct pci_dev *pdev, wx->wol = 0; if (wx->wol_hw_supported) - wx->wol = NGBE_PSR_WKUP_CTL_MAG; + wx->wol = WX_PSR_WKUP_CTL_MAG; netdev->ethtool->wol_enabled = !!(wx->wol); - wr32(wx, NGBE_PSR_WKUP_CTL, wx->wol); + wr32(wx, WX_PSR_WKUP_CTL, wx->wol); device_set_wakeup_enable(&pdev->dev, wx->wol); /* Save off EEPROM version number and Option Rom version which diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h index 3b2ca7f47e33..f1957fa0add4 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h @@ -39,8 +39,6 @@ #define NGBE_NCSI_SUP 0x8000 #define NGBE_NCSI_MASK 0x8000 -#define NGBE_WOL_SUP 0x4000 -#define NGBE_WOL_MASK 0x4000 /**************** EM Registers ****************************/ /* chip control Registers */ @@ -93,18 +91,6 @@ #define NGBE_CFG_LAN_SPEED 0x14440 #define NGBE_CFG_PORT_ST 0x14404 -/* Wake up registers */ -#define NGBE_PSR_WKUP_CTL 0x15B80 -/* Wake Up Filter Control Bit */ -#define NGBE_PSR_WKUP_CTL_LNKC BIT(0) /* Link Status Change Wakeup Enable*/ -#define NGBE_PSR_WKUP_CTL_MAG BIT(1) /* Magic Packet Wakeup Enable */ -#define NGBE_PSR_WKUP_CTL_EX BIT(2) /* Directed Exact Wakeup Enable */ -#define NGBE_PSR_WKUP_CTL_MC BIT(3) /* Directed Multicast Wakeup Enable*/ -#define NGBE_PSR_WKUP_CTL_BC BIT(4) /* Broadcast Wakeup Enable */ -#define NGBE_PSR_WKUP_CTL_ARP BIT(5) /* ARP Request Packet Wakeup Enable*/ -#define NGBE_PSR_WKUP_CTL_IPV4 BIT(6) /* Directed IPv4 Pkt Wakeup Enable */ -#define NGBE_PSR_WKUP_CTL_IPV6 BIT(7) /* Directed IPv6 Pkt Wakeup Enable */ - #define NGBE_FW_EEPROM_CHECKSUM_CMD 0xE9 #define NGBE_FW_NVM_DATA_OFFSET 3 #define NGBE_FW_CMD_DEFAULT_CHECKSUM 0xFF /* checksum always 0xFF */ From 9bc29a87fbc647db5f08643403d59f094d19298e Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Tue, 7 Apr 2026 10:56:10 +0800 Subject: [PATCH 1388/1561] net: ngbe: remove redundant macros NGBE_NCSI_SUP and NGBE_NCSI_MASK are duplicate-defined, they can be replaced by the macros defined in libwx. Just remove them. Signed-off-by: Jiawen Wu Reviewed-by: Joe Damato Link: https://patch.msgid.link/20260407025616.33652-4-jiawenwu@trustnetic.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/ngbe/ngbe_main.c | 4 ++-- drivers/net/ethernet/wangxun/ngbe/ngbe_type.h | 3 --- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c index e28ddf684a06..8c9d505721b1 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c @@ -57,7 +57,7 @@ static void ngbe_init_type_code(struct wx *wx) wx->mac.type = wx_mac_em; type_mask = (u16)(wx->subsystem_device_id & NGBE_OEM_MASK); - ncsi_mask = wx->subsystem_device_id & NGBE_NCSI_MASK; + ncsi_mask = wx->subsystem_device_id & WX_NCSI_MASK; wol_mask = wx->subsystem_device_id & WX_WOL_MASK; val = rd32(wx, WX_CFG_PORT_ST); @@ -66,7 +66,7 @@ static void ngbe_init_type_code(struct wx *wx) em_mac_type_mdi; wx->wol_hw_supported = (wol_mask == WX_WOL_SUP) ? 1 : 0; - wx->ncsi_enabled = (ncsi_mask == NGBE_NCSI_MASK || + wx->ncsi_enabled = (ncsi_mask == WX_NCSI_SUP || type_mask == NGBE_SUBID_OCP_CARD) ? 1 : 0; switch (type_mask) { diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h index f1957fa0add4..7077a0da4c98 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h @@ -37,9 +37,6 @@ #define NGBE_OEM_MASK 0x00FF -#define NGBE_NCSI_SUP 0x8000 -#define NGBE_NCSI_MASK 0x8000 - /**************** EM Registers ****************************/ /* chip control Registers */ #define NGBE_MIS_PRB_CTL 0x10010 From d48df7e7c3fb456d2dc907707a124dab8462eb19 Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Tue, 7 Apr 2026 10:56:11 +0800 Subject: [PATCH 1389/1561] net: wangxun: replace busy-wait reset flag with kernel mutex Replace the busy-wait loop using test_and_set_bit(WX_STATE_RESETTING) with a proper per-device mutex to serialize reset operations. The reset flag is reserved for other code paths (like watchdog), which need tocheck if a reset is in process. Signed-off-by: Jiawen Wu Link: https://patch.msgid.link/20260407025616.33652-5-jiawenwu@trustnetic.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/libwx/wx_hw.c | 1 + drivers/net/ethernet/wangxun/libwx/wx_type.h | 16 +--------------- .../net/ethernet/wangxun/libwx/wx_vf_common.c | 6 ++++-- drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c | 6 +++--- .../net/ethernet/wangxun/txgbe/txgbe_ethtool.c | 6 +++--- drivers/net/ethernet/wangxun/txgbe/txgbe_main.c | 10 +++------- 6 files changed, 15 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index bee9e245e792..05731a50d85f 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c @@ -2513,6 +2513,7 @@ int wx_sw_init(struct wx *wx) return -ENOMEM; } + mutex_init(&wx->reset_lock); bitmap_zero(wx->state, WX_STATE_NBITS); bitmap_zero(wx->flags, WX_PF_FLAGS_NBITS); wx->misc_irq_domain = false; diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index 29e5c5470c94..0fbdda63b141 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -1400,6 +1400,7 @@ struct wx { struct timer_list service_timer; struct work_struct service_task; + struct mutex reset_lock; /* mutex for reset */ }; #define WX_INTR_ALL (~0ULL) @@ -1478,21 +1479,6 @@ static inline struct wx *phylink_to_wx(struct phylink_config *config) return container_of(config, struct wx, phylink_config); } -static inline int wx_set_state_reset(struct wx *wx) -{ - u8 timeout = 50; - - while (test_and_set_bit(WX_STATE_RESETTING, wx->state)) { - timeout--; - if (!timeout) - return -EBUSY; - - usleep_range(1000, 2000); - } - - return 0; -} - static inline unsigned int wx_rx_pg_order(struct wx_ring *ring) { #if (PAGE_SIZE < 8192) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_vf_common.c b/drivers/net/ethernet/wangxun/libwx/wx_vf_common.c index ade2bfe563aa..75a6f0898afe 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_vf_common.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_vf_common.c @@ -339,14 +339,16 @@ static void wxvf_down(struct wx *wx) static void wxvf_reinit_locked(struct wx *wx) { - while (test_and_set_bit(WX_STATE_RESETTING, wx->state)) - usleep_range(1000, 2000); + mutex_lock(&wx->reset_lock); + set_bit(WX_STATE_RESETTING, wx->state); + wxvf_down(wx); wx_free_irq(wx); wx_configure_vf(wx); wx_request_msix_irqs_vf(wx); wxvf_up_complete(wx); clear_bit(WX_STATE_RESETTING, wx->state); + mutex_unlock(&wx->reset_lock); } static void wxvf_reset_subtask(struct wx *wx) diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c index 2b6356622a13..1b76ad897e97 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c @@ -32,9 +32,8 @@ static int ngbe_set_ringparam(struct net_device *netdev, new_rx_count == wx->rx_ring_count) return 0; - err = wx_set_state_reset(wx); - if (err) - return err; + mutex_lock(&wx->reset_lock); + set_bit(WX_STATE_RESETTING, wx->state); if (!netif_running(wx->netdev)) { for (i = 0; i < wx->num_tx_queues; i++) @@ -65,6 +64,7 @@ static int ngbe_set_ringparam(struct net_device *netdev, clear_reset: clear_bit(WX_STATE_RESETTING, wx->state); + mutex_unlock(&wx->reset_lock); return err; } diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c index 9157b8275be1..46375799d057 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c @@ -56,9 +56,8 @@ static int txgbe_set_ringparam(struct net_device *netdev, new_rx_count == wx->rx_ring_count) return 0; - err = wx_set_state_reset(wx); - if (err) - return err; + mutex_lock(&wx->reset_lock); + set_bit(WX_STATE_RESETTING, wx->state); if (!netif_running(wx->netdev)) { for (i = 0; i < wx->num_tx_queues; i++) @@ -88,6 +87,7 @@ static int txgbe_set_ringparam(struct net_device *netdev, clear_reset: clear_bit(WX_STATE_RESETTING, wx->state); + mutex_unlock(&wx->reset_lock); return err; } diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index 0de051450a82..00726605628b 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -598,20 +598,16 @@ int txgbe_setup_tc(struct net_device *dev, u8 tc) static void txgbe_reinit_locked(struct wx *wx) { - int err = 0; - netif_trans_update(wx->netdev); - err = wx_set_state_reset(wx); - if (err) { - wx_err(wx, "wait device reset timeout\n"); - return; - } + mutex_lock(&wx->reset_lock); + set_bit(WX_STATE_RESETTING, wx->state); txgbe_down(wx); txgbe_up(wx); clear_bit(WX_STATE_RESETTING, wx->state); + mutex_unlock(&wx->reset_lock); } void txgbe_do_reset(struct net_device *netdev) From b736ebed937e5e0ab68cff3ed6103c7aa058f414 Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Tue, 7 Apr 2026 10:56:12 +0800 Subject: [PATCH 1390/1561] net: wangxun: move ethtool_ops.set_channels into libwx Since function ops wx->setup_tc() is set in txgbe and ngbe, ethtool_ops.set_channels can be implemented in libwx to reduce duplicated code. Signed-off-by: Jiawen Wu Link: https://patch.msgid.link/20260407025616.33652-6-jiawenwu@trustnetic.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/libwx/wx_ethtool.c | 2 +- drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c | 15 +-------------- .../net/ethernet/wangxun/txgbe/txgbe_ethtool.c | 15 +-------------- 3 files changed, 3 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c b/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c index 2de1170db8c7..6adb8cbcad1f 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c @@ -555,7 +555,7 @@ int wx_set_channels(struct net_device *dev, wx->ring_feature[RING_F_RSS].limit = count; - return 0; + return wx->setup_tc(dev, netdev_get_num_tc(dev)); } EXPORT_SYMBOL(wx_set_channels); diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c index 1b76ad897e97..b2e191982803 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c @@ -68,19 +68,6 @@ clear_reset: return err; } -static int ngbe_set_channels(struct net_device *dev, - struct ethtool_channels *ch) -{ - int err; - - err = wx_set_channels(dev, ch); - if (err < 0) - return err; - - /* use setup TC to update any traffic class queue mapping */ - return ngbe_setup_tc(dev, netdev_get_num_tc(dev)); -} - static const struct ethtool_ops ngbe_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ | @@ -104,7 +91,7 @@ static const struct ethtool_ops ngbe_ethtool_ops = { .get_coalesce = wx_get_coalesce, .set_coalesce = wx_set_coalesce, .get_channels = wx_get_channels, - .set_channels = ngbe_set_channels, + .set_channels = wx_set_channels, .get_rxfh_fields = wx_get_rxfh_fields, .set_rxfh_fields = wx_set_rxfh_fields, .get_rxfh_indir_size = wx_rss_indir_size, diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c index 46375799d057..3e32aca72806 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c @@ -91,19 +91,6 @@ clear_reset: return err; } -static int txgbe_set_channels(struct net_device *dev, - struct ethtool_channels *ch) -{ - int err; - - err = wx_set_channels(dev, ch); - if (err < 0) - return err; - - /* use setup TC to update any traffic class queue mapping */ - return txgbe_setup_tc(dev, netdev_get_num_tc(dev)); -} - static int txgbe_get_ethtool_fdir_entry(struct txgbe *txgbe, struct ethtool_rxnfc *cmd) { @@ -587,7 +574,7 @@ static const struct ethtool_ops txgbe_ethtool_ops = { .get_coalesce = wx_get_coalesce, .set_coalesce = wx_set_coalesce, .get_channels = wx_get_channels, - .set_channels = txgbe_set_channels, + .set_channels = wx_set_channels, .get_rxnfc = txgbe_get_rxnfc, .set_rxnfc = txgbe_set_rxnfc, .get_rx_ring_count = txgbe_get_rx_ring_count, From 58f6303572ec66e7c2967ac168125f444c9e880d Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Tue, 7 Apr 2026 10:56:13 +0800 Subject: [PATCH 1391/1561] net: wangxun: reorder timer and work sync cancellations When removing the device, timer_delete_sync(&wx->service_timer) is called in .ndo_stop() after cancel_work_sync(&wx->service_task). This may cause new work to be queued after device down. Move unregister_netdev() before cancel_work_sync(), and use timer_shutdown_sync() to prevent the timer from being re-armed. Signed-off-by: Jiawen Wu Link: https://patch.msgid.link/20260407025616.33652-7-jiawenwu@trustnetic.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/libwx/wx_vf_common.c | 3 ++- drivers/net/ethernet/wangxun/txgbe/txgbe_main.c | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_vf_common.c b/drivers/net/ethernet/wangxun/libwx/wx_vf_common.c index 75a6f0898afe..29cdbed2e5ec 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_vf_common.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_vf_common.c @@ -48,9 +48,10 @@ void wxvf_remove(struct pci_dev *pdev) struct wx *wx = pci_get_drvdata(pdev); struct net_device *netdev; - cancel_work_sync(&wx->service_task); netdev = wx->netdev; unregister_netdev(netdev); + timer_shutdown_sync(&wx->service_timer); + cancel_work_sync(&wx->service_task); kfree(wx->vfinfo); kfree(wx->rss_key); kfree(wx->mac_table); diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index 00726605628b..0dd128aa18da 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -946,12 +946,13 @@ static void txgbe_remove(struct pci_dev *pdev) struct txgbe *txgbe = wx->priv; struct net_device *netdev; - cancel_work_sync(&wx->service_task); - netdev = wx->netdev; wx_disable_sriov(wx); unregister_netdev(netdev); + timer_shutdown_sync(&wx->service_timer); + cancel_work_sync(&wx->service_task); + txgbe_remove_phy(txgbe); wx_free_isb_resources(wx); From dc33e52b8ce6f2d42dce18da12dc47d6c21f2e8b Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Tue, 7 Apr 2026 10:56:14 +0800 Subject: [PATCH 1392/1561] net: wangxun: schedule hardware stats update in watchdog Hardware statistics should be updated periodically in the watchdog to prevent 32-bit registers from overflowing. This is also required for the upcoming pause frame accounting logic, which relies on regular statistics sampling. Signed-off-by: Jiawen Wu Link: https://patch.msgid.link/20260407025616.33652-8-jiawenwu@trustnetic.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/libwx/wx_hw.c | 9 +++++ drivers/net/ethernet/wangxun/libwx/wx_type.h | 1 + drivers/net/ethernet/wangxun/ngbe/ngbe_main.c | 36 ++++++++++++++++++- .../net/ethernet/wangxun/txgbe/txgbe_main.c | 1 + 4 files changed, 46 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index 05731a50d85f..31259f69c0e2 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c @@ -2513,6 +2513,7 @@ int wx_sw_init(struct wx *wx) return -ENOMEM; } + spin_lock_init(&wx->hw_stats_lock); mutex_init(&wx->reset_lock); bitmap_zero(wx->state, WX_STATE_NBITS); bitmap_zero(wx->flags, WX_PF_FLAGS_NBITS); @@ -2845,6 +2846,12 @@ void wx_update_stats(struct wx *wx) u64 restart_queue = 0, tx_busy = 0; u32 i; + if (!netif_running(wx->netdev) || + test_bit(WX_STATE_RESETTING, wx->state)) + return; + + spin_lock(&wx->hw_stats_lock); + /* gather some stats to the wx struct that are per queue */ for (i = 0; i < wx->num_rx_queues; i++) { struct wx_ring *rx_ring = wx->rx_ring[i]; @@ -2913,6 +2920,8 @@ void wx_update_stats(struct wx *wx) for (i = wx->num_vfs * wx->num_rx_queues_per_pool; i < wx->mac.max_rx_queues; i++) hwstats->qmprc += rd32(wx, WX_PX_MPRC(i)); + + spin_unlock(&wx->hw_stats_lock); } EXPORT_SYMBOL(wx_update_stats); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index 0fbdda63b141..7831c5035be8 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -1354,6 +1354,7 @@ struct wx { bool default_up; struct wx_hw_stats stats; + spinlock_t hw_stats_lock; /* spinlock for accessing to hw stats */ u64 tx_busy; u64 non_eop_descs; u64 restart_queue; diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c index 8c9d505721b1..d8e3827a8b1f 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c @@ -138,6 +138,26 @@ static int ngbe_sw_init(struct wx *wx) return 0; } +/** + * ngbe_service_task - manages and runs subtasks + * @work: pointer to work_struct containing our data + **/ +static void ngbe_service_task(struct work_struct *work) +{ + struct wx *wx = container_of(work, struct wx, service_task); + + wx_update_stats(wx); + + wx_service_event_complete(wx); +} + +static void ngbe_init_service(struct wx *wx) +{ + timer_setup(&wx->service_timer, wx_service_timer, 0); + INIT_WORK(&wx->service_task, ngbe_service_task); + clear_bit(WX_STATE_SERVICE_SCHED, wx->state); +} + /** * ngbe_irq_enable - Enable default interrupt generation settings * @wx: board private structure @@ -368,6 +388,10 @@ static void ngbe_disable_device(struct wx *wx) wx_napi_disable_all(wx); netif_tx_stop_all_queues(netdev); netif_tx_disable(netdev); + + timer_delete_sync(&wx->service_timer); + cancel_work_sync(&wx->service_task); + if (wx->gpio_ctrl) ngbe_sfp_modules_txrx_powerctl(wx, false); wx_irq_disable(wx); @@ -407,6 +431,7 @@ void ngbe_up(struct wx *wx) wx_napi_enable_all(wx); /* enable transmits */ netif_tx_start_all_queues(wx->netdev); + mod_timer(&wx->service_timer, jiffies); /* clear any pending interrupts, may auto mask */ rd32(wx, WX_PX_IC(0)); @@ -770,9 +795,11 @@ static int ngbe_probe(struct pci_dev *pdev, eth_hw_addr_set(netdev, wx->mac.perm_addr); wx_mac_set_default_filter(wx, wx->mac.perm_addr); + ngbe_init_service(wx); + err = wx_init_interrupt_scheme(wx); if (err) - goto err_free_mac_table; + goto err_cancel_service; /* phy Interface Configuration */ err = ngbe_mdio_init(wx); @@ -792,6 +819,9 @@ err_register: wx_control_hw(wx, false); err_clear_interrupt_scheme: wx_clear_interrupt_scheme(wx); +err_cancel_service: + timer_delete_sync(&wx->service_timer); + cancel_work_sync(&wx->service_task); err_free_mac_table: kfree(wx->rss_key); kfree(wx->mac_table); @@ -820,6 +850,10 @@ static void ngbe_remove(struct pci_dev *pdev) netdev = wx->netdev; wx_disable_sriov(wx); unregister_netdev(netdev); + + timer_shutdown_sync(&wx->service_timer); + cancel_work_sync(&wx->service_task); + phylink_destroy(wx->phylink); pci_release_selected_regions(pdev, pci_select_bars(pdev, IORESOURCE_MEM)); diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index 0dd128aa18da..ec32a5f422f2 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -130,6 +130,7 @@ static void txgbe_service_task(struct work_struct *work) txgbe_module_detection_subtask(wx); txgbe_link_config_subtask(wx); + wx_update_stats(wx); wx_service_event_complete(wx); } From 1dd9b0dafd21cfdac534b423b7e7ee980b3c535a Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Tue, 7 Apr 2026 10:56:15 +0800 Subject: [PATCH 1393/1561] net: libwx: wrap-around and reset qmprc counter The WX_PX_MPRC registers are not clear-on-read hardware counters. The previous implementation directly read and accumulated these 32-bit values into a 64-bit software counter. Now implement a rd32_wrap() helper function to calculate the delta counter to correct the statistic. Signed-off-by: Jiawen Wu Link: https://patch.msgid.link/20260407025616.33652-9-jiawenwu@trustnetic.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/libwx/wx_hw.c | 10 ++++++---- drivers/net/ethernet/wangxun/libwx/wx_type.h | 17 +++++++++++++++++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index 31259f69c0e2..57d6671ec618 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c @@ -2915,11 +2915,10 @@ void wx_update_stats(struct wx *wx) hwstats->fdirmiss += rd32(wx, WX_RDB_FDIR_MISS); } - /* qmprc is not cleared on read, manual reset it */ - hwstats->qmprc = 0; for (i = wx->num_vfs * wx->num_rx_queues_per_pool; i < wx->mac.max_rx_queues; i++) - hwstats->qmprc += rd32(wx, WX_PX_MPRC(i)); + hwstats->qmprc += rd32_wrap(wx, WX_PX_MPRC(i), + &wx->last_stats.qmprc[i]); spin_unlock(&wx->hw_stats_lock); } @@ -2936,8 +2935,11 @@ void wx_clear_hw_cntrs(struct wx *wx) { u16 i = 0; - for (i = 0; i < wx->mac.max_rx_queues; i++) + for (i = wx->num_vfs * wx->num_rx_queues_per_pool; + i < wx->mac.max_rx_queues; i++) { wr32(wx, WX_PX_MPRC(i), 0); + wx->last_stats.qmprc[i] = 0; + } rd32(wx, WX_RDM_PKT_CNT); rd32(wx, WX_TDM_PKT_CNT); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index 7831c5035be8..3c5a351974dd 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -1182,6 +1182,10 @@ struct wx_hw_stats { u64 fdirmiss; }; +struct wx_last_stats { + u32 qmprc[128]; +}; + enum wx_state { WX_STATE_RESETTING, WX_STATE_SWFW_BUSY, @@ -1354,6 +1358,7 @@ struct wx { bool default_up; struct wx_hw_stats stats; + struct wx_last_stats last_stats; spinlock_t hw_stats_lock; /* spinlock for accessing to hw stats */ u64 tx_busy; u64 non_eop_descs; @@ -1464,6 +1469,18 @@ wr32ptp(struct wx *wx, u32 reg, u32 value) return wr32(wx, reg + 0xB500, value); } +static inline u32 +rd32_wrap(struct wx *wx, u32 reg, u32 *last) +{ + u32 val, delta; + + val = rd32(wx, reg); + delta = val - *last; + *last = val; + + return delta; +} + /* On some domestic CPU platforms, sometimes IO is not synchronized with * flushing memory, here use readl() to flush PCI read and write. */ From 40637e4a4477ab0f059f60e38d376390b4c71823 Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Tue, 7 Apr 2026 10:56:16 +0800 Subject: [PATCH 1394/1561] net: libwx: improve flow control setting Save the current mode of flow control, and enhance the statistics of pause frames. The received pause frames are divided into XON and XOFF to be counted. And due to the hardware defect of SP devices, XON packets cannot be trasmitted correctly, so Tx XON pause is disabled by default for those devices. Signed-off-by: Jiawen Wu Link: https://patch.msgid.link/20260407025616.33652-10-jiawenwu@trustnetic.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/wangxun/libwx/wx_ethtool.c | 2 +- drivers/net/ethernet/wangxun/libwx/wx_hw.c | 46 +++++++++++++++++-- drivers/net/ethernet/wangxun/libwx/wx_type.h | 19 +++++++- 3 files changed, 61 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c b/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c index 6adb8cbcad1f..5df971aca9e3 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c @@ -211,7 +211,7 @@ void wx_get_pause_stats(struct net_device *netdev, hwstats = &wx->stats; stats->tx_pause_frames = hwstats->lxontxc + hwstats->lxofftxc; - stats->rx_pause_frames = hwstats->lxonoffrxc; + stats->rx_pause_frames = hwstats->lxonrxc + hwstats->lxoffrxc; } EXPORT_SYMBOL(wx_get_pause_stats); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index 57d6671ec618..d3772d01e00b 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c @@ -2774,6 +2774,15 @@ int wx_fc_enable(struct wx *wx, bool tx_pause, bool rx_pause) } } + if (rx_pause && tx_pause) + wx->fc.mode = wx_fc_full; + else if (rx_pause) + wx->fc.mode = wx_fc_rx_pause; + else if (tx_pause) + wx->fc.mode = wx_fc_tx_pause; + else + wx->fc.mode = wx_fc_none; + /* Disable any previous flow control settings */ mflcn_reg = rd32(wx, WX_MAC_RX_FLOW_CTRL); mflcn_reg &= ~WX_MAC_RX_FLOW_CTRL_RFE; @@ -2792,7 +2801,9 @@ int wx_fc_enable(struct wx *wx, bool tx_pause, bool rx_pause) /* Set up and enable Rx high/low water mark thresholds, enable XON. */ if (tx_pause && wx->fc.high_water) { - fcrtl = (wx->fc.low_water << 10) | WX_RDB_RFCL_XONE; + fcrtl = (wx->fc.low_water << 10); + if (wx->mac.type != wx_mac_sp) + fcrtl |= WX_RDB_RFCL_XONE; wr32(wx, WX_RDB_RFCL, fcrtl); fcrth = (wx->fc.high_water << 10) | WX_RDB_RFCH_XOFFE; } else { @@ -2833,6 +2844,21 @@ int wx_fc_enable(struct wx *wx, bool tx_pause, bool rx_pause) } EXPORT_SYMBOL(wx_fc_enable); +static void wx_update_xoff_rx_lfc(struct wx *wx) +{ + struct wx_hw_stats *hwstats = &wx->stats; + + if (wx->fc.mode != wx_fc_full && + wx->fc.mode != wx_fc_rx_pause) + return; + + if (wx->mac.type >= wx_mac_aml) + hwstats->lxoffrxc += rd32_wrap(wx, WX_MAC_LXOFFRXC_AML, + &wx->last_stats.lxoffrxc); + else + hwstats->lxoffrxc += rd64(wx, WX_MAC_LXOFFRXC); +} + /** * wx_update_stats - Update the board statistics counters. * @wx: board private structure @@ -2887,6 +2913,8 @@ void wx_update_stats(struct wx *wx) wx->restart_queue = restart_queue; wx->tx_busy = tx_busy; + wx_update_xoff_rx_lfc(wx); + hwstats->gprc += rd32(wx, WX_RDM_PKT_CNT); hwstats->gptc += rd32(wx, WX_TDM_PKT_CNT); hwstats->gorc += rd64(wx, WX_RDM_BYTE_CNT_LSB); @@ -2901,7 +2929,11 @@ void wx_update_stats(struct wx *wx) hwstats->mptc += rd64(wx, WX_TX_MC_FRAMES_GOOD_L); hwstats->roc += rd32(wx, WX_RX_OVERSIZE_FRAMES_GOOD); hwstats->ruc += rd32(wx, WX_RX_UNDERSIZE_FRAMES_GOOD); - hwstats->lxonoffrxc += rd32(wx, WX_MAC_LXONOFFRXC); + if (wx->mac.type >= wx_mac_aml) + hwstats->lxonrxc += rd32_wrap(wx, WX_MAC_LXONRXC_AML, + &wx->last_stats.lxonrxc); + else + hwstats->lxonrxc += rd32(wx, WX_MAC_LXONRXC); hwstats->lxontxc += rd32(wx, WX_RDB_LXONTXC); hwstats->lxofftxc += rd32(wx, WX_RDB_LXOFFTXC); hwstats->o2bgptc += rd32(wx, WX_TDM_OS2BMC_CNT); @@ -2958,7 +2990,15 @@ void wx_clear_hw_cntrs(struct wx *wx) rd64(wx, WX_RX_LEN_ERROR_FRAMES_L); rd32(wx, WX_RDB_LXONTXC); rd32(wx, WX_RDB_LXOFFTXC); - rd32(wx, WX_MAC_LXONOFFRXC); + if (wx->mac.type >= wx_mac_aml) { + wr32(wx, WX_MAC_LXONRXC_AML, 0); + wr32(wx, WX_MAC_LXOFFRXC_AML, 0); + wx->last_stats.lxonrxc = 0; + wx->last_stats.lxoffrxc = 0; + } else { + rd32(wx, WX_MAC_LXONRXC); + rd64(wx, WX_MAC_LXOFFRXC); + } } EXPORT_SYMBOL(wx_clear_hw_cntrs); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index 3c5a351974dd..0da5565ee4ff 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -79,7 +79,10 @@ #define WX_RX_LEN_ERROR_FRAMES_L 0x11978 #define WX_RX_UNDERSIZE_FRAMES_GOOD 0x11938 #define WX_RX_OVERSIZE_FRAMES_GOOD 0x1193C -#define WX_MAC_LXONOFFRXC 0x11E0C +#define WX_MAC_LXOFFRXC 0x11988 +#define WX_MAC_LXONRXC 0x11E0C +#define WX_MAC_LXOFFRXC_AML 0x11F80 +#define WX_MAC_LXONRXC_AML 0x11F84 /*********************** Receive DMA registers **************************/ #define WX_RDM_VF_RE(_i) (0x12004 + ((_i) * 4)) @@ -1148,9 +1151,18 @@ enum wx_isb_idx { WX_ISB_MAX }; +/* Flow Control Settings */ +enum wx_fc_mode { + wx_fc_none = 0, + wx_fc_rx_pause, + wx_fc_tx_pause, + wx_fc_full +}; + struct wx_fc_info { u32 high_water; /* Flow Ctrl High-water */ u32 low_water; /* Flow Ctrl Low-water */ + enum wx_fc_mode mode; /* Flow Control Mode */ }; /* Statistics counters collected by the MAC */ @@ -1167,7 +1179,8 @@ struct wx_hw_stats { u64 mptc; u64 roc; u64 ruc; - u64 lxonoffrxc; + u64 lxonrxc; + u64 lxoffrxc; u64 lxontxc; u64 lxofftxc; u64 o2bgptc; @@ -1184,6 +1197,8 @@ struct wx_hw_stats { struct wx_last_stats { u32 qmprc[128]; + u32 lxoffrxc; + u32 lxonrxc; }; enum wx_state { From 65782b2db7321d5f97c16718c4c7f6c7205a56be Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 8 Apr 2026 17:24:36 +0200 Subject: [PATCH 1395/1561] net/sched: cls_fw: fix NULL dereference of "old" filters before change() Like pointed out by Sashiko [1], since commit ed76f5edccc9 ("net: sched: protect filter_chain list with filter_chain_lock mutex") TC filters are added to a shared block and published to datapath before their ->change() function is called. This is a problem for cls_fw: an invalid filter created with the "old" method can still classify some packets before it is destroyed by the validation logic added by Xiang. Therefore, insisting with repeated runs of the following script: # ip link add dev crash0 type dummy # ip link set dev crash0 up # mausezahn crash0 -c 100000 -P 10 \ > -A 4.3.2.1 -B 1.2.3.4 -t udp "dp=1234" -q & # sleep 1 # tc qdisc add dev crash0 egress_block 1 clsact # tc filter add block 1 protocol ip prio 1 matchall \ > action skbedit mark 65536 continue # tc filter add block 1 protocol ip prio 2 fw # ip link del dev crash0 can still make fw_classify() hit the WARN_ON() in [2]: WARNING: ./include/net/pkt_cls.h:88 at fw_classify+0x244/0x250 [cls_fw], CPU#18: mausezahn/1399 Modules linked in: cls_fw(E) act_skbedit(E) CPU: 18 UID: 0 PID: 1399 Comm: mausezahn Tainted: G E 7.0.0-rc6-virtme #17 PREEMPT(full) Tainted: [E]=UNSIGNED_MODULE Hardware name: Red Hat KVM, BIOS 1.16.3-2.el9 04/01/2014 RIP: 0010:fw_classify+0x244/0x250 [cls_fw] Code: 5c 49 c7 45 00 00 00 00 00 41 5d 41 5e 41 5f 5d c3 cc cc cc cc 5b b8 ff ff ff ff 41 5c 41 5d 41 5e 41 5f 5d c3 cc cc cc cc 90 <0f> 0b 90 eb a0 0f 1f 80 00 00 00 00 90 90 90 90 90 90 90 90 90 90 RSP: 0018:ffffd1b7026bf8a8 EFLAGS: 00010202 RAX: ffff8c5ac9c60800 RBX: ffff8c5ac99322c0 RCX: 0000000000000004 RDX: 0000000000000001 RSI: ffff8c5b74d7a000 RDI: ffff8c5ac8284f40 RBP: ffffd1b7026bf8d0 R08: 0000000000000000 R09: ffffd1b7026bf9b0 R10: 00000000ffffffff R11: 0000000000000000 R12: 0000000000010000 R13: ffffd1b7026bf930 R14: ffff8c5ac8284f40 R15: 0000000000000000 FS: 00007fca40c37740(0000) GS:ffff8c5b74d7a000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fca40e822a0 CR3: 0000000005ca0001 CR4: 0000000000172ef0 Call Trace: tcf_classify+0x17d/0x5c0 tc_run+0x9d/0x150 __dev_queue_xmit+0x2ab/0x14d0 ip_finish_output2+0x340/0x8f0 ip_output+0xa4/0x250 raw_sendmsg+0x147d/0x14b0 __sys_sendto+0x1cc/0x1f0 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x126/0xf80 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7fca40e822ba Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 7e c3 0f 1f 44 00 00 41 54 48 83 ec 30 44 89 RSP: 002b:00007ffc248a42c8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 000055ef233289d0 RCX: 00007fca40e822ba RDX: 000000000000001e RSI: 000055ef23328c30 RDI: 0000000000000003 RBP: 000055ef233289d0 R08: 00007ffc248a42d0 R09: 0000000000000010 R10: 0000000000000000 R11: 0000000000000246 R12: 000000000000001e R13: 00000000000186a0 R14: 0000000000000000 R15: 00007fca41043000 irq event stamp: 1045778 hardirqs last enabled at (1045784): [] __up_console_sem+0x52/0x60 hardirqs last disabled at (1045789): [] __up_console_sem+0x37/0x60 softirqs last enabled at (1045426): [] __alloc_skb+0x207/0x260 softirqs last disabled at (1045434): [] __dev_queue_xmit+0x78/0x14d0 Then, because of the value in the packet's mark, dereference on 'q->handle' with NULL 'q' occurs: BUG: kernel NULL pointer dereference, address: 0000000000000038 [...] RIP: 0010:fw_classify+0x1fe/0x250 [cls_fw] [...] Skip "old-style" classification on shared blocks, so that the NULL dereference is fixed and WARN_ON() is not hit anymore in the short lifetime of invalid cls_fw "old-style" filters. [1] https://sashiko.dev/#/patchset/20260331050217.504278-1-xmei5%40asu.edu [2] https://elixir.bootlin.com/linux/v7.0-rc6/source/include/net/pkt_cls.h#L86 Fixes: faeea8bbf6e9 ("net/sched: cls_fw: fix NULL pointer dereference on shared blocks") Fixes: ed76f5edccc9 ("net: sched: protect filter_chain list with filter_chain_lock mutex") Acked-by: Jamal Hadi Salim Signed-off-by: Davide Caratti Link: https://patch.msgid.link/e39cbd3103a337f1e515d186fe697b4459d24757.1775661704.git.dcaratti@redhat.com Signed-off-by: Jakub Kicinski --- net/sched/cls_fw.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 23884ef8b80c..646a730dca93 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -74,9 +74,13 @@ TC_INDIRECT_SCOPE int fw_classify(struct sk_buff *skb, } } } else { - struct Qdisc *q = tcf_block_q(tp->chain->block); + struct Qdisc *q; /* Old method: classify the packet using its skb mark. */ + if (tcf_block_shared(tp->chain->block)) + return -1; + + q = tcf_block_q(tp->chain->block); if (id && (TC_H_MAJ(id) == 0 || !(TC_H_MAJ(id ^ q->handle)))) { res->classid = id; From 297e1f411ed4927a912c7e207ba6f978cb1f9f0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alvin=20=C5=A0ipraga?= Date: Wed, 8 Apr 2026 17:31:01 -0300 Subject: [PATCH 1396/1561] net: dsa: tag_rtl8_4: update format description MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Document the updated tag layout fields (EFID, VSEL/VIDX) and clarify which bits are set/cleared when emitting tags. Co-developed-by: Alvin Šipraga Signed-off-by: Alvin Šipraga Signed-off-by: Luiz Angelo Daros de Luca Reviewed-by: Linus Walleij Link: https://patch.msgid.link/20260408-realtek_fixes-v1-1-915ff1404d56@gmail.com Signed-off-by: Jakub Kicinski --- net/dsa/tag_rtl8_4.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/net/dsa/tag_rtl8_4.c b/net/dsa/tag_rtl8_4.c index 2464545da4d2..b7ed39c5419f 100644 --- a/net/dsa/tag_rtl8_4.c +++ b/net/dsa/tag_rtl8_4.c @@ -17,8 +17,8 @@ * | (8-bit) | (8-bit) | * | Protocol [0x04] | REASON | b * |-----------------------------------+-----------------------------------| y - * | (1) | (1) | (2) | (1) | (3) | (1) | (1) | (1) | (5) | t - * | FID_EN | X | FID | PRI_EN | PRI | KEEP | X | LEARN_DIS | X | e + * | (1) | (3) | (1) | (3) | (1) | (1) | (1) | (5) | t + * | EFID_EN | EFID | PRI_EN | PRI | KEEP | VSEL | LEARN_DIS | VIDX | e * |-----------------------------------+-----------------------------------| s * | (1) | (15-bit) | | * | ALLOW | TX/RX | v @@ -32,19 +32,22 @@ * EtherType | note that Realtek uses the same EtherType for * | other incompatible tag formats (e.g. tag_rtl4_a.c) * Protocol | 0x04: indicates that this tag conforms to this format - * X | reserved * ------------+------------- * REASON | reason for forwarding packet to CPU * | 0: packet was forwarded or flooded to CPU * | 80: packet was trapped to CPU - * FID_EN | 1: packet has an FID - * | 0: no FID - * FID | FID of packet (if FID_EN=1) + * EFID_EN | 1: packet has an EFID + * | 0: no EFID + * EFID | Extended filter ID (EFID) of packet (if EFID_EN=1) * PRI_EN | 1: force priority of packet * | 0: don't force priority * PRI | priority of packet (if PRI_EN=1) * KEEP | preserve packet VLAN tag format + * VSEL | 0: switch should classify packet according to VLAN tag + * | 1: switch should classify packet according to VLAN membership + * | configuration with index VIDX * LEARN_DIS | don't learn the source MAC address of the packet + * VIDX | index of a VLAN membership configuration to use with VSEL * ALLOW | 1: treat TX/RX field as an allowance port mask, meaning the * | packet may only be forwarded to ports specified in the * | mask @@ -111,7 +114,7 @@ static void rtl8_4_write_tag(struct sk_buff *skb, struct net_device *dev, /* Set Protocol; zero REASON */ tag16[1] = htons(FIELD_PREP(RTL8_4_PROTOCOL, RTL8_4_PROTOCOL_RTL8365MB)); - /* Zero FID_EN, FID, PRI_EN, PRI, KEEP; set LEARN_DIS */ + /* Zero EFID_EN, EFID, PRI_EN, PRI, VSEL, VIDX, KEEP; set LEARN_DIS */ tag16[2] = htons(FIELD_PREP(RTL8_4_LEARN_DIS, 1)); /* Zero ALLOW; set RX (CPU->switch) forwarding port mask */ From 82f37bd9a4d779495479c0c13152208d5400c8a4 Mon Sep 17 00:00:00 2001 From: Luiz Angelo Daros de Luca Date: Wed, 8 Apr 2026 17:31:02 -0300 Subject: [PATCH 1397/1561] net: dsa: tag_rtl8_4: set KEEP flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KEEP=1 is needed because we should respect the format of the packet as the kernel sends it to us. Unless tx forward offloading is used, the kernel is giving us the packet exactly as it should leave the specified port on the wire. Until now this was not needed because the ports were always functioning in a standalone mode in a VLAN-unaware way, so the switch would not tag or untag frames anyway. But arguably it should have been KEEP=1 all along. Co-developed-by: Alvin Šipraga Signed-off-by: Alvin Šipraga Signed-off-by: Luiz Angelo Daros de Luca Reviewed-by: Linus Walleij Link: https://patch.msgid.link/20260408-realtek_fixes-v1-2-915ff1404d56@gmail.com Signed-off-by: Jakub Kicinski --- net/dsa/tag_rtl8_4.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/dsa/tag_rtl8_4.c b/net/dsa/tag_rtl8_4.c index b7ed39c5419f..852c6b88079a 100644 --- a/net/dsa/tag_rtl8_4.c +++ b/net/dsa/tag_rtl8_4.c @@ -99,6 +99,7 @@ #define RTL8_4_REASON_TRAP 80 #define RTL8_4_LEARN_DIS BIT(5) +#define RTL8_4_KEEP BIT(7) #define RTL8_4_TX GENMASK(3, 0) #define RTL8_4_RX GENMASK(10, 0) @@ -114,8 +115,9 @@ static void rtl8_4_write_tag(struct sk_buff *skb, struct net_device *dev, /* Set Protocol; zero REASON */ tag16[1] = htons(FIELD_PREP(RTL8_4_PROTOCOL, RTL8_4_PROTOCOL_RTL8365MB)); - /* Zero EFID_EN, EFID, PRI_EN, PRI, VSEL, VIDX, KEEP; set LEARN_DIS */ - tag16[2] = htons(FIELD_PREP(RTL8_4_LEARN_DIS, 1)); + /* Zero EFID_EN, EFID, PRI_EN, PRI, VSEL, VIDX; set KEEP, LEARN_DIS */ + tag16[2] = htons(FIELD_PREP(RTL8_4_LEARN_DIS, 1) | + FIELD_PREP(RTL8_4_KEEP, 1)); /* Zero ALLOW; set RX (CPU->switch) forwarding port mask */ tag16[3] = htons(FIELD_PREP(RTL8_4_RX, dsa_xmit_port_mask(skb, dev))); From b258cba1e05df758e4e99a0e374da3e044618475 Mon Sep 17 00:00:00 2001 From: Chris J Arges Date: Wed, 8 Apr 2026 14:10:47 -0500 Subject: [PATCH 1398/1561] net: Add net_cookie to Dead loop messages Network devices can have the same name within different network namespaces. To help distinguish these devices, add the net_cookie value which can be used to identify the netns. Signed-off-by: Chris J Arges Link: https://patch.msgid.link/20260408191056.1036330-1-carges@cloudflare.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 5 +++-- net/ipv4/ip_tunnel_core.c | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index e7bc95cbd1fa..ccc4418b3163 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4894,8 +4894,9 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) } else { /* Recursion is detected! It is possible unfortunately. */ recursion_alert: - net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n", - dev->name); + net_crit_ratelimited("Dead loop on virtual device %s (net %llu), fix it urgently!\n", + dev->name, dev_net(dev)->net_cookie); + rc = -ENETDOWN; } diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index f430d6f0463e..2667f53482bd 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -60,8 +60,8 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, if (unlikely(dev_recursion_level() > IP_TUNNEL_RECURSION_LIMIT)) { if (dev) { - net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n", - dev->name); + net_crit_ratelimited("Dead loop on virtual device %s (net %llu), fix it urgently!\n", + dev->name, dev_net(dev)->net_cookie); DEV_STATS_INC(dev, tx_errors); } ip_rt_put(rt); From 441ec8b5bdcc77f084b63800fd39404d9fc7c9f5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Apr 2026 08:52:37 +0000 Subject: [PATCH 1399/1561] ipvlan: ipvlan_handle_mode_l2() refactoring Reduce indentation level, and add a likely() clause as we expect to process more unicast packets than multicast ones. No functional change, this eases the following patch review. Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260409085238.1122947-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ipvlan/ipvlan_core.c | 38 +++++++++++++++----------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 0b493a8aa338..214fd299a5aa 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -744,34 +744,32 @@ out: static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb, struct ipvl_port *port) { - struct sk_buff *skb = *pskb; + struct sk_buff *nskb, *skb = *pskb; struct ethhdr *eth = eth_hdr(skb); - rx_handler_result_t ret = RX_HANDLER_PASS; if (unlikely(skb->pkt_type == PACKET_LOOPBACK)) return RX_HANDLER_PASS; - if (is_multicast_ether_addr(eth->h_dest)) { - if (ipvlan_external_frame(skb, port)) { - struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); + /* Perform like l3 mode for non-multicast packet */ + if (likely(!is_multicast_ether_addr(eth->h_dest))) + return ipvlan_handle_mode_l3(pskb, port); - /* External frames are queued for device local - * distribution, but a copy is given to master - * straight away to avoid sending duplicates later - * when work-queue processes this frame. This is - * achieved by returning RX_HANDLER_PASS. - */ - if (nskb) { - ipvlan_skb_crossing_ns(nskb, NULL); - ipvlan_multicast_enqueue(port, nskb, false); - } - } - } else { - /* Perform like l3 mode for non-multicast packet */ - ret = ipvlan_handle_mode_l3(pskb, port); + /* External frames are queued for device local + * distribution, but a copy is given to master + * straight away to avoid sending duplicates later + * when work-queue processes this frame. + * This is achieved by returning RX_HANDLER_PASS. + */ + if (!ipvlan_external_frame(skb, port)) + return RX_HANDLER_PASS; + + nskb = skb_clone(skb, GFP_ATOMIC); + if (nskb) { + ipvlan_skb_crossing_ns(nskb, NULL); + ipvlan_multicast_enqueue(port, nskb, false); } - return ret; + return RX_HANDLER_PASS; } rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb) From 6dd82499fa6c468237801541589eb83023d9fd46 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Apr 2026 08:52:38 +0000 Subject: [PATCH 1400/1561] ipvlan: avoid spinlock contention in ipvlan_multicast_enqueue() Under high stress, we spend a lot of time cloning skbs, then acquiring a spinlock, then freeing the clone because the queue is full. Add a shortcut to avoid these costs under pressure, as we did in macvlan with commit 0d5dc1d7aad1 ("macvlan: avoid spinlock contention in macvlan_broadcast_enqueue()") Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260409085238.1122947-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ipvlan/ipvlan_core.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 214fd299a5aa..1be8620ad397 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -763,10 +763,16 @@ static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb, if (!ipvlan_external_frame(skb, port)) return RX_HANDLER_PASS; - nskb = skb_clone(skb, GFP_ATOMIC); + if (skb_queue_len_lockless(&port->backlog) >= IPVLAN_QBACKLOG_LIMIT) + nskb = NULL; + else + nskb = skb_clone(skb, GFP_ATOMIC); + if (nskb) { ipvlan_skb_crossing_ns(nskb, NULL); ipvlan_multicast_enqueue(port, nskb, false); + } else { + dev_core_stats_rx_dropped_inc(skb->dev); } return RX_HANDLER_PASS; From f5148298b0fe18cc91f07584bd0f75cbace3cece Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Apr 2026 10:11:47 +0000 Subject: [PATCH 1401/1561] tcp: return a drop_reason from tcp_add_backlog() Part of a stack canary removal from tcp_v{4,6}_rcv(). Return a drop_reason instead of a boolean, so that we no longer have to pass the address of a local variable. $ scripts/bloat-o-meter -t vmlinux.old vmlinux.new add/remove: 0/0 grow/shrink: 0/3 up/down: 0/-37 (-37) Function old new delta tcp_v6_rcv 3133 3129 -4 tcp_v4_rcv 3206 3202 -4 tcp_add_backlog 1281 1252 -29 Total: Before=25567186, After=25567149, chg -0.00% Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260409101147.1642967-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 3 +-- net/ipv4/tcp_ipv4.c | 21 +++++++++------------ net/ipv6/tcp_ipv6.c | 3 ++- 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 6156d1d068e1..fce4b653c23e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1680,8 +1680,7 @@ static inline bool tcp_checksum_complete(struct sk_buff *skb) __skb_checksum_complete(skb); } -bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb, - enum skb_drop_reason *reason); +enum skb_drop_reason tcp_add_backlog(struct sock *sk, struct sk_buff *skb); static inline int tcp_filter(struct sock *sk, struct sk_buff *skb, enum skb_drop_reason *reason) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 69ab236072e7..c9bbbf323648 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1900,8 +1900,7 @@ err_discard: } EXPORT_SYMBOL(tcp_v4_do_rcv); -bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb, - enum skb_drop_reason *reason) +enum skb_drop_reason tcp_add_backlog(struct sock *sk, struct sk_buff *skb) { u32 tail_gso_size, tail_gso_segs; struct skb_shared_info *shinfo; @@ -1929,10 +1928,9 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb, if (unlikely(tcp_checksum_complete(skb))) { bh_unlock_sock(sk); trace_tcp_bad_csum(skb); - *reason = SKB_DROP_REASON_TCP_CSUM; __TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); __TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); - return true; + return SKB_DROP_REASON_TCP_CSUM; } /* Attempt coalescing to last skb in backlog, even if we are @@ -2006,7 +2004,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb, __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGCOALESCE); kfree_skb_partial(skb, fragstolen); - return false; + return SKB_NOT_DROPPED_YET; } __skb_push(skb, hdrlen); @@ -2031,15 +2029,13 @@ no_coalesce: if (unlikely(err)) { bh_unlock_sock(sk); if (err == -ENOMEM) { - *reason = SKB_DROP_REASON_PFMEMALLOC; __NET_INC_STATS(sock_net(sk), LINUX_MIB_PFMEMALLOCDROP); - } else { - *reason = SKB_DROP_REASON_SOCKET_BACKLOG; - __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP); + return SKB_DROP_REASON_PFMEMALLOC; } - return true; + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP); + return SKB_DROP_REASON_SOCKET_BACKLOG; } - return false; + return SKB_NOT_DROPPED_YET; } static void tcp_v4_restore_cb(struct sk_buff *skb) @@ -2247,7 +2243,8 @@ process: if (!sock_owned_by_user(sk)) { ret = tcp_v4_do_rcv(sk, skb); } else { - if (tcp_add_backlog(sk, skb, &drop_reason)) + drop_reason = tcp_add_backlog(sk, skb); + if (drop_reason) goto discard_and_relse; } bh_unlock_sock(sk); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 8dc3874e8b92..1d37826e8480 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1877,7 +1877,8 @@ process: if (!sock_owned_by_user(sk)) { ret = tcp_v6_do_rcv(sk, skb); } else { - if (tcp_add_backlog(sk, skb, &drop_reason)) + drop_reason = tcp_add_backlog(sk, skb); + if (drop_reason) goto discard_and_relse; } bh_unlock_sock(sk); From 0aa72fc37e15974827ceb72c5cf8e57085a29301 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 10 Apr 2026 08:36:00 -0700 Subject: [PATCH 1402/1561] net: fix reference tracker mismanagement in netdev_put_lock() dev_put() releases a reference which didn't have a tracker. References without a tracker are accounted in the tracking code as "no_tracker". We can't free the tracker and then call dev_put(). The references themselves will be fine but the tracking code will think it's a double-release: refcount_t: decrement hit 0; leaking memory. IOW commit under fixes confused dev_put() (release never tracked reference) with __dev_put() (just release the reference, skipping the reference tracking infra). Since __netdev_put_lock() uses dev_put() we can't feed a previously tracked netdev ref into it. Let's flip things around. netdev_put(dev, NULL) is the same as dev_put(dev) so make netdev_put_lock() the real function and have __netdev_put_lock() feed it a NULL tracker for all the cases that were untracked. Fixes: d04686d9bc86 ("net: Implement netdev_nl_queue_create_doit") Acked-by: Daniel Borkmann Link: https://patch.msgid.link/20260410153600.1984522-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/core/dev.c | 16 +++++----------- net/core/dev.h | 8 +++++++- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index ccc4418b3163..e59f6025067c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1060,16 +1060,18 @@ struct net_device *dev_get_by_napi_id(unsigned int napi_id) * This helper is intended for locking net_device after it has been looked up * using a lockless lookup helper. Lock prevents the instance from going away. */ -struct net_device *__netdev_put_lock(struct net_device *dev, struct net *net) +struct net_device * +netdev_put_lock(struct net_device *dev, struct net *net, + netdevice_tracker *tracker) { netdev_lock(dev); if (dev->reg_state > NETREG_REGISTERED || dev->moving_ns || !net_eq(dev_net(dev), net)) { netdev_unlock(dev); - dev_put(dev); + netdev_put(dev, tracker); return NULL; } - dev_put(dev); + netdev_put(dev, tracker); return dev; } @@ -1121,14 +1123,6 @@ netdev_get_by_index_lock_ops_compat(struct net *net, int ifindex) return __netdev_put_lock_ops_compat(dev, net); } -struct net_device * -netdev_put_lock(struct net_device *dev, struct net *net, - netdevice_tracker *tracker) -{ - netdev_tracker_free(dev, tracker); - return __netdev_put_lock(dev, net); -} - struct net_device * netdev_xa_find_lock(struct net *net, struct net_device *dev, unsigned long *index) diff --git a/net/core/dev.h b/net/core/dev.h index 376bac4a82da..628bdaebf0ca 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -31,9 +31,15 @@ struct napi_struct * netdev_napi_by_id_lock(struct net *net, unsigned int napi_id); struct net_device *dev_get_by_napi_id(unsigned int napi_id); -struct net_device *__netdev_put_lock(struct net_device *dev, struct net *net); struct net_device *netdev_put_lock(struct net_device *dev, struct net *net, netdevice_tracker *tracker); + +static inline struct net_device * +__netdev_put_lock(struct net_device *dev, struct net *net) +{ + return netdev_put_lock(dev, net, NULL); +} + struct net_device * netdev_xa_find_lock(struct net *net, struct net_device *dev, unsigned long *index); From e46ff213f7a5f5aaebd6bca589517844aa0fe73a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 9 Apr 2026 18:39:21 -0700 Subject: [PATCH 1403/1561] selftests: net: py: add test case filtering and listing When developing new test cases and reproducing failures in existing ones we currently have to run the entire test which can take minutes to finish. Add command line options for test selection, modeled after kselftest_harness.h: -l list tests (filtered, if filters were specified) -t name include test -T name exclude test Since we don't have as clean separation into fixture / variant / test as kselftest_harness this is not really a 1 to 1 match. We have to lean on glob patterns instead. Like in kselftest_harness filters are evaluated in order, first match wins. If only exclusions are specified everything else is included and vice versa. Glob patterns (*, ?, [) are supported in addition to exact matching. Reviewed-by: Willem de Bruijn Tested-by: Gal Pressman Reviewed-by: Breno Leitao Link: https://patch.msgid.link/20260410013921.1710295-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/lib/py/ksft.py | 65 +++++++++++++++++++++- 1 file changed, 62 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py index 7083c99c9444..81287c2daff0 100644 --- a/tools/testing/selftests/net/lib/py/ksft.py +++ b/tools/testing/selftests/net/lib/py/ksft.py @@ -1,6 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 +import fnmatch import functools +import getopt import inspect import os import signal @@ -32,6 +34,34 @@ class KsftTerminate(KeyboardInterrupt): pass +class _KsftArgs: + def __init__(self): + self.list_tests = False + self.filters = [] + + try: + opts, _ = getopt.getopt(sys.argv[1:], 'hlt:T:') + except getopt.GetoptError as e: + print(e, file=sys.stderr) + sys.exit(1) + + for opt, val in opts: + if opt == '-h': + print(f"Usage: {sys.argv[0]} [-h|-l] [-t|-T name]\n" + f"\t-h print help\n" + f"\t-l list tests (filtered, if filters were specified)\n" + f"\t-t name include test\n" + f"\t-T name exclude test", + file=sys.stderr) + sys.exit(0) + elif opt == '-l': + self.list_tests = True + elif opt == '-t': + self.filters.append((True, val)) + elif opt == '-T': + self.filters.append((False, val)) + + @functools.lru_cache() def _ksft_supports_color(): if os.environ.get("NO_COLOR") is not None: @@ -298,8 +328,26 @@ def _ksft_intr(signum, frame): ksft_pr(f"Ignoring SIGTERM (cnt: {term_cnt}), already exiting...") -def _ksft_generate_test_cases(cases, globs, case_pfx, args): - """Generate a flat list of (func, args, name) tuples""" +def _ksft_name_matches(name, pattern): + if '*' in pattern or '?' in pattern or '[' in pattern: + return fnmatch.fnmatchcase(name, pattern) + return name == pattern + + +def _ksft_test_enabled(name, filters): + has_positive = False + for include, pattern in filters: + has_positive |= include + if _ksft_name_matches(name, pattern): + return include + return not has_positive + + +def _ksft_generate_test_cases(cases, globs, case_pfx, args, cli_args): + """Generate a filtered list of (func, args, name) tuples. + + If -l is given, prints matching test names and exits. + """ cases = cases or [] test_cases = [] @@ -329,11 +377,22 @@ def _ksft_generate_test_cases(cases, globs, case_pfx, args): else: test_cases.append((func, args, func.__name__)) + if cli_args.filters: + test_cases = [tc for tc in test_cases + if _ksft_test_enabled(tc[2], cli_args.filters)] + + if cli_args.list_tests: + for _, _, name in test_cases: + print(name) + sys.exit(0) + return test_cases def ksft_run(cases=None, globs=None, case_pfx=None, args=()): - test_cases = _ksft_generate_test_cases(cases, globs, case_pfx, args) + cli_args = _KsftArgs() + test_cases = _ksft_generate_test_cases(cases, globs, case_pfx, args, + cli_args) global term_cnt term_cnt = 0 From 59818773bab657f629a60ca534d198b85944417f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 10 Apr 2026 15:06:02 +0200 Subject: [PATCH 1404/1561] net: Rename ifq_idx to rxq_idx in netif_mp_* helpers Rename the leftover ifq_idx parameter naming to rxq_idx to be consistent with the rest of the file and the header declaration. Back then this was taken out of the queue leasing series given the cleanup is independent. No functional change. Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/netdev/20260131160237.07789674@kernel.org Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20260410130602.552600-1-daniel@iogearbox.net Signed-off-by: Jakub Kicinski --- net/core/netdev_rx_queue.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c index 8771e06a0afe..de4dac4c88b3 100644 --- a/net/core/netdev_rx_queue.c +++ b/net/core/netdev_rx_queue.c @@ -275,14 +275,14 @@ int netif_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, return ret; } -static void __netif_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx, +static void __netif_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx, const struct pp_memory_provider_params *old_p) { struct netdev_queue_config qcfg[2]; struct netdev_rx_queue *rxq; int err; - rxq = __netif_get_rx_queue(dev, ifq_idx); + rxq = __netif_get_rx_queue(dev, rxq_idx); /* Callers holding a netdev ref may get here after we already * went thru shutdown via dev_memory_provider_uninstall(). @@ -295,28 +295,28 @@ static void __netif_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx, rxq->mp_params.mp_priv != old_p->mp_priv)) return; - netdev_queue_config(dev, ifq_idx, &qcfg[0]); + netdev_queue_config(dev, rxq_idx, &qcfg[0]); memset(&rxq->mp_params, 0, sizeof(rxq->mp_params)); - netdev_queue_config(dev, ifq_idx, &qcfg[1]); + netdev_queue_config(dev, rxq_idx, &qcfg[1]); - err = netdev_rx_queue_reconfig(dev, ifq_idx, &qcfg[0], &qcfg[1]); + err = netdev_rx_queue_reconfig(dev, rxq_idx, &qcfg[0], &qcfg[1]); WARN_ON(err && err != -ENETDOWN); } -void netif_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx, +void netif_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx, const struct pp_memory_provider_params *old_p) { - if (WARN_ON_ONCE(ifq_idx >= dev->real_num_rx_queues)) + if (WARN_ON_ONCE(rxq_idx >= dev->real_num_rx_queues)) return; - if (!netif_rxq_is_leased(dev, ifq_idx)) - return __netif_mp_close_rxq(dev, ifq_idx, old_p); + if (!netif_rxq_is_leased(dev, rxq_idx)) + return __netif_mp_close_rxq(dev, rxq_idx, old_p); - if (!__netif_get_rx_queue_lease(&dev, &ifq_idx, NETIF_VIRT_TO_PHYS)) { + if (!__netif_get_rx_queue_lease(&dev, &rxq_idx, NETIF_VIRT_TO_PHYS)) { WARN_ON_ONCE(1); return; } netdev_lock(dev); - __netif_mp_close_rxq(dev, ifq_idx, old_p); + __netif_mp_close_rxq(dev, rxq_idx, old_p); netdev_unlock(dev); } @@ -339,11 +339,11 @@ void netif_rxq_cleanup_unlease(struct netdev_rx_queue *phys_rxq, struct netdev_rx_queue *virt_rxq) { struct pp_memory_provider_params *p = &phys_rxq->mp_params; - unsigned int ifq_idx = get_netdev_rx_queue_index(phys_rxq); + unsigned int rxq_idx = get_netdev_rx_queue_index(phys_rxq); if (!p->mp_ops) return; __netif_mp_uninstall_rxq(virt_rxq, p); - __netif_mp_close_rxq(phys_rxq->dev, ifq_idx, p); + __netif_mp_close_rxq(phys_rxq->dev, rxq_idx, p); } From 29703d7813f991e4ef80741ee15fe30e529a2192 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 10 Apr 2026 17:49:50 +0000 Subject: [PATCH 1405/1561] tcp: add indirect call wrapper in tcp_conn_request() Small improvement in SYN processing, to directly call tcp_v6_init_seq_and_ts_off() or tcp_v4_init_seq_and_ts_off(). Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260410174950.745670-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 6 ++++++ net/ipv4/tcp_input.c | 5 ++++- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- 4 files changed, 12 insertions(+), 3 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index fce4b653c23e..23ce0329b230 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -3084,4 +3084,10 @@ static inline int tcp_recv_should_stop(struct sock *sk) signal_pending(current); } +INDIRECT_CALLABLE_DECLARE(union tcp_seq_and_ts_off + tcp_v4_init_seq_and_ts_off(const struct net *net, + const struct sk_buff *skb)); +INDIRECT_CALLABLE_DECLARE(union tcp_seq_and_ts_off + tcp_v6_init_seq_and_ts_off(const struct net *net, + const struct sk_buff *skb)); #endif /* _TCP_H */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 7171442c3ed7..021f745747c5 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -7658,7 +7658,10 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, goto drop_and_free; if (tmp_opt.tstamp_ok || (!want_cookie && !isn)) - st = af_ops->init_seq_and_ts_off(net, skb); + st = INDIRECT_CALL_INET(af_ops->init_seq_and_ts_off, + tcp_v6_init_seq_and_ts_off, + tcp_v4_init_seq_and_ts_off, + net, skb); if (tmp_opt.tstamp_ok) { tcp_rsk(req)->req_usec_ts = dst_tcp_usec_ts(dst); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c9bbbf323648..6813b03515a2 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -105,7 +105,7 @@ static DEFINE_PER_CPU(struct sock_bh_locked, ipv4_tcp_sk) = { static DEFINE_MUTEX(tcp_exit_batch_mutex); -static union tcp_seq_and_ts_off +INDIRECT_CALLABLE_SCOPE union tcp_seq_and_ts_off tcp_v4_init_seq_and_ts_off(const struct net *net, const struct sk_buff *skb) { return secure_tcp_seq_and_ts_off(net, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 1d37826e8480..0cfde020d1f0 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -105,7 +105,7 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) } } -static union tcp_seq_and_ts_off +INDIRECT_CALLABLE_SCOPE union tcp_seq_and_ts_off tcp_v6_init_seq_and_ts_off(const struct net *net, const struct sk_buff *skb) { return secure_tcpv6_seq_and_ts_off(net, From f462dca0c8415bf0058d0ffa476354c4476d0f09 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Fri, 10 Apr 2026 07:16:27 -0400 Subject: [PATCH 1406/1561] net/sched: act_ct: Only release RCU read lock after ct_ft When looking up a flow table in act_ct in tcf_ct_flow_table_get(), rhashtable_lookup_fast() internally opens and closes an RCU read critical section before returning ct_ft. The tcf_ct_flow_table_cleanup_work() can complete before refcount_inc_not_zero() is invoked on the returned ct_ft resulting in a UAF on the already freed ct_ft object. This vulnerability can lead to privilege escalation. Analysis from zdi-disclosures@trendmicro.com: When initializing act_ct, tcf_ct_init() is called, which internally triggers tcf_ct_flow_table_get(). static int tcf_ct_flow_table_get(struct net *net, struct tcf_ct_params *params) { struct zones_ht_key key = { .net = net, .zone = params->zone }; struct tcf_ct_flow_table *ct_ft; int err = -ENOMEM; mutex_lock(&zones_mutex); ct_ft = rhashtable_lookup_fast(&zones_ht, &key, zones_params); // [1] if (ct_ft && refcount_inc_not_zero(&ct_ft->ref)) // [2] goto out_unlock; ... } static __always_inline void *rhashtable_lookup_fast( struct rhashtable *ht, const void *key, const struct rhashtable_params params) { void *obj; rcu_read_lock(); obj = rhashtable_lookup(ht, key, params); rcu_read_unlock(); return obj; } At [1], rhashtable_lookup_fast() looks up and returns the corresponding ct_ft from zones_ht . The lookup is performed within an RCU read critical section through rcu_read_lock() / rcu_read_unlock(), which prevents the object from being freed. However, at the point of function return, rcu_read_unlock() has already been called, and there is nothing preventing ct_ft from being freed before reaching refcount_inc_not_zero(&ct_ft->ref) at [2]. This interval becomes the race window, during which ct_ft can be freed. Free Process: tcf_ct_flow_table_put() is executed through the path tcf_ct_cleanup() call_rcu() tcf_ct_params_free_rcu() tcf_ct_params_free() tcf_ct_flow_table_put(). static void tcf_ct_flow_table_put(struct tcf_ct_flow_table *ct_ft) { if (refcount_dec_and_test(&ct_ft->ref)) { rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params); INIT_RCU_WORK(&ct_ft->rwork, tcf_ct_flow_table_cleanup_work); // [3] queue_rcu_work(act_ct_wq, &ct_ft->rwork); } } At [3], tcf_ct_flow_table_cleanup_work() is scheduled as RCU work static void tcf_ct_flow_table_cleanup_work(struct work_struct *work) { struct tcf_ct_flow_table *ct_ft; struct flow_block *block; ct_ft = container_of(to_rcu_work(work), struct tcf_ct_flow_table, rwork); nf_flow_table_free(&ct_ft->nf_ft); block = &ct_ft->nf_ft.flow_block; down_write(&ct_ft->nf_ft.flow_block_lock); WARN_ON(!list_empty(&block->cb_list)); up_write(&ct_ft->nf_ft.flow_block_lock); kfree(ct_ft); // [4] module_put(THIS_MODULE); } tcf_ct_flow_table_cleanup_work() frees ct_ft at [4]. When this function executes between [1] and [2], UAF occurs. This race condition has a very short race window, making it generally difficult to trigger. Therefore, to trigger the vulnerability an msleep(100) was inserted after[1] Fixes: 138470a9b2cc2 ("net/sched: act_ct: fix lockdep splat in tcf_ct_flow_table_get") Reported-by: zdi-disclosures@trendmicro.com Tested-by: Victor Nogueira Signed-off-by: Jamal Hadi Salim Link: https://patch.msgid.link/20260410111627.46611-1-jhs@mojatatu.com Signed-off-by: Jakub Kicinski --- net/sched/act_ct.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 7d5e50c921a0..6158e13c98d3 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -328,9 +328,13 @@ static int tcf_ct_flow_table_get(struct net *net, struct tcf_ct_params *params) int err = -ENOMEM; mutex_lock(&zones_mutex); - ct_ft = rhashtable_lookup_fast(&zones_ht, &key, zones_params); - if (ct_ft && refcount_inc_not_zero(&ct_ft->ref)) + rcu_read_lock(); + ct_ft = rhashtable_lookup(&zones_ht, &key, zones_params); + if (ct_ft && refcount_inc_not_zero(&ct_ft->ref)) { + rcu_read_unlock(); goto out_unlock; + } + rcu_read_unlock(); ct_ft = kzalloc_obj(*ct_ft); if (!ct_ft) From 61119542663cac70898aef532eb57ee41ea9b477 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 10 Apr 2026 00:45:02 +0200 Subject: [PATCH 1407/1561] selftests: netfilter: nft_tproxy.sh: adjust to socat changes Like e65d8b6f3092 ("selftests: drv-net: adjust to socat changes") we need to add shut-none for this test too. The extra 0-packet can trigger a second (unexpected) reply from the server. Fixes: 7e37e0eacd22 ("selftests: netfilter: nft_tproxy.sh: add tcp tests") Reported-by: Jakub Kicinski Closes: https://lore.kernel.org/netdev/20260408152432.24b8ad0d@kernel.org/ Suggested-by: Jakub Kicinski Signed-off-by: Florian Westphal Link: https://patch.msgid.link/20260409224506.27072-1-fw@strlen.de Signed-off-by: Jakub Kicinski --- .../selftests/net/netfilter/nft_tproxy_udp.sh | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/net/netfilter/nft_tproxy_udp.sh b/tools/testing/selftests/net/netfilter/nft_tproxy_udp.sh index d16de13fe5a7..1dc7b0450145 100755 --- a/tools/testing/selftests/net/netfilter/nft_tproxy_udp.sh +++ b/tools/testing/selftests/net/netfilter/nft_tproxy_udp.sh @@ -190,13 +190,13 @@ table inet filter { } EOF - timeout "$timeout" ip netns exec "$nsrouter" socat -u "$socat_ipproto" udp-listen:12345,fork,ip-transparent,reuseport udp:"$ns1_ip_port",ip-transparent,reuseport,bind="$ns2_ip_port" 2>/dev/null & + timeout "$timeout" ip netns exec "$nsrouter" socat -u "$socat_ipproto" udp-listen:12345,fork,ip-transparent,reuseport,shut-none udp:"$ns1_ip_port",ip-transparent,reuseport,bind="$ns2_ip_port",shut-none 2>/dev/null & local tproxy_pid=$! - timeout "$timeout" ip netns exec "$ns2" socat "$socat_ipproto" udp-listen:8080,fork SYSTEM:"echo PONG_NS2" 2>/dev/null & + timeout "$timeout" ip netns exec "$ns2" socat "$socat_ipproto" udp-listen:8080,fork,shut-none SYSTEM:"echo PONG_NS2" 2>/dev/null & local server2_pid=$! - timeout "$timeout" ip netns exec "$ns3" socat "$socat_ipproto" udp-listen:8080,fork SYSTEM:"echo PONG_NS3" 2>/dev/null & + timeout "$timeout" ip netns exec "$ns3" socat "$socat_ipproto" udp-listen:8080,fork,shut-none SYSTEM:"echo PONG_NS3" 2>/dev/null & local server3_pid=$! busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter" 12345 "-u" @@ -205,7 +205,7 @@ EOF local result # request from ns1 to ns2 (forwarded traffic) - result=$(echo I_M_PROXIED | ip netns exec "$ns1" socat -t 2 -T 2 STDIO udp:"$ns2_ip_port",sourceport=18888) + result=$(echo I_M_PROXIED | ip netns exec "$ns1" socat -t 2 -T 2 STDIO udp:"$ns2_ip_port",sourceport=18888,shut-none) if [ "$result" == "$expect_ns1_ns2" ] ;then echo "PASS: tproxy test $testname: ns1 got reply \"$result\" connecting to ns2" else @@ -214,7 +214,7 @@ EOF fi # request from ns1 to ns3 (forwarded traffic) - result=$(echo I_M_PROXIED | ip netns exec "$ns1" socat -t 2 -T 2 STDIO udp:"$ns3_ip_port") + result=$(echo I_M_PROXIED | ip netns exec "$ns1" socat -t 2 -T 2 STDIO udp:"$ns3_ip_port",shut-none) if [ "$result" = "$expect_ns1_ns3" ] ;then echo "PASS: tproxy test $testname: ns1 got reply \"$result\" connecting to ns3" else @@ -223,7 +223,7 @@ EOF fi # request from nsrouter to ns2 (localy originated traffic) - result=$(echo I_M_PROXIED | ip netns exec "$nsrouter" socat -t 2 -T 2 STDIO udp:"$ns2_ip_port") + result=$(echo I_M_PROXIED | ip netns exec "$nsrouter" socat -t 2 -T 2 STDIO udp:"$ns2_ip_port",shut-none) if [ "$result" == "$expect_nsrouter_ns2" ] ;then echo "PASS: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns2" else @@ -232,7 +232,7 @@ EOF fi # request from nsrouter to ns3 (localy originated traffic) - result=$(echo I_M_PROXIED | ip netns exec "$nsrouter" socat -t 2 -T 2 STDIO udp:"$ns3_ip_port") + result=$(echo I_M_PROXIED | ip netns exec "$nsrouter" socat -t 2 -T 2 STDIO udp:"$ns3_ip_port",shut-none) if [ "$result" = "$expect_nsrouter_ns3" ] ;then echo "PASS: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns3" else From 469faa546e7a82be85114e322cec6438790870ff Mon Sep 17 00:00:00 2001 From: Thangaraj Samynathan Date: Fri, 10 Apr 2026 14:27:10 +0530 Subject: [PATCH 1408/1561] net: lan743x: rename chip_rev to fpga_rev The variable chip_rev stores the value read from the FPGA_REV register and represents the FPGA revision. Rename it to fpga_rev to better reflect its meaning. No functional change intended. Signed-off-by: Thangaraj Samynathan Link: https://patch.msgid.link/20260410085710.9246-1-thangaraj.s@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan743x_main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index b4cabde6625a..f3332417162e 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -36,7 +36,7 @@ static bool pci11x1x_is_a0(struct lan743x_adapter *adapter) static void pci11x1x_strap_get_status(struct lan743x_adapter *adapter) { - u32 chip_rev; + u32 fpga_rev; u32 cfg_load; u32 hw_cfg; u32 strap; @@ -63,9 +63,9 @@ static void pci11x1x_strap_get_status(struct lan743x_adapter *adapter) else adapter->is_sgmii_en = false; } else { - chip_rev = lan743x_csr_read(adapter, FPGA_REV); - if (chip_rev) { - if (chip_rev & FPGA_SGMII_OP) + fpga_rev = lan743x_csr_read(adapter, FPGA_REV); + if (fpga_rev) { + if (fpga_rev & FPGA_SGMII_OP) adapter->is_sgmii_en = true; else adapter->is_sgmii_en = false; From e530b484b70552d6222e2327e311f364724ce616 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 10 Apr 2026 09:23:34 +0200 Subject: [PATCH 1409/1561] netkit: Don't emit scrub attribute for single device mode When userspace reads a single mode netkit device via RTM_GETLINK, it receives IFLA_NETKIT_SCRUB=NETKIT_SCRUB_DEFAULT attribute from netkit_fill_info(). If that attribute is echoed back to recreate the device, the seen_scrub presence check in netkit_new_link() causes creation to fail with -EOPNOTSUPP. Since it has no meaning for single devices at this point, just don't dump it. Fixes: 481038960538 ("netkit: Add single device mode for netkit") Signed-off-by: Daniel Borkmann Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20260410072334.548232-1-daniel@iogearbox.net Signed-off-by: Jakub Kicinski --- drivers/net/netkit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c index 5619209329d5..7b56a7ad7a49 100644 --- a/drivers/net/netkit.c +++ b/drivers/net/netkit.c @@ -1214,7 +1214,8 @@ static int netkit_fill_info(struct sk_buff *skb, const struct net_device *dev) return -EMSGSIZE; if (nla_put_u32(skb, IFLA_NETKIT_MODE, nk->mode)) return -EMSGSIZE; - if (nla_put_u32(skb, IFLA_NETKIT_SCRUB, nk->scrub)) + if (nk->pair == NETKIT_DEVICE_PAIR && + nla_put_u32(skb, IFLA_NETKIT_SCRUB, nk->scrub)) return -EMSGSIZE; if (nla_put_u16(skb, IFLA_NETKIT_HEADROOM, dev->needed_headroom)) return -EMSGSIZE; From 5758be283ff8b37beed49e270b908a251d5ca2d7 Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Fri, 10 Apr 2026 11:47:32 +0800 Subject: [PATCH 1410/1561] net: skb: clean up dead code after skb_kfree_head() simplification Since commit 0f42e3f4fe2a ("net: skb: fix cross-cache free of KFENCE-allocated skb head"), skb_kfree_head() always calls kfree() and no longer uses end_offset to distinguish between skb_small_head_cache and generic kmalloc caches. Clean up the leftovers: - Remove the unused end_offset parameter from skb_kfree_head() and update all callers. - Remove the SKB_SMALL_HEAD_HEADROOM guard in __skb_unclone_keeptruesize() which was protecting the old skb_kfree_head() logic. - Update the SKB_SMALL_HEAD_CACHE_SIZE comment to reflect that the non-power-of-2 sizing is no longer used for free-path disambiguation. No functional change. Signed-off-by: Jiayuan Chen Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260410034736.297900-1-jiayuan.chen@linux.dev Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 33 +++++++++------------------------ 1 file changed, 9 insertions(+), 24 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index e50f2d4867c1..c5441154795c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -106,10 +106,9 @@ static struct kmem_cache *skbuff_ext_cache __ro_after_init; #define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(max(MAX_TCP_HEADER, \ GRO_MAX_HEAD_PAD)) -/* We want SKB_SMALL_HEAD_CACHE_SIZE to not be a power of two. - * This should ensure that SKB_SMALL_HEAD_HEADROOM is a unique - * size, and we can differentiate heads from skb_small_head_cache - * vs system slabs by looking at their size (skb_end_offset()). +/* SKB_SMALL_HEAD_CACHE_SIZE is the size used for the skbuff_small_head + * kmem_cache. The non-power-of-2 padding is kept for historical reasons and + * to avoid potential collisions with generic kmalloc bucket sizes. */ #define SKB_SMALL_HEAD_CACHE_SIZE \ (is_power_of_2(SKB_SMALL_HEAD_SIZE) ? \ @@ -1071,7 +1070,7 @@ static int skb_pp_frag_ref(struct sk_buff *skb) return 0; } -static void skb_kfree_head(void *head, unsigned int end_offset) +static void skb_kfree_head(void *head) { kfree(head); } @@ -1085,7 +1084,7 @@ static void skb_free_head(struct sk_buff *skb) return; skb_free_frag(head); } else { - skb_kfree_head(head, skb_end_offset(skb)); + skb_kfree_head(head); } } @@ -2361,7 +2360,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, return 0; nofrags: - skb_kfree_head(data, size); + skb_kfree_head(data); nodata: return -ENOMEM; } @@ -2407,20 +2406,6 @@ int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri) if (likely(skb_end_offset(skb) == saved_end_offset)) return 0; - /* We can not change skb->end if the original or new value - * is SKB_SMALL_HEAD_HEADROOM, as it might break skb_kfree_head(). - */ - if (saved_end_offset == SKB_SMALL_HEAD_HEADROOM || - skb_end_offset(skb) == SKB_SMALL_HEAD_HEADROOM) { - /* We think this path should not be taken. - * Add a temporary trace to warn us just in case. - */ - pr_err_once("__skb_unclone_keeptruesize() skb_end_offset() %u -> %u\n", - saved_end_offset, skb_end_offset(skb)); - WARN_ON_ONCE(1); - return 0; - } - shinfo = skb_shinfo(skb); /* We are about to change back skb->end, @@ -6815,7 +6800,7 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off, if (skb_cloned(skb)) { /* drop the old head gracefully */ if (skb_orphan_frags(skb, gfp_mask)) { - skb_kfree_head(data, size); + skb_kfree_head(data); return -ENOMEM; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) @@ -6922,7 +6907,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off, memcpy((struct skb_shared_info *)(data + size), skb_shinfo(skb), offsetof(struct skb_shared_info, frags[0])); if (skb_orphan_frags(skb, gfp_mask)) { - skb_kfree_head(data, size); + skb_kfree_head(data); return -ENOMEM; } shinfo = (struct skb_shared_info *)(data + size); @@ -6958,7 +6943,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off, /* skb_frag_unref() is not needed here as shinfo->nr_frags = 0. */ if (skb_has_frag_list(skb)) kfree_skb_list(skb_shinfo(skb)->frag_list); - skb_kfree_head(data, size); + skb_kfree_head(data); return -ENOMEM; } skb_release_data(skb, SKB_CONSUMED); From 006679268a2942f897a1d601779867a8dcbb8ed0 Mon Sep 17 00:00:00 2001 From: Luigi Leonardi Date: Wed, 8 Apr 2026 17:21:02 +0200 Subject: [PATCH 1411/1561] vsock/virtio: remove unnecessary call to `virtio_transport_get_ops` `virtio_transport_send_pkt_info` gets all the transport information from the parameter `t_ops`. There is no need to call `virtio_transport_get_ops()`. Remove it. Acked-by: Arseniy Krasnov Acked-by: Michael S. Tsirkin Signed-off-by: Luigi Leonardi Reviewed-by: Stefano Garzarella Link: https://patch.msgid.link/20260408-remove_parameter-v2-1-e00f31cf7a17@redhat.com Signed-off-by: Jakub Kicinski --- net/vmw_vsock/virtio_transport_common.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 8a9fb23c6e85..a152a9e208d0 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -60,8 +60,6 @@ static bool virtio_transport_can_zcopy(const struct virtio_transport *t_ops, return false; /* Check that transport can send data in zerocopy mode. */ - t_ops = virtio_transport_get_ops(info->vsk); - if (t_ops->can_msgzerocopy) { int pages_to_send = iov_iter_npages(iov_iter, MAX_SKB_FRAGS); From 82db77f6fb16d23ea60d0f96dcf2b502a322a28f Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Wed, 8 Apr 2026 16:05:50 -0700 Subject: [PATCH 1412/1561] net: tso: Introduce tso_dma_map and helpers Add struct tso_dma_map to tso.h for tracking DMA addresses of mapped GSO payload data and tso_dma_map_completion_state. The tso_dma_map combines DMA mapping storage with iterator state, allowing drivers to walk pre-mapped DMA regions linearly. Includes fields for the DMA IOVA path (iova_state, iova_offset, total_len) and a fallback per-region path (linear_dma, frags[], frag_idx, offset). The tso_dma_map_completion_state makes the IOVA completion state opaque for drivers. Drivers are expected to allocate this and use the added helpers to update the completion state. Adds skb_frag_phys() to skbuff.h, returning the physical address of a paged fragment's data, which is used by the tso_dma_map helpers introduced in this commit described below. The added TSO DMA map helpers are: tso_dma_map_init(): DMA-maps the linear payload region and all frags upfront. Prefers the DMA IOVA API for a single contiguous mapping with one IOTLB sync; falls back to per-region dma_map_phys() otherwise. Returns 0 on success, cleans up partial mappings on failure. tso_dma_map_cleanup(): Handles both IOVA and fallback teardown paths. tso_dma_map_count(): counts how many descriptors the next N bytes of payload will need. Returns 1 if IOVA is used since the mapping is contiguous. tso_dma_map_next(): yields the next (dma_addr, chunk_len) pair. On the IOVA path, each segment is a single contiguous chunk. On the fallback path, indicates when a chunk starts a new DMA mapping so the driver can set dma_unmap_len on that descriptor for completion-time unmapping. tso_dma_map_completion_save(): updates the completion state. Drivers will call this at xmit time. tso_dma_map_complete(): tears down the mapping at completion time and returns true if the IOVA path was used. If it was not used, this is a no-op and returns false. Suggested-by: Jakub Kicinski Signed-off-by: Joe Damato Link: https://patch.msgid.link/20260408230607.2019402-2-joe@dama.to Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 11 ++ include/net/tso.h | 100 +++++++++++++++ net/core/tso.c | 269 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 380 insertions(+) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 26fe18bcfad8..2bcf78a4de7b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3763,6 +3763,17 @@ static inline void *skb_frag_address_safe(const skb_frag_t *frag) return ptr + skb_frag_off(frag); } +/** + * skb_frag_phys - gets the physical address of the data in a paged fragment + * @frag: the paged fragment buffer + * + * Returns: the physical address of the data within @frag. + */ +static inline phys_addr_t skb_frag_phys(const skb_frag_t *frag) +{ + return page_to_phys(skb_frag_page(frag)) + skb_frag_off(frag); +} + /** * skb_frag_page_copy() - sets the page in a fragment from another fragment * @fragto: skb fragment where page is set diff --git a/include/net/tso.h b/include/net/tso.h index e7e157ae0526..da82aabd1d48 100644 --- a/include/net/tso.h +++ b/include/net/tso.h @@ -3,6 +3,7 @@ #define _TSO_H #include +#include #include #define TSO_HEADER_SIZE 256 @@ -28,4 +29,103 @@ void tso_build_hdr(const struct sk_buff *skb, char *hdr, struct tso_t *tso, void tso_build_data(const struct sk_buff *skb, struct tso_t *tso, int size); int tso_start(struct sk_buff *skb, struct tso_t *tso); +/** + * struct tso_dma_map - DMA mapping state for GSO payload + * @dev: device used for DMA mapping + * @skb: the GSO skb being mapped + * @hdr_len: per-segment header length + * @iova_state: DMA IOVA state (when IOMMU available) + * @iova_offset: global byte offset into IOVA range (IOVA path only) + * @total_len: total payload length + * @frag_idx: current region (-1 = linear, 0..nr_frags-1 = frag) + * @offset: byte offset within current region + * @linear_dma: DMA address of the linear payload + * @linear_len: length of the linear payload + * @nr_frags: number of frags successfully DMA-mapped + * @frags: per-frag DMA address and length + * + * DMA-maps the payload regions of a GSO skb (linear data + frags). + * Prefers the DMA IOVA API for a single contiguous mapping with one + * IOTLB sync; falls back to per-region dma_map_phys() otherwise. + */ +struct tso_dma_map { + struct device *dev; + const struct sk_buff *skb; + unsigned int hdr_len; + /* IOVA path */ + struct dma_iova_state iova_state; + size_t iova_offset; + size_t total_len; + /* Fallback path if IOVA path fails */ + int frag_idx; + unsigned int offset; + dma_addr_t linear_dma; + unsigned int linear_len; + unsigned int nr_frags; + struct { + dma_addr_t dma; + unsigned int len; + } frags[MAX_SKB_FRAGS]; +}; + +/** + * struct tso_dma_map_completion_state - Completion-time cleanup state + * @iova_state: DMA IOVA state (when IOMMU available) + * @total_len: total payload length of the IOVA mapping + * + * Drivers store this on their SW ring at xmit time via + * tso_dma_map_completion_save(), then call tso_dma_map_complete() at + * completion time. + */ +struct tso_dma_map_completion_state { + struct dma_iova_state iova_state; + size_t total_len; +}; + +int tso_dma_map_init(struct tso_dma_map *map, struct device *dev, + const struct sk_buff *skb, unsigned int hdr_len); +void tso_dma_map_cleanup(struct tso_dma_map *map); +unsigned int tso_dma_map_count(struct tso_dma_map *map, unsigned int len); +bool tso_dma_map_next(struct tso_dma_map *map, dma_addr_t *addr, + unsigned int *chunk_len, unsigned int *mapping_len, + unsigned int seg_remaining); + +/** + * tso_dma_map_completion_save - save state needed for completion-time cleanup + * @map: the xmit-time DMA map + * @cstate: driver-owned storage that persists until completion + * + * Should be called at xmit time to update the completion state and later passed + * to tso_dma_map_complete(). + */ +static inline void +tso_dma_map_completion_save(const struct tso_dma_map *map, + struct tso_dma_map_completion_state *cstate) +{ + cstate->iova_state = map->iova_state; + cstate->total_len = map->total_len; +} + +/** + * tso_dma_map_complete - tear down mapping at completion time + * @dev: the device that owns the mapping + * @cstate: state saved by tso_dma_map_completion_save() + * + * Return: true if the IOVA path was used and the mapping has been + * destroyed; false if the fallback per-region path was used and the + * driver must unmap via its normal completion path. + */ +static inline bool +tso_dma_map_complete(struct device *dev, + struct tso_dma_map_completion_state *cstate) +{ + if (dma_use_iova(&cstate->iova_state)) { + dma_iova_destroy(dev, &cstate->iova_state, cstate->total_len, + DMA_TO_DEVICE, 0); + return true; + } + + return false; +} + #endif /* _TSO_H */ diff --git a/net/core/tso.c b/net/core/tso.c index 6df997b9076e..347b3856ddb9 100644 --- a/net/core/tso.c +++ b/net/core/tso.c @@ -3,6 +3,7 @@ #include #include #include +#include #include void tso_build_hdr(const struct sk_buff *skb, char *hdr, struct tso_t *tso, @@ -87,3 +88,271 @@ int tso_start(struct sk_buff *skb, struct tso_t *tso) return hdr_len; } EXPORT_SYMBOL(tso_start); + +static int tso_dma_iova_try(struct device *dev, struct tso_dma_map *map, + phys_addr_t phys, size_t linear_len, + size_t total_len, size_t *offset) +{ + const struct sk_buff *skb; + unsigned int nr_frags; + int i; + + if (!dma_iova_try_alloc(dev, &map->iova_state, phys, total_len)) + return 1; + + skb = map->skb; + nr_frags = skb_shinfo(skb)->nr_frags; + + if (linear_len) { + if (dma_iova_link(dev, &map->iova_state, + phys, *offset, linear_len, + DMA_TO_DEVICE, 0)) + goto iova_fail; + map->linear_len = linear_len; + *offset += linear_len; + } + + for (i = 0; i < nr_frags; i++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + unsigned int frag_len = skb_frag_size(frag); + + if (dma_iova_link(dev, &map->iova_state, + skb_frag_phys(frag), *offset, + frag_len, DMA_TO_DEVICE, 0)) { + map->nr_frags = i; + goto iova_fail; + } + map->frags[i].len = frag_len; + *offset += frag_len; + map->nr_frags = i + 1; + } + + if (dma_iova_sync(dev, &map->iova_state, 0, total_len)) + goto iova_fail; + + return 0; + +iova_fail: + dma_iova_destroy(dev, &map->iova_state, *offset, + DMA_TO_DEVICE, 0); + memset(&map->iova_state, 0, sizeof(map->iova_state)); + + /* reset map state */ + map->frag_idx = -1; + map->offset = 0; + map->linear_len = 0; + map->nr_frags = 0; + + return 1; +} + +/** + * tso_dma_map_init - DMA-map GSO payload regions + * @map: map struct to initialize + * @dev: device for DMA mapping + * @skb: the GSO skb + * @hdr_len: per-segment header length in bytes + * + * DMA-maps the linear payload (after headers) and all frags. + * Prefers the DMA IOVA API (one contiguous mapping, one IOTLB sync); + * falls back to per-region dma_map_phys() when IOVA is not available. + * Positions the iterator at byte 0 of the payload. + * + * Return: 0 on success, -ENOMEM on DMA mapping failure (partial mappings + * are cleaned up internally). + */ +int tso_dma_map_init(struct tso_dma_map *map, struct device *dev, + const struct sk_buff *skb, unsigned int hdr_len) +{ + unsigned int linear_len = skb_headlen(skb) - hdr_len; + unsigned int nr_frags = skb_shinfo(skb)->nr_frags; + size_t total_len = skb->len - hdr_len; + size_t offset = 0; + phys_addr_t phys; + int i; + + map->dev = dev; + map->skb = skb; + map->hdr_len = hdr_len; + map->frag_idx = -1; + map->offset = 0; + map->iova_offset = 0; + map->total_len = total_len; + map->linear_len = 0; + map->nr_frags = 0; + memset(&map->iova_state, 0, sizeof(map->iova_state)); + + if (!total_len) + return 0; + + if (linear_len) + phys = virt_to_phys(skb->data + hdr_len); + else + phys = skb_frag_phys(&skb_shinfo(skb)->frags[0]); + + if (tso_dma_iova_try(dev, map, phys, linear_len, total_len, &offset)) { + /* IOVA path failed, map state was reset. Fallback to + * per-region dma_map_phys() + */ + if (linear_len) { + map->linear_dma = dma_map_phys(dev, phys, linear_len, + DMA_TO_DEVICE, 0); + if (dma_mapping_error(dev, map->linear_dma)) + return -ENOMEM; + map->linear_len = linear_len; + } + + for (i = 0; i < nr_frags; i++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + unsigned int frag_len = skb_frag_size(frag); + + map->frags[i].len = frag_len; + map->frags[i].dma = dma_map_phys(dev, skb_frag_phys(frag), + frag_len, DMA_TO_DEVICE, 0); + if (dma_mapping_error(dev, map->frags[i].dma)) { + tso_dma_map_cleanup(map); + return -ENOMEM; + } + map->nr_frags = i + 1; + } + } + + if (linear_len == 0 && nr_frags > 0) + map->frag_idx = 0; + + return 0; +} +EXPORT_SYMBOL(tso_dma_map_init); + +/** + * tso_dma_map_cleanup - unmap all DMA regions in a tso_dma_map + * @map: the map to clean up + * + * Handles both IOVA and fallback paths. For IOVA, calls + * dma_iova_destroy(). For fallback, unmaps each region individually. + */ +void tso_dma_map_cleanup(struct tso_dma_map *map) +{ + int i; + + if (dma_use_iova(&map->iova_state)) { + dma_iova_destroy(map->dev, &map->iova_state, map->total_len, + DMA_TO_DEVICE, 0); + memset(&map->iova_state, 0, sizeof(map->iova_state)); + } else { + if (map->linear_len) + dma_unmap_phys(map->dev, map->linear_dma, + map->linear_len, DMA_TO_DEVICE, 0); + + for (i = 0; i < map->nr_frags; i++) + dma_unmap_phys(map->dev, map->frags[i].dma, + map->frags[i].len, DMA_TO_DEVICE, 0); + } + + map->linear_len = 0; + map->nr_frags = 0; +} +EXPORT_SYMBOL(tso_dma_map_cleanup); + +/** + * tso_dma_map_count - count descriptors for a payload range + * @map: the payload map + * @len: number of payload bytes in this segment + * + * Counts how many contiguous DMA region chunks the next @len bytes + * will span, without advancing the iterator. On the IOVA path this + * is always 1 (contiguous). On the fallback path, uses region sizes + * from the current position. + * + * Return: the number of descriptors needed for @len bytes of payload. + */ +unsigned int tso_dma_map_count(struct tso_dma_map *map, unsigned int len) +{ + unsigned int offset = map->offset; + int idx = map->frag_idx; + unsigned int count = 0; + + if (!len) + return 0; + + if (dma_use_iova(&map->iova_state)) + return 1; + + while (len > 0) { + unsigned int region_len, chunk; + + if (idx == -1) + region_len = map->linear_len; + else + region_len = map->frags[idx].len; + + chunk = min(len, region_len - offset); + len -= chunk; + count++; + offset = 0; + idx++; + } + + return count; +} +EXPORT_SYMBOL(tso_dma_map_count); + +/** + * tso_dma_map_next - yield the next DMA address range + * @map: the payload map + * @addr: output DMA address + * @chunk_len: output chunk length + * @mapping_len: full DMA mapping length when this chunk starts a new + * mapping region, or 0 when continuing a previous one. + * On the IOVA path this is always 0 (driver must not + * do per-region unmaps; use tso_dma_map_cleanup instead). + * @seg_remaining: bytes left in current segment + * + * Yields the next (dma_addr, chunk_len) pair and advances the iterator. + * On the IOVA path, the entire payload is contiguous so each segment + * is always a single chunk. + * + * Return: true if a chunk was yielded, false when @seg_remaining is 0. + */ +bool tso_dma_map_next(struct tso_dma_map *map, dma_addr_t *addr, + unsigned int *chunk_len, unsigned int *mapping_len, + unsigned int seg_remaining) +{ + unsigned int region_len, chunk; + + if (!seg_remaining) + return false; + + /* IOVA path: contiguous DMA range, no region boundaries */ + if (dma_use_iova(&map->iova_state)) { + *addr = map->iova_state.addr + map->iova_offset; + *chunk_len = seg_remaining; + *mapping_len = 0; + map->iova_offset += seg_remaining; + return true; + } + + /* Fallback path: per-region iteration */ + + if (map->frag_idx == -1) { + region_len = map->linear_len; + chunk = min(seg_remaining, region_len - map->offset); + *addr = map->linear_dma + map->offset; + } else { + region_len = map->frags[map->frag_idx].len; + chunk = min(seg_remaining, region_len - map->offset); + *addr = map->frags[map->frag_idx].dma + map->offset; + } + + *mapping_len = (map->offset == 0) ? region_len : 0; + *chunk_len = chunk; + map->offset += chunk; + + if (map->offset >= region_len) { + map->frag_idx++; + map->offset = 0; + } + + return true; +} +EXPORT_SYMBOL(tso_dma_map_next); From 268c63f2c6b23c80f3c903642f300a8e37ab3aa3 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Wed, 8 Apr 2026 16:05:51 -0700 Subject: [PATCH 1413/1561] net: bnxt: Export bnxt_xmit_get_cfa_action Export bnxt_xmit_get_cfa_action so that it can be used in future commits which add software USO support to bnxt. Suggested-by: Jakub Kicinski Reviewed-by: Pavan Chebbi Signed-off-by: Joe Damato Link: https://patch.msgid.link/20260408230607.2019402-3-joe@dama.to Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index fe8b886ff82e..d4288c458576 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -447,7 +447,7 @@ const u16 bnxt_lhint_arr[] = { TX_BD_FLAGS_LHINT_2048_AND_LARGER, }; -static u16 bnxt_xmit_get_cfa_action(struct sk_buff *skb) +u16 bnxt_xmit_get_cfa_action(struct sk_buff *skb) { struct metadata_dst *md_dst = skb_metadata_dst(skb); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 3558a36ece12..2b40a5bd57af 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2969,6 +2969,7 @@ unsigned int bnxt_get_avail_cp_rings_for_en(struct bnxt *bp); int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init); void bnxt_tx_disable(struct bnxt *bp); void bnxt_tx_enable(struct bnxt *bp); +u16 bnxt_xmit_get_cfa_action(struct sk_buff *skb); void bnxt_sched_reset_txr(struct bnxt *bp, struct bnxt_tx_ring_info *txr, u16 curr); void bnxt_report_link(struct bnxt *bp); From 637237d3d93cab7f183075f4fadbfbf62663c6f4 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Wed, 8 Apr 2026 16:05:52 -0700 Subject: [PATCH 1414/1561] net: bnxt: Add a helper for tx_bd_ext Factor out some code to setup tx_bd_exts into a helper function. This helper will be used by SW USO implementation in the following commits. Suggested-by: Jakub Kicinski Reviewed-by: Pavan Chebbi Signed-off-by: Joe Damato Link: https://patch.msgid.link/20260408230607.2019402-4-joe@dama.to Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 9 ++------- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index d4288c458576..d1f0969b781c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -663,10 +663,9 @@ normal_tx: txbd->tx_bd_opaque = SET_TX_OPAQUE(bp, txr, prod, 2 + last_frag); prod = NEXT_TX(prod); - txbd1 = (struct tx_bd_ext *) - &txr->tx_desc_ring[TX_RING(bp, prod)][TX_IDX(prod)]; + txbd1 = bnxt_init_ext_bd(bp, txr, prod, lflags, vlan_tag_flags, + cfa_action); - txbd1->tx_bd_hsize_lflags = lflags; if (skb_is_gso(skb)) { bool udp_gso = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4); u32 hdr_len; @@ -693,7 +692,6 @@ normal_tx: } else if (skb->ip_summed == CHECKSUM_PARTIAL) { txbd1->tx_bd_hsize_lflags |= cpu_to_le32(TX_BD_FLAGS_TCP_UDP_CHKSUM); - txbd1->tx_bd_mss = 0; } length >>= 9; @@ -706,9 +704,6 @@ normal_tx: flags |= bnxt_lhint_arr[length]; txbd->tx_bd_len_flags_type = cpu_to_le32(flags); - txbd1->tx_bd_cfa_meta = cpu_to_le32(vlan_tag_flags); - txbd1->tx_bd_cfa_action = - cpu_to_le32(cfa_action << TX_BD_CFA_ACTION_SHIFT); txbd0 = txbd; for (i = 0; i < last_frag; i++) { frag = &skb_shinfo(skb)->frags[i]; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 2b40a5bd57af..83b4136ccd31 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2836,6 +2836,24 @@ static inline u32 bnxt_tx_avail(struct bnxt *bp, return bp->tx_ring_size - (used & bp->tx_ring_mask); } +static inline struct tx_bd_ext * +bnxt_init_ext_bd(struct bnxt *bp, struct bnxt_tx_ring_info *txr, + u16 prod, __le32 lflags, u32 vlan_tag_flags, + u32 cfa_action) +{ + struct tx_bd_ext *txbd1; + + txbd1 = (struct tx_bd_ext *) + &txr->tx_desc_ring[TX_RING(bp, prod)][TX_IDX(prod)]; + txbd1->tx_bd_hsize_lflags = lflags; + txbd1->tx_bd_mss = 0; + txbd1->tx_bd_cfa_meta = cpu_to_le32(vlan_tag_flags); + txbd1->tx_bd_cfa_action = + cpu_to_le32(cfa_action << TX_BD_CFA_ACTION_SHIFT); + + return txbd1; +} + static inline void bnxt_writeq(struct bnxt *bp, u64 val, volatile void __iomem *addr) { From 3cb430e62c83823b76fc7dc1aec8dc7bbf81a729 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Wed, 8 Apr 2026 16:05:53 -0700 Subject: [PATCH 1415/1561] net: bnxt: Use dma_unmap_len for TX completion unmapping Store the DMA mapping length in each TX buffer descriptor via dma_unmap_len_set at submit time, and use dma_unmap_len at completion time. This is a no-op for normal packets but prepares for software USO, where header BDs set dma_unmap_len to 0 because the header buffer is unmapped collectively rather than per-segment. Suggested-by: Jakub Kicinski Reviewed-by: Pavan Chebbi Signed-off-by: Joe Damato Link: https://patch.msgid.link/20260408230607.2019402-5-joe@dama.to Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 63 +++++++++++++++-------- 1 file changed, 41 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index d1f0969b781c..bc2dac2f137d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -656,6 +656,7 @@ normal_tx: goto tx_free; dma_unmap_addr_set(tx_buf, mapping, mapping); + dma_unmap_len_set(tx_buf, len, len); flags = (len << TX_BD_LEN_SHIFT) | TX_BD_TYPE_LONG_TX_BD | TX_BD_CNT(last_frag + 2); @@ -720,6 +721,7 @@ normal_tx: tx_buf = &txr->tx_buf_ring[RING_TX(bp, prod)]; netmem_dma_unmap_addr_set(skb_frag_netmem(frag), tx_buf, mapping, mapping); + dma_unmap_len_set(tx_buf, len, len); txbd->tx_bd_haddr = cpu_to_le64(mapping); @@ -809,7 +811,8 @@ static bool __bnxt_tx_int(struct bnxt *bp, struct bnxt_tx_ring_info *txr, u16 hw_cons = txr->tx_hw_cons; unsigned int tx_bytes = 0; u16 cons = txr->tx_cons; - skb_frag_t *frag; + unsigned int dma_len; + dma_addr_t dma_addr; int tx_pkts = 0; bool rc = false; @@ -844,19 +847,27 @@ static bool __bnxt_tx_int(struct bnxt *bp, struct bnxt_tx_ring_info *txr, goto next_tx_int; } - dma_unmap_single(&pdev->dev, dma_unmap_addr(tx_buf, mapping), - skb_headlen(skb), DMA_TO_DEVICE); + if (dma_unmap_len(tx_buf, len)) { + dma_addr = dma_unmap_addr(tx_buf, mapping); + dma_len = dma_unmap_len(tx_buf, len); + + dma_unmap_single(&pdev->dev, dma_addr, dma_len, + DMA_TO_DEVICE); + } + last = tx_buf->nr_frags; for (j = 0; j < last; j++) { - frag = &skb_shinfo(skb)->frags[j]; cons = NEXT_TX(cons); tx_buf = &txr->tx_buf_ring[RING_TX(bp, cons)]; - netmem_dma_unmap_page_attrs(&pdev->dev, - dma_unmap_addr(tx_buf, - mapping), - skb_frag_size(frag), - DMA_TO_DEVICE, 0); + if (dma_unmap_len(tx_buf, len)) { + dma_addr = dma_unmap_addr(tx_buf, mapping); + dma_len = dma_unmap_len(tx_buf, len); + + netmem_dma_unmap_page_attrs(&pdev->dev, + dma_addr, dma_len, + DMA_TO_DEVICE, 0); + } } if (unlikely(is_ts_pkt)) { if (BNXT_CHIP_P5(bp)) { @@ -3394,6 +3405,8 @@ static void bnxt_free_one_tx_ring_skbs(struct bnxt *bp, { int i, max_idx; struct pci_dev *pdev = bp->pdev; + unsigned int dma_len; + dma_addr_t dma_addr; max_idx = bp->tx_nr_pages * TX_DESC_CNT; @@ -3404,9 +3417,10 @@ static void bnxt_free_one_tx_ring_skbs(struct bnxt *bp, if (idx < bp->tx_nr_rings_xdp && tx_buf->action == XDP_REDIRECT) { - dma_unmap_single(&pdev->dev, - dma_unmap_addr(tx_buf, mapping), - dma_unmap_len(tx_buf, len), + dma_addr = dma_unmap_addr(tx_buf, mapping); + dma_len = dma_unmap_len(tx_buf, len); + + dma_unmap_single(&pdev->dev, dma_addr, dma_len, DMA_TO_DEVICE); xdp_return_frame(tx_buf->xdpf); tx_buf->action = 0; @@ -3429,23 +3443,28 @@ static void bnxt_free_one_tx_ring_skbs(struct bnxt *bp, continue; } - dma_unmap_single(&pdev->dev, - dma_unmap_addr(tx_buf, mapping), - skb_headlen(skb), - DMA_TO_DEVICE); + if (dma_unmap_len(tx_buf, len)) { + dma_addr = dma_unmap_addr(tx_buf, mapping); + dma_len = dma_unmap_len(tx_buf, len); + + dma_unmap_single(&pdev->dev, dma_addr, dma_len, + DMA_TO_DEVICE); + } last = tx_buf->nr_frags; i += 2; for (j = 0; j < last; j++, i++) { int ring_idx = i & bp->tx_ring_mask; - skb_frag_t *frag = &skb_shinfo(skb)->frags[j]; tx_buf = &txr->tx_buf_ring[ring_idx]; - netmem_dma_unmap_page_attrs(&pdev->dev, - dma_unmap_addr(tx_buf, - mapping), - skb_frag_size(frag), - DMA_TO_DEVICE, 0); + if (dma_unmap_len(tx_buf, len)) { + dma_addr = dma_unmap_addr(tx_buf, mapping); + dma_len = dma_unmap_len(tx_buf, len); + + netmem_dma_unmap_page_attrs(&pdev->dev, + dma_addr, dma_len, + DMA_TO_DEVICE, 0); + } } dev_kfree_skb(skb); } From 0c26a0e765e70b7342a47dd3a29f5e0734081d7d Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Wed, 8 Apr 2026 16:05:54 -0700 Subject: [PATCH 1416/1561] net: bnxt: Add TX inline buffer infrastructure Add per-ring pre-allocated inline buffer fields (tx_inline_buf, tx_inline_dma, tx_inline_size) to bnxt_tx_ring_info and helpers to allocate and free them. A producer and consumer (tx_inline_prod, tx_inline_cons) are added to track which slot(s) of the inline buffer are in-use. The inline buffer will be used by the SW USO path for pre-allocated, pre-DMA-mapped per-segment header copies. In the future, this could be extended to support TX copybreak. Allocation helper is marked __maybe_unused in this commit because it will be wired in later. Suggested-by: Jakub Kicinski Reviewed-by: Pavan Chebbi Signed-off-by: Joe Damato Link: https://patch.msgid.link/20260408230607.2019402-6-joe@dama.to Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 35 +++++++++++++++++++++++ drivers/net/ethernet/broadcom/bnxt/bnxt.h | 6 ++++ 2 files changed, 41 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index bc2dac2f137d..bd93edb09ee0 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3979,6 +3979,39 @@ static int bnxt_alloc_rx_rings(struct bnxt *bp) return rc; } +static void bnxt_free_tx_inline_buf(struct bnxt_tx_ring_info *txr, + struct pci_dev *pdev) +{ + if (!txr->tx_inline_buf) + return; + + dma_unmap_single(&pdev->dev, txr->tx_inline_dma, + txr->tx_inline_size, DMA_TO_DEVICE); + kfree(txr->tx_inline_buf); + txr->tx_inline_buf = NULL; + txr->tx_inline_size = 0; +} + +static int __maybe_unused bnxt_alloc_tx_inline_buf(struct bnxt_tx_ring_info *txr, + struct pci_dev *pdev, + unsigned int size) +{ + txr->tx_inline_buf = kmalloc(size, GFP_KERNEL); + if (!txr->tx_inline_buf) + return -ENOMEM; + + txr->tx_inline_dma = dma_map_single(&pdev->dev, txr->tx_inline_buf, + size, DMA_TO_DEVICE); + if (dma_mapping_error(&pdev->dev, txr->tx_inline_dma)) { + kfree(txr->tx_inline_buf); + txr->tx_inline_buf = NULL; + return -ENOMEM; + } + txr->tx_inline_size = size; + + return 0; +} + static void bnxt_free_tx_rings(struct bnxt *bp) { int i; @@ -3997,6 +4030,8 @@ static void bnxt_free_tx_rings(struct bnxt *bp) txr->tx_push = NULL; } + bnxt_free_tx_inline_buf(txr, pdev); + ring = &txr->tx_ring_struct; bnxt_free_ring(bp, &ring->ring_mem); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 83b4136ccd31..d98a58aa30f6 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -996,6 +996,12 @@ struct bnxt_tx_ring_info { dma_addr_t tx_push_mapping; __le64 data_mapping; + void *tx_inline_buf; + dma_addr_t tx_inline_dma; + unsigned int tx_inline_size; + u16 tx_inline_prod; + u16 tx_inline_cons; + #define BNXT_DEV_STATE_CLOSING 0x1 u32 dev_state; From 0440e27eedace1c96b46b67d5607348bb07281ec Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Wed, 8 Apr 2026 16:05:55 -0700 Subject: [PATCH 1417/1561] net: bnxt: Add boilerplate GSO code Add bnxt_gso.c and bnxt_gso.h with a stub bnxt_sw_udp_gso_xmit() function, SW USO constants (BNXT_SW_USO_MAX_SEGS, BNXT_SW_USO_MAX_DESCS), and the is_sw_gso field in bnxt_sw_tx_bd with BNXT_SW_GSO_MID/LAST markers. The full SW USO implementation will be added in a future commit. Suggested-by: Jakub Kicinski Reviewed-by: Pavan Chebbi Signed-off-by: Joe Damato Link: https://patch.msgid.link/20260408230607.2019402-7-joe@dama.to Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/Makefile | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 4 +++ drivers/net/ethernet/broadcom/bnxt/bnxt_gso.c | 30 ++++++++++++++++++ drivers/net/ethernet/broadcom/bnxt/bnxt_gso.h | 31 +++++++++++++++++++ 4 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/broadcom/bnxt/bnxt_gso.c create mode 100644 drivers/net/ethernet/broadcom/bnxt/bnxt_gso.h diff --git a/drivers/net/ethernet/broadcom/bnxt/Makefile b/drivers/net/ethernet/broadcom/bnxt/Makefile index ba6c239d52fa..debef78c8b6d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/Makefile +++ b/drivers/net/ethernet/broadcom/bnxt/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_BNXT) += bnxt_en.o -bnxt_en-y := bnxt.o bnxt_hwrm.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o bnxt_ptp.o bnxt_vfr.o bnxt_devlink.o bnxt_dim.o bnxt_coredump.o +bnxt_en-y := bnxt.o bnxt_hwrm.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o bnxt_ptp.o bnxt_vfr.o bnxt_devlink.o bnxt_dim.o bnxt_coredump.o bnxt_gso.o bnxt_en-$(CONFIG_BNXT_FLOWER_OFFLOAD) += bnxt_tc.o bnxt_en-$(CONFIG_DEBUG_FS) += bnxt_debugfs.o bnxt_en-$(CONFIG_BNXT_HWMON) += bnxt_hwmon.o diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index d98a58aa30f6..6b38b84924e0 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -892,6 +892,7 @@ struct bnxt_sw_tx_bd { struct page *page; u8 is_ts_pkt; u8 is_push; + u8 is_sw_gso; u8 action; unsigned short nr_frags; union { @@ -900,6 +901,9 @@ struct bnxt_sw_tx_bd { }; }; +#define BNXT_SW_GSO_MID 1 +#define BNXT_SW_GSO_LAST 2 + struct bnxt_sw_rx_bd { void *data; u8 *data_ptr; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_gso.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_gso.c new file mode 100644 index 000000000000..b296769ee4fe --- /dev/null +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_gso.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Broadcom NetXtreme-C/E network driver. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bnxt.h" +#include "bnxt_gso.h" + +netdev_tx_t bnxt_sw_udp_gso_xmit(struct bnxt *bp, + struct bnxt_tx_ring_info *txr, + struct netdev_queue *txq, + struct sk_buff *skb) +{ + dev_kfree_skb_any(skb); + dev_core_stats_tx_dropped_inc(bp->dev); + return NETDEV_TX_OK; +} diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_gso.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_gso.h new file mode 100644 index 000000000000..f01e8102dcd7 --- /dev/null +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_gso.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Broadcom NetXtreme-C/E network driver. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation. + */ + +#ifndef BNXT_GSO_H +#define BNXT_GSO_H + +/* Maximum segments the stack may send in a single SW USO skb. + * This caps gso_max_segs for NICs without HW USO support. + */ +#define BNXT_SW_USO_MAX_SEGS 64 + +/* Worst-case TX descriptors consumed by one SW USO packet: + * Each segment: 1 long BD + 1 ext BD + payload BDs. + * Total payload BDs across all segs <= num_segs + nr_frags (each frag + * boundary crossing adds at most 1 extra BD). + * So: 3 * max_segs + MAX_SKB_FRAGS + 1 = 3 * 64 + 17 + 1 = 210. + */ +#define BNXT_SW_USO_MAX_DESCS (3 * BNXT_SW_USO_MAX_SEGS + MAX_SKB_FRAGS + 1) + +netdev_tx_t bnxt_sw_udp_gso_xmit(struct bnxt *bp, + struct bnxt_tx_ring_info *txr, + struct netdev_queue *txq, + struct sk_buff *skb); + +#endif From cc5d90667db81474ed7a92a1b2fa3daec5559307 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Wed, 8 Apr 2026 16:05:56 -0700 Subject: [PATCH 1418/1561] net: bnxt: Implement software USO Implement bnxt_sw_udp_gso_xmit() using the core tso_dma_map API and the pre-allocated TX inline buffer for per-segment headers. The xmit path: 1. Calls tso_start() to initialize TSO state 2. Stack-allocates a tso_dma_map and calls tso_dma_map_init() to DMA-map the linear payload and all frags upfront. 3. For each segment: - Copies and patches headers via tso_build_hdr() into the pre-allocated tx_inline_buf (DMA-synced per segment) - Counts payload BDs via tso_dma_map_count() - Emits long BD (header) + ext BD + payload BDs - Payload BDs use tso_dma_map_next() which yields (dma_addr, chunk_len, mapping_len) tuples. Header BDs set dma_unmap_len=0 since the inline buffer is pre-allocated and unmapped only at ring teardown. Completion state is updated by calling tso_dma_map_completion_save() for the last segment. Suggested-by: Jakub Kicinski Signed-off-by: Joe Damato Link: https://patch.msgid.link/20260408230607.2019402-8-joe@dama.to Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 3 + drivers/net/ethernet/broadcom/bnxt/bnxt_gso.c | 210 ++++++++++++++++++ drivers/net/ethernet/broadcom/bnxt/bnxt_gso.h | 6 + 3 files changed, 219 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 6b38b84924e0..fe50576ae525 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -11,6 +11,8 @@ #ifndef BNXT_H #define BNXT_H +#include + #define DRV_MODULE_NAME "bnxt_en" /* DO NOT CHANGE DRV_VER_* defines @@ -899,6 +901,7 @@ struct bnxt_sw_tx_bd { u16 rx_prod; u16 txts_prod; }; + struct tso_dma_map_completion_state sw_gso_cstate; }; #define BNXT_SW_GSO_MID 1 diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_gso.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_gso.c index b296769ee4fe..f317f60414e8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_gso.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_gso.c @@ -19,11 +19,221 @@ #include "bnxt.h" #include "bnxt_gso.h" +static u32 bnxt_sw_gso_lhint(unsigned int len) +{ + if (len <= 512) + return TX_BD_FLAGS_LHINT_512_AND_SMALLER; + else if (len <= 1023) + return TX_BD_FLAGS_LHINT_512_TO_1023; + else if (len <= 2047) + return TX_BD_FLAGS_LHINT_1024_TO_2047; + else + return TX_BD_FLAGS_LHINT_2048_AND_LARGER; +} + netdev_tx_t bnxt_sw_udp_gso_xmit(struct bnxt *bp, struct bnxt_tx_ring_info *txr, struct netdev_queue *txq, struct sk_buff *skb) { + unsigned int last_unmap_len __maybe_unused = 0; + dma_addr_t last_unmap_addr __maybe_unused = 0; + struct bnxt_sw_tx_bd *last_unmap_buf = NULL; + unsigned int hdr_len, mss, num_segs; + struct pci_dev *pdev = bp->pdev; + unsigned int total_payload; + struct tso_dma_map map; + u32 vlan_tag_flags = 0; + int i, bds_needed; + struct tso_t tso; + u16 cfa_action; + __le32 csum; + u16 prod; + + hdr_len = tso_start(skb, &tso); + mss = skb_shinfo(skb)->gso_size; + total_payload = skb->len - hdr_len; + num_segs = DIV_ROUND_UP(total_payload, mss); + + if (unlikely(num_segs <= 1)) + goto drop; + + /* Upper bound on the number of descriptors needed. + * + * Each segment uses 1 long BD + 1 ext BD + payload BDs, which is + * at most num_segs + nr_frags (each frag boundary crossing adds at + * most 1 extra BD). + */ + bds_needed = 3 * num_segs + skb_shinfo(skb)->nr_frags + 1; + + if (unlikely(bnxt_tx_avail(bp, txr) < bds_needed)) { + netif_txq_try_stop(txq, bnxt_tx_avail(bp, txr), + bp->tx_wake_thresh); + return NETDEV_TX_BUSY; + } + + /* BD backpressure alone cannot prevent overwriting in-flight + * headers in the inline buffer. Check slot availability directly. + */ + if (!netif_txq_maybe_stop(txq, bnxt_inline_avail(txr), + num_segs, num_segs)) + return NETDEV_TX_BUSY; + + if (unlikely(tso_dma_map_init(&map, &pdev->dev, skb, hdr_len))) + goto drop; + + cfa_action = bnxt_xmit_get_cfa_action(skb); + if (skb_vlan_tag_present(skb)) { + vlan_tag_flags = TX_BD_CFA_META_KEY_VLAN | + skb_vlan_tag_get(skb); + if (skb->vlan_proto == htons(ETH_P_8021Q)) + vlan_tag_flags |= 1 << TX_BD_CFA_META_TPID_SHIFT; + } + + csum = cpu_to_le32(TX_BD_FLAGS_TCP_UDP_CHKSUM); + if (!tso.ipv6) + csum |= cpu_to_le32(TX_BD_FLAGS_IP_CKSUM); + + prod = txr->tx_prod; + + for (i = 0; i < num_segs; i++) { + unsigned int seg_payload = min_t(unsigned int, mss, + total_payload - i * mss); + u16 slot = (txr->tx_inline_prod + i) & + (BNXT_SW_USO_MAX_SEGS - 1); + struct bnxt_sw_tx_bd *tx_buf; + unsigned int mapping_len; + dma_addr_t this_hdr_dma; + unsigned int chunk_len; + unsigned int offset; + dma_addr_t dma_addr; + struct tx_bd *txbd; + struct udphdr *uh; + void *this_hdr; + int bd_count; + bool last; + u32 flags; + + last = (i == num_segs - 1); + offset = slot * TSO_HEADER_SIZE; + this_hdr = txr->tx_inline_buf + offset; + this_hdr_dma = txr->tx_inline_dma + offset; + + tso_build_hdr(skb, this_hdr, &tso, seg_payload, last); + + /* Zero stale csum fields copied from the original skb; + * HW offload recomputes from scratch. + */ + uh = this_hdr + skb_transport_offset(skb); + uh->check = 0; + if (!tso.ipv6) { + struct iphdr *iph = this_hdr + skb_network_offset(skb); + + iph->check = 0; + } + + dma_sync_single_for_device(&pdev->dev, this_hdr_dma, + hdr_len, DMA_TO_DEVICE); + + bd_count = tso_dma_map_count(&map, seg_payload); + + tx_buf = &txr->tx_buf_ring[RING_TX(bp, prod)]; + txbd = &txr->tx_desc_ring[TX_RING(bp, prod)][TX_IDX(prod)]; + + tx_buf->skb = skb; + tx_buf->nr_frags = bd_count; + tx_buf->is_push = 0; + tx_buf->is_ts_pkt = 0; + + dma_unmap_addr_set(tx_buf, mapping, this_hdr_dma); + dma_unmap_len_set(tx_buf, len, 0); + + if (last) { + tx_buf->is_sw_gso = BNXT_SW_GSO_LAST; + tso_dma_map_completion_save(&map, &tx_buf->sw_gso_cstate); + } else { + tx_buf->is_sw_gso = BNXT_SW_GSO_MID; + } + + flags = (hdr_len << TX_BD_LEN_SHIFT) | + TX_BD_TYPE_LONG_TX_BD | + TX_BD_CNT(2 + bd_count); + + flags |= bnxt_sw_gso_lhint(hdr_len + seg_payload); + + txbd->tx_bd_len_flags_type = cpu_to_le32(flags); + txbd->tx_bd_haddr = cpu_to_le64(this_hdr_dma); + txbd->tx_bd_opaque = SET_TX_OPAQUE(bp, txr, prod, + 2 + bd_count); + + prod = NEXT_TX(prod); + bnxt_init_ext_bd(bp, txr, prod, csum, + vlan_tag_flags, cfa_action); + + /* set dma_unmap_len on the LAST BD touching each + * region. Since completions are in-order, the last segment + * completes after all earlier ones, so the unmap is safe. + */ + while (tso_dma_map_next(&map, &dma_addr, &chunk_len, + &mapping_len, seg_payload)) { + prod = NEXT_TX(prod); + txbd = &txr->tx_desc_ring[TX_RING(bp, prod)][TX_IDX(prod)]; + tx_buf = &txr->tx_buf_ring[RING_TX(bp, prod)]; + + txbd->tx_bd_haddr = cpu_to_le64(dma_addr); + dma_unmap_addr_set(tx_buf, mapping, dma_addr); + dma_unmap_len_set(tx_buf, len, 0); + tx_buf->skb = NULL; + tx_buf->is_sw_gso = 0; + + if (mapping_len) { + if (last_unmap_buf) { + dma_unmap_addr_set(last_unmap_buf, + mapping, + last_unmap_addr); + dma_unmap_len_set(last_unmap_buf, + len, + last_unmap_len); + } + last_unmap_addr = dma_addr; + last_unmap_len = mapping_len; + } + last_unmap_buf = tx_buf; + + flags = chunk_len << TX_BD_LEN_SHIFT; + txbd->tx_bd_len_flags_type = cpu_to_le32(flags); + txbd->tx_bd_opaque = 0; + + seg_payload -= chunk_len; + } + + txbd->tx_bd_len_flags_type |= + cpu_to_le32(TX_BD_FLAGS_PACKET_END); + + prod = NEXT_TX(prod); + } + + if (last_unmap_buf) { + dma_unmap_addr_set(last_unmap_buf, mapping, last_unmap_addr); + dma_unmap_len_set(last_unmap_buf, len, last_unmap_len); + } + + txr->tx_inline_prod += num_segs; + + netdev_tx_sent_queue(txq, skb->len); + + WRITE_ONCE(txr->tx_prod, prod); + /* Sync BDs before doorbell */ + wmb(); + bnxt_db_write(bp, &txr->tx_db, prod); + + if (unlikely(bnxt_tx_avail(bp, txr) <= bp->tx_wake_thresh)) + netif_txq_try_stop(txq, bnxt_tx_avail(bp, txr), + bp->tx_wake_thresh); + + return NETDEV_TX_OK; + +drop: dev_kfree_skb_any(skb); dev_core_stats_tx_dropped_inc(bp->dev); return NETDEV_TX_OK; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_gso.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_gso.h index f01e8102dcd7..6ba8ccc451de 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_gso.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_gso.h @@ -23,6 +23,12 @@ */ #define BNXT_SW_USO_MAX_DESCS (3 * BNXT_SW_USO_MAX_SEGS + MAX_SKB_FRAGS + 1) +static inline u16 bnxt_inline_avail(struct bnxt_tx_ring_info *txr) +{ + return BNXT_SW_USO_MAX_SEGS - + (u16)(txr->tx_inline_prod - READ_ONCE(txr->tx_inline_cons)); +} + netdev_tx_t bnxt_sw_udp_gso_xmit(struct bnxt *bp, struct bnxt_tx_ring_info *txr, struct netdev_queue *txq, From 87550ba2dc39489be292ecb485118ea662800b82 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Wed, 8 Apr 2026 16:05:57 -0700 Subject: [PATCH 1419/1561] net: bnxt: Add SW GSO completion and teardown support Update __bnxt_tx_int and bnxt_free_one_tx_ring_skbs to handle SW GSO segments: - MID segments: adjust tx_pkts/tx_bytes accounting and skip skb free (the skb is shared across all segments and freed only once) - LAST segments: call tso_dma_map_complete() to tear down the IOVA mapping if one was used. On the fallback path, payload DMA unmapping is handled by the existing per-BD dma_unmap_len walk. Both MID and LAST completions advance tx_inline_cons to release the segment's inline header slot back to the ring. is_sw_gso is initialized to zero, so the new code paths are not run. Add logic for feature advertisement and guardrails for ring sizing. Suggested-by: Jakub Kicinski Reviewed-by: Pavan Chebbi Signed-off-by: Joe Damato Link: https://patch.msgid.link/20260408230607.2019402-9-joe@dama.to Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 75 ++++++++++++++++--- .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 19 ++++- drivers/net/ethernet/broadcom/bnxt/bnxt_gso.h | 9 +++ 3 files changed, 92 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index bd93edb09ee0..26aae48a7d0e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -74,6 +74,8 @@ #include "bnxt_debugfs.h" #include "bnxt_coredump.h" #include "bnxt_hwmon.h" +#include "bnxt_gso.h" +#include #define BNXT_TX_TIMEOUT (5 * HZ) #define BNXT_DEF_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_HW | \ @@ -817,12 +819,13 @@ static bool __bnxt_tx_int(struct bnxt *bp, struct bnxt_tx_ring_info *txr, bool rc = false; while (RING_TX(bp, cons) != hw_cons) { - struct bnxt_sw_tx_bd *tx_buf; + struct bnxt_sw_tx_bd *tx_buf, *head_buf; struct sk_buff *skb; bool is_ts_pkt; int j, last; tx_buf = &txr->tx_buf_ring[RING_TX(bp, cons)]; + head_buf = tx_buf; skb = tx_buf->skb; if (unlikely(!skb)) { @@ -869,6 +872,22 @@ static bool __bnxt_tx_int(struct bnxt *bp, struct bnxt_tx_ring_info *txr, DMA_TO_DEVICE, 0); } } + + if (unlikely(head_buf->is_sw_gso)) { + u16 inline_cons = txr->tx_inline_cons + 1; + + WRITE_ONCE(txr->tx_inline_cons, inline_cons); + if (head_buf->is_sw_gso == BNXT_SW_GSO_LAST) { + tso_dma_map_complete(&pdev->dev, + &head_buf->sw_gso_cstate); + } else { + tx_pkts--; + tx_bytes -= skb->len; + skb = NULL; + } + head_buf->is_sw_gso = 0; + } + if (unlikely(is_ts_pkt)) { if (BNXT_CHIP_P5(bp)) { /* PTP worker takes ownership of the skb */ @@ -3412,6 +3431,7 @@ static void bnxt_free_one_tx_ring_skbs(struct bnxt *bp, for (i = 0; i < max_idx;) { struct bnxt_sw_tx_bd *tx_buf = &txr->tx_buf_ring[i]; + struct bnxt_sw_tx_bd *head_buf = tx_buf; struct sk_buff *skb; int j, last; @@ -3466,7 +3486,20 @@ static void bnxt_free_one_tx_ring_skbs(struct bnxt *bp, DMA_TO_DEVICE, 0); } } - dev_kfree_skb(skb); + if (head_buf->is_sw_gso) { + u16 inline_cons = txr->tx_inline_cons + 1; + + WRITE_ONCE(txr->tx_inline_cons, inline_cons); + if (head_buf->is_sw_gso == BNXT_SW_GSO_LAST) { + tso_dma_map_complete(&pdev->dev, + &head_buf->sw_gso_cstate); + } else { + skb = NULL; + } + head_buf->is_sw_gso = 0; + } + if (skb) + dev_kfree_skb(skb); } netdev_tx_reset_queue(netdev_get_tx_queue(bp->dev, idx)); } @@ -3992,9 +4025,9 @@ static void bnxt_free_tx_inline_buf(struct bnxt_tx_ring_info *txr, txr->tx_inline_size = 0; } -static int __maybe_unused bnxt_alloc_tx_inline_buf(struct bnxt_tx_ring_info *txr, - struct pci_dev *pdev, - unsigned int size) +static int bnxt_alloc_tx_inline_buf(struct bnxt_tx_ring_info *txr, + struct pci_dev *pdev, + unsigned int size) { txr->tx_inline_buf = kmalloc(size, GFP_KERNEL); if (!txr->tx_inline_buf) @@ -4097,6 +4130,13 @@ static int bnxt_alloc_tx_rings(struct bnxt *bp) sizeof(struct tx_push_bd); txr->data_mapping = cpu_to_le64(mapping); } + if (!(bp->flags & BNXT_FLAG_UDP_GSO_CAP)) { + rc = bnxt_alloc_tx_inline_buf(txr, pdev, + BNXT_SW_USO_MAX_SEGS * + TSO_HEADER_SIZE); + if (rc) + return rc; + } qidx = bp->tc_to_qidx[j]; ring->queue_id = bp->q_info[qidx].queue_id; spin_lock_init(&txr->xdp_tx_lock); @@ -4635,10 +4675,13 @@ static int bnxt_init_rx_rings(struct bnxt *bp) static int bnxt_init_tx_rings(struct bnxt *bp) { + netdev_features_t features; u16 i; + features = bp->dev->features; + bp->tx_wake_thresh = max_t(int, bp->tx_ring_size / 2, - BNXT_MIN_TX_DESC_CNT); + bnxt_min_tx_desc_cnt(bp, features)); for (i = 0; i < bp->tx_nr_rings; i++) { struct bnxt_tx_ring_info *txr = &bp->tx_ring[i]; @@ -13837,6 +13880,11 @@ static netdev_features_t bnxt_fix_features(struct net_device *dev, if ((features & NETIF_F_NTUPLE) && !bnxt_rfs_capable(bp, false)) features &= ~NETIF_F_NTUPLE; + if ((features & NETIF_F_GSO_UDP_L4) && + !(bp->flags & BNXT_FLAG_UDP_GSO_CAP) && + bp->tx_ring_size < 2 * BNXT_SW_USO_MAX_DESCS) + features &= ~NETIF_F_GSO_UDP_L4; + if ((bp->flags & BNXT_FLAG_NO_AGG_RINGS) || bp->xdp_prog) features &= ~(NETIF_F_LRO | NETIF_F_GRO_HW); @@ -13882,6 +13930,9 @@ static int bnxt_set_features(struct net_device *dev, netdev_features_t features) int rc = 0; bool re_init = false; + bp->tx_wake_thresh = max_t(int, bp->tx_ring_size / 2, + bnxt_min_tx_desc_cnt(bp, features)); + flags &= ~BNXT_FLAG_ALL_CONFIG_FEATS; if (features & NETIF_F_GRO_HW) flags |= BNXT_FLAG_GRO; @@ -16907,8 +16958,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_PARTIAL | NETIF_F_RXHASH | NETIF_F_RXCSUM | NETIF_F_GRO; - if (bp->flags & BNXT_FLAG_UDP_GSO_CAP) - dev->hw_features |= NETIF_F_GSO_UDP_L4; + dev->hw_features |= NETIF_F_GSO_UDP_L4; if (BNXT_SUPPORTS_TPA(bp)) dev->hw_features |= NETIF_F_LRO; @@ -16941,8 +16991,15 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dev->priv_flags |= IFF_UNICAST_FLT; netif_set_tso_max_size(dev, GSO_MAX_SIZE); - if (bp->tso_max_segs) + if (!(bp->flags & BNXT_FLAG_UDP_GSO_CAP)) { + u16 max_segs = BNXT_SW_USO_MAX_SEGS; + + if (bp->tso_max_segs) + max_segs = min_t(u16, max_segs, bp->tso_max_segs); + netif_set_tso_max_segs(dev, max_segs); + } else if (bp->tso_max_segs) { netif_set_tso_max_segs(dev, bp->tso_max_segs); + } dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | NETDEV_XDP_ACT_RX_SG; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 6826bf762d26..9ded88196bb4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -33,6 +33,7 @@ #include "bnxt_xdp.h" #include "bnxt_ptp.h" #include "bnxt_ethtool.h" +#include "bnxt_gso.h" #include "bnxt_nvm_defs.h" /* NVRAM content constant and structure defs */ #include "bnxt_fw_hdr.h" /* Firmware hdr constant and structure defs */ #include "bnxt_coredump.h" @@ -852,12 +853,18 @@ static int bnxt_set_ringparam(struct net_device *dev, u8 tcp_data_split = kernel_ering->tcp_data_split; struct bnxt *bp = netdev_priv(dev); u8 hds_config_mod; + int rc; if ((ering->rx_pending > BNXT_MAX_RX_DESC_CNT) || (ering->tx_pending > BNXT_MAX_TX_DESC_CNT) || (ering->tx_pending < BNXT_MIN_TX_DESC_CNT)) return -EINVAL; + if ((dev->features & NETIF_F_GSO_UDP_L4) && + !(bp->flags & BNXT_FLAG_UDP_GSO_CAP) && + ering->tx_pending < 2 * BNXT_SW_USO_MAX_DESCS) + return -EINVAL; + hds_config_mod = tcp_data_split != dev->cfg->hds_config; if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_DISABLED && hds_config_mod) return -EINVAL; @@ -882,9 +889,17 @@ static int bnxt_set_ringparam(struct net_device *dev, bp->tx_ring_size = ering->tx_pending; bnxt_set_ring_params(bp); - if (netif_running(dev)) - return bnxt_open_nic(bp, false, false); + if (netif_running(dev)) { + rc = bnxt_open_nic(bp, false, false); + if (rc) + return rc; + } + /* ring size changes may affect features (SW USO requires a minimum + * ring size), so recalculate features to ensure the correct features + * are blocked/available. + */ + netdev_update_features(dev); return 0; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_gso.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_gso.h index 6ba8ccc451de..47528c20f311 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_gso.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_gso.h @@ -29,6 +29,15 @@ static inline u16 bnxt_inline_avail(struct bnxt_tx_ring_info *txr) (u16)(txr->tx_inline_prod - READ_ONCE(txr->tx_inline_cons)); } +static inline int bnxt_min_tx_desc_cnt(struct bnxt *bp, + netdev_features_t features) +{ + if (!(bp->flags & BNXT_FLAG_UDP_GSO_CAP) && + (features & NETIF_F_GSO_UDP_L4)) + return BNXT_SW_USO_MAX_DESCS; + return BNXT_MIN_TX_DESC_CNT; +} + netdev_tx_t bnxt_sw_udp_gso_xmit(struct bnxt *bp, struct bnxt_tx_ring_info *txr, struct netdev_queue *txq, From 28f2c22398fbaaad9f16ac84647a3ed9e1a1e284 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Wed, 8 Apr 2026 16:05:58 -0700 Subject: [PATCH 1420/1561] net: bnxt: Dispatch to SW USO Wire in the SW USO path added in preceding commits when hardware USO is not possible. When a GSO skb with SKB_GSO_UDP_L4 arrives and the NIC lacks HW USO capability, redirect to bnxt_sw_udp_gso_xmit() which handles software segmentation into individual UDP frames submitted directly to the TX ring. Suggested-by: Jakub Kicinski Reviewed-by: Pavan Chebbi Signed-off-by: Joe Damato Link: https://patch.msgid.link/20260408230607.2019402-10-joe@dama.to Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 26aae48a7d0e..2715632115a5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -508,6 +508,11 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) } } #endif + if (skb_is_gso(skb) && + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) && + !(bp->flags & BNXT_FLAG_UDP_GSO_CAP)) + return bnxt_sw_udp_gso_xmit(bp, txr, txq, skb); + free_size = bnxt_tx_avail(bp, txr); if (unlikely(free_size < skb_shinfo(skb)->nr_frags + 2)) { /* We must have raced with NAPI cleanup */ From 5d3b12d1a24b72e147fbb585158f51585593f640 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Wed, 8 Apr 2026 16:05:59 -0700 Subject: [PATCH 1421/1561] selftests: drv-net: Add USO test Add a simple test for USO. Tests both ipv4 and ipv6 with several full segments and a partial segment. Suggested-by: Jakub Kicinski Signed-off-by: Joe Damato Link: https://patch.msgid.link/20260408230607.2019402-11-joe@dama.to Signed-off-by: Jakub Kicinski --- .../testing/selftests/drivers/net/hw/Makefile | 1 + tools/testing/selftests/drivers/net/hw/uso.py | 103 ++++++++++++++++++ 2 files changed, 104 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/hw/uso.py diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile index d84e955ac87b..85ca4d1ecf9e 100644 --- a/tools/testing/selftests/drivers/net/hw/Makefile +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -45,6 +45,7 @@ TEST_PROGS = \ rss_input_xfrm.py \ toeplitz.py \ tso.py \ + uso.py \ xdp_metadata.py \ xsk_reconfig.py \ # diff --git a/tools/testing/selftests/drivers/net/hw/uso.py b/tools/testing/selftests/drivers/net/hw/uso.py new file mode 100755 index 000000000000..6d61e56cab3c --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/uso.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +"""Test USO + +Sends large UDP datagrams with UDP_SEGMENT and verifies that the peer +receives the expected total payload and that the NIC transmitted at least +the expected number of segments. +""" +import random +import socket +import string + +from lib.py import ksft_run, ksft_exit, KsftSkipEx +from lib.py import ksft_eq, ksft_ge, ksft_variants, KsftNamedVariant +from lib.py import NetDrvEpEnv +from lib.py import bkg, defer, ethtool, ip, rand_port, wait_port_listen + +# python doesn't expose this constant, so we need to hardcode it to enable UDP +# segmentation for large payloads +UDP_SEGMENT = 103 + + +def _send_uso(cfg, ipver, mss, total_payload, port): + if ipver == "4": + sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + dst = (cfg.remote_addr_v["4"], port) + else: + sock = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM) + dst = (cfg.remote_addr_v["6"], port) + + sock.setsockopt(socket.IPPROTO_UDP, UDP_SEGMENT, mss) + payload = ''.join(random.choice(string.ascii_lowercase) + for _ in range(total_payload)) + sock.sendto(payload.encode(), dst) + sock.close() + + +def _get_tx_packets(cfg): + stats = ip(f"-s link show dev {cfg.ifname}", json=True)[0] + return stats['stats64']['tx']['packets'] + + +def _test_uso(cfg, ipver, mss, total_payload): + cfg.require_ipver(ipver) + cfg.require_cmd("socat", remote=True) + + features = ethtool(f"-k {cfg.ifname}", json=True) + uso_was_on = features[0]["tx-udp-segmentation"]["active"] + + try: + ethtool(f"-K {cfg.ifname} tx-udp-segmentation on") + except Exception as exc: + raise KsftSkipEx( + "Device does not support tx-udp-segmentation") from exc + if not uso_was_on: + defer(ethtool, f"-K {cfg.ifname} tx-udp-segmentation off") + + expected_segs = (total_payload + mss - 1) // mss + + port = rand_port(stype=socket.SOCK_DGRAM) + rx_cmd = f"socat -{ipver} -T 2 -u UDP-LISTEN:{port},reuseport STDOUT" + + tx_before = _get_tx_packets(cfg) + + with bkg(rx_cmd, host=cfg.remote, exit_wait=True) as rx: + wait_port_listen(port, proto="udp", host=cfg.remote) + _send_uso(cfg, ipver, mss, total_payload, port) + + ksft_eq(len(rx.stdout), total_payload, + comment=f"Received {len(rx.stdout)}B, expected {total_payload}B") + + cfg.wait_hw_stats_settle() + + tx_after = _get_tx_packets(cfg) + tx_delta = tx_after - tx_before + + ksft_ge(tx_delta, expected_segs, + comment=f"Expected >= {expected_segs} tx packets, got {tx_delta}") + + +def _uso_variants(): + for ipver in ["4", "6"]: + yield KsftNamedVariant(f"v{ipver}_partial", ipver, 1400, 1400 * 10 + 500) + yield KsftNamedVariant(f"v{ipver}_exact", ipver, 1400, 1400 * 5) + + +@ksft_variants(_uso_variants()) +def test_uso(cfg, ipver, mss, total_payload): + """Send a USO datagram and verify the peer receives the expected segments.""" + _test_uso(cfg, ipver, mss, total_payload) + + +def main() -> None: + """Run USO tests.""" + with NetDrvEpEnv(__file__) as cfg: + ksft_run([test_uso], + args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() From 2095da234017760068b654de25b466ae0654287d Mon Sep 17 00:00:00 2001 From: Bhargava Marreddy Date: Mon, 6 Apr 2026 23:34:11 +0530 Subject: [PATCH 1422/1561] bng_en: add per-PF workqueue, timer, and slow-path task Add a dedicated single-thread workqueue and a timer for each PF to drive deferred slow-path work such as link event handling and stats collection. The timer is stopped via timer_delete_sync() when interrupts are disabled and restarted on open. While the close path stops the timer to prevent new tasks from being scheduled, the sp_task and workqueue are preserved to maintain state continuity. Final draining and destruction of the workqueue are handled during PCI remove. Signed-off-by: Bhargava Marreddy Reviewed-by: Vikas Gupta Reviewed-by: Ajit Kumar Khaparde Link: https://patch.msgid.link/20260406180420.279470-2-bhargava.marreddy@broadcom.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/broadcom/bnge/bnge_netdev.c | 59 ++++++++++++++++++- .../net/ethernet/broadcom/bnge/bnge_netdev.h | 12 +++- 2 files changed, 69 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c index a20dc3ca640c..8d5d828e19a3 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c +++ b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c @@ -101,6 +101,36 @@ err_free_ring_stats: return rc; } +static void bnge_timer(struct timer_list *t) +{ + struct bnge_net *bn = timer_container_of(bn, t, timer); + struct bnge_dev *bd = bn->bd; + + if (!netif_running(bn->netdev) || + !test_bit(BNGE_STATE_OPEN, &bd->state)) + return; + + /* Periodic work added by later patches */ + + mod_timer(&bn->timer, jiffies + bn->current_interval); +} + +static void bnge_sp_task(struct work_struct *work) +{ + struct bnge_net *bn = container_of(work, struct bnge_net, sp_task); + struct bnge_dev *bd = bn->bd; + + netdev_lock(bn->netdev); + if (!test_bit(BNGE_STATE_OPEN, &bd->state)) { + netdev_unlock(bn->netdev); + return; + } + + /* Event handling work added by later patches */ + + netdev_unlock(bn->netdev); +} + static void bnge_free_nq_desc_arr(struct bnge_nq_ring_info *nqr) { struct bnge_ring_struct *ring = &nqr->ring_struct; @@ -2507,6 +2537,9 @@ static int bnge_open_core(struct bnge_net *bn) bnge_enable_int(bn); bnge_tx_enable(bn); + + mod_timer(&bn->timer, jiffies + bn->current_interval); + return 0; err_free_irq: @@ -2542,6 +2575,8 @@ static void bnge_close_core(struct bnge_net *bn) bnge_tx_disable(bn); clear_bit(BNGE_STATE_OPEN, &bd->state); + + timer_delete_sync(&bn->timer); bnge_shutdown_nic(bn); bnge_disable_napi(bn); bnge_free_all_rings_bufs(bn); @@ -2774,6 +2809,18 @@ int bnge_netdev_alloc(struct bnge_dev *bd, int max_irqs) if (bd->tso_max_segs) netif_set_tso_max_segs(netdev, bd->tso_max_segs); + INIT_WORK(&bn->sp_task, bnge_sp_task); + timer_setup(&bn->timer, bnge_timer, 0); + bn->current_interval = BNGE_TIMER_INTERVAL; + + bn->bnge_pf_wq = alloc_ordered_workqueue("bnge_pf_wq-%s", 0, + dev_name(bd->dev)); + if (!bn->bnge_pf_wq) { + netdev_err(netdev, "Unable to create workqueue.\n"); + rc = -ENOMEM; + goto err_netdev; + } + bn->rx_ring_size = BNGE_DEFAULT_RX_RING_SIZE; bn->tx_ring_size = BNGE_DEFAULT_TX_RING_SIZE; bn->rx_dir = DMA_FROM_DEVICE; @@ -2789,11 +2836,13 @@ int bnge_netdev_alloc(struct bnge_dev *bd, int max_irqs) rc = register_netdev(netdev); if (rc) { dev_err(bd->dev, "Register netdev failed rc: %d\n", rc); - goto err_netdev; + goto err_free_workq; } return 0; +err_free_workq: + destroy_workqueue(bn->bnge_pf_wq); err_netdev: free_netdev(netdev); return rc; @@ -2802,8 +2851,16 @@ err_netdev: void bnge_netdev_free(struct bnge_dev *bd) { struct net_device *netdev = bd->netdev; + struct bnge_net *bn; + + bn = netdev_priv(netdev); unregister_netdev(netdev); + + timer_shutdown_sync(&bn->timer); + cancel_work_sync(&bn->sp_task); + destroy_workqueue(bn->bnge_pf_wq); + free_netdev(netdev); bd->netdev = NULL; } diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.h b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.h index 70f1a7c24814..d2ccee725454 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.h +++ b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.h @@ -224,6 +224,12 @@ struct bnge_tpa_info { #define BNGE_NQ_HDL_TYPE(hdl) (((hdl) & BNGE_NQ_HDL_TYPE_MASK) >> \ BNGE_NQ_HDL_TYPE_SHIFT) +enum bnge_net_state { + BNGE_STATE_NAPI_DISABLED, +}; + +#define BNGE_TIMER_INTERVAL HZ + struct bnge_net { struct bnge_dev *bd; struct net_device *netdev; @@ -281,13 +287,17 @@ struct bnge_net { u32 stats_coal_ticks; unsigned long state; -#define BNGE_STATE_NAPI_DISABLED 0 u32 msg_enable; u16 max_tpa; __be16 vxlan_port; __be16 nge_port; __be16 vxlan_gpe_port; + + unsigned int current_interval; + struct timer_list timer; + struct workqueue_struct *bnge_pf_wq; + struct work_struct sp_task; }; #define BNGE_DEFAULT_RX_RING_SIZE 511 From 7626cd3d53be5249769bd2784a65bfccdce9ffaf Mon Sep 17 00:00:00 2001 From: Bhargava Marreddy Date: Mon, 6 Apr 2026 23:34:12 +0530 Subject: [PATCH 1423/1561] bng_en: query PHY capabilities and report link status Query PHY capabilities and supported speeds from firmware, retrieve current link state (speed, duplex, pause, FEC), and log the information. Seed initial link state during probe. Signed-off-by: Bhargava Marreddy Reviewed-by: Vikas Gupta Reviewed-by: Rajashekar Hudumula Reviewed-by: Ajit Kumar Khaparde Link: https://patch.msgid.link/20260406180420.279470-3-bhargava.marreddy@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnge/Makefile | 3 +- drivers/net/ethernet/broadcom/bnge/bnge.h | 8 + .../ethernet/broadcom/bnge/bnge_hwrm_lib.c | 214 +++++++++ .../ethernet/broadcom/bnge/bnge_hwrm_lib.h | 5 + .../net/ethernet/broadcom/bnge/bnge_link.c | 407 ++++++++++++++++++ .../net/ethernet/broadcom/bnge/bnge_link.h | 165 +++++++ .../net/ethernet/broadcom/bnge/bnge_netdev.c | 54 ++- .../net/ethernet/broadcom/bnge/bnge_netdev.h | 12 + 8 files changed, 865 insertions(+), 3 deletions(-) create mode 100644 drivers/net/ethernet/broadcom/bnge/bnge_link.c create mode 100644 drivers/net/ethernet/broadcom/bnge/bnge_link.h diff --git a/drivers/net/ethernet/broadcom/bnge/Makefile b/drivers/net/ethernet/broadcom/bnge/Makefile index fa604ee20264..8e07cb307d21 100644 --- a/drivers/net/ethernet/broadcom/bnge/Makefile +++ b/drivers/net/ethernet/broadcom/bnge/Makefile @@ -11,4 +11,5 @@ bng_en-y := bnge_core.o \ bnge_netdev.o \ bnge_ethtool.o \ bnge_auxr.o \ - bnge_txrx.o + bnge_txrx.o \ + bnge_link.o diff --git a/drivers/net/ethernet/broadcom/bnge/bnge.h b/drivers/net/ethernet/broadcom/bnge/bnge.h index f376913aa321..eddc5a4a5148 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge.h +++ b/drivers/net/ethernet/broadcom/bnge/bnge.h @@ -94,6 +94,9 @@ struct bnge_queue_info { u8 queue_profile; }; +#define BNGE_PHY_FLAGS2_SHIFT 8 +#define BNGE_PHY_FL_NO_FCS PORT_PHY_QCAPS_RESP_FLAGS_NO_FCS + struct bnge_dev { struct device *dev; struct pci_dev *pdev; @@ -207,6 +210,11 @@ struct bnge_dev { struct bnge_auxr_priv *aux_priv; struct bnge_auxr_dev *auxr_dev; + + struct bnge_link_info link_info; + + /* Copied from flags and flags2 in hwrm_port_phy_qcaps_output */ + u32 phy_flags; }; static inline bool bnge_is_roce_en(struct bnge_dev *bd) diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.c b/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.c index c46da3413417..c2e47b1d7034 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.c +++ b/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.c @@ -981,6 +981,220 @@ void bnge_hwrm_vnic_ctx_free_one(struct bnge_dev *bd, vnic->fw_rss_cos_lb_ctx[ctx_idx] = INVALID_HW_RING_ID; } +static bool bnge_phy_qcaps_no_speed(struct hwrm_port_phy_qcaps_output *resp) +{ + return !resp->supported_speeds2_auto_mode && + !resp->supported_speeds2_force_mode; +} + +int bnge_hwrm_phy_qcaps(struct bnge_dev *bd) +{ + struct bnge_link_info *link_info = &bd->link_info; + struct hwrm_port_phy_qcaps_output *resp; + struct hwrm_port_phy_qcaps_input *req; + int rc; + + rc = bnge_hwrm_req_init(bd, req, HWRM_PORT_PHY_QCAPS); + if (rc) + return rc; + + resp = bnge_hwrm_req_hold(bd, req); + rc = bnge_hwrm_req_send(bd, req); + if (rc) + goto hwrm_phy_qcaps_exit; + + bd->phy_flags = resp->flags | + (le16_to_cpu(resp->flags2) << BNGE_PHY_FLAGS2_SHIFT); + + if (bnge_phy_qcaps_no_speed(resp)) { + link_info->phy_enabled = false; + netdev_warn(bd->netdev, "Ethernet link disabled\n"); + } else if (!link_info->phy_enabled) { + link_info->phy_enabled = true; + netdev_info(bd->netdev, "Ethernet link enabled\n"); + /* Phy re-enabled, reprobe the speeds */ + link_info->support_auto_speeds2 = 0; + } + + /* Firmware may report 0 for autoneg supported speeds when no + * SFP module is present. Skip the update to preserve the + * current supported speeds -- storing 0 would cause autoneg + * default fallback to advertise nothing. + */ + if (resp->supported_speeds2_auto_mode) + link_info->support_auto_speeds2 = + le16_to_cpu(resp->supported_speeds2_auto_mode); + + bd->port_count = resp->port_cnt; + +hwrm_phy_qcaps_exit: + bnge_hwrm_req_drop(bd, req); + return rc; +} + +int bnge_hwrm_set_link_setting(struct bnge_net *bn, bool set_pause) +{ + struct hwrm_port_phy_cfg_input *req; + struct bnge_dev *bd = bn->bd; + int rc; + + rc = bnge_hwrm_req_init(bd, req, HWRM_PORT_PHY_CFG); + if (rc) + return rc; + + if (set_pause) + bnge_hwrm_set_pause_common(bn, req); + + bnge_hwrm_set_link_common(bn, req); + + rc = bnge_hwrm_req_send(bd, req); + if (!rc) + bn->eth_link_info.force_link_chng = false; + + return rc; +} + +int bnge_update_link(struct bnge_net *bn, bool chng_link_state) +{ + struct hwrm_port_phy_qcfg_output *resp; + struct hwrm_port_phy_qcfg_input *req; + struct bnge_link_info *link_info; + struct bnge_dev *bd = bn->bd; + bool support_changed; + u8 link_state; + int rc; + + link_info = &bd->link_info; + link_state = link_info->link_state; + + rc = bnge_hwrm_req_init(bd, req, HWRM_PORT_PHY_QCFG); + if (rc) + return rc; + + resp = bnge_hwrm_req_hold(bd, req); + rc = bnge_hwrm_req_send(bd, req); + if (rc) { + bnge_hwrm_req_drop(bd, req); + return rc; + } + + memcpy(&link_info->phy_qcfg_resp, resp, sizeof(*resp)); + link_info->phy_link_status = resp->link; + link_info->duplex = resp->duplex_state; + link_info->pause = resp->pause; + link_info->auto_mode = resp->auto_mode; + link_info->auto_pause_setting = resp->auto_pause; + link_info->lp_pause = resp->link_partner_adv_pause; + link_info->force_pause_setting = resp->force_pause; + link_info->duplex_setting = resp->duplex_cfg; + if (link_info->phy_link_status == BNGE_LINK_LINK) { + link_info->link_speed = le16_to_cpu(resp->link_speed); + link_info->active_lanes = resp->active_lanes; + } else { + link_info->link_speed = 0; + link_info->active_lanes = 0; + } + link_info->force_link_speed2 = le16_to_cpu(resp->force_link_speeds2); + link_info->support_speeds2 = le16_to_cpu(resp->support_speeds2); + link_info->auto_link_speeds2 = le16_to_cpu(resp->auto_link_speeds2); + link_info->lp_auto_link_speeds = + le16_to_cpu(resp->link_partner_adv_speeds); + link_info->media_type = resp->media_type; + link_info->phy_type = resp->phy_type; + link_info->phy_addr = resp->eee_config_phy_addr & + PORT_PHY_QCFG_RESP_PHY_ADDR_MASK; + link_info->module_status = resp->module_status; + + link_info->fec_cfg = le16_to_cpu(resp->fec_cfg); + link_info->active_fec_sig_mode = resp->active_fec_signal_mode; + + if (chng_link_state) { + if (link_info->phy_link_status == BNGE_LINK_LINK) + link_info->link_state = BNGE_LINK_STATE_UP; + else + link_info->link_state = BNGE_LINK_STATE_DOWN; + if (link_state != link_info->link_state) + bnge_report_link(bd); + } else { + /* always link down if not required to update link state */ + link_info->link_state = BNGE_LINK_STATE_DOWN; + } + bnge_hwrm_req_drop(bd, req); + + if (!BNGE_PHY_CFG_ABLE(bd)) + return 0; + + support_changed = bnge_support_speed_dropped(bn); + if (support_changed && (bn->eth_link_info.autoneg & BNGE_AUTONEG_SPEED)) + rc = bnge_hwrm_set_link_setting(bn, true); + return rc; +} + +int bnge_hwrm_set_pause(struct bnge_net *bn) +{ + struct bnge_ethtool_link_info *elink_info = &bn->eth_link_info; + struct hwrm_port_phy_cfg_input *req; + struct bnge_dev *bd = bn->bd; + bool pause_autoneg; + int rc; + + rc = bnge_hwrm_req_init(bd, req, HWRM_PORT_PHY_CFG); + if (rc) + return rc; + + pause_autoneg = !!(elink_info->autoneg & BNGE_AUTONEG_FLOW_CTRL); + + /* Prepare PHY pause-advertisement or forced-pause settings. */ + bnge_hwrm_set_pause_common(bn, req); + + /* Prepare speed/autoneg settings */ + if (pause_autoneg || elink_info->force_link_chng) + bnge_hwrm_set_link_common(bn, req); + + rc = bnge_hwrm_req_send(bd, req); + if (!rc && !pause_autoneg) { + /* Since changing of pause setting, with pause autoneg off, + * doesn't trigger any link change event, the driver needs to + * update the current MAC pause upon successful return of the + * phy_cfg command. + */ + bd->link_info.force_pause_setting = + bd->link_info.pause = elink_info->req_flow_ctrl; + bd->link_info.auto_pause_setting = 0; + if (!elink_info->force_link_chng) + bnge_report_link(bd); + } + if (!rc) + elink_info->force_link_chng = false; + + return rc; +} + +int bnge_hwrm_shutdown_link(struct bnge_dev *bd) +{ + struct hwrm_port_phy_cfg_input *req; + int rc; + + if (!BNGE_PHY_CFG_ABLE(bd)) + return 0; + + rc = bnge_hwrm_req_init(bd, req, HWRM_PORT_PHY_CFG); + if (rc) + return rc; + + req->flags = cpu_to_le32(PORT_PHY_CFG_REQ_FLAGS_FORCE_LINK_DWN); + rc = bnge_hwrm_req_send(bd, req); + if (!rc) { + /* Device is not obliged to link down in certain scenarios, + * even when forced. Setting the state unknown is consistent + * with driver startup and will force link state to be + * reported during subsequent open based on PORT_PHY_QCFG. + */ + bd->link_info.link_state = BNGE_LINK_STATE_UNKNOWN; + } + return rc; +} + void bnge_hwrm_stat_ctx_free(struct bnge_net *bn) { struct hwrm_stat_ctx_free_input *req; diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.h b/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.h index 38b046237feb..86ca3ac2244b 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.h +++ b/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.h @@ -57,4 +57,9 @@ int hwrm_ring_alloc_send_msg(struct bnge_net *bn, int bnge_hwrm_set_async_event_cr(struct bnge_dev *bd, int idx); int bnge_hwrm_vnic_set_tpa(struct bnge_dev *bd, struct bnge_vnic_info *vnic, u32 tpa_flags); +int bnge_update_link(struct bnge_net *bn, bool chng_link_state); +int bnge_hwrm_phy_qcaps(struct bnge_dev *bd); +int bnge_hwrm_set_link_setting(struct bnge_net *bn, bool set_pause); +int bnge_hwrm_set_pause(struct bnge_net *bn); +int bnge_hwrm_shutdown_link(struct bnge_dev *bd); #endif /* _BNGE_HWRM_LIB_H_ */ diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_link.c b/drivers/net/ethernet/broadcom/bnge/bnge_link.c new file mode 100644 index 000000000000..6eeb1c521535 --- /dev/null +++ b/drivers/net/ethernet/broadcom/bnge/bnge_link.c @@ -0,0 +1,407 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2026 Broadcom. + +#include + +#include "bnge.h" +#include "bnge_link.h" +#include "bnge_hwrm_lib.h" + +static u32 bnge_fw_to_ethtool_speed(u16 fw_link_speed) +{ + switch (fw_link_speed) { + case BNGE_LINK_SPEED_50GB: + case BNGE_LINK_SPEED_50GB_PAM4: + return SPEED_50000; + case BNGE_LINK_SPEED_100GB: + case BNGE_LINK_SPEED_100GB_PAM4: + case BNGE_LINK_SPEED_100GB_PAM4_112: + return SPEED_100000; + case BNGE_LINK_SPEED_200GB: + case BNGE_LINK_SPEED_200GB_PAM4: + case BNGE_LINK_SPEED_200GB_PAM4_112: + return SPEED_200000; + case BNGE_LINK_SPEED_400GB: + case BNGE_LINK_SPEED_400GB_PAM4: + case BNGE_LINK_SPEED_400GB_PAM4_112: + return SPEED_400000; + case BNGE_LINK_SPEED_800GB: + case BNGE_LINK_SPEED_800GB_PAM4_112: + return SPEED_800000; + default: + return SPEED_UNKNOWN; + } +} + +static void bnge_set_auto_speed(struct bnge_net *bn) +{ + struct bnge_ethtool_link_info *elink_info = &bn->eth_link_info; + struct bnge_link_info *link_info; + + link_info = &bn->bd->link_info; + elink_info->advertising = link_info->auto_link_speeds2; +} + +static void bnge_set_force_speed(struct bnge_net *bn) +{ + struct bnge_ethtool_link_info *elink_info = &bn->eth_link_info; + struct bnge_link_info *link_info; + + link_info = &bn->bd->link_info; + elink_info->req_link_speed = link_info->force_link_speed2; + switch (elink_info->req_link_speed) { + case BNGE_LINK_SPEED_50GB_PAM4: + case BNGE_LINK_SPEED_100GB_PAM4: + case BNGE_LINK_SPEED_200GB_PAM4: + case BNGE_LINK_SPEED_400GB_PAM4: + elink_info->req_signal_mode = BNGE_SIG_MODE_PAM4; + break; + case BNGE_LINK_SPEED_100GB_PAM4_112: + case BNGE_LINK_SPEED_200GB_PAM4_112: + case BNGE_LINK_SPEED_400GB_PAM4_112: + case BNGE_LINK_SPEED_800GB_PAM4_112: + elink_info->req_signal_mode = BNGE_SIG_MODE_PAM4_112; + break; + default: + elink_info->req_signal_mode = BNGE_SIG_MODE_NRZ; + break; + } +} + +void bnge_init_ethtool_link_settings(struct bnge_net *bn) +{ + struct bnge_ethtool_link_info *elink_info = &bn->eth_link_info; + struct bnge_link_info *link_info; + struct bnge_dev *bd = bn->bd; + + link_info = &bd->link_info; + + if (BNGE_AUTO_MODE(link_info->auto_mode)) { + elink_info->autoneg = BNGE_AUTONEG_SPEED; + if (link_info->auto_pause_setting & + PORT_PHY_QCFG_RESP_AUTO_PAUSE_AUTONEG_PAUSE) + elink_info->autoneg |= BNGE_AUTONEG_FLOW_CTRL; + bnge_set_auto_speed(bn); + } else { + elink_info->autoneg = 0; + elink_info->advertising = 0; + bnge_set_force_speed(bn); + elink_info->req_duplex = link_info->duplex_setting; + } + if (elink_info->autoneg & BNGE_AUTONEG_FLOW_CTRL) + elink_info->req_flow_ctrl = + link_info->auto_pause_setting & BNGE_LINK_PAUSE_BOTH; + else + elink_info->req_flow_ctrl = link_info->force_pause_setting; +} + +int bnge_probe_phy(struct bnge_net *bn, bool fw_dflt) +{ + struct bnge_dev *bd = bn->bd; + int rc; + + bd->phy_flags = 0; + rc = bnge_hwrm_phy_qcaps(bd); + if (rc) { + netdev_err(bn->netdev, + "Probe PHY can't get PHY qcaps (rc: %d)\n", rc); + return rc; + } + if (bd->phy_flags & BNGE_PHY_FL_NO_FCS) + bn->netdev->priv_flags |= IFF_SUPP_NOFCS; + else + bn->netdev->priv_flags &= ~IFF_SUPP_NOFCS; + if (!fw_dflt) + return 0; + + rc = bnge_update_link(bn, false); + if (rc) { + netdev_err(bn->netdev, "Probe PHY can't update link (rc: %d)\n", + rc); + return rc; + } + bnge_init_ethtool_link_settings(bn); + + return 0; +} + +void bnge_hwrm_set_link_common(struct bnge_net *bn, + struct hwrm_port_phy_cfg_input *req) +{ + struct bnge_ethtool_link_info *elink_info = &bn->eth_link_info; + + if (elink_info->autoneg & BNGE_AUTONEG_SPEED) { + req->auto_mode |= PORT_PHY_CFG_REQ_AUTO_MODE_SPEED_MASK; + req->enables |= cpu_to_le32(BNGE_PHY_AUTO_SPEEDS2_MASK); + req->auto_link_speeds2_mask = + cpu_to_le16(elink_info->advertising); + req->enables |= cpu_to_le32(PORT_PHY_CFG_REQ_ENABLES_AUTO_MODE); + req->flags |= cpu_to_le32(BNGE_PHY_FLAGS_RESTART_AUTO); + } else { + req->flags |= cpu_to_le32(PORT_PHY_CFG_REQ_FLAGS_FORCE); + req->force_link_speeds2 = + cpu_to_le16(elink_info->req_link_speed); + req->enables |= + cpu_to_le32(BNGE_PHY_FLAGS_ENA_FORCE_SPEEDS2); + netif_info(bn, link, bn->netdev, + "Forcing FW speed2: %d\n", + (u32)elink_info->req_link_speed); + } + + /* tell FW that the setting takes effect immediately */ + req->flags |= cpu_to_le32(PORT_PHY_CFG_REQ_FLAGS_RESET_PHY); +} + +static bool bnge_auto_speed_updated(struct bnge_net *bn) +{ + struct bnge_ethtool_link_info *elink_info = &bn->eth_link_info; + struct bnge_link_info *link_info = &bn->bd->link_info; + + return elink_info->advertising != link_info->auto_link_speeds2; +} + +void bnge_hwrm_set_pause_common(struct bnge_net *bn, + struct hwrm_port_phy_cfg_input *req) +{ + if (bn->eth_link_info.autoneg & BNGE_AUTONEG_FLOW_CTRL) { + req->auto_pause = PORT_PHY_CFG_REQ_AUTO_PAUSE_AUTONEG_PAUSE; + if (bn->eth_link_info.req_flow_ctrl & BNGE_LINK_PAUSE_RX) + req->auto_pause |= PORT_PHY_CFG_REQ_AUTO_PAUSE_RX; + if (bn->eth_link_info.req_flow_ctrl & BNGE_LINK_PAUSE_TX) + req->auto_pause |= PORT_PHY_CFG_REQ_AUTO_PAUSE_TX; + req->enables |= + cpu_to_le32(PORT_PHY_CFG_REQ_ENABLES_AUTO_PAUSE); + } else { + if (bn->eth_link_info.req_flow_ctrl & BNGE_LINK_PAUSE_RX) + req->force_pause |= PORT_PHY_CFG_REQ_FORCE_PAUSE_RX; + if (bn->eth_link_info.req_flow_ctrl & BNGE_LINK_PAUSE_TX) + req->force_pause |= PORT_PHY_CFG_REQ_FORCE_PAUSE_TX; + req->enables |= + cpu_to_le32(PORT_PHY_CFG_REQ_ENABLES_FORCE_PAUSE); + req->auto_pause = req->force_pause; + req->enables |= + cpu_to_le32(PORT_PHY_CFG_REQ_ENABLES_AUTO_PAUSE); + } +} + +static bool bnge_force_speed_updated(struct bnge_net *bn) +{ + struct bnge_ethtool_link_info *elink_info = &bn->eth_link_info; + struct bnge_link_info *link_info = &bn->bd->link_info; + + return elink_info->req_link_speed != link_info->force_link_speed2; +} + +int bnge_update_phy_setting(struct bnge_net *bn) +{ + struct bnge_ethtool_link_info *elink_info; + struct bnge_link_info *link_info; + struct bnge_dev *bd = bn->bd; + bool update_pause = false; + bool update_link = false; + bool hw_pause_autoneg; + bool pause_autoneg; + int rc; + + link_info = &bd->link_info; + elink_info = &bn->eth_link_info; + rc = bnge_update_link(bn, true); + if (rc) { + netdev_err(bn->netdev, "failed to update link (rc: %d)\n", + rc); + return rc; + } + + pause_autoneg = !!(elink_info->autoneg & BNGE_AUTONEG_FLOW_CTRL); + hw_pause_autoneg = !!(link_info->auto_pause_setting & + PORT_PHY_QCFG_RESP_AUTO_PAUSE_AUTONEG_PAUSE); + + /* Check if pause autonegotiation state has changed */ + if (pause_autoneg != hw_pause_autoneg) { + update_pause = true; + } else if (pause_autoneg) { + /* If pause autoneg is enabled, check if the + * requested RX/TX bits changed + */ + if ((link_info->auto_pause_setting & BNGE_LINK_PAUSE_BOTH) != + elink_info->req_flow_ctrl) + update_pause = true; + } else { + /* If pause autoneg is disabled, check if the + * forced RX/TX bits changed + */ + if (link_info->force_pause_setting != elink_info->req_flow_ctrl) + update_pause = true; + } + + /* Force update if link change is requested */ + if (elink_info->force_link_chng) + update_pause = true; + + /* Check if link speed or duplex settings have changed */ + if (!(elink_info->autoneg & BNGE_AUTONEG_SPEED)) { + if (BNGE_AUTO_MODE(link_info->auto_mode) || + bnge_force_speed_updated(bn) || + elink_info->req_duplex != link_info->duplex_setting) + update_link = true; + } else { + if (link_info->auto_mode == BNGE_LINK_AUTO_NONE || + bnge_auto_speed_updated(bn)) + update_link = true; + } + + /* The last close may have shut down the link, so need to call + * PHY_CFG to bring it back up. + */ + if (!BNGE_LINK_IS_UP(bd)) + update_link = true; + + if (update_link) + rc = bnge_hwrm_set_link_setting(bn, update_pause); + else if (update_pause) + rc = bnge_hwrm_set_pause(bn); + + if (rc) { + netdev_err(bn->netdev, + "failed to update PHY setting (rc: %d)\n", rc); + return rc; + } + + return 0; +} + +void bnge_get_port_module_status(struct bnge_net *bn) +{ + struct hwrm_port_phy_qcfg_output *resp; + struct bnge_link_info *link_info; + struct bnge_dev *bd = bn->bd; + u8 module_status; + + link_info = &bd->link_info; + resp = &link_info->phy_qcfg_resp; + + if (bnge_update_link(bn, true)) + return; + + module_status = link_info->module_status; + switch (module_status) { + case PORT_PHY_QCFG_RESP_MODULE_STATUS_DISABLETX: + case PORT_PHY_QCFG_RESP_MODULE_STATUS_PWRDOWN: + case PORT_PHY_QCFG_RESP_MODULE_STATUS_WARNINGMSG: + netdev_warn(bn->netdev, + "Unqualified SFP+ module detected on port %d\n", + bd->pf.port_id); + netdev_warn(bn->netdev, "Module part number %.*s\n", + (int)sizeof(resp->phy_vendor_partnumber), + resp->phy_vendor_partnumber); + if (module_status == PORT_PHY_QCFG_RESP_MODULE_STATUS_DISABLETX) + netdev_warn(bn->netdev, "TX is disabled\n"); + if (module_status == PORT_PHY_QCFG_RESP_MODULE_STATUS_PWRDOWN) + netdev_warn(bn->netdev, "SFP+ module is shut down\n"); + break; + } +} + +static bool bnge_support_dropped(u16 advertising, u16 supported) +{ + return (advertising & ~supported) != 0; +} + +bool bnge_support_speed_dropped(struct bnge_net *bn) +{ + struct bnge_ethtool_link_info *elink_info = &bn->eth_link_info; + struct bnge_link_info *link_info = &bn->bd->link_info; + + /* Check if any advertised speeds are no longer supported. The caller + * holds the netdev instance lock, so we can modify link_info settings. + */ + if (bnge_support_dropped(elink_info->advertising, + link_info->support_auto_speeds2)) { + elink_info->advertising = link_info->support_auto_speeds2; + return true; + } + return false; +} + +static char *bnge_report_fec(struct bnge_link_info *link_info) +{ + u8 active_fec = link_info->active_fec_sig_mode & + PORT_PHY_QCFG_RESP_ACTIVE_FEC_MASK; + + switch (active_fec) { + default: + case PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_NONE_ACTIVE: + return "None"; + case PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_CLAUSE74_ACTIVE: + return "Clause 74 BaseR"; + case PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_CLAUSE91_ACTIVE: + return "Clause 91 RS(528,514)"; + case PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_RS544_1XN_ACTIVE: + return "Clause 91 RS544_1XN"; + case PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_RS544_IEEE_ACTIVE: + return "Clause 91 RS(544,514)"; + case PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_RS272_1XN_ACTIVE: + return "Clause 91 RS272_1XN"; + case PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_RS272_IEEE_ACTIVE: + return "Clause 91 RS(272,257)"; + } +} + +void bnge_report_link(struct bnge_dev *bd) +{ + if (BNGE_LINK_IS_UP(bd)) { + const char *signal = ""; + const char *flow_ctrl; + const char *duplex; + u32 speed; + u16 fec; + + netif_carrier_on(bd->netdev); + speed = bnge_fw_to_ethtool_speed(bd->link_info.link_speed); + if (speed == SPEED_UNKNOWN) { + netdev_info(bd->netdev, + "NIC Link is Up, speed unknown\n"); + return; + } + if (bd->link_info.duplex == BNGE_LINK_DUPLEX_FULL) + duplex = "full"; + else + duplex = "half"; + if (bd->link_info.pause == BNGE_LINK_PAUSE_BOTH) + flow_ctrl = "ON - receive & transmit"; + else if (bd->link_info.pause == BNGE_LINK_PAUSE_TX) + flow_ctrl = "ON - transmit"; + else if (bd->link_info.pause == BNGE_LINK_PAUSE_RX) + flow_ctrl = "ON - receive"; + else + flow_ctrl = "none"; + if (bd->link_info.phy_qcfg_resp.option_flags & + PORT_PHY_QCFG_RESP_OPTION_FLAGS_SIGNAL_MODE_KNOWN) { + u8 sig_mode = bd->link_info.active_fec_sig_mode & + PORT_PHY_QCFG_RESP_SIGNAL_MODE_MASK; + switch (sig_mode) { + case PORT_PHY_QCFG_RESP_SIGNAL_MODE_NRZ: + signal = "(NRZ) "; + break; + case PORT_PHY_QCFG_RESP_SIGNAL_MODE_PAM4: + signal = "(PAM4 56Gbps) "; + break; + case PORT_PHY_QCFG_RESP_SIGNAL_MODE_PAM4_112: + signal = "(PAM4 112Gbps) "; + break; + default: + break; + } + } + netdev_info(bd->netdev, "NIC Link is Up, %u Mbps %s%s duplex, Flow control: %s\n", + speed, signal, duplex, flow_ctrl); + fec = bd->link_info.fec_cfg; + if (!(fec & PORT_PHY_QCFG_RESP_FEC_CFG_FEC_NONE_SUPPORTED)) + netdev_info(bd->netdev, "FEC autoneg %s encoding: %s\n", + (fec & BNGE_FEC_AUTONEG) ? "on" : "off", + bnge_report_fec(&bd->link_info)); + } else { + netif_carrier_off(bd->netdev); + netdev_info(bd->netdev, "NIC Link is Down\n"); + } +} diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_link.h b/drivers/net/ethernet/broadcom/bnge/bnge_link.h new file mode 100644 index 000000000000..68eaa229fffe --- /dev/null +++ b/drivers/net/ethernet/broadcom/bnge/bnge_link.h @@ -0,0 +1,165 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2026 Broadcom */ + +#ifndef _BNGE_LINK_H_ +#define _BNGE_LINK_H_ + +#define BNGE_PHY_CFG_ABLE(bd) \ + ((bd)->link_info.phy_enabled) + +#define BNGE_PHY_AUTO_SPEEDS2_MASK \ + PORT_PHY_CFG_REQ_ENABLES_AUTO_LINK_SPEEDS2_MASK +#define BNGE_PHY_FLAGS_RESTART_AUTO \ + PORT_PHY_CFG_REQ_FLAGS_RESTART_AUTONEG +#define BNGE_PHY_FLAGS_ENA_FORCE_SPEEDS2 \ + PORT_PHY_CFG_REQ_ENABLES_FORCE_LINK_SPEEDS2 + +#define BNGE_LINK_LINK PORT_PHY_QCFG_RESP_LINK_LINK + +enum bnge_link_state { + BNGE_LINK_STATE_UNKNOWN, + BNGE_LINK_STATE_DOWN, + BNGE_LINK_STATE_UP, +}; + +#define BNGE_LINK_IS_UP(bd) \ + ((bd)->link_info.link_state == BNGE_LINK_STATE_UP) + +#define BNGE_LINK_DUPLEX_FULL PORT_PHY_QCFG_RESP_DUPLEX_STATE_FULL + +#define BNGE_LINK_PAUSE_TX PORT_PHY_QCFG_RESP_PAUSE_TX +#define BNGE_LINK_PAUSE_RX PORT_PHY_QCFG_RESP_PAUSE_RX +#define BNGE_LINK_PAUSE_BOTH (PORT_PHY_QCFG_RESP_PAUSE_RX | \ + PORT_PHY_QCFG_RESP_PAUSE_TX) + +#define BNGE_LINK_AUTO_NONE PORT_PHY_QCFG_RESP_AUTO_MODE_NONE +#define BNGE_LINK_AUTO_MSK PORT_PHY_QCFG_RESP_AUTO_MODE_SPEED_MASK +#define BNGE_AUTO_MODE(mode) ((mode) > BNGE_LINK_AUTO_NONE && \ + (mode) <= BNGE_LINK_AUTO_MSK) + +#define BNGE_LINK_SPEED_50GB PORT_PHY_QCFG_RESP_LINK_SPEED_50GB +#define BNGE_LINK_SPEED_100GB PORT_PHY_QCFG_RESP_LINK_SPEED_100GB +#define BNGE_LINK_SPEED_200GB PORT_PHY_QCFG_RESP_LINK_SPEED_200GB +#define BNGE_LINK_SPEED_400GB PORT_PHY_QCFG_RESP_LINK_SPEED_400GB +#define BNGE_LINK_SPEED_800GB PORT_PHY_QCFG_RESP_LINK_SPEED_800GB + +#define BNGE_LINK_SPEEDS2_MSK_50GB PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS2_50GB +#define BNGE_LINK_SPEEDS2_MSK_100GB PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS2_100GB +#define BNGE_LINK_SPEEDS2_MSK_50GB_PAM4 \ + PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS2_50GB_PAM4_56 +#define BNGE_LINK_SPEEDS2_MSK_100GB_PAM4 \ + PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS2_100GB_PAM4_56 +#define BNGE_LINK_SPEEDS2_MSK_200GB_PAM4 \ + PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS2_200GB_PAM4_56 +#define BNGE_LINK_SPEEDS2_MSK_400GB_PAM4 \ + PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS2_400GB_PAM4_56 +#define BNGE_LINK_SPEEDS2_MSK_100GB_PAM4_112 \ + PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS2_100GB_PAM4_112 +#define BNGE_LINK_SPEEDS2_MSK_200GB_PAM4_112 \ + PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS2_200GB_PAM4_112 +#define BNGE_LINK_SPEEDS2_MSK_400GB_PAM4_112 \ + PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS2_400GB_PAM4_112 +#define BNGE_LINK_SPEEDS2_MSK_800GB_PAM4_112 \ + PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS2_800GB_PAM4_112 + +#define BNGE_LINK_SPEED_50GB_PAM4 \ + PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_50GB_PAM4_56 +#define BNGE_LINK_SPEED_100GB_PAM4 \ + PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_100GB_PAM4_56 +#define BNGE_LINK_SPEED_200GB_PAM4 \ + PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_200GB_PAM4_56 +#define BNGE_LINK_SPEED_400GB_PAM4 \ + PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_400GB_PAM4_56 +#define BNGE_LINK_SPEED_100GB_PAM4_112 \ + PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_100GB_PAM4_112 +#define BNGE_LINK_SPEED_200GB_PAM4_112 \ + PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_200GB_PAM4_112 +#define BNGE_LINK_SPEED_400GB_PAM4_112 \ + PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_400GB_PAM4_112 +#define BNGE_LINK_SPEED_800GB_PAM4_112 \ + PORT_PHY_CFG_REQ_FORCE_LINK_SPEEDS2_800GB_PAM4_112 + +#define BNGE_FEC_NONE PORT_PHY_QCFG_RESP_FEC_CFG_FEC_NONE_SUPPORTED +#define BNGE_FEC_AUTONEG PORT_PHY_QCFG_RESP_FEC_CFG_FEC_AUTONEG_ENABLED +#define BNGE_FEC_ENC_BASE_R_CAP \ + PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE74_SUPPORTED +#define BNGE_FEC_ENC_BASE_R PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE74_ENABLED +#define BNGE_FEC_ENC_RS_CAP \ + PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE91_SUPPORTED +#define BNGE_FEC_ENC_LLRS_CAP \ + (PORT_PHY_QCFG_RESP_FEC_CFG_FEC_RS272_1XN_SUPPORTED | \ + PORT_PHY_QCFG_RESP_FEC_CFG_FEC_RS272_IEEE_SUPPORTED) +#define BNGE_FEC_ENC_RS \ + (PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE91_ENABLED | \ + PORT_PHY_QCFG_RESP_FEC_CFG_FEC_RS544_1XN_ENABLED | \ + PORT_PHY_QCFG_RESP_FEC_CFG_FEC_RS544_IEEE_ENABLED) +#define BNGE_FEC_ENC_LLRS \ + (PORT_PHY_QCFG_RESP_FEC_CFG_FEC_RS272_1XN_ENABLED | \ + PORT_PHY_QCFG_RESP_FEC_CFG_FEC_RS272_IEEE_ENABLED) + +struct bnge_link_info { + u8 phy_type; + u8 media_type; + u8 phy_addr; + u8 phy_link_status; + bool phy_enabled; + + u8 link_state; + u8 active_lanes; + u8 duplex; + u8 pause; + u8 lp_pause; + u8 auto_pause_setting; + u8 force_pause_setting; + u8 duplex_setting; + u8 auto_mode; + u16 link_speed; + u16 support_speeds2; + u16 auto_link_speeds2; + u16 support_auto_speeds2; + u16 lp_auto_link_speeds; + u16 force_link_speed2; + + u8 module_status; + u8 active_fec_sig_mode; + u16 fec_cfg; + + /* A copy of phy_qcfg output used to report link + * info to VF + */ + struct hwrm_port_phy_qcfg_output phy_qcfg_resp; + + bool phy_retry; + unsigned long phy_retry_expires; +}; + +#define BNGE_AUTONEG_SPEED 1 +#define BNGE_AUTONEG_FLOW_CTRL 2 + +#define BNGE_SIG_MODE_NRZ PORT_PHY_QCFG_RESP_SIGNAL_MODE_NRZ +#define BNGE_SIG_MODE_PAM4 PORT_PHY_QCFG_RESP_SIGNAL_MODE_PAM4 +#define BNGE_SIG_MODE_PAM4_112 PORT_PHY_QCFG_RESP_SIGNAL_MODE_PAM4_112 +#define BNGE_SIG_MODE_MAX (PORT_PHY_QCFG_RESP_SIGNAL_MODE_LAST + 1) + +struct bnge_ethtool_link_info { + /* copy of requested setting from ethtool cmd */ + u8 autoneg; + u8 req_signal_mode; + u8 req_duplex; + u8 req_flow_ctrl; + u16 req_link_speed; + u16 advertising; /* user adv setting */ + bool force_link_chng; +}; + +void bnge_hwrm_set_link_common(struct bnge_net *bn, + struct hwrm_port_phy_cfg_input *req); +void bnge_hwrm_set_pause_common(struct bnge_net *bn, + struct hwrm_port_phy_cfg_input *req); +int bnge_update_phy_setting(struct bnge_net *bn); +void bnge_get_port_module_status(struct bnge_net *bn); +void bnge_report_link(struct bnge_dev *bd); +bool bnge_support_speed_dropped(struct bnge_net *bn); +void bnge_init_ethtool_link_settings(struct bnge_net *bn); +int bnge_probe_phy(struct bnge_net *bn, bool fw_dflt); +#endif /* _BNGE_LINK_H_ */ diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c index 8d5d828e19a3..185f5bfce1d5 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c +++ b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c @@ -101,6 +101,17 @@ err_free_ring_stats: return rc; } +void __bnge_queue_sp_work(struct bnge_net *bn) +{ + queue_work(bn->bnge_pf_wq, &bn->sp_task); +} + +static void bnge_queue_sp_work(struct bnge_net *bn, unsigned int event) +{ + set_bit(event, &bn->sp_event); + __bnge_queue_sp_work(bn); +} + static void bnge_timer(struct timer_list *t) { struct bnge_net *bn = timer_container_of(bn, t, timer); @@ -110,7 +121,14 @@ static void bnge_timer(struct timer_list *t) !test_bit(BNGE_STATE_OPEN, &bd->state)) return; - /* Periodic work added by later patches */ + if (READ_ONCE(bd->link_info.phy_retry)) { + if (time_after(jiffies, READ_ONCE(bd->link_info.phy_retry_expires))) { + WRITE_ONCE(bd->link_info.phy_retry, false); + netdev_warn(bn->netdev, "failed to update PHY settings after maximum retries.\n"); + } else { + bnge_queue_sp_work(bn, BNGE_UPDATE_PHY_SP_EVENT); + } + } mod_timer(&bn->timer, jiffies + bn->current_interval); } @@ -126,7 +144,17 @@ static void bnge_sp_task(struct work_struct *work) return; } - /* Event handling work added by later patches */ + if (test_and_clear_bit(BNGE_UPDATE_PHY_SP_EVENT, &bn->sp_event)) { + int rc; + + rc = bnge_update_phy_setting(bn); + if (rc) { + netdev_warn(bn->netdev, "update PHY settings retry failed\n"); + } else { + WRITE_ONCE(bd->link_info.phy_retry, false); + netdev_info(bn->netdev, "update PHY settings retry succeeded\n"); + } + } netdev_unlock(bn->netdev); } @@ -2496,6 +2524,8 @@ static void bnge_tx_enable(struct bnge_net *bn) /* Make sure napi polls see @dev_state change */ synchronize_net(); netif_tx_wake_all_queues(bn->netdev); + if (BNGE_LINK_IS_UP(bn->bd)) + netif_carrier_on(bn->netdev); } static int bnge_open_core(struct bnge_net *bn) @@ -2532,6 +2562,16 @@ static int bnge_open_core(struct bnge_net *bn) bnge_enable_napi(bn); + rc = bnge_update_phy_setting(bn); + if (rc) { + netdev_warn(bn->netdev, "failed to update PHY settings (rc: %d)\n", + rc); + WRITE_ONCE(bd->link_info.phy_retry_expires, jiffies + 5 * HZ); + WRITE_ONCE(bd->link_info.phy_retry, true); + } else { + WRITE_ONCE(bd->link_info.phy_retry, false); + } + set_bit(BNGE_STATE_OPEN, &bd->state); bnge_enable_int(bn); @@ -2540,6 +2580,9 @@ static int bnge_open_core(struct bnge_net *bn) mod_timer(&bn->timer, jiffies + bn->current_interval); + /* Poll link status and check for SFP+ module status */ + bnge_get_port_module_status(bn); + return 0; err_free_irq: @@ -2591,6 +2634,8 @@ static int bnge_close(struct net_device *dev) struct bnge_net *bn = netdev_priv(dev); bnge_close_core(bn); + bnge_hwrm_shutdown_link(bn->bd); + bn->sp_event = 0; return 0; } @@ -2832,6 +2877,10 @@ int bnge_netdev_alloc(struct bnge_dev *bd, int max_irqs) bnge_init_l2_fltr_tbl(bn); bnge_init_mac_addr(bd); + rc = bnge_probe_phy(bn, true); + if (rc) + goto err_free_workq; + netdev->request_ops_lock = true; rc = register_netdev(netdev); if (rc) { @@ -2859,6 +2908,7 @@ void bnge_netdev_free(struct bnge_dev *bd) timer_shutdown_sync(&bn->timer); cancel_work_sync(&bn->sp_task); + bn->sp_event = 0; destroy_workqueue(bn->bnge_pf_wq); free_netdev(netdev); diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.h b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.h index d2ccee725454..5636eb371e24 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.h +++ b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.h @@ -9,6 +9,7 @@ #include #include "bnge_db.h" #include "bnge_hw_def.h" +#include "bnge_link.h" struct tx_bd { __le32 tx_bd_len_flags_type; @@ -230,6 +231,13 @@ enum bnge_net_state { #define BNGE_TIMER_INTERVAL HZ +enum bnge_sp_event { + BNGE_LINK_CHNG_SP_EVENT, + BNGE_LINK_SPEED_CHNG_SP_EVENT, + BNGE_LINK_CFG_CHANGE_SP_EVENT, + BNGE_UPDATE_PHY_SP_EVENT, +}; + struct bnge_net { struct bnge_dev *bd; struct net_device *netdev; @@ -298,6 +306,9 @@ struct bnge_net { struct timer_list timer; struct workqueue_struct *bnge_pf_wq; struct work_struct sp_task; + unsigned long sp_event; + + struct bnge_ethtool_link_info eth_link_info; }; #define BNGE_DEFAULT_RX_RING_SIZE 511 @@ -576,4 +587,5 @@ u8 *__bnge_alloc_rx_frag(struct bnge_net *bn, dma_addr_t *mapping, struct bnge_rx_ring_info *rxr, gfp_t gfp); int bnge_alloc_rx_netmem(struct bnge_net *bn, struct bnge_rx_ring_info *rxr, u16 prod, gfp_t gfp); +void __bnge_queue_sp_work(struct bnge_net *bn); #endif /* _BNGE_NETDEV_H_ */ From 169f6e8dd14957bbb66bf5bab40bf251a3950fc2 Mon Sep 17 00:00:00 2001 From: Bhargava Marreddy Date: Mon, 6 Apr 2026 23:34:13 +0530 Subject: [PATCH 1424/1561] bng_en: add ethtool link settings, get_link, and nway_reset Add get/set_link_ksettings, get_link, and nway_reset support. Report supported, advertised, and link-partner speeds across NRZ, PAM4, and PAM4-112 signaling modes. Enable lane count reporting. Signed-off-by: Bhargava Marreddy Reviewed-by: Vikas Gupta Reviewed-by: Rajashekar Hudumula Reviewed-by: Ajit Kumar Khaparde Signed-off-by: Vikas Gupta Link: https://patch.msgid.link/20260406180420.279470-4-bhargava.marreddy@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnge/bnge.h | 2 + .../net/ethernet/broadcom/bnge/bnge_core.c | 1 + .../net/ethernet/broadcom/bnge/bnge_ethtool.c | 28 + .../net/ethernet/broadcom/bnge/bnge_link.c | 707 ++++++++++++++++++ .../net/ethernet/broadcom/bnge/bnge_link.h | 7 + 5 files changed, 745 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnge/bnge.h b/drivers/net/ethernet/broadcom/bnge/bnge.h index eddc5a4a5148..f21cff651fd4 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge.h +++ b/drivers/net/ethernet/broadcom/bnge/bnge.h @@ -96,6 +96,8 @@ struct bnge_queue_info { #define BNGE_PHY_FLAGS2_SHIFT 8 #define BNGE_PHY_FL_NO_FCS PORT_PHY_QCAPS_RESP_FLAGS_NO_FCS +#define BNGE_PHY_FL_NO_PAUSE \ + (PORT_PHY_QCAPS_RESP_FLAGS2_PAUSE_UNSUPPORTED << 8) struct bnge_dev { struct device *dev; diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_core.c b/drivers/net/ethernet/broadcom/bnge/bnge_core.c index b4090283df0f..1c14c5fe8d61 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge_core.c +++ b/drivers/net/ethernet/broadcom/bnge/bnge_core.c @@ -10,6 +10,7 @@ #include "bnge_devlink.h" #include "bnge_hwrm.h" #include "bnge_hwrm_lib.h" +#include "bnge_link.h" MODULE_LICENSE("GPL"); MODULE_DESCRIPTION(DRV_SUMMARY); diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_ethtool.c b/drivers/net/ethernet/broadcom/bnge/bnge_ethtool.c index 569371c1b4f2..6bb74a84ea03 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnge/bnge_ethtool.c @@ -11,6 +11,29 @@ #include "bnge.h" #include "bnge_ethtool.h" +#include "bnge_hwrm_lib.h" + +static int bnge_nway_reset(struct net_device *dev) +{ + struct bnge_net *bn = netdev_priv(dev); + struct bnge_dev *bd = bn->bd; + bool set_pause = false; + int rc = 0; + + if (!BNGE_PHY_CFG_ABLE(bd)) + return -EOPNOTSUPP; + + if (!(bn->eth_link_info.autoneg & BNGE_AUTONEG_SPEED)) + return -EINVAL; + + if (!(bd->phy_flags & BNGE_PHY_FL_NO_PAUSE)) + set_pause = true; + + if (netif_running(dev)) + rc = bnge_hwrm_set_link_setting(bn, set_pause); + + return rc; +} static void bnge_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) @@ -24,7 +47,12 @@ static void bnge_get_drvinfo(struct net_device *dev, } static const struct ethtool_ops bnge_ethtool_ops = { + .cap_link_lanes_supported = 1, + .get_link_ksettings = bnge_get_link_ksettings, + .set_link_ksettings = bnge_set_link_ksettings, .get_drvinfo = bnge_get_drvinfo, + .get_link = bnge_get_link, + .nway_reset = bnge_nway_reset, }; void bnge_set_ethtool_ops(struct net_device *dev) diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_link.c b/drivers/net/ethernet/broadcom/bnge/bnge_link.c index 6eeb1c521535..2e2aaa764443 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge_link.c +++ b/drivers/net/ethernet/broadcom/bnge/bnge_link.c @@ -7,6 +7,51 @@ #include "bnge_link.h" #include "bnge_hwrm_lib.h" +enum bnge_media_type { + BNGE_MEDIA_UNKNOWN = 0, + BNGE_MEDIA_CR, + BNGE_MEDIA_SR, + BNGE_MEDIA_LR_ER_FR, + BNGE_MEDIA_KR, + __BNGE_MEDIA_END, +}; + +static const enum bnge_media_type bnge_phy_types[] = { + [PORT_PHY_QCFG_RESP_PHY_TYPE_100G_BASECR4] = BNGE_MEDIA_CR, + [PORT_PHY_QCFG_RESP_PHY_TYPE_100G_BASESR4] = BNGE_MEDIA_SR, + [PORT_PHY_QCFG_RESP_PHY_TYPE_100G_BASELR4] = BNGE_MEDIA_LR_ER_FR, + [PORT_PHY_QCFG_RESP_PHY_TYPE_100G_BASEER4] = BNGE_MEDIA_LR_ER_FR, + [PORT_PHY_QCFG_RESP_PHY_TYPE_100G_BASESR10] = BNGE_MEDIA_SR, + [PORT_PHY_QCFG_RESP_PHY_TYPE_200G_BASECR4] = BNGE_MEDIA_CR, + [PORT_PHY_QCFG_RESP_PHY_TYPE_200G_BASESR4] = BNGE_MEDIA_SR, + [PORT_PHY_QCFG_RESP_PHY_TYPE_200G_BASELR4] = BNGE_MEDIA_LR_ER_FR, + [PORT_PHY_QCFG_RESP_PHY_TYPE_200G_BASEER4] = BNGE_MEDIA_LR_ER_FR, + [PORT_PHY_QCFG_RESP_PHY_TYPE_50G_BASECR] = BNGE_MEDIA_CR, + [PORT_PHY_QCFG_RESP_PHY_TYPE_50G_BASESR] = BNGE_MEDIA_SR, + [PORT_PHY_QCFG_RESP_PHY_TYPE_50G_BASELR] = BNGE_MEDIA_LR_ER_FR, + [PORT_PHY_QCFG_RESP_PHY_TYPE_50G_BASEER] = BNGE_MEDIA_LR_ER_FR, + [PORT_PHY_QCFG_RESP_PHY_TYPE_100G_BASECR2] = BNGE_MEDIA_CR, + [PORT_PHY_QCFG_RESP_PHY_TYPE_100G_BASESR2] = BNGE_MEDIA_SR, + [PORT_PHY_QCFG_RESP_PHY_TYPE_100G_BASELR2] = BNGE_MEDIA_LR_ER_FR, + [PORT_PHY_QCFG_RESP_PHY_TYPE_100G_BASEER2] = BNGE_MEDIA_LR_ER_FR, + [PORT_PHY_QCFG_RESP_PHY_TYPE_100G_BASECR] = BNGE_MEDIA_CR, + [PORT_PHY_QCFG_RESP_PHY_TYPE_100G_BASESR] = BNGE_MEDIA_SR, + [PORT_PHY_QCFG_RESP_PHY_TYPE_100G_BASELR] = BNGE_MEDIA_LR_ER_FR, + [PORT_PHY_QCFG_RESP_PHY_TYPE_100G_BASEER] = BNGE_MEDIA_LR_ER_FR, + [PORT_PHY_QCFG_RESP_PHY_TYPE_200G_BASECR2] = BNGE_MEDIA_CR, + [PORT_PHY_QCFG_RESP_PHY_TYPE_200G_BASESR2] = BNGE_MEDIA_SR, + [PORT_PHY_QCFG_RESP_PHY_TYPE_200G_BASELR2] = BNGE_MEDIA_LR_ER_FR, + [PORT_PHY_QCFG_RESP_PHY_TYPE_200G_BASEER2] = BNGE_MEDIA_LR_ER_FR, + [PORT_PHY_QCFG_RESP_PHY_TYPE_400G_BASECR8] = BNGE_MEDIA_CR, + [PORT_PHY_QCFG_RESP_PHY_TYPE_400G_BASESR8] = BNGE_MEDIA_SR, + [PORT_PHY_QCFG_RESP_PHY_TYPE_400G_BASELR8] = BNGE_MEDIA_LR_ER_FR, + [PORT_PHY_QCFG_RESP_PHY_TYPE_400G_BASEER8] = BNGE_MEDIA_LR_ER_FR, + [PORT_PHY_QCFG_RESP_PHY_TYPE_400G_BASECR4] = BNGE_MEDIA_CR, + [PORT_PHY_QCFG_RESP_PHY_TYPE_400G_BASESR4] = BNGE_MEDIA_SR, + [PORT_PHY_QCFG_RESP_PHY_TYPE_400G_BASELR4] = BNGE_MEDIA_LR_ER_FR, + [PORT_PHY_QCFG_RESP_PHY_TYPE_400G_BASEER4] = BNGE_MEDIA_LR_ER_FR, +}; + static u32 bnge_fw_to_ethtool_speed(u16 fw_link_speed) { switch (fw_link_speed) { @@ -302,6 +347,14 @@ void bnge_get_port_module_status(struct bnge_net *bn) } } +static void bnge_set_default_adv_speeds(struct bnge_net *bn) +{ + struct bnge_ethtool_link_info *elink_info = &bn->eth_link_info; + struct bnge_link_info *link_info = &bn->bd->link_info; + + elink_info->advertising = link_info->support_auto_speeds2; +} + static bool bnge_support_dropped(u16 advertising, u16 supported) { return (advertising & ~supported) != 0; @@ -405,3 +458,657 @@ void bnge_report_link(struct bnge_dev *bd) netdev_info(bd->netdev, "NIC Link is Down\n"); } } + +static void bnge_get_ethtool_modes(struct bnge_net *bn, + struct ethtool_link_ksettings *lk_ksettings) +{ + struct bnge_ethtool_link_info *elink_info; + struct bnge_link_info *link_info; + struct bnge_dev *bd = bn->bd; + + elink_info = &bn->eth_link_info; + link_info = &bd->link_info; + + if (!(bd->phy_flags & BNGE_PHY_FL_NO_PAUSE)) { + linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, + lk_ksettings->link_modes.supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, + lk_ksettings->link_modes.supported); + } + + if (link_info->support_auto_speeds2) + linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, + lk_ksettings->link_modes.supported); + + if (~elink_info->autoneg & BNGE_AUTONEG_FLOW_CTRL) + return; + + if (link_info->auto_pause_setting & BNGE_LINK_PAUSE_RX) + linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, + lk_ksettings->link_modes.advertising); + if (hweight8(link_info->auto_pause_setting & BNGE_LINK_PAUSE_BOTH) == 1) + linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, + lk_ksettings->link_modes.advertising); + if (link_info->lp_pause & BNGE_LINK_PAUSE_RX) + linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, + lk_ksettings->link_modes.lp_advertising); + if (hweight8(link_info->lp_pause & BNGE_LINK_PAUSE_BOTH) == 1) + linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, + lk_ksettings->link_modes.lp_advertising); +} + +u32 bnge_get_link(struct net_device *dev) +{ + struct bnge_net *bn = netdev_priv(dev); + + return BNGE_LINK_IS_UP(bn->bd); +} + +static enum bnge_media_type +bnge_get_media(struct bnge_link_info *link_info) +{ + switch (link_info->media_type) { + case PORT_PHY_QCFG_RESP_MEDIA_TYPE_DAC: + return BNGE_MEDIA_CR; + default: + if (link_info->phy_type < ARRAY_SIZE(bnge_phy_types)) + return bnge_phy_types[link_info->phy_type]; + return BNGE_MEDIA_UNKNOWN; + } +} + +enum bnge_link_speed_indices { + BNGE_LINK_SPEED_UNKNOWN = 0, + BNGE_LINK_SPEED_50GB_IDX, + BNGE_LINK_SPEED_100GB_IDX, + BNGE_LINK_SPEED_200GB_IDX, + BNGE_LINK_SPEED_400GB_IDX, + BNGE_LINK_SPEED_800GB_IDX, + __BNGE_LINK_SPEED_END +}; + +static enum bnge_link_speed_indices bnge_fw_speed_idx(u16 speed) +{ + switch (speed) { + case BNGE_LINK_SPEED_50GB: + case BNGE_LINK_SPEED_50GB_PAM4: + return BNGE_LINK_SPEED_50GB_IDX; + case BNGE_LINK_SPEED_100GB: + case BNGE_LINK_SPEED_100GB_PAM4: + case BNGE_LINK_SPEED_100GB_PAM4_112: + return BNGE_LINK_SPEED_100GB_IDX; + case BNGE_LINK_SPEED_200GB: + case BNGE_LINK_SPEED_200GB_PAM4: + case BNGE_LINK_SPEED_200GB_PAM4_112: + return BNGE_LINK_SPEED_200GB_IDX; + case BNGE_LINK_SPEED_400GB: + case BNGE_LINK_SPEED_400GB_PAM4: + case BNGE_LINK_SPEED_400GB_PAM4_112: + return BNGE_LINK_SPEED_400GB_IDX; + case BNGE_LINK_SPEED_800GB: + case BNGE_LINK_SPEED_800GB_PAM4_112: + return BNGE_LINK_SPEED_800GB_IDX; + default: + return BNGE_LINK_SPEED_UNKNOWN; + } +} + +/* Compile-time link mode mapping table. + * Indexed by [speed_idx][sig_mode][media]. + */ +#define BNGE_LINK_M(speed, sig, media, lm) \ + [BNGE_LINK_SPEED_##speed##_IDX] \ + [BNGE_SIG_MODE_##sig] \ + [BNGE_MEDIA_##media] = ETHTOOL_LINK_MODE_##lm##_Full_BIT + +static const enum ethtool_link_mode_bit_indices +bnge_link_modes[__BNGE_LINK_SPEED_END] + [BNGE_SIG_MODE_MAX] + [__BNGE_MEDIA_END] = { + /* 50GB PAM4 */ + BNGE_LINK_M(50GB, PAM4, CR, 50000baseCR), + BNGE_LINK_M(50GB, PAM4, SR, 50000baseSR), + BNGE_LINK_M(50GB, PAM4, LR_ER_FR, 50000baseLR_ER_FR), + BNGE_LINK_M(50GB, PAM4, KR, 50000baseKR), + + /* 100GB NRZ */ + BNGE_LINK_M(100GB, NRZ, CR, 100000baseCR4), + BNGE_LINK_M(100GB, NRZ, SR, 100000baseSR4), + BNGE_LINK_M(100GB, NRZ, LR_ER_FR, 100000baseLR4_ER4), + BNGE_LINK_M(100GB, NRZ, KR, 100000baseKR4), + + /* 100GB PAM4 */ + BNGE_LINK_M(100GB, PAM4, CR, 100000baseCR2), + BNGE_LINK_M(100GB, PAM4, SR, 100000baseSR2), + BNGE_LINK_M(100GB, PAM4, LR_ER_FR, 100000baseLR2_ER2_FR2), + BNGE_LINK_M(100GB, PAM4, KR, 100000baseKR2), + + /* 100GB PAM4_112 */ + BNGE_LINK_M(100GB, PAM4_112, CR, 100000baseCR), + BNGE_LINK_M(100GB, PAM4_112, SR, 100000baseSR), + BNGE_LINK_M(100GB, PAM4_112, LR_ER_FR, 100000baseLR_ER_FR), + BNGE_LINK_M(100GB, PAM4_112, KR, 100000baseKR), + + /* 200GB PAM4 */ + BNGE_LINK_M(200GB, PAM4, CR, 200000baseCR4), + BNGE_LINK_M(200GB, PAM4, SR, 200000baseSR4), + BNGE_LINK_M(200GB, PAM4, LR_ER_FR, 200000baseLR4_ER4_FR4), + BNGE_LINK_M(200GB, PAM4, KR, 200000baseKR4), + + /* 200GB PAM4_112 */ + BNGE_LINK_M(200GB, PAM4_112, CR, 200000baseCR2), + BNGE_LINK_M(200GB, PAM4_112, SR, 200000baseSR2), + BNGE_LINK_M(200GB, PAM4_112, LR_ER_FR, 200000baseLR2_ER2_FR2), + BNGE_LINK_M(200GB, PAM4_112, KR, 200000baseKR2), + + /* 400GB PAM4 */ + BNGE_LINK_M(400GB, PAM4, CR, 400000baseCR8), + BNGE_LINK_M(400GB, PAM4, SR, 400000baseSR8), + BNGE_LINK_M(400GB, PAM4, LR_ER_FR, 400000baseLR8_ER8_FR8), + BNGE_LINK_M(400GB, PAM4, KR, 400000baseKR8), + + /* 400GB PAM4_112 */ + BNGE_LINK_M(400GB, PAM4_112, CR, 400000baseCR4), + BNGE_LINK_M(400GB, PAM4_112, SR, 400000baseSR4), + BNGE_LINK_M(400GB, PAM4_112, LR_ER_FR, 400000baseLR4_ER4_FR4), + BNGE_LINK_M(400GB, PAM4_112, KR, 400000baseKR4), + + /* 800GB PAM4_112 */ + BNGE_LINK_M(800GB, PAM4_112, CR, 800000baseCR8), + BNGE_LINK_M(800GB, PAM4_112, SR, 800000baseSR8), + BNGE_LINK_M(800GB, PAM4_112, KR, 800000baseKR8), +}; + +#define BNGE_LINK_MODE_UNKNOWN -1 + +static enum ethtool_link_mode_bit_indices +bnge_get_link_mode(struct bnge_net *bn) +{ + enum ethtool_link_mode_bit_indices link_mode; + struct bnge_ethtool_link_info *elink_info; + enum bnge_link_speed_indices speed; + struct bnge_link_info *link_info; + struct bnge_dev *bd = bn->bd; + enum bnge_media_type media; + u8 sig_mode; + + elink_info = &bn->eth_link_info; + link_info = &bd->link_info; + + if (link_info->phy_link_status != BNGE_LINK_LINK) + return BNGE_LINK_MODE_UNKNOWN; + + media = bnge_get_media(link_info); + if (BNGE_AUTO_MODE(link_info->auto_mode)) { + speed = bnge_fw_speed_idx(link_info->link_speed); + sig_mode = link_info->active_fec_sig_mode & + PORT_PHY_QCFG_RESP_SIGNAL_MODE_MASK; + } else { + speed = bnge_fw_speed_idx(elink_info->req_link_speed); + sig_mode = elink_info->req_signal_mode; + } + if (sig_mode >= BNGE_SIG_MODE_MAX) + return BNGE_LINK_MODE_UNKNOWN; + + /* Since ETHTOOL_LINK_MODE_10baseT_Half_BIT is defined as 0 and + * not actually supported, the zeroes in this map can be safely + * used to represent unknown link modes. + */ + link_mode = bnge_link_modes[speed][sig_mode][media]; + if (!link_mode) + return BNGE_LINK_MODE_UNKNOWN; + + return link_mode; +} + +static const u16 bnge_nrz_speeds2_masks[__BNGE_LINK_SPEED_END] = { + [BNGE_LINK_SPEED_100GB_IDX] = BNGE_LINK_SPEEDS2_MSK_100GB, +}; + +static const u16 bnge_pam4_speeds2_masks[__BNGE_LINK_SPEED_END] = { + [BNGE_LINK_SPEED_50GB_IDX] = BNGE_LINK_SPEEDS2_MSK_50GB_PAM4, + [BNGE_LINK_SPEED_100GB_IDX] = BNGE_LINK_SPEEDS2_MSK_100GB_PAM4, + [BNGE_LINK_SPEED_200GB_IDX] = BNGE_LINK_SPEEDS2_MSK_200GB_PAM4, + [BNGE_LINK_SPEED_400GB_IDX] = BNGE_LINK_SPEEDS2_MSK_400GB_PAM4, +}; + +static const u16 bnge_pam4_112_speeds2_masks[__BNGE_LINK_SPEED_END] = { + [BNGE_LINK_SPEED_100GB_IDX] = BNGE_LINK_SPEEDS2_MSK_100GB_PAM4_112, + [BNGE_LINK_SPEED_200GB_IDX] = BNGE_LINK_SPEEDS2_MSK_200GB_PAM4_112, + [BNGE_LINK_SPEED_400GB_IDX] = BNGE_LINK_SPEEDS2_MSK_400GB_PAM4_112, + [BNGE_LINK_SPEED_800GB_IDX] = BNGE_LINK_SPEEDS2_MSK_800GB_PAM4_112, +}; + +static enum bnge_link_speed_indices +bnge_encoding_speed_idx(u8 sig_mode, u16 speed_msk) +{ + const u16 *speeds; + int idx, len; + + switch (sig_mode) { + case BNGE_SIG_MODE_NRZ: + speeds = bnge_nrz_speeds2_masks; + len = ARRAY_SIZE(bnge_nrz_speeds2_masks); + break; + case BNGE_SIG_MODE_PAM4: + speeds = bnge_pam4_speeds2_masks; + len = ARRAY_SIZE(bnge_pam4_speeds2_masks); + break; + case BNGE_SIG_MODE_PAM4_112: + speeds = bnge_pam4_112_speeds2_masks; + len = ARRAY_SIZE(bnge_pam4_112_speeds2_masks); + break; + default: + return BNGE_LINK_SPEED_UNKNOWN; + } + + for (idx = 0; idx < len; idx++) { + if (speeds[idx] == speed_msk) + return idx; + } + + return BNGE_LINK_SPEED_UNKNOWN; +} + +#define BNGE_FW_SPEED_MSK_BITS 16 + +static void +__bnge_get_ethtool_speeds(unsigned long fw_mask, enum bnge_media_type media, + u8 sig_mode, unsigned long *et_mask) +{ + enum ethtool_link_mode_bit_indices link_mode; + enum bnge_link_speed_indices speed; + u8 bit; + + for_each_set_bit(bit, &fw_mask, BNGE_FW_SPEED_MSK_BITS) { + speed = bnge_encoding_speed_idx(sig_mode, 1 << bit); + if (!speed) + continue; + + link_mode = bnge_link_modes[speed][sig_mode][media]; + if (!link_mode) + continue; + + linkmode_set_bit(link_mode, et_mask); + } +} + +static void +bnge_get_ethtool_speeds(unsigned long fw_mask, enum bnge_media_type media, + u8 sig_mode, unsigned long *et_mask) +{ + if (media) { + __bnge_get_ethtool_speeds(fw_mask, media, sig_mode, et_mask); + return; + } + + /* list speeds for all media if unknown */ + for (media = 1; media < __BNGE_MEDIA_END; media++) + __bnge_get_ethtool_speeds(fw_mask, media, sig_mode, et_mask); +} + +static void +bnge_get_all_ethtool_support_speeds(struct bnge_dev *bd, + enum bnge_media_type media, + struct ethtool_link_ksettings *lk_ksettings) +{ + u16 sp = bd->link_info.support_speeds2; + + bnge_get_ethtool_speeds(sp, media, BNGE_SIG_MODE_NRZ, + lk_ksettings->link_modes.supported); + bnge_get_ethtool_speeds(sp, media, BNGE_SIG_MODE_PAM4, + lk_ksettings->link_modes.supported); + bnge_get_ethtool_speeds(sp, media, BNGE_SIG_MODE_PAM4_112, + lk_ksettings->link_modes.supported); +} + +static void +bnge_get_all_ethtool_adv_speeds(struct bnge_net *bn, + enum bnge_media_type media, + struct ethtool_link_ksettings *lk_ksettings) +{ + u16 sp = bn->eth_link_info.advertising; + + bnge_get_ethtool_speeds(sp, media, BNGE_SIG_MODE_NRZ, + lk_ksettings->link_modes.advertising); + bnge_get_ethtool_speeds(sp, media, BNGE_SIG_MODE_PAM4, + lk_ksettings->link_modes.advertising); + bnge_get_ethtool_speeds(sp, media, BNGE_SIG_MODE_PAM4_112, + lk_ksettings->link_modes.advertising); +} + +static void +bnge_get_all_ethtool_lp_speeds(struct bnge_dev *bd, + enum bnge_media_type media, + struct ethtool_link_ksettings *lk_ksettings) +{ + u16 sp = bd->link_info.lp_auto_link_speeds; + + bnge_get_ethtool_speeds(sp, media, BNGE_SIG_MODE_NRZ, + lk_ksettings->link_modes.lp_advertising); + bnge_get_ethtool_speeds(sp, media, BNGE_SIG_MODE_PAM4, + lk_ksettings->link_modes.lp_advertising); + bnge_get_ethtool_speeds(sp, media, BNGE_SIG_MODE_PAM4_112, + lk_ksettings->link_modes.lp_advertising); +} + +static void bnge_update_speed(u32 *delta, bool installed_media, u16 *speeds, + u16 speed_msk, const unsigned long *et_mask, + enum ethtool_link_mode_bit_indices mode) +{ + bool mode_desired = linkmode_test_bit(mode, et_mask); + + if (!mode || !mode_desired) + return; + + /* installed media takes priority; for non-installed media, only allow + * one change per fw_speed bit (many to one mapping). + */ + if (installed_media || !(*delta & speed_msk)) { + *speeds |= speed_msk; + *delta |= speed_msk; + } +} + +static void bnge_set_ethtool_speeds(struct bnge_net *bn, + const unsigned long *et_mask) +{ + struct bnge_ethtool_link_info *elink_info = &bn->eth_link_info; + enum bnge_media_type media; + u32 delta_pam4_112 = 0; + u32 delta_pam4 = 0; + u32 delta_nrz = 0; + int i, m; + + elink_info->advertising = 0; + + media = bnge_get_media(&bn->bd->link_info); + for (i = 1; i < __BNGE_LINK_SPEED_END; i++) { + /* accept any legal media from user */ + for (m = 1; m < __BNGE_MEDIA_END; m++) { + bnge_update_speed(&delta_nrz, m == media, + &elink_info->advertising, + bnge_nrz_speeds2_masks[i], et_mask, + bnge_link_modes[i][BNGE_SIG_MODE_NRZ][m]); + bnge_update_speed(&delta_pam4, m == media, + &elink_info->advertising, + bnge_pam4_speeds2_masks[i], et_mask, + bnge_link_modes[i][BNGE_SIG_MODE_PAM4][m]); + bnge_update_speed(&delta_pam4_112, m == media, + &elink_info->advertising, + bnge_pam4_112_speeds2_masks[i], + et_mask, + bnge_link_modes[i][BNGE_SIG_MODE_PAM4_112][m]); + } + } +} + +static void +bnge_fw_to_ethtool_advertised_fec(struct bnge_link_info *link_info, + struct ethtool_link_ksettings *lk_ksettings) +{ + u16 fec_cfg = link_info->fec_cfg; + + if ((fec_cfg & BNGE_FEC_NONE) || !(fec_cfg & BNGE_FEC_AUTONEG)) { + linkmode_set_bit(ETHTOOL_LINK_MODE_FEC_NONE_BIT, + lk_ksettings->link_modes.advertising); + return; + } + if (fec_cfg & BNGE_FEC_ENC_BASE_R) + linkmode_set_bit(ETHTOOL_LINK_MODE_FEC_BASER_BIT, + lk_ksettings->link_modes.advertising); + if (fec_cfg & BNGE_FEC_ENC_RS) + linkmode_set_bit(ETHTOOL_LINK_MODE_FEC_RS_BIT, + lk_ksettings->link_modes.advertising); + if (fec_cfg & BNGE_FEC_ENC_LLRS) + linkmode_set_bit(ETHTOOL_LINK_MODE_FEC_LLRS_BIT, + lk_ksettings->link_modes.advertising); +} + +static void +bnge_fw_to_ethtool_support_fec(struct bnge_link_info *link_info, + struct ethtool_link_ksettings *lk_ksettings) +{ + u16 fec_cfg = link_info->fec_cfg; + + if (fec_cfg & BNGE_FEC_NONE) { + linkmode_set_bit(ETHTOOL_LINK_MODE_FEC_NONE_BIT, + lk_ksettings->link_modes.supported); + return; + } + if (fec_cfg & BNGE_FEC_ENC_BASE_R_CAP) + linkmode_set_bit(ETHTOOL_LINK_MODE_FEC_BASER_BIT, + lk_ksettings->link_modes.supported); + if (fec_cfg & BNGE_FEC_ENC_RS_CAP) + linkmode_set_bit(ETHTOOL_LINK_MODE_FEC_RS_BIT, + lk_ksettings->link_modes.supported); + if (fec_cfg & BNGE_FEC_ENC_LLRS_CAP) + linkmode_set_bit(ETHTOOL_LINK_MODE_FEC_LLRS_BIT, + lk_ksettings->link_modes.supported); +} + +static void bnge_get_default_speeds(struct bnge_net *bn, + struct ethtool_link_ksettings *lk_ksettings) +{ + struct bnge_ethtool_link_info *elink_info = &bn->eth_link_info; + struct ethtool_link_settings *base = &lk_ksettings->base; + struct bnge_link_info *link_info; + struct bnge_dev *bd = bn->bd; + + link_info = &bd->link_info; + + if (link_info->link_state == BNGE_LINK_STATE_UP) { + base->speed = bnge_fw_to_ethtool_speed(link_info->link_speed); + base->duplex = DUPLEX_HALF; + if (link_info->duplex & BNGE_LINK_DUPLEX_FULL) + base->duplex = DUPLEX_FULL; + lk_ksettings->lanes = link_info->active_lanes; + } else if (!elink_info->autoneg) { + base->speed = + bnge_fw_to_ethtool_speed(elink_info->req_link_speed); + base->duplex = DUPLEX_HALF; + if (elink_info->req_duplex == BNGE_LINK_DUPLEX_FULL) + base->duplex = DUPLEX_FULL; + } +} + +int bnge_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *lk_ksettings) +{ + struct ethtool_link_settings *base = &lk_ksettings->base; + enum ethtool_link_mode_bit_indices link_mode; + struct bnge_net *bn = netdev_priv(dev); + struct bnge_link_info *link_info; + struct bnge_dev *bd = bn->bd; + enum bnge_media_type media; + + ethtool_link_ksettings_zero_link_mode(lk_ksettings, lp_advertising); + ethtool_link_ksettings_zero_link_mode(lk_ksettings, advertising); + ethtool_link_ksettings_zero_link_mode(lk_ksettings, supported); + base->duplex = DUPLEX_UNKNOWN; + base->speed = SPEED_UNKNOWN; + link_info = &bd->link_info; + + bnge_get_ethtool_modes(bn, lk_ksettings); + media = bnge_get_media(link_info); + bnge_get_all_ethtool_support_speeds(bd, media, lk_ksettings); + bnge_fw_to_ethtool_support_fec(link_info, lk_ksettings); + link_mode = bnge_get_link_mode(bn); + if (link_mode != BNGE_LINK_MODE_UNKNOWN) + ethtool_params_from_link_mode(lk_ksettings, link_mode); + else + bnge_get_default_speeds(bn, lk_ksettings); + + if (bn->eth_link_info.autoneg) { + bnge_fw_to_ethtool_advertised_fec(link_info, lk_ksettings); + linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, + lk_ksettings->link_modes.advertising); + base->autoneg = AUTONEG_ENABLE; + bnge_get_all_ethtool_adv_speeds(bn, media, lk_ksettings); + if (link_info->phy_link_status == BNGE_LINK_LINK) + bnge_get_all_ethtool_lp_speeds(bd, media, lk_ksettings); + } else { + base->autoneg = AUTONEG_DISABLE; + } + + linkmode_set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, + lk_ksettings->link_modes.supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, + lk_ksettings->link_modes.advertising); + + if (link_info->media_type == PORT_PHY_QCFG_RESP_MEDIA_TYPE_DAC) + base->port = PORT_DA; + else + base->port = PORT_FIBRE; + base->phy_address = link_info->phy_addr; + + return 0; +} + +static bool bnge_lanes_match(u32 user_lanes, u32 supported_lanes) +{ + /* 0 means lanes unspecified (auto) */ + return !user_lanes || user_lanes == supported_lanes; +} + +static int +bnge_force_link_speed(struct net_device *dev, u32 ethtool_speed, u32 user_lanes) +{ + struct bnge_ethtool_link_info *elink_info; + struct bnge_net *bn = netdev_priv(dev); + u8 sig_mode = BNGE_SIG_MODE_NRZ; + u16 support_spds2; + u16 fw_speed = 0; + + elink_info = &bn->eth_link_info; + support_spds2 = bn->bd->link_info.support_speeds2; + + switch (ethtool_speed) { + case SPEED_50000: + if (bnge_lanes_match(user_lanes, 1) && + (support_spds2 & BNGE_LINK_SPEEDS2_MSK_50GB_PAM4)) { + fw_speed = BNGE_LINK_SPEED_50GB_PAM4; + sig_mode = BNGE_SIG_MODE_PAM4; + } + break; + case SPEED_100000: + if (bnge_lanes_match(user_lanes, 4) && + (support_spds2 & BNGE_LINK_SPEEDS2_MSK_100GB)) { + fw_speed = PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_100GB; + } else if (bnge_lanes_match(user_lanes, 2) && + (support_spds2 & BNGE_LINK_SPEEDS2_MSK_100GB_PAM4)) { + fw_speed = BNGE_LINK_SPEED_100GB_PAM4; + sig_mode = BNGE_SIG_MODE_PAM4; + } else if (bnge_lanes_match(user_lanes, 1) && + (support_spds2 & BNGE_LINK_SPEEDS2_MSK_100GB_PAM4_112)) { + fw_speed = BNGE_LINK_SPEED_100GB_PAM4_112; + sig_mode = BNGE_SIG_MODE_PAM4_112; + } + break; + case SPEED_200000: + if (bnge_lanes_match(user_lanes, 4) && + (support_spds2 & BNGE_LINK_SPEEDS2_MSK_200GB_PAM4)) { + fw_speed = BNGE_LINK_SPEED_200GB_PAM4; + sig_mode = BNGE_SIG_MODE_PAM4; + } else if (bnge_lanes_match(user_lanes, 2) && + (support_spds2 & BNGE_LINK_SPEEDS2_MSK_200GB_PAM4_112)) { + fw_speed = BNGE_LINK_SPEED_200GB_PAM4_112; + sig_mode = BNGE_SIG_MODE_PAM4_112; + } + break; + case SPEED_400000: + if (bnge_lanes_match(user_lanes, 8) && + (support_spds2 & BNGE_LINK_SPEEDS2_MSK_400GB_PAM4)) { + fw_speed = BNGE_LINK_SPEED_400GB_PAM4; + sig_mode = BNGE_SIG_MODE_PAM4; + } else if (bnge_lanes_match(user_lanes, 4) && + (support_spds2 & BNGE_LINK_SPEEDS2_MSK_400GB_PAM4_112)) { + fw_speed = BNGE_LINK_SPEED_400GB_PAM4_112; + sig_mode = BNGE_SIG_MODE_PAM4_112; + } + break; + case SPEED_800000: + if (bnge_lanes_match(user_lanes, 8) && + (support_spds2 & BNGE_LINK_SPEEDS2_MSK_800GB_PAM4_112)) { + fw_speed = BNGE_LINK_SPEED_800GB_PAM4_112; + sig_mode = BNGE_SIG_MODE_PAM4_112; + } + break; + default: + break; + } + + if (!fw_speed) { + if (user_lanes) + netdev_err(dev, "unsupported speed or number of lanes!\n"); + else + netdev_err(dev, "unsupported speed!\n"); + return -EINVAL; + } + + if (elink_info->req_link_speed == fw_speed && + elink_info->req_signal_mode == sig_mode && + elink_info->autoneg == 0) + return -EALREADY; + + elink_info->req_link_speed = fw_speed; + elink_info->req_signal_mode = sig_mode; + elink_info->req_duplex = BNGE_LINK_DUPLEX_FULL; + elink_info->autoneg = 0; + elink_info->advertising = 0; + + return 0; +} + +int bnge_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *lk_ksettings) +{ + const struct ethtool_link_settings *base = &lk_ksettings->base; + struct bnge_ethtool_link_info old_elink_info; + struct bnge_ethtool_link_info *elink_info; + struct bnge_net *bn = netdev_priv(dev); + struct bnge_dev *bd = bn->bd; + bool set_pause = false; + int rc = 0; + + if (!BNGE_PHY_CFG_ABLE(bd)) + return -EOPNOTSUPP; + + elink_info = &bn->eth_link_info; + old_elink_info = *elink_info; + + if (base->autoneg == AUTONEG_ENABLE) { + bnge_set_ethtool_speeds(bn, + lk_ksettings->link_modes.advertising); + elink_info->autoneg |= BNGE_AUTONEG_SPEED; + if (!elink_info->advertising) + bnge_set_default_adv_speeds(bn); + /* any change to autoneg will cause link change, therefore the + * driver should put back the original pause setting in autoneg + */ + if (!(bd->phy_flags & BNGE_PHY_FL_NO_PAUSE)) + set_pause = true; + } else { + if (base->duplex == DUPLEX_HALF) { + netdev_err(dev, "HALF DUPLEX is not supported!\n"); + rc = -EINVAL; + goto set_setting_exit; + } + rc = bnge_force_link_speed(dev, base->speed, + lk_ksettings->lanes); + if (rc) { + if (rc == -EALREADY) + rc = 0; + goto set_setting_exit; + } + } + + if (netif_running(dev)) { + rc = bnge_hwrm_set_link_setting(bn, set_pause); + if (rc) + *elink_info = old_elink_info; + } + +set_setting_exit: + return rc; +} diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_link.h b/drivers/net/ethernet/broadcom/bnge/bnge_link.h index 68eaa229fffe..a0e6e62e03d8 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge_link.h +++ b/drivers/net/ethernet/broadcom/bnge/bnge_link.h @@ -4,6 +4,8 @@ #ifndef _BNGE_LINK_H_ #define _BNGE_LINK_H_ +#include + #define BNGE_PHY_CFG_ABLE(bd) \ ((bd)->link_info.phy_enabled) @@ -162,4 +164,9 @@ void bnge_report_link(struct bnge_dev *bd); bool bnge_support_speed_dropped(struct bnge_net *bn); void bnge_init_ethtool_link_settings(struct bnge_net *bn); int bnge_probe_phy(struct bnge_net *bn, bool fw_dflt); +int bnge_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *lk_ksettings); +int bnge_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *lk_ksettings); +u32 bnge_get_link(struct net_device *dev); #endif /* _BNGE_LINK_H_ */ From dc85e8a51f5a6b8f8a3a8de0f89467ae797b5cd8 Mon Sep 17 00:00:00 2001 From: Bhargava Marreddy Date: Mon, 6 Apr 2026 23:34:14 +0530 Subject: [PATCH 1425/1561] bng_en: implement ethtool pauseparam operations Implement .get_pauseparam and .set_pauseparam to support flow control configuration. This allows reporting and setting of autoneg, RX pause, and TX pause states. Signed-off-by: Bhargava Marreddy Reviewed-by: Vikas Gupta Link: https://patch.msgid.link/20260406180420.279470-5-bhargava.marreddy@broadcom.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/broadcom/bnge/bnge_ethtool.c | 63 +++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_ethtool.c b/drivers/net/ethernet/broadcom/bnge/bnge_ethtool.c index 6bb74a84ea03..424bf2d6721c 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnge/bnge_ethtool.c @@ -46,6 +46,67 @@ static void bnge_get_drvinfo(struct net_device *dev, strscpy(info->bus_info, pci_name(bd->pdev), sizeof(info->bus_info)); } +static void bnge_get_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *epause) +{ + struct bnge_net *bn = netdev_priv(dev); + struct bnge_dev *bd = bn->bd; + + if (bd->phy_flags & BNGE_PHY_FL_NO_PAUSE) { + epause->autoneg = 0; + epause->rx_pause = 0; + epause->tx_pause = 0; + return; + } + + epause->autoneg = !!(bn->eth_link_info.autoneg & + BNGE_AUTONEG_FLOW_CTRL); + epause->rx_pause = !!(bn->eth_link_info.req_flow_ctrl & + BNGE_LINK_PAUSE_RX); + epause->tx_pause = !!(bn->eth_link_info.req_flow_ctrl & + BNGE_LINK_PAUSE_TX); +} + +static int bnge_set_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *epause) +{ + struct bnge_ethtool_link_info old_elink_info, *elink_info; + struct bnge_net *bn = netdev_priv(dev); + struct bnge_dev *bd = bn->bd; + int rc = 0; + + if (!BNGE_PHY_CFG_ABLE(bd) || (bd->phy_flags & BNGE_PHY_FL_NO_PAUSE)) + return -EOPNOTSUPP; + + elink_info = &bn->eth_link_info; + old_elink_info = *elink_info; + + if (epause->autoneg) { + if (!(elink_info->autoneg & BNGE_AUTONEG_SPEED)) + return -EINVAL; + + elink_info->autoneg |= BNGE_AUTONEG_FLOW_CTRL; + } else { + if (elink_info->autoneg & BNGE_AUTONEG_FLOW_CTRL) + elink_info->force_link_chng = true; + elink_info->autoneg &= ~BNGE_AUTONEG_FLOW_CTRL; + } + + elink_info->req_flow_ctrl = 0; + if (epause->rx_pause) + elink_info->req_flow_ctrl |= BNGE_LINK_PAUSE_RX; + if (epause->tx_pause) + elink_info->req_flow_ctrl |= BNGE_LINK_PAUSE_TX; + + if (netif_running(dev)) { + rc = bnge_hwrm_set_pause(bn); + if (rc) + *elink_info = old_elink_info; + } + + return rc; +} + static const struct ethtool_ops bnge_ethtool_ops = { .cap_link_lanes_supported = 1, .get_link_ksettings = bnge_get_link_ksettings, @@ -53,6 +114,8 @@ static const struct ethtool_ops bnge_ethtool_ops = { .get_drvinfo = bnge_get_drvinfo, .get_link = bnge_get_link, .nway_reset = bnge_nway_reset, + .get_pauseparam = bnge_get_pauseparam, + .set_pauseparam = bnge_set_pauseparam, }; void bnge_set_ethtool_ops(struct net_device *dev) From 4a75900989c964fc839a615542f96353b1c7bc80 Mon Sep 17 00:00:00 2001 From: Bhargava Marreddy Date: Mon, 6 Apr 2026 23:34:15 +0530 Subject: [PATCH 1426/1561] bng_en: add support for link async events Register for firmware asynchronous events, including link-status, link-speed, and PHY configuration changes. Upon event reception, re-query the PHY and update ethtool settings accordingly. Signed-off-by: Bhargava Marreddy Reviewed-by: Vikas Gupta Reviewed-by: Rajashekar Hudumula Reviewed-by: Ajit Kumar Khaparde Link: https://patch.msgid.link/20260406180420.279470-6-bhargava.marreddy@broadcom.com Signed-off-by: Jakub Kicinski --- .../ethernet/broadcom/bnge/bnge_hwrm_lib.c | 19 +++++++++- .../net/ethernet/broadcom/bnge/bnge_link.c | 18 ++++++++++ .../net/ethernet/broadcom/bnge/bnge_link.h | 1 + .../net/ethernet/broadcom/bnge/bnge_netdev.c | 23 ++++++++++++ .../net/ethernet/broadcom/bnge/bnge_txrx.c | 35 ++++++++++++++++--- 5 files changed, 91 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.c b/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.c index c2e47b1d7034..83c0b16c3e81 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.c +++ b/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.c @@ -15,6 +15,13 @@ #include "bnge_rmem.h" #include "bnge_resc.h" +static const u16 bnge_async_events_arr[] = { + ASYNC_EVENT_CMPL_EVENT_ID_LINK_STATUS_CHANGE, + ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CHANGE, + ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE, + ASYNC_EVENT_CMPL_EVENT_ID_PORT_PHY_CFG_CHANGE, +}; + int bnge_hwrm_ver_get(struct bnge_dev *bd) { u32 dev_caps_cfg, hwrm_ver, hwrm_spec_code; @@ -166,10 +173,12 @@ int bnge_hwrm_fw_set_time(struct bnge_dev *bd) int bnge_hwrm_func_drv_rgtr(struct bnge_dev *bd) { + DECLARE_BITMAP(async_events_bmap, 256); struct hwrm_func_drv_rgtr_output *resp; struct hwrm_func_drv_rgtr_input *req; + u32 events[8]; u32 flags; - int rc; + int rc, i; rc = bnge_hwrm_req_init(bd, req, HWRM_FUNC_DRV_RGTR); if (rc) @@ -190,6 +199,14 @@ int bnge_hwrm_func_drv_rgtr(struct bnge_dev *bd) req->ver_min = cpu_to_le16(DRV_VER_MIN); req->ver_upd = cpu_to_le16(DRV_VER_UPD); + memset(async_events_bmap, 0, sizeof(async_events_bmap)); + for (i = 0; i < ARRAY_SIZE(bnge_async_events_arr); i++) + __set_bit(bnge_async_events_arr[i], async_events_bmap); + + bitmap_to_arr32(events, async_events_bmap, 256); + for (i = 0; i < ARRAY_SIZE(req->async_event_fwd); i++) + req->async_event_fwd[i] |= cpu_to_le32(events[i]); + resp = bnge_hwrm_req_hold(bd, req); rc = bnge_hwrm_req_send(bd, req); if (!rc) { diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_link.c b/drivers/net/ethernet/broadcom/bnge/bnge_link.c index 2e2aaa764443..3b0dfcf27376 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge_link.c +++ b/drivers/net/ethernet/broadcom/bnge/bnge_link.c @@ -1112,3 +1112,21 @@ int bnge_set_link_ksettings(struct net_device *dev, set_setting_exit: return rc; } + +void bnge_link_async_event_process(struct bnge_net *bn, u16 event_id) +{ + switch (event_id) { + case ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE: + set_bit(BNGE_LINK_SPEED_CHNG_SP_EVENT, &bn->sp_event); + break; + case ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CHANGE: + case ASYNC_EVENT_CMPL_EVENT_ID_PORT_PHY_CFG_CHANGE: + set_bit(BNGE_LINK_CFG_CHANGE_SP_EVENT, &bn->sp_event); + break; + case ASYNC_EVENT_CMPL_EVENT_ID_LINK_STATUS_CHANGE: + set_bit(BNGE_LINK_CHNG_SP_EVENT, &bn->sp_event); + break; + default: + break; + } +} diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_link.h b/drivers/net/ethernet/broadcom/bnge/bnge_link.h index a0e6e62e03d8..ccaf343aa018 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge_link.h +++ b/drivers/net/ethernet/broadcom/bnge/bnge_link.h @@ -169,4 +169,5 @@ int bnge_set_link_ksettings(struct net_device *dev, int bnge_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *lk_ksettings); u32 bnge_get_link(struct net_device *dev); +void bnge_link_async_event_process(struct bnge_net *bn, u16 event_id); #endif /* _BNGE_LINK_H_ */ diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c index 185f5bfce1d5..90458b323120 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c +++ b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c @@ -136,6 +136,7 @@ static void bnge_timer(struct timer_list *t) static void bnge_sp_task(struct work_struct *work) { struct bnge_net *bn = container_of(work, struct bnge_net, sp_task); + bool speed_chng, cfg_chng, link_chng; struct bnge_dev *bd = bn->bd; netdev_lock(bn->netdev); @@ -156,6 +157,28 @@ static void bnge_sp_task(struct work_struct *work) } } + speed_chng = test_and_clear_bit(BNGE_LINK_SPEED_CHNG_SP_EVENT, + &bn->sp_event); + cfg_chng = test_and_clear_bit(BNGE_LINK_CFG_CHANGE_SP_EVENT, + &bn->sp_event); + link_chng = test_and_clear_bit(BNGE_LINK_CHNG_SP_EVENT, + &bn->sp_event); + + if (speed_chng || cfg_chng || link_chng) { + int rc; + + if (speed_chng) + bnge_hwrm_phy_qcaps(bd); + + rc = bnge_update_link(bn, true); + if (rc) + netdev_err(bn->netdev, "SP task cannot update link (rc: %d)\n", + rc); + + if (speed_chng || cfg_chng) + bnge_init_ethtool_link_settings(bn); + } + netdev_unlock(bn->netdev); } diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_txrx.c b/drivers/net/ethernet/broadcom/bnge/bnge_txrx.c index a2616f037557..7d45e057f2e8 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge_txrx.c +++ b/drivers/net/ethernet/broadcom/bnge/bnge_txrx.c @@ -1128,6 +1128,29 @@ static void __bnge_poll_work_done(struct bnge_net *bn, struct bnge_napi *bnapi, } } +static void bnge_async_event_process(struct bnge_net *bn, + struct hwrm_async_event_cmpl *cmpl) +{ + u16 event_id = le16_to_cpu(cmpl->event_id); + u32 data1 = le32_to_cpu(cmpl->event_data1); + u32 data2 = le32_to_cpu(cmpl->event_data2); + + netdev_dbg(bn->netdev, "hwrm event 0x%x {0x%x, 0x%x}\n", + event_id, data1, data2); + + switch (event_id) { + case ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE: + case ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CHANGE: + case ASYNC_EVENT_CMPL_EVENT_ID_PORT_PHY_CFG_CHANGE: + case ASYNC_EVENT_CMPL_EVENT_ID_LINK_STATUS_CHANGE: + bnge_link_async_event_process(bn, event_id); + break; + default: + return; + } + __bnge_queue_sp_work(bn); +} + static void bnge_hwrm_update_token(struct bnge_dev *bd, u16 seq_id, enum bnge_hwrm_wait_state state) @@ -1146,7 +1169,7 @@ bnge_hwrm_update_token(struct bnge_dev *bd, u16 seq_id, dev_err(bd->dev, "Invalid hwrm seq id %d\n", seq_id); } -static int bnge_hwrm_handler(struct bnge_dev *bd, struct tx_cmp *txcmp) +static int bnge_hwrm_handler(struct bnge_net *bn, struct tx_cmp *txcmp) { struct hwrm_cmpl *h_cmpl = (struct hwrm_cmpl *)txcmp; u16 cmpl_type = TX_CMP_TYPE(txcmp), seq_id; @@ -1154,10 +1177,14 @@ static int bnge_hwrm_handler(struct bnge_dev *bd, struct tx_cmp *txcmp) switch (cmpl_type) { case CMPL_BASE_TYPE_HWRM_DONE: seq_id = le16_to_cpu(h_cmpl->sequence_id); - bnge_hwrm_update_token(bd, seq_id, BNGE_HWRM_COMPLETE); + bnge_hwrm_update_token(bn->bd, seq_id, BNGE_HWRM_COMPLETE); break; case CMPL_BASE_TYPE_HWRM_ASYNC_EVENT: + bnge_async_event_process(bn, + (struct hwrm_async_event_cmpl *)txcmp); + break; + default: break; } @@ -1235,7 +1262,7 @@ static int __bnge_poll_work(struct bnge_net *bn, struct bnge_cp_ring_info *cpr, } else if (unlikely(cmp_type == CMPL_BASE_TYPE_HWRM_DONE || cmp_type == CMPL_BASE_TYPE_HWRM_FWD_REQ || cmp_type == CMPL_BA_TY_HWRM_ASY_EVT)) { - bnge_hwrm_handler(bn->bd, txcmp); + bnge_hwrm_handler(bn, txcmp); } raw_cons = NEXT_RAW_CMP(raw_cons); @@ -1355,7 +1382,7 @@ int bnge_napi_poll(struct napi_struct *napi, int budget) budget - work_done); nqr->has_more_work |= cpr->has_more_work; } else { - bnge_hwrm_handler(bn->bd, (struct tx_cmp *)nqcmp); + bnge_hwrm_handler(bn, (struct tx_cmp *)nqcmp); } raw_cons = NEXT_RAW_CMP(raw_cons); } From 8438239bd2b2b1c85f57f7fa24c57f70692fd095 Mon Sep 17 00:00:00 2001 From: Bhargava Marreddy Date: Mon, 6 Apr 2026 23:34:16 +0530 Subject: [PATCH 1427/1561] bng_en: add HW stats infra and structured ethtool ops Implement the hardware-level statistics foundation and modern structured ethtool operations. 1. Infrastructure: Add HWRM firmware wrappers (FUNC_QSTATS_EXT, PORT_QSTATS_EXT, and PORT_QSTATS) to query ring and port counters. 2. Structured ops: Implement .get_eth_phy_stats, .get_eth_mac_stats, .get_eth_ctrl_stats, .get_pause_stats, and .get_rmon_stats. Stats are initially reported as 0; accumulation logic is added in a subsequent patch. Signed-off-by: Bhargava Marreddy Reviewed-by: Vikas Gupta Link: https://patch.msgid.link/20260406180420.279470-7-bhargava.marreddy@broadcom.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/broadcom/bnge/bnge_ethtool.c | 160 ++++++++++++++++ .../ethernet/broadcom/bnge/bnge_hwrm_lib.c | 151 ++++++++++++++- .../ethernet/broadcom/bnge/bnge_hwrm_lib.h | 3 + .../net/ethernet/broadcom/bnge/bnge_netdev.c | 180 +++++++++++++++++- .../net/ethernet/broadcom/bnge/bnge_netdev.h | 62 +++++- 5 files changed, 544 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_ethtool.c b/drivers/net/ethernet/broadcom/bnge/bnge_ethtool.c index 424bf2d6721c..87226d5bf15c 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnge/bnge_ethtool.c @@ -46,6 +46,161 @@ static void bnge_get_drvinfo(struct net_device *dev, strscpy(info->bus_info, pci_name(bd->pdev), sizeof(info->bus_info)); } +static void bnge_get_eth_phy_stats(struct net_device *dev, + struct ethtool_eth_phy_stats *phy_stats) +{ + struct bnge_net *bn = netdev_priv(dev); + u64 *rx; + + if (!(bn->flags & BNGE_FLAG_PORT_STATS_EXT)) + return; + + rx = bn->rx_port_stats_ext.sw_stats; + phy_stats->SymbolErrorDuringCarrier = + *(rx + BNGE_RX_STATS_EXT_OFFSET(rx_pcs_symbol_err)); +} + +static void bnge_get_eth_mac_stats(struct net_device *dev, + struct ethtool_eth_mac_stats *mac_stats) +{ + struct bnge_net *bn = netdev_priv(dev); + u64 *rx, *tx; + + if (!(bn->flags & BNGE_FLAG_PORT_STATS)) + return; + + rx = bn->port_stats.sw_stats; + tx = bn->port_stats.sw_stats + BNGE_TX_PORT_STATS_BYTE_OFFSET / 8; + + mac_stats->FramesReceivedOK = + BNGE_GET_RX_PORT_STATS64(rx, rx_good_frames); + mac_stats->FramesTransmittedOK = + BNGE_GET_TX_PORT_STATS64(tx, tx_good_frames); + mac_stats->FrameCheckSequenceErrors = + BNGE_GET_RX_PORT_STATS64(rx, rx_fcs_err_frames); + mac_stats->AlignmentErrors = + BNGE_GET_RX_PORT_STATS64(rx, rx_align_err_frames); + mac_stats->OutOfRangeLengthField = + BNGE_GET_RX_PORT_STATS64(rx, rx_oor_len_frames); + mac_stats->OctetsReceivedOK = BNGE_GET_RX_PORT_STATS64(rx, rx_bytes); + mac_stats->OctetsTransmittedOK = BNGE_GET_TX_PORT_STATS64(tx, tx_bytes); + mac_stats->MulticastFramesReceivedOK = + BNGE_GET_RX_PORT_STATS64(rx, rx_mcast_frames); + mac_stats->BroadcastFramesReceivedOK = + BNGE_GET_RX_PORT_STATS64(rx, rx_bcast_frames); + mac_stats->MulticastFramesXmittedOK = + BNGE_GET_TX_PORT_STATS64(tx, tx_mcast_frames); + mac_stats->BroadcastFramesXmittedOK = + BNGE_GET_TX_PORT_STATS64(tx, tx_bcast_frames); + mac_stats->FrameTooLongErrors = + BNGE_GET_RX_PORT_STATS64(rx, rx_ovrsz_frames); +} + +static void bnge_get_eth_ctrl_stats(struct net_device *dev, + struct ethtool_eth_ctrl_stats *ctrl_stats) +{ + struct bnge_net *bn = netdev_priv(dev); + u64 *rx; + + if (!(bn->flags & BNGE_FLAG_PORT_STATS)) + return; + + rx = bn->port_stats.sw_stats; + ctrl_stats->MACControlFramesReceived = + BNGE_GET_RX_PORT_STATS64(rx, rx_ctrl_frames); +} + +static void bnge_get_pause_stats(struct net_device *dev, + struct ethtool_pause_stats *pause_stats) +{ + struct bnge_net *bn = netdev_priv(dev); + u64 *rx, *tx; + + if (!(bn->flags & BNGE_FLAG_PORT_STATS)) + return; + + rx = bn->port_stats.sw_stats; + tx = bn->port_stats.sw_stats + BNGE_TX_PORT_STATS_BYTE_OFFSET / 8; + + pause_stats->rx_pause_frames = + BNGE_GET_RX_PORT_STATS64(rx, rx_pause_frames); + pause_stats->tx_pause_frames = + BNGE_GET_TX_PORT_STATS64(tx, tx_pause_frames); +} + +static const struct ethtool_rmon_hist_range bnge_rmon_ranges[] = { + { 0, 64 }, + { 65, 127 }, + { 128, 255 }, + { 256, 511 }, + { 512, 1023 }, + { 1024, 1518 }, + { 1519, 2047 }, + { 2048, 4095 }, + { 4096, 9216 }, + { 9217, 16383 }, + {} +}; + +static void bnge_get_rmon_stats(struct net_device *dev, + struct ethtool_rmon_stats *rmon_stats, + const struct ethtool_rmon_hist_range **ranges) +{ + struct bnge_net *bn = netdev_priv(dev); + u64 *rx, *tx; + + if (!(bn->flags & BNGE_FLAG_PORT_STATS)) + return; + + rx = bn->port_stats.sw_stats; + tx = bn->port_stats.sw_stats + BNGE_TX_PORT_STATS_BYTE_OFFSET / 8; + + rmon_stats->jabbers = BNGE_GET_RX_PORT_STATS64(rx, rx_jbr_frames); + rmon_stats->oversize_pkts = + BNGE_GET_RX_PORT_STATS64(rx, rx_ovrsz_frames); + rmon_stats->undersize_pkts = + BNGE_GET_RX_PORT_STATS64(rx, rx_undrsz_frames); + + rmon_stats->hist[0] = BNGE_GET_RX_PORT_STATS64(rx, rx_64b_frames); + rmon_stats->hist[1] = BNGE_GET_RX_PORT_STATS64(rx, rx_65b_127b_frames); + rmon_stats->hist[2] = BNGE_GET_RX_PORT_STATS64(rx, rx_128b_255b_frames); + rmon_stats->hist[3] = BNGE_GET_RX_PORT_STATS64(rx, rx_256b_511b_frames); + rmon_stats->hist[4] = + BNGE_GET_RX_PORT_STATS64(rx, rx_512b_1023b_frames); + rmon_stats->hist[5] = + BNGE_GET_RX_PORT_STATS64(rx, rx_1024b_1518b_frames); + rmon_stats->hist[6] = + BNGE_GET_RX_PORT_STATS64(rx, rx_1519b_2047b_frames); + rmon_stats->hist[7] = + BNGE_GET_RX_PORT_STATS64(rx, rx_2048b_4095b_frames); + rmon_stats->hist[8] = + BNGE_GET_RX_PORT_STATS64(rx, rx_4096b_9216b_frames); + rmon_stats->hist[9] = + BNGE_GET_RX_PORT_STATS64(rx, rx_9217b_16383b_frames); + + rmon_stats->hist_tx[0] = BNGE_GET_TX_PORT_STATS64(tx, tx_64b_frames); + rmon_stats->hist_tx[1] = + BNGE_GET_TX_PORT_STATS64(tx, tx_65b_127b_frames); + rmon_stats->hist_tx[2] = + BNGE_GET_TX_PORT_STATS64(tx, tx_128b_255b_frames); + rmon_stats->hist_tx[3] = + BNGE_GET_TX_PORT_STATS64(tx, tx_256b_511b_frames); + rmon_stats->hist_tx[4] = + BNGE_GET_TX_PORT_STATS64(tx, tx_512b_1023b_frames); + rmon_stats->hist_tx[5] = + BNGE_GET_TX_PORT_STATS64(tx, tx_1024b_1518b_frames); + rmon_stats->hist_tx[6] = + BNGE_GET_TX_PORT_STATS64(tx, tx_1519b_2047b_frames); + rmon_stats->hist_tx[7] = + BNGE_GET_TX_PORT_STATS64(tx, tx_2048b_4095b_frames); + rmon_stats->hist_tx[8] = + BNGE_GET_TX_PORT_STATS64(tx, tx_4096b_9216b_frames); + rmon_stats->hist_tx[9] = + BNGE_GET_TX_PORT_STATS64(tx, tx_9217b_16383b_frames); + + *ranges = bnge_rmon_ranges; +} + static void bnge_get_pauseparam(struct net_device *dev, struct ethtool_pauseparam *epause) { @@ -116,6 +271,11 @@ static const struct ethtool_ops bnge_ethtool_ops = { .nway_reset = bnge_nway_reset, .get_pauseparam = bnge_get_pauseparam, .set_pauseparam = bnge_set_pauseparam, + .get_eth_phy_stats = bnge_get_eth_phy_stats, + .get_eth_mac_stats = bnge_get_eth_mac_stats, + .get_eth_ctrl_stats = bnge_get_eth_ctrl_stats, + .get_pause_stats = bnge_get_pause_stats, + .get_rmon_stats = bnge_get_rmon_stats, }; void bnge_set_ethtool_ops(struct net_device *dev) diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.c b/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.c index 83c0b16c3e81..eb11800f5573 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.c +++ b/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.c @@ -14,6 +14,7 @@ #include "bnge_hwrm_lib.h" #include "bnge_rmem.h" #include "bnge_resc.h" +#include "bnge_netdev.h" static const u16 bnge_async_events_arr[] = { ASYNC_EVENT_CMPL_EVENT_ID_LINK_STATUS_CHANGE, @@ -594,7 +595,7 @@ int bnge_hwrm_func_qcaps(struct bnge_dev *bd) struct hwrm_func_qcaps_output *resp; struct hwrm_func_qcaps_input *req; struct bnge_pf_info *pf = &bd->pf; - u32 flags; + u32 flags, flags_ext; int rc; rc = bnge_hwrm_req_init(bd, req, HWRM_FUNC_QCAPS); @@ -612,6 +613,12 @@ int bnge_hwrm_func_qcaps(struct bnge_dev *bd) bd->flags |= BNGE_EN_ROCE_V1; if (flags & FUNC_QCAPS_RESP_FLAGS_ROCE_V2_SUPPORTED) bd->flags |= BNGE_EN_ROCE_V2; + if (flags & FUNC_QCAPS_RESP_FLAGS_EXT_STATS_SUPPORTED) + bd->fw_cap |= BNGE_FW_CAP_EXT_STATS_SUPPORTED; + + flags_ext = le32_to_cpu(resp->flags_ext); + if (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_EXT_HW_STATS_SUPPORTED) + bd->fw_cap |= BNGE_FW_CAP_EXT_HW_STATS_SUPPORTED; pf->fw_fid = le16_to_cpu(resp->fid); pf->port_id = le16_to_cpu(resp->port_id); @@ -1479,3 +1486,145 @@ int bnge_hwrm_vnic_set_tpa(struct bnge_dev *bd, struct bnge_vnic_info *vnic, return bnge_hwrm_req_send(bd, req); } + +int bnge_hwrm_func_qstat_ext(struct bnge_dev *bd, struct bnge_stats_mem *stats) +{ + struct hwrm_func_qstats_ext_output *resp; + struct hwrm_func_qstats_ext_input *req; + __le64 *hw_masks; + int rc; + + if (!(bd->fw_cap & BNGE_FW_CAP_EXT_HW_STATS_SUPPORTED)) + return -EOPNOTSUPP; + + rc = bnge_hwrm_req_init(bd, req, HWRM_FUNC_QSTATS_EXT); + if (rc) + return rc; + + req->fid = cpu_to_le16(0xffff); + req->flags = FUNC_QSTATS_EXT_REQ_FLAGS_COUNTER_MASK; + + resp = bnge_hwrm_req_hold(bd, req); + rc = bnge_hwrm_req_send(bd, req); + if (!rc) { + hw_masks = &resp->rx_ucast_pkts; + bnge_copy_hw_masks(stats->hw_masks, hw_masks, stats->len / 8); + } + bnge_hwrm_req_drop(bd, req); + return rc; +} + +int bnge_hwrm_port_qstats_ext(struct bnge_dev *bd, u8 flags) +{ + struct hwrm_queue_pri2cos_qcfg_output *resp_qc; + struct bnge_net *bn = netdev_priv(bd->netdev); + struct hwrm_queue_pri2cos_qcfg_input *req_qc; + struct hwrm_port_qstats_ext_output *resp_qs; + struct hwrm_port_qstats_ext_input *req_qs; + struct bnge_pf_info *pf = &bd->pf; + u32 tx_stat_size; + int rc; + + if (!(bn->flags & BNGE_FLAG_PORT_STATS_EXT)) + return 0; + + if (flags && !(bd->fw_cap & BNGE_FW_CAP_EXT_HW_STATS_SUPPORTED)) + return -EOPNOTSUPP; + + rc = bnge_hwrm_req_init(bd, req_qs, HWRM_PORT_QSTATS_EXT); + if (rc) + return rc; + + req_qs->flags = flags; + req_qs->port_id = cpu_to_le16(pf->port_id); + req_qs->rx_stat_size = cpu_to_le16(sizeof(struct rx_port_stats_ext)); + req_qs->rx_stat_host_addr = + cpu_to_le64(bn->rx_port_stats_ext.hw_stats_map); + tx_stat_size = bn->tx_port_stats_ext.hw_stats ? + sizeof(struct tx_port_stats_ext) : 0; + req_qs->tx_stat_size = cpu_to_le16(tx_stat_size); + req_qs->tx_stat_host_addr = + cpu_to_le64(bn->tx_port_stats_ext.hw_stats_map); + resp_qs = bnge_hwrm_req_hold(bd, req_qs); + rc = bnge_hwrm_req_send(bd, req_qs); + if (!rc) { + bn->fw_rx_stats_ext_size = + le16_to_cpu(resp_qs->rx_stat_size) / 8; + bn->fw_tx_stats_ext_size = tx_stat_size ? + le16_to_cpu(resp_qs->tx_stat_size) / 8 : 0; + } else { + bn->fw_rx_stats_ext_size = 0; + bn->fw_tx_stats_ext_size = 0; + } + bnge_hwrm_req_drop(bd, req_qs); + + if (flags) + return rc; + + if (bn->fw_tx_stats_ext_size <= + offsetof(struct tx_port_stats_ext, pfc_pri0_tx_duration_us) / 8) { + bn->pri2cos_valid = false; + return rc; + } + + rc = bnge_hwrm_req_init(bd, req_qc, HWRM_QUEUE_PRI2COS_QCFG); + if (rc) + return rc; + + req_qc->flags = cpu_to_le32(QUEUE_PRI2COS_QCFG_REQ_FLAGS_IVLAN); + + resp_qc = bnge_hwrm_req_hold(bd, req_qc); + rc = bnge_hwrm_req_send(bd, req_qc); + if (!rc) { + u8 *pri2cos; + int i, j; + + pri2cos = &resp_qc->pri0_cos_queue_id; + for (i = 0; i < 8; i++) { + u8 queue_id = pri2cos[i]; + u8 queue_idx; + + /* Per port queue IDs start from 0, 10, 20, etc */ + queue_idx = queue_id % 10; + if (queue_idx >= BNGE_MAX_QUEUE) { + bn->pri2cos_valid = false; + rc = -EINVAL; + goto drop_req; + } + for (j = 0; j < bd->max_q; j++) { + if (bd->q_ids[j] == queue_id) + bn->pri2cos_idx[i] = queue_idx; + } + } + bn->pri2cos_valid = true; + } +drop_req: + bnge_hwrm_req_drop(bd, req_qc); + return rc; +} + +int bnge_hwrm_port_qstats(struct bnge_dev *bd, u8 flags) +{ + struct bnge_net *bn = netdev_priv(bd->netdev); + struct hwrm_port_qstats_input *req; + struct bnge_pf_info *pf = &bd->pf; + int rc; + + if (!(bn->flags & BNGE_FLAG_PORT_STATS)) + return 0; + + if (flags && !(bd->fw_cap & BNGE_FW_CAP_EXT_HW_STATS_SUPPORTED)) + return -EOPNOTSUPP; + + rc = bnge_hwrm_req_init(bd, req, HWRM_PORT_QSTATS); + if (rc) + return rc; + + req->flags = flags; + req->port_id = cpu_to_le16(pf->port_id); + req->tx_stat_host_addr = cpu_to_le64(bn->port_stats.hw_stats_map + + BNGE_TX_PORT_STATS_BYTE_OFFSET); + req->rx_stat_host_addr = cpu_to_le64(bn->port_stats.hw_stats_map); + + return bnge_hwrm_req_send(bd, req); +} diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.h b/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.h index 86ca3ac2244b..3501de7a89b9 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.h +++ b/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.h @@ -62,4 +62,7 @@ int bnge_hwrm_phy_qcaps(struct bnge_dev *bd); int bnge_hwrm_set_link_setting(struct bnge_net *bn, bool set_pause); int bnge_hwrm_set_pause(struct bnge_net *bn); int bnge_hwrm_shutdown_link(struct bnge_dev *bd); +int bnge_hwrm_port_qstats(struct bnge_dev *bd, u8 flags); +int bnge_hwrm_port_qstats_ext(struct bnge_dev *bd, u8 flags); +int bnge_hwrm_func_qstat_ext(struct bnge_dev *bd, struct bnge_stats_mem *stats); #endif /* _BNGE_HWRM_LIB_H_ */ diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c index 90458b323120..290bf1cd96c9 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c +++ b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c @@ -39,6 +39,10 @@ static void bnge_free_stats_mem(struct bnge_net *bn, { struct bnge_dev *bd = bn->bd; + kfree(stats->hw_masks); + stats->hw_masks = NULL; + kfree(stats->sw_stats); + stats->sw_stats = NULL; if (stats->hw_stats) { dma_free_coherent(bd->dev, stats->len, stats->hw_stats, stats->hw_stats_map); @@ -47,7 +51,7 @@ static void bnge_free_stats_mem(struct bnge_net *bn, } static int bnge_alloc_stats_mem(struct bnge_net *bn, - struct bnge_stats_mem *stats) + struct bnge_stats_mem *stats, bool alloc_masks) { struct bnge_dev *bd = bn->bd; @@ -56,7 +60,20 @@ static int bnge_alloc_stats_mem(struct bnge_net *bn, if (!stats->hw_stats) return -ENOMEM; + stats->sw_stats = kzalloc(stats->len, GFP_KERNEL); + if (!stats->sw_stats) + goto stats_mem_err; + + if (alloc_masks) { + stats->hw_masks = kzalloc(stats->len, GFP_KERNEL); + if (!stats->hw_masks) + goto stats_mem_err; + } return 0; + +stats_mem_err: + bnge_free_stats_mem(bn, stats); + return -ENOMEM; } static void bnge_free_ring_stats(struct bnge_net *bn) @@ -75,6 +92,107 @@ static void bnge_free_ring_stats(struct bnge_net *bn) } } +static void bnge_fill_masks(u64 *mask_arr, u64 mask, int count) +{ + int i; + + for (i = 0; i < count; i++) + mask_arr[i] = mask; +} + +void bnge_copy_hw_masks(u64 *mask_arr, __le64 *hw_mask_arr, int count) +{ + int i; + + for (i = 0; i < count; i++) + mask_arr[i] = le64_to_cpu(hw_mask_arr[i]); +} + +static void bnge_init_stats(struct bnge_net *bn) +{ + struct bnge_napi *bnapi = bn->bnapi[0]; + struct bnge_nq_ring_info *nqr; + struct bnge_stats_mem *stats; + struct bnge_dev *bd = bn->bd; + __le64 *rx_stats, *tx_stats; + int rc, rx_count, tx_count; + u64 *rx_masks, *tx_masks; + u8 flags; + + nqr = &bnapi->nq_ring; + stats = &nqr->stats; + rc = bnge_hwrm_func_qstat_ext(bd, stats); + if (rc) { + u64 mask = (1ULL << 48) - 1; + + bnge_fill_masks(stats->hw_masks, mask, stats->len / 8); + } + + if (bn->flags & BNGE_FLAG_PORT_STATS) { + stats = &bn->port_stats; + rx_stats = stats->hw_stats; + rx_masks = stats->hw_masks; + rx_count = sizeof(struct rx_port_stats) / 8; + tx_stats = rx_stats + BNGE_TX_PORT_STATS_BYTE_OFFSET / 8; + tx_masks = rx_masks + BNGE_TX_PORT_STATS_BYTE_OFFSET / 8; + tx_count = sizeof(struct tx_port_stats) / 8; + + flags = PORT_QSTATS_REQ_FLAGS_COUNTER_MASK; + rc = bnge_hwrm_port_qstats(bd, flags); + if (rc) { + u64 mask = (1ULL << 40) - 1; + + bnge_fill_masks(rx_masks, mask, rx_count); + bnge_fill_masks(tx_masks, mask, tx_count); + } else { + bnge_copy_hw_masks(rx_masks, rx_stats, rx_count); + bnge_copy_hw_masks(tx_masks, tx_stats, tx_count); + } + bnge_hwrm_port_qstats(bd, 0); + } + + if (bn->flags & BNGE_FLAG_PORT_STATS_EXT) { + stats = &bn->rx_port_stats_ext; + rx_stats = stats->hw_stats; + rx_masks = stats->hw_masks; + rx_count = sizeof(struct rx_port_stats_ext) / 8; + stats = &bn->tx_port_stats_ext; + tx_stats = stats->hw_stats; + tx_masks = stats->hw_masks; + tx_count = sizeof(struct tx_port_stats_ext) / 8; + + flags = PORT_QSTATS_EXT_REQ_FLAGS_COUNTER_MASK; + rc = bnge_hwrm_port_qstats_ext(bd, flags); + if (rc) { + u64 mask = (1ULL << 40) - 1; + + bnge_fill_masks(rx_masks, mask, rx_count); + if (tx_stats) + bnge_fill_masks(tx_masks, mask, tx_count); + } else { + bnge_copy_hw_masks(rx_masks, rx_stats, rx_count); + if (tx_stats) + bnge_copy_hw_masks(tx_masks, tx_stats, + tx_count); + } + bnge_hwrm_port_qstats_ext(bd, 0); + } +} + +static void bnge_free_port_ext_stats(struct bnge_net *bn) +{ + bn->flags &= ~BNGE_FLAG_PORT_STATS_EXT; + bnge_free_stats_mem(bn, &bn->rx_port_stats_ext); + bnge_free_stats_mem(bn, &bn->tx_port_stats_ext); +} + +static void bnge_free_port_stats(struct bnge_net *bn) +{ + bn->flags &= ~BNGE_FLAG_PORT_STATS; + bnge_free_stats_mem(bn, &bn->port_stats); + bnge_free_port_ext_stats(bn); +} + static int bnge_alloc_ring_stats(struct bnge_net *bn) { struct bnge_dev *bd = bn->bd; @@ -88,12 +206,13 @@ static int bnge_alloc_ring_stats(struct bnge_net *bn) struct bnge_nq_ring_info *nqr = &bnapi->nq_ring; nqr->stats.len = size; - rc = bnge_alloc_stats_mem(bn, &nqr->stats); + rc = bnge_alloc_stats_mem(bn, &nqr->stats, !i); if (rc) goto err_free_ring_stats; nqr->hw_stats_ctx_id = INVALID_STATS_CTX_ID; } + return 0; err_free_ring_stats: @@ -101,6 +220,51 @@ err_free_ring_stats: return rc; } +static void bnge_alloc_port_ext_stats(struct bnge_net *bn) +{ + struct bnge_dev *bd = bn->bd; + int rc; + + if (!(bd->fw_cap & BNGE_FW_CAP_EXT_STATS_SUPPORTED)) + return; + + if (!bn->rx_port_stats_ext.hw_stats) { + bn->rx_port_stats_ext.len = sizeof(struct rx_port_stats_ext); + rc = bnge_alloc_stats_mem(bn, &bn->rx_port_stats_ext, true); + /* Extended stats are optional */ + if (rc) + return; + } + + if (!bn->tx_port_stats_ext.hw_stats) { + bn->tx_port_stats_ext.len = sizeof(struct tx_port_stats_ext); + rc = bnge_alloc_stats_mem(bn, &bn->tx_port_stats_ext, true); + /* Extended stats are optional */ + if (rc) { + bnge_free_port_ext_stats(bn); + return; + } + } + bn->flags |= BNGE_FLAG_PORT_STATS_EXT; +} + +static int bnge_alloc_port_stats(struct bnge_net *bn) +{ + int rc; + + if (!bn->port_stats.hw_stats) { + bn->port_stats.len = BNGE_PORT_STATS_SIZE; + rc = bnge_alloc_stats_mem(bn, &bn->port_stats, true); + if (rc) + return rc; + + bn->flags |= BNGE_FLAG_PORT_STATS; + } + + bnge_alloc_port_ext_stats(bn); + return 0; +} + void __bnge_queue_sp_work(struct bnge_net *bn) { queue_work(bn->bnge_pf_wq, &bn->sp_task); @@ -1028,6 +1192,8 @@ static int bnge_alloc_core(struct bnge_net *bn) if (rc) goto err_free_core; + bnge_init_stats(bn); + rc = bnge_alloc_vnics(bn); if (rc) goto err_free_core; @@ -2904,15 +3070,21 @@ int bnge_netdev_alloc(struct bnge_dev *bd, int max_irqs) if (rc) goto err_free_workq; + rc = bnge_alloc_port_stats(bn); + if (rc) + goto err_free_workq; + netdev->request_ops_lock = true; rc = register_netdev(netdev); if (rc) { dev_err(bd->dev, "Register netdev failed rc: %d\n", rc); - goto err_free_workq; + goto err_free_port_stats; } return 0; +err_free_port_stats: + bnge_free_port_stats(bn); err_free_workq: destroy_workqueue(bn->bnge_pf_wq); err_netdev: @@ -2934,6 +3106,8 @@ void bnge_netdev_free(struct bnge_dev *bd) bn->sp_event = 0; destroy_workqueue(bn->bnge_pf_wq); + bnge_free_port_stats(bn); + free_netdev(netdev); bd->netdev = NULL; } diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.h b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.h index 5636eb371e24..15ededb801f6 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.h +++ b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.h @@ -7,6 +7,7 @@ #include #include #include +#include #include "bnge_db.h" #include "bnge_hw_def.h" #include "bnge_link.h" @@ -224,6 +225,42 @@ struct bnge_tpa_info { #define BNGE_NQ_HDL_IDX(hdl) ((hdl) & BNGE_NQ_HDL_IDX_MASK) #define BNGE_NQ_HDL_TYPE(hdl) (((hdl) & BNGE_NQ_HDL_TYPE_MASK) >> \ BNGE_NQ_HDL_TYPE_SHIFT) +#define BNGE_GET_RING_STATS64(sw, counter) \ + (*((sw) + offsetof(struct ctx_hw_stats, counter) / 8)) + +#define BNGE_GET_RX_PORT_STATS64(sw, counter) \ + (*((sw) + offsetof(struct rx_port_stats, counter) / 8)) + +#define BNGE_GET_TX_PORT_STATS64(sw, counter) \ + (*((sw) + offsetof(struct tx_port_stats, counter) / 8)) + +#define BNGE_PORT_STATS_SIZE \ + (sizeof(struct rx_port_stats) + sizeof(struct tx_port_stats) + 1024) + +#define BNGE_TX_PORT_STATS_BYTE_OFFSET \ + (sizeof(struct rx_port_stats) + 512) + +#define BNGE_RX_STATS_OFFSET(counter) \ + (offsetof(struct rx_port_stats, counter) / 8) + +#define BNGE_TX_STATS_OFFSET(counter) \ + ((offsetof(struct tx_port_stats, counter) + \ + BNGE_TX_PORT_STATS_BYTE_OFFSET) / 8) + +#define BNGE_RX_STATS_EXT_OFFSET(counter) \ + (offsetof(struct rx_port_stats_ext, counter) / 8) + +#define BNGE_TX_STATS_EXT_OFFSET(counter) \ + (offsetof(struct tx_port_stats_ext, counter) / 8) + +struct bnge_stats_mem { + u64 *sw_stats; + u64 *hw_masks; + void *hw_stats; + dma_addr_t hw_stats_map; + u32 len; + struct u64_stats_sync syncp; +}; enum bnge_net_state { BNGE_STATE_NAPI_DISABLED, @@ -231,6 +268,11 @@ enum bnge_net_state { #define BNGE_TIMER_INTERVAL HZ +enum bnge_net_flag { + BNGE_FLAG_PORT_STATS = BIT(0), + BNGE_FLAG_PORT_STATS_EXT = BIT(1), +}; + enum bnge_sp_event { BNGE_LINK_CHNG_SP_EVENT, BNGE_LINK_SPEED_CHNG_SP_EVENT, @@ -309,6 +351,17 @@ struct bnge_net { unsigned long sp_event; struct bnge_ethtool_link_info eth_link_info; + + u64 flags; + + struct bnge_stats_mem port_stats; + struct bnge_stats_mem rx_port_stats_ext; + struct bnge_stats_mem tx_port_stats_ext; + u16 fw_rx_stats_ext_size; + u16 fw_tx_stats_ext_size; + + u8 pri2cos_idx[8]; + bool pri2cos_valid; }; #define BNGE_DEFAULT_RX_RING_SIZE 511 @@ -374,14 +427,6 @@ void bnge_set_ring_params(struct bnge_dev *bd); bnge_writeq(bd, (db)->db_key64 | DBR_TYPE_NQ_ARM | \ DB_RING_IDX(db, idx), (db)->doorbell) -struct bnge_stats_mem { - u64 *sw_stats; - u64 *hw_masks; - void *hw_stats; - dma_addr_t hw_stats_map; - int len; -}; - struct nqe_cn { __le16 type; #define NQ_CN_TYPE_MASK 0x3fUL @@ -588,4 +633,5 @@ u8 *__bnge_alloc_rx_frag(struct bnge_net *bn, dma_addr_t *mapping, int bnge_alloc_rx_netmem(struct bnge_net *bn, struct bnge_rx_ring_info *rxr, u16 prod, gfp_t gfp); void __bnge_queue_sp_work(struct bnge_net *bn); +void bnge_copy_hw_masks(u64 *mask_arr, __le64 *hw_mask_arr, int count); #endif /* _BNGE_NETDEV_H_ */ From 50c885cb2ebec05ca2ba1ff2723a654ac4b22df9 Mon Sep 17 00:00:00 2001 From: Bhargava Marreddy Date: Mon, 6 Apr 2026 23:34:17 +0530 Subject: [PATCH 1428/1561] bng_en: periodically fetch and accumulate hardware statistics Use the timer to schedule periodic stats collection via the workqueue when the link is up. Fetch fresh counters from hardware via DMA and accumulate them into 64-bit software shadows, handling wrap-around for counters narrower than 64 bits. Signed-off-by: Bhargava Marreddy Reviewed-by: Vikas Gupta Reviewed-by: Rahul Gupta Reviewed-by: Ajit Kumar Khaparde Link: https://patch.msgid.link/20260406180420.279470-8-bhargava.marreddy@broadcom.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/broadcom/bnge/bnge_netdev.c | 98 +++++++++++++++++++ .../net/ethernet/broadcom/bnge/bnge_netdev.h | 1 + 2 files changed, 99 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c index 290bf1cd96c9..6f41dff3fbcb 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c +++ b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c @@ -69,6 +69,9 @@ static int bnge_alloc_stats_mem(struct bnge_net *bn, if (!stats->hw_masks) goto stats_mem_err; } + + u64_stats_init(&stats->syncp); + return 0; stats_mem_err: @@ -294,9 +297,98 @@ static void bnge_timer(struct timer_list *t) } } + if (BNGE_LINK_IS_UP(bd) && bn->stats_coal_ticks) + bnge_queue_sp_work(bn, BNGE_PERIODIC_STATS_SP_EVENT); + mod_timer(&bn->timer, jiffies + bn->current_interval); } +static void bnge_add_one_ctr(u64 hw, u64 *sw, u64 mask) +{ + u64 sw_tmp, sw_val; + + hw &= mask; + sw_val = READ_ONCE(*sw); + sw_tmp = (sw_val & ~mask) | hw; + if (hw < (sw_val & mask)) + sw_tmp += mask + 1; + WRITE_ONCE(*sw, sw_tmp); +} + +static void __bnge_accumulate_stats(__le64 *hw_stats, u64 *sw_stats, u64 *masks, + int count, struct u64_stats_sync *syncp) +{ + unsigned long flags; + int i; + + flags = u64_stats_update_begin_irqsave(syncp); + for (i = 0; i < count; i++) { + u64 hw = le64_to_cpu(READ_ONCE(hw_stats[i])); + + if (masks[i] == -1ULL) + WRITE_ONCE(sw_stats[i], hw); + else + bnge_add_one_ctr(hw, &sw_stats[i], masks[i]); + } + u64_stats_update_end_irqrestore(syncp, flags); +} + +static void bnge_accumulate_stats(struct bnge_stats_mem *stats) +{ + if (!stats->hw_stats) + return; + + __bnge_accumulate_stats(stats->hw_stats, stats->sw_stats, + stats->hw_masks, stats->len / 8, &stats->syncp); +} + +static void bnge_accumulate_all_stats(struct bnge_dev *bd) +{ + struct bnge_net *bn = netdev_priv(bd->netdev); + struct bnge_stats_mem *ring0_stats = NULL; + int i; + + for (i = 0; i < bd->nq_nr_rings; i++) { + struct bnge_napi *bnapi = bn->bnapi[i]; + struct bnge_nq_ring_info *nqr; + struct bnge_stats_mem *stats; + + nqr = &bnapi->nq_ring; + stats = &nqr->stats; + + if (!ring0_stats) + ring0_stats = &bn->bnapi[0]->nq_ring.stats; + + __bnge_accumulate_stats(stats->hw_stats, stats->sw_stats, + ring0_stats->hw_masks, + ring0_stats->len / 8, &stats->syncp); + } + + if (bn->flags & BNGE_FLAG_PORT_STATS) { + struct bnge_stats_mem *stats = &bn->port_stats; + __le64 *hw_stats = stats->hw_stats; + u64 *sw_stats = stats->sw_stats; + u64 *masks = stats->hw_masks; + u16 cnt; + + cnt = sizeof(struct rx_port_stats) / 8; + __bnge_accumulate_stats(hw_stats, sw_stats, masks, cnt, + &stats->syncp); + + hw_stats += BNGE_TX_PORT_STATS_BYTE_OFFSET / 8; + sw_stats += BNGE_TX_PORT_STATS_BYTE_OFFSET / 8; + masks += BNGE_TX_PORT_STATS_BYTE_OFFSET / 8; + cnt = sizeof(struct tx_port_stats) / 8; + __bnge_accumulate_stats(hw_stats, sw_stats, masks, cnt, + &stats->syncp); + } + + if (bn->flags & BNGE_FLAG_PORT_STATS_EXT) { + bnge_accumulate_stats(&bn->rx_port_stats_ext); + bnge_accumulate_stats(&bn->tx_port_stats_ext); + } +} + static void bnge_sp_task(struct work_struct *work) { struct bnge_net *bn = container_of(work, struct bnge_net, sp_task); @@ -309,6 +401,12 @@ static void bnge_sp_task(struct work_struct *work) return; } + if (test_and_clear_bit(BNGE_PERIODIC_STATS_SP_EVENT, &bn->sp_event)) { + bnge_hwrm_port_qstats(bd, 0); + bnge_hwrm_port_qstats_ext(bd, 0); + bnge_accumulate_all_stats(bd); + } + if (test_and_clear_bit(BNGE_UPDATE_PHY_SP_EVENT, &bn->sp_event)) { int rc; diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.h b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.h index 15ededb801f6..a73f51b01bc2 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.h +++ b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.h @@ -278,6 +278,7 @@ enum bnge_sp_event { BNGE_LINK_SPEED_CHNG_SP_EVENT, BNGE_LINK_CFG_CHANGE_SP_EVENT, BNGE_UPDATE_PHY_SP_EVENT, + BNGE_PERIODIC_STATS_SP_EVENT, }; struct bnge_net { From d4f802eb4e7d4eb786db9ce2a6d50eed7cbf4e5c Mon Sep 17 00:00:00 2001 From: Bhargava Marreddy Date: Mon, 6 Apr 2026 23:34:18 +0530 Subject: [PATCH 1429/1561] bng_en: implement ndo_get_stats64 Implement the ndo_get_stats64 callback to report aggregate network statistics. The driver gathers these by accumulating the per-ring counters into the provided rtnl_link_stats64 structure. Signed-off-by: Bhargava Marreddy Reviewed-by: Vikas Gupta Link: https://patch.msgid.link/20260406180420.279470-9-bhargava.marreddy@broadcom.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/broadcom/bnge/bnge_netdev.c | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c index 6f41dff3fbcb..4148a9ec6d23 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c +++ b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c @@ -2898,6 +2898,46 @@ static int bnge_shutdown_nic(struct bnge_net *bn) return 0; } +static void bnge_get_port_stats64(struct bnge_net *bn, + struct rtnl_link_stats64 *stats) +{ + unsigned int start; + u64 *tx, *rx; + + rx = bn->port_stats.sw_stats; + tx = bn->port_stats.sw_stats + BNGE_TX_PORT_STATS_BYTE_OFFSET / 8; + + do { + start = u64_stats_fetch_begin(&bn->port_stats.syncp); + + stats->rx_crc_errors = + BNGE_GET_RX_PORT_STATS64(rx, rx_fcs_err_frames); + stats->rx_frame_errors = + BNGE_GET_RX_PORT_STATS64(rx, rx_align_err_frames); + stats->rx_length_errors = + BNGE_GET_RX_PORT_STATS64(rx, rx_undrsz_frames) + + BNGE_GET_RX_PORT_STATS64(rx, rx_ovrsz_frames) + + BNGE_GET_RX_PORT_STATS64(rx, rx_runt_frames); + stats->rx_errors = + BNGE_GET_RX_PORT_STATS64(rx, rx_false_carrier_frames) + + BNGE_GET_RX_PORT_STATS64(rx, rx_jbr_frames); + stats->collisions = + BNGE_GET_TX_PORT_STATS64(tx, tx_total_collisions); + stats->tx_fifo_errors = + BNGE_GET_TX_PORT_STATS64(tx, tx_fifo_underruns); + stats->tx_errors = BNGE_GET_TX_PORT_STATS64(tx, tx_err); + } while (u64_stats_fetch_retry(&bn->port_stats.syncp, start)); +} + +static void bnge_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) +{ + struct bnge_net *bn = netdev_priv(dev); + + if (bn->flags & BNGE_FLAG_PORT_STATS) + bnge_get_port_stats64(bn, stats); +} + static void bnge_close_core(struct bnge_net *bn) { struct bnge_dev *bd = bn->bd; @@ -2931,6 +2971,7 @@ static const struct net_device_ops bnge_netdev_ops = { .ndo_open = bnge_open, .ndo_stop = bnge_close, .ndo_start_xmit = bnge_start_xmit, + .ndo_get_stats64 = bnge_get_stats64, .ndo_features_check = bnge_features_check, }; From c1da271f0d35b2c043a0b7cd58b3683a3776b135 Mon Sep 17 00:00:00 2001 From: Bhargava Marreddy Date: Mon, 6 Apr 2026 23:34:19 +0530 Subject: [PATCH 1430/1561] bng_en: implement netdev_stat_ops Implement netdev_stat_ops to provide standardized per-queue statistics via the Netlink API. Below is the description of the hardware drop counters: rx-hw-drop-overruns: Packets dropped by HW due to resource limitations (e.g., no BDs available in the host ring). rx-hw-drops: Total packets dropped by HW (sum of overruns and error drops). tx-hw-drop-errors: Packets dropped by HW because they were invalid or malformed. tx-hw-drops: Total packets dropped by HW (sum of resource limitations and error drops). The implementation was verified using the ynl tool: ./tools/net/ynl/pyynl/cli.py --spec \ Documentation/netlink/specs/netdev.yaml --dump qstats-get --json \ '{"ifindex":14, "scope":"queue"}' [{'ifindex': 14, 'queue-id': 0, 'queue-type': 'rx', 'rx-bytes': 758, 'rx-hw-drop-overruns': 0, 'rx-hw-drops': 0, 'rx-packets': 11}, {'ifindex': 14, 'queue-id': 1, 'queue-type': 'rx', 'rx-bytes': 0, 'rx-hw-drop-overruns': 0, 'rx-hw-drops': 0, 'rx-packets': 0}, {'ifindex': 14, 'queue-id': 0, 'queue-type': 'tx', 'tx-bytes': 0, 'tx-hw-drop-errors': 0, 'tx-hw-drops': 0, 'tx-packets': 0}, {'ifindex': 14, 'queue-id': 1, 'queue-type': 'tx', 'tx-bytes': 0, 'tx-hw-drop-errors': 0, 'tx-hw-drops': 0, 'tx-packets': 0}, {'ifindex': 14, 'queue-id': 2, 'queue-type': 'tx', 'tx-bytes': 810, 'tx-hw-drop-errors': 0, 'tx-hw-drops': 0, 'tx-packets': 10},] Signed-off-by: Bhargava Marreddy Reviewed-by: Vikas Gupta Link: https://patch.msgid.link/20260406180420.279470-10-bhargava.marreddy@broadcom.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/broadcom/bnge/bnge_netdev.c | 227 ++++++++++++++++++ .../net/ethernet/broadcom/bnge/bnge_netdev.h | 7 + 2 files changed, 234 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c index 4148a9ec6d23..70768193004c 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c +++ b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c @@ -2860,6 +2860,7 @@ static int bnge_open_core(struct bnge_net *bn) } set_bit(BNGE_STATE_OPEN, &bd->state); + set_bit(BNGE_STATE_STATS_ENABLE, &bn->state); bnge_enable_int(bn); @@ -2898,6 +2899,77 @@ static int bnge_shutdown_nic(struct bnge_net *bn) return 0; } +static void bnge_add_prev_ring_stats64(struct bnge_net *bn, + struct rtnl_link_stats64 *stats) +{ + struct netdev_queue_stats_rx *rx_save = &bn->rxq_prv_stats; + struct netdev_queue_stats_tx *tx_save = &bn->txq_prv_stats; + struct rtnl_link_stats64 *stats64_save = &bn->prv_stats64; + + stats->rx_packets += rx_save->packets; + stats->tx_packets += tx_save->packets; + stats->rx_bytes += rx_save->bytes; + stats->tx_bytes += tx_save->bytes; + stats->rx_missed_errors += rx_save->hw_drop_overruns; + stats->tx_dropped += tx_save->hw_drop_errors; + + stats->multicast += stats64_save->multicast; +} + +static void bnge_get_ring_stats64(struct bnge_dev *bd, + struct rtnl_link_stats64 *stats) +{ + struct bnge_net *bn = netdev_priv(bd->netdev); + int i; + + for (i = 0; i < bd->nq_nr_rings; i++) { + struct bnge_napi *bnapi = bn->bnapi[i]; + u64 tx_bytes, tx_packets, tx_dropped; + u64 multicast, rx_missed_errors; + struct bnge_nq_ring_info *nqr; + u64 rx_bytes, rx_packets; + unsigned int start; + u64 *sw; + + nqr = &bnapi->nq_ring; + sw = nqr->stats.sw_stats; + + do { + start = u64_stats_fetch_begin(&nqr->stats.syncp); + + rx_packets = BNGE_GET_RING_STATS64(sw, rx_ucast_pkts); + rx_packets += BNGE_GET_RING_STATS64(sw, rx_mcast_pkts); + rx_packets += BNGE_GET_RING_STATS64(sw, rx_bcast_pkts); + + tx_packets = BNGE_GET_RING_STATS64(sw, tx_ucast_pkts); + tx_packets += BNGE_GET_RING_STATS64(sw, tx_mcast_pkts); + tx_packets += BNGE_GET_RING_STATS64(sw, tx_bcast_pkts); + + rx_bytes = BNGE_GET_RING_STATS64(sw, rx_ucast_bytes); + rx_bytes += BNGE_GET_RING_STATS64(sw, rx_mcast_bytes); + rx_bytes += BNGE_GET_RING_STATS64(sw, rx_bcast_bytes); + + tx_bytes = BNGE_GET_RING_STATS64(sw, tx_ucast_bytes); + tx_bytes += BNGE_GET_RING_STATS64(sw, tx_mcast_bytes); + tx_bytes += BNGE_GET_RING_STATS64(sw, tx_bcast_bytes); + + multicast = BNGE_GET_RING_STATS64(sw, rx_mcast_pkts); + rx_missed_errors = + BNGE_GET_RING_STATS64(sw, rx_discard_pkts); + tx_dropped = + BNGE_GET_RING_STATS64(sw, tx_error_pkts); + } while (u64_stats_fetch_retry(&nqr->stats.syncp, start)); + + stats->rx_packets += rx_packets; + stats->tx_packets += tx_packets; + stats->rx_bytes += rx_bytes; + stats->tx_bytes += tx_bytes; + stats->multicast += multicast; + stats->rx_missed_errors += rx_missed_errors; + stats->tx_dropped += tx_dropped; + } +} + static void bnge_get_port_stats64(struct bnge_net *bn, struct rtnl_link_stats64 *stats) { @@ -2929,6 +3001,22 @@ static void bnge_get_port_stats64(struct bnge_net *bn, } while (u64_stats_fetch_retry(&bn->port_stats.syncp, start)); } +static void bnge_fill_prev_stats64(struct bnge_net *bn, + struct rtnl_link_stats64 *stats) +{ + struct netdev_queue_stats_rx *rx_save = &bn->rxq_prv_stats; + struct netdev_queue_stats_tx *tx_save = &bn->txq_prv_stats; + struct rtnl_link_stats64 *stats64_save = &bn->prv_stats64; + + stats->rx_packets = rx_save->packets; + stats->tx_packets = tx_save->packets; + stats->rx_bytes = rx_save->bytes; + stats->tx_bytes = tx_save->bytes; + stats->rx_missed_errors = rx_save->hw_drop_overruns; + stats->tx_dropped = tx_save->hw_drop_errors; + stats->multicast = stats64_save->multicast; +} + static void bnge_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { @@ -2936,6 +3024,57 @@ static void bnge_get_stats64(struct net_device *dev, if (bn->flags & BNGE_FLAG_PORT_STATS) bnge_get_port_stats64(bn, stats); + + spin_lock_bh(&bn->stats_lock); + if (!test_bit(BNGE_STATE_STATS_ENABLE, &bn->state)) { + bnge_fill_prev_stats64(bn, stats); + spin_unlock_bh(&bn->stats_lock); + return; + } + + bnge_get_ring_stats64(bn->bd, stats); + bnge_add_prev_ring_stats64(bn, stats); + spin_unlock_bh(&bn->stats_lock); +} + +static void bnge_save_ring_stats(struct bnge_net *bn) +{ + struct netdev_queue_stats_rx *rx_save = &bn->rxq_prv_stats; + struct netdev_queue_stats_tx *tx_save = &bn->txq_prv_stats; + struct rtnl_link_stats64 *stats64_save = &bn->prv_stats64; + int i; + + for (i = 0; i < bn->bd->nq_nr_rings; i++) { + struct bnge_napi *bnapi = bn->bnapi[i]; + struct bnge_nq_ring_info *nqr; + u64 *sw; + + nqr = &bnapi->nq_ring; + sw = nqr->stats.sw_stats; + + rx_save->packets += BNGE_GET_RING_STATS64(sw, rx_ucast_pkts); + rx_save->packets += BNGE_GET_RING_STATS64(sw, rx_mcast_pkts); + rx_save->packets += BNGE_GET_RING_STATS64(sw, rx_bcast_pkts); + rx_save->bytes += BNGE_GET_RING_STATS64(sw, rx_ucast_bytes); + rx_save->bytes += BNGE_GET_RING_STATS64(sw, rx_mcast_bytes); + rx_save->bytes += BNGE_GET_RING_STATS64(sw, rx_bcast_bytes); + rx_save->hw_drop_overruns += BNGE_GET_RING_STATS64(sw, rx_discard_pkts); + rx_save->hw_drops += BNGE_GET_RING_STATS64(sw, rx_error_pkts) + + BNGE_GET_RING_STATS64(sw, rx_discard_pkts); + + tx_save->packets += BNGE_GET_RING_STATS64(sw, tx_ucast_pkts); + tx_save->packets += BNGE_GET_RING_STATS64(sw, tx_mcast_pkts); + tx_save->packets += BNGE_GET_RING_STATS64(sw, tx_bcast_pkts); + tx_save->bytes += BNGE_GET_RING_STATS64(sw, tx_ucast_bytes); + tx_save->bytes += BNGE_GET_RING_STATS64(sw, tx_mcast_bytes); + tx_save->bytes += BNGE_GET_RING_STATS64(sw, tx_bcast_bytes); + tx_save->hw_drop_errors += BNGE_GET_RING_STATS64(sw, tx_error_pkts); + tx_save->hw_drops += BNGE_GET_RING_STATS64(sw, tx_discard_pkts) + + BNGE_GET_RING_STATS64(sw, tx_error_pkts); + + stats64_save->multicast += + BNGE_GET_RING_STATS64(sw, rx_mcast_pkts); + } } static void bnge_close_core(struct bnge_net *bn) @@ -2949,6 +3088,13 @@ static void bnge_close_core(struct bnge_net *bn) timer_delete_sync(&bn->timer); bnge_shutdown_nic(bn); bnge_disable_napi(bn); + + /* Save ring stats before shutdown */ + spin_lock_bh(&bn->stats_lock); + bnge_save_ring_stats(bn); + clear_bit(BNGE_STATE_STATS_ENABLE, &bn->state); + spin_unlock_bh(&bn->stats_lock); + bnge_free_all_rings_bufs(bn); bnge_free_irq(bn); bnge_del_napi(bn); @@ -2967,6 +3113,85 @@ static int bnge_close(struct net_device *dev) return 0; } +static void bnge_get_queue_stats_rx(struct net_device *dev, int i, + struct netdev_queue_stats_rx *stats) +{ + struct bnge_net *bn = netdev_priv(dev); + struct bnge_nq_ring_info *nqr; + u64 *sw; + + if (!bn->bnapi) + return; + + nqr = &bn->bnapi[i]->nq_ring; + sw = nqr->stats.sw_stats; + + stats->packets = 0; + stats->packets += BNGE_GET_RING_STATS64(sw, rx_ucast_pkts); + stats->packets += BNGE_GET_RING_STATS64(sw, rx_mcast_pkts); + stats->packets += BNGE_GET_RING_STATS64(sw, rx_bcast_pkts); + + stats->bytes = 0; + stats->bytes += BNGE_GET_RING_STATS64(sw, rx_ucast_bytes); + stats->bytes += BNGE_GET_RING_STATS64(sw, rx_mcast_bytes); + stats->bytes += BNGE_GET_RING_STATS64(sw, rx_bcast_bytes); + + stats->hw_drop_overruns = BNGE_GET_RING_STATS64(sw, rx_discard_pkts); + stats->hw_drops = BNGE_GET_RING_STATS64(sw, rx_error_pkts) + + stats->hw_drop_overruns; +} + +static void bnge_get_queue_stats_tx(struct net_device *dev, int i, + struct netdev_queue_stats_tx *stats) +{ + struct bnge_net *bn = netdev_priv(dev); + struct bnge_napi *bnapi; + u64 *sw; + + if (!bn->tx_ring) + return; + + bnapi = bn->tx_ring[bn->tx_ring_map[i]].bnapi; + sw = bnapi->nq_ring.stats.sw_stats; + + stats->packets = 0; + stats->packets += BNGE_GET_RING_STATS64(sw, tx_ucast_pkts); + stats->packets += BNGE_GET_RING_STATS64(sw, tx_mcast_pkts); + stats->packets += BNGE_GET_RING_STATS64(sw, tx_bcast_pkts); + + stats->bytes = 0; + stats->bytes += BNGE_GET_RING_STATS64(sw, tx_ucast_bytes); + stats->bytes += BNGE_GET_RING_STATS64(sw, tx_mcast_bytes); + stats->bytes += BNGE_GET_RING_STATS64(sw, tx_bcast_bytes); + + stats->hw_drop_errors = BNGE_GET_RING_STATS64(sw, tx_error_pkts); + stats->hw_drops = BNGE_GET_RING_STATS64(sw, tx_discard_pkts) + + stats->hw_drop_errors; +} + +static void bnge_get_base_stats(struct net_device *dev, + struct netdev_queue_stats_rx *rx, + struct netdev_queue_stats_tx *tx) +{ + struct bnge_net *bn = netdev_priv(dev); + + rx->packets = bn->rxq_prv_stats.packets; + rx->bytes = bn->rxq_prv_stats.bytes; + rx->hw_drops = bn->rxq_prv_stats.hw_drops; + rx->hw_drop_overruns = bn->rxq_prv_stats.hw_drop_overruns; + + tx->packets = bn->txq_prv_stats.packets; + tx->bytes = bn->txq_prv_stats.bytes; + tx->hw_drops = bn->txq_prv_stats.hw_drops; + tx->hw_drop_errors = bn->txq_prv_stats.hw_drop_errors; +} + +static const struct netdev_stat_ops bnge_stat_ops = { + .get_queue_stats_rx = bnge_get_queue_stats_rx, + .get_queue_stats_tx = bnge_get_queue_stats_tx, + .get_base_stats = bnge_get_base_stats, +}; + static const struct net_device_ops bnge_netdev_ops = { .ndo_open = bnge_open, .ndo_stop = bnge_close, @@ -3113,6 +3338,7 @@ int bnge_netdev_alloc(struct bnge_dev *bd, int max_irqs) bd->netdev = netdev; netdev->netdev_ops = &bnge_netdev_ops; + netdev->stat_ops = &bnge_stat_ops; bnge_set_ethtool_ops(netdev); @@ -3212,6 +3438,7 @@ int bnge_netdev_alloc(struct bnge_dev *bd, int max_irqs) rc = bnge_alloc_port_stats(bn); if (rc) goto err_free_workq; + spin_lock_init(&bn->stats_lock); netdev->request_ops_lock = true; rc = register_netdev(netdev); diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.h b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.h index a73f51b01bc2..f4636b5b0cf3 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.h +++ b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.h @@ -8,6 +8,7 @@ #include #include #include +#include #include "bnge_db.h" #include "bnge_hw_def.h" #include "bnge_link.h" @@ -264,6 +265,7 @@ struct bnge_stats_mem { enum bnge_net_state { BNGE_STATE_NAPI_DISABLED, + BNGE_STATE_STATS_ENABLE, }; #define BNGE_TIMER_INTERVAL HZ @@ -361,6 +363,11 @@ struct bnge_net { u16 fw_rx_stats_ext_size; u16 fw_tx_stats_ext_size; + struct netdev_queue_stats_rx rxq_prv_stats; + struct netdev_queue_stats_tx txq_prv_stats; + struct rtnl_link_stats64 prv_stats64; + spinlock_t stats_lock; + u8 pri2cos_idx[8]; bool pri2cos_valid; }; From bcc0f4c0f257135a37b674382efcaef4a257642f Mon Sep 17 00:00:00 2001 From: Bhargava Marreddy Date: Mon, 6 Apr 2026 23:34:20 +0530 Subject: [PATCH 1431/1561] bng_en: add support for ethtool -S stats display Implement the legacy ethtool statistics interface (get_sset_count, get_strings, get_ethtool_stats) to expose hardware counters not available through standard kernel stats APIs. Ex: a) Per-queue ring stats rxq0_ucast_packets: 2 rxq0_mcast_packets: 0 rxq0_bcast_packets: 15 rxq0_ucast_bytes: 120 rxq0_mcast_bytes: 0 rxq0_bcast_bytes: 900 txq0_ucast_packets: 0 txq0_mcast_packets: 0 txq0_bcast_packets: 0 txq0_ucast_bytes: 0 txq0_mcast_bytes: 0 txq0_bcast_bytes: 0 b) Per-queue TPA(LRO/GRO) stats rxq4_tpa_eligible_pkt: 0 rxq4_tpa_eligible_bytes: 0 rxq4_tpa_pkt: 0 rxq4_tpa_bytes: 0 rxq4_tpa_errors: 0 rxq4_tpa_events: 0 c) Port level stats rxp_good_vlan_frames: 0 rxp_mtu_err_frames: 0 rxp_tagged_frames: 0 rxp_double_tagged_frames: 0 rxp_pfc_ena_frames_pri0: 0 rxp_pfc_ena_frames_pri1: 0 rxp_pfc_ena_frames_pri2: 0 rxp_pfc_ena_frames_pri3: 0 rxp_pfc_ena_frames_pri4: 0 rxp_pfc_ena_frames_pri5: 0 rxp_pfc_ena_frames_pri6: 0 rxp_pfc_ena_frames_pri7: 0 rxp_eee_lpi_events: 0 rxp_eee_lpi_duration: 0 rxp_runt_bytes: 0 rxp_runt_frames: 0 txp_good_vlan_frames: 0 txp_jabber_frames: 0 txp_fcs_err_frames: 0 txp_pfc_ena_frames_pri0: 0 txp_pfc_ena_frames_pri1: 0 txp_pfc_ena_frames_pri2: 0 txp_pfc_ena_frames_pri3: 0 txp_pfc_ena_frames_pri4: 0 txp_pfc_ena_frames_pri5: 0 txp_pfc_ena_frames_pri6: 0 txp_pfc_ena_frames_pri7: 0 txp_eee_lpi_events: 0 txp_eee_lpi_duration: 0 txp_xthol_frames: 0 d) Per-priority stats rx_bytes_pri0: 4182650 rx_bytes_pri1: 4182650 rx_bytes_pri2: 4182650 rx_bytes_pri3: 4182650 rx_bytes_pri4: 4182650 rx_bytes_pri5: 4182650 rx_bytes_pri6: 4182650 rx_bytes_pri7: 4182650 Signed-off-by: Bhargava Marreddy Reviewed-by: Vikas Gupta Link: https://patch.msgid.link/20260406180420.279470-11-bhargava.marreddy@broadcom.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/broadcom/bnge/bnge_ethtool.c | 481 ++++++++++++++++++ 1 file changed, 481 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_ethtool.c b/drivers/net/ethernet/broadcom/bnge/bnge_ethtool.c index 87226d5bf15c..2467e44de291 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnge/bnge_ethtool.c @@ -35,6 +35,271 @@ static int bnge_nway_reset(struct net_device *dev) return rc; } +static const char * const bnge_ring_q_stats_str[] = { + "ucast_packets", + "mcast_packets", + "bcast_packets", + "ucast_bytes", + "mcast_bytes", + "bcast_bytes", +}; + +static const char * const bnge_ring_tpa2_stats_str[] = { + "tpa_eligible_pkt", + "tpa_eligible_bytes", + "tpa_pkt", + "tpa_bytes", + "tpa_errors", + "tpa_events", +}; + +#define BNGE_RX_PORT_STATS_ENTRY(suffix) \ + { BNGE_RX_STATS_OFFSET(rx_##suffix), "rxp_" __stringify(suffix) } + +#define BNGE_TX_PORT_STATS_ENTRY(suffix) \ + { BNGE_TX_STATS_OFFSET(tx_##suffix), "txp_" __stringify(suffix) } + +#define BNGE_RX_STATS_EXT_ENTRY(counter) \ + { BNGE_RX_STATS_EXT_OFFSET(counter), __stringify(counter) } + +#define BNGE_TX_STATS_EXT_ENTRY(counter) \ + { BNGE_TX_STATS_EXT_OFFSET(counter), __stringify(counter) } + +#define BNGE_RX_STATS_EXT_PFC_ENTRY(n) \ + BNGE_RX_STATS_EXT_ENTRY(pfc_pri##n##_rx_duration_us), \ + BNGE_RX_STATS_EXT_ENTRY(pfc_pri##n##_rx_transitions) + +#define BNGE_TX_STATS_EXT_PFC_ENTRY(n) \ + BNGE_TX_STATS_EXT_ENTRY(pfc_pri##n##_tx_duration_us), \ + BNGE_TX_STATS_EXT_ENTRY(pfc_pri##n##_tx_transitions) + +#define BNGE_RX_STATS_EXT_PFC_ENTRIES \ + BNGE_RX_STATS_EXT_PFC_ENTRY(0), \ + BNGE_RX_STATS_EXT_PFC_ENTRY(1), \ + BNGE_RX_STATS_EXT_PFC_ENTRY(2), \ + BNGE_RX_STATS_EXT_PFC_ENTRY(3), \ + BNGE_RX_STATS_EXT_PFC_ENTRY(4), \ + BNGE_RX_STATS_EXT_PFC_ENTRY(5), \ + BNGE_RX_STATS_EXT_PFC_ENTRY(6), \ + BNGE_RX_STATS_EXT_PFC_ENTRY(7) + +#define BNGE_TX_STATS_EXT_PFC_ENTRIES \ + BNGE_TX_STATS_EXT_PFC_ENTRY(0), \ + BNGE_TX_STATS_EXT_PFC_ENTRY(1), \ + BNGE_TX_STATS_EXT_PFC_ENTRY(2), \ + BNGE_TX_STATS_EXT_PFC_ENTRY(3), \ + BNGE_TX_STATS_EXT_PFC_ENTRY(4), \ + BNGE_TX_STATS_EXT_PFC_ENTRY(5), \ + BNGE_TX_STATS_EXT_PFC_ENTRY(6), \ + BNGE_TX_STATS_EXT_PFC_ENTRY(7) + +#define BNGE_RX_STATS_EXT_COS_ENTRY(n) \ + BNGE_RX_STATS_EXT_ENTRY(rx_bytes_cos##n), \ + BNGE_RX_STATS_EXT_ENTRY(rx_packets_cos##n) + +#define BNGE_TX_STATS_EXT_COS_ENTRY(n) \ + BNGE_TX_STATS_EXT_ENTRY(tx_bytes_cos##n), \ + BNGE_TX_STATS_EXT_ENTRY(tx_packets_cos##n) + +#define BNGE_RX_STATS_EXT_COS_ENTRIES \ + BNGE_RX_STATS_EXT_COS_ENTRY(0), \ + BNGE_RX_STATS_EXT_COS_ENTRY(1), \ + BNGE_RX_STATS_EXT_COS_ENTRY(2), \ + BNGE_RX_STATS_EXT_COS_ENTRY(3), \ + BNGE_RX_STATS_EXT_COS_ENTRY(4), \ + BNGE_RX_STATS_EXT_COS_ENTRY(5), \ + BNGE_RX_STATS_EXT_COS_ENTRY(6), \ + BNGE_RX_STATS_EXT_COS_ENTRY(7) \ + +#define BNGE_TX_STATS_EXT_COS_ENTRIES \ + BNGE_TX_STATS_EXT_COS_ENTRY(0), \ + BNGE_TX_STATS_EXT_COS_ENTRY(1), \ + BNGE_TX_STATS_EXT_COS_ENTRY(2), \ + BNGE_TX_STATS_EXT_COS_ENTRY(3), \ + BNGE_TX_STATS_EXT_COS_ENTRY(4), \ + BNGE_TX_STATS_EXT_COS_ENTRY(5), \ + BNGE_TX_STATS_EXT_COS_ENTRY(6), \ + BNGE_TX_STATS_EXT_COS_ENTRY(7) \ + +#define BNGE_RX_STATS_EXT_DISCARD_COS_ENTRY(n) \ + BNGE_RX_STATS_EXT_ENTRY(rx_discard_bytes_cos##n), \ + BNGE_RX_STATS_EXT_ENTRY(rx_discard_packets_cos##n) + +#define BNGE_RX_STATS_EXT_DISCARD_COS_ENTRIES \ + BNGE_RX_STATS_EXT_DISCARD_COS_ENTRY(0), \ + BNGE_RX_STATS_EXT_DISCARD_COS_ENTRY(1), \ + BNGE_RX_STATS_EXT_DISCARD_COS_ENTRY(2), \ + BNGE_RX_STATS_EXT_DISCARD_COS_ENTRY(3), \ + BNGE_RX_STATS_EXT_DISCARD_COS_ENTRY(4), \ + BNGE_RX_STATS_EXT_DISCARD_COS_ENTRY(5), \ + BNGE_RX_STATS_EXT_DISCARD_COS_ENTRY(6), \ + BNGE_RX_STATS_EXT_DISCARD_COS_ENTRY(7) + +#define BNGE_RX_STATS_PRI_ENTRY(counter, n) \ + { BNGE_RX_STATS_EXT_OFFSET(counter##_cos0), \ + __stringify(counter##_pri##n) } + +#define BNGE_TX_STATS_PRI_ENTRY(counter, n) \ + { BNGE_TX_STATS_EXT_OFFSET(counter##_cos0), \ + __stringify(counter##_pri##n) } + +#define BNGE_RX_STATS_PRI_ENTRIES(counter) \ + BNGE_RX_STATS_PRI_ENTRY(counter, 0), \ + BNGE_RX_STATS_PRI_ENTRY(counter, 1), \ + BNGE_RX_STATS_PRI_ENTRY(counter, 2), \ + BNGE_RX_STATS_PRI_ENTRY(counter, 3), \ + BNGE_RX_STATS_PRI_ENTRY(counter, 4), \ + BNGE_RX_STATS_PRI_ENTRY(counter, 5), \ + BNGE_RX_STATS_PRI_ENTRY(counter, 6), \ + BNGE_RX_STATS_PRI_ENTRY(counter, 7) + +#define BNGE_TX_STATS_PRI_ENTRIES(counter) \ + BNGE_TX_STATS_PRI_ENTRY(counter, 0), \ + BNGE_TX_STATS_PRI_ENTRY(counter, 1), \ + BNGE_TX_STATS_PRI_ENTRY(counter, 2), \ + BNGE_TX_STATS_PRI_ENTRY(counter, 3), \ + BNGE_TX_STATS_PRI_ENTRY(counter, 4), \ + BNGE_TX_STATS_PRI_ENTRY(counter, 5), \ + BNGE_TX_STATS_PRI_ENTRY(counter, 6), \ + BNGE_TX_STATS_PRI_ENTRY(counter, 7) + +#define NUM_RING_Q_HW_STATS ARRAY_SIZE(bnge_ring_q_stats_str) + +static const struct { + long offset; + char string[ETH_GSTRING_LEN]; +} bnge_tx_port_stats_ext_arr[] = { + BNGE_TX_STATS_EXT_COS_ENTRIES, + BNGE_TX_STATS_EXT_PFC_ENTRIES, +}; + +static const struct { + long base_off; + char string[ETH_GSTRING_LEN]; +} bnge_rx_bytes_pri_arr[] = { + BNGE_RX_STATS_PRI_ENTRIES(rx_bytes), +}; + +static const struct { + long base_off; + char string[ETH_GSTRING_LEN]; +} bnge_rx_pkts_pri_arr[] = { + BNGE_RX_STATS_PRI_ENTRIES(rx_packets), +}; + +static const struct { + long base_off; + char string[ETH_GSTRING_LEN]; +} bnge_tx_bytes_pri_arr[] = { + BNGE_TX_STATS_PRI_ENTRIES(tx_bytes), +}; + +static const struct { + long base_off; + char string[ETH_GSTRING_LEN]; +} bnge_tx_pkts_pri_arr[] = { + BNGE_TX_STATS_PRI_ENTRIES(tx_packets), +}; + +static const struct { + long offset; + char string[ETH_GSTRING_LEN]; +} bnge_port_stats_arr[] = { + BNGE_RX_PORT_STATS_ENTRY(good_vlan_frames), + BNGE_RX_PORT_STATS_ENTRY(mtu_err_frames), + BNGE_RX_PORT_STATS_ENTRY(tagged_frames), + BNGE_RX_PORT_STATS_ENTRY(double_tagged_frames), + BNGE_RX_PORT_STATS_ENTRY(pfc_ena_frames_pri0), + BNGE_RX_PORT_STATS_ENTRY(pfc_ena_frames_pri1), + BNGE_RX_PORT_STATS_ENTRY(pfc_ena_frames_pri2), + BNGE_RX_PORT_STATS_ENTRY(pfc_ena_frames_pri3), + BNGE_RX_PORT_STATS_ENTRY(pfc_ena_frames_pri4), + BNGE_RX_PORT_STATS_ENTRY(pfc_ena_frames_pri5), + BNGE_RX_PORT_STATS_ENTRY(pfc_ena_frames_pri6), + BNGE_RX_PORT_STATS_ENTRY(pfc_ena_frames_pri7), + BNGE_RX_PORT_STATS_ENTRY(eee_lpi_events), + BNGE_RX_PORT_STATS_ENTRY(eee_lpi_duration), + BNGE_RX_PORT_STATS_ENTRY(runt_bytes), + BNGE_RX_PORT_STATS_ENTRY(runt_frames), + + BNGE_TX_PORT_STATS_ENTRY(good_vlan_frames), + BNGE_TX_PORT_STATS_ENTRY(jabber_frames), + BNGE_TX_PORT_STATS_ENTRY(fcs_err_frames), + BNGE_TX_PORT_STATS_ENTRY(pfc_ena_frames_pri0), + BNGE_TX_PORT_STATS_ENTRY(pfc_ena_frames_pri1), + BNGE_TX_PORT_STATS_ENTRY(pfc_ena_frames_pri2), + BNGE_TX_PORT_STATS_ENTRY(pfc_ena_frames_pri3), + BNGE_TX_PORT_STATS_ENTRY(pfc_ena_frames_pri4), + BNGE_TX_PORT_STATS_ENTRY(pfc_ena_frames_pri5), + BNGE_TX_PORT_STATS_ENTRY(pfc_ena_frames_pri6), + BNGE_TX_PORT_STATS_ENTRY(pfc_ena_frames_pri7), + BNGE_TX_PORT_STATS_ENTRY(eee_lpi_events), + BNGE_TX_PORT_STATS_ENTRY(eee_lpi_duration), + BNGE_TX_PORT_STATS_ENTRY(xthol_frames), +}; + +static const struct { + long offset; + char string[ETH_GSTRING_LEN]; +} bnge_port_stats_ext_arr[] = { + BNGE_RX_STATS_EXT_ENTRY(continuous_pause_events), + BNGE_RX_STATS_EXT_ENTRY(resume_pause_events), + BNGE_RX_STATS_EXT_ENTRY(continuous_roce_pause_events), + BNGE_RX_STATS_EXT_ENTRY(resume_roce_pause_events), + BNGE_RX_STATS_EXT_COS_ENTRIES, + BNGE_RX_STATS_EXT_PFC_ENTRIES, + BNGE_RX_STATS_EXT_ENTRY(rx_bits), + BNGE_RX_STATS_EXT_ENTRY(rx_buffer_passed_threshold), + BNGE_RX_STATS_EXT_DISCARD_COS_ENTRIES, + BNGE_RX_STATS_EXT_ENTRY(rx_filter_miss), +}; + +static int bnge_get_num_tpa_ring_stats(struct bnge_dev *bd) +{ + if (BNGE_SUPPORTS_TPA(bd)) + return BNGE_NUM_TPA_RING_STATS; + return 0; +} + +#define BNGE_NUM_PORT_STATS ARRAY_SIZE(bnge_port_stats_arr) +#define BNGE_NUM_STATS_PRI \ + (ARRAY_SIZE(bnge_rx_bytes_pri_arr) + \ + ARRAY_SIZE(bnge_rx_pkts_pri_arr) + \ + ARRAY_SIZE(bnge_tx_bytes_pri_arr) + \ + ARRAY_SIZE(bnge_tx_pkts_pri_arr)) + +static int bnge_get_num_ring_stats(struct bnge_dev *bd) +{ + int rx, tx; + + rx = NUM_RING_Q_HW_STATS + bnge_get_num_tpa_ring_stats(bd); + tx = NUM_RING_Q_HW_STATS; + return rx * bd->rx_nr_rings + + tx * bd->tx_nr_rings_per_tc; +} + +static u32 bnge_get_num_stats(struct bnge_net *bn) +{ + u32 num_stats = bnge_get_num_ring_stats(bn->bd); + u32 len; + + if (bn->flags & BNGE_FLAG_PORT_STATS) + num_stats += BNGE_NUM_PORT_STATS; + + if (bn->flags & BNGE_FLAG_PORT_STATS_EXT) { + len = min_t(u32, bn->fw_rx_stats_ext_size, + ARRAY_SIZE(bnge_port_stats_ext_arr)); + num_stats += len; + len = min_t(u32, bn->fw_tx_stats_ext_size, + ARRAY_SIZE(bnge_tx_port_stats_ext_arr)); + num_stats += len; + if (bn->pri2cos_valid) + num_stats += BNGE_NUM_STATS_PRI; + } + + return num_stats; +} + static void bnge_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { @@ -46,6 +311,219 @@ static void bnge_get_drvinfo(struct net_device *dev, strscpy(info->bus_info, pci_name(bd->pdev), sizeof(info->bus_info)); } +static int bnge_get_sset_count(struct net_device *dev, int sset) +{ + struct bnge_net *bn = netdev_priv(dev); + + switch (sset) { + case ETH_SS_STATS: + return bnge_get_num_stats(bn); + default: + return -EOPNOTSUPP; + } +} + +static bool is_rx_ring(struct bnge_dev *bd, u16 ring_num) +{ + return ring_num < bd->rx_nr_rings; +} + +static bool is_tx_ring(struct bnge_dev *bd, u16 ring_num) +{ + u16 tx_base = 0; + + if (!(bd->flags & BNGE_EN_SHARED_CHNL)) + tx_base = bd->rx_nr_rings; + + return ring_num >= tx_base && ring_num < (tx_base + bd->tx_nr_rings_per_tc); +} + +static void bnge_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *buf) +{ + struct bnge_net *bn = netdev_priv(dev); + struct bnge_dev *bd = bn->bd; + u32 tpa_stats; + u32 i, j = 0; + + if (!bn->bnapi) { + j += bnge_get_num_ring_stats(bd); + goto skip_ring_stats; + } + + tpa_stats = bnge_get_num_tpa_ring_stats(bd); + for (i = 0; i < bd->nq_nr_rings; i++) { + struct bnge_napi *bnapi = bn->bnapi[i]; + struct bnge_nq_ring_info *nqr; + u64 *sw_stats; + int k; + + nqr = &bnapi->nq_ring; + sw_stats = nqr->stats.sw_stats; + + if (is_rx_ring(bd, i)) { + buf[j++] = BNGE_GET_RING_STATS64(sw_stats, rx_ucast_pkts); + buf[j++] = BNGE_GET_RING_STATS64(sw_stats, rx_mcast_pkts); + buf[j++] = BNGE_GET_RING_STATS64(sw_stats, rx_bcast_pkts); + buf[j++] = BNGE_GET_RING_STATS64(sw_stats, rx_ucast_bytes); + buf[j++] = BNGE_GET_RING_STATS64(sw_stats, rx_mcast_bytes); + buf[j++] = BNGE_GET_RING_STATS64(sw_stats, rx_bcast_bytes); + } + if (is_tx_ring(bd, i)) { + buf[j++] = BNGE_GET_RING_STATS64(sw_stats, tx_ucast_pkts); + buf[j++] = BNGE_GET_RING_STATS64(sw_stats, tx_mcast_pkts); + buf[j++] = BNGE_GET_RING_STATS64(sw_stats, tx_bcast_pkts); + buf[j++] = BNGE_GET_RING_STATS64(sw_stats, tx_ucast_bytes); + buf[j++] = BNGE_GET_RING_STATS64(sw_stats, tx_mcast_bytes); + buf[j++] = BNGE_GET_RING_STATS64(sw_stats, tx_bcast_bytes); + } + if (!tpa_stats || !is_rx_ring(bd, i)) + continue; + + k = BNGE_NUM_RX_RING_STATS + BNGE_NUM_TX_RING_STATS; + for (; k < BNGE_NUM_RX_RING_STATS + BNGE_NUM_TX_RING_STATS + + tpa_stats; j++, k++) + buf[j] = sw_stats[k]; + } + +skip_ring_stats: + if (bn->flags & BNGE_FLAG_PORT_STATS) { + u64 *port_stats = bn->port_stats.sw_stats; + + for (i = 0; i < BNGE_NUM_PORT_STATS; i++, j++) + buf[j] = *(port_stats + bnge_port_stats_arr[i].offset); + } + if (bn->flags & BNGE_FLAG_PORT_STATS_EXT) { + u64 *rx_port_stats_ext = bn->rx_port_stats_ext.sw_stats; + u64 *tx_port_stats_ext = bn->tx_port_stats_ext.sw_stats; + u32 len; + + len = min_t(u32, bn->fw_rx_stats_ext_size, + ARRAY_SIZE(bnge_port_stats_ext_arr)); + for (i = 0; i < len; i++, j++) { + buf[j] = *(rx_port_stats_ext + + bnge_port_stats_ext_arr[i].offset); + } + len = min_t(u32, bn->fw_tx_stats_ext_size, + ARRAY_SIZE(bnge_tx_port_stats_ext_arr)); + for (i = 0; i < len; i++, j++) { + buf[j] = *(tx_port_stats_ext + + bnge_tx_port_stats_ext_arr[i].offset); + } + if (bn->pri2cos_valid) { + for (i = 0; i < 8; i++, j++) { + long n = bnge_rx_bytes_pri_arr[i].base_off + + bn->pri2cos_idx[i]; + + buf[j] = *(rx_port_stats_ext + n); + } + for (i = 0; i < 8; i++, j++) { + long n = bnge_rx_pkts_pri_arr[i].base_off + + bn->pri2cos_idx[i]; + + buf[j] = *(rx_port_stats_ext + n); + } + for (i = 0; i < 8; i++, j++) { + long n = bnge_tx_bytes_pri_arr[i].base_off + + bn->pri2cos_idx[i]; + + buf[j] = *(tx_port_stats_ext + n); + } + for (i = 0; i < 8; i++, j++) { + long n = bnge_tx_pkts_pri_arr[i].base_off + + bn->pri2cos_idx[i]; + + buf[j] = *(tx_port_stats_ext + n); + } + } + } +} + +static void bnge_get_strings(struct net_device *dev, u32 stringset, u8 *buf) +{ + struct bnge_net *bn = netdev_priv(dev); + struct bnge_dev *bd = bn->bd; + u32 i, j, num_str; + const char *str; + + switch (stringset) { + case ETH_SS_STATS: + for (i = 0; i < bd->nq_nr_rings; i++) { + if (is_rx_ring(bd, i)) + for (j = 0; j < NUM_RING_Q_HW_STATS; j++) { + str = bnge_ring_q_stats_str[j]; + ethtool_sprintf(&buf, "rxq%d_%s", i, + str); + } + if (is_tx_ring(bd, i)) + for (j = 0; j < NUM_RING_Q_HW_STATS; j++) { + str = bnge_ring_q_stats_str[j]; + ethtool_sprintf(&buf, "txq%d_%s", i, + str); + } + num_str = bnge_get_num_tpa_ring_stats(bd); + if (!num_str || !is_rx_ring(bd, i)) + continue; + + for (j = 0; j < num_str; j++) { + str = bnge_ring_tpa2_stats_str[j]; + ethtool_sprintf(&buf, "rxq%d_%s", i, str); + } + } + + if (bn->flags & BNGE_FLAG_PORT_STATS) + for (i = 0; i < BNGE_NUM_PORT_STATS; i++) { + str = bnge_port_stats_arr[i].string; + ethtool_puts(&buf, str); + } + + if (bn->flags & BNGE_FLAG_PORT_STATS_EXT) { + u32 len; + + len = min_t(u32, bn->fw_rx_stats_ext_size, + ARRAY_SIZE(bnge_port_stats_ext_arr)); + for (i = 0; i < len; i++) { + str = bnge_port_stats_ext_arr[i].string; + ethtool_puts(&buf, str); + } + + len = min_t(u32, bn->fw_tx_stats_ext_size, + ARRAY_SIZE(bnge_tx_port_stats_ext_arr)); + for (i = 0; i < len; i++) { + str = bnge_tx_port_stats_ext_arr[i].string; + ethtool_puts(&buf, str); + } + + if (bn->pri2cos_valid) { + for (i = 0; i < 8; i++) { + str = bnge_rx_bytes_pri_arr[i].string; + ethtool_puts(&buf, str); + } + + for (i = 0; i < 8; i++) { + str = bnge_rx_pkts_pri_arr[i].string; + ethtool_puts(&buf, str); + } + + for (i = 0; i < 8; i++) { + str = bnge_tx_bytes_pri_arr[i].string; + ethtool_puts(&buf, str); + } + + for (i = 0; i < 8; i++) { + str = bnge_tx_pkts_pri_arr[i].string; + ethtool_puts(&buf, str); + } + } + } + break; + default: + netdev_err(bd->netdev, "%s invalid request %x\n", + __func__, stringset); + break; + } +} + static void bnge_get_eth_phy_stats(struct net_device *dev, struct ethtool_eth_phy_stats *phy_stats) { @@ -271,6 +749,9 @@ static const struct ethtool_ops bnge_ethtool_ops = { .nway_reset = bnge_nway_reset, .get_pauseparam = bnge_get_pauseparam, .set_pauseparam = bnge_set_pauseparam, + .get_sset_count = bnge_get_sset_count, + .get_strings = bnge_get_strings, + .get_ethtool_stats = bnge_get_ethtool_stats, .get_eth_phy_stats = bnge_get_eth_phy_stats, .get_eth_mac_stats = bnge_get_eth_mac_stats, .get_eth_ctrl_stats = bnge_get_eth_ctrl_stats, From 2b5dd4632966c39da6ba74dbc8689b309065e82c Mon Sep 17 00:00:00 2001 From: Junxi Qian Date: Wed, 8 Apr 2026 16:10:06 +0800 Subject: [PATCH 1432/1561] nfc: llcp: add missing return after LLCP_CLOSED checks In nfc_llcp_recv_hdlc() and nfc_llcp_recv_disc(), when the socket state is LLCP_CLOSED, the code correctly calls release_sock() and nfc_llcp_sock_put() but fails to return. Execution falls through to the remainder of the function, which calls release_sock() and nfc_llcp_sock_put() again. This results in a double release_sock() and a refcount underflow via double nfc_llcp_sock_put(), leading to a use-after-free. Add the missing return statements after the LLCP_CLOSED branches in both functions to prevent the fall-through. Fixes: d646960f7986 ("NFC: Initial LLCP support") Signed-off-by: Junxi Qian Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260408081006.3723-1-qjx1298677004@gmail.com Signed-off-by: Jakub Kicinski --- net/nfc/llcp_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index 366d7566308c..db5bc6a878dd 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -1091,6 +1091,7 @@ static void nfc_llcp_recv_hdlc(struct nfc_llcp_local *local, if (sk->sk_state == LLCP_CLOSED) { release_sock(sk); nfc_llcp_sock_put(llcp_sock); + return; } /* Pass the payload upstream */ @@ -1182,6 +1183,7 @@ static void nfc_llcp_recv_disc(struct nfc_llcp_local *local, if (sk->sk_state == LLCP_CLOSED) { release_sock(sk); nfc_llcp_sock_put(llcp_sock); + return; } if (sk->sk_state == LLCP_CONNECTED) { From c116f07ab9d22bb6f355f3cf9e44c1e6a47fe559 Mon Sep 17 00:00:00 2001 From: Erni Sri Satya Vennela Date: Wed, 8 Apr 2026 01:12:19 -0700 Subject: [PATCH 1433/1561] net: mana: Use pci_name() for debugfs directory naming Use pci_name(pdev) for the per-device debugfs directory instead of hardcoded "0" for PFs and pci_slot_name(pdev->slot) for VFs. The previous approach had two issues: 1. pci_slot_name() dereferences pdev->slot, which can be NULL for VFs in environments like generic VFIO passthrough or nested KVM, causing a NULL pointer dereference. 2. Multiple PFs would all use "0", and VFs across different PCI domains or buses could share the same slot name, leading to -EEXIST errors from debugfs_create_dir(). pci_name(pdev) returns the unique BDF address, is always valid, and is unique across the system. Fixes: 6607c17c6c5e ("net: mana: Enable debugfs files for MANA device") Signed-off-by: Erni Sri Satya Vennela Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260408081224.302308-2-ernis@linux.microsoft.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microsoft/mana/gdma_main.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index 786186c9a115..c2e855ff3ca9 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -2007,11 +2007,8 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent) gc->dev = &pdev->dev; xa_init(&gc->irq_contexts); - if (gc->is_pf) - gc->mana_pci_debugfs = debugfs_create_dir("0", mana_debugfs_root); - else - gc->mana_pci_debugfs = debugfs_create_dir(pci_slot_name(pdev->slot), - mana_debugfs_root); + gc->mana_pci_debugfs = debugfs_create_dir(pci_name(pdev), + mana_debugfs_root); err = mana_gd_setup(pdev); if (err) From 3b7c7fc97aea7b4048001d12f45777201c74a17f Mon Sep 17 00:00:00 2001 From: Erni Sri Satya Vennela Date: Wed, 8 Apr 2026 01:12:20 -0700 Subject: [PATCH 1434/1561] net: mana: Move current_speed debugfs file to mana_init_port() Move the current_speed debugfs file creation from mana_probe_port() to mana_init_port(). The file was previously created only during initial probe, but mana_cleanup_port_context() removes the entire vPort debugfs directory during detach/attach cycles. Since mana_init_port() recreates the directory on re-attach, moving current_speed here ensures it survives these cycles. Fixes: 75cabb46935b ("net: mana: Add support for net_shaper_ops") Signed-off-by: Erni Sri Satya Vennela Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260408081224.302308-3-ernis@linux.microsoft.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microsoft/mana/mana_en.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 09a53c977545..a9f5efc5af37 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -3117,6 +3117,8 @@ static int mana_init_port(struct net_device *ndev) eth_hw_addr_set(ndev, apc->mac_addr); sprintf(vport, "vport%d", port_idx); apc->mana_port_debugfs = debugfs_create_dir(vport, gc->mana_pci_debugfs); + debugfs_create_u32("current_speed", 0400, apc->mana_port_debugfs, + &apc->speed); return 0; reset_apc: @@ -3393,8 +3395,6 @@ static int mana_probe_port(struct mana_context *ac, int port_idx, netif_carrier_on(ndev); - debugfs_create_u32("current_speed", 0400, apc->mana_port_debugfs, &apc->speed); - return 0; free_indir: From 22ef8a263c17f02176e3a322b76da07411092c17 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 8 Apr 2026 15:08:49 +0800 Subject: [PATCH 1435/1561] tools: ynl: move ethtool.py to selftest We have converted all the samples to selftests. This script is the last piece of random "PoC" code we still have lying around. Let's move it to tests. Signed-off-by: Hangbin Liu Link: https://patch.msgid.link/20260408-b4-ynl_ethtool-v2-1-7623a5e8f70b@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/tests/Makefile | 5 ++++- tools/net/ynl/{pyynl => tests}/ethtool.py | 2 +- tools/net/ynl/tests/test_ynl_ethtool.sh | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) rename tools/net/ynl/{pyynl => tests}/ethtool.py (99%) diff --git a/tools/net/ynl/tests/Makefile b/tools/net/ynl/tests/Makefile index 9215e84cca05..40827ca8e579 100644 --- a/tools/net/ynl/tests/Makefile +++ b/tools/net/ynl/tests/Makefile @@ -36,7 +36,10 @@ TEST_GEN_FILES := \ rt-route \ # end of TEST_GEN_FILES -TEST_FILES := ynl_nsim_lib.sh +TEST_FILES := \ + ethtool.py \ + ynl_nsim_lib.sh \ +# end of TEST_FILES CFLAGS_netdev:=$(CFLAGS_netdev) $(CFLAGS_rt-link) CFLAGS_ovs:=$(CFLAGS_ovs_datapath) diff --git a/tools/net/ynl/pyynl/ethtool.py b/tools/net/ynl/tests/ethtool.py similarity index 99% rename from tools/net/ynl/pyynl/ethtool.py rename to tools/net/ynl/tests/ethtool.py index f1a2a2a89985..6eeeb867edcf 100755 --- a/tools/net/ynl/pyynl/ethtool.py +++ b/tools/net/ynl/tests/ethtool.py @@ -14,7 +14,7 @@ import re import os # pylint: disable=no-name-in-module,wrong-import-position -sys.path.append(pathlib.Path(__file__).resolve().parent.as_posix()) +sys.path.append(pathlib.Path(__file__).resolve().parent.parent.joinpath('pyynl').as_posix()) # pylint: disable=import-error from cli import schema_dir, spec_dir from lib import YnlFamily diff --git a/tools/net/ynl/tests/test_ynl_ethtool.sh b/tools/net/ynl/tests/test_ynl_ethtool.sh index b826269017f4..b4480e9be7b7 100755 --- a/tools/net/ynl/tests/test_ynl_ethtool.sh +++ b/tools/net/ynl/tests/test_ynl_ethtool.sh @@ -8,7 +8,7 @@ KSELFTEST_KTAP_HELPERS="$(dirname "$(realpath "$0")")/../../../testing/selftests source "$KSELFTEST_KTAP_HELPERS" # Default ynl-ethtool path for direct execution, can be overridden by make install -ynl_ethtool="../pyynl/ethtool.py" +ynl_ethtool="./ethtool.py" readonly NSIM_ID="1337" readonly NSIM_DEV_NAME="nsim${NSIM_ID}" From 1c43d471a513e93b5420d8af5d43afd3a75f95f1 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 8 Apr 2026 15:08:50 +0800 Subject: [PATCH 1436/1561] tools: ynl: ethtool: use doit instead of dumpit for per-device GET Rename the local helper doit() to do_set() and dumpit() to do_get() to better reflect their purpose. Convert do_get() to use ynl.do() with an explicit device header instead of ynl.dump() followed by client-side filtering. This is more efficient as the kernel only processes and returns data for the requested device, rather than dumping all devices across the netns. Signed-off-by: Hangbin Liu Link: https://patch.msgid.link/20260408-b4-ynl_ethtool-v2-2-7623a5e8f70b@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/tests/ethtool.py | 68 +++++++++++++++++----------------- 1 file changed, 33 insertions(+), 35 deletions(-) diff --git a/tools/net/ynl/tests/ethtool.py b/tools/net/ynl/tests/ethtool.py index 6eeeb867edcf..63854d21818c 100755 --- a/tools/net/ynl/tests/ethtool.py +++ b/tools/net/ynl/tests/ethtool.py @@ -84,9 +84,9 @@ def print_speed(name, value): speed = [ k for k, v in value.items() if v and speed_re.match(k) ] print(f'{name}: {" ".join(speed)}') -def doit(ynl, args, op_name): +def do_set(ynl, args, op_name): """ - Prepare request header, parse arguments and doit. + Prepare request header, parse arguments and do a set operation. """ req = { 'header': { @@ -97,26 +97,24 @@ def doit(ynl, args, op_name): args_to_req(ynl, op_name, args.args, req) ynl.do(op_name, req) -def dumpit(ynl, args, op_name, extra=None): +def do_get(ynl, args, op_name, extra=None): """ - Prepare request header, parse arguments and dumpit (filtering out the - devices we're not interested in). + Prepare request header and get info for a specific device using doit. """ extra = extra or {} - reply = ynl.dump(op_name, { 'header': {} } | extra) + req = {'header': {'dev-name': args.device}} + req['header'].update(extra.pop('header', {})) + req.update(extra) + + reply = ynl.do(op_name, req) if not reply: return {} - for msg in reply: - if msg['header']['dev-name'] == args.device: - if args.json: - pprint.PrettyPrinter().pprint(msg) - sys.exit(0) - msg.pop('header', None) - return msg - - print(f"Not supported for device {args.device}") - sys.exit(1) + if args.json: + pprint.PrettyPrinter().pprint(reply) + sys.exit(0) + reply.pop('header', None) + return reply def bits_to_dict(attr): """ @@ -181,15 +179,15 @@ def main(): return if args.set_eee: - doit(ynl, args, 'eee-set') + do_set(ynl, args, 'eee-set') return if args.set_pause: - doit(ynl, args, 'pause-set') + do_set(ynl, args, 'pause-set') return if args.set_coalesce: - doit(ynl, args, 'coalesce-set') + do_set(ynl, args, 'coalesce-set') return if args.set_features: @@ -198,20 +196,20 @@ def main(): return if args.set_channels: - doit(ynl, args, 'channels-set') + do_set(ynl, args, 'channels-set') return if args.set_ring: - doit(ynl, args, 'rings-set') + do_set(ynl, args, 'rings-set') return if args.show_priv_flags: - flags = bits_to_dict(dumpit(ynl, args, 'privflags-get')['flags']) + flags = bits_to_dict(do_get(ynl, args, 'privflags-get')['flags']) print_field(flags) return if args.show_eee: - eee = dumpit(ynl, args, 'eee-get') + eee = do_get(ynl, args, 'eee-get') ours = bits_to_dict(eee['modes-ours']) peer = bits_to_dict(eee['modes-peer']) @@ -232,18 +230,18 @@ def main(): return if args.show_pause: - print_field(dumpit(ynl, args, 'pause-get'), + print_field(do_get(ynl, args, 'pause-get'), ('autoneg', 'Autonegotiate', 'bool'), ('rx', 'RX', 'bool'), ('tx', 'TX', 'bool')) return if args.show_coalesce: - print_field(dumpit(ynl, args, 'coalesce-get')) + print_field(do_get(ynl, args, 'coalesce-get')) return if args.show_features: - reply = dumpit(ynl, args, 'features-get') + reply = do_get(ynl, args, 'features-get') available = bits_to_dict(reply['hw']) requested = bits_to_dict(reply['wanted']).keys() active = bits_to_dict(reply['active']).keys() @@ -270,7 +268,7 @@ def main(): return if args.show_channels: - reply = dumpit(ynl, args, 'channels-get') + reply = do_get(ynl, args, 'channels-get') print(f'Channel parameters for {args.device}:') print('Pre-set maximums:') @@ -290,7 +288,7 @@ def main(): return if args.show_ring: - reply = dumpit(ynl, args, 'channels-get') + reply = do_get(ynl, args, 'channels-get') print(f'Ring parameters for {args.device}:') @@ -319,7 +317,7 @@ def main(): print('NIC statistics:') # TODO: pass id? - strset = dumpit(ynl, args, 'strset-get') + strset = do_get(ynl, args, 'strset-get') pprint.PrettyPrinter().pprint(strset) req = { @@ -338,7 +336,7 @@ def main(): }, } - rsp = dumpit(ynl, args, 'stats-get', req) + rsp = do_get(ynl, args, 'stats-get', req) pprint.PrettyPrinter().pprint(rsp) return @@ -349,7 +347,7 @@ def main(): }, } - tsinfo = dumpit(ynl, args, 'tsinfo-get', req) + tsinfo = do_get(ynl, args, 'tsinfo-get', req) print(f'Time stamping parameters for {args.device}:') @@ -377,7 +375,7 @@ def main(): return print(f'Settings for {args.device}:') - linkmodes = dumpit(ynl, args, 'linkmodes-get') + linkmodes = do_get(ynl, args, 'linkmodes-get') ours = bits_to_dict(linkmodes['ours']) supported_ports = ('TP', 'AUI', 'BNC', 'MII', 'FIBRE', 'Backplane') @@ -425,7 +423,7 @@ def main(): 5: 'Directly Attached Copper', 0xef: 'None', } - linkinfo = dumpit(ynl, args, 'linkinfo-get') + linkinfo = do_get(ynl, args, 'linkinfo-get') print(f'Port: {ports.get(linkinfo["port"], "Other")}') print_field(linkinfo, ('phyaddr', 'PHYAD')) @@ -447,11 +445,11 @@ def main(): mdix = mdix_ctrl.get(linkinfo['tp-mdix'], 'Unknown (auto)') print(f'MDI-X: {mdix}') - debug = dumpit(ynl, args, 'debug-get') + debug = do_get(ynl, args, 'debug-get') msgmask = bits_to_dict(debug.get("msgmask", [])).keys() print(f'Current message level: {" ".join(msgmask)}') - linkstate = dumpit(ynl, args, 'linkstate-get') + linkstate = do_get(ynl, args, 'linkstate-get') detected_states = { 0: 'no', 1: 'yes', From 594ba4477164af58c9703039b63b8b07a3a55f18 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 8 Apr 2026 15:08:51 +0800 Subject: [PATCH 1437/1561] tools: ynl: ethtool: add --dbg-small-recv option Add a --dbg-small-recv debug option to control the recv() buffer size used by YNL, matching the same option already present in cli.py. This is useful if user need to get large netlink message. Signed-off-by: Hangbin Liu Link: https://patch.msgid.link/20260408-b4-ynl_ethtool-v2-3-7623a5e8f70b@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/tests/ethtool.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/net/ynl/tests/ethtool.py b/tools/net/ynl/tests/ethtool.py index 63854d21818c..db3b62c652e7 100755 --- a/tools/net/ynl/tests/ethtool.py +++ b/tools/net/ynl/tests/ethtool.py @@ -166,12 +166,19 @@ def main(): parser.add_argument('device', metavar='device', type=str) parser.add_argument('args', metavar='args', type=str, nargs='*') + dbg_group = parser.add_argument_group('Debug options') + dbg_group.add_argument('--dbg-small-recv', default=0, const=4000, + action='store', nargs='?', type=int, metavar='INT', + help="Length of buffers used for recv()") + args = parser.parse_args() spec = os.path.join(spec_dir(), 'ethtool.yaml') schema = os.path.join(schema_dir(), 'genetlink-legacy.yaml') - ynl = YnlFamily(spec, schema) + ynl = YnlFamily(spec, schema, recv_size=args.dbg_small_recv) + if args.dbg_small_recv: + ynl.set_recv_dbg(True) if args.set_priv_flags: # TODO: parse the bitmask From 1346586a9ac96588eff586ca1893dd2e88b88510 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 8 Apr 2026 15:08:52 +0800 Subject: [PATCH 1438/1561] netlink: add a nla_nest_end_safe() helper The nla_len field in struct nlattr is a __u16, which can only hold values up to 65535. If a nested attribute grows beyond this limit, nla_nest_end() silently truncates the length, producing a corrupted netlink message with no indication of the problem. Since nla_nest_end() is used everywhere and this issue rarely happens, let's add a new helper to check the length. Signed-off-by: Hangbin Liu Link: https://patch.msgid.link/20260408-b4-ynl_ethtool-v2-4-7623a5e8f70b@gmail.com Signed-off-by: Jakub Kicinski --- include/net/netlink.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/include/net/netlink.h b/include/net/netlink.h index 1a8356ca4b78..546d10586576 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -2264,6 +2264,25 @@ static inline int nla_nest_end(struct sk_buff *skb, struct nlattr *start) return skb->len; } +/** + * nla_nest_end_safe - Validate and finalize nesting of attributes + * @skb: socket buffer the attributes are stored in + * @start: container attribute + * + * Corrects the container attribute header to include all appended + * attributes. + * + * Returns: the total data length of the skb, or -EMSGSIZE if the + * nested attribute length exceeds U16_MAX. + */ +static inline int nla_nest_end_safe(struct sk_buff *skb, struct nlattr *start) +{ + if (skb_tail_pointer(skb) - (unsigned char *)start > U16_MAX) + return -EMSGSIZE; + + return nla_nest_end(skb, start); +} + /** * nla_nest_cancel - Cancel nesting of attributes * @skb: socket buffer the message is stored in From b2fb1a336383f1fb4667a9cc930c70f52ae1e20e Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 8 Apr 2026 15:08:53 +0800 Subject: [PATCH 1439/1561] ethtool: strset: check nla_len overflow The netlink attribute length field nla_len is a __u16, which can only represent values up to 65535 bytes. NICs with a large number of statistics strings (e.g. mlx5_core with thousands of ETH_SS_STATS entries) can produce a ETHTOOL_A_STRINGSET_STRINGS nest that exceeds this limit. When nla_nest_end() writes the actual nest size back to nla_len, the value is silently truncated. This results in a corrupted netlink message being sent to userspace: the parser reads a wrong (truncated) attribute length and misaligns all subsequent attribute boundaries, causing decode errors. Fix this by using the new helper nla_nest_end_safe and error out if the size exceeds U16_MAX. Signed-off-by: Hangbin Liu Link: https://patch.msgid.link/20260408-b4-ynl_ethtool-v2-5-7623a5e8f70b@gmail.com Signed-off-by: Jakub Kicinski --- net/ethtool/strset.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c index 9271aba8255e..bb1e829ba099 100644 --- a/net/ethtool/strset.c +++ b/net/ethtool/strset.c @@ -443,7 +443,8 @@ static int strset_fill_set(struct sk_buff *skb, if (strset_fill_string(skb, set_info, i) < 0) goto nla_put_failure; } - nla_nest_end(skb, strings_attr); + if (nla_nest_end_safe(skb, strings_attr) < 0) + goto nla_put_failure; } nla_nest_end(skb, stringset_attr); From 656121b155030086b01cfce9bd31b0c925ee6860 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 8 Apr 2026 20:26:56 +0200 Subject: [PATCH 1440/1561] net: airoha: Add missing RX_CPU_IDX() configuration in airoha_qdma_cleanup_rx_queue() When the descriptor index written in REG_RX_CPU_IDX() is equal to the one stored in REG_RX_DMA_IDX(), the hw will stop since the QDMA RX ring is empty. Add missing REG_RX_CPU_IDX() configuration in airoha_qdma_cleanup_rx_queue routine during QDMA RX ring cleanup. Fixes: 514aac359987 ("net: airoha: Add missing cleanup bits in airoha_qdma_cleanup_rx_queue()") Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260408-airoha-cpu-idx-airoha_qdma_cleanup_rx_queue-v1-1-8efa64844308@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index c14cdce588a7..9e995094c32a 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -825,6 +825,11 @@ static void airoha_qdma_cleanup_rx_queue(struct airoha_queue *q) } q->head = q->tail; + /* Set RX_DMA_IDX to RX_CPU_IDX to notify the hw the QDMA RX ring is + * empty. + */ + airoha_qdma_rmw(qdma, REG_RX_CPU_IDX(qid), RX_RING_CPU_IDX_MASK, + FIELD_PREP(RX_RING_CPU_IDX_MASK, q->head)); airoha_qdma_rmw(qdma, REG_RX_DMA_IDX(qid), RX_RING_DMA_IDX_MASK, FIELD_PREP(RX_RING_DMA_IDX_MASK, q->tail)); } From 7ef629b458018ed01dcab6cbdc644ef26b0d0d83 Mon Sep 17 00:00:00 2001 From: Nicolai Buchwitz Date: Mon, 6 Apr 2026 09:13:07 +0200 Subject: [PATCH 1441/1561] net: phy: add support for disabling PHY-autonomous EEE Some PHYs (e.g. Broadcom BCM54xx, Realtek RTL8211F) implement autonomous EEE where the PHY manages LPI signaling without forwarding it to the MAC. This conflicts with MAC drivers that implement their own LPI control. Add a .disable_autonomous_eee callback to struct phy_driver and call it from phy_support_eee(). When a MAC driver indicates it supports EEE via phy_support_eee(), the PHY's autonomous EEE is automatically disabled so the MAC can manage LPI entry/exit. Signed-off-by: Nicolai Buchwitz Link: https://patch.msgid.link/20260406-devel-autonomous-eee-v1-1-b335e7143711@tipi-net.de Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy_device.c | 22 ++++++++++++++++++++++ include/linux/phy.h | 14 ++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 0edff47478c2..cda4abf4e68c 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1375,6 +1375,14 @@ int phy_init_hw(struct phy_device *phydev) return ret; } + /* Re-apply autonomous EEE disable after soft reset */ + if (phydev->autonomous_eee_disabled && + phydev->drv->disable_autonomous_eee) { + ret = phydev->drv->disable_autonomous_eee(phydev); + if (ret) + return ret; + } + return 0; } EXPORT_SYMBOL(phy_init_hw); @@ -2898,6 +2906,20 @@ void phy_support_eee(struct phy_device *phydev) linkmode_copy(phydev->advertising_eee, phydev->supported_eee); phydev->eee_cfg.tx_lpi_enabled = true; phydev->eee_cfg.eee_enabled = true; + + /* If the PHY supports autonomous EEE, disable it so the MAC can + * manage LPI signaling instead. The flag is stored so it can be + * re-applied after a PHY soft reset (e.g. suspend/resume). + */ + if (phydev->drv && phydev->drv->disable_autonomous_eee) { + int ret = phydev->drv->disable_autonomous_eee(phydev); + + if (ret) + phydev_warn(phydev, "Failed to disable autonomous EEE: %pe\n", + ERR_PTR(ret)); + else + phydev->autonomous_eee_disabled = true; + } } EXPORT_SYMBOL(phy_support_eee); diff --git a/include/linux/phy.h b/include/linux/phy.h index 5de4b172cd0b..199a7aaa341b 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -612,6 +612,8 @@ struct phy_oatc14_sqi_capability { * @advertising_eee: Currently advertised EEE linkmodes * @enable_tx_lpi: When True, MAC should transmit LPI to PHY * @eee_active: phylib private state, indicating that EEE has been negotiated + * @autonomous_eee_disabled: Set when autonomous EEE has been disabled, + * used to re-apply after PHY soft reset * @eee_cfg: User configuration of EEE * @lp_advertising: Current link partner advertised linkmodes * @host_interfaces: PHY interface modes supported by host @@ -739,6 +741,7 @@ struct phy_device { __ETHTOOL_DECLARE_LINK_MODE_MASK(eee_disabled_modes); bool enable_tx_lpi; bool eee_active; + bool autonomous_eee_disabled; struct eee_config eee_cfg; /* Host supported PHY interface types. Should be ignored if empty. */ @@ -1359,6 +1362,17 @@ struct phy_driver { void (*get_stats)(struct phy_device *dev, struct ethtool_stats *stats, u64 *data); + /** + * @disable_autonomous_eee: Disable PHY-autonomous EEE + * + * Some PHYs manage EEE autonomously, preventing the MAC from + * controlling LPI signaling. This callback disables autonomous + * EEE at the PHY. + * + * Return: 0 on success, negative errno on failure. + */ + int (*disable_autonomous_eee)(struct phy_device *dev); + /* Get and Set PHY tunables */ /** @get_tunable: Return the value of a tunable */ int (*get_tunable)(struct phy_device *dev, From bcb3e89fc0ecbe7a2b7ce614b72deda39083ac74 Mon Sep 17 00:00:00 2001 From: Nicolai Buchwitz Date: Mon, 6 Apr 2026 09:13:08 +0200 Subject: [PATCH 1442/1561] net: phy: broadcom: implement .disable_autonomous_eee for BCM54xx Implement the .disable_autonomous_eee callback for the BCM54210E. In AutogrEEEn mode the PHY manages EEE autonomously. Clearing the AutogrEEEn enable bit in MII_BUF_CNTL_0 switches the PHY to Native EEE mode. Signed-off-by: Nicolai Buchwitz Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20260406-devel-autonomous-eee-v1-2-b335e7143711@tipi-net.de Signed-off-by: Jakub Kicinski --- drivers/net/phy/broadcom.c | 7 +++++++ include/linux/brcmphy.h | 3 +++ 2 files changed, 10 insertions(+) diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index cb306f9e80cc..bf0c6a04481e 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -1452,6 +1452,12 @@ static int bcm54811_read_status(struct phy_device *phydev) return genphy_read_status(phydev); } +static int bcm54xx_disable_autonomous_eee(struct phy_device *phydev) +{ + return bcm_phy_modify_exp(phydev, BCM54XX_TOP_MISC_MII_BUF_CNTL0, + BCM54XX_MII_BUF_CNTL0_AUTOGREEEN_EN, 0); +} + static struct phy_driver broadcom_drivers[] = { { PHY_ID_MATCH_MODEL(PHY_ID_BCM5411), @@ -1495,6 +1501,7 @@ static struct phy_driver broadcom_drivers[] = { .get_wol = bcm54xx_phy_get_wol, .set_wol = bcm54xx_phy_set_wol, .led_brightness_set = bcm_phy_led_brightness_set, + .disable_autonomous_eee = bcm54xx_disable_autonomous_eee, }, { PHY_ID_MATCH_MODEL(PHY_ID_BCM5461), .name = "Broadcom BCM5461", diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 115a964f3006..174687c4c80a 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -266,6 +266,9 @@ #define BCM54XX_TOP_MISC_IDDQ_SD (1 << 2) #define BCM54XX_TOP_MISC_IDDQ_SR (1 << 3) +#define BCM54XX_TOP_MISC_MII_BUF_CNTL0 (MII_BCM54XX_EXP_SEL_TOP + 0x00) +#define BCM54XX_MII_BUF_CNTL0_AUTOGREEEN_EN BIT(0) + #define BCM54XX_TOP_MISC_LED_CTL (MII_BCM54XX_EXP_SEL_TOP + 0x0C) #define BCM54XX_LED4_SEL_INTR BIT(1) From bb14e3b63c63a48307843c82180bc8abb34e1acc Mon Sep 17 00:00:00 2001 From: Nicolai Buchwitz Date: Mon, 6 Apr 2026 09:13:09 +0200 Subject: [PATCH 1443/1561] net: phy: realtek: convert RTL8211F to .disable_autonomous_eee The RTL8211F previously unconditionally disabled PHY-mode EEE in config_init. Convert this to use the new .disable_autonomous_eee callback so it is only disabled when the MAC indicates EEE support via phy_support_eee(). This preserves PHY-autonomous EEE for MACs that do not support EEE, while still disabling it when the MAC manages LPI. Signed-off-by: Nicolai Buchwitz Link: https://patch.msgid.link/20260406-devel-autonomous-eee-v1-3-b335e7143711@tipi-net.de Signed-off-by: Jakub Kicinski --- drivers/net/phy/realtek/realtek_main.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/realtek/realtek_main.c b/drivers/net/phy/realtek/realtek_main.c index bdb1221023fb..9c26531abe4a 100644 --- a/drivers/net/phy/realtek/realtek_main.c +++ b/drivers/net/phy/realtek/realtek_main.c @@ -726,9 +726,8 @@ static int rtl8211f_config_aldps(struct phy_device *phydev) return phy_modify(phydev, RTL8211F_PHYCR1, mask, mask); } -static int rtl8211f_config_phy_eee(struct phy_device *phydev) +static int rtl8211f_disable_autonomous_eee(struct phy_device *phydev) { - /* Disable PHY-mode EEE so LPI is passed to the MAC */ return phy_modify(phydev, RTL8211F_PHYCR2, RTL8211F_PHYCR2_PHY_EEE_ENABLE, 0); } @@ -866,7 +865,7 @@ static int rtl8211f_config_init(struct phy_device *phydev) return ret; } - return rtl8211f_config_phy_eee(phydev); + return 0; } static int rtl821x_suspend(struct phy_device *phydev) @@ -2460,6 +2459,7 @@ static struct phy_driver realtek_drvs[] = { .led_hw_is_supported = rtl8211x_led_hw_is_supported, .led_hw_control_get = rtl8211f_led_hw_control_get, .led_hw_control_set = rtl8211f_led_hw_control_set, + .disable_autonomous_eee = rtl8211f_disable_autonomous_eee, }, { PHY_ID_MATCH_EXACT(RTL_8211FVD_PHYID), .name = "RTL8211F-VD Gigabit Ethernet", @@ -2476,6 +2476,7 @@ static struct phy_driver realtek_drvs[] = { .led_hw_is_supported = rtl8211x_led_hw_is_supported, .led_hw_control_get = rtl8211f_led_hw_control_get, .led_hw_control_set = rtl8211f_led_hw_control_set, + .disable_autonomous_eee = rtl8211f_disable_autonomous_eee, }, { .name = "Generic FE-GE Realtek PHY", .match_phy_device = rtlgen_match_phy_device, From a6bd339dbb3514bce690fdcf252e788dfab4ee76 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Wed, 8 Apr 2026 12:00:44 +0200 Subject: [PATCH 1444/1561] net_sched: fix skb memory leak in deferred qdisc drops When the network stack cleans up the deferred list via qdisc_run_end(), it operates on the root qdisc. If the root qdisc do not implement the TCQ_F_DEQUEUE_DROPS flag the packets queue to free are never freed and gets stranded on the child's local to_free list. Fix this by making qdisc_dequeue_drop() aware of the root qdisc. It fetches the root qdisc and check for the TCQ_F_DEQUEUE_DROPS flag. If the flag is present, the packet is appended directly to the root's to_free list. Otherwise, drop it directly as it was done before the optimization was implemented. Fixes: a6efc273ab82 ("net_sched: use qdisc_dequeue_drop() in cake, codel, fq_codel") Reported-by: Damilola Bello Closes: https://lore.kernel.org/netdev/CAPgFtOLaedBMU0f_BxV2bXftTJSmJr018Q5uozOo5vVo6b9tjw@mail.gmail.com/ Signed-off-by: Fernando Fernandez Mancera Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260408100044.4530-1-fmancera@suse.de Signed-off-by: Jakub Kicinski --- include/net/sch_generic.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c3d657359a3d..5fc0b1ebaf25 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -1170,12 +1170,22 @@ static inline void tcf_kfree_skb_list(struct sk_buff *skb) static inline void qdisc_dequeue_drop(struct Qdisc *q, struct sk_buff *skb, enum skb_drop_reason reason) { + struct Qdisc *root; + DEBUG_NET_WARN_ON_ONCE(!(q->flags & TCQ_F_DEQUEUE_DROPS)); DEBUG_NET_WARN_ON_ONCE(q->flags & TCQ_F_NOLOCK); - tcf_set_drop_reason(skb, reason); - skb->next = q->to_free; - q->to_free = skb; + rcu_read_lock(); + root = qdisc_root_sleeping(q); + + if (root->flags & TCQ_F_DEQUEUE_DROPS) { + tcf_set_drop_reason(skb, reason); + skb->next = root->to_free; + root->to_free = skb; + } else { + kfree_skb_reason(skb, reason); + } + rcu_read_unlock(); } /* Instead of calling kfree_skb() while root qdisc lock is held, From 46ce8be2ced389bccd84bcc04a12cf2f4d0c22d1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 9 Apr 2026 17:18:14 +0200 Subject: [PATCH 1445/1561] NFC: digital: Bounds check NFC-A cascade depth in SDD response handler The NFC-A anti-collision cascade in digital_in_recv_sdd_res() appends 3 or 4 bytes to target->nfcid1 on each round, but the number of cascade rounds is controlled entirely by the peer device. The peer sets the cascade tag in the SDD_RES (deciding 3 vs 4 bytes) and the cascade-incomplete bit in the SEL_RES (deciding whether another round follows). ISO 14443-3 limits NFC-A to three cascade levels and target->nfcid1 is sized accordingly (NFC_NFCID1_MAXSIZE = 10), but nothing in the driver actually enforces this. This means a malicious peer can keep the cascade running, writing past the heap-allocated nfc_target with each round. Fix this by rejecting the response when the accumulated UID would exceed the buffer. Commit e329e71013c9 ("NFC: nci: Bounds check struct nfc_target arrays") fixed similar missing checks against the same field on the NCI path. Cc: Simon Horman Cc: Kees Cook Cc: Thierry Escande Cc: Samuel Ortiz Fixes: 2c66daecc409 ("NFC Digital: Add NFC-A technology support") Cc: stable Assisted-by: gregkh_clanker_t1000 Signed-off-by: Greg Kroah-Hartman Link: https://patch.msgid.link/2026040913-figure-seducing-bd3f@gregkh Signed-off-by: Jakub Kicinski --- net/nfc/digital_technology.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/nfc/digital_technology.c b/net/nfc/digital_technology.c index 63f1b721c71d..ae63c5eb06fa 100644 --- a/net/nfc/digital_technology.c +++ b/net/nfc/digital_technology.c @@ -424,6 +424,12 @@ static void digital_in_recv_sdd_res(struct nfc_digital_dev *ddev, void *arg, size = 4; } + if (target->nfcid1_len + size > NFC_NFCID1_MAXSIZE) { + PROTOCOL_ERR("4.7.2.1"); + rc = -EPROTO; + goto exit; + } + memcpy(target->nfcid1 + target->nfcid1_len, sdd_res->nfcid1 + offset, size); target->nfcid1_len += size; From 10f86a2a5c91fc4c4d001960f1c21abe52545ef6 Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Tue, 7 Apr 2026 10:26:27 +0800 Subject: [PATCH 1446/1561] bpf: Fix same-register dst/src OOB read and pointer leak in sock_ops When a BPF sock_ops program accesses ctx fields with dst_reg == src_reg, the SOCK_OPS_GET_SK() and SOCK_OPS_GET_FIELD() macros fail to zero the destination register in the !fullsock / !locked_tcp_sock path. Both macros borrow a temporary register to check is_fullsock / is_locked_tcp_sock when dst_reg == src_reg, because dst_reg holds the ctx pointer. When the check is false (e.g., TCP_NEW_SYN_RECV state with a request_sock), dst_reg should be zeroed but is not, leaving the stale ctx pointer: - SOCK_OPS_GET_SK: dst_reg retains the ctx pointer, passes NULL checks as PTR_TO_SOCKET_OR_NULL, and can be used as a bogus socket pointer, leading to stack-out-of-bounds access in helpers like bpf_skc_to_tcp6_sock(). - SOCK_OPS_GET_FIELD: dst_reg retains the ctx pointer which the verifier believes is a SCALAR_VALUE, leaking a kernel pointer. Fix both macros by: - Changing JMP_A(1) to JMP_A(2) in the fullsock path to skip the added instruction. - Adding BPF_MOV64_IMM(si->dst_reg, 0) after the temp register restore in the !fullsock path, placed after the restore because dst_reg == src_reg means we need src_reg intact to read ctx->temp. Fixes: fd09af010788 ("bpf: sock_ops ctx access may stomp registers in corner case") Fixes: 84f44df664e9 ("bpf: sock_ops sk access may stomp registers when dst_reg = src_reg") Reported-by: Quan Sun <2022090917019@std.uestc.edu.cn> Reported-by: Yinhao Hu Reported-by: Kaiyan Mei Reported-by: Dongliang Mu Reviewed-by: Emil Tsalapatis Closes: https://lore.kernel.org/bpf/6fe1243e-149b-4d3b-99c7-fcc9e2f75787@std.uestc.edu.cn/T/#u Signed-off-by: Jiayuan Chen Acked-by: Martin KaFai Lau Link: https://patch.msgid.link/20260407022720.162151-2-jiayuan.chen@linux.dev Signed-off-by: Jakub Kicinski --- net/core/filter.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index 78b548158fb0..53ce06ed4a88 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -10581,10 +10581,11 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, si->dst_reg, si->dst_reg, \ offsetof(OBJ, OBJ_FIELD)); \ if (si->dst_reg == si->src_reg) { \ - *insn++ = BPF_JMP_A(1); \ + *insn++ = BPF_JMP_A(2); \ *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \ offsetof(struct bpf_sock_ops_kern, \ temp)); \ + *insn++ = BPF_MOV64_IMM(si->dst_reg, 0); \ } \ } while (0) @@ -10618,10 +10619,11 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, si->dst_reg, si->src_reg, \ offsetof(struct bpf_sock_ops_kern, sk));\ if (si->dst_reg == si->src_reg) { \ - *insn++ = BPF_JMP_A(1); \ + *insn++ = BPF_JMP_A(2); \ *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \ offsetof(struct bpf_sock_ops_kern, \ temp)); \ + *insn++ = BPF_MOV64_IMM(si->dst_reg, 0); \ } \ } while (0) From 04013c3ca022734ec2897b28a96d4cbd8a930407 Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Tue, 7 Apr 2026 10:26:28 +0800 Subject: [PATCH 1447/1561] selftests/bpf: Add tests for sock_ops ctx access with same src/dst register Add selftests to verify SOCK_OPS_GET_SK() and SOCK_OPS_GET_FIELD() correctly return NULL/zero when dst_reg == src_reg and is_fullsock == 0. Three subtests are included: - get_sk: ctx->sk with same src/dst register (SOCK_OPS_GET_SK) - get_field: ctx->snd_cwnd with same src/dst register (SOCK_OPS_GET_FIELD) - get_sk_diff_reg: ctx->sk with different src/dst register (baseline) Each BPF program uses inline asm (__naked) to force specific register allocation, reads is_fullsock first, then loads the field using the same (or different) register. The test triggers TCP_NEW_SYN_RECV via a TCP handshake and checks that the result is NULL/zero when is_fullsock == 0. Reviewed-by: Sun Jian Signed-off-by: Jiayuan Chen Acked-by: Martin KaFai Lau Link: https://patch.msgid.link/20260407022720.162151-3-jiayuan.chen@linux.dev Signed-off-by: Jakub Kicinski --- .../bpf/prog_tests/sock_ops_get_sk.c | 76 ++++++++++++ .../selftests/bpf/progs/sock_ops_get_sk.c | 117 ++++++++++++++++++ 2 files changed, 193 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/sock_ops_get_sk.c create mode 100644 tools/testing/selftests/bpf/progs/sock_ops_get_sk.c diff --git a/tools/testing/selftests/bpf/prog_tests/sock_ops_get_sk.c b/tools/testing/selftests/bpf/prog_tests/sock_ops_get_sk.c new file mode 100644 index 000000000000..343d92c4df30 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sock_ops_get_sk.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include "cgroup_helpers.h" +#include "network_helpers.h" +#include "sock_ops_get_sk.skel.h" + +/* See progs/sock_ops_get_sk.c for the bug description. */ +static void run_sock_ops_test(int cgroup_fd, int prog_fd) +{ + int server_fd, client_fd, err; + + err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_SOCK_OPS, 0); + if (!ASSERT_OK(err, "prog_attach")) + return; + + server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0); + if (!ASSERT_OK_FD(server_fd, "start_server")) + goto detach; + + /* Trigger TCP handshake which causes TCP_NEW_SYN_RECV state where + * is_fullsock == 0 and is_locked_tcp_sock == 0. + */ + client_fd = connect_to_fd(server_fd, 0); + if (!ASSERT_OK_FD(client_fd, "connect_to_fd")) + goto close_server; + + close(client_fd); + +close_server: + close(server_fd); +detach: + bpf_prog_detach(cgroup_fd, BPF_CGROUP_SOCK_OPS); +} + +void test_ns_sock_ops_get_sk(void) +{ + struct sock_ops_get_sk *skel; + int cgroup_fd; + + cgroup_fd = test__join_cgroup("/sock_ops_get_sk"); + if (!ASSERT_OK_FD(cgroup_fd, "join_cgroup")) + return; + + skel = sock_ops_get_sk__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_load")) + goto close_cgroup; + + /* Test SOCK_OPS_GET_SK with same src/dst register */ + if (test__start_subtest("get_sk")) { + run_sock_ops_test(cgroup_fd, + bpf_program__fd(skel->progs.sock_ops_get_sk_same_reg)); + ASSERT_EQ(skel->bss->null_seen, 1, "null_seen"); + ASSERT_EQ(skel->bss->bug_detected, 0, "bug_not_detected"); + } + + /* Test SOCK_OPS_GET_FIELD with same src/dst register */ + if (test__start_subtest("get_field")) { + run_sock_ops_test(cgroup_fd, + bpf_program__fd(skel->progs.sock_ops_get_field_same_reg)); + ASSERT_EQ(skel->bss->field_null_seen, 1, "field_null_seen"); + ASSERT_EQ(skel->bss->field_bug_detected, 0, "field_bug_not_detected"); + } + + /* Test SOCK_OPS_GET_SK with different src/dst register */ + if (test__start_subtest("get_sk_diff_reg")) { + run_sock_ops_test(cgroup_fd, + bpf_program__fd(skel->progs.sock_ops_get_sk_diff_reg)); + ASSERT_EQ(skel->bss->diff_reg_null_seen, 1, "diff_reg_null_seen"); + ASSERT_EQ(skel->bss->diff_reg_bug_detected, 0, "diff_reg_bug_not_detected"); + } + + sock_ops_get_sk__destroy(skel); +close_cgroup: + close(cgroup_fd); +} diff --git a/tools/testing/selftests/bpf/progs/sock_ops_get_sk.c b/tools/testing/selftests/bpf/progs/sock_ops_get_sk.c new file mode 100644 index 000000000000..3a0689f8ce7c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/sock_ops_get_sk.c @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include +#include "bpf_misc.h" + +/* + * Test the SOCK_OPS_GET_SK() and SOCK_OPS_GET_FIELD() macros in + * sock_ops_convert_ctx_access() when dst_reg == src_reg. + * + * When dst_reg == src_reg, the macros borrow a temporary register to load + * is_fullsock / is_locked_tcp_sock, because dst_reg holds the ctx pointer + * and cannot be clobbered before ctx->sk / ctx->field is read. If + * is_fullsock == 0 (e.g., TCP_NEW_SYN_RECV with a request_sock), the macro + * must still zero dst_reg so the verifier's PTR_TO_SOCKET_OR_NULL / + * SCALAR_VALUE type is correct at runtime. A missing clear leaves a stale + * ctx pointer in dst_reg that passes NULL checks (GET_SK) or leaks a kernel + * address as a scalar (GET_FIELD). + * + * When dst_reg != src_reg, dst_reg itself is used to load is_fullsock, so + * the JEQ (dst_reg == 0) naturally leaves it zeroed on the !fullsock path. + */ + +int bug_detected; +int null_seen; + +SEC("sockops") +__naked void sock_ops_get_sk_same_reg(void) +{ + asm volatile ( + "r7 = *(u32 *)(r1 + %[is_fullsock_off]);" + "r1 = *(u64 *)(r1 + %[sk_off]);" + "if r7 != 0 goto 2f;" + "if r1 == 0 goto 1f;" + "r1 = %[bug_detected] ll;" + "r2 = 1;" + "*(u32 *)(r1 + 0) = r2;" + "goto 2f;" + "1:" + "r1 = %[null_seen] ll;" + "r2 = 1;" + "*(u32 *)(r1 + 0) = r2;" + "2:" + "r0 = 1;" + "exit;" + : + : __imm_const(is_fullsock_off, offsetof(struct bpf_sock_ops, is_fullsock)), + __imm_const(sk_off, offsetof(struct bpf_sock_ops, sk)), + __imm_addr(bug_detected), + __imm_addr(null_seen) + : __clobber_all); +} + +/* SOCK_OPS_GET_FIELD: same-register, is_locked_tcp_sock == 0 path. */ +int field_bug_detected; +int field_null_seen; + +SEC("sockops") +__naked void sock_ops_get_field_same_reg(void) +{ + asm volatile ( + "r7 = *(u32 *)(r1 + %[is_fullsock_off]);" + "r1 = *(u32 *)(r1 + %[snd_cwnd_off]);" + "if r7 != 0 goto 2f;" + "if r1 == 0 goto 1f;" + "r1 = %[field_bug_detected] ll;" + "r2 = 1;" + "*(u32 *)(r1 + 0) = r2;" + "goto 2f;" + "1:" + "r1 = %[field_null_seen] ll;" + "r2 = 1;" + "*(u32 *)(r1 + 0) = r2;" + "2:" + "r0 = 1;" + "exit;" + : + : __imm_const(is_fullsock_off, offsetof(struct bpf_sock_ops, is_fullsock)), + __imm_const(snd_cwnd_off, offsetof(struct bpf_sock_ops, snd_cwnd)), + __imm_addr(field_bug_detected), + __imm_addr(field_null_seen) + : __clobber_all); +} + +/* SOCK_OPS_GET_SK: different-register, is_fullsock == 0 path. */ +int diff_reg_bug_detected; +int diff_reg_null_seen; + +SEC("sockops") +__naked void sock_ops_get_sk_diff_reg(void) +{ + asm volatile ( + "r7 = r1;" + "r6 = *(u32 *)(r7 + %[is_fullsock_off]);" + "r2 = *(u64 *)(r7 + %[sk_off]);" + "if r6 != 0 goto 2f;" + "if r2 == 0 goto 1f;" + "r1 = %[diff_reg_bug_detected] ll;" + "r3 = 1;" + "*(u32 *)(r1 + 0) = r3;" + "goto 2f;" + "1:" + "r1 = %[diff_reg_null_seen] ll;" + "r3 = 1;" + "*(u32 *)(r1 + 0) = r3;" + "2:" + "r0 = 1;" + "exit;" + : + : __imm_const(is_fullsock_off, offsetof(struct bpf_sock_ops, is_fullsock)), + __imm_const(sk_off, offsetof(struct bpf_sock_ops, sk)), + __imm_addr(diff_reg_bug_detected), + __imm_addr(diff_reg_null_seen) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; From 8632175ccb0c8cfc69b0f54c47b4b15b44c263ff Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 9 Apr 2026 12:09:45 +0300 Subject: [PATCH 1448/1561] gre: Count GRE packet drops GRE is silently dropping packets without updating statistics. In case of drop, increment rx_dropped counter to provide visibility into packet loss. For the case where no GRE protocol handler is registered, use rx_nohandler. Reviewed-by: Dragos Tatulea Reviewed-by: Nimrod Oren Signed-off-by: Gal Pressman Link: https://patch.msgid.link/20260409090945.1542440-1-gal@nvidia.com Signed-off-by: Jakub Kicinski --- net/ipv4/gre_demux.c | 8 ++++++-- net/ipv4/ip_gre.c | 1 + net/ipv6/ip6_gre.c | 1 + 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index dafd68f3436a..96fd7dc6d82d 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -159,14 +159,18 @@ static int gre_rcv(struct sk_buff *skb) rcu_read_lock(); proto = rcu_dereference(gre_proto[ver]); if (!proto || !proto->handler) - goto drop_unlock; + goto drop_nohandler; ret = proto->handler(skb); rcu_read_unlock(); return ret; -drop_unlock: +drop_nohandler: rcu_read_unlock(); + dev_core_stats_rx_nohandler_inc(skb->dev); + kfree_skb(skb); + return NET_RX_DROP; drop: + dev_core_stats_rx_dropped_inc(skb->dev); kfree_skb(skb); return NET_RX_DROP; } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 35f0baa99d40..169e2921a851 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -468,6 +468,7 @@ static int gre_rcv(struct sk_buff *skb) out: icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); drop: + dev_core_stats_rx_dropped_inc(skb->dev); kfree_skb(skb); return 0; } diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index dafcc0dcd77a..63fc8556b475 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -593,6 +593,7 @@ static int gre_rcv(struct sk_buff *skb) out: icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); drop: + dev_core_stats_rx_dropped_inc(skb->dev); kfree_skb(skb); return 0; } From c6c223fd06edbc34c7b04170e375abfb2a13cbf8 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Wed, 8 Apr 2026 13:58:45 +0800 Subject: [PATCH 1449/1561] net: enetc: add support for the standardized counters ENETC v4 provides 64-bit counters for IEEE 802.3 basic and mandatory managed objects, the IETF Management Information Database (MIB) package (RFC2665), and Remote Network Monitoring (RMON) statistics. In addition, some ENETCs support preemption, so these ENETCs have two MACs: MAC 0 is the express MAC (eMAC), MAC 1 is the preemptible MAC (pMAC). Both MACs support these statistics. Signed-off-by: Wei Fang Link: https://patch.msgid.link/20260408055849.1314033-2-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc.h | 2 + .../net/ethernet/freescale/enetc/enetc4_hw.h | 120 +++++++++++++++ .../ethernet/freescale/enetc/enetc_ethtool.c | 141 +++++++++++++++--- 3 files changed, 245 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index aecd40aeef9c..e663bb5e614e 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -264,6 +264,8 @@ struct enetc_msg_swbd { }; #define ENETC_REV1 0x1 +#define ENETC_REV4 0x4 + enum enetc_errata { ENETC_ERR_VLAN_ISOL = BIT(0), ENETC_ERR_UCMCSWP = BIT(1), diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_hw.h b/drivers/net/ethernet/freescale/enetc/enetc4_hw.h index 719c88ceb801..392992a646fb 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc4_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc4_hw.h @@ -196,6 +196,126 @@ #define PM_SINGLE_STEP_OFFSET_SET(o) FIELD_PREP(PM_SINGLE_STEP_OFFSET, o) #define PM_SINGLE_STEP_EN BIT(31) +/* Port MAC 0/1 Receive Ethernet Octets Counter */ +#define ENETC4_PM_REOCT(mac) (0x5100 + (mac) * 0x400) + +/* Port MAC 0/1 Receive Alignment Error Counter Register */ +#define ENETC4_PM_RALN(mac) (0x5110 + (mac) * 0x400) + +/* Port MAC 0/1 Receive Valid Pause Frame Counter */ +#define ENETC4_PM_RXPF(mac) (0x5118 + (mac) * 0x400) + +/* Port MAC 0/1 Receive Frame Counter */ +#define ENETC4_PM_RFRM(mac) (0x5120 + (mac) * 0x400) + +/* Port MAC 0/1 Receive Frame Check Sequence Error Counter */ +#define ENETC4_PM_RFCS(mac) (0x5128 + (mac) * 0x400) + +/* Port MAC 0/1 Receive Multicast Frame Counter */ +#define ENETC4_PM_RMCA(mac) (0x5148 + (mac) * 0x400) + +/* Port MAC 0/1 Receive Broadcast Frame Counter */ +#define ENETC4_PM_RBCA(mac) (0x5150 + (mac) * 0x400) + +/* Port MAC 0/1 Receive Undersized Packet Counter */ +#define ENETC4_PM_RUND(mac) (0x5168 + (mac) * 0x400) + +/* Port MAC 0/1 Receive 64-Octet Packet Counter */ +#define ENETC4_PM_R64(mac) (0x5170 + (mac) * 0x400) + +/* Port MAC 0/1 Receive 65 to 127-Octet Packet Counter */ +#define ENETC4_PM_R127(mac) (0x5178 + (mac) * 0x400) + +/* Port MAC 0/1 Receive 128 to 255-Octet Packet Counter */ +#define ENETC4_PM_R255(mac) (0x5180 + (mac) * 0x400) + +/* Port MAC 0/1 Receive 256 to 511-Octet Packet Counter */ +#define ENETC4_PM_R511(mac) (0x5188 + (mac) * 0x400) + +/* Port MAC 0/1 Receive 512 to 1023-Octet Packet Counter */ +#define ENETC4_PM_R1023(mac) (0x5190 + (mac) * 0x400) + +/* Port MAC 0/1 Receive 1024 to 1522-Octet Packet Counter */ +#define ENETC4_PM_R1522(mac) (0x5198 + (mac) * 0x400) + +/* Port MAC 0/1 Receive 1523 to Max-Octet Packet Counter */ +#define ENETC4_PM_R1523X(mac) (0x51a0 + (mac) * 0x400) + +/* Port MAC 0/1 Receive Oversized Packet Counter */ +#define ENETC4_PM_ROVR(mac) (0x51a8 + (mac) * 0x400) + +/* Port MAC 0/1 Receive Jabber Packet Counter */ +#define ENETC4_PM_RJBR(mac) (0x51b0 + (mac) * 0x400) + +/* Port MAC 0/1 Receive Fragment Packet Counter */ +#define ENETC4_PM_RFRG(mac) (0x51b8 + (mac) * 0x400) + +/* Port MAC 0/1 Receive Control Packet Counter */ +#define ENETC4_PM_RCNP(mac) (0x51c0 + (mac) * 0x400) + +/* Port MAC 0/1 Receive Dropped Not Truncated Packets Counter */ +#define ENETC4_PM_RDRNTP(mac) (0x51c8 + (mac) * 0x400) + +/* Port MAC 0/1 Transmit Ethernet Octets Counter */ +#define ENETC4_PM_TEOCT(mac) (0x5200 + (mac) * 0x400) + +/* Port MAC 0/1 Transmit Valid Pause Frame Counter */ +#define ENETC4_PM_TXPF(mac) (0x5218 + (mac) * 0x400) + +/* Port MAC 0/1 Transmit Frame Counter */ +#define ENETC4_PM_TFRM(mac) (0x5220 + (mac) * 0x400) + +/* Port MAC 0/1 Transmit Frame Error Counter */ +#define ENETC4_PM_TERR(mac) (0x5238 + (mac) * 0x400) + +/* Port MAC 0/1 Transmit Multicast Frame Counter */ +#define ENETC4_PM_TMCA(mac) (0x5248 + (mac) * 0x400) + +/* Port MAC 0/1 Transmit Broadcast Frame Counter */ +#define ENETC4_PM_TBCA(mac) (0x5250 + (mac) * 0x400) + +/* Port MAC 0/1 Transmit Undersized Packet Counter */ +#define ENETC4_PM_TUND(mac) (0x5268 + (mac) * 0x400) + +/* Port MAC 0/1 Transmit 64-Octet Packet Counter */ +#define ENETC4_PM_T64(mac) (0x5270 + (mac) * 0x400) + +/* Port MAC 0/1 Transmit 65 to 127-Octet Packet Counter */ +#define ENETC4_PM_T127(mac) (0x5278 + (mac) * 0x400) + +/* Port MAC 0/1 Transmit 128 to 255-Octet Packet Counter */ +#define ENETC4_PM_T255(mac) (0x5280 + (mac) * 0x400) + +/* Port MAC 0/1 Transmit 256 to 511-Octet Packet Counter */ +#define ENETC4_PM_T511(mac) (0x5288 + (mac) * 0x400) + +/* Port MAC 0/1 Transmit 512 to 1023-Octet Packet Counter */ +#define ENETC4_PM_T1023(mac) (0x5290 + (mac) * 0x400) + +/* Port MAC 0/1 Transmit 1024 to 1522-Octet Packet Counter */ +#define ENETC4_PM_T1522(mac) (0x5298 + (mac) * 0x400) + +/* Port MAC 0/1 Transmit 1523 to TX_MTU-Octet Packet Counter */ +#define ENETC4_PM_T1523X(mac) (0x52a0 + (mac) * 0x400) + +/* Port MAC 0/1 Transmit Control Packet Counter */ +#define ENETC4_PM_TCNP(mac) (0x52c0 + (mac) * 0x400) + +/* Port MAC 0/1 Transmit Deferred Packet Counter */ +#define ENETC4_PM_TDFR(mac) (0x52d0 + (mac) * 0x400) + +/* Port MAC 0/1 Transmit Multiple Collisions Counter */ +#define ENETC4_PM_TMCOL(mac) (0x52d8 + (mac) * 0x400) + +/* Port MAC 0/1 Transmit Single Collision */ +#define ENETC4_PM_TSCOL(mac) (0x52e0 + (mac) * 0x400) + +/* Port MAC 0/1 Transmit Late Collision Counter */ +#define ENETC4_PM_TLCOL(mac) (0x52e8 + (mac) * 0x400) + +/* Port MAC 0/1 Transmit Excessive Collisions Counter */ +#define ENETC4_PM_TECOL(mac) (0x52f0 + (mac) * 0x400) + /* Port MAC 0 Interface Mode Control Register */ #define ENETC4_PM_IF_MODE(mac) (0x5300 + (mac) * 0x400) #define PM_IF_MODE_IFMODE GENMASK(2, 0) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index 7c17acaf7a38..c30a119e9142 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -320,27 +320,38 @@ static void enetc_get_ethtool_stats(struct net_device *ndev, data[o++] = enetc_port_rd64(hw, enetc_pm_counters[i].reg); } -static void enetc_pause_stats(struct enetc_hw *hw, int mac, +static void enetc_pause_stats(struct enetc_si *si, int mac, struct ethtool_pause_stats *pause_stats) { - pause_stats->tx_pause_frames = enetc_port_rd64(hw, ENETC_PM_TXPF(mac)); - pause_stats->rx_pause_frames = enetc_port_rd64(hw, ENETC_PM_RXPF(mac)); + struct enetc_hw *hw = &si->hw; + + switch (si->pdev->revision) { + case ENETC_REV1: + pause_stats->tx_pause_frames = enetc_port_rd64(hw, ENETC_PM_TXPF(mac)); + pause_stats->rx_pause_frames = enetc_port_rd64(hw, ENETC_PM_RXPF(mac)); + break; + case ENETC_REV4: + pause_stats->tx_pause_frames = enetc_port_rd64(hw, ENETC4_PM_TXPF(mac)); + pause_stats->rx_pause_frames = enetc_port_rd64(hw, ENETC4_PM_RXPF(mac)); + break; + default: + break; + } } static void enetc_get_pause_stats(struct net_device *ndev, struct ethtool_pause_stats *pause_stats) { struct enetc_ndev_priv *priv = netdev_priv(ndev); - struct enetc_hw *hw = &priv->si->hw; struct enetc_si *si = priv->si; switch (pause_stats->src) { case ETHTOOL_MAC_STATS_SRC_EMAC: - enetc_pause_stats(hw, 0, pause_stats); + enetc_pause_stats(si, 0, pause_stats); break; case ETHTOOL_MAC_STATS_SRC_PMAC: if (si->hw_features & ENETC_SI_F_QBU) - enetc_pause_stats(hw, 1, pause_stats); + enetc_pause_stats(si, 1, pause_stats); break; case ETHTOOL_MAC_STATS_SRC_AGGREGATE: ethtool_aggregate_pause_stats(ndev, pause_stats); @@ -371,11 +382,45 @@ static void enetc_mac_stats(struct enetc_hw *hw, int mac, s->BroadcastFramesReceivedOK = enetc_port_rd64(hw, ENETC_PM_RBCA(mac)); } -static void enetc_ctrl_stats(struct enetc_hw *hw, int mac, +static void enetc4_mac_stats(struct enetc_hw *hw, int mac, + struct ethtool_eth_mac_stats *s) +{ + s->FramesTransmittedOK = enetc_port_rd64(hw, ENETC4_PM_TFRM(mac)); + s->SingleCollisionFrames = enetc_port_rd64(hw, ENETC4_PM_TSCOL(mac)); + s->MultipleCollisionFrames = enetc_port_rd64(hw, ENETC4_PM_TMCOL(mac)); + s->FramesReceivedOK = enetc_port_rd64(hw, ENETC4_PM_RFRM(mac)); + s->FrameCheckSequenceErrors = enetc_port_rd64(hw, ENETC4_PM_RFCS(mac)); + s->AlignmentErrors = enetc_port_rd64(hw, ENETC4_PM_RALN(mac)); + s->OctetsTransmittedOK = enetc_port_rd64(hw, ENETC4_PM_TEOCT(mac)); + s->FramesWithDeferredXmissions = enetc_port_rd64(hw, ENETC4_PM_TDFR(mac)); + s->LateCollisions = enetc_port_rd64(hw, ENETC4_PM_TLCOL(mac)); + s->FramesAbortedDueToXSColls = enetc_port_rd64(hw, ENETC4_PM_TECOL(mac)); + s->FramesLostDueToIntMACXmitError = enetc_port_rd64(hw, ENETC4_PM_TERR(mac)); + s->OctetsReceivedOK = enetc_port_rd64(hw, ENETC4_PM_REOCT(mac)); + s->FramesLostDueToIntMACRcvError = enetc_port_rd64(hw, ENETC4_PM_RDRNTP(mac)); + s->MulticastFramesXmittedOK = enetc_port_rd64(hw, ENETC4_PM_TMCA(mac)); + s->BroadcastFramesXmittedOK = enetc_port_rd64(hw, ENETC4_PM_TBCA(mac)); + s->MulticastFramesReceivedOK = enetc_port_rd64(hw, ENETC4_PM_RMCA(mac)); + s->BroadcastFramesReceivedOK = enetc_port_rd64(hw, ENETC4_PM_RBCA(mac)); +} + +static void enetc_ctrl_stats(struct enetc_si *si, int mac, struct ethtool_eth_ctrl_stats *s) { - s->MACControlFramesTransmitted = enetc_port_rd64(hw, ENETC_PM_TCNP(mac)); - s->MACControlFramesReceived = enetc_port_rd64(hw, ENETC_PM_RCNP(mac)); + struct enetc_hw *hw = &si->hw; + + switch (si->pdev->revision) { + case ENETC_REV1: + s->MACControlFramesTransmitted = enetc_port_rd64(hw, ENETC_PM_TCNP(mac)); + s->MACControlFramesReceived = enetc_port_rd64(hw, ENETC_PM_RCNP(mac)); + break; + case ENETC_REV4: + s->MACControlFramesTransmitted = enetc_port_rd64(hw, ENETC4_PM_TCNP(mac)); + s->MACControlFramesReceived = enetc_port_rd64(hw, ENETC4_PM_RCNP(mac)); + break; + default: + break; + } } static const struct ethtool_rmon_hist_range enetc_rmon_ranges[] = { @@ -414,20 +459,61 @@ static void enetc_rmon_stats(struct enetc_hw *hw, int mac, s->hist_tx[6] = enetc_port_rd64(hw, ENETC_PM_T1523X(mac)); } +static void enetc4_rmon_stats(struct enetc_hw *hw, int mac, + struct ethtool_rmon_stats *s) +{ + s->undersize_pkts = enetc_port_rd64(hw, ENETC4_PM_RUND(mac)); + s->oversize_pkts = enetc_port_rd64(hw, ENETC4_PM_ROVR(mac)); + s->fragments = enetc_port_rd64(hw, ENETC4_PM_RFRG(mac)); + s->jabbers = enetc_port_rd64(hw, ENETC4_PM_RJBR(mac)); + + s->hist[0] = enetc_port_rd64(hw, ENETC4_PM_R64(mac)); + s->hist[1] = enetc_port_rd64(hw, ENETC4_PM_R127(mac)); + s->hist[2] = enetc_port_rd64(hw, ENETC4_PM_R255(mac)); + s->hist[3] = enetc_port_rd64(hw, ENETC4_PM_R511(mac)); + s->hist[4] = enetc_port_rd64(hw, ENETC4_PM_R1023(mac)); + s->hist[5] = enetc_port_rd64(hw, ENETC4_PM_R1522(mac)); + s->hist[6] = enetc_port_rd64(hw, ENETC4_PM_R1523X(mac)); + + s->hist_tx[0] = enetc_port_rd64(hw, ENETC4_PM_T64(mac)); + s->hist_tx[1] = enetc_port_rd64(hw, ENETC4_PM_T127(mac)); + s->hist_tx[2] = enetc_port_rd64(hw, ENETC4_PM_T255(mac)); + s->hist_tx[3] = enetc_port_rd64(hw, ENETC4_PM_T511(mac)); + s->hist_tx[4] = enetc_port_rd64(hw, ENETC4_PM_T1023(mac)); + s->hist_tx[5] = enetc_port_rd64(hw, ENETC4_PM_T1522(mac)); + s->hist_tx[6] = enetc_port_rd64(hw, ENETC4_PM_T1523X(mac)); +} + +static void enetc_get_mac_stats(struct enetc_si *si, int mac, + struct ethtool_eth_mac_stats *mac_stats) +{ + struct enetc_hw *hw = &si->hw; + + switch (si->pdev->revision) { + case ENETC_REV1: + enetc_mac_stats(hw, mac, mac_stats); + break; + case ENETC_REV4: + enetc4_mac_stats(hw, mac, mac_stats); + break; + default: + break; + } +} + static void enetc_get_eth_mac_stats(struct net_device *ndev, struct ethtool_eth_mac_stats *mac_stats) { struct enetc_ndev_priv *priv = netdev_priv(ndev); - struct enetc_hw *hw = &priv->si->hw; struct enetc_si *si = priv->si; switch (mac_stats->src) { case ETHTOOL_MAC_STATS_SRC_EMAC: - enetc_mac_stats(hw, 0, mac_stats); + enetc_get_mac_stats(si, 0, mac_stats); break; case ETHTOOL_MAC_STATS_SRC_PMAC: if (si->hw_features & ENETC_SI_F_QBU) - enetc_mac_stats(hw, 1, mac_stats); + enetc_get_mac_stats(si, 1, mac_stats); break; case ETHTOOL_MAC_STATS_SRC_AGGREGATE: ethtool_aggregate_mac_stats(ndev, mac_stats); @@ -481,16 +567,15 @@ static void enetc_get_eth_ctrl_stats(struct net_device *ndev, struct ethtool_eth_ctrl_stats *ctrl_stats) { struct enetc_ndev_priv *priv = netdev_priv(ndev); - struct enetc_hw *hw = &priv->si->hw; struct enetc_si *si = priv->si; switch (ctrl_stats->src) { case ETHTOOL_MAC_STATS_SRC_EMAC: - enetc_ctrl_stats(hw, 0, ctrl_stats); + enetc_ctrl_stats(si, 0, ctrl_stats); break; case ETHTOOL_MAC_STATS_SRC_PMAC: if (si->hw_features & ENETC_SI_F_QBU) - enetc_ctrl_stats(hw, 1, ctrl_stats); + enetc_ctrl_stats(si, 1, ctrl_stats); break; case ETHTOOL_MAC_STATS_SRC_AGGREGATE: ethtool_aggregate_ctrl_stats(ndev, ctrl_stats); @@ -498,23 +583,39 @@ static void enetc_get_eth_ctrl_stats(struct net_device *ndev, } } +static void enetc_get_mac_rmon_stats(struct enetc_si *si, int mac, + struct ethtool_rmon_stats *rmon_stats) +{ + struct enetc_hw *hw = &si->hw; + + switch (si->pdev->revision) { + case ENETC_REV1: + enetc_rmon_stats(hw, mac, rmon_stats); + break; + case ENETC_REV4: + enetc4_rmon_stats(hw, mac, rmon_stats); + break; + default: + break; + } +} + static void enetc_get_rmon_stats(struct net_device *ndev, struct ethtool_rmon_stats *rmon_stats, const struct ethtool_rmon_hist_range **ranges) { struct enetc_ndev_priv *priv = netdev_priv(ndev); - struct enetc_hw *hw = &priv->si->hw; struct enetc_si *si = priv->si; *ranges = enetc_rmon_ranges; switch (rmon_stats->src) { case ETHTOOL_MAC_STATS_SRC_EMAC: - enetc_rmon_stats(hw, 0, rmon_stats); + enetc_get_mac_rmon_stats(si, 0, rmon_stats); break; case ETHTOOL_MAC_STATS_SRC_PMAC: if (si->hw_features & ENETC_SI_F_QBU) - enetc_rmon_stats(hw, 1, rmon_stats); + enetc_get_mac_rmon_stats(si, 1, rmon_stats); break; case ETHTOOL_MAC_STATS_SRC_AGGREGATE: ethtool_aggregate_rmon_stats(ndev, rmon_stats); @@ -1398,6 +1499,10 @@ const struct ethtool_ops enetc4_pf_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES | ETHTOOL_COALESCE_USE_ADAPTIVE_RX, + .get_pause_stats = enetc_get_pause_stats, + .get_rmon_stats = enetc_get_rmon_stats, + .get_eth_ctrl_stats = enetc_get_eth_ctrl_stats, + .get_eth_mac_stats = enetc_get_eth_mac_stats, .get_ringparam = enetc_get_ringparam, .get_coalesce = enetc_get_coalesce, .set_coalesce = enetc_set_coalesce, From c571d309d4cf9ac4c3129cdf312696e1c6b18cce Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Wed, 8 Apr 2026 13:58:46 +0800 Subject: [PATCH 1450/1561] net: enetc: show RX drop counters only for assigned RX rings For ENETC v1, each SI provides 16 RBDCR registers for RX ring drop counters, but this does not imply that an SI actually owns 16 RX rings. The ENETC hardware supports a total of 16 RX rings, which are assigned to 3 SIs (1 PSI and 2 VSIs), so each SI is assigned fewer than 16 RX rings. The current implementation always reports 16 RX drop counters per SI, leading to redundant output for SIs with fewer RX rings. Update the logic to display drop counters only for the RX rings that are actually assigned to the SI. Signed-off-by: Wei Fang Link: https://patch.msgid.link/20260408055849.1314033-3-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- .../ethernet/freescale/enetc/enetc_ethtool.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index c30a119e9142..36d1a2b810c2 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -124,22 +124,6 @@ static const struct { { ENETC_SITFRM, "SI tx frames" }, { ENETC_SITUCA, "SI tx u-cast frames" }, { ENETC_SITMCA, "SI tx m-cast frames" }, - { ENETC_RBDCR(0), "Rx ring 0 discarded frames" }, - { ENETC_RBDCR(1), "Rx ring 1 discarded frames" }, - { ENETC_RBDCR(2), "Rx ring 2 discarded frames" }, - { ENETC_RBDCR(3), "Rx ring 3 discarded frames" }, - { ENETC_RBDCR(4), "Rx ring 4 discarded frames" }, - { ENETC_RBDCR(5), "Rx ring 5 discarded frames" }, - { ENETC_RBDCR(6), "Rx ring 6 discarded frames" }, - { ENETC_RBDCR(7), "Rx ring 7 discarded frames" }, - { ENETC_RBDCR(8), "Rx ring 8 discarded frames" }, - { ENETC_RBDCR(9), "Rx ring 9 discarded frames" }, - { ENETC_RBDCR(10), "Rx ring 10 discarded frames" }, - { ENETC_RBDCR(11), "Rx ring 11 discarded frames" }, - { ENETC_RBDCR(12), "Rx ring 12 discarded frames" }, - { ENETC_RBDCR(13), "Rx ring 13 discarded frames" }, - { ENETC_RBDCR(14), "Rx ring 14 discarded frames" }, - { ENETC_RBDCR(15), "Rx ring 15 discarded frames" }, }; static const struct { @@ -224,6 +208,7 @@ static const char rx_ring_stats[][ETH_GSTRING_LEN] = { "Rx ring %2d recycle failures", "Rx ring %2d redirects", "Rx ring %2d redirect failures", + "Rx ring %2d discarded frames", }; static const char tx_ring_stats[][ETH_GSTRING_LEN] = { @@ -308,6 +293,7 @@ static void enetc_get_ethtool_stats(struct net_device *ndev, data[o++] = priv->rx_ring[i]->stats.recycle_failures; data[o++] = priv->rx_ring[i]->stats.xdp_redirect; data[o++] = priv->rx_ring[i]->stats.xdp_redirect_failures; + data[o++] = enetc_rd(hw, ENETC_RBDCR(i)); } if (!enetc_si_is_pf(priv->si)) From 6d78c37a73e01955104c2cb8f65584d23aba4f88 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Wed, 8 Apr 2026 13:58:47 +0800 Subject: [PATCH 1451/1561] net: enetc: remove standardized counters from enetc_pm_counters The standardized counters are already exposed via the get_pause_stats(), get_rmon_stats(), get_eth_ctrl_stats() and get_eth_mac_stats() interfaces. Keeping the same counters in enetc_pm_counters results in redundant output. Remove these standardized counters from enetc_pm_counters and rely on the existing statistics interfaces to report them. Signed-off-by: Wei Fang Link: https://patch.msgid.link/20260408055849.1314033-4-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- .../ethernet/freescale/enetc/enetc_ethtool.c | 40 ------------------- 1 file changed, 40 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index 36d1a2b810c2..504def405489 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -130,57 +130,17 @@ static const struct { int reg; char name[ETH_GSTRING_LEN] __nonstring; } enetc_pm_counters[] = { - { ENETC_PM_REOCT(0), "MAC rx ethernet octets" }, - { ENETC_PM_RALN(0), "MAC rx alignment errors" }, - { ENETC_PM_RXPF(0), "MAC rx valid pause frames" }, - { ENETC_PM_RFRM(0), "MAC rx valid frames" }, - { ENETC_PM_RFCS(0), "MAC rx fcs errors" }, { ENETC_PM_RVLAN(0), "MAC rx VLAN frames" }, { ENETC_PM_RERR(0), "MAC rx frame errors" }, { ENETC_PM_RUCA(0), "MAC rx unicast frames" }, - { ENETC_PM_RMCA(0), "MAC rx multicast frames" }, - { ENETC_PM_RBCA(0), "MAC rx broadcast frames" }, { ENETC_PM_RDRP(0), "MAC rx dropped packets" }, { ENETC_PM_RPKT(0), "MAC rx packets" }, - { ENETC_PM_RUND(0), "MAC rx undersized packets" }, - { ENETC_PM_R64(0), "MAC rx 64 byte packets" }, - { ENETC_PM_R127(0), "MAC rx 65-127 byte packets" }, - { ENETC_PM_R255(0), "MAC rx 128-255 byte packets" }, - { ENETC_PM_R511(0), "MAC rx 256-511 byte packets" }, - { ENETC_PM_R1023(0), "MAC rx 512-1023 byte packets" }, - { ENETC_PM_R1522(0), "MAC rx 1024-1522 byte packets" }, - { ENETC_PM_R1523X(0), "MAC rx 1523 to max-octet packets" }, - { ENETC_PM_ROVR(0), "MAC rx oversized packets" }, - { ENETC_PM_RJBR(0), "MAC rx jabber packets" }, - { ENETC_PM_RFRG(0), "MAC rx fragment packets" }, - { ENETC_PM_RCNP(0), "MAC rx control packets" }, - { ENETC_PM_RDRNTP(0), "MAC rx fifo drop" }, - { ENETC_PM_TEOCT(0), "MAC tx ethernet octets" }, { ENETC_PM_TOCT(0), "MAC tx octets" }, - { ENETC_PM_TCRSE(0), "MAC tx carrier sense errors" }, - { ENETC_PM_TXPF(0), "MAC tx valid pause frames" }, - { ENETC_PM_TFRM(0), "MAC tx frames" }, { ENETC_PM_TFCS(0), "MAC tx fcs errors" }, { ENETC_PM_TVLAN(0), "MAC tx VLAN frames" }, - { ENETC_PM_TERR(0), "MAC tx frame errors" }, { ENETC_PM_TUCA(0), "MAC tx unicast frames" }, - { ENETC_PM_TMCA(0), "MAC tx multicast frames" }, - { ENETC_PM_TBCA(0), "MAC tx broadcast frames" }, { ENETC_PM_TPKT(0), "MAC tx packets" }, { ENETC_PM_TUND(0), "MAC tx undersized packets" }, - { ENETC_PM_T64(0), "MAC tx 64 byte packets" }, - { ENETC_PM_T127(0), "MAC tx 65-127 byte packets" }, - { ENETC_PM_T255(0), "MAC tx 128-255 byte packets" }, - { ENETC_PM_T511(0), "MAC tx 256-511 byte packets" }, - { ENETC_PM_T1023(0), "MAC tx 512-1023 byte packets" }, - { ENETC_PM_T1522(0), "MAC tx 1024-1522 byte packets" }, - { ENETC_PM_T1523X(0), "MAC tx 1523 to max-octet packets" }, - { ENETC_PM_TCNP(0), "MAC tx control packets" }, - { ENETC_PM_TDFR(0), "MAC tx deferred packets" }, - { ENETC_PM_TMCOL(0), "MAC tx multiple collisions" }, - { ENETC_PM_TSCOL(0), "MAC tx single collisions" }, - { ENETC_PM_TLCOL(0), "MAC tx late collisions" }, - { ENETC_PM_TECOL(0), "MAC tx excessive collisions" }, }; static const struct { From dbc30b154e331dc023c4ace3fd57056c6c7a98e2 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Wed, 8 Apr 2026 13:58:48 +0800 Subject: [PATCH 1452/1561] net: enetc: add unstructured pMAC counters for ENETC v1 The ENETC v1 has two MACs (eMAC and pMAC) to support preemption. The existing unstructured counters include the eMAC counters, but not the pMAC counters. So add pMAC counters to improve statistical coverage. Signed-off-by: Wei Fang Link: https://patch.msgid.link/20260408055849.1314033-5-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- .../ethernet/freescale/enetc/enetc_ethtool.c | 77 ++++++++++++++----- 1 file changed, 56 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index 504def405489..bdc5916e4400 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -129,18 +129,35 @@ static const struct { static const struct { int reg; char name[ETH_GSTRING_LEN] __nonstring; -} enetc_pm_counters[] = { - { ENETC_PM_RVLAN(0), "MAC rx VLAN frames" }, - { ENETC_PM_RERR(0), "MAC rx frame errors" }, - { ENETC_PM_RUCA(0), "MAC rx unicast frames" }, - { ENETC_PM_RDRP(0), "MAC rx dropped packets" }, - { ENETC_PM_RPKT(0), "MAC rx packets" }, - { ENETC_PM_TOCT(0), "MAC tx octets" }, - { ENETC_PM_TFCS(0), "MAC tx fcs errors" }, - { ENETC_PM_TVLAN(0), "MAC tx VLAN frames" }, - { ENETC_PM_TUCA(0), "MAC tx unicast frames" }, - { ENETC_PM_TPKT(0), "MAC tx packets" }, - { ENETC_PM_TUND(0), "MAC tx undersized packets" }, +} enetc_emac_counters[] = { + { ENETC_PM_RVLAN(0), "eMAC rx VLAN frames" }, + { ENETC_PM_RERR(0), "eMAC rx frame errors" }, + { ENETC_PM_RUCA(0), "eMAC rx unicast frames" }, + { ENETC_PM_RDRP(0), "eMAC rx dropped packets" }, + { ENETC_PM_RPKT(0), "eMAC rx packets" }, + { ENETC_PM_TOCT(0), "eMAC tx octets" }, + { ENETC_PM_TFCS(0), "eMAC tx fcs errors" }, + { ENETC_PM_TVLAN(0), "eMAC tx VLAN frames" }, + { ENETC_PM_TUCA(0), "eMAC tx unicast frames" }, + { ENETC_PM_TPKT(0), "eMAC tx packets" }, + { ENETC_PM_TUND(0), "eMAC tx undersized packets" }, +}; + +static const struct { + int reg; + char name[ETH_GSTRING_LEN] __nonstring; +} enetc_pmac_counters[] = { + { ENETC_PM_RVLAN(1), "pMAC rx VLAN frames" }, + { ENETC_PM_RERR(1), "pMAC rx frame errors" }, + { ENETC_PM_RUCA(1), "pMAC rx unicast frames" }, + { ENETC_PM_RDRP(1), "pMAC rx dropped packets" }, + { ENETC_PM_RPKT(1), "pMAC rx packets" }, + { ENETC_PM_TOCT(1), "pMAC tx octets" }, + { ENETC_PM_TFCS(1), "pMAC tx fcs errors" }, + { ENETC_PM_TVLAN(1), "pMAC tx VLAN frames" }, + { ENETC_PM_TUCA(1), "pMAC tx unicast frames" }, + { ENETC_PM_TPKT(1), "pMAC tx packets" }, + { ENETC_PM_TUND(1), "pMAC tx undersized packets" }, }; static const struct { @@ -181,6 +198,7 @@ static const char tx_ring_stats[][ETH_GSTRING_LEN] = { static int enetc_get_sset_count(struct net_device *ndev, int sset) { struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct enetc_si *si = priv->si; int len; if (sset != ETH_SS_STATS) @@ -190,11 +208,14 @@ static int enetc_get_sset_count(struct net_device *ndev, int sset) ARRAY_SIZE(tx_ring_stats) * priv->num_tx_rings + ARRAY_SIZE(rx_ring_stats) * priv->num_rx_rings; - if (!enetc_si_is_pf(priv->si)) + if (!enetc_si_is_pf(si)) return len; len += ARRAY_SIZE(enetc_port_counters); - len += ARRAY_SIZE(enetc_pm_counters); + len += ARRAY_SIZE(enetc_emac_counters); + + if (si->hw_features & ENETC_SI_F_QBU) + len += ARRAY_SIZE(enetc_pmac_counters); return len; } @@ -202,6 +223,7 @@ static int enetc_get_sset_count(struct net_device *ndev, int sset) static void enetc_get_strings(struct net_device *ndev, u32 stringset, u8 *data) { struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct enetc_si *si = priv->si; int i, j; switch (stringset) { @@ -215,14 +237,20 @@ static void enetc_get_strings(struct net_device *ndev, u32 stringset, u8 *data) for (j = 0; j < ARRAY_SIZE(rx_ring_stats); j++) ethtool_sprintf(&data, rx_ring_stats[j], i); - if (!enetc_si_is_pf(priv->si)) + if (!enetc_si_is_pf(si)) break; for (i = 0; i < ARRAY_SIZE(enetc_port_counters); i++) ethtool_cpy(&data, enetc_port_counters[i].name); - for (i = 0; i < ARRAY_SIZE(enetc_pm_counters); i++) - ethtool_cpy(&data, enetc_pm_counters[i].name); + for (i = 0; i < ARRAY_SIZE(enetc_emac_counters); i++) + ethtool_cpy(&data, enetc_emac_counters[i].name); + + if (!(si->hw_features & ENETC_SI_F_QBU)) + break; + + for (i = 0; i < ARRAY_SIZE(enetc_pmac_counters); i++) + ethtool_cpy(&data, enetc_pmac_counters[i].name); break; } @@ -232,7 +260,8 @@ static void enetc_get_ethtool_stats(struct net_device *ndev, struct ethtool_stats *stats, u64 *data) { struct enetc_ndev_priv *priv = netdev_priv(ndev); - struct enetc_hw *hw = &priv->si->hw; + struct enetc_si *si = priv->si; + struct enetc_hw *hw = &si->hw; int i, o = 0; for (i = 0; i < ARRAY_SIZE(enetc_si_counters); i++) @@ -256,14 +285,20 @@ static void enetc_get_ethtool_stats(struct net_device *ndev, data[o++] = enetc_rd(hw, ENETC_RBDCR(i)); } - if (!enetc_si_is_pf(priv->si)) + if (!enetc_si_is_pf(si)) return; for (i = 0; i < ARRAY_SIZE(enetc_port_counters); i++) data[o++] = enetc_port_rd(hw, enetc_port_counters[i].reg); - for (i = 0; i < ARRAY_SIZE(enetc_pm_counters); i++) - data[o++] = enetc_port_rd64(hw, enetc_pm_counters[i].reg); + for (i = 0; i < ARRAY_SIZE(enetc_emac_counters); i++) + data[o++] = enetc_port_rd64(hw, enetc_emac_counters[i].reg); + + if (!(si->hw_features & ENETC_SI_F_QBU)) + return; + + for (i = 0; i < ARRAY_SIZE(enetc_pmac_counters); i++) + data[o++] = enetc_port_rd64(hw, enetc_pmac_counters[i].reg); } static void enetc_pause_stats(struct enetc_si *si, int mac, From 98a4f3d341322f3c6dd341902b419f701f3e678e Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Wed, 8 Apr 2026 13:58:49 +0800 Subject: [PATCH 1453/1561] net: enetc: add unstructured counters for ENETC v4 Like ENETC v1, ENETC v4 also has many non-standard counters, so these counters are added to improve statistical coverage. Signed-off-by: Wei Fang Link: https://patch.msgid.link/20260408055849.1314033-6-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/freescale/enetc/enetc4_hw.h | 72 +++++++ .../ethernet/freescale/enetc/enetc_ethtool.c | 189 +++++++++++++++--- 2 files changed, 235 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_hw.h b/drivers/net/ethernet/freescale/enetc/enetc4_hw.h index 392992a646fb..f18437556a0e 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc4_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc4_hw.h @@ -64,6 +64,9 @@ #define ENETC4_PPAUONTR 0x108 #define ENETC4_PPAUOFFTR 0x10c +/* Port ingress congestion DRa (a=0,1,2,3) discard count register */ +#define ENETC4_PICDRDCR(a) ((a) * 0x10 + 0x140) + /* Port Station interface promiscuous MAC mode register */ #define ENETC4_PSIPMMR 0x200 #define PSIPMMR_SI_MAC_UP(a) BIT(a) /* a = SI index */ @@ -72,6 +75,12 @@ /* Port Station interface promiscuous VLAN mode register */ #define ENETC4_PSIPVMR 0x204 +/* Port broadcast frames dropped due to MAC filtering register */ +#define ENETC4_PBFDSIR 0x208 + +/* Port frame drop MAC source address pruning register */ +#define ENETC4_PFDMSAPR 0x20c + /* Port RSS key register n. n = 0,1,2,...,9 */ #define ENETC4_PRSSKR(n) ((n) * 0x4 + 0x250) @@ -79,6 +88,12 @@ #define ENETC4_PSIMAFCAPR 0x280 #define PSIMAFCAPR_NUM_MAC_AFTE GENMASK(11, 0) +/* Port unicast frames dropped due to MAC filtering register */ +#define ENETC4_PUFDMFR 0x284 + +/* Port multicast frames dropped due to MAC filtering register */ +#define ENETC4_PMFDMFR 0x288 + /* Port station interface VLAN filtering capability register */ #define ENETC4_PSIVLANFCAPR 0x2c0 #define PSIVLANFCAPR_NUM_VLAN_FTE GENMASK(11, 0) @@ -87,6 +102,15 @@ #define ENETC4_PSIVLANFMR 0x2c4 #define PSIVLANFMR_VS BIT(0) +/* Port unicast frames dropped VLAN filtering register */ +#define ENETC4_PUFDVFR 0x2d0 + +/* Port multicast frames dropped VLAN filtering register */ +#define ENETC4_PMFDVFR 0x2d4 + +/* Port broadcast frames dropped VLAN filtering register */ +#define ENETC4_PBFDVFR 0x2d8 + /* Port Station interface a primary MAC address registers */ #define ENETC4_PSIPMAR0(a) ((a) * 0x80 + 0x2000) #define ENETC4_PSIPMAR1(a) ((a) * 0x80 + 0x2004) @@ -141,6 +165,18 @@ #define ENETC4_PSR 0x4104 #define PSR_RX_BUSY BIT(1) +/* Port Rx discard count register */ +#define ENETC4_PRXDCR 0x41c0 + +/* Port Rx discard count read-reset register */ +#define ENETC4_PRXDCRRR 0x41c4 + +/* Port Rx discard count reason register 0 */ +#define ENETC4_PRXDCRR0 0x41c8 + +/* Port Rx discard count reason register 1 */ +#define ENETC4_PRXDCRR1 0x41cc + /* Port traffic class a transmit maximum SDU register */ #define ENETC4_PTCTMSDUR(a) ((a) * 0x20 + 0x4208) #define PTCTMSDUR_MAXSDU GENMASK(15, 0) @@ -199,6 +235,9 @@ /* Port MAC 0/1 Receive Ethernet Octets Counter */ #define ENETC4_PM_REOCT(mac) (0x5100 + (mac) * 0x400) +/* Port MAC 0/1 Receive Octets Counter */ +#define ENETC4_PM_ROCT(mac) (0x5108 + (mac) * 0x400) + /* Port MAC 0/1 Receive Alignment Error Counter Register */ #define ENETC4_PM_RALN(mac) (0x5110 + (mac) * 0x400) @@ -211,12 +250,27 @@ /* Port MAC 0/1 Receive Frame Check Sequence Error Counter */ #define ENETC4_PM_RFCS(mac) (0x5128 + (mac) * 0x400) +/* Port MAC 0/1 Receive VLAN Frame Counter */ +#define ENETC4_PM_RVLAN(mac) (0x5130 + (mac) * 0x400) + +/* Port MAC 0/1 Receive Frame Error Counter */ +#define ENETC4_PM_RERR(mac) (0x5138 + (mac) * 0x400) + +/* Port MAC 0/1 Receive Unicast Frame Counter */ +#define ENETC4_PM_RUCA(mac) (0x5140 + (mac) * 0x400) + /* Port MAC 0/1 Receive Multicast Frame Counter */ #define ENETC4_PM_RMCA(mac) (0x5148 + (mac) * 0x400) /* Port MAC 0/1 Receive Broadcast Frame Counter */ #define ENETC4_PM_RBCA(mac) (0x5150 + (mac) * 0x400) +/* Port MAC 0/1 Receive Dropped Packets Counter */ +#define ENETC4_PM_RDRP(mac) (0x5158 + (mac) * 0x400) + +/* Port MAC 0/1 Receive Packets Counter */ +#define ENETC4_PM_RPKT(mac) (0x5160 + (mac) * 0x400) + /* Port MAC 0/1 Receive Undersized Packet Counter */ #define ENETC4_PM_RUND(mac) (0x5168 + (mac) * 0x400) @@ -259,21 +313,36 @@ /* Port MAC 0/1 Transmit Ethernet Octets Counter */ #define ENETC4_PM_TEOCT(mac) (0x5200 + (mac) * 0x400) +/* Port MAC 0/1 Transmit Octets Counter */ +#define ENETC4_PM_TOCT(mac) (0x5208 + (mac) * 0x400) + /* Port MAC 0/1 Transmit Valid Pause Frame Counter */ #define ENETC4_PM_TXPF(mac) (0x5218 + (mac) * 0x400) /* Port MAC 0/1 Transmit Frame Counter */ #define ENETC4_PM_TFRM(mac) (0x5220 + (mac) * 0x400) +/* Port MAC 0/1 Transmit Frame Check Sequence Error Counter */ +#define ENETC4_PM_TFCS(mac) (0x5228 + (mac) * 0x400) + +/* Port MAC 0/1 Transmit VLAN Frame Counter */ +#define ENETC4_PM_TVLAN(mac) (0x5230 + (mac) * 0x400) + /* Port MAC 0/1 Transmit Frame Error Counter */ #define ENETC4_PM_TERR(mac) (0x5238 + (mac) * 0x400) +/* Port MAC 0/1 Transmit Unicast Frame Counter */ +#define ENETC4_PM_TUCA(mac) (0x5240 + (mac) * 0x400) + /* Port MAC 0/1 Transmit Multicast Frame Counter */ #define ENETC4_PM_TMCA(mac) (0x5248 + (mac) * 0x400) /* Port MAC 0/1 Transmit Broadcast Frame Counter */ #define ENETC4_PM_TBCA(mac) (0x5250 + (mac) * 0x400) +/* Port MAC 0/1 Transmit Packets Counter */ +#define ENETC4_PM_TPKT(mac) (0x5260 + (mac) * 0x400) + /* Port MAC 0/1 Transmit Undersized Packet Counter */ #define ENETC4_PM_TUND(mac) (0x5268 + (mac) * 0x400) @@ -316,6 +385,9 @@ /* Port MAC 0/1 Transmit Excessive Collisions Counter */ #define ENETC4_PM_TECOL(mac) (0x52f0 + (mac) * 0x400) +/* Port MAC 0/1 Transmit Invalid Octets Counter */ +#define ENETC4_PM_TIOCT(mac) (0x52f8 + (mac) * 0x400) + /* Port MAC 0 Interface Mode Control Register */ #define ENETC4_PM_IF_MODE(mac) (0x5300 + (mac) * 0x400) #define PM_IF_MODE_IFMODE GENMASK(2, 0) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index bdc5916e4400..71f376ef1be1 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -177,6 +177,65 @@ static const struct { { ENETC_PICDR(3), "ICM DR3 discarded frames" }, }; +static const struct { + int reg; + char name[ETH_GSTRING_LEN] __nonstring; +} enetc4_emac_counters[] = { + { ENETC4_PM_ROCT(0), "eMAC rx octets" }, + { ENETC4_PM_RVLAN(0), "eMAC rx VLAN frames" }, + { ENETC4_PM_RERR(0), "eMAC rx frame errors" }, + { ENETC4_PM_RUCA(0), "eMAC rx unicast frames" }, + { ENETC4_PM_RDRP(0), "eMAC rx dropped packets" }, + { ENETC4_PM_RPKT(0), "eMAC rx packets" }, + { ENETC4_PM_TOCT(0), "eMAC tx octets" }, + { ENETC4_PM_TVLAN(0), "eMAC tx VLAN frames" }, + { ENETC4_PM_TFCS(0), "eMAC tx fcs errors" }, + { ENETC4_PM_TUCA(0), "eMAC tx unicast frames" }, + { ENETC4_PM_TPKT(0), "eMAC tx packets" }, + { ENETC4_PM_TUND(0), "eMAC tx undersized packets" }, + { ENETC4_PM_TIOCT(0), "eMAC tx invalid octets" }, +}; + +static const struct { + int reg; + char name[ETH_GSTRING_LEN] __nonstring; +} enetc4_pmac_counters[] = { + { ENETC4_PM_ROCT(1), "pMAC rx octets" }, + { ENETC4_PM_RVLAN(1), "pMAC rx VLAN frames" }, + { ENETC4_PM_RERR(1), "pMAC rx frame errors" }, + { ENETC4_PM_RUCA(1), "pMAC rx unicast frames" }, + { ENETC4_PM_RDRP(1), "pMAC rx dropped packets" }, + { ENETC4_PM_RPKT(1), "pMAC rx packets" }, + { ENETC4_PM_TOCT(1), "pMAC tx octets" }, + { ENETC4_PM_TVLAN(1), "pMAC tx VLAN frames" }, + { ENETC4_PM_TFCS(1), "pMAC tx fcs errors" }, + { ENETC4_PM_TUCA(1), "pMAC tx unicast frames" }, + { ENETC4_PM_TPKT(1), "pMAC tx packets" }, + { ENETC4_PM_TUND(1), "pMAC tx undersized packets" }, + { ENETC4_PM_TIOCT(1), "pMAC tx invalid octets" }, +}; + +static const struct { + int reg; + char name[ETH_GSTRING_LEN] __nonstring; +} enetc4_port_counters[] = { + { ENETC4_PICDRDCR(0), "ICM DR0 discarded frames" }, + { ENETC4_PICDRDCR(1), "ICM DR1 discarded frames" }, + { ENETC4_PICDRDCR(2), "ICM DR2 discarded frames" }, + { ENETC4_PICDRDCR(3), "ICM DR3 discarded frames" }, + { ENETC4_PUFDMFR, "MAC filter discarded unicast" }, + { ENETC4_PMFDMFR, "MAC filter discarded multicast" }, + { ENETC4_PBFDSIR, "MAC filter discarded broadcast" }, + { ENETC4_PFDMSAPR, "MAC SA pruning discarded frames" }, + { ENETC4_PUFDVFR, "VLAN filter discarded unicast" }, + { ENETC4_PMFDVFR, "VLAN filter discarded multicast" }, + { ENETC4_PBFDVFR, "VLAN filter discarded broadcast" }, + { ENETC4_PRXDCR, "MAC rx discarded frames" }, + { ENETC4_PRXDCRRR, "MAC rx discard read-reset" }, + { ENETC4_PRXDCRR0, "MAC rx discard reason 0" }, + { ENETC4_PRXDCRR1, "MAC rx discard reason 1" }, +}; + static const char rx_ring_stats[][ETH_GSTRING_LEN] = { "Rx ring %2d frames", "Rx ring %2d alloc errors", @@ -211,15 +270,62 @@ static int enetc_get_sset_count(struct net_device *ndev, int sset) if (!enetc_si_is_pf(si)) return len; - len += ARRAY_SIZE(enetc_port_counters); - len += ARRAY_SIZE(enetc_emac_counters); + if (is_enetc_rev1(si)) { + len += ARRAY_SIZE(enetc_port_counters); + len += ARRAY_SIZE(enetc_emac_counters); + if (si->hw_features & ENETC_SI_F_QBU) + len += ARRAY_SIZE(enetc_pmac_counters); + } else { + len += ARRAY_SIZE(enetc4_port_counters); - if (si->hw_features & ENETC_SI_F_QBU) - len += ARRAY_SIZE(enetc_pmac_counters); + if (enetc_is_pseudo_mac(si)) + return len; + + len += ARRAY_SIZE(enetc4_emac_counters); + if (si->hw_features & ENETC_SI_F_QBU) + len += ARRAY_SIZE(enetc4_pmac_counters); + } return len; } +static void enetc_get_pf_strings(struct enetc_si *si, u8 *data) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(enetc_port_counters); i++) + ethtool_cpy(&data, enetc_port_counters[i].name); + + for (i = 0; i < ARRAY_SIZE(enetc_emac_counters); i++) + ethtool_cpy(&data, enetc_emac_counters[i].name); + + if (!(si->hw_features & ENETC_SI_F_QBU)) + return; + + for (i = 0; i < ARRAY_SIZE(enetc_pmac_counters); i++) + ethtool_cpy(&data, enetc_pmac_counters[i].name); +} + +static void enetc4_get_pf_strings(struct enetc_si *si, u8 *data) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(enetc4_port_counters); i++) + ethtool_cpy(&data, enetc4_port_counters[i].name); + + if (enetc_is_pseudo_mac(si)) + return; + + for (i = 0; i < ARRAY_SIZE(enetc4_emac_counters); i++) + ethtool_cpy(&data, enetc4_emac_counters[i].name); + + if (!(si->hw_features & ENETC_SI_F_QBU)) + return; + + for (i = 0; i < ARRAY_SIZE(enetc4_pmac_counters); i++) + ethtool_cpy(&data, enetc4_pmac_counters[i].name); +} + static void enetc_get_strings(struct net_device *ndev, u32 stringset, u8 *data) { struct enetc_ndev_priv *priv = netdev_priv(ndev); @@ -240,22 +346,54 @@ static void enetc_get_strings(struct net_device *ndev, u32 stringset, u8 *data) if (!enetc_si_is_pf(si)) break; - for (i = 0; i < ARRAY_SIZE(enetc_port_counters); i++) - ethtool_cpy(&data, enetc_port_counters[i].name); - - for (i = 0; i < ARRAY_SIZE(enetc_emac_counters); i++) - ethtool_cpy(&data, enetc_emac_counters[i].name); - - if (!(si->hw_features & ENETC_SI_F_QBU)) - break; - - for (i = 0; i < ARRAY_SIZE(enetc_pmac_counters); i++) - ethtool_cpy(&data, enetc_pmac_counters[i].name); + if (is_enetc_rev1(si)) + enetc_get_pf_strings(si, data); + else + enetc4_get_pf_strings(si, data); break; } } +static void enetc_pf_get_ethtool_stats(struct enetc_si *si, int *o, u64 *data) +{ + struct enetc_hw *hw = &si->hw; + int i; + + for (i = 0; i < ARRAY_SIZE(enetc_port_counters); i++) + data[(*o)++] = enetc_port_rd(hw, enetc_port_counters[i].reg); + + for (i = 0; i < ARRAY_SIZE(enetc_emac_counters); i++) + data[(*o)++] = enetc_port_rd64(hw, enetc_emac_counters[i].reg); + + if (!(si->hw_features & ENETC_SI_F_QBU)) + return; + + for (i = 0; i < ARRAY_SIZE(enetc_pmac_counters); i++) + data[(*o)++] = enetc_port_rd64(hw, enetc_pmac_counters[i].reg); +} + +static void enetc4_pf_get_ethtool_stats(struct enetc_si *si, int *o, u64 *data) +{ + struct enetc_hw *hw = &si->hw; + int i; + + for (i = 0; i < ARRAY_SIZE(enetc4_port_counters); i++) + data[(*o)++] = enetc_port_rd(hw, enetc4_port_counters[i].reg); + + if (enetc_is_pseudo_mac(si)) + return; + + for (i = 0; i < ARRAY_SIZE(enetc4_emac_counters); i++) + data[(*o)++] = enetc_port_rd64(hw, enetc4_emac_counters[i].reg); + + if (!(si->hw_features & ENETC_SI_F_QBU)) + return; + + for (i = 0; i < ARRAY_SIZE(enetc4_pmac_counters); i++) + data[(*o)++] = enetc_port_rd64(hw, enetc4_pmac_counters[i].reg); +} + static void enetc_get_ethtool_stats(struct net_device *ndev, struct ethtool_stats *stats, u64 *data) { @@ -288,17 +426,10 @@ static void enetc_get_ethtool_stats(struct net_device *ndev, if (!enetc_si_is_pf(si)) return; - for (i = 0; i < ARRAY_SIZE(enetc_port_counters); i++) - data[o++] = enetc_port_rd(hw, enetc_port_counters[i].reg); - - for (i = 0; i < ARRAY_SIZE(enetc_emac_counters); i++) - data[o++] = enetc_port_rd64(hw, enetc_emac_counters[i].reg); - - if (!(si->hw_features & ENETC_SI_F_QBU)) - return; - - for (i = 0; i < ARRAY_SIZE(enetc_pmac_counters); i++) - data[o++] = enetc_port_rd64(hw, enetc_pmac_counters[i].reg); + if (is_enetc_rev1(si)) + enetc_pf_get_ethtool_stats(si, &o, data); + else + enetc4_pf_get_ethtool_stats(si, &o, data); } static void enetc_pause_stats(struct enetc_si *si, int mac, @@ -1438,6 +1569,9 @@ const struct ethtool_ops enetc4_ppm_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES | ETHTOOL_COALESCE_USE_ADAPTIVE_RX, + .get_sset_count = enetc_get_sset_count, + .get_strings = enetc_get_strings, + .get_ethtool_stats = enetc_get_ethtool_stats, .get_eth_mac_stats = enetc_ppm_get_eth_mac_stats, .get_rx_ring_count = enetc_get_rx_ring_count, .get_rxfh_key_size = enetc_get_rxfh_key_size, @@ -1480,6 +1614,9 @@ const struct ethtool_ops enetc4_pf_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES | ETHTOOL_COALESCE_USE_ADAPTIVE_RX, + .get_sset_count = enetc_get_sset_count, + .get_strings = enetc_get_strings, + .get_ethtool_stats = enetc_get_ethtool_stats, .get_pause_stats = enetc_get_pause_stats, .get_rmon_stats = enetc_get_rmon_stats, .get_eth_ctrl_stats = enetc_get_eth_ctrl_stats, From 2835750dd6475a5ddc116be0b4c81fee8ce1a902 Mon Sep 17 00:00:00 2001 From: Mashiro Chen Date: Thu, 9 Apr 2026 01:25:51 +0800 Subject: [PATCH 1454/1561] net: rose: reject truncated CLEAR_REQUEST frames in state machines All five ROSE state machines (states 1-5) handle ROSE_CLEAR_REQUEST by reading the cause and diagnostic bytes directly from skb->data[3] and skb->data[4] without verifying that the frame is long enough: rose_disconnect(sk, ..., skb->data[3], skb->data[4]); The entry-point check in rose_route_frame() only enforces ROSE_MIN_LEN (3 bytes), so a remote peer on a ROSE network can send a syntactically valid but truncated CLEAR_REQUEST (3 or 4 bytes) while a connection is open in any state. Processing such a frame causes a one- or two-byte out-of-bounds read past the skb data, leaking uninitialized heap content as the cause/diagnostic values returned to user space via getsockopt(ROSE_GETCAUSE). Add a single length check at the rose_process_rx_frame() dispatch point, before any state machine is entered, to drop frames that carry the CLEAR_REQUEST type code but are too short to contain the required cause and diagnostic fields. Signed-off-by: Mashiro Chen Link: https://patch.msgid.link/20260408172551.281486-1-mashiro.chen@mailbox.org Signed-off-by: Jakub Kicinski --- net/rose/rose_in.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/rose/rose_in.c b/net/rose/rose_in.c index 0276b393f0e5..e26800581962 100644 --- a/net/rose/rose_in.c +++ b/net/rose/rose_in.c @@ -271,6 +271,13 @@ int rose_process_rx_frame(struct sock *sk, struct sk_buff *skb) frametype = rose_decode(skb, &ns, &nr, &q, &d, &m); + /* + * ROSE_CLEAR_REQUEST carries cause and diagnostic in bytes 3..4. + * Reject a malformed frame that is too short to contain them. + */ + if (frametype == ROSE_CLEAR_REQUEST && skb->len < 5) + return 0; + switch (rose->state) { case ROSE_STATE_1: queued = rose_state1_machine(sk, skb, frametype); From 6183bd8723a3eecd2d89cbc506fe938bc6288345 Mon Sep 17 00:00:00 2001 From: Mashiro Chen Date: Thu, 9 Apr 2026 10:49:26 +0800 Subject: [PATCH 1455/1561] net: hamradio: bpqether: validate frame length in bpq_rcv() The BPQ length field is decoded as: len = skb->data[0] + skb->data[1] * 256 - 5; If the sender sets bytes [0..1] to values whose combined value is less than 5, len becomes negative. Passing a negative int to skb_trim() silently converts to a huge unsigned value, causing the function to be a no-op. The frame is then passed up to AX.25 with its original (untrimmed) payload, delivering garbage beyond the declared frame boundary. Additionally, a negative len corrupts the 64-bit rx_bytes counter through implicit sign-extension. Add a bounds check before pulling the length bytes: reject frames where len is negative or exceeds the remaining skb data. Acked-by: Joerg Reuter Signed-off-by: Mashiro Chen Link: https://patch.msgid.link/20260409024927.24397-2-mashiro.chen@mailbox.org Signed-off-by: Jakub Kicinski --- drivers/net/hamradio/bpqether.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index 045c5177262e..214fd1f819a1 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -187,6 +187,9 @@ static int bpq_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_ty len = skb->data[0] + skb->data[1] * 256 - 5; + if (len < 0 || len > skb->len - 2) + goto drop_unlock; + skb_pull(skb, 2); /* Remove the length bytes */ skb_trim(skb, len); /* Set the length of the data */ From 8263e484d6622464ec72a5ad563f62492d84fa54 Mon Sep 17 00:00:00 2001 From: Mashiro Chen Date: Thu, 9 Apr 2026 10:49:27 +0800 Subject: [PATCH 1456/1561] net: hamradio: scc: validate bufsize in SIOCSCCSMEM ioctl The SIOCSCCSMEM ioctl copies a scc_mem_config from user space and assigns its bufsize field directly to scc->stat.bufsize without any range validation: scc->stat.bufsize = memcfg.bufsize; If a privileged user (CAP_SYS_RAWIO) sets bufsize to 0, the receive interrupt handler later calls dev_alloc_skb(0) and immediately writes a KISS type byte via skb_put_u8() into a zero-capacity socket buffer, corrupting the adjacent skb_shared_info region. Reject bufsize values smaller than 16; this is large enough to hold at least one KISS header byte plus useful data. Signed-off-by: Mashiro Chen Acked-by: Joerg Reuter Link: https://patch.msgid.link/20260409024927.24397-3-mashiro.chen@mailbox.org Signed-off-by: Jakub Kicinski --- drivers/net/hamradio/scc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/hamradio/scc.c b/drivers/net/hamradio/scc.c index ae5048efde68..8569db4a7140 100644 --- a/drivers/net/hamradio/scc.c +++ b/drivers/net/hamradio/scc.c @@ -1909,6 +1909,8 @@ static int scc_net_siocdevprivate(struct net_device *dev, if (!capable(CAP_SYS_RAWIO)) return -EPERM; if (!arg || copy_from_user(&memcfg, arg, sizeof(memcfg))) return -EINVAL; + if (memcfg.bufsize < 16) + return -EINVAL; scc->stat.bufsize = memcfg.bufsize; return 0; From 236f718ac885965fa886440b9898dfae185c9733 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20Bugge?= Date: Wed, 8 Apr 2026 01:04:19 -0700 Subject: [PATCH 1457/1561] net/rds: Optimize rds_ib_laddr_check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit rds_ib_laddr_check() creates a CM_ID and attempts to bind the address in question to it. This in order to qualify the allegedly local address as a usable IB/RoCE address. In the field, ExaWatcher runs rds-ping to all ports in the fabric from all local ports. This using all active ToS'es. In a full rack system, we have 14 cell servers and eight db servers. Typically, 6 ToS'es are used. This implies 528 rds-ping invocations per ExaWatcher's "RDSinfo" interval. Adding to this, each rds-ping invocation creates eight sockets and binds the local address to them: socket(AF_RDS, SOCK_SEQPACKET, 0) = 3 bind(3, {sa_family=AF_INET, sin_port=htons(0), sin_addr=inet_addr("192.168.36.2")}, 16) = 0 socket(AF_RDS, SOCK_SEQPACKET, 0) = 4 bind(4, {sa_family=AF_INET, sin_port=htons(0), sin_addr=inet_addr("192.168.36.2")}, 16) = 0 socket(AF_RDS, SOCK_SEQPACKET, 0) = 5 bind(5, {sa_family=AF_INET, sin_port=htons(0), sin_addr=inet_addr("192.168.36.2")}, 16) = 0 socket(AF_RDS, SOCK_SEQPACKET, 0) = 6 bind(6, {sa_family=AF_INET, sin_port=htons(0), sin_addr=inet_addr("192.168.36.2")}, 16) = 0 socket(AF_RDS, SOCK_SEQPACKET, 0) = 7 bind(7, {sa_family=AF_INET, sin_port=htons(0), sin_addr=inet_addr("192.168.36.2")}, 16) = 0 socket(AF_RDS, SOCK_SEQPACKET, 0) = 8 bind(8, {sa_family=AF_INET, sin_port=htons(0), sin_addr=inet_addr("192.168.36.2")}, 16) = 0 socket(AF_RDS, SOCK_SEQPACKET, 0) = 9 bind(9, {sa_family=AF_INET, sin_port=htons(0), sin_addr=inet_addr("192.168.36.2")}, 16) = 0 socket(AF_RDS, SOCK_SEQPACKET, 0) = 10 bind(10, {sa_family=AF_INET, sin_port=htons(0), sin_addr=inet_addr("192.168.36.2")}, 16) = 0 So, at every interval ExaWatcher executes rds-ping's, 4224 CM_IDs are allocated, considering this full-rack system. After the a CM_ID has been allocated, rdma_bind_addr() is called, with the port number being zero. This implies that the CMA will attempt to search for an un-used ephemeral port. Simplified, the algorithm is to start at a random position in the available port space, and then if needed, iterate until an un-used port is found. The book-keeping of used ports uses the idr system, which again uses slab to allocate new struct idr_layer's. The size is 2092 bytes and slab tries to reduce the wasted space. Hence, it chooses an order:3 allocation, for which 15 idr_layer structs will fit and only 1388 bytes are wasted per the 32KiB order:3 chunk. Although this order:3 allocation seems like a good space/speed trade-off, it does not resonate well with how it used by the CMA. The combination of the randomized starting point in the port space (which has close to zero spatial locality) and the close proximity in time of the 4224 invocations of the rds-ping's, creates a memory hog for order:3 allocations. These costly allocations may need reclaims and/or compaction. At worst, they may fail and produce a stack trace such as (from uek4): [] __inc_zone_page_state+0x35/0x40 [] page_add_file_rmap+0x57/0x60 [] remove_migration_pte+0x3f/0x3c0 [ksplice_6cn872bt_vmlinux_new] [] rmap_walk+0xd8/0x340 [] remove_migration_ptes+0x40/0x50 [] migrate_pages+0x3ec/0x890 [] compact_zone+0x32d/0x9a0 [] compact_zone_order+0x6d/0x90 [] try_to_compact_pages+0x102/0x270 [] __alloc_pages_direct_compact+0x46/0x100 [] __alloc_pages_nodemask+0x74b/0xaa0 [] alloc_pages_current+0x91/0x110 [] new_slab+0x38b/0x480 [] __slab_alloc+0x3b7/0x4a0 [ksplice_s0dk66a8_vmlinux_new] [] kmem_cache_alloc+0x1fb/0x250 [] idr_layer_alloc+0x36/0x90 [] idr_get_empty_slot+0x28c/0x3d0 [] idr_alloc+0x4d/0xf0 [] cma_alloc_port+0x4d/0xa0 [rdma_cm] [] rdma_bind_addr+0x2ae/0x5b0 [rdma_cm] [] rds_ib_laddr_check+0x83/0x2c0 [ksplice_6l2xst5i_rds_rdma_new] [] rds_trans_get_preferred+0x5b/0xa0 [rds] [] rds_bind+0x212/0x280 [rds] [] SYSC_bind+0xe6/0x120 [] SyS_bind+0xe/0x10 [] system_call_fastpath+0x18/0xd4 To avoid these excessive calls to rdma_bind_addr(), we optimize rds_ib_laddr_check() by simply checking if the address in question has been used before. The rds_rdma module keeps track of addresses associated with IB devices, and the function rds_ib_get_device() is used to determine if the address already has been qualified as a valid local address. If not found, we call the legacy rds_ib_laddr_check(), now renamed to rds_ib_laddr_check_cm(). Signed-off-by: Håkon Bugge Signed-off-by: Somasundaram Krishnasamy Signed-off-by: Gerd Rausch Signed-off-by: Allison Henderson Link: https://patch.msgid.link/20260408080420.540032-2-achender@kernel.org Signed-off-by: Jakub Kicinski --- net/rds/ib.c | 20 ++++++++++++++++++-- net/rds/ib.h | 1 + net/rds/ib_rdma.c | 2 +- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/net/rds/ib.c b/net/rds/ib.c index ac6affa33ce7..412ff61e74fa 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -401,8 +401,8 @@ static void rds6_ib_ic_info(struct socket *sock, unsigned int len, * allowed to influence which paths have priority. We could call userspace * asserting this policy "routing". */ -static int rds_ib_laddr_check(struct net *net, const struct in6_addr *addr, - __u32 scope_id) +static int rds_ib_laddr_check_cm(struct net *net, const struct in6_addr *addr, + __u32 scope_id) { int ret; struct rdma_cm_id *cm_id; @@ -487,6 +487,22 @@ out: return ret; } +static int rds_ib_laddr_check(struct net *net, const struct in6_addr *addr, + __u32 scope_id) +{ + struct rds_ib_device *rds_ibdev = NULL; + + if (ipv6_addr_v4mapped(addr)) { + rds_ibdev = rds_ib_get_device(addr->s6_addr32[3]); + if (rds_ibdev) { + rds_ib_dev_put(rds_ibdev); + return 0; + } + } + + return rds_ib_laddr_check_cm(net, addr, scope_id); +} + static void rds_ib_unregister_client(void) { ib_unregister_client(&rds_ib_client); diff --git a/net/rds/ib.h b/net/rds/ib.h index 8ef3178ed4d6..5ff346a1e8ba 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -381,6 +381,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, __rds_ib_conn_error(conn, KERN_WARNING "RDS/IB: " fmt) /* ib_rdma.c */ +struct rds_ib_device *rds_ib_get_device(__be32 ipaddr); int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, struct in6_addr *ipaddr); void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 2cfec252eeac..9594ea245f7f 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -43,7 +43,7 @@ struct workqueue_struct *rds_ib_mr_wq; static void rds_ib_odp_mr_worker(struct work_struct *work); -static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr) +struct rds_ib_device *rds_ib_get_device(__be32 ipaddr) { struct rds_ib_device *rds_ibdev; struct rds_ib_ipaddr *i_ipaddr; From ebf71dd4aff46e8e421d455db3e231ba43d2fa8a Mon Sep 17 00:00:00 2001 From: Greg Jumper Date: Wed, 8 Apr 2026 01:04:20 -0700 Subject: [PATCH 1458/1561] net/rds: Restrict use of RDS/IB to the initial network namespace Prevent using RDS/IB in network namespaces other than the initial one. The existing RDS/IB code will not work properly in non-initial network namespaces. Fixes: d5a8ac28a7ff ("RDS-TCP: Make RDS-TCP work correctly when it is set up in a netns other than init_net") Reported-by: syzbot+da8e060735ae02c8f3d1@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=da8e060735ae02c8f3d1 Signed-off-by: Greg Jumper Signed-off-by: Allison Henderson Link: https://patch.msgid.link/20260408080420.540032-3-achender@kernel.org Signed-off-by: Jakub Kicinski --- net/rds/af_rds.c | 10 ++++++++-- net/rds/ib.c | 4 ++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index b396c673dfaf..76f625986a7f 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -357,7 +357,8 @@ static int rds_cong_monitor(struct rds_sock *rs, sockptr_t optval, int optlen) return ret; } -static int rds_set_transport(struct rds_sock *rs, sockptr_t optval, int optlen) +static int rds_set_transport(struct net *net, struct rds_sock *rs, + sockptr_t optval, int optlen) { int t_type; @@ -373,6 +374,10 @@ static int rds_set_transport(struct rds_sock *rs, sockptr_t optval, int optlen) if (t_type < 0 || t_type >= RDS_TRANS_COUNT) return -EINVAL; + /* RDS/IB is restricted to the initial network namespace */ + if (t_type != RDS_TRANS_TCP && !net_eq(net, &init_net)) + return -EPROTOTYPE; + rs->rs_transport = rds_trans_get(t_type); return rs->rs_transport ? 0 : -ENOPROTOOPT; @@ -433,6 +438,7 @@ static int rds_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { struct rds_sock *rs = rds_sk_to_rs(sock->sk); + struct net *net = sock_net(sock->sk); int ret; if (level != SOL_RDS) { @@ -461,7 +467,7 @@ static int rds_setsockopt(struct socket *sock, int level, int optname, break; case SO_RDS_TRANSPORT: lock_sock(sock->sk); - ret = rds_set_transport(rs, optval, optlen); + ret = rds_set_transport(net, rs, optval, optlen); release_sock(sock->sk); break; case SO_TIMESTAMP_OLD: diff --git a/net/rds/ib.c b/net/rds/ib.c index 412ff61e74fa..39f87272e071 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -492,6 +492,10 @@ static int rds_ib_laddr_check(struct net *net, const struct in6_addr *addr, { struct rds_ib_device *rds_ibdev = NULL; + /* RDS/IB is restricted to the initial network namespace */ + if (!net_eq(net, &init_net)) + return -EPROTOTYPE; + if (ipv6_addr_v4mapped(addr)) { rds_ibdev = rds_ib_get_device(addr->s6_addr32[3]); if (rds_ibdev) { From 2bb6379416fd19f44c3423a00bfd8626259f6067 Mon Sep 17 00:00:00 2001 From: Taegu Ha Date: Thu, 9 Apr 2026 16:11:15 +0900 Subject: [PATCH 1459/1561] ppp: require CAP_NET_ADMIN in target netns for unattached ioctls /dev/ppp open is currently authorized against file->f_cred->user_ns, while unattached administrative ioctls operate on current->nsproxy->net_ns. As a result, a local unprivileged user can create a new user namespace with CLONE_NEWUSER, gain CAP_NET_ADMIN only in that new user namespace, and still issue PPPIOCNEWUNIT, PPPIOCATTACH, or PPPIOCATTCHAN against an inherited network namespace. Require CAP_NET_ADMIN in the user namespace that owns the target network namespace before handling unattached PPP administrative ioctls. This preserves normal pppd operation in the network namespace it is actually privileged in, while rejecting the userns-only inherited-netns case. Fixes: 273ec51dd7ce ("net: ppp_generic - introduce net-namespace functionality v2") Signed-off-by: Taegu Ha Link: https://patch.msgid.link/20260409071117.4354-1-hataegu0826@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ppp/ppp_generic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index e9b41777be80..c2024684b10d 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -1057,6 +1057,9 @@ static int ppp_unattached_ioctl(struct net *net, struct ppp_file *pf, struct ppp_net *pn; int __user *p = (int __user *)arg; + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + switch (cmd) { case PPPIOCNEWUNIT: /* Create a new ppp unit */ From de08f9585692813bd41ee654fca0487664c4de30 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Thu, 9 Apr 2026 10:13:31 +0200 Subject: [PATCH 1460/1561] net: ipa: Fix programming of QTIME_TIMESTAMP_CFG The 'val' variable gets overwritten multiple times, discarding previous values. Looking at the git log shows these should be combined with |= instead. Fixes: 9265a4f0f0b4 ("net: ipa: define even more IPA register fields") Link: https://sashiko.dev/#/patchset/20260403-milos-ipa-v1-0-01e9e4e03d3e%40fairphone.com?part=4 Signed-off-by: Luca Weiss Reviewed-by: Konrad Dybcio Link: https://patch.msgid.link/20260409-ipa-fixes-v1-1-a817c30678ac@fairphone.com Signed-off-by: Jakub Kicinski --- drivers/net/ipa/ipa_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c index edead9c48d1f..216506eeef1f 100644 --- a/drivers/net/ipa/ipa_main.c +++ b/drivers/net/ipa/ipa_main.c @@ -361,7 +361,7 @@ static void ipa_qtime_config(struct ipa *ipa) { const struct reg *reg; u32 offset; - u32 val; + u32 val = 0; /* Timer clock divider must be disabled when we change the rate */ reg = ipa_reg(ipa, TIMERS_XO_CLK_DIV_CFG); @@ -374,8 +374,8 @@ static void ipa_qtime_config(struct ipa *ipa) val |= reg_bit(reg, DPL_TIMESTAMP_SEL); } /* Configure tag and NAT Qtime timestamp resolution as well */ - val = reg_encode(reg, TAG_TIMESTAMP_LSB, TAG_TIMESTAMP_SHIFT); - val = reg_encode(reg, NAT_TIMESTAMP_LSB, NAT_TIMESTAMP_SHIFT); + val |= reg_encode(reg, TAG_TIMESTAMP_LSB, TAG_TIMESTAMP_SHIFT); + val |= reg_encode(reg, NAT_TIMESTAMP_LSB, NAT_TIMESTAMP_SHIFT); iowrite32(val, ipa->reg_virt + reg_offset(reg)); From 1335b903cf2e8aeaca87fd665683384c731ec941 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Thu, 9 Apr 2026 10:13:32 +0200 Subject: [PATCH 1461/1561] net: ipa: Fix decoding EV_PER_EE for IPA v5.0+ Initially 'reg' and 'val' are assigned from HW_PARAM_2. But since IPA v5.0+ takes EV_PER_EE from HW_PARAM_4 (instead of NUM_EV_PER_EE from HW_PARAM_2), we not only need to re-assign 'reg' but also read the register value of that register into 'val' so that reg_decode() works on the correct value. Fixes: f651334e1ef5 ("net: ipa: add HW_PARAM_4 GSI register") Link: https://sashiko.dev/#/patchset/20260403-milos-ipa-v1-0-01e9e4e03d3e%40fairphone.com?part=2 Signed-off-by: Luca Weiss Reviewed-by: Konrad Dybcio Link: https://patch.msgid.link/20260409-ipa-fixes-v1-2-a817c30678ac@fairphone.com Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 4c3227e77898..624649484d62 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -2044,6 +2044,7 @@ static int gsi_ring_setup(struct gsi *gsi) count = reg_decode(reg, NUM_EV_PER_EE, val); } else { reg = gsi_reg(gsi, HW_PARAM_4); + val = ioread32(gsi->virt + reg_offset(reg)); count = reg_decode(reg, EV_PER_EE, val); } if (!count) { From 21ad19a99d943d794d132c52eeb28b8731369516 Mon Sep 17 00:00:00 2001 From: Nobuhiro Iwamatsu Date: Thu, 9 Apr 2026 14:08:11 +0900 Subject: [PATCH 1462/1561] octeon_ep: Remove unnecessary semicolons in octep_oq_drop_rx() Remove unnecessary semicolons in octep_oq_drop_rx(). Signed-off-by: Nobuhiro Iwamatsu Link: https://patch.msgid.link/1775711291-13938-1-git-send-email-nobuhiro.iwamatsu.x90@mail.toshiba Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeon_ep/octep_rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c index 74de19166488..e6ebc7e44a00 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c @@ -392,7 +392,7 @@ static void octep_oq_drop_rx(struct octep_oq *oq, while (data_len > 0) { octep_oq_next_pkt(oq, buff_info, read_idx, desc_used); data_len -= oq->buffer_size; - }; + } } /** From 600f01dc4bd0c736b3ffea9f7976136d8bf1b136 Mon Sep 17 00:00:00 2001 From: Josua Mayer Date: Thu, 9 Apr 2026 14:34:33 +0200 Subject: [PATCH 1463/1561] dt-bindings: net: dsa: nxp,sja1105: make spi-cpol optional for sja1110 Currently, the binding requires 'spi-cpha' for SJA1105 and 'spi-cpol' for SJA1110. However, the SJA1110 supports both SPI modes 0 and 2. Mode 2 (cpha=0, cpol=1) is used by the NXP LX2160 Bluebox 3. On the SolidRun i.MX8DXL HummingBoard Telematics, mode 0 is stable, while forcing mode 2 introduces CRC errors especially during bursts. Drop the requirement on spi-cpol for SJA1110. Fixes: af2eab1a8243 ("dt-bindings: net: nxp,sja1105: document spi-cpol/cpha") Signed-off-by: Josua Mayer Acked-by: Conor Dooley Link: https://patch.msgid.link/20260409-imx8dxl-sr-som-v2-1-83ff20629ba0@solid-run.com Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml b/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml index 607b7fe8d28e..0486489114cd 100644 --- a/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml +++ b/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml @@ -143,8 +143,6 @@ allOf: else: properties: spi-cpha: false - required: - - spi-cpol unevaluatedProperties: false From 6f533abe7bbad2eef1e42c639b6bb9dad2b02362 Mon Sep 17 00:00:00 2001 From: Charles Perry Date: Thu, 9 Apr 2026 06:36:54 -0700 Subject: [PATCH 1464/1561] net: phy: fix a return path in get_phy_c45_ids() The return value of phy_c45_probe_present() is stored in "ret", not "phy_reg", fix this. "phy_reg" always has a positive value if we reach this return path (since it would have returned earlier otherwise), which means that the original goal of the patch of not considering -ENODEV fatal wasn't achieved. Fixes: 17b447539408 ("net: phy: c45 scanning: Don't consider -ENODEV fatal") Signed-off-by: Charles Perry Reviewed-by: Andrew Lunn Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20260409133654.3203336-1-charles.perry@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 3bd415710bf3..f3696d9819d3 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -927,8 +927,8 @@ static int get_phy_c45_ids(struct mii_bus *bus, int addr, /* returning -ENODEV doesn't stop bus * scanning */ - return (phy_reg == -EIO || - phy_reg == -ENODEV) ? -ENODEV : -EIO; + return (ret == -EIO || + ret == -ENODEV) ? -ENODEV : -EIO; if (!ret) continue; From 268bb35d1a34c9965ec79922a412e966af98be34 Mon Sep 17 00:00:00 2001 From: Charles Perry Date: Wed, 8 Apr 2026 06:18:14 -0700 Subject: [PATCH 1465/1561] dt-bindings: net: document Microchip PIC64-HPSC/HX MDIO controller This MDIO hardware is based on a Microsemi design supported in Linux by mdio-mscc-miim.c. However, The register interface is completely different with pic64hpsc, hence the need for separate documentation. The hardware supports C22 and C45. The documentation recommends an input clock of 156.25MHz and a prescaler of 39, which yields an MDIO clock of 1.95MHz. The hardware supports an interrupt pin to signal transaction completion which is not strictly needed as the software can also poll a "TRIGGER" bit for this. Signed-off-by: Charles Perry Acked-by: Conor Dooley Link: https://patch.msgid.link/20260408131821.1145334-2-charles.perry@microchip.com Signed-off-by: Jakub Kicinski --- .../net/microchip,pic64hpsc-mdio.yaml | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/microchip,pic64hpsc-mdio.yaml diff --git a/Documentation/devicetree/bindings/net/microchip,pic64hpsc-mdio.yaml b/Documentation/devicetree/bindings/net/microchip,pic64hpsc-mdio.yaml new file mode 100644 index 000000000000..20f29b71566b --- /dev/null +++ b/Documentation/devicetree/bindings/net/microchip,pic64hpsc-mdio.yaml @@ -0,0 +1,68 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/microchip,pic64hpsc-mdio.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Microchip PIC64-HPSC/HX MDIO controller + +maintainers: + - Charles Perry + +description: + This is the MDIO bus controller present in Microchip PIC64-HPSC/HX SoCs. It + supports C22 and C45 register access and is named "MDIO Initiator" in the + documentation. + +allOf: + - $ref: mdio.yaml# + +properties: + compatible: + oneOf: + - const: microchip,pic64hpsc-mdio + - items: + - const: microchip,pic64hx-mdio + - const: microchip,pic64hpsc-mdio + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + + clock-frequency: + default: 2500000 + + interrupts: + maxItems: 1 + +required: + - compatible + - reg + - clocks + - interrupts + +unevaluatedProperties: false + +examples: + - | + #include + bus { + #address-cells = <2>; + #size-cells = <2>; + + mdio@4000c21e000 { + compatible = "microchip,pic64hpsc-mdio"; + reg = <0x400 0x0c21e000 0x0 0x1000>; + #address-cells = <1>; + #size-cells = <0>; + clocks = <&svc_clk>; + interrupt-parent = <&saplic0>; + interrupts = <168 IRQ_TYPE_LEVEL_HIGH>; + + ethernet-phy@0 { + reg = <0>; + }; + }; + }; From f76aef980206e7c6bc09933fd3c8e2b8a3479bd2 Mon Sep 17 00:00:00 2001 From: Charles Perry Date: Wed, 8 Apr 2026 06:18:15 -0700 Subject: [PATCH 1466/1561] net: mdio: add a driver for PIC64-HPSC/HX MDIO controller This adds an MDIO driver for PIC64-HPSC/HX. The hardware supports C22 and C45 but only C22 is implemented in this commit. This MDIO hardware is based on a Microsemi design supported in Linux by mdio-mscc-miim.c. However, The register interface is completely different with pic64hpsc, hence the need for a separate driver. The documentation recommends an input clock of 156.25MHz and a prescaler of 39, which yields an MDIO clock of 1.95MHz. The hardware supports an interrupt pin or a "TRIGGER" bit that can be polled to signal transaction completion. This commit uses polling. This was tested on Microchip HB1301 evalkit with a VSC8574 and a VSC8541. Signed-off-by: Charles Perry Reviewed-by: Maxime Chevallier Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20260408131821.1145334-3-charles.perry@microchip.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 6 + drivers/net/mdio/Kconfig | 7 ++ drivers/net/mdio/Makefile | 1 + drivers/net/mdio/mdio-pic64hpsc.c | 190 ++++++++++++++++++++++++++++++ 4 files changed, 204 insertions(+) create mode 100644 drivers/net/mdio/mdio-pic64hpsc.c diff --git a/MAINTAINERS b/MAINTAINERS index d8947c78de04..65902b97f5df 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17392,6 +17392,12 @@ L: linux-serial@vger.kernel.org S: Maintained F: drivers/tty/serial/8250/8250_pci1xxxx.c +MICROCHIP PIC64-HPSC/HX DRIVERS +M: Charles Perry +S: Supported +F: Documentation/devicetree/bindings/net/microchip,pic64hpsc-mdio.yaml +F: drivers/net/mdio/mdio-pic64hpsc.c + MICROCHIP POLARFIRE FPGA DRIVERS M: Conor Dooley L: linux-fpga@vger.kernel.org diff --git a/drivers/net/mdio/Kconfig b/drivers/net/mdio/Kconfig index 53e2c047d804..516b0d05e16e 100644 --- a/drivers/net/mdio/Kconfig +++ b/drivers/net/mdio/Kconfig @@ -145,6 +145,13 @@ config MDIO_OCTEON buses. It is required by the Octeon and ThunderX ethernet device drivers on some systems. +config MDIO_PIC64HPSC + tristate "PIC64-HPSC/HX MDIO interface support" + depends on HAS_IOMEM && OF_MDIO + help + This driver supports the MDIO interface found on the PIC64-HPSC/HX + SoCs. + config MDIO_IPQ4019 tristate "Qualcomm IPQ4019 MDIO interface support" depends on HAS_IOMEM && OF_MDIO diff --git a/drivers/net/mdio/Makefile b/drivers/net/mdio/Makefile index fbec636700e7..048586746026 100644 --- a/drivers/net/mdio/Makefile +++ b/drivers/net/mdio/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_MDIO_MOXART) += mdio-moxart.o obj-$(CONFIG_MDIO_MSCC_MIIM) += mdio-mscc-miim.o obj-$(CONFIG_MDIO_MVUSB) += mdio-mvusb.o obj-$(CONFIG_MDIO_OCTEON) += mdio-octeon.o +obj-$(CONFIG_MDIO_PIC64HPSC) += mdio-pic64hpsc.o obj-$(CONFIG_MDIO_REALTEK_RTL9300) += mdio-realtek-rtl9300.o obj-$(CONFIG_MDIO_REGMAP) += mdio-regmap.o obj-$(CONFIG_MDIO_SUN4I) += mdio-sun4i.o diff --git a/drivers/net/mdio/mdio-pic64hpsc.c b/drivers/net/mdio/mdio-pic64hpsc.c new file mode 100644 index 000000000000..28be77ad6cf7 --- /dev/null +++ b/drivers/net/mdio/mdio-pic64hpsc.c @@ -0,0 +1,190 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Microchip PIC64-HPSC/HX MDIO controller driver + * + * Copyright (c) 2026 Microchip Technology Inc. and its subsidiaries. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define MDIO_REG_PRESCALER 0x20 +#define MDIO_CFG_PRESCALE_MASK GENMASK(7, 0) + +#define MDIO_REG_FRAME_CFG_1 0x24 +#define MDIO_WDATA_MASK GENMASK(15, 0) + +#define MDIO_REG_FRAME_CFG_2 0x28 +#define MDIO_TRIGGER_BIT BIT(31) +#define MDIO_REG_DEV_ADDR_MASK GENMASK(20, 16) +#define MDIO_PHY_PRT_ADDR_MASK GENMASK(8, 4) +#define MDIO_OPERATION_MASK GENMASK(3, 2) +#define MDIO_START_OF_FRAME_MASK GENMASK(1, 0) + +/* Possible value of MDIO_OPERATION_MASK */ +#define MDIO_OPERATION_WRITE BIT(0) +#define MDIO_OPERATION_READ BIT(1) + +#define MDIO_REG_FRAME_STATUS 0x2C +#define MDIO_READOK_BIT BIT(24) +#define MDIO_RDATA_MASK GENMASK(15, 0) + +struct pic64hpsc_mdio_dev { + void __iomem *regs; +}; + +static int pic64hpsc_mdio_wait_trigger(struct mii_bus *bus) +{ + struct pic64hpsc_mdio_dev *priv = bus->priv; + u32 val; + int ret; + + /* The MDIO_TRIGGER bit returns 0 when a transaction has completed. */ + ret = readl_poll_timeout(priv->regs + MDIO_REG_FRAME_CFG_2, val, + !(val & MDIO_TRIGGER_BIT), 50, 10000); + + if (ret < 0) + dev_dbg(&bus->dev, "TRIGGER bit timeout: %x\n", val); + + return ret; +} + +static int pic64hpsc_mdio_c22_read(struct mii_bus *bus, int mii_id, int regnum) +{ + struct pic64hpsc_mdio_dev *priv = bus->priv; + u32 val; + int ret; + + ret = pic64hpsc_mdio_wait_trigger(bus); + if (ret) + return ret; + + writel(MDIO_TRIGGER_BIT | FIELD_PREP(MDIO_REG_DEV_ADDR_MASK, regnum) | + FIELD_PREP(MDIO_PHY_PRT_ADDR_MASK, mii_id) | + FIELD_PREP(MDIO_OPERATION_MASK, MDIO_OPERATION_READ) | + FIELD_PREP(MDIO_START_OF_FRAME_MASK, 1), + priv->regs + MDIO_REG_FRAME_CFG_2); + + ret = pic64hpsc_mdio_wait_trigger(bus); + if (ret) + return ret; + + val = readl(priv->regs + MDIO_REG_FRAME_STATUS); + + /* The MDIO_READOK is a 1-bit value reflecting the inverse of the MDIO + * bus value captured during the 2nd TA cycle. A PHY/Port should drive + * the MDIO bus with a logic 0 on the 2nd TA cycle, however, the + * PHY/Port could optionally drive a logic 1, to communicate a read + * failure. This feature is optional, not defined by the 802.3 standard + * and not supported in standard external PHYs. + */ + if (!(bus->phy_ignore_ta_mask & 1 << mii_id) && + !FIELD_GET(MDIO_READOK_BIT, val)) { + dev_dbg(&bus->dev, "READOK bit cleared\n"); + return -EIO; + } + + return FIELD_GET(MDIO_RDATA_MASK, val); +} + +static int pic64hpsc_mdio_c22_write(struct mii_bus *bus, int mii_id, int regnum, + u16 value) +{ + struct pic64hpsc_mdio_dev *priv = bus->priv; + int ret; + + ret = pic64hpsc_mdio_wait_trigger(bus); + if (ret < 0) + return ret; + + writel(FIELD_PREP(MDIO_WDATA_MASK, value), + priv->regs + MDIO_REG_FRAME_CFG_1); + + writel(MDIO_TRIGGER_BIT | FIELD_PREP(MDIO_REG_DEV_ADDR_MASK, regnum) | + FIELD_PREP(MDIO_PHY_PRT_ADDR_MASK, mii_id) | + FIELD_PREP(MDIO_OPERATION_MASK, MDIO_OPERATION_WRITE) | + FIELD_PREP(MDIO_START_OF_FRAME_MASK, 1), + priv->regs + MDIO_REG_FRAME_CFG_2); + + return 0; +} + +static int pic64hpsc_mdio_probe(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + struct device *dev = &pdev->dev; + struct pic64hpsc_mdio_dev *priv; + struct mii_bus *bus; + unsigned long rate; + struct clk *clk; + u32 bus_freq; + u32 div; + int ret; + + bus = devm_mdiobus_alloc_size(dev, sizeof(*priv)); + if (!bus) + return -ENOMEM; + + priv = bus->priv; + + priv->regs = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(priv->regs)) + return PTR_ERR(priv->regs); + + bus->name = KBUILD_MODNAME; + bus->read = pic64hpsc_mdio_c22_read; + bus->write = pic64hpsc_mdio_c22_write; + snprintf(bus->id, MII_BUS_ID_SIZE, "%s", dev_name(dev)); + bus->parent = dev; + + clk = devm_clk_get_enabled(dev, NULL); + if (IS_ERR(clk)) + return PTR_ERR(clk); + + if (of_property_read_u32(np, "clock-frequency", &bus_freq)) + bus_freq = 2500000; + + rate = clk_get_rate(clk); + + div = DIV_ROUND_UP(rate, 2 * bus_freq) - 1; + if (div == 0 || div & ~MDIO_CFG_PRESCALE_MASK) { + dev_err(dev, "MDIO clock-frequency out of range\n"); + return -EINVAL; + } + + dev_dbg(dev, "rate=%lu bus_freq=%u real_bus_freq=%lu div=%u\n", rate, + bus_freq, rate / (2 * (1 + div)), div); + writel(div, priv->regs + MDIO_REG_PRESCALER); + + ret = devm_of_mdiobus_register(dev, bus, np); + if (ret) { + dev_err(dev, "Cannot register MDIO bus (%d)\n", ret); + return ret; + } + + return 0; +} + +static const struct of_device_id pic64hpsc_mdio_match[] = { + { .compatible = "microchip,pic64hpsc-mdio" }, + {} +}; +MODULE_DEVICE_TABLE(of, pic64hpsc_mdio_match); + +static struct platform_driver pic64hpsc_mdio_driver = { + .probe = pic64hpsc_mdio_probe, + .driver = { + .name = KBUILD_MODNAME, + .of_match_table = pic64hpsc_mdio_match, + }, +}; +module_platform_driver(pic64hpsc_mdio_driver); + +MODULE_AUTHOR("Charles Perry "); +MODULE_DESCRIPTION("Microchip PIC64-HPSC/HX MDIO driver"); +MODULE_LICENSE("GPL"); From 96aefe3afe0e122a1472967224ffe21c3d51b67b Mon Sep 17 00:00:00 2001 From: Charles Perry Date: Wed, 8 Apr 2026 06:18:16 -0700 Subject: [PATCH 1467/1561] net: phy: add a PHY write barrier when disabling interrupts MDIO bus controllers are not required to wait for write transactions to complete before returning as synchronization is often achieved by polling status bits. This can cause issues when disabling interrupts since an interrupt could fire before the interrupt handler is unregistered and there's no status bit to poll. Add a phy_write_barrier() function and use it in phy_disable_interrupts() to fix this issue. The write barrier just reads an MII register and discards the value, which is enough to guarantee that previous writes have completed. Signed-off-by: Charles Perry Link: https://patch.msgid.link/20260408131821.1145334-4-charles.perry@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 1f1039084f51..fce9bc7be330 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -1368,14 +1368,37 @@ void phy_error(struct phy_device *phydev) } EXPORT_SYMBOL(phy_error); +/** + * phy_write_barrier - ensure the last write completed for this PHY device + * @phydev: target phy_device struct + * + * MDIO bus controllers are not required to wait for write transactions to + * complete before returning. Calling this function ensures that the previous + * write has completed. + */ +static void phy_write_barrier(struct phy_device *phydev) +{ + if (mdiobus_read(phydev->mdio.bus, phydev->mdio.addr, MII_PHYSID1) == + -EOPNOTSUPP) + mdiobus_c45_read(phydev->mdio.bus, phydev->mdio.addr, + MDIO_MMD_PMAPMD, MII_PHYSID1); +} + /** * phy_disable_interrupts - Disable the PHY interrupts from the PHY side * @phydev: target phy_device struct */ int phy_disable_interrupts(struct phy_device *phydev) { + int err; + /* Disable PHY interrupts */ - return phy_config_interrupt(phydev, PHY_INTERRUPT_DISABLED); + err = phy_config_interrupt(phydev, PHY_INTERRUPT_DISABLED); + if (err) + return err; + + phy_write_barrier(phydev); + return 0; } /** From 900f27fb797c7eaf0b84b7a6516613e19746bc4e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Apr 2026 14:56:20 +0000 Subject: [PATCH 1468/1561] net: change sock_queue_rcv_skb_reason() to return a drop_reason Change sock_queue_rcv_skb_reason() to return the drop_reason directly instead of using a reference. This is part of an effort to remove stack canaries and reduce bloat. $ scripts/bloat-o-meter -t vmlinux.old vmlinux.new add/remove: 0/0 grow/shrink: 3/7 up/down: 79/-301 (-222) Function old new delta vsock_queue_rcv_skb 50 79 +29 ipmr_cache_report 1290 1315 +25 ip6mr_cache_report 1322 1347 +25 packet_rcv_spkt 329 327 -2 sock_queue_rcv_skb_reason 166 128 -38 raw_rcv_skb 122 80 -42 ping_queue_rcv_skb 109 61 -48 ping_rcv 215 162 -53 rawv6_rcv_skb 278 224 -54 raw_rcv 591 527 -64 Total: Before=29722890, After=29722668, chg -0.00% Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260409145625.2306224-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 17 ++++++++++++++--- net/can/bcm.c | 5 ++--- net/can/isotp.c | 3 ++- net/can/j1939/socket.c | 3 ++- net/can/raw.c | 3 ++- net/core/sock.c | 20 ++++++-------------- net/ipv4/ping.c | 3 ++- net/ipv4/raw.c | 3 ++- net/ipv6/raw.c | 3 ++- 9 files changed, 34 insertions(+), 26 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 7d51ac9e7d9a..5831a4d1ebe7 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2502,12 +2502,23 @@ int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue, struct sk_buff *skb)); int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); -int sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb, - enum skb_drop_reason *reason); +enum skb_drop_reason +sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb); static inline int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { - return sock_queue_rcv_skb_reason(sk, skb, NULL); + enum skb_drop_reason drop_reason = sock_queue_rcv_skb_reason(sk, skb); + + switch (drop_reason) { + case SKB_DROP_REASON_SOCKET_RCVBUFF: + return -ENOMEM; + case SKB_DROP_REASON_PROTO_MEM: + return -ENOBUFS; + case 0: + return 0; + default: + return -EPERM; + } } int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb); diff --git a/net/can/bcm.c b/net/can/bcm.c index fd9fa072881e..d6291381afb0 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -363,7 +363,6 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head, struct sockaddr_can *addr; struct sock *sk = op->sk; unsigned int datalen = head->nframes * op->cfsiz; - int err; unsigned int *pflags; enum skb_drop_reason reason; @@ -420,8 +419,8 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head, addr->can_family = AF_CAN; addr->can_ifindex = op->rx_ifindex; - err = sock_queue_rcv_skb_reason(sk, skb, &reason); - if (err < 0) { + reason = sock_queue_rcv_skb_reason(sk, skb); + if (reason) { struct bcm_sock *bo = bcm_sk(sk); sk_skb_reason_drop(sk, skb, reason); diff --git a/net/can/isotp.c b/net/can/isotp.c index 2770f43f4951..c48b4a818297 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -291,7 +291,8 @@ static void isotp_rcv_skb(struct sk_buff *skb, struct sock *sk) addr->can_family = AF_CAN; addr->can_ifindex = skb->dev->ifindex; - if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) + reason = sock_queue_rcv_skb_reason(sk, skb); + if (reason) sk_skb_reason_drop(sk, skb, reason); } diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index 0502b030d238..50a598ef5fd4 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -333,7 +333,8 @@ static void j1939_sk_recv_one(struct j1939_sock *jsk, struct sk_buff *oskb) if (skb->sk) skcb->msg_flags |= MSG_DONTROUTE; - if (sock_queue_rcv_skb_reason(&jsk->sk, skb, &reason) < 0) + reason = sock_queue_rcv_skb_reason(&jsk->sk, skb); + if (reason) sk_skb_reason_drop(&jsk->sk, skb, reason); } diff --git a/net/can/raw.c b/net/can/raw.c index eee244ffc31e..56c95c768778 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -207,7 +207,8 @@ static void raw_rcv(struct sk_buff *oskb, void *data) if (oskb->sk == sk) *pflags |= MSG_CONFIRM; - if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) + reason = sock_queue_rcv_skb_reason(sk, skb); + if (reason) sk_skb_reason_drop(sk, skb, reason); } diff --git a/net/core/sock.c b/net/core/sock.c index e821b95e0015..d39a4d6ccafd 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -520,32 +520,24 @@ int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(__sock_queue_rcv_skb); -int sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb, - enum skb_drop_reason *reason) +enum skb_drop_reason +sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb) { enum skb_drop_reason drop_reason; int err; err = sk_filter_reason(sk, skb, &drop_reason); if (err) - goto out; + return drop_reason; err = __sock_queue_rcv_skb(sk, skb); switch (err) { case -ENOMEM: - drop_reason = SKB_DROP_REASON_SOCKET_RCVBUFF; - break; + return SKB_DROP_REASON_SOCKET_RCVBUFF; case -ENOBUFS: - drop_reason = SKB_DROP_REASON_PROTO_MEM; - break; - default: - drop_reason = SKB_NOT_DROPPED_YET; - break; + return SKB_DROP_REASON_PROTO_MEM; } -out: - if (reason) - *reason = drop_reason; - return err; + return SKB_NOT_DROPPED_YET; } EXPORT_SYMBOL(sock_queue_rcv_skb_reason); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index bda245c80893..1273d1028ed9 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -935,7 +935,8 @@ static enum skb_drop_reason __ping_queue_rcv_skb(struct sock *sk, pr_debug("ping_queue_rcv_skb(sk=%p,sk->num=%d,skb=%p)\n", inet_sk(sk), inet_sk(sk)->inet_num, skb); - if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) { + reason = sock_queue_rcv_skb_reason(sk, skb); + if (reason) { sk_skb_reason_drop(sk, skb, reason); pr_debug("ping_queue_rcv_skb -> failed\n"); return reason; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 34859e537b49..319428bf06bb 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -300,7 +300,8 @@ static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) /* Charge it to the socket. */ ipv4_pktinfo_prepare(sk, skb, true); - if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) { + reason = sock_queue_rcv_skb_reason(sk, skb); + if (reason) { sk_skb_reason_drop(sk, skb, reason); return NET_RX_DROP; } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 0ac704691100..3cc58698cbbd 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -369,7 +369,8 @@ static inline int rawv6_rcv_skb(struct sock *sk, struct sk_buff *skb) /* Charge it to the socket. */ skb_dst_drop(skb); - if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) { + reason = sock_queue_rcv_skb_reason(sk, skb); + if (reason) { sk_skb_reason_drop(sk, skb, reason); return NET_RX_DROP; } From 734ea7e324ad1cee2a21f909f756b5e2d903a224 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Apr 2026 14:56:21 +0000 Subject: [PATCH 1469/1561] net: always set reason in sk_filter_trim_cap() sk_filter_trim_cap() will soon return the drop reason by value. Make sure *reason is cleared when no error is returned, to ease this conversion. $ scripts/bloat-o-meter -t vmlinux.old vmlinux.new add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-7 (-7) Function old new delta sk_filter_trim_cap 889 882 -7 Total: Before=29722668, After=29722661, chg -0.00% Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260409145625.2306224-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/filter.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index cf2113af4bc9..5569d83b8be0 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -121,7 +121,7 @@ EXPORT_SYMBOL_GPL(copy_bpf_fprog_from_user); * @sk: sock associated with &sk_buff * @skb: buffer to filter * @cap: limit on how short the eBPF program may trim the packet - * @reason: record drop reason on errors (negative return value) + * @reason: record drop reason * * Run the eBPF program and then cut skb->data to correct size returned by * the program. If pkt_len is 0 we toss packet. If skb->len is smaller @@ -168,11 +168,10 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, pkt_len = bpf_prog_run_save_cb(filter->prog, skb); skb->sk = save_sk; err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM; - if (err) - *reason = SKB_DROP_REASON_SOCKET_FILTER; } rcu_read_unlock(); + *reason = err ? SKB_DROP_REASON_SOCKET_FILTER : 0; return err; } EXPORT_SYMBOL(sk_filter_trim_cap); From c78bcbd51976f123909e5c2baf8cebb699453c2f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Apr 2026 14:56:22 +0000 Subject: [PATCH 1470/1561] net: change sk_filter_reason() to return the reason by value sk_filter_trim_cap will soon return the reason by value, do the same for sk_filter_reason(). $ scripts/bloat-o-meter -t vmlinux.old vmlinux.new add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-21 (-21) Function old new delta sock_queue_rcv_skb_reason 128 126 -2 tun_net_xmit 1146 1127 -19 Total: Before=29722661, After=29722640, chg -0.00% Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260409145625.2306224-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/tun.c | 8 +++++--- include/linux/filter.h | 9 ++++++--- net/core/sock.c | 4 ++-- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index c492fda6fc15..b183189f1853 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1031,9 +1031,11 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) goto drop; } - if (tfile->socket.sk->sk_filter && - sk_filter_reason(tfile->socket.sk, skb, &drop_reason)) - goto drop; + if (tfile->socket.sk->sk_filter) { + drop_reason = sk_filter_reason(tfile->socket.sk, skb); + if (drop_reason) + goto drop; + } len = run_ebpf_filter(tun, skb, len); if (len == 0) { diff --git a/include/linux/filter.h b/include/linux/filter.h index 44d7ae95ddbc..59931e5810b4 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1102,10 +1102,13 @@ static inline int sk_filter(struct sock *sk, struct sk_buff *skb) return sk_filter_trim_cap(sk, skb, 1, &ignore_reason); } -static inline int sk_filter_reason(struct sock *sk, struct sk_buff *skb, - enum skb_drop_reason *reason) +static inline enum skb_drop_reason +sk_filter_reason(struct sock *sk, struct sk_buff *skb) { - return sk_filter_trim_cap(sk, skb, 1, reason); + enum skb_drop_reason drop_reason; + + sk_filter_trim_cap(sk, skb, 1, &drop_reason); + return drop_reason; } struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err); diff --git a/net/core/sock.c b/net/core/sock.c index d39a4d6ccafd..1ffcb15d0fc5 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -526,8 +526,8 @@ sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb) enum skb_drop_reason drop_reason; int err; - err = sk_filter_reason(sk, skb, &drop_reason); - if (err) + drop_reason = sk_filter_reason(sk, skb); + if (drop_reason) return drop_reason; err = __sock_queue_rcv_skb(sk, skb); From 97449a5f1a586d2befde5297b0fcb0bfdade774e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Apr 2026 14:56:23 +0000 Subject: [PATCH 1471/1561] tcp: change tcp_filter() to return the reason by value sk_filter_trim_cap() will soon return the reason by value, do the same for tcp_filter(). Note: tcp_filter() is no longer inlined. Following patch will inline it again. $ scripts/bloat-o-meter -t vmlinux.4 vmlinux.5 add/remove: 2/0 grow/shrink: 0/2 up/down: 186/-43 (143) Function old new delta tcp_filter - 154 +154 __pfx_tcp_filter - 32 +32 tcp_v4_rcv 3152 3143 -9 tcp_v6_rcv 3169 3135 -34 Total: Before=29722640, After=29722783, chg +0.00% Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260409145625.2306224-5-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 8 +++++--- net/ipv4/tcp_ipv4.c | 6 ++++-- net/ipv6/tcp_ipv6.c | 6 ++++-- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 6156d1d068e1..098e52269a04 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1683,12 +1683,14 @@ static inline bool tcp_checksum_complete(struct sk_buff *skb) bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb, enum skb_drop_reason *reason); -static inline int tcp_filter(struct sock *sk, struct sk_buff *skb, - enum skb_drop_reason *reason) +static inline enum skb_drop_reason +tcp_filter(struct sock *sk, struct sk_buff *skb) { const struct tcphdr *th = (const struct tcphdr *)skb->data; + enum skb_drop_reason reason; - return sk_filter_trim_cap(sk, skb, __tcp_hdrlen(th), reason); + sk_filter_trim_cap(sk, skb, __tcp_hdrlen(th), &reason); + return reason; } void tcp_set_state(struct sock *sk, int state); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 69ab236072e7..e2da3246a641 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2164,7 +2164,8 @@ lookup: } refcounted = true; nsk = NULL; - if (!tcp_filter(sk, skb, &drop_reason)) { + drop_reason = tcp_filter(sk, skb); + if (!drop_reason) { th = (const struct tcphdr *)skb->data; iph = ip_hdr(skb); tcp_v4_fill_cb(skb, iph, th); @@ -2225,7 +2226,8 @@ process: nf_reset_ct(skb); - if (tcp_filter(sk, skb, &drop_reason)) + drop_reason = tcp_filter(sk, skb); + if (drop_reason) goto discard_and_relse; th = (const struct tcphdr *)skb->data; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 8dc3874e8b92..d64d28e9842f 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1794,7 +1794,8 @@ lookup: } refcounted = true; nsk = NULL; - if (!tcp_filter(sk, skb, &drop_reason)) { + drop_reason = tcp_filter(sk, skb); + if (!drop_reason) { th = (const struct tcphdr *)skb->data; hdr = ipv6_hdr(skb); tcp_v6_fill_cb(skb, hdr, th); @@ -1855,7 +1856,8 @@ process: nf_reset_ct(skb); - if (tcp_filter(sk, skb, &drop_reason)) + drop_reason = tcp_filter(sk, skb); + if (drop_reason) goto discard_and_relse; th = (const struct tcphdr *)skb->data; From fb37aea2a00e67ef5264ea39371d350a1d19b24f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Apr 2026 14:56:24 +0000 Subject: [PATCH 1472/1561] net: change sk_filter_trim_cap() to return a drop_reason by value Current return value can be replaced with the drop_reason, reducing kernel bloat: $ scripts/bloat-o-meter -t vmlinux.old vmlinux.new add/remove: 0/2 grow/shrink: 1/11 up/down: 32/-603 (-571) Function old new delta tcp_v6_rcv 3135 3167 +32 unix_dgram_sendmsg 1731 1726 -5 netlink_unicast 957 945 -12 netlink_dump 1372 1359 -13 sk_filter_trim_cap 882 858 -24 tcp_v4_rcv 3143 3111 -32 __pfx_tcp_filter 32 - -32 netlink_broadcast_filtered 1633 1595 -38 sock_queue_rcv_skb_reason 126 76 -50 tun_net_xmit 1127 1074 -53 __sk_receive_skb 690 632 -58 udpv6_queue_rcv_one_skb 935 869 -66 udp_queue_rcv_one_skb 919 853 -66 tcp_filter 154 - -154 Total: Before=29722783, After=29722212, chg -0.00% Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260409145625.2306224-6-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/filter.h | 14 ++++++-------- include/net/tcp.h | 4 +--- net/core/filter.c | 31 ++++++++++++++----------------- net/core/sock.c | 5 +++-- net/ipv4/udp.c | 3 ++- net/ipv6/udp.c | 3 ++- net/rose/rose_in.c | 3 +-- 7 files changed, 29 insertions(+), 34 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 59931e5810b4..5ac08aa70123 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1092,23 +1092,21 @@ bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr) return set_memory_rox((unsigned long)hdr, hdr->size >> PAGE_SHIFT); } -int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap, - enum skb_drop_reason *reason); +enum skb_drop_reason +sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap); static inline int sk_filter(struct sock *sk, struct sk_buff *skb) { - enum skb_drop_reason ignore_reason; + enum skb_drop_reason drop_reason; - return sk_filter_trim_cap(sk, skb, 1, &ignore_reason); + drop_reason = sk_filter_trim_cap(sk, skb, 1); + return drop_reason ? -EPERM : 0; } static inline enum skb_drop_reason sk_filter_reason(struct sock *sk, struct sk_buff *skb) { - enum skb_drop_reason drop_reason; - - sk_filter_trim_cap(sk, skb, 1, &drop_reason); - return drop_reason; + return sk_filter_trim_cap(sk, skb, 1); } struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err); diff --git a/include/net/tcp.h b/include/net/tcp.h index 098e52269a04..49f45bcff917 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1687,10 +1687,8 @@ static inline enum skb_drop_reason tcp_filter(struct sock *sk, struct sk_buff *skb) { const struct tcphdr *th = (const struct tcphdr *)skb->data; - enum skb_drop_reason reason; - sk_filter_trim_cap(sk, skb, __tcp_hdrlen(th), &reason); - return reason; + return sk_filter_trim_cap(sk, skb, __tcp_hdrlen(th)); } void tcp_set_state(struct sock *sk, int state); diff --git a/net/core/filter.c b/net/core/filter.c index 5569d83b8be0..bf9c37b27646 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -121,20 +121,20 @@ EXPORT_SYMBOL_GPL(copy_bpf_fprog_from_user); * @sk: sock associated with &sk_buff * @skb: buffer to filter * @cap: limit on how short the eBPF program may trim the packet - * @reason: record drop reason * * Run the eBPF program and then cut skb->data to correct size returned by * the program. If pkt_len is 0 we toss packet. If skb->len is smaller * than pkt_len we keep whole skb->data. This is the socket level * wrapper to bpf_prog_run. It returns 0 if the packet should - * be accepted or -EPERM if the packet should be tossed. + * be accepted or a drop_reason if the packet should be tossed. * */ -int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, - unsigned int cap, enum skb_drop_reason *reason) +enum skb_drop_reason +sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap) { - int err; + enum skb_drop_reason drop_reason; struct sk_filter *filter; + int err; /* * If the skb was allocated from pfmemalloc reserves, only @@ -143,21 +143,17 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, */ if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) { NET_INC_STATS(sock_net(sk), LINUX_MIB_PFMEMALLOCDROP); - *reason = SKB_DROP_REASON_PFMEMALLOC; - return -ENOMEM; + return SKB_DROP_REASON_PFMEMALLOC; } err = BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb); - if (err) { - *reason = SKB_DROP_REASON_SOCKET_FILTER; - return err; - } + if (err) + return SKB_DROP_REASON_SOCKET_FILTER; err = security_sock_rcv_skb(sk, skb); - if (err) { - *reason = SKB_DROP_REASON_SECURITY_HOOK; - return err; - } + if (err) + return SKB_DROP_REASON_SECURITY_HOOK; + drop_reason = 0; rcu_read_lock(); filter = rcu_dereference(sk->sk_filter); if (filter) { @@ -168,11 +164,12 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, pkt_len = bpf_prog_run_save_cb(filter->prog, skb); skb->sk = save_sk; err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM; + if (err) + drop_reason = SKB_DROP_REASON_SOCKET_FILTER; } rcu_read_unlock(); - *reason = err ? SKB_DROP_REASON_SOCKET_FILTER : 0; - return err; + return drop_reason; } EXPORT_SYMBOL(sk_filter_trim_cap); diff --git a/net/core/sock.c b/net/core/sock.c index 1ffcb15d0fc5..367fd7bad4ac 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -544,11 +544,12 @@ EXPORT_SYMBOL(sock_queue_rcv_skb_reason); int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested, unsigned int trim_cap, bool refcounted) { - enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; + enum skb_drop_reason reason; int rc = NET_RX_SUCCESS; int err; - if (sk_filter_trim_cap(sk, skb, trim_cap, &reason)) + reason = sk_filter_trim_cap(sk, skb, trim_cap); + if (reason) goto discard_and_relse; skb->dev = NULL; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index ab415de32443..2fddc7b6b717 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2392,7 +2392,8 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) udp_lib_checksum_complete(skb)) goto csum_error; - if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr), &drop_reason)) + drop_reason = sk_filter_trim_cap(sk, skb, sizeof(struct udphdr)); + if (drop_reason) goto drop; udp_csum_pull_header(skb); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index d7cf4c9508b2..3fac9cb47ae0 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -853,7 +853,8 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) udp_lib_checksum_complete(skb)) goto csum_error; - if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr), &drop_reason)) + drop_reason = sk_filter_trim_cap(sk, skb, sizeof(struct udphdr)); + if (drop_reason) goto drop; udp_csum_pull_header(skb); diff --git a/net/rose/rose_in.c b/net/rose/rose_in.c index 0276b393f0e5..3aff3c2d45a9 100644 --- a/net/rose/rose_in.c +++ b/net/rose/rose_in.c @@ -101,7 +101,6 @@ static int rose_state2_machine(struct sock *sk, struct sk_buff *skb, int framety */ static int rose_state3_machine(struct sock *sk, struct sk_buff *skb, int frametype, int ns, int nr, int q, int d, int m) { - enum skb_drop_reason dr; /* ignored */ struct rose_sock *rose = rose_sk(sk); int queued = 0; @@ -163,7 +162,7 @@ static int rose_state3_machine(struct sock *sk, struct sk_buff *skb, int framety rose_frames_acked(sk, nr); if (ns == rose->vr) { rose_start_idletimer(sk); - if (!sk_filter_trim_cap(sk, skb, ROSE_MIN_LEN, &dr) && + if (!sk_filter_trim_cap(sk, skb, ROSE_MIN_LEN) && __sock_queue_rcv_skb(sk, skb) == 0) { rose->vr = (rose->vr + 1) % ROSE_MODULUS; queued = 1; From d114bfdc9b76bf93b881e195b7ec957c14227bab Mon Sep 17 00:00:00 2001 From: Norbert Szetei Date: Thu, 9 Apr 2026 18:34:12 +0200 Subject: [PATCH 1473/1561] vsock: fix buffer size clamping order In vsock_update_buffer_size(), the buffer size was being clamped to the maximum first, and then to the minimum. If a user sets a minimum buffer size larger than the maximum, the minimum check overrides the maximum check, inverting the constraint. This breaks the intended socket memory boundaries by allowing the vsk->buffer_size to grow beyond the configured vsk->buffer_max_size. Fix this by checking the minimum first, and then the maximum. This ensures the buffer size never exceeds the buffer_max_size. Fixes: b9f2b0ffde0c ("vsock: handle buffer_size sockopts in the core") Suggested-by: Stefano Garzarella Signed-off-by: Norbert Szetei Reviewed-by: Stefano Garzarella Link: https://patch.msgid.link/180118C5-8BCF-4A63-A305-4EE53A34AB9C@doyensec.com Signed-off-by: Jakub Kicinski --- net/vmw_vsock/af_vsock.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index d912ed2f012a..08f4dfb9782c 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1951,12 +1951,12 @@ static void vsock_update_buffer_size(struct vsock_sock *vsk, const struct vsock_transport *transport, u64 val) { - if (val > vsk->buffer_max_size) - val = vsk->buffer_max_size; - if (val < vsk->buffer_min_size) val = vsk->buffer_min_size; + if (val > vsk->buffer_max_size) + val = vsk->buffer_max_size; + if (val != vsk->buffer_size && transport && transport->notify_buffer_size) transport->notify_buffer_size(vsk, &val); From 9994ad4df82d64e57135c0f0906897685f5a9e87 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 9 Apr 2026 23:28:51 +0300 Subject: [PATCH 1474/1561] net/mlx5e: Fix features not applied during netdev registration mlx5e_fix_features() returns early when the netdevice is not present. This is correct during profile transitions where priv is cleared, but it also incorrectly blocks feature fixups during register_netdev(), when the device is also not yet present. It is not trivial to distinguish between both cases as we cannot use priv to carry state, and in both cases reg_state == NETREG_REGISTERED. Force a netdev features update after register_netdev() completes, where the device is present and fix_features() can actually work. This is not a pretty solution, as it results in an additional features update call (register_netdevice() already calls __netdev_update_features() internally), but it is the simplest, cleanest, and most robust way I found to fix this issue after multiple attempts. This fixes an issue on systems where CQE compression is enabled by default, RXHASH remains enabled after registration despite the two features being mutually exclusive. Fixes: ab4b01bfdaa6 ("net/mlx5e: Verify dev is present for fix features ndo") Signed-off-by: Gal Pressman Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260409202852.158059-2-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index b6c12460b54a..0b8b44bbcb9e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -6756,6 +6756,14 @@ static int _mlx5e_probe(struct auxiliary_device *adev) goto err_resume; } + /* mlx5e_fix_features() returns early when the device is not present + * to avoid dereferencing cleared priv during profile changes. + * This also causes it to be a no-op during register_netdev(), where + * the device is not yet present. + * Trigger an additional features update that will actually work. + */ + mlx5e_update_features(netdev); + mlx5e_dcbnl_init_app(priv); mlx5_core_uplink_netdev_set(mdev, netdev); mlx5e_params_print_info(mdev, &priv->channels.params); From edccdd1eb94712da97a6ce71123ec27890add754 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 9 Apr 2026 23:28:52 +0300 Subject: [PATCH 1475/1561] net/mlx5e: IPsec, fix ASO poll timeout with read_poll_timeout_atomic() The do-while poll loop uses jiffies for its timeout: expires = jiffies + msecs_to_jiffies(10); jiffies is sampled at an arbitrary point within the current tick, so the first partial tick contributes anywhere from a full tick down to nearly zero real time. For small msecs_to_jiffies() results this is significant, the effective poll window can be much shorter than the requested 10ms, and in the worst case the loop exits after a single iteration (e.g., when HZ=100), well before the device has delivered the CQE. Replace the loop with read_poll_timeout_atomic(), which counts elapsed time via udelay() accounting rather than jiffies, guaranteeing the full poll window regardless of HZ. Additionally, read_poll_timeout_atomic() executes the poll operation one more time after the timeout has expired, giving the CQE a final chance to be detected. The old do-while loop could exit without a final poll if the timeout expired during the udelay() between iterations. Fixes: 76e463f6508b ("net/mlx5e: Overcome slow response for first IPsec ASO WQE") Signed-off-by: Gal Pressman Reviewed-by: Jianbo Liu Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260409202852.158059-3-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/en_accel/ipsec_offload.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c index 05faad5083d9..145677ce9640 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. */ +#include + #include "mlx5_core.h" #include "en.h" #include "ipsec.h" @@ -592,7 +594,6 @@ int mlx5e_ipsec_aso_query(struct mlx5e_ipsec_sa_entry *sa_entry, struct mlx5_wqe_aso_ctrl_seg *ctrl; struct mlx5e_hw_objs *res; struct mlx5_aso_wqe *wqe; - unsigned long expires; u8 ds_cnt; int ret; @@ -614,13 +615,8 @@ int mlx5e_ipsec_aso_query(struct mlx5e_ipsec_sa_entry *sa_entry, mlx5e_ipsec_aso_copy(ctrl, data); mlx5_aso_post_wqe(aso->aso, false, &wqe->ctrl); - expires = jiffies + msecs_to_jiffies(10); - do { - ret = mlx5_aso_poll_cq(aso->aso, false); - if (ret) - /* We are in atomic context */ - udelay(10); - } while (ret && time_is_after_jiffies(expires)); + read_poll_timeout_atomic(mlx5_aso_poll_cq, ret, !ret, 10, + 10 * USEC_PER_MSEC, false, aso->aso, false); if (!ret) memcpy(sa_entry->ctx, aso->ctx, MLX5_ST_SZ_BYTES(ipsec_aso)); spin_unlock_bh(&aso->lock); From 105369d627b946f6a05f25e9c399167b1674d4bc Mon Sep 17 00:00:00 2001 From: Qingfang Deng Date: Fri, 10 Apr 2026 13:49:49 +0800 Subject: [PATCH 1476/1561] pppox: remove sk_pppox() helper The sk member can be directly accessed from struct pppox_sock without relying on type casting. Remove the sk_pppox() helper and update all call sites to use po->sk directly. Signed-off-by: Qingfang Deng Link: https://patch.msgid.link/20260410054954.114031-1-qingfang.deng@linux.dev Signed-off-by: Jakub Kicinski --- drivers/net/ppp/pppoe.c | 10 +++++----- drivers/net/ppp/pptp.c | 6 +++--- include/linux/if_pppox.h | 5 ----- 3 files changed, 8 insertions(+), 13 deletions(-) diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 1ac61c273b28..d546a7af0d54 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -231,7 +231,7 @@ static inline struct pppox_sock *get_item(struct pppoe_net *pn, __be16 sid, struct pppox_sock *po; po = __get_item(pn, sid, addr, ifindex); - if (po && !refcount_inc_not_zero(&sk_pppox(po)->sk_refcnt)) + if (po && !refcount_inc_not_zero(&po->sk.sk_refcnt)) po = NULL; return po; @@ -273,7 +273,7 @@ static void pppoe_flush_dev(struct net_device *dev) if (!po) break; - sk = sk_pppox(po); + sk = &po->sk; /* We always grab the socket lock, followed by the * hash_lock, in that order. Since we should hold the @@ -413,7 +413,7 @@ static int pppoe_rcv(struct sk_buff *skb, struct net_device *dev, if (!po) goto drop; - return __sk_receive_skb(sk_pppox(po), skb, 0, 1, false); + return __sk_receive_skb(&po->sk, skb, 0, 1, false); drop: kfree_skb(skb); @@ -425,7 +425,7 @@ static void pppoe_unbind_sock_work(struct work_struct *work) { struct pppox_sock *po = container_of(work, struct pppox_sock, proto.pppoe.padt_work); - struct sock *sk = sk_pppox(po); + struct sock *sk = &po->sk; lock_sock(sk); if (po->pppoe_dev) { @@ -469,7 +469,7 @@ static int pppoe_disc_rcv(struct sk_buff *skb, struct net_device *dev, po = get_item(pn, ph->sid, eth_hdr(skb)->h_source, dev->ifindex); if (po) if (!schedule_work(&po->proto.pppoe.padt_work)) - sock_put(sk_pppox(po)); + sock_put(&po->sk); abort: kfree_skb(skb); diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index b18acd810561..cc8c102122d8 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -62,7 +62,7 @@ static struct pppox_sock *lookup_chan(u16 call_id, __be32 s_addr) if (opt->dst_addr.sin_addr.s_addr != s_addr) sock = NULL; else - sock_hold(sk_pppox(sock)); + sock_hold(&sock->sk); } rcu_read_unlock(); @@ -164,7 +164,7 @@ static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb) struct iphdr *iph; int max_headroom; - if (sk_pppox(po)->sk_state & PPPOX_DEAD) + if (po->sk.sk_state & PPPOX_DEAD) goto tx_drop; rt = pptp_route_output(po, &fl4); @@ -375,7 +375,7 @@ static int pptp_rcv(struct sk_buff *skb) if (po) { skb_dst_drop(skb); nf_reset_ct(skb); - return sk_receive_skb(sk_pppox(po), skb, 0); + return sk_receive_skb(&po->sk, skb, 0); } drop: kfree_skb(skb); diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index 8bbf676c2a85..636772693f9a 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -57,11 +57,6 @@ static inline struct pppox_sock *pppox_sk(struct sock *sk) return (struct pppox_sock *)sk; } -static inline struct sock *sk_pppox(struct pppox_sock *po) -{ - return (struct sock *)po; -} - struct module; struct pppox_proto { From 6bc78039a77a46d89df987813fbafe333cd81367 Mon Sep 17 00:00:00 2001 From: Qingfang Deng Date: Fri, 10 Apr 2026 13:49:50 +0800 Subject: [PATCH 1477/1561] pppox: convert pppox_sk() to use container_of() Use container_of() macro instead of direct pointer casting to get the pppox_sock from a sock pointer. Signed-off-by: Qingfang Deng Link: https://patch.msgid.link/20260410054954.114031-2-qingfang.deng@linux.dev Signed-off-by: Jakub Kicinski --- include/linux/if_pppox.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index 636772693f9a..594d6dc3f4c9 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -54,7 +54,7 @@ struct pppox_sock { static inline struct pppox_sock *pppox_sk(struct sock *sk) { - return (struct pppox_sock *)sk; + return container_of(sk, struct pppox_sock, sk); } struct module; From 2dddb34dd0d07b01fa770eca89480a4da4f13153 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Fri, 10 Apr 2026 03:57:52 +0100 Subject: [PATCH 1478/1561] net: ethernet: mtk_eth_soc: initialize PPE per-tag-layer MTU registers The PPE enforces output frame size limits via per-tag-layer VLAN_MTU registers that the driver never initializes. The hardware defaults do not account for PPPoE overhead, causing the PPE to punt encapsulated frames back to the CPU instead of forwarding them. Initialize the registers at PPE start and on MTU changes using the maximum GMAC MTU. This is a conservative approximation -- the actual per-PPE requirement depends on egress path, but using the global maximum ensures the limits are never too small. Fixes: ba37b7caf1ed2 ("net: ethernet: mtk_eth_soc: add support for initializing the PPE") Signed-off-by: Daniel Golle Link: https://patch.msgid.link/ec995ab8ce8be423267a1cc093147a74d2eb9d82.1775789829.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 22 ++++++++++++++- drivers/net/ethernet/mediatek/mtk_ppe.c | 30 +++++++++++++++++++++ drivers/net/ethernet/mediatek/mtk_ppe.h | 1 + 3 files changed, 52 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index ddc321a02fda..796f79088f36 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -3566,12 +3566,23 @@ found: return NOTIFY_DONE; } +static int mtk_max_gmac_mtu(struct mtk_eth *eth) +{ + int i, max_mtu = ETH_DATA_LEN; + + for (i = 0; i < ARRAY_SIZE(eth->netdev); i++) + if (eth->netdev[i] && eth->netdev[i]->mtu > max_mtu) + max_mtu = eth->netdev[i]->mtu; + + return max_mtu; +} + static int mtk_open(struct net_device *dev) { struct mtk_mac *mac = netdev_priv(dev); struct mtk_eth *eth = mac->hw; struct mtk_mac *target_mac; - int i, err, ppe_num; + int i, err, ppe_num, mtu; ppe_num = eth->soc->ppe_num; @@ -3618,6 +3629,10 @@ static int mtk_open(struct net_device *dev) mtk_gdm_config(eth, target_mac->id, gdm_config); } + mtu = mtk_max_gmac_mtu(eth); + for (i = 0; i < ARRAY_SIZE(eth->ppe); i++) + mtk_ppe_update_mtu(eth->ppe[i], mtu); + napi_enable(ð->tx_napi); napi_enable(ð->rx_napi); mtk_tx_irq_enable(eth, MTK_TX_DONE_INT); @@ -4311,6 +4326,7 @@ static int mtk_change_mtu(struct net_device *dev, int new_mtu) int length = new_mtu + MTK_RX_ETH_HLEN; struct mtk_mac *mac = netdev_priv(dev); struct mtk_eth *eth = mac->hw; + int max_mtu, i; if (rcu_access_pointer(eth->prog) && length > MTK_PP_MAX_BUF_SIZE) { @@ -4321,6 +4337,10 @@ static int mtk_change_mtu(struct net_device *dev, int new_mtu) mtk_set_mcr_max_rx(mac, length); WRITE_ONCE(dev->mtu, new_mtu); + max_mtu = mtk_max_gmac_mtu(eth); + for (i = 0; i < ARRAY_SIZE(eth->ppe); i++) + mtk_ppe_update_mtu(eth->ppe[i], max_mtu); + return 0; } diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c index 75f7728fc796..18279e2a7022 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe.c @@ -973,6 +973,36 @@ static void mtk_ppe_init_foe_table(struct mtk_ppe *ppe) } } +void mtk_ppe_update_mtu(struct mtk_ppe *ppe, int mtu) +{ + int base; + u32 val; + + if (!ppe) + return; + + /* The PPE checks output frame size against per-tag-layer MTU limits, + * treating PPPoE and DSA tags just like 802.1Q VLAN tags. The Linux + * device MTU already accounts for PPPoE (PPPOE_SES_HLEN) and DSA tag + * overhead, but 802.1Q VLAN tags are handled transparently without + * being reflected by the lower device MTU being increased by 4. + * Use the maximum MTU across all GMAC interfaces so that PPE output + * frame limits are sufficiently high regardless of which port a flow + * egresses through. + */ + base = ETH_HLEN + mtu; + + val = FIELD_PREP(MTK_PPE_VLAN_MTU0_NONE, base) | + FIELD_PREP(MTK_PPE_VLAN_MTU0_1TAG, base + VLAN_HLEN); + ppe_w32(ppe, MTK_PPE_VLAN_MTU0, val); + + val = FIELD_PREP(MTK_PPE_VLAN_MTU1_2TAG, + base + 2 * VLAN_HLEN) | + FIELD_PREP(MTK_PPE_VLAN_MTU1_3TAG, + base + 3 * VLAN_HLEN); + ppe_w32(ppe, MTK_PPE_VLAN_MTU1, val); +} + void mtk_ppe_start(struct mtk_ppe *ppe) { u32 val; diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.h b/drivers/net/ethernet/mediatek/mtk_ppe.h index 223f709e2704..ba85e39a155b 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe.h +++ b/drivers/net/ethernet/mediatek/mtk_ppe.h @@ -346,6 +346,7 @@ struct mtk_ppe { struct mtk_ppe *mtk_ppe_init(struct mtk_eth *eth, void __iomem *base, int index); void mtk_ppe_deinit(struct mtk_eth *eth); +void mtk_ppe_update_mtu(struct mtk_ppe *ppe, int mtu); void mtk_ppe_start(struct mtk_ppe *ppe); int mtk_ppe_stop(struct mtk_ppe *ppe); int mtk_ppe_prepare_reset(struct mtk_ppe *ppe); From d471d70cc964c3fe5c5e7765ab07b5f2f70f276e Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Fri, 10 Apr 2026 09:40:07 +0200 Subject: [PATCH 1479/1561] dt-bindings: net: qcom,ipa: add Milos compatible Add support for the Milos SoC, which uses IPA v5.2. Acked-by: Krzysztof Kozlowski Signed-off-by: Luca Weiss Link: https://patch.msgid.link/20260410-ipa-v5-2-v2-1-778422a05060@fairphone.com Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/net/qcom,ipa.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/net/qcom,ipa.yaml b/Documentation/devicetree/bindings/net/qcom,ipa.yaml index e4bb627e1757..fdeaa81b9645 100644 --- a/Documentation/devicetree/bindings/net/qcom,ipa.yaml +++ b/Documentation/devicetree/bindings/net/qcom,ipa.yaml @@ -44,6 +44,7 @@ properties: compatible: oneOf: - enum: + - qcom,milos-ipa - qcom,msm8998-ipa - qcom,sc7180-ipa - qcom,sc7280-ipa From 4bf38bac1b2e2586a14529053cf56346db8a0032 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Fri, 10 Apr 2026 09:40:08 +0200 Subject: [PATCH 1480/1561] net: ipa: add IPA v5.2 configuration data Add the configuration data required for IPA v5.2, which is used in the Qualcomm Milos SoC. Reviewed-by: Simon Horman Signed-off-by: Luca Weiss Link: https://patch.msgid.link/20260410-ipa-v5-2-v2-2-778422a05060@fairphone.com Signed-off-by: Jakub Kicinski --- drivers/net/ipa/Makefile | 2 +- drivers/net/ipa/data/ipa_data-v5.2.c | 452 +++++++++++++++++++++++++++ drivers/net/ipa/gsi_reg.c | 1 + drivers/net/ipa/ipa_data.h | 1 + drivers/net/ipa/ipa_main.c | 4 + drivers/net/ipa/ipa_reg.c | 1 + drivers/net/ipa/ipa_sysfs.c | 2 + drivers/net/ipa/ipa_version.h | 2 + 8 files changed, 464 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ipa/data/ipa_data-v5.2.c diff --git a/drivers/net/ipa/Makefile b/drivers/net/ipa/Makefile index d3abb38633e0..e148ec3c1a10 100644 --- a/drivers/net/ipa/Makefile +++ b/drivers/net/ipa/Makefile @@ -7,7 +7,7 @@ IPA_REG_VERSIONS := 3.1 3.5.1 4.2 4.5 4.7 4.9 4.11 5.0 5.5 # Some IPA versions can reuse another set of GSI register definitions. GSI_REG_VERSIONS := 3.1 3.5.1 4.0 4.5 4.9 4.11 5.0 -IPA_DATA_VERSIONS := 3.1 3.5.1 4.2 4.5 4.7 4.9 4.11 5.0 5.5 +IPA_DATA_VERSIONS := 3.1 3.5.1 4.2 4.5 4.7 4.9 4.11 5.0 5.2 5.5 obj-$(CONFIG_QCOM_IPA) += ipa.o diff --git a/drivers/net/ipa/data/ipa_data-v5.2.c b/drivers/net/ipa/data/ipa_data-v5.2.c new file mode 100644 index 000000000000..c56c4f1836ae --- /dev/null +++ b/drivers/net/ipa/data/ipa_data-v5.2.c @@ -0,0 +1,452 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023-2024 Linaro Ltd. + * Copyright (c) 2026, Luca Weiss + */ + +#include +#include + +#include "../ipa_data.h" +#include "../ipa_endpoint.h" +#include "../ipa_mem.h" +#include "../ipa_version.h" + +/** enum ipa_resource_type - IPA resource types for an SoC having IPA v5.2 */ +enum ipa_resource_type { + /* Source resource types; first must have value 0 */ + IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS = 0, + IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_LISTS, + IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_BUFF, + IPA_RESOURCE_TYPE_SRC_HPS_DMARS, + IPA_RESOURCE_TYPE_SRC_ACK_ENTRIES, + + /* Destination resource types; first must have value 0 */ + IPA_RESOURCE_TYPE_DST_DATA_SECTORS = 0, + IPA_RESOURCE_TYPE_DST_DPS_DMARS, + IPA_RESOURCE_TYPE_DST_ULSO_SEGMENTS, +}; + +/* Resource groups used for an SoC having IPA v5.2 */ +enum ipa_rsrc_group_id { + /* Source resource group identifiers */ + IPA_RSRC_GROUP_SRC_UL = 0, + IPA_RSRC_GROUP_SRC_DL, + IPA_RSRC_GROUP_SRC_URLLC, + IPA_RSRC_GROUP_SRC_COUNT, /* Last in set; not a source group */ + + /* Destination resource group identifiers */ + IPA_RSRC_GROUP_DST_UL = 0, + IPA_RSRC_GROUP_DST_DL, + IPA_RSRC_GROUP_DST_UNUSED_1, + IPA_RSRC_GROUP_DST_DRB_IP, + IPA_RSRC_GROUP_DST_COUNT, /* Last; not a destination group */ +}; + +/* QSB configuration data for an SoC having IPA v5.2 */ +static const struct ipa_qsb_data ipa_qsb_data[] = { + [IPA_QSB_MASTER_DDR] = { + .max_writes = 13, + .max_reads = 13, + .max_reads_beats = 0, + }, +}; + +/* Endpoint configuration data for an SoC having IPA v5.2 */ +static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = { + [IPA_ENDPOINT_AP_COMMAND_TX] = { + .ee_id = GSI_EE_AP, + .channel_id = 6, + .endpoint_id = 9, + .toward_ipa = true, + .channel = { + .tre_count = 256, + .event_count = 256, + .tlv_count = 20, + }, + .endpoint = { + .config = { + .resource_group = IPA_RSRC_GROUP_SRC_UL, + .dma_mode = true, + .dma_endpoint = IPA_ENDPOINT_AP_LAN_RX, + .tx = { + .seq_type = IPA_SEQ_DMA, + }, + }, + }, + }, + [IPA_ENDPOINT_AP_LAN_RX] = { + .ee_id = GSI_EE_AP, + .channel_id = 7, + .endpoint_id = 11, + .toward_ipa = false, + .channel = { + .tre_count = 256, + .event_count = 256, + .tlv_count = 9, + }, + .endpoint = { + .config = { + .resource_group = IPA_RSRC_GROUP_DST_UL, + .aggregation = true, + .status_enable = true, + .rx = { + .buffer_size = 8192, + .pad_align = ilog2(sizeof(u32)), + .aggr_time_limit = 500, + }, + }, + }, + }, + [IPA_ENDPOINT_AP_MODEM_TX] = { + .ee_id = GSI_EE_AP, + .channel_id = 5, + .endpoint_id = 2, + .toward_ipa = true, + .channel = { + .tre_count = 512, + .event_count = 512, + .tlv_count = 25, + }, + .endpoint = { + .filter_support = true, + .config = { + .resource_group = IPA_RSRC_GROUP_SRC_UL, + .checksum = true, + .qmap = true, + .status_enable = true, + .tx = { + .seq_type = IPA_SEQ_2_PASS_SKIP_LAST_UC, + .status_endpoint = + IPA_ENDPOINT_MODEM_AP_RX, + }, + }, + }, + }, + [IPA_ENDPOINT_AP_MODEM_RX] = { + .ee_id = GSI_EE_AP, + .channel_id = 9, + .endpoint_id = 18, + .toward_ipa = false, + .channel = { + .tre_count = 256, + .event_count = 256, + .tlv_count = 9, + }, + .endpoint = { + .config = { + .resource_group = IPA_RSRC_GROUP_DST_DL, + .checksum = true, + .qmap = true, + .aggregation = true, + .rx = { + .buffer_size = 8192, + .aggr_time_limit = 500, + .aggr_close_eof = true, + }, + }, + }, + }, + [IPA_ENDPOINT_MODEM_AP_TX] = { + .ee_id = GSI_EE_MODEM, + .channel_id = 0, + .endpoint_id = 7, + .toward_ipa = true, + .endpoint = { + .filter_support = true, + }, + }, + [IPA_ENDPOINT_MODEM_AP_RX] = { + .ee_id = GSI_EE_MODEM, + .channel_id = 7, + .endpoint_id = 16, + .toward_ipa = false, + }, + [IPA_ENDPOINT_MODEM_DL_NLO_TX] = { + .ee_id = GSI_EE_MODEM, + .channel_id = 2, + .endpoint_id = 10, + .toward_ipa = true, + .endpoint = { + .filter_support = true, + }, + }, +}; + +/* Source resource configuration data for an SoC having IPA v5.2 */ +static const struct ipa_resource ipa_resource_src[] = { + [IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS] = { + .limits[IPA_RSRC_GROUP_SRC_UL] = { + .min = 1, .max = 7, + }, + .limits[IPA_RSRC_GROUP_SRC_DL] = { + .min = 1, .max = 7, + }, + .limits[IPA_RSRC_GROUP_SRC_URLLC] = { + .min = 0, .max = 5, + }, + }, + [IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_LISTS] = { + .limits[IPA_RSRC_GROUP_SRC_UL] = { + .min = 8, .max = 8, + }, + .limits[IPA_RSRC_GROUP_SRC_DL] = { + .min = 8, .max = 8, + }, + .limits[IPA_RSRC_GROUP_SRC_URLLC] = { + .min = 8, .max = 8, + }, + }, + [IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_BUFF] = { + .limits[IPA_RSRC_GROUP_SRC_UL] = { + .min = 10, .max = 10, + }, + .limits[IPA_RSRC_GROUP_SRC_DL] = { + .min = 12, .max = 12, + }, + .limits[IPA_RSRC_GROUP_SRC_URLLC] = { + .min = 12, .max = 12, + }, + }, + [IPA_RESOURCE_TYPE_SRC_HPS_DMARS] = { + .limits[IPA_RSRC_GROUP_SRC_UL] = { + .min = 0, .max = 63, + }, + .limits[IPA_RSRC_GROUP_SRC_DL] = { + .min = 0, .max = 63, + }, + .limits[IPA_RSRC_GROUP_SRC_URLLC] = { + .min = 0, .max = 63, + }, + }, + [IPA_RESOURCE_TYPE_SRC_ACK_ENTRIES] = { + .limits[IPA_RSRC_GROUP_SRC_UL] = { + .min = 15, .max = 15, + }, + .limits[IPA_RSRC_GROUP_SRC_DL] = { + .min = 15, .max = 15, + }, + .limits[IPA_RSRC_GROUP_SRC_URLLC] = { + .min = 12, .max = 12, + }, + }, +}; + +/* Destination resource configuration data for an SoC having IPA v5.2 */ +static const struct ipa_resource ipa_resource_dst[] = { + [IPA_RESOURCE_TYPE_DST_DATA_SECTORS] = { + .limits[IPA_RSRC_GROUP_DST_UL] = { + .min = 3, .max = 3, + }, + .limits[IPA_RSRC_GROUP_DST_DL] = { + .min = 3, .max = 3, + }, + .limits[IPA_RSRC_GROUP_DST_DRB_IP] = { + .min = 23, .max = 23, + }, + }, + [IPA_RESOURCE_TYPE_DST_DPS_DMARS] = { + .limits[IPA_RSRC_GROUP_DST_UL] = { + .min = 1, .max = 2, + }, + .limits[IPA_RSRC_GROUP_DST_DL] = { + .min = 1, .max = 2, + }, + }, + [IPA_RESOURCE_TYPE_DST_ULSO_SEGMENTS] = { + .limits[IPA_RSRC_GROUP_DST_UL] = { + .min = 1, .max = 63, + }, + .limits[IPA_RSRC_GROUP_DST_DL] = { + .min = 1, .max = 63, + }, + }, +}; + +/* Resource configuration data for an SoC having IPA v5.2 */ +static const struct ipa_resource_data ipa_resource_data = { + .rsrc_group_dst_count = IPA_RSRC_GROUP_DST_COUNT, + .rsrc_group_src_count = IPA_RSRC_GROUP_SRC_COUNT, + .resource_src_count = ARRAY_SIZE(ipa_resource_src), + .resource_src = ipa_resource_src, + .resource_dst_count = ARRAY_SIZE(ipa_resource_dst), + .resource_dst = ipa_resource_dst, +}; + +/* IPA-resident memory region data for an SoC having IPA v5.2 */ +static const struct ipa_mem ipa_mem_local_data[] = { + { + .id = IPA_MEM_UC_SHARED, + .offset = 0x0000, + .size = 0x0080, + .canary_count = 0, + }, + { + .id = IPA_MEM_UC_INFO, + .offset = 0x0080, + .size = 0x0200, + .canary_count = 0, + }, + { + .id = IPA_MEM_V4_FILTER_HASHED, + .offset = 0x0288, + .size = 0x0078, + .canary_count = 2, + }, + { + .id = IPA_MEM_V4_FILTER, + .offset = 0x0308, + .size = 0x0078, + .canary_count = 2, + }, + { + .id = IPA_MEM_V6_FILTER_HASHED, + .offset = 0x0388, + .size = 0x0078, + .canary_count = 2, + }, + { + .id = IPA_MEM_V6_FILTER, + .offset = 0x0408, + .size = 0x0078, + .canary_count = 2, + }, + { + .id = IPA_MEM_V4_ROUTE_HASHED, + .offset = 0x0488, + .size = 0x0098, + .canary_count = 2, + }, + { + .id = IPA_MEM_V4_ROUTE, + .offset = 0x0528, + .size = 0x0098, + .canary_count = 2, + }, + { + .id = IPA_MEM_V6_ROUTE_HASHED, + .offset = 0x05c8, + .size = 0x0098, + .canary_count = 2, + }, + { + .id = IPA_MEM_V6_ROUTE, + .offset = 0x0668, + .size = 0x0098, + .canary_count = 2, + }, + { + .id = IPA_MEM_MODEM_HEADER, + .offset = 0x0708, + .size = 0x0240, + .canary_count = 2, + }, + { + .id = IPA_MEM_AP_HEADER, + .offset = 0x0948, + .size = 0x01e0, + .canary_count = 0, + }, + { + .id = IPA_MEM_MODEM_PROC_CTX, + .offset = 0x0b40, + .size = 0x0b20, + .canary_count = 2, + }, + { + .id = IPA_MEM_AP_PROC_CTX, + .offset = 0x1660, + .size = 0x0200, + .canary_count = 0, + }, + { + .id = IPA_MEM_STATS_QUOTA_MODEM, + .offset = 0x1868, + .size = 0x0060, + .canary_count = 2, + }, + { + .id = IPA_MEM_STATS_QUOTA_AP, + .offset = 0x18c8, + .size = 0x0048, + .canary_count = 0, + }, + { + .id = IPA_MEM_STATS_TETHERING, + .offset = 0x1910, + .size = 0x03c0, + .canary_count = 0, + }, + { + .id = IPA_MEM_STATS_FILTER_ROUTE, + .offset = 0x1cd0, + .size = 0x0ba0, + .canary_count = 0, + }, + { + .id = IPA_MEM_STATS_DROP, + .offset = 0x2870, + .size = 0x0020, + .canary_count = 0, + }, + { + .id = IPA_MEM_MODEM, + .offset = 0x2898, + .size = 0x0d48, + .canary_count = 2, + }, + { + .id = IPA_MEM_NAT_TABLE, + .offset = 0x35e0, + .size = 0x0900, + .canary_count = 0, + }, + { + .id = IPA_MEM_PDN_CONFIG, + .offset = 0x3ee8, + .size = 0x0100, + .canary_count = 2, + }, +}; + +/* Memory configuration data for an SoC having IPA v5.2 */ +static const struct ipa_mem_data ipa_mem_data = { + .local_count = ARRAY_SIZE(ipa_mem_local_data), + .local = ipa_mem_local_data, + .smem_size = 0x0000b000, +}; + +/* Interconnect rates are in 1000 byte/second units */ +static const struct ipa_interconnect_data ipa_interconnect_data[] = { + { + .name = "memory", + .peak_bandwidth = 1300000, /* 1.3 GBps */ + .average_bandwidth = 600000, /* 600 MBps */ + }, + /* Average rate is unused for the next interconnect */ + { + .name = "config", + .peak_bandwidth = 76800, /* 76.8 MBps */ + .average_bandwidth = 0, /* unused */ + }, +}; + +/* Clock and interconnect configuration data for an SoC having IPA v5.2 */ +static const struct ipa_power_data ipa_power_data = { + .core_clock_rate = 120 * 1000 * 1000, /* Hz */ + .interconnect_count = ARRAY_SIZE(ipa_interconnect_data), + .interconnect_data = ipa_interconnect_data, +}; + +/* Configuration data for an SoC having IPA v5.2. */ +const struct ipa_data ipa_data_v5_2 = { + .version = IPA_VERSION_5_2, + .qsb_count = ARRAY_SIZE(ipa_qsb_data), + .qsb_data = ipa_qsb_data, + .modem_route_count = 11, + .endpoint_count = ARRAY_SIZE(ipa_gsi_endpoint_data), + .endpoint_data = ipa_gsi_endpoint_data, + .resource_data = &ipa_resource_data, + .mem_data = &ipa_mem_data, + .power_data = &ipa_power_data, +}; diff --git a/drivers/net/ipa/gsi_reg.c b/drivers/net/ipa/gsi_reg.c index 825598661188..e13cf835a013 100644 --- a/drivers/net/ipa/gsi_reg.c +++ b/drivers/net/ipa/gsi_reg.c @@ -110,6 +110,7 @@ static const struct regs *gsi_regs(struct gsi *gsi) return &gsi_regs_v4_11; case IPA_VERSION_5_0: + case IPA_VERSION_5_2: case IPA_VERSION_5_5: return &gsi_regs_v5_0; diff --git a/drivers/net/ipa/ipa_data.h b/drivers/net/ipa/ipa_data.h index f3bdc64cef05..3eb9dc2ce339 100644 --- a/drivers/net/ipa/ipa_data.h +++ b/drivers/net/ipa/ipa_data.h @@ -253,6 +253,7 @@ extern const struct ipa_data ipa_data_v4_7; extern const struct ipa_data ipa_data_v4_9; extern const struct ipa_data ipa_data_v4_11; extern const struct ipa_data ipa_data_v5_0; +extern const struct ipa_data ipa_data_v5_2; extern const struct ipa_data ipa_data_v5_5; #endif /* _IPA_DATA_H_ */ diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c index edead9c48d1f..8e2b4bf7b14e 100644 --- a/drivers/net/ipa/ipa_main.c +++ b/drivers/net/ipa/ipa_main.c @@ -669,6 +669,10 @@ static const struct of_device_id ipa_match[] = { .compatible = "qcom,sdx65-ipa", .data = &ipa_data_v5_0, }, + { + .compatible = "qcom,milos-ipa", + .data = &ipa_data_v5_2, + }, { .compatible = "qcom,sm8550-ipa", .data = &ipa_data_v5_5, diff --git a/drivers/net/ipa/ipa_reg.c b/drivers/net/ipa/ipa_reg.c index c574f798fdc9..30bd69f4c147 100644 --- a/drivers/net/ipa/ipa_reg.c +++ b/drivers/net/ipa/ipa_reg.c @@ -125,6 +125,7 @@ static const struct regs *ipa_regs(enum ipa_version version) case IPA_VERSION_4_11: return &ipa_regs_v4_11; case IPA_VERSION_5_0: + case IPA_VERSION_5_2: return &ipa_regs_v5_0; case IPA_VERSION_5_5: return &ipa_regs_v5_5; diff --git a/drivers/net/ipa/ipa_sysfs.c b/drivers/net/ipa/ipa_sysfs.c index a53e9e6f6cdf..8b805a9d49e6 100644 --- a/drivers/net/ipa/ipa_sysfs.c +++ b/drivers/net/ipa/ipa_sysfs.c @@ -39,6 +39,8 @@ static const char *ipa_version_string(struct ipa *ipa) return "5.0"; case IPA_VERSION_5_1: return "5.1"; + case IPA_VERSION_5_2: + return "5.2"; case IPA_VERSION_5_5: return "5.5"; default: diff --git a/drivers/net/ipa/ipa_version.h b/drivers/net/ipa/ipa_version.h index 38c47f51a50c..c157c72a5bad 100644 --- a/drivers/net/ipa/ipa_version.h +++ b/drivers/net/ipa/ipa_version.h @@ -23,6 +23,7 @@ * @IPA_VERSION_4_11: IPA version 4.11/GSI version 2.11 (2.1.1) * @IPA_VERSION_5_0: IPA version 5.0/GSI version 3.0 * @IPA_VERSION_5_1: IPA version 5.1/GSI version 3.0 + * @IPA_VERSION_5_2: IPA version 5.2/GSI version 5.2 * @IPA_VERSION_5_5: IPA version 5.5/GSI version 5.5 * @IPA_VERSION_COUNT: Number of defined IPA versions * @@ -43,6 +44,7 @@ enum ipa_version { IPA_VERSION_4_11, IPA_VERSION_5_0, IPA_VERSION_5_1, + IPA_VERSION_5_2, IPA_VERSION_5_5, IPA_VERSION_COUNT, /* Last; not a version */ }; From c0b4382c86e3d92f79b71c9ed55654db520d7b36 Mon Sep 17 00:00:00 2001 From: Konstantin Khorenko Date: Fri, 10 Apr 2026 19:21:49 +0300 Subject: [PATCH 1481/1561] net: fix skb_ext_total_length() BUILD_BUG_ON with CONFIG_GCOV_PROFILE_ALL When CONFIG_GCOV_PROFILE_ALL=y is enabled, the kernel fails to build: In file included from : In function 'skb_extensions_init', inlined from 'skb_init' at net/core/skbuff.c:5214:2: ././include/linux/compiler_types.h:706:45: error: call to '__compiletime_assert_1490' declared with attribute error: BUILD_BUG_ON failed: skb_ext_total_length() > 255 CONFIG_GCOV_PROFILE_ALL adds -fprofile-arcs -ftest-coverage -fno-tree-loop-im to CFLAGS globally. GCC inserts branch profiling counters into the skb_ext_total_length() loop and, combined with -fno-tree-loop-im (which disables loop invariant motion), cannot constant-fold the result. BUILD_BUG_ON requires a compile-time constant and fails. The issue manifests in kernels with 5+ SKB extension types enabled (e.g., after addition of SKB_EXT_CAN, SKB_EXT_PSP). With 4 extensions GCC can still unroll and fold the loop despite GCOV instrumentation; with 5+ it gives up. Mark skb_ext_total_length() with __no_profile to prevent GCOV from inserting counters into this function. Without counters the loop is "clean" and GCC can constant-fold it even with -fno-tree-loop-im active. This allows BUILD_BUG_ON to work correctly while keeping GCOV profiling for the rest of the kernel. This also removes the CONFIG_KCOV_INSTRUMENT_ALL preprocessor guard introduced by d6e5794b06c0. That guard was added as a precaution because KCOV instrumentation was also suspected of inhibiting constant folding. However, KCOV uses -fsanitize-coverage=trace-pc, which inserts lightweight trace callbacks that do not interfere with GCC's constant folding or loop optimization passes. Only GCOV's -fprofile-arcs combined with -fno-tree-loop-im actually prevents the compiler from evaluating the loop at compile time. The guard is therefore unnecessary and can be safely removed. Fixes: 96ea3a1e2d31 ("can: add CAN skb extension infrastructure") Signed-off-by: Konstantin Khorenko Reviewed-by: Thomas Weissschuh Link: https://patch.msgid.link/20260410162150.3105738-2-khorenko@virtuozzo.com Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c5441154795c..87e7d689ab25 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5118,7 +5118,7 @@ static const u8 skb_ext_type_len[] = { #endif }; -static __always_inline unsigned int skb_ext_total_length(void) +static __always_inline __no_profile unsigned int skb_ext_total_length(void) { unsigned int l = SKB_EXT_CHUNKSIZEOF(struct skb_ext); int i; @@ -5132,9 +5132,7 @@ static __always_inline unsigned int skb_ext_total_length(void) static void skb_extensions_init(void) { BUILD_BUG_ON(SKB_EXT_NUM > 8); -#if !IS_ENABLED(CONFIG_KCOV_INSTRUMENT_ALL) BUILD_BUG_ON(skb_ext_total_length() > 255); -#endif skbuff_ext_cache = kmem_cache_create("skbuff_ext_cache", SKB_EXT_ALIGN_VALUE * skb_ext_total_length(), From 29b1ee8788c5fe03588e06adf0868008305d9b01 Mon Sep 17 00:00:00 2001 From: Konstantin Khorenko Date: Fri, 10 Apr 2026 19:21:50 +0300 Subject: [PATCH 1482/1561] net: add noinline __init __no_profile to skb_extensions_init() for GCOV compatibility With -fprofile-update=atomic in global CFLAGS_GCOV, GCC still cannot constant-fold the skb_ext_total_length() loop when it is inlined into a profiled caller. The existing __no_profile on skb_ext_total_length() itself is insufficient because after __always_inline expansion the code resides in the caller's body, which still carries GCOV instrumentation. Mark skb_extensions_init() with __no_profile so the BUILD_BUG_ON checks can be evaluated at compile time. Also mark it noinline to prevent the compiler from inlining it into skb_init() (which lacks __no_profile), which would re-expose the function body to GCOV instrumentation. Add __init since skb_extensions_init() is only called from __init skb_init(). Previously it was implicitly inlined into the .init.text section; with noinline it would otherwise remain in permanent .text, wasting memory after boot. Build-tested with both CONFIG_GCOV_PROFILE_ALL=y and CONFIG_KCOV_INSTRUMENT_ALL=y. Signed-off-by: Konstantin Khorenko Link: https://patch.msgid.link/20260410162150.3105738-3-khorenko@virtuozzo.com Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 87e7d689ab25..7dad68e3b518 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5129,7 +5129,7 @@ static __always_inline __no_profile unsigned int skb_ext_total_length(void) return l; } -static void skb_extensions_init(void) +static noinline void __init __no_profile skb_extensions_init(void) { BUILD_BUG_ON(SKB_EXT_NUM > 8); BUILD_BUG_ON(skb_ext_total_length() > 255); From 3faf0ce6e499dfd32e596bcb5bca2c44d64f4cc1 Mon Sep 17 00:00:00 2001 From: Marc Harvey Date: Thu, 9 Apr 2026 02:59:23 +0000 Subject: [PATCH 1483/1561] net: team: Annotate reads and writes for mixed lock accessed values The team_port's "index" and the team's "en_port_count" are read in the hot transmit path, but are only written to when holding the rtnl lock. Use READ_ONCE() for all lockless reads of these values, and use WRITE_ONCE() for all writes. Reviewed-by: Jiri Pirko Signed-off-by: Marc Harvey Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260409-teaming-driver-internal-v7-1-f47e7589685d@google.com Signed-off-by: Paolo Abeni --- drivers/net/team/team_core.c | 11 ++++++----- drivers/net/team/team_mode_random.c | 2 +- include/linux/if_team.h | 4 ++-- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c index 566a5d102c23..becd066279a6 100644 --- a/drivers/net/team/team_core.c +++ b/drivers/net/team/team_core.c @@ -938,7 +938,8 @@ static void team_port_enable(struct team *team, { if (team_port_enabled(port)) return; - port->index = team->en_port_count++; + WRITE_ONCE(port->index, team->en_port_count); + WRITE_ONCE(team->en_port_count, team->en_port_count + 1); hlist_add_head_rcu(&port->hlist, team_port_index_hash(team, port->index)); team_adjust_ops(team); @@ -958,7 +959,7 @@ static void __reconstruct_port_hlist(struct team *team, int rm_index) for (i = rm_index + 1; i < team->en_port_count; i++) { port = team_get_port_by_index(team, i); hlist_del_rcu(&port->hlist); - port->index--; + WRITE_ONCE(port->index, port->index - 1); hlist_add_head_rcu(&port->hlist, team_port_index_hash(team, port->index)); } @@ -973,8 +974,8 @@ static void team_port_disable(struct team *team, team->ops.port_disabled(team, port); hlist_del_rcu(&port->hlist); __reconstruct_port_hlist(team, port->index); - port->index = -1; - team->en_port_count--; + WRITE_ONCE(port->index, -1); + WRITE_ONCE(team->en_port_count, team->en_port_count - 1); team_queue_override_port_del(team, port); team_adjust_ops(team); team_lower_state_changed(port); @@ -1245,7 +1246,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev, netif_addr_unlock_bh(dev); } - port->index = -1; + WRITE_ONCE(port->index, -1); list_add_tail_rcu(&port->list, &team->port_list); team_port_enable(team, port); netdev_compute_master_upper_features(dev, true); diff --git a/drivers/net/team/team_mode_random.c b/drivers/net/team/team_mode_random.c index 53d0ce34b8ce..169a7bc865b2 100644 --- a/drivers/net/team/team_mode_random.c +++ b/drivers/net/team/team_mode_random.c @@ -16,7 +16,7 @@ static bool rnd_transmit(struct team *team, struct sk_buff *skb) struct team_port *port; int port_index; - port_index = get_random_u32_below(team->en_port_count); + port_index = get_random_u32_below(READ_ONCE(team->en_port_count)); port = team_get_port_by_index_rcu(team, port_index); if (unlikely(!port)) goto drop; diff --git a/include/linux/if_team.h b/include/linux/if_team.h index ccb5327de26d..06f4d7400c1e 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -77,7 +77,7 @@ static inline struct team_port *team_port_get_rcu(const struct net_device *dev) static inline bool team_port_enabled(struct team_port *port) { - return port->index != -1; + return READ_ONCE(port->index) != -1; } static inline bool team_port_txable(struct team_port *port) @@ -272,7 +272,7 @@ static inline struct team_port *team_get_port_by_index_rcu(struct team *team, struct hlist_head *head = team_port_index_hash(team, port_index); hlist_for_each_entry_rcu(port, head, hlist) - if (port->index == port_index) + if (READ_ONCE(port->index) == port_index) return port; return NULL; } From 014f249121d73909528df320818fba7693d0ec92 Mon Sep 17 00:00:00 2001 From: Marc Harvey Date: Thu, 9 Apr 2026 02:59:24 +0000 Subject: [PATCH 1484/1561] net: team: Remove unused team_mode_op, port_enabled This team_mode_op wasn't used by any of the team modes, so remove it. Reviewed-by: Jiri Pirko Signed-off-by: Marc Harvey Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260409-teaming-driver-internal-v7-2-f47e7589685d@google.com Signed-off-by: Paolo Abeni --- drivers/net/team/team_core.c | 2 -- include/linux/if_team.h | 1 - 2 files changed, 3 deletions(-) diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c index becd066279a6..e54bd21bd068 100644 --- a/drivers/net/team/team_core.c +++ b/drivers/net/team/team_core.c @@ -944,8 +944,6 @@ static void team_port_enable(struct team *team, team_port_index_hash(team, port->index)); team_adjust_ops(team); team_queue_override_port_add(team, port); - if (team->ops.port_enabled) - team->ops.port_enabled(team, port); team_notify_peers(team); team_mcast_rejoin(team); team_lower_state_changed(port); diff --git a/include/linux/if_team.h b/include/linux/if_team.h index 06f4d7400c1e..a761f5282bcf 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -121,7 +121,6 @@ struct team_mode_ops { int (*port_enter)(struct team *team, struct team_port *port); void (*port_leave)(struct team *team, struct team_port *port); void (*port_change_dev_addr)(struct team *team, struct team_port *port); - void (*port_enabled)(struct team *team, struct team_port *port); void (*port_disabled)(struct team *team, struct team_port *port); }; From cfa477df2cc62ba53cb936669886361152b594a7 Mon Sep 17 00:00:00 2001 From: Marc Harvey Date: Thu, 9 Apr 2026 02:59:25 +0000 Subject: [PATCH 1485/1561] net: team: Rename port_disabled team mode op to port_tx_disabled This team mode op is only used by the load balance mode, and it only uses it in the tx path. Reviewed-by: Jiri Pirko Signed-off-by: Marc Harvey Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260409-teaming-driver-internal-v7-3-f47e7589685d@google.com Signed-off-by: Paolo Abeni --- drivers/net/team/team_core.c | 4 ++-- drivers/net/team/team_mode_loadbalance.c | 4 ++-- include/linux/if_team.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c index e54bd21bd068..2ce31999c99f 100644 --- a/drivers/net/team/team_core.c +++ b/drivers/net/team/team_core.c @@ -968,8 +968,8 @@ static void team_port_disable(struct team *team, { if (!team_port_enabled(port)) return; - if (team->ops.port_disabled) - team->ops.port_disabled(team, port); + if (team->ops.port_tx_disabled) + team->ops.port_tx_disabled(team, port); hlist_del_rcu(&port->hlist); __reconstruct_port_hlist(team, port->index); WRITE_ONCE(port->index, -1); diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c index 684954c2a8de..840f409d250b 100644 --- a/drivers/net/team/team_mode_loadbalance.c +++ b/drivers/net/team/team_mode_loadbalance.c @@ -655,7 +655,7 @@ static void lb_port_leave(struct team *team, struct team_port *port) free_percpu(lb_port_priv->pcpu_stats); } -static void lb_port_disabled(struct team *team, struct team_port *port) +static void lb_port_tx_disabled(struct team *team, struct team_port *port) { lb_tx_hash_to_port_mapping_null_port(team, port); } @@ -665,7 +665,7 @@ static const struct team_mode_ops lb_mode_ops = { .exit = lb_exit, .port_enter = lb_port_enter, .port_leave = lb_port_leave, - .port_disabled = lb_port_disabled, + .port_tx_disabled = lb_port_tx_disabled, .receive = lb_receive, .transmit = lb_transmit, }; diff --git a/include/linux/if_team.h b/include/linux/if_team.h index a761f5282bcf..740cb3100dfc 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -121,7 +121,7 @@ struct team_mode_ops { int (*port_enter)(struct team *team, struct team_port *port); void (*port_leave)(struct team *team, struct team_port *port); void (*port_change_dev_addr)(struct team *team, struct team_port *port); - void (*port_disabled)(struct team *team, struct team_port *port); + void (*port_tx_disabled)(struct team *team, struct team_port *port); }; extern int team_modeop_port_enter(struct team *team, struct team_port *port); From 05e352444b2430de4b183b4a988085381e5fd6ad Mon Sep 17 00:00:00 2001 From: Marc Harvey Date: Thu, 9 Apr 2026 02:59:26 +0000 Subject: [PATCH 1486/1561] selftests: net: Add tests for failover of team-aggregated ports There are currently no kernel tests that verify the effect of setting the enabled team driver option. In a followup patch, there will be changes to this option, so it will be important to make sure it still behaves as it does now. The test verifies that tcp continues to work across two different team devices in separate network namespaces, even when member links are manually disabled. Signed-off-by: Marc Harvey Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260409-teaming-driver-internal-v7-4-f47e7589685d@google.com Signed-off-by: Paolo Abeni --- .../selftests/drivers/net/team/Makefile | 2 + .../testing/selftests/drivers/net/team/config | 4 + .../selftests/drivers/net/team/team_lib.sh | 148 ++++++++++++++++ .../drivers/net/team/transmit_failover.sh | 158 ++++++++++++++++++ tools/testing/selftests/net/forwarding/lib.sh | 9 +- 5 files changed, 319 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/drivers/net/team/team_lib.sh create mode 100755 tools/testing/selftests/drivers/net/team/transmit_failover.sh diff --git a/tools/testing/selftests/drivers/net/team/Makefile b/tools/testing/selftests/drivers/net/team/Makefile index 02d6f51d5a06..777da2e0429e 100644 --- a/tools/testing/selftests/drivers/net/team/Makefile +++ b/tools/testing/selftests/drivers/net/team/Makefile @@ -7,9 +7,11 @@ TEST_PROGS := \ options.sh \ propagation.sh \ refleak.sh \ + transmit_failover.sh \ # end of TEST_PROGS TEST_INCLUDES := \ + team_lib.sh \ ../bonding/lag_lib.sh \ ../../../net/forwarding/lib.sh \ ../../../net/in_netns.sh \ diff --git a/tools/testing/selftests/drivers/net/team/config b/tools/testing/selftests/drivers/net/team/config index 5d36a22ef080..8f04ae419c53 100644 --- a/tools/testing/selftests/drivers/net/team/config +++ b/tools/testing/selftests/drivers/net/team/config @@ -6,4 +6,8 @@ CONFIG_NETDEVSIM=m CONFIG_NET_IPGRE=y CONFIG_NET_TEAM=y CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=y +CONFIG_NET_TEAM_MODE_BROADCAST=y CONFIG_NET_TEAM_MODE_LOADBALANCE=y +CONFIG_NET_TEAM_MODE_RANDOM=y +CONFIG_NET_TEAM_MODE_ROUNDROBIN=y +CONFIG_VETH=y diff --git a/tools/testing/selftests/drivers/net/team/team_lib.sh b/tools/testing/selftests/drivers/net/team/team_lib.sh new file mode 100644 index 000000000000..2057f5edee79 --- /dev/null +++ b/tools/testing/selftests/drivers/net/team/team_lib.sh @@ -0,0 +1,148 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +test_dir="$(dirname "$0")" +export REQUIRE_MZ=no +export NUM_NETIFS=0 +# shellcheck disable=SC1091 +source "${test_dir}/../../../net/forwarding/lib.sh" + +TCP_PORT="43434" + +# Create a team interface inside of a given network namespace with a given +# mode, members, and IP address. +# Arguments: +# namespace - Network namespace to put the team interface into. +# team - The name of the team interface to setup. +# mode - The team mode of the interface. +# ip_address - The IP address to assign to the team interface. +# prefix_length - The prefix length for the IP address subnet. +# $@ - members - The member interfaces of the aggregation. +setup_team() +{ + local namespace=$1 + local team=$2 + local mode=$3 + local ip_address=$4 + local prefix_length=$5 + shift 5 + local members=("$@") + + # Prerequisite: team must have no members + for member in "${members[@]}"; do + ip -n "${namespace}" link set "${member}" nomaster + done + + # Prerequisite: team must have no address in order to set it + # shellcheck disable=SC2086 + ip -n "${namespace}" addr del "${ip_address}/${prefix_length}" \ + ${NODAD} dev "${team}" + + echo "Setting team in ${namespace} to mode ${mode}" + + if ! ip -n "${namespace}" link set "${team}" down; then + echo "Failed to bring team device down" + return 1 + fi + if ! ip netns exec "${namespace}" teamnl "${team}" setoption mode \ + "${mode}"; then + echo "Failed to set ${team} mode to '${mode}'" + return 1 + fi + + # Aggregate the members into teams. + for member in "${members[@]}"; do + ip -n "${namespace}" link set "${member}" master "${team}" + done + + # Bring team devices up and give them addresses. + if ! ip -n "${namespace}" link set "${team}" up; then + echo "Failed to set ${team} up" + return 1 + fi + + # shellcheck disable=SC2086 + if ! ip -n "${namespace}" addr add "${ip_address}/${prefix_length}" \ + ${NODAD} dev "${team}"; then + echo "Failed to give ${team} IP address in ${namespace}" + return 1 + fi +} + +# This is global used to keep track of the sender's iperf3 process, so that it +# can be terminated. +declare sender_pid + +# Start sending and receiving TCP traffic with iperf3. +# Globals: +# sender_pid - The process ID of the iperf3 sender process. Used to kill it +# later. +start_listening_and_sending() +{ + ip netns exec "${NS2}" iperf3 -s -p "${TCP_PORT}" --logfile /dev/null & + # Wait for server to become reachable before starting client. + slowwait 5 ip netns exec "${NS1}" iperf3 -c "${NS2_IP}" -p \ + "${TCP_PORT}" -t 1 --logfile /dev/null + ip netns exec "${NS1}" iperf3 -c "${NS2_IP}" -p "${TCP_PORT}" -b 1M -l \ + 1K -t 0 --logfile /dev/null & + sender_pid=$! +} + +# Stop sending TCP traffic with iperf3. +# Globals: +# sender_pid - The process ID of the iperf3 sender process. +stop_sending_and_listening() +{ + kill "${sender_pid}" && wait "${sender_pid}" 2>/dev/null || true +} + +# Monitor for TCP traffic with Tcpdump, save results to temp files. +# Arguments: +# namespace - The network namespace to run tcpdump inside of. +# $@ - interfaces - The interfaces to listen to. +save_tcpdump_outputs() +{ + local namespace=$1 + shift 1 + local interfaces=("$@") + + for interface in "${interfaces[@]}"; do + tcpdump_start "${interface}" "${namespace}" + done + + sleep 1 + + for interface in "${interfaces[@]}"; do + tcpdump_stop_nosleep "${interface}" + done +} + +clear_tcpdump_outputs() +{ + local interfaces=("$@") + + for interface in "${interfaces[@]}"; do + tcpdump_cleanup "${interface}" + done +} + +# Read Tcpdump output, determine packet counts. +# Arguments: +# interface - The name of the interface to count packets for. +# ip_address - The destination IP address. +did_interface_receive() +{ + local interface="$1" + local ip_address="$2" + local packet_count + + packet_count=$(tcpdump_show "$interface" | grep -c \ + "> ${ip_address}.${TCP_PORT}") + echo "Packet count for ${interface} was ${packet_count}" + + if [[ "${packet_count}" -gt 0 ]]; then + true + else + false + fi +} diff --git a/tools/testing/selftests/drivers/net/team/transmit_failover.sh b/tools/testing/selftests/drivers/net/team/transmit_failover.sh new file mode 100755 index 000000000000..b2bdcd27bc98 --- /dev/null +++ b/tools/testing/selftests/drivers/net/team/transmit_failover.sh @@ -0,0 +1,158 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# These tests verify the basic failover capability of the team driver via the +# `enabled` team driver option across different team driver modes. This does not +# rely on teamd, and instead just uses teamnl to set the `enabled` option +# directly. +# +# Topology: +# +# +-------------------------+ NS1 +# | test_team1 | +# | + | +# | eth0 | eth1 | +# | +---+---+ | +# | | | | +# +-------------------------+ +# | | +# +-------------------------+ NS2 +# | | | | +# | +-------+ | +# | eth0 | eth1 | +# | + | +# | test_team2 | +# +-------------------------+ + +export ALL_TESTS="team_test_failover" + +test_dir="$(dirname "$0")" +# shellcheck disable=SC1091 +source "${test_dir}/../../../net/lib.sh" +# shellcheck disable=SC1091 +source "${test_dir}/team_lib.sh" + +NS1="" +NS2="" +export NODAD="nodad" +PREFIX_LENGTH="64" +NS1_IP="fd00::1" +NS2_IP="fd00::2" +NS1_IP4="192.168.0.1" +NS2_IP4="192.168.0.2" +MEMBERS=("eth0" "eth1") + +while getopts "4" opt; do + case $opt in + 4) + echo "IPv4 mode selected." + export NODAD= + PREFIX_LENGTH="24" + NS1_IP="${NS1_IP4}" + NS2_IP="${NS2_IP4}" + ;; + \?) + echo "Invalid option: -$OPTARG" >&2 + exit 1 + ;; + esac +done + +# Create the network namespaces, veth pair, and team devices in the specified +# mode. +# Globals: +# RET - Used by test infra, set by `check_err` functions. +# Arguments: +# mode - The team driver mode to use for the team devices. +environment_create() +{ + trap cleanup_all_ns EXIT + setup_ns ns1 ns2 + NS1="${NS_LIST[0]}" + NS2="${NS_LIST[1]}" + + # Create the interfaces. + ip -n "${NS1}" link add eth0 type veth peer name eth0 netns "${NS2}" + ip -n "${NS1}" link add eth1 type veth peer name eth1 netns "${NS2}" + ip -n "${NS1}" link add test_team1 type team + ip -n "${NS2}" link add test_team2 type team + + # Set up the receiving network namespace's team interface. + setup_team "${NS2}" test_team2 roundrobin "${NS2_IP}" \ + "${PREFIX_LENGTH}" "${MEMBERS[@]}" +} + + +# Check that failover works for a specific team driver mode. +# Globals: +# RET - Used by test infra, set by `check_err` functions. +# Arguments: +# mode - The mode to set the team interfaces to. +team_test_mode_failover() +{ + local mode="$1" + export RET=0 + + # Set up the sender team with the correct mode. + setup_team "${NS1}" test_team1 "${mode}" "${NS1_IP}" \ + "${PREFIX_LENGTH}" "${MEMBERS[@]}" + check_err $? "Failed to set up sender team" + + start_listening_and_sending + + ### Scenario 1: All interfaces initially enabled. + save_tcpdump_outputs "${NS2}" "${MEMBERS[@]}" + did_interface_receive eth0 "${NS2_IP}" + check_err $? "eth0 not transmitting when both links enabled" + did_interface_receive eth1 "${NS2_IP}" + check_err $? "eth1 not transmitting when both links enabled" + clear_tcpdump_outputs "${MEMBERS[@]}" + + ### Scenario 2: One tx-side interface disabled. + ip netns exec "${NS1}" teamnl test_team1 setoption enabled false \ + --port=eth1 + slowwait 2 bash -c "ip netns exec ${NS1} teamnl test_team1 getoption \ + enabled --port=eth1 | grep -q false" + + save_tcpdump_outputs "${NS2}" "${MEMBERS[@]}" + did_interface_receive eth0 "${NS2_IP}" + check_err $? "eth0 not transmitting when enabled" + did_interface_receive eth1 "${NS2_IP}" + check_fail $? "eth1 IS transmitting when disabled" + clear_tcpdump_outputs "${MEMBERS[@]}" + + ### Scenario 3: The interface is re-enabled. + ip netns exec "${NS1}" teamnl test_team1 setoption enabled true \ + --port=eth1 + slowwait 2 bash -c "ip netns exec ${NS1} teamnl test_team1 getoption \ + enabled --port=eth1 | grep -q true" + + save_tcpdump_outputs "${NS2}" "${MEMBERS[@]}" + did_interface_receive eth0 "${NS2_IP}" + check_err $? "eth0 not transmitting when both links enabled" + did_interface_receive eth1 "${NS2_IP}" + check_err $? "eth1 not transmitting when both links enabled" + clear_tcpdump_outputs "${MEMBERS[@]}" + + log_test "Failover of '${mode}' test" + + # Clean up + stop_sending_and_listening +} + +team_test_failover() +{ + team_test_mode_failover broadcast + team_test_mode_failover roundrobin + team_test_mode_failover random + # Don't test `activebackup` or `loadbalance` modes, since they are too + # complicated for just setting `enabled` to work. They use more than + # the `enabled` option for transmit. +} + +require_command teamnl +require_command iperf3 +require_command tcpdump +environment_create +tests_run +exit "${EXIT_STATUS}" diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index d2bdbff68075..ac8358bcb22c 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -1750,12 +1750,17 @@ tcpdump_start() sleep 1 } -tcpdump_stop() +tcpdump_stop_nosleep() { local if_name=$1 local pid=${cappid[$if_name]} $ns_cmd kill "$pid" && wait "$pid" +} + +tcpdump_stop() +{ + tcpdump_stop_nosleep "$1" sleep 1 } @@ -1770,7 +1775,7 @@ tcpdump_show() { local if_name=$1 - tcpdump -e -n -r ${capfile[$if_name]} 2>&1 + tcpdump -e -nn -r ${capfile[$if_name]} 2>&1 } # return 0 if the packet wasn't seen on host2_if or 1 if it was From 10407eebe8861802d5117956604f94d364df85d5 Mon Sep 17 00:00:00 2001 From: Marc Harvey Date: Thu, 9 Apr 2026 02:59:27 +0000 Subject: [PATCH 1487/1561] selftests: net: Add test for enablement of ports with teamd There are no tests that verify enablement and disablement of team driver ports with teamd. This should work even with changes to the enablement option, so it is important to test. This test sets up an active-backup network configuration across two network namespaces, and tries to send traffic while changing which link is the active one. Also increase the team test timeout to 300 seconds, because gracefully killing teamd can take 30 seconds for each instance. Signed-off-by: Marc Harvey Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260409-teaming-driver-internal-v7-5-f47e7589685d@google.com Signed-off-by: Paolo Abeni --- .../selftests/drivers/net/team/Makefile | 1 + .../selftests/drivers/net/team/settings | 1 + .../selftests/drivers/net/team/team_lib.sh | 26 ++ .../drivers/net/team/teamd_activebackup.sh | 246 ++++++++++++++++++ tools/testing/selftests/net/lib.sh | 13 + 5 files changed, 287 insertions(+) create mode 100644 tools/testing/selftests/drivers/net/team/settings create mode 100755 tools/testing/selftests/drivers/net/team/teamd_activebackup.sh diff --git a/tools/testing/selftests/drivers/net/team/Makefile b/tools/testing/selftests/drivers/net/team/Makefile index 777da2e0429e..dab922d7f83d 100644 --- a/tools/testing/selftests/drivers/net/team/Makefile +++ b/tools/testing/selftests/drivers/net/team/Makefile @@ -7,6 +7,7 @@ TEST_PROGS := \ options.sh \ propagation.sh \ refleak.sh \ + teamd_activebackup.sh \ transmit_failover.sh \ # end of TEST_PROGS diff --git a/tools/testing/selftests/drivers/net/team/settings b/tools/testing/selftests/drivers/net/team/settings new file mode 100644 index 000000000000..694d70710ff0 --- /dev/null +++ b/tools/testing/selftests/drivers/net/team/settings @@ -0,0 +1 @@ +timeout=300 diff --git a/tools/testing/selftests/drivers/net/team/team_lib.sh b/tools/testing/selftests/drivers/net/team/team_lib.sh index 2057f5edee79..02ef0ee02d6a 100644 --- a/tools/testing/selftests/drivers/net/team/team_lib.sh +++ b/tools/testing/selftests/drivers/net/team/team_lib.sh @@ -146,3 +146,29 @@ did_interface_receive() false fi } + +# Return true if the given interface in the given namespace does NOT receive +# traffic over a 1 second period. +# Arguments: +# interface - The name of the interface. +# ip_address - The destination IP address. +# namespace - The name of the namespace that the interface is in. +check_no_traffic() +{ + local interface="$1" + local ip_address="$2" + local namespace="$3" + local rc + + save_tcpdump_outputs "${namespace}" "${interface}" + did_interface_receive "${interface}" "${ip_address}" + rc=$? + + clear_tcpdump_outputs "${interface}" + + if [[ "${rc}" -eq 0 ]]; then + return 1 + else + return 0 + fi +} diff --git a/tools/testing/selftests/drivers/net/team/teamd_activebackup.sh b/tools/testing/selftests/drivers/net/team/teamd_activebackup.sh new file mode 100755 index 000000000000..2b26a697e179 --- /dev/null +++ b/tools/testing/selftests/drivers/net/team/teamd_activebackup.sh @@ -0,0 +1,246 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# These tests verify that teamd is able to enable and disable ports via the +# active backup runner. +# +# Topology: +# +# +-------------------------+ NS1 +# | test_team1 | +# | + | +# | eth0 | eth1 | +# | +---+---+ | +# | | | | +# +-------------------------+ +# | | +# +-------------------------+ NS2 +# | | | | +# | +-------+ | +# | eth0 | eth1 | +# | + | +# | test_team2 | +# +-------------------------+ + +export ALL_TESTS="teamd_test_active_backup" + +test_dir="$(dirname "$0")" +# shellcheck disable=SC1091 +source "${test_dir}/../../../net/lib.sh" +# shellcheck disable=SC1091 +source "${test_dir}/team_lib.sh" + +NS1="" +NS2="" +export NODAD="nodad" +PREFIX_LENGTH="64" +NS1_IP="fd00::1" +NS2_IP="fd00::2" +NS1_IP4="192.168.0.1" +NS2_IP4="192.168.0.2" +NS1_TEAMD_CONF="" +NS2_TEAMD_CONF="" +NS1_TEAMD_PID="" +NS2_TEAMD_PID="" + +while getopts "4" opt; do + case $opt in + 4) + echo "IPv4 mode selected." + export NODAD= + PREFIX_LENGTH="24" + NS1_IP="${NS1_IP4}" + NS2_IP="${NS2_IP4}" + ;; + \?) + echo "Invalid option: -${OPTARG}" >&2 + exit 1 + ;; + esac +done + +teamd_config_create() +{ + local runner=$1 + local dev=$2 + local conf + + conf=$(mktemp) + + cat > "${conf}" <<-EOF + { + "device": "${dev}", + "runner": {"name": "${runner}"}, + "ports": { + "eth0": {}, + "eth1": {} + } + } + EOF + echo "${conf}" +} + +# Create the network namespaces, veth pair, and team devices in the specified +# runner. +# Globals: +# RET - Used by test infra, set by `check_err` functions. +# Arguments: +# runner - The Teamd runner to use for the Team devices. +environment_create() +{ + local runner=$1 + + echo "Setting up two-link aggregation for runner ${runner}" + echo "Teamd version is: $(teamd --version)" + trap environment_destroy EXIT + + setup_ns ns1 ns2 + NS1="${NS_LIST[0]}" + NS2="${NS_LIST[1]}" + + for link in $(seq 0 1); do + ip -n "${NS1}" link add "eth${link}" type veth peer name \ + "eth${link}" netns "${NS2}" + check_err $? "Failed to create veth pair" + done + + NS1_TEAMD_CONF=$(teamd_config_create "${runner}" "test_team1") + NS2_TEAMD_CONF=$(teamd_config_create "${runner}" "test_team2") + echo "Conf files are ${NS1_TEAMD_CONF} and ${NS2_TEAMD_CONF}" + + ip netns exec "${NS1}" teamd -d -f "${NS1_TEAMD_CONF}" + check_err $? "Failed to create team device in ${NS1}" + NS1_TEAMD_PID=$(pgrep -f "teamd -d -f ${NS1_TEAMD_CONF}") + + ip netns exec "${NS2}" teamd -d -f "${NS2_TEAMD_CONF}" + check_err $? "Failed to create team device in ${NS2}" + NS2_TEAMD_PID=$(pgrep -f "teamd -d -f ${NS2_TEAMD_CONF}") + + echo "Created team devices" + echo "Teamd PIDs are ${NS1_TEAMD_PID} and ${NS2_TEAMD_PID}" + + ip -n "${NS1}" link set test_team1 up + check_err $? "Failed to set test_team1 up in ${NS1}" + ip -n "${NS2}" link set test_team2 up + check_err $? "Failed to set test_team2 up in ${NS2}" + + ip -n "${NS1}" addr add "${NS1_IP}/${PREFIX_LENGTH}" "${NODAD}" dev \ + test_team1 + check_err $? "Failed to add address to team device in ${NS1}" + ip -n "${NS2}" addr add "${NS2_IP}/${PREFIX_LENGTH}" "${NODAD}" dev \ + test_team2 + check_err $? "Failed to add address to team device in ${NS2}" + + slowwait 2 timeout 0.5 ip netns exec "${NS1}" ping -W 1 -c 1 "${NS2_IP}" +} + +# Tear down the environment: kill teamd and delete network namespaces. +environment_destroy() +{ + echo "Tearing down two-link aggregation" + + rm "${NS1_TEAMD_CONF}" + rm "${NS2_TEAMD_CONF}" + + # First, try graceful teamd teardown. + ip netns exec "${NS1}" teamd -k -t test_team1 + ip netns exec "${NS2}" teamd -k -t test_team2 + + # If teamd can't be killed gracefully, then sigkill. + if kill -0 "${NS1_TEAMD_PID}" 2>/dev/null; then + echo "Sending sigkill to teamd for test_team1" + kill -9 "${NS1_TEAMD_PID}" + rm -f /var/run/teamd/test_team1.{pid,sock} + fi + if kill -0 "${NS2_TEAMD_PID}" 2>/dev/null; then + echo "Sending sigkill to teamd for test_team2" + kill -9 "${NS2_TEAMD_PID}" + rm -f /var/run/teamd/test_team2.{pid,sock} + fi + cleanup_all_ns +} + +# Change the active port for an active-backup mode team. +# Arguments: +# namespace - The network namespace that the team is in. +# team - The name of the team. +# active_port - The port to make active. +set_active_port() +{ + local namespace=$1 + local team=$2 + local active_port=$3 + + ip netns exec "${namespace}" teamdctl "${team}" state item set \ + runner.active_port "${active_port}" + slowwait 2 bash -c "ip netns exec ${namespace} teamdctl ${team} state \ + item get runner.active_port | grep -q ${active_port}" +} + +# Wait for an interface to stop receiving traffic. If it keeps receiving traffic +# for the duration of the timeout, then return an error. +# Arguments: +# - namespace - The network namespace that the interface is in. +# - interface - The name of the interface. +wait_to_stop_receiving() +{ + local namespace=$1 + local interface=$2 + + echo "Waiting for ${interface} in ${namespace} to stop receiving" + slowwait 10 check_no_traffic "${interface}" "${NS2_IP}" \ + "${namespace}" +} + +# Test that active backup runner can change active ports. +# Globals: +# RET - Used by test infra, set by `check_err` functions. +teamd_test_active_backup() +{ + export RET=0 + + start_listening_and_sending + + ### Scenario 1: Don't manually set active port, just make sure team + # works. + save_tcpdump_outputs "${NS2}" test_team2 + did_interface_receive test_team2 "${NS2_IP}" + check_err $? "Traffic did not reach team interface in NS2." + clear_tcpdump_outputs test_team2 + + ### Scenario 2: Choose active port. + set_active_port "${NS1}" test_team1 eth1 + set_active_port "${NS2}" test_team2 eth1 + + wait_to_stop_receiving "${NS2}" eth0 + save_tcpdump_outputs "${NS2}" eth0 eth1 + did_interface_receive eth0 "${NS2_IP}" + check_fail $? "eth0 IS transmitting when inactive" + did_interface_receive eth1 "${NS2_IP}" + check_err $? "eth1 not transmitting when active" + clear_tcpdump_outputs eth0 eth1 + + ### Scenario 3: Change active port. + set_active_port "${NS1}" test_team1 eth0 + set_active_port "${NS2}" test_team2 eth0 + + wait_to_stop_receiving "${NS2}" eth1 + save_tcpdump_outputs "${NS2}" eth0 eth1 + did_interface_receive eth0 "${NS2_IP}" + check_err $? "eth0 not transmitting when active" + did_interface_receive eth1 "${NS2_IP}" + check_fail $? "eth1 IS transmitting when inactive" + clear_tcpdump_outputs eth0 eth1 + + log_test "teamd active backup runner test" + + stop_sending_and_listening +} + +require_command teamd +require_command teamdctl +require_command iperf3 +require_command tcpdump +environment_create activebackup +tests_run +exit "${EXIT_STATUS}" diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index e915386daf1b..b3827b43782b 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -224,6 +224,19 @@ setup_ns() NS_LIST+=("${ns_list[@]}") } +in_all_ns() +{ + local ret=0 + local ns_list=("${NS_LIST[@]}") + + for ns in "${ns_list[@]}"; do + ip netns exec "${ns}" "$@" + (( ret = ret || $? )) + done + + return "${ret}" +} + # Create netdevsim with given id and net namespace. create_netdevsim() { local id="$1" From fa6ed31dd913b0f68c75ec80c3f4a324572071fc Mon Sep 17 00:00:00 2001 From: Marc Harvey Date: Thu, 9 Apr 2026 02:59:28 +0000 Subject: [PATCH 1488/1561] net: team: Rename enablement functions and struct members to tx Add no functional changes, but rename enablement functions, variables etc. that are used in teaming driver transmit decisions. Since rx and tx enablement are still coupled, some of the variables renamed in this patch are still used for the rx path, but that will change in a follow-up patch. Reviewed-by: Jiri Pirko Signed-off-by: Marc Harvey Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260409-teaming-driver-internal-v7-6-f47e7589685d@google.com Signed-off-by: Paolo Abeni --- drivers/net/team/team_core.c | 44 ++++++++++++----------- drivers/net/team/team_mode_loadbalance.c | 2 +- drivers/net/team/team_mode_random.c | 4 +-- drivers/net/team/team_mode_roundrobin.c | 2 +- include/linux/if_team.h | 46 ++++++++++++------------ 5 files changed, 51 insertions(+), 47 deletions(-) diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c index 2ce31999c99f..826769473878 100644 --- a/drivers/net/team/team_core.c +++ b/drivers/net/team/team_core.c @@ -532,13 +532,13 @@ static void team_adjust_ops(struct team *team) * correct ops are always set. */ - if (!team->en_port_count || !team_is_mode_set(team) || + if (!team->tx_en_port_count || !team_is_mode_set(team) || !team->mode->ops->transmit) team->ops.transmit = team_dummy_transmit; else team->ops.transmit = team->mode->ops->transmit; - if (!team->en_port_count || !team_is_mode_set(team) || + if (!team->tx_en_port_count || !team_is_mode_set(team) || !team->mode->ops->receive) team->ops.receive = team_dummy_receive; else @@ -831,7 +831,7 @@ static bool team_queue_override_port_has_gt_prio_than(struct team_port *port, return true; if (port->priority > cur->priority) return false; - if (port->index < cur->index) + if (port->tx_index < cur->tx_index) return true; return false; } @@ -929,7 +929,7 @@ static bool team_port_find(const struct team *team, /* * Enable/disable port by adding to enabled port hashlist and setting - * port->index (Might be racy so reader could see incorrect ifindex when + * port->tx_index (Might be racy so reader could see incorrect ifindex when * processing a flying packet, but that is not a problem). Write guarded * by RTNL. */ @@ -938,10 +938,10 @@ static void team_port_enable(struct team *team, { if (team_port_enabled(port)) return; - WRITE_ONCE(port->index, team->en_port_count); - WRITE_ONCE(team->en_port_count, team->en_port_count + 1); - hlist_add_head_rcu(&port->hlist, - team_port_index_hash(team, port->index)); + WRITE_ONCE(port->tx_index, team->tx_en_port_count); + WRITE_ONCE(team->tx_en_port_count, team->tx_en_port_count + 1); + hlist_add_head_rcu(&port->tx_hlist, + team_tx_port_index_hash(team, port->tx_index)); team_adjust_ops(team); team_queue_override_port_add(team, port); team_notify_peers(team); @@ -951,15 +951,17 @@ static void team_port_enable(struct team *team, static void __reconstruct_port_hlist(struct team *team, int rm_index) { - int i; + struct hlist_head *tx_port_index_hash; struct team_port *port; + int i; - for (i = rm_index + 1; i < team->en_port_count; i++) { - port = team_get_port_by_index(team, i); - hlist_del_rcu(&port->hlist); - WRITE_ONCE(port->index, port->index - 1); - hlist_add_head_rcu(&port->hlist, - team_port_index_hash(team, port->index)); + for (i = rm_index + 1; i < team->tx_en_port_count; i++) { + port = team_get_port_by_tx_index(team, i); + hlist_del_rcu(&port->tx_hlist); + WRITE_ONCE(port->tx_index, port->tx_index - 1); + tx_port_index_hash = team_tx_port_index_hash(team, + port->tx_index); + hlist_add_head_rcu(&port->tx_hlist, tx_port_index_hash); } } @@ -970,10 +972,10 @@ static void team_port_disable(struct team *team, return; if (team->ops.port_tx_disabled) team->ops.port_tx_disabled(team, port); - hlist_del_rcu(&port->hlist); - __reconstruct_port_hlist(team, port->index); - WRITE_ONCE(port->index, -1); - WRITE_ONCE(team->en_port_count, team->en_port_count - 1); + hlist_del_rcu(&port->tx_hlist); + __reconstruct_port_hlist(team, port->tx_index); + WRITE_ONCE(port->tx_index, -1); + WRITE_ONCE(team->tx_en_port_count, team->tx_en_port_count - 1); team_queue_override_port_del(team, port); team_adjust_ops(team); team_lower_state_changed(port); @@ -1244,7 +1246,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev, netif_addr_unlock_bh(dev); } - WRITE_ONCE(port->index, -1); + WRITE_ONCE(port->tx_index, -1); list_add_tail_rcu(&port->list, &team->port_list); team_port_enable(team, port); netdev_compute_master_upper_features(dev, true); @@ -1595,7 +1597,7 @@ static int team_init(struct net_device *dev) return -ENOMEM; for (i = 0; i < TEAM_PORT_HASHENTRIES; i++) - INIT_HLIST_HEAD(&team->en_port_hlist[i]); + INIT_HLIST_HEAD(&team->tx_en_port_hlist[i]); INIT_LIST_HEAD(&team->port_list); err = team_queue_override_init(team); if (err) diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c index 840f409d250b..4833fbfe241e 100644 --- a/drivers/net/team/team_mode_loadbalance.c +++ b/drivers/net/team/team_mode_loadbalance.c @@ -120,7 +120,7 @@ static struct team_port *lb_hash_select_tx_port(struct team *team, { int port_index = team_num_to_port_index(team, hash); - return team_get_port_by_index_rcu(team, port_index); + return team_get_port_by_tx_index_rcu(team, port_index); } /* Hash to port mapping select tx port */ diff --git a/drivers/net/team/team_mode_random.c b/drivers/net/team/team_mode_random.c index 169a7bc865b2..370e974f3dca 100644 --- a/drivers/net/team/team_mode_random.c +++ b/drivers/net/team/team_mode_random.c @@ -16,8 +16,8 @@ static bool rnd_transmit(struct team *team, struct sk_buff *skb) struct team_port *port; int port_index; - port_index = get_random_u32_below(READ_ONCE(team->en_port_count)); - port = team_get_port_by_index_rcu(team, port_index); + port_index = get_random_u32_below(READ_ONCE(team->tx_en_port_count)); + port = team_get_port_by_tx_index_rcu(team, port_index); if (unlikely(!port)) goto drop; port = team_get_first_port_txable_rcu(team, port); diff --git a/drivers/net/team/team_mode_roundrobin.c b/drivers/net/team/team_mode_roundrobin.c index dd405d82c6ac..ecbeef28c221 100644 --- a/drivers/net/team/team_mode_roundrobin.c +++ b/drivers/net/team/team_mode_roundrobin.c @@ -27,7 +27,7 @@ static bool rr_transmit(struct team *team, struct sk_buff *skb) port_index = team_num_to_port_index(team, rr_priv(team)->sent_packets++); - port = team_get_port_by_index_rcu(team, port_index); + port = team_get_port_by_tx_index_rcu(team, port_index); if (unlikely(!port)) goto drop; port = team_get_first_port_txable_rcu(team, port); diff --git a/include/linux/if_team.h b/include/linux/if_team.h index 740cb3100dfc..c777170ef552 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -27,10 +27,10 @@ struct team; struct team_port { struct net_device *dev; - struct hlist_node hlist; /* node in enabled ports hash list */ + struct hlist_node tx_hlist; /* node in tx-enabled ports hash list */ struct list_head list; /* node in ordinary list */ struct team *team; - int index; /* index of enabled port. If disabled, it's set to -1 */ + int tx_index; /* index of tx enabled port. If disabled, -1 */ bool linkup; /* either state.linkup or user.linkup */ @@ -77,7 +77,7 @@ static inline struct team_port *team_port_get_rcu(const struct net_device *dev) static inline bool team_port_enabled(struct team_port *port) { - return READ_ONCE(port->index) != -1; + return READ_ONCE(port->tx_index) != -1; } static inline bool team_port_txable(struct team_port *port) @@ -190,10 +190,10 @@ struct team { const struct header_ops *header_ops_cache; /* - * List of enabled ports and their count + * List of tx-enabled ports and counts of rx and tx-enabled ports. */ - int en_port_count; - struct hlist_head en_port_hlist[TEAM_PORT_HASHENTRIES]; + int tx_en_port_count; + struct hlist_head tx_en_port_hlist[TEAM_PORT_HASHENTRIES]; struct list_head port_list; /* list of all ports */ @@ -237,41 +237,43 @@ static inline int team_dev_queue_xmit(struct team *team, struct team_port *port, return dev_queue_xmit(skb); } -static inline struct hlist_head *team_port_index_hash(struct team *team, - int port_index) +static inline struct hlist_head *team_tx_port_index_hash(struct team *team, + int tx_port_index) { - return &team->en_port_hlist[port_index & (TEAM_PORT_HASHENTRIES - 1)]; + unsigned int list_entry = tx_port_index & (TEAM_PORT_HASHENTRIES - 1); + + return &team->tx_en_port_hlist[list_entry]; } -static inline struct team_port *team_get_port_by_index(struct team *team, - int port_index) +static inline struct team_port *team_get_port_by_tx_index(struct team *team, + int tx_port_index) { + struct hlist_head *head = team_tx_port_index_hash(team, tx_port_index); struct team_port *port; - struct hlist_head *head = team_port_index_hash(team, port_index); - hlist_for_each_entry(port, head, hlist) - if (port->index == port_index) + hlist_for_each_entry(port, head, tx_hlist) + if (port->tx_index == tx_port_index) return port; return NULL; } static inline int team_num_to_port_index(struct team *team, unsigned int num) { - int en_port_count = READ_ONCE(team->en_port_count); + int tx_en_port_count = READ_ONCE(team->tx_en_port_count); - if (unlikely(!en_port_count)) + if (unlikely(!tx_en_port_count)) return 0; - return num % en_port_count; + return num % tx_en_port_count; } -static inline struct team_port *team_get_port_by_index_rcu(struct team *team, - int port_index) +static inline struct team_port *team_get_port_by_tx_index_rcu(struct team *team, + int tx_port_index) { + struct hlist_head *head = team_tx_port_index_hash(team, tx_port_index); struct team_port *port; - struct hlist_head *head = team_port_index_hash(team, port_index); - hlist_for_each_entry_rcu(port, head, hlist) - if (READ_ONCE(port->index) == port_index) + hlist_for_each_entry_rcu(port, head, tx_hlist) + if (READ_ONCE(port->tx_index) == tx_port_index) return port; return NULL; } From 68f0833f279ac209ec865da76568c843dd38c508 Mon Sep 17 00:00:00 2001 From: Marc Harvey Date: Thu, 9 Apr 2026 02:59:29 +0000 Subject: [PATCH 1489/1561] net: team: Track rx enablement separately from tx enablement Separate the rx and tx enablement/disablement into different functions so that it is easier to interact with them independently later. Although this patch changes receive and transmit paths, the actual behavior of the teaming driver should remain unchanged, since there is no option introduced yet to change rx or tx enablement independently. Those options will be added in follow-up patches. Reviewed-by: Jiri Pirko Signed-off-by: Marc Harvey Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260409-teaming-driver-internal-v7-7-f47e7589685d@google.com Signed-off-by: Paolo Abeni --- drivers/net/team/team_core.c | 114 +++++++++++++++++------ drivers/net/team/team_mode_loadbalance.c | 2 +- include/linux/if_team.h | 16 +++- 3 files changed, 100 insertions(+), 32 deletions(-) diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c index 826769473878..e437099a5a17 100644 --- a/drivers/net/team/team_core.c +++ b/drivers/net/team/team_core.c @@ -87,7 +87,7 @@ static void team_lower_state_changed(struct team_port *port) struct netdev_lag_lower_state_info info; info.link_up = port->linkup; - info.tx_enabled = team_port_enabled(port); + info.tx_enabled = team_port_tx_enabled(port); netdev_lower_state_changed(port->dev, &info); } @@ -538,7 +538,7 @@ static void team_adjust_ops(struct team *team) else team->ops.transmit = team->mode->ops->transmit; - if (!team->tx_en_port_count || !team_is_mode_set(team) || + if (!team->rx_en_port_count || !team_is_mode_set(team) || !team->mode->ops->receive) team->ops.receive = team_dummy_receive; else @@ -734,7 +734,7 @@ static rx_handler_result_t team_handle_frame(struct sk_buff **pskb) port = team_port_get_rcu(skb->dev); team = port->team; - if (!team_port_enabled(port)) { + if (!team_port_rx_enabled(port)) { if (is_link_local_ether_addr(eth_hdr(skb)->h_dest)) /* link-local packets are mostly useful when stack receives them * with the link they arrive on. @@ -876,7 +876,7 @@ static void __team_queue_override_enabled_check(struct team *team) static void team_queue_override_port_prio_changed(struct team *team, struct team_port *port) { - if (!port->queue_id || !team_port_enabled(port)) + if (!port->queue_id || !team_port_tx_enabled(port)) return; __team_queue_override_port_del(team, port); __team_queue_override_port_add(team, port); @@ -887,7 +887,7 @@ static void team_queue_override_port_change_queue_id(struct team *team, struct team_port *port, u16 new_queue_id) { - if (team_port_enabled(port)) { + if (team_port_tx_enabled(port)) { __team_queue_override_port_del(team, port); port->queue_id = new_queue_id; __team_queue_override_port_add(team, port); @@ -927,26 +927,33 @@ static bool team_port_find(const struct team *team, return false; } -/* - * Enable/disable port by adding to enabled port hashlist and setting - * port->tx_index (Might be racy so reader could see incorrect ifindex when - * processing a flying packet, but that is not a problem). Write guarded - * by RTNL. - */ -static void team_port_enable(struct team *team, - struct team_port *port) +static void __team_port_enable_rx(struct team *team, + struct team_port *port) +{ + team->rx_en_port_count++; + WRITE_ONCE(port->rx_enabled, true); +} + +static void __team_port_disable_rx(struct team *team, + struct team_port *port) +{ + team->rx_en_port_count--; + WRITE_ONCE(port->rx_enabled, false); +} + +/* + * Enable just TX on the port by adding to tx-enabled port hashlist and + * setting port->tx_index (Might be racy so reader could see incorrect + * ifindex when processing a flying packet, but that is not a problem). + * Write guarded by RTNL. + */ +static void __team_port_enable_tx(struct team *team, + struct team_port *port) { - if (team_port_enabled(port)) - return; WRITE_ONCE(port->tx_index, team->tx_en_port_count); WRITE_ONCE(team->tx_en_port_count, team->tx_en_port_count + 1); hlist_add_head_rcu(&port->tx_hlist, team_tx_port_index_hash(team, port->tx_index)); - team_adjust_ops(team); - team_queue_override_port_add(team, port); - team_notify_peers(team); - team_mcast_rejoin(team); - team_lower_state_changed(port); } static void __reconstruct_port_hlist(struct team *team, int rm_index) @@ -965,20 +972,69 @@ static void __reconstruct_port_hlist(struct team *team, int rm_index) } } +static void __team_port_disable_tx(struct team *team, + struct team_port *port) +{ + if (team->ops.port_tx_disabled) + team->ops.port_tx_disabled(team, port); + + hlist_del_rcu(&port->tx_hlist); + __reconstruct_port_hlist(team, port->tx_index); + + WRITE_ONCE(port->tx_index, -1); + WRITE_ONCE(team->tx_en_port_count, team->tx_en_port_count - 1); +} + +/* + * Enable TX AND RX on the port. + */ +static void team_port_enable(struct team *team, + struct team_port *port) +{ + bool rx_was_enabled; + bool tx_was_enabled; + + if (team_port_enabled(port)) + return; + + rx_was_enabled = team_port_rx_enabled(port); + tx_was_enabled = team_port_tx_enabled(port); + + if (!rx_was_enabled) + __team_port_enable_rx(team, port); + if (!tx_was_enabled) + __team_port_enable_tx(team, port); + + team_adjust_ops(team); + if (!tx_was_enabled) + team_queue_override_port_add(team, port); + team_notify_peers(team); + if (!rx_was_enabled) + team_mcast_rejoin(team); + if (!tx_was_enabled) + team_lower_state_changed(port); +} + static void team_port_disable(struct team *team, struct team_port *port) { - if (!team_port_enabled(port)) + bool rx_was_enabled = team_port_rx_enabled(port); + bool tx_was_enabled = team_port_tx_enabled(port); + + if (!tx_was_enabled && !rx_was_enabled) return; - if (team->ops.port_tx_disabled) - team->ops.port_tx_disabled(team, port); - hlist_del_rcu(&port->tx_hlist); - __reconstruct_port_hlist(team, port->tx_index); - WRITE_ONCE(port->tx_index, -1); - WRITE_ONCE(team->tx_en_port_count, team->tx_en_port_count - 1); - team_queue_override_port_del(team, port); + + if (tx_was_enabled) { + __team_port_disable_tx(team, port); + team_queue_override_port_del(team, port); + } + if (rx_was_enabled) + __team_port_disable_rx(team, port); + team_adjust_ops(team); - team_lower_state_changed(port); + + if (tx_was_enabled) + team_lower_state_changed(port); } static int team_port_enter(struct team *team, struct team_port *port) diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c index 4833fbfe241e..38a459649569 100644 --- a/drivers/net/team/team_mode_loadbalance.c +++ b/drivers/net/team/team_mode_loadbalance.c @@ -380,7 +380,7 @@ static int lb_tx_hash_to_port_mapping_set(struct team *team, list_for_each_entry(port, &team->port_list, list) { if (ctx->data.u32_val == port->dev->ifindex && - team_port_enabled(port)) { + team_port_tx_enabled(port)) { rcu_assign_pointer(LB_HTPM_PORT_BY_HASH(lb_priv, hash), port); return 0; diff --git a/include/linux/if_team.h b/include/linux/if_team.h index c777170ef552..3d21e06fda67 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -31,6 +31,7 @@ struct team_port { struct list_head list; /* node in ordinary list */ struct team *team; int tx_index; /* index of tx enabled port. If disabled, -1 */ + bool rx_enabled; bool linkup; /* either state.linkup or user.linkup */ @@ -75,14 +76,24 @@ static inline struct team_port *team_port_get_rcu(const struct net_device *dev) return rcu_dereference(dev->rx_handler_data); } -static inline bool team_port_enabled(struct team_port *port) +static inline bool team_port_rx_enabled(struct team_port *port) +{ + return READ_ONCE(port->rx_enabled); +} + +static inline bool team_port_tx_enabled(struct team_port *port) { return READ_ONCE(port->tx_index) != -1; } +static inline bool team_port_enabled(struct team_port *port) +{ + return team_port_rx_enabled(port) && team_port_tx_enabled(port); +} + static inline bool team_port_txable(struct team_port *port) { - return port->linkup && team_port_enabled(port); + return port->linkup && team_port_tx_enabled(port); } static inline bool team_port_dev_txable(const struct net_device *port_dev) @@ -193,6 +204,7 @@ struct team { * List of tx-enabled ports and counts of rx and tx-enabled ports. */ int tx_en_port_count; + int rx_en_port_count; struct hlist_head tx_en_port_hlist[TEAM_PORT_HASHENTRIES]; struct list_head port_list; /* list of all ports */ From 0e47569a574d447fec072abf3b4330974a471b97 Mon Sep 17 00:00:00 2001 From: Marc Harvey Date: Thu, 9 Apr 2026 02:59:30 +0000 Subject: [PATCH 1490/1561] net: team: Add new rx_enabled team port option Allow independent control over rx enablement via the rx_enabled option without affecting tx enablement. This affects the normal enabled option since a port is only considered enabled if both tx and rx are enabled. If this option is not used, then the enabled option will continue to behave exactly as it did before. Tested in a follow-up patch with a new selftest. Reviewed-by: Jiri Pirko Signed-off-by: Marc Harvey Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260409-teaming-driver-internal-v7-8-f47e7589685d@google.com Signed-off-by: Paolo Abeni --- drivers/net/team/team_core.c | 49 ++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c index e437099a5a17..67f77de4cf10 100644 --- a/drivers/net/team/team_core.c +++ b/drivers/net/team/team_core.c @@ -941,6 +941,28 @@ static void __team_port_disable_rx(struct team *team, WRITE_ONCE(port->rx_enabled, false); } +static void team_port_enable_rx(struct team *team, + struct team_port *port) +{ + if (team_port_rx_enabled(port)) + return; + + __team_port_enable_rx(team, port); + team_adjust_ops(team); + team_notify_peers(team); + team_mcast_rejoin(team); +} + +static void team_port_disable_rx(struct team *team, + struct team_port *port) +{ + if (!team_port_rx_enabled(port)) + return; + + __team_port_disable_rx(team, port); + team_adjust_ops(team); +} + /* * Enable just TX on the port by adding to tx-enabled port hashlist and * setting port->tx_index (Might be racy so reader could see incorrect @@ -1487,6 +1509,26 @@ static int team_port_en_option_set(struct team *team, return 0; } +static void team_port_rx_en_option_get(struct team *team, + struct team_gsetter_ctx *ctx) +{ + struct team_port *port = ctx->info->port; + + ctx->data.bool_val = team_port_rx_enabled(port); +} + +static int team_port_rx_en_option_set(struct team *team, + struct team_gsetter_ctx *ctx) +{ + struct team_port *port = ctx->info->port; + + if (ctx->data.bool_val) + team_port_enable_rx(team, port); + else + team_port_disable_rx(team, port); + return 0; +} + static void team_user_linkup_option_get(struct team *team, struct team_gsetter_ctx *ctx) { @@ -1608,6 +1650,13 @@ static const struct team_option team_options[] = { .getter = team_port_en_option_get, .setter = team_port_en_option_set, }, + { + .name = "rx_enabled", + .type = TEAM_OPTION_TYPE_BOOL, + .per_port = true, + .getter = team_port_rx_en_option_get, + .setter = team_port_rx_en_option_set, + }, { .name = "user_linkup", .type = TEAM_OPTION_TYPE_BOOL, From bb9215a98179509e9b1b9a31d68faeebd9339f7c Mon Sep 17 00:00:00 2001 From: Marc Harvey Date: Thu, 9 Apr 2026 02:59:31 +0000 Subject: [PATCH 1491/1561] net: team: Add new tx_enabled team port option This option allows independent control over tx enablement without affecting rx enablement. Like the rx_enabled option, this also implicitly affects the enabled option. If this option is not used, then the enabled option will continue to behave as it did before. Tested in a follow-up patch with a new selftest. Reviewed-by: Jiri Pirko Signed-off-by: Marc Harvey Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260409-teaming-driver-internal-v7-9-f47e7589685d@google.com Signed-off-by: Paolo Abeni --- drivers/net/team/team_core.c | 55 ++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c index 67f77de4cf10..0c87f9972457 100644 --- a/drivers/net/team/team_core.c +++ b/drivers/net/team/team_core.c @@ -978,6 +978,21 @@ static void __team_port_enable_tx(struct team *team, team_tx_port_index_hash(team, port->tx_index)); } +static void team_port_enable_tx(struct team *team, + struct team_port *port) +{ + if (team_port_tx_enabled(port)) + return; + + __team_port_enable_tx(team, port); + team_adjust_ops(team); + team_queue_override_port_add(team, port); + + /* Don't rejoin multicast, since this port might not be receiving. */ + team_notify_peers(team); + team_lower_state_changed(port); +} + static void __reconstruct_port_hlist(struct team *team, int rm_index) { struct hlist_head *tx_port_index_hash; @@ -1007,6 +1022,19 @@ static void __team_port_disable_tx(struct team *team, WRITE_ONCE(team->tx_en_port_count, team->tx_en_port_count - 1); } +static void team_port_disable_tx(struct team *team, + struct team_port *port) +{ + if (!team_port_tx_enabled(port)) + return; + + __team_port_disable_tx(team, port); + + team_queue_override_port_del(team, port); + team_adjust_ops(team); + team_lower_state_changed(port); +} + /* * Enable TX AND RX on the port. */ @@ -1529,6 +1557,26 @@ static int team_port_rx_en_option_set(struct team *team, return 0; } +static void team_port_tx_en_option_get(struct team *team, + struct team_gsetter_ctx *ctx) +{ + struct team_port *port = ctx->info->port; + + ctx->data.bool_val = team_port_tx_enabled(port); +} + +static int team_port_tx_en_option_set(struct team *team, + struct team_gsetter_ctx *ctx) +{ + struct team_port *port = ctx->info->port; + + if (ctx->data.bool_val) + team_port_enable_tx(team, port); + else + team_port_disable_tx(team, port); + return 0; +} + static void team_user_linkup_option_get(struct team *team, struct team_gsetter_ctx *ctx) { @@ -1657,6 +1705,13 @@ static const struct team_option team_options[] = { .getter = team_port_rx_en_option_get, .setter = team_port_rx_en_option_set, }, + { + .name = "tx_enabled", + .type = TEAM_OPTION_TYPE_BOOL, + .per_port = true, + .getter = team_port_tx_en_option_get, + .setter = team_port_tx_en_option_set, + }, { .name = "user_linkup", .type = TEAM_OPTION_TYPE_BOOL, From d3870724eb16c1e6a73cff8a548af0c03a0e6c52 Mon Sep 17 00:00:00 2001 From: Marc Harvey Date: Thu, 9 Apr 2026 02:59:32 +0000 Subject: [PATCH 1492/1561] selftests: net: Add tests for team driver decoupled tx and rx control Use ping and tcpdump to verify that independent rx and tx enablement of team driver member interfaces works as intended. Signed-off-by: Marc Harvey Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260409-teaming-driver-internal-v7-10-f47e7589685d@google.com Signed-off-by: Paolo Abeni --- .../selftests/drivers/net/team/Makefile | 1 + .../drivers/net/team/decoupled_enablement.sh | 249 ++++++++++++++++++ .../selftests/drivers/net/team/options.sh | 99 ++++++- 3 files changed, 348 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/drivers/net/team/decoupled_enablement.sh diff --git a/tools/testing/selftests/drivers/net/team/Makefile b/tools/testing/selftests/drivers/net/team/Makefile index dab922d7f83d..7c58cf82121e 100644 --- a/tools/testing/selftests/drivers/net/team/Makefile +++ b/tools/testing/selftests/drivers/net/team/Makefile @@ -2,6 +2,7 @@ # Makefile for net selftests TEST_PROGS := \ + decoupled_enablement.sh \ dev_addr_lists.sh \ non_ether_header_ops.sh \ options.sh \ diff --git a/tools/testing/selftests/drivers/net/team/decoupled_enablement.sh b/tools/testing/selftests/drivers/net/team/decoupled_enablement.sh new file mode 100755 index 000000000000..0d3d9c98e9f5 --- /dev/null +++ b/tools/testing/selftests/drivers/net/team/decoupled_enablement.sh @@ -0,0 +1,249 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# These tests verify the decoupled RX and TX enablement of team driver member +# interfaces. +# +# Topology +# +# +---------------------+ NS1 +# | test_team1 | +# | | | +# | eth0 | +# | | | +# | | | +# +---------------------+ +# | +# +---------------------+ NS2 +# | | | +# | | | +# | eth0 | +# | | | +# | test_team2 | +# +---------------------+ + +export ALL_TESTS=" + team_test_tx_enablement + team_test_rx_enablement +" + +test_dir="$(dirname "$0")" +# shellcheck disable=SC1091 +source "${test_dir}/../../../net/lib.sh" +# shellcheck disable=SC1091 +source "${test_dir}/team_lib.sh" + +NS1="" +NS2="" +export NODAD="nodad" +PREFIX_LENGTH="64" +NS1_IP="fd00::1" +NS2_IP="fd00::2" +NS1_IP4="192.168.0.1" +NS2_IP4="192.168.0.2" +MEMBERS=("eth0") +PING_COUNT=5 +PING_TIMEOUT_S=1 +PING_INTERVAL=0.1 + +while getopts "4" opt; do + case $opt in + 4) + echo "IPv4 mode selected." + export NODAD= + PREFIX_LENGTH="24" + NS1_IP="${NS1_IP4}" + NS2_IP="${NS2_IP4}" + ;; + \?) + echo "Invalid option: -$OPTARG" >&2 + exit 1 + ;; + esac +done + +# This has to be sourced after opts are gathered... +export REQUIRE_MZ=no +export NUM_NETIFS=0 +# shellcheck disable=SC1091 +source "${test_dir}/../../../net/forwarding/lib.sh" + +# Create the network namespaces, veth pair, and team devices in the specified +# mode. +# Globals: +# RET - Used by test infra, set by `check_err` functions. +# Arguments: +# mode - The team driver mode to use for the team devices. +environment_create() +{ + trap cleanup_all_ns EXIT + setup_ns ns1 ns2 + NS1="${NS_LIST[0]}" + NS2="${NS_LIST[1]}" + + # Create the interfaces. + ip -n "${NS1}" link add eth0 type veth peer name eth0 netns "${NS2}" + ip -n "${NS1}" link add test_team1 type team + ip -n "${NS2}" link add test_team2 type team + + # Set up the receiving network namespace's team interface. + setup_team "${NS2}" test_team2 roundrobin "${NS2_IP}" \ + "${PREFIX_LENGTH}" "${MEMBERS[@]}" +} + +# Set a particular option value for team or team port. +# Arguments: +# namespace - The namespace name that has the team. +# option_name - The option name to set. +# option_value - The value to set the option to. +# team_name - The name of team to set the option for. +# member_name - The (optional) optional name of the member port. +set_option_value() +{ + local namespace="$1" + local option_name="$2" + local option_value="$3" + local team_name="$4" + local member_name="$5" + local port_flag="--port=${member_name}" + + ip netns exec "${namespace}" teamnl "${team_name}" setoption \ + "${option_name}" "${option_value}" "${port_flag}" + return $? +} + +# Send some pings and return the ping command return value. +try_ping() +{ + ip netns exec "${NS1}" ping -i "${PING_INTERVAL}" -c "${PING_COUNT}" \ + "${NS2_IP}" -W "${PING_TIMEOUT_S}" +} + +# Checks tcpdump output from net/forwarding lib, and checks if there are any +# ICMP(4 or 6) packets. +# Arguments: +# interface - The interface name to search for. +# ip_address - The destination IP address (4 or 6) to search for. +did_interface_receive_icmp() +{ + local interface="$1" + local ip_address="$2" + local packet_count + + packet_count=$(tcpdump_show "$interface" | grep -c \ + "> ${ip_address}: ICMP") + echo "Packet count for ${interface} was ${packet_count}" + + if [[ "$packet_count" -gt 0 ]]; then + true + else + false + fi +} + +# Test JUST tx enablement with a given mode. +# Globals: +# RET - Used by test infra, set by `check_err` functions. +# Arguments: +# mode - The mode to set the team interfaces to. +team_test_mode_tx_enablement() +{ + local mode="$1" + export RET=0 + + # Set up the sender team with the correct mode. + setup_team "${NS1}" test_team1 "${mode}" "${NS1_IP}" \ + "${PREFIX_LENGTH}" "${MEMBERS[@]}" + check_err $? "Failed to set up sender team" + + ### Scenario 1: Member interface initially enabled. + # Expect ping to pass + try_ping + check_err $? "Ping failed when TX enabled" + + ### Scenario 2: One tx-side interface disabled. + # Expect ping to fail. + set_option_value "${NS1}" tx_enabled false test_team1 eth0 + check_err $? "Failed to disable TX" + tcpdump_start eth0 "${NS2}" + try_ping + check_fail $? "Ping succeeded when TX disabled" + tcpdump_stop eth0 + # Expect no packets to be transmitted, since TX is disabled. + did_interface_receive_icmp eth0 "${NS2_IP}" + check_fail $? "eth0 IS transmitting when TX disabled" + tcpdump_cleanup eth0 + + ### Scenario 3: The interface has tx re-enabled. + # Expect ping to pass. + set_option_value "${NS1}" tx_enabled true test_team1 eth0 + check_err $? "Failed to reenable TX" + try_ping + check_err $? "Ping failed when TX reenabled" + + log_test "TX failover of '${mode}' test" +} + +# Test JUST rx enablement with a given mode. +# Globals: +# RET - Used by test infra, set by `check_err` functions. +# Arguments: +# mode - The mode to set the team interfaces to. +team_test_mode_rx_enablement() +{ + local mode="$1" + export RET=0 + + # Set up the sender team with the correct mode. + setup_team "${NS1}" test_team1 "${mode}" "${NS1_IP}" \ + "${PREFIX_LENGTH}" "${MEMBERS[@]}" + check_err $? "Failed to set up sender team" + + ### Scenario 1: Member interface initially enabled. + # Expect ping to pass + try_ping + check_err $? "Ping failed when RX enabled" + + ### Scenario 2: One rx-side interface disabled. + # Expect ping to fail. + set_option_value "${NS1}" rx_enabled false test_team1 eth0 + check_err $? "Failed to disable RX" + tcpdump_start eth0 "${NS2}" + try_ping + check_fail $? "Ping succeeded when RX disabled" + tcpdump_stop eth0 + # Expect packets to be transmitted, since only RX is disabled. + did_interface_receive_icmp eth0 "${NS2_IP}" + check_err $? "eth0 not transmitting when RX disabled" + tcpdump_cleanup eth0 + + ### Scenario 3: The interface has rx re-enabled. + # Expect ping to pass. + set_option_value "${NS1}" rx_enabled true test_team1 eth0 + check_err $? "Failed to reenable RX" + try_ping + check_err $? "Ping failed when RX reenabled" + + log_test "RX failover of '${mode}' test" +} + +team_test_tx_enablement() +{ + team_test_mode_tx_enablement broadcast + team_test_mode_tx_enablement roundrobin + team_test_mode_tx_enablement random +} + +team_test_rx_enablement() +{ + team_test_mode_rx_enablement broadcast + team_test_mode_rx_enablement roundrobin + team_test_mode_rx_enablement random +} + +require_command teamnl +require_command tcpdump +require_command ping +environment_create +tests_run +exit "${EXIT_STATUS}" diff --git a/tools/testing/selftests/drivers/net/team/options.sh b/tools/testing/selftests/drivers/net/team/options.sh index 44888f32b513..66c0cb896dad 100755 --- a/tools/testing/selftests/drivers/net/team/options.sh +++ b/tools/testing/selftests/drivers/net/team/options.sh @@ -11,10 +11,14 @@ if [[ $# -eq 0 ]]; then exit $? fi -ALL_TESTS=" +export ALL_TESTS=" team_test_options + team_test_enabled_implicit_changes + team_test_rx_enabled_implicit_changes + team_test_tx_enabled_implicit_changes " +# shellcheck disable=SC1091 source "${test_dir}/../../../net/lib.sh" TEAM_PORT="team0" @@ -176,12 +180,105 @@ team_test_options() team_test_option mcast_rejoin_count 0 5 team_test_option mcast_rejoin_interval 0 5 team_test_option enabled true false "${MEMBER_PORT}" + team_test_option rx_enabled true false "${MEMBER_PORT}" + team_test_option tx_enabled true false "${MEMBER_PORT}" team_test_option user_linkup true false "${MEMBER_PORT}" team_test_option user_linkup_enabled true false "${MEMBER_PORT}" team_test_option priority 10 20 "${MEMBER_PORT}" team_test_option queue_id 0 1 "${MEMBER_PORT}" } +team_test_enabled_implicit_changes() +{ + export RET=0 + + attach_port_if_specified "${MEMBER_PORT}" + check_err $? "Couldn't attach ${MEMBER_PORT} to master" + + # Set enabled to true. + set_and_check_get enabled true "--port=${MEMBER_PORT}" + check_err $? "Failed to set 'enabled' to true" + + # Show that both rx enabled and tx enabled are true. + get_and_check_value rx_enabled true "--port=${MEMBER_PORT}" + check_err $? "'Rx_enabled' wasn't implicitly set to true" + get_and_check_value tx_enabled true "--port=${MEMBER_PORT}" + check_err $? "'Tx_enabled' wasn't implicitly set to true" + + # Set enabled to false. + set_and_check_get enabled false "--port=${MEMBER_PORT}" + check_err $? "Failed to set 'enabled' to false" + + # Show that both rx enabled and tx enabled are false. + get_and_check_value rx_enabled false "--port=${MEMBER_PORT}" + check_err $? "'Rx_enabled' wasn't implicitly set to false" + get_and_check_value tx_enabled false "--port=${MEMBER_PORT}" + check_err $? "'Tx_enabled' wasn't implicitly set to false" + + log_test "'Enabled' implicit changes" +} + +team_test_rx_enabled_implicit_changes() +{ + export RET=0 + + attach_port_if_specified "${MEMBER_PORT}" + check_err $? "Couldn't attach ${MEMBER_PORT} to master" + + # Set enabled to true. + set_and_check_get enabled true "--port=${MEMBER_PORT}" + check_err $? "Failed to set 'enabled' to true" + + # Set rx_enabled to false. + set_and_check_get rx_enabled false "--port=${MEMBER_PORT}" + check_err $? "Failed to set 'rx_enabled' to false" + + # Show that enabled is false. + get_and_check_value enabled false "--port=${MEMBER_PORT}" + check_err $? "'enabled' wasn't implicitly set to false" + + # Set rx_enabled to true. + set_and_check_get rx_enabled true "--port=${MEMBER_PORT}" + check_err $? "Failed to set 'rx_enabled' to true" + + # Show that enabled is true. + get_and_check_value enabled true "--port=${MEMBER_PORT}" + check_err $? "'enabled' wasn't implicitly set to true" + + log_test "'Rx_enabled' implicit changes" +} + +team_test_tx_enabled_implicit_changes() +{ + export RET=0 + + attach_port_if_specified "${MEMBER_PORT}" + check_err $? "Couldn't attach ${MEMBER_PORT} to master" + + # Set enabled to true. + set_and_check_get enabled true "--port=${MEMBER_PORT}" + check_err $? "Failed to set 'enabled' to true" + + # Set tx_enabled to false. + set_and_check_get tx_enabled false "--port=${MEMBER_PORT}" + check_err $? "Failed to set 'tx_enabled' to false" + + # Show that enabled is false. + get_and_check_value enabled false "--port=${MEMBER_PORT}" + check_err $? "'enabled' wasn't implicitly set to false" + + # Set tx_enabled to true. + set_and_check_get tx_enabled true "--port=${MEMBER_PORT}" + check_err $? "Failed to set 'tx_enabled' to true" + + # Show that enabled is true. + get_and_check_value enabled true "--port=${MEMBER_PORT}" + check_err $? "'enabled' wasn't implicitly set to true" + + log_test "'Tx_enabled' implicit changes" +} + + require_command teamnl setup tests_run From 15bf35a660eb82a49f8397fc3d3acada8dae13db Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 16 Mar 2026 14:34:13 -0400 Subject: [PATCH 1493/1561] Bluetooth: L2CAP: Fix printing wrong information if SDU length exceeds MTU The code was printing skb->len and sdu_len in the places where it should be sdu_len and chan->imtu respectively to match the if conditions. Link: https://lore.kernel.org/linux-bluetooth/20260315132013.75ab40c5@kernel.org/T/#m1418f9c82eeff8510c1beaa21cf53af20db96c06 Fixes: e1d9a6688986 ("Bluetooth: LE L2CAP: Disconnect if received packet's SDU exceeds IMTU") Signed-off-by: Luiz Augusto von Dentz Reviewed-by: Paul Menzel --- net/bluetooth/l2cap_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 9916ae6abef0..5a4cd530ef33 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -6740,7 +6740,7 @@ static int l2cap_ecred_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) if (sdu_len > chan->imtu) { BT_ERR("Too big LE L2CAP SDU length: len %u > %u", - skb->len, sdu_len); + sdu_len, chan->imtu); l2cap_send_disconn_req(chan, ECONNRESET); err = -EMSGSIZE; goto failed; From 552aac414f9a2e663dfe2810999f28988720f2a4 Mon Sep 17 00:00:00 2001 From: Lukas Kraft Date: Fri, 13 Mar 2026 17:39:12 +0100 Subject: [PATCH 1494/1561] bluetooth: btusb: Fix whitespace in btusb.c Replace single space with tab and insert blank line after declaration, according to checkpatch Signed-off-by: Lukas Kraft Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btusb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 71f4af23b556..07fc169e03b6 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -803,7 +803,7 @@ static const struct usb_device_id quirks_table[] = { { USB_DEVICE(0x2357, 0x0604), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0b05, 0x190e), .driver_info = BTUSB_REALTEK | - BTUSB_WIDEBAND_SPEECH }, + BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x2550, 0x8761), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0bda, 0x8771), .driver_info = BTUSB_REALTEK | @@ -2485,6 +2485,7 @@ static int btusb_setup_csr(struct hci_dev *hdev) HCI_INIT_TIMEOUT); if (IS_ERR(skb)) { int err = PTR_ERR(skb); + bt_dev_err(hdev, "CSR: Local version failed (%d)", err); return err; } From 7b75867803a8712bdf7683c31d71d3d5e28ce821 Mon Sep 17 00:00:00 2001 From: Shuai Zhang Date: Tue, 24 Mar 2026 10:30:16 +0800 Subject: [PATCH 1495/1561] Bluetooth: hci_qca: disable power control for WCN7850 when bt_en is not defined On platforms using an M.2 slot with both UART and USB support, bt_en is pulled high by hardware. In this case, software-based power control should be disabled. The current platforms are Lemans-EVK and Monaco-EVK. Add QCA_WCN7850 to the existing condition so that power_ctrl_enabled is cleared when bt_en is not software-controlled (or absent), aligning its behavior with WCN6750 and WCN6855 Signed-off-by: Shuai Zhang Reviewed-by: Bartosz Golaszewski Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_qca.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index c17a462aef55..4512ff7cd0c0 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -2484,7 +2484,8 @@ static int qca_serdev_probe(struct serdev_device *serdev) if (!qcadev->bt_en && (data->soc_type == QCA_WCN6750 || - data->soc_type == QCA_WCN6855)) + data->soc_type == QCA_WCN6855 || + data->soc_type == QCA_WCN7850)) power_ctrl_enabled = false; qcadev->sw_ctrl = devm_gpiod_get_optional(&serdev->dev, "swctrl", From d288f4db0909c22342eb50cd1632b4d850517281 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Wed, 25 Mar 2026 21:07:45 +0200 Subject: [PATCH 1496/1561] Bluetooth: hci_sync: make hci_cmd_sync_run_once return -EEXIST if exists hci_cmd_sync_run_once() needs to indicate whether a queue item was added, so caller can know if callbacks are called, so it can avoid leaking resources. Change the function to return -EEXIST if queue item already exists. Modify all callsites vs. the changes. The only callsite is hci_abort_conn(). Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_conn.c | 4 +++- net/bluetooth/hci_sync.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 11d3ad8d2551..3a0592599086 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -3083,6 +3083,7 @@ static int abort_conn_sync(struct hci_dev *hdev, void *data) int hci_abort_conn(struct hci_conn *conn, u8 reason) { struct hci_dev *hdev = conn->hdev; + int err; /* If abort_reason has already been set it means the connection is * already being aborted so don't attempt to overwrite it. @@ -3119,7 +3120,8 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason) * as a result to MGMT_OP_DISCONNECT/MGMT_OP_UNPAIR which does * already queue its callback on cmd_sync_work. */ - return hci_cmd_sync_run_once(hdev, abort_conn_sync, conn, NULL); + err = hci_cmd_sync_run_once(hdev, abort_conn_sync, conn, NULL); + return (err == -EEXIST) ? 0 : err; } void hci_setup_tx_timestamp(struct sk_buff *skb, size_t key_offset, diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 919ec275dd23..fd3aacdea512 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -825,7 +825,7 @@ int hci_cmd_sync_run_once(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy) { if (hci_cmd_sync_lookup_entry(hdev, func, data, destroy)) - return 0; + return -EEXIST; return hci_cmd_sync_run(hdev, func, data, destroy); } From 68d39ea5e0adc9ecaea1ce8abd842ec972eb8718 Mon Sep 17 00:00:00 2001 From: Jonathan Rissanen Date: Fri, 27 Mar 2026 11:47:20 +0100 Subject: [PATCH 1497/1561] Bluetooth: hci_ldisc: Clear HCI_UART_PROTO_INIT on error When hci_register_dev() fails in hci_uart_register_dev() HCI_UART_PROTO_INIT is not cleared before calling hu->proto->close(hu) and setting hu->hdev to NULL. This means incoming UART data will reach the protocol-specific recv handler in hci_uart_tty_receive() after resources are freed. Clear HCI_UART_PROTO_INIT with a write lock before calling hu->proto->close() and setting hu->hdev to NULL. The write lock ensures all active readers have completed and no new reader can enter the protocol recv path before resources are freed. This allows the protocol-specific recv functions to remove the "HCI_UART_REGISTERED" guard without risking a null pointer dereference if hci_register_dev() fails. Fixes: 5df5dafc171b ("Bluetooth: hci_uart: Fix another race during initialization") Signed-off-by: Jonathan Rissanen Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_ldisc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index 71c1997a0f73..275ea865bc29 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -692,6 +692,9 @@ static int hci_uart_register_dev(struct hci_uart *hu) if (hci_register_dev(hdev) < 0) { BT_ERR("Can't register HCI device"); + percpu_down_write(&hu->proto_lock); + clear_bit(HCI_UART_PROTO_INIT, &hu->flags); + percpu_up_write(&hu->proto_lock); hu->proto->close(hu); hu->hdev = NULL; hci_free_dev(hdev); From 5c31aaa05624b54dc18c9e313bcee5a88c025593 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 30 Mar 2026 11:41:33 +0200 Subject: [PATCH 1498/1561] Bluetooth: btusb: refactor endpoint lookup Use the common USB helper for looking up bulk and interrupt endpoints instead of open coding. Signed-off-by: Johan Hovold Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btusb.c | 51 ++++++--------------------------------- 1 file changed, 8 insertions(+), 43 deletions(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 07fc169e03b6..4fb0e9dceca0 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -3689,31 +3689,14 @@ static inline int __set_diag_interface(struct hci_dev *hdev) { struct btusb_data *data = hci_get_drvdata(hdev); struct usb_interface *intf = data->diag; - int i; + int ret; if (!data->diag) return -ENODEV; - data->diag_tx_ep = NULL; - data->diag_rx_ep = NULL; - - for (i = 0; i < intf->cur_altsetting->desc.bNumEndpoints; i++) { - struct usb_endpoint_descriptor *ep_desc; - - ep_desc = &intf->cur_altsetting->endpoint[i].desc; - - if (!data->diag_tx_ep && usb_endpoint_is_bulk_out(ep_desc)) { - data->diag_tx_ep = ep_desc; - continue; - } - - if (!data->diag_rx_ep && usb_endpoint_is_bulk_in(ep_desc)) { - data->diag_rx_ep = ep_desc; - continue; - } - } - - if (!data->diag_tx_ep || !data->diag_rx_ep) { + ret = usb_find_common_endpoints(intf->cur_altsetting, &data->diag_rx_ep, + &data->diag_tx_ep, NULL, NULL); + if (ret) { bt_dev_err(hdev, "invalid diagnostic descriptors"); return -ENODEV; } @@ -4039,12 +4022,11 @@ static struct hci_drv btusb_hci_drv = { static int btusb_probe(struct usb_interface *intf, const struct usb_device_id *id) { - struct usb_endpoint_descriptor *ep_desc; struct gpio_desc *reset_gpio; struct btusb_data *data; struct hci_dev *hdev; unsigned ifnum_base; - int i, err, priv_size; + int err, priv_size; BT_DBG("intf %p id %p", intf, id); @@ -4081,26 +4063,9 @@ static int btusb_probe(struct usb_interface *intf, if (!data) return -ENOMEM; - for (i = 0; i < intf->cur_altsetting->desc.bNumEndpoints; i++) { - ep_desc = &intf->cur_altsetting->endpoint[i].desc; - - if (!data->intr_ep && usb_endpoint_is_int_in(ep_desc)) { - data->intr_ep = ep_desc; - continue; - } - - if (!data->bulk_tx_ep && usb_endpoint_is_bulk_out(ep_desc)) { - data->bulk_tx_ep = ep_desc; - continue; - } - - if (!data->bulk_rx_ep && usb_endpoint_is_bulk_in(ep_desc)) { - data->bulk_rx_ep = ep_desc; - continue; - } - } - - if (!data->intr_ep || !data->bulk_tx_ep || !data->bulk_rx_ep) { + err = usb_find_common_endpoints(intf->cur_altsetting, &data->bulk_rx_ep, + &data->bulk_tx_ep, &data->intr_ep, NULL); + if (err) { kfree(data); return -ENODEV; } From 120941654f187674b3aac4d546c2a915965b3937 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 30 Mar 2026 11:41:34 +0200 Subject: [PATCH 1499/1561] Bluetooth: btmtk: refactor endpoint lookup Use the common USB helper for looking up bulk and interrupt endpoints instead of open coding. Signed-off-by: Johan Hovold Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btmtk.c | 29 +++++------------------------ 1 file changed, 5 insertions(+), 24 deletions(-) diff --git a/drivers/bluetooth/btmtk.c b/drivers/bluetooth/btmtk.c index 55516b4602db..c3e5975b9f37 100644 --- a/drivers/bluetooth/btmtk.c +++ b/drivers/bluetooth/btmtk.c @@ -995,7 +995,7 @@ static int __set_mtk_intr_interface(struct hci_dev *hdev) { struct btmtk_data *btmtk_data = hci_get_priv(hdev); struct usb_interface *intf = btmtk_data->isopkt_intf; - int i, err; + int err; if (!btmtk_data->isopkt_intf) return -ENODEV; @@ -1006,29 +1006,10 @@ static int __set_mtk_intr_interface(struct hci_dev *hdev) return err; } - btmtk_data->isopkt_tx_ep = NULL; - btmtk_data->isopkt_rx_ep = NULL; - - for (i = 0; i < intf->cur_altsetting->desc.bNumEndpoints; i++) { - struct usb_endpoint_descriptor *ep_desc; - - ep_desc = &intf->cur_altsetting->endpoint[i].desc; - - if (!btmtk_data->isopkt_tx_ep && - usb_endpoint_is_int_out(ep_desc)) { - btmtk_data->isopkt_tx_ep = ep_desc; - continue; - } - - if (!btmtk_data->isopkt_rx_ep && - usb_endpoint_is_int_in(ep_desc)) { - btmtk_data->isopkt_rx_ep = ep_desc; - continue; - } - } - - if (!btmtk_data->isopkt_tx_ep || - !btmtk_data->isopkt_rx_ep) { + err = usb_find_common_endpoints(intf->cur_altsetting, NULL, NULL, + &btmtk_data->isopkt_rx_ep, + &btmtk_data->isopkt_tx_ep); + if (err) { bt_dev_err(hdev, "invalid interrupt descriptors"); return -ENODEV; } From 5c7209a341ff2ac338b2b0375c34a307b37c9ac2 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Sun, 29 Mar 2026 16:42:59 +0300 Subject: [PATCH 1500/1561] Bluetooth: fix locking in hci_conn_request_evt() with HCI_PROTO_DEFER When protocol sets HCI_PROTO_DEFER, hci_conn_request_evt() calls hci_connect_cfm(conn) without hdev->lock. Generally hci_connect_cfm() assumes it is held, and if conn is deleted concurrently -> UAF. Only SCO and ISO set HCI_PROTO_DEFER and only for defer setup listen, and HCI_EV_CONN_REQUEST is not generated for ISO. In the non-deferred listening socket code paths, hci_connect_cfm(conn) is called with hdev->lock held. Fix by holding the lock. Fixes: 70c464256310 ("Bluetooth: Refactor connection request handling") Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 3ebc5e6d45d9..83248085dd4f 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3340,8 +3340,6 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data, memcpy(conn->dev_class, ev->dev_class, 3); - hci_dev_unlock(hdev); - if (ev->link_type == ACL_LINK || (!(flags & HCI_PROTO_DEFER) && !lmp_esco_capable(hdev))) { struct hci_cp_accept_conn_req cp; @@ -3375,7 +3373,6 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data, hci_connect_cfm(conn, 0); } - return; unlock: hci_dev_unlock(hdev); } From 28b7c5a6db74e9305c6cbcbe52f259ff1cf85158 Mon Sep 17 00:00:00 2001 From: Javier Tia Date: Mon, 30 Mar 2026 14:39:23 -0600 Subject: [PATCH 1501/1561] Bluetooth: btmtk: Add MT6639 (MT7927) Bluetooth support The MediaTek MT7927 (Filogic 380) combo WiFi 7 + BT 5.4 module uses hardware variant 0x6639 for its Bluetooth subsystem. Without this patch, the chip fails with "Unsupported hardware variant (00006639)" or hangs during firmware download. Three changes are needed to support MT6639: 1. CHIPID workaround: On some boards the BT USB MMIO register reads 0x0000 for dev_id, causing the driver to skip the 0x6639 init path. Force dev_id to 0x6639 only when the USB VID/PID matches a known MT6639 device, avoiding misdetection if a future chip also reads zero. This follows the WiFi-side pattern that uses PCI device IDs to scope the same workaround. 2. Firmware naming: MT6639 uses firmware version prefix "2_1" instead of "1_1" used by MT7925 and other variants. The firmware path is mediatek/mt7927/BT_RAM_CODE_MT6639_2_1_hdr.bin, using the mt7927 directory to match the WiFi firmware convention. The filename will likely change to use MT7927 once MediaTek submits a dedicated Linux firmware binary. 3. Section filtering: The MT6639 firmware binary contains 9 sections, but only sections with (dlmodecrctype & 0xff) == 0x01 are Bluetooth-related. Sending the remaining WiFi/other sections causes an irreversible BT subsystem hang requiring a full power cycle. This matches the Windows driver behavior observed via USB captures. Also add 0x6639 to the reset register (CONNV3) and firmware setup switch cases alongside the existing 0x7925 handling. Link: https://bugzilla.kernel.org/show_bug.cgi?id=221096 Link: https://github.com/openwrt/mt76/issues/927 Reported-by: Ryan Gilbert Signed-off-by: Javier Tia Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btmtk.c | 60 ++++++++++++++++++++++++++++++++--- drivers/bluetooth/btmtk.h | 7 ++-- drivers/bluetooth/btmtksdio.c | 2 +- 3 files changed, 61 insertions(+), 8 deletions(-) diff --git a/drivers/bluetooth/btmtk.c b/drivers/bluetooth/btmtk.c index c3e5975b9f37..cef7a6405e1d 100644 --- a/drivers/bluetooth/btmtk.c +++ b/drivers/bluetooth/btmtk.c @@ -25,6 +25,22 @@ /* It is for mt79xx iso data transmission setting */ #define MTK_ISO_THRESHOLD 264 +/* Known MT6639 (MT7927) Bluetooth USB devices. + * Used to scope the zero-CHIPID workaround to real MT6639 hardware, + * since some boards return 0x0000 from the MMIO chip ID register. + */ +static const struct { + u16 vendor; + u16 product; +} btmtk_mt6639_devs[] = { + { 0x0489, 0xe13a }, /* ASUS ROG Crosshair X870E Hero */ + { 0x0489, 0xe0fa }, /* Lenovo Legion Pro 7 16ARX9 */ + { 0x0489, 0xe10f }, /* Gigabyte Z790 AORUS MASTER X */ + { 0x0489, 0xe110 }, /* MSI X870E Ace Max */ + { 0x0489, 0xe116 }, /* TP-Link Archer TBE550E */ + { 0x13d3, 0x3588 }, /* ASUS ROG STRIX X870E-E */ +}; + struct btmtk_patch_header { u8 datetime[16]; u8 platform[4]; @@ -112,7 +128,11 @@ static void btmtk_coredump_notify(struct hci_dev *hdev, int state) void btmtk_fw_get_filename(char *buf, size_t size, u32 dev_id, u32 fw_ver, u32 fw_flavor) { - if (dev_id == 0x7925) + if (dev_id == 0x6639) + snprintf(buf, size, + "mediatek/mt7927/BT_RAM_CODE_MT%04x_2_%x_hdr.bin", + dev_id & 0xffff, (fw_ver & 0xff) + 1); + else if (dev_id == 0x7925) snprintf(buf, size, "mediatek/mt%04x/BT_RAM_CODE_MT%04x_1_%x_hdr.bin", dev_id & 0xffff, dev_id & 0xffff, (fw_ver & 0xff) + 1); @@ -128,7 +148,8 @@ void btmtk_fw_get_filename(char *buf, size_t size, u32 dev_id, u32 fw_ver, EXPORT_SYMBOL_GPL(btmtk_fw_get_filename); int btmtk_setup_firmware_79xx(struct hci_dev *hdev, const char *fwname, - wmt_cmd_sync_func_t wmt_cmd_sync) + wmt_cmd_sync_func_t wmt_cmd_sync, + u32 dev_id) { struct btmtk_hci_wmt_params wmt_params; struct btmtk_patch_header *hdr; @@ -166,6 +187,14 @@ int btmtk_setup_firmware_79xx(struct hci_dev *hdev, const char *fwname, section_offset = le32_to_cpu(sectionmap->secoffset); dl_size = le32_to_cpu(sectionmap->bin_info_spec.dlsize); + /* MT6639: only download sections where dlmode byte0 == 0x01, + * matching the Windows driver behavior which skips WiFi/other + * sections that would cause the chip to hang. + */ + if (dev_id == 0x6639 && dl_size > 0 && + (le32_to_cpu(sectionmap->bin_info_spec.dlmodecrctype) & 0xff) != 0x01) + continue; + if (dl_size > 0) { retry = 20; while (retry > 0) { @@ -852,7 +881,7 @@ int btmtk_usb_subsys_reset(struct hci_dev *hdev, u32 dev_id) if (err < 0) return err; msleep(100); - } else if (dev_id == 0x7925) { + } else if (dev_id == 0x7925 || dev_id == 0x6639) { err = btmtk_usb_uhw_reg_read(hdev, MTK_BT_RESET_REG_CONNV3, &val); if (err < 0) return err; @@ -938,7 +967,7 @@ int btmtk_usb_subsys_reset(struct hci_dev *hdev, u32 dev_id) } err = btmtk_usb_id_get(hdev, 0x70010200, &val); - if (err < 0 || !val) + if (err < 0 || (!val && dev_id != 0x6639)) bt_dev_err(hdev, "Can't get device id, subsys reset fail."); return err; @@ -1303,6 +1332,24 @@ int btmtk_usb_setup(struct hci_dev *hdev) fw_flavor = (fw_flavor & 0x00000080) >> 7; } + if (!dev_id) { + u16 vid = le16_to_cpu(btmtk_data->udev->descriptor.idVendor); + u16 pid = le16_to_cpu(btmtk_data->udev->descriptor.idProduct); + int i; + + for (i = 0; i < ARRAY_SIZE(btmtk_mt6639_devs); i++) { + if (vid == btmtk_mt6639_devs[i].vendor && + pid == btmtk_mt6639_devs[i].product) { + dev_id = 0x6639; + break; + } + } + + if (dev_id) + bt_dev_info(hdev, "MT6639: CHIPID=0x0000 with VID=%04x PID=%04x, using 0x6639", + vid, pid); + } + btmtk_data->dev_id = dev_id; err = btmtk_register_coredump(hdev, btmtk_data->drv_name, fw_version); @@ -1320,11 +1367,13 @@ int btmtk_usb_setup(struct hci_dev *hdev) case 0x7925: case 0x7961: case 0x7902: + case 0x6639: btmtk_fw_get_filename(fw_bin_name, sizeof(fw_bin_name), dev_id, fw_version, fw_flavor); err = btmtk_setup_firmware_79xx(hdev, fw_bin_name, - btmtk_usb_hci_wmt_sync); + btmtk_usb_hci_wmt_sync, + dev_id); if (err < 0) { /* retry once if setup firmware error */ if (!test_and_set_bit(BTMTK_FIRMWARE_DL_RETRY, &btmtk_data->flags)) @@ -1497,3 +1546,4 @@ MODULE_FIRMWARE(FIRMWARE_MT7668); MODULE_FIRMWARE(FIRMWARE_MT7922); MODULE_FIRMWARE(FIRMWARE_MT7961); MODULE_FIRMWARE(FIRMWARE_MT7925); +MODULE_FIRMWARE(FIRMWARE_MT7927); diff --git a/drivers/bluetooth/btmtk.h b/drivers/bluetooth/btmtk.h index adaf385626ee..c564aedc0ce0 100644 --- a/drivers/bluetooth/btmtk.h +++ b/drivers/bluetooth/btmtk.h @@ -8,6 +8,7 @@ #define FIRMWARE_MT7902 "mediatek/BT_RAM_CODE_MT7902_1_1_hdr.bin" #define FIRMWARE_MT7961 "mediatek/BT_RAM_CODE_MT7961_1_2_hdr.bin" #define FIRMWARE_MT7925 "mediatek/mt7925/BT_RAM_CODE_MT7925_1_1_hdr.bin" +#define FIRMWARE_MT7927 "mediatek/mt7927/BT_RAM_CODE_MT6639_2_1_hdr.bin" #define HCI_EV_WMT 0xe4 #define HCI_WMT_MAX_EVENT_SIZE 64 @@ -189,7 +190,8 @@ typedef int (*wmt_cmd_sync_func_t)(struct hci_dev *, int btmtk_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr); int btmtk_setup_firmware_79xx(struct hci_dev *hdev, const char *fwname, - wmt_cmd_sync_func_t wmt_cmd_sync); + wmt_cmd_sync_func_t wmt_cmd_sync, + u32 dev_id); int btmtk_setup_firmware(struct hci_dev *hdev, const char *fwname, wmt_cmd_sync_func_t wmt_cmd_sync); @@ -228,7 +230,8 @@ static inline int btmtk_set_bdaddr(struct hci_dev *hdev, static inline int btmtk_setup_firmware_79xx(struct hci_dev *hdev, const char *fwname, - wmt_cmd_sync_func_t wmt_cmd_sync) + wmt_cmd_sync_func_t wmt_cmd_sync, + u32 dev_id) { return -EOPNOTSUPP; } diff --git a/drivers/bluetooth/btmtksdio.c b/drivers/bluetooth/btmtksdio.c index 042064464d34..5b0fab7b89b5 100644 --- a/drivers/bluetooth/btmtksdio.c +++ b/drivers/bluetooth/btmtksdio.c @@ -883,7 +883,7 @@ static int mt79xx_setup(struct hci_dev *hdev, const char *fwname) u8 param = 0x1; int err; - err = btmtk_setup_firmware_79xx(hdev, fwname, mtk_hci_wmt_sync); + err = btmtk_setup_firmware_79xx(hdev, fwname, mtk_hci_wmt_sync, 0); if (err < 0) { bt_dev_err(hdev, "Failed to setup 79xx firmware (%d)", err); return err; From e4f6bc7f8222a9e40283d3fc6c4c4c656d79a48e Mon Sep 17 00:00:00 2001 From: Javier Tia Date: Mon, 30 Mar 2026 14:39:24 -0600 Subject: [PATCH 1502/1561] Bluetooth: btmtk: fix ISO interface setup for single alt setting Some MT6639 Bluetooth USB interfaces (e.g. IMC Networks 13d3:3588 on ASUS ROG STRIX X870E-E and ProArt X870E-Creator boards) expose only a single alternate setting (alt 0) on the ISO interface. The driver unconditionally requests alt setting 1, which fails with EINVAL on these devices, causing a ~20 second initialization delay and no LE audio support. Check the number of available alternate settings before selecting one. If only alt 0 exists, use it; otherwise request alt 1 as before. Closes: https://github.com/jetm/mediatek-mt7927-dkms/pull/39 Signed-off-by: Javier Tia Reported-by: Ryan Gilbert Tested-by: Ryan Gilbert Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btmtk.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/btmtk.c b/drivers/bluetooth/btmtk.c index cef7a6405e1d..099188bf772e 100644 --- a/drivers/bluetooth/btmtk.c +++ b/drivers/bluetooth/btmtk.c @@ -1029,7 +1029,8 @@ static int __set_mtk_intr_interface(struct hci_dev *hdev) if (!btmtk_data->isopkt_intf) return -ENODEV; - err = usb_set_interface(btmtk_data->udev, MTK_ISO_IFNUM, 1); + err = usb_set_interface(btmtk_data->udev, MTK_ISO_IFNUM, + (intf->num_altsetting > 1) ? 1 : 0); if (err < 0) { bt_dev_err(hdev, "setting interface failed (%d)", -err); return err; From bfe11e17b3005e7458b46530e9d27e1d36924541 Mon Sep 17 00:00:00 2001 From: Javier Tia Date: Mon, 30 Mar 2026 14:39:25 -0600 Subject: [PATCH 1503/1561] Bluetooth: btusb: Add MT7927 ID for ASUS ROG Crosshair X870E Hero Add USB device ID 0489:e13a (Foxconn/Hon Hai) for the MediaTek MT7927 (Filogic 380) Bluetooth interface found on the ASUS ROG Crosshair X870E Hero WiFi motherboard. The information in /sys/kernel/debug/usb/devices about the Bluetooth device is listed as the below. T: Bus=01 Lev=01 Prnt=01 Port=05 Cnt=04 Dev#= 5 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0489 ProdID=e13a Rev= 1.00 S: Manufacturer=MediaTek Inc. S: Product=Wireless_Device S: SerialNumber=000000000 C:* #Ifs= 3 Cfg#= 1 Atr=e0 MxPwr=100mA A: FirstIf#= 0 IfCount= 3 Cls=e0(wlcon) Sub=01 Prot=01 I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=125us E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb ... I:* If#= 2 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=8a(I) Atr=03(Int.) MxPS= 512 Ivl=125us E: Ad=0a(O) Atr=03(Int.) MxPS= 512 Ivl=125us Link: https://bugzilla.kernel.org/show_bug.cgi?id=221096 Link: https://github.com/openwrt/mt76/issues/927 Signed-off-by: Javier Tia Tested-by: Jose Tiburcio Ribeiro Netto Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btusb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 4fb0e9dceca0..f88e2324421f 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -751,6 +751,8 @@ static const struct usb_device_id quirks_table[] = { BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0489, 0xe139), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x0489, 0xe13a), .driver_info = BTUSB_MEDIATEK | + BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0489, 0xe14e), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0489, 0xe14f), .driver_info = BTUSB_MEDIATEK | From 6f7d4428cc921d687d8778c2edcc18cd0ee0bd17 Mon Sep 17 00:00:00 2001 From: Javier Tia Date: Mon, 30 Mar 2026 14:39:26 -0600 Subject: [PATCH 1504/1561] Bluetooth: btusb: Add MT7927 ID for Lenovo Legion Pro 7 16ARX9 Add USB device ID 0489:e0fa (Foxconn/Hon Hai) for the MediaTek MT7927 (Filogic 380) Bluetooth interface found on the Lenovo Legion Pro 7 16ARX9 laptop. The information in /sys/kernel/debug/usb/devices about the Bluetooth device is listed as the below. T: Bus=01 Lev=01 Prnt=01 Port=05 Cnt=04 Dev#= 5 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0489 ProdID=e0fa Rev= 1.00 S: Manufacturer=MediaTek Inc. S: Product=Wireless_Device S: SerialNumber=000000000 C:* #Ifs= 3 Cfg#= 1 Atr=e0 MxPwr=100mA A: FirstIf#= 0 IfCount= 3 Cls=e0(wlcon) Sub=01 Prot=01 I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=125us E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb ... I:* If#= 2 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=8a(I) Atr=03(Int.) MxPS= 512 Ivl=125us E: Ad=0a(O) Atr=03(Int.) MxPS= 512 Ivl=125us Link: https://bugzilla.kernel.org/show_bug.cgi?id=221096 Link: https://github.com/openwrt/mt76/issues/927 Signed-off-by: Javier Tia Tested-by: Llewellyn Curran Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btusb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index f88e2324421f..d62b902d01d9 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -753,6 +753,8 @@ static const struct usb_device_id quirks_table[] = { BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0489, 0xe13a), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x0489, 0xe0fa), .driver_info = BTUSB_MEDIATEK | + BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0489, 0xe14e), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0489, 0xe14f), .driver_info = BTUSB_MEDIATEK | From 0e2f6aca7c3b30b72a3e6dd8b0af5c63b4ddf7a5 Mon Sep 17 00:00:00 2001 From: Javier Tia Date: Mon, 30 Mar 2026 14:39:27 -0600 Subject: [PATCH 1505/1561] Bluetooth: btusb: Add MT7927 ID for Gigabyte Z790 AORUS MASTER X Add USB device ID 0489:e10f (Foxconn/Hon Hai) for the MediaTek MT7927 (Filogic 380) Bluetooth interface found on the Gigabyte Z790 AORUS MASTER X motherboard. The information in /sys/kernel/debug/usb/devices about the Bluetooth device is listed as the below. T: Bus=01 Lev=01 Prnt=01 Port=05 Cnt=04 Dev#= 5 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0489 ProdID=e10f Rev= 1.00 S: Manufacturer=MediaTek Inc. S: Product=Wireless_Device S: SerialNumber=000000000 C:* #Ifs= 3 Cfg#= 1 Atr=e0 MxPwr=100mA A: FirstIf#= 0 IfCount= 3 Cls=e0(wlcon) Sub=01 Prot=01 I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=125us E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb ... I:* If#= 2 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=8a(I) Atr=03(Int.) MxPS= 512 Ivl=125us E: Ad=0a(O) Atr=03(Int.) MxPS= 512 Ivl=125us Link: https://bugzilla.kernel.org/show_bug.cgi?id=221096 Link: https://github.com/openwrt/mt76/issues/927 Signed-off-by: Javier Tia Tested-by: Chapuis Dario Tested-by: Evgeny Kapusta <3193631@gmail.com> Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btusb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index d62b902d01d9..12c93ee5e6d8 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -755,6 +755,8 @@ static const struct usb_device_id quirks_table[] = { BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0489, 0xe0fa), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x0489, 0xe10f), .driver_info = BTUSB_MEDIATEK | + BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0489, 0xe14e), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0489, 0xe14f), .driver_info = BTUSB_MEDIATEK | From 71708de05e77165f3030978662f1c7969ff2e874 Mon Sep 17 00:00:00 2001 From: Javier Tia Date: Mon, 30 Mar 2026 14:39:28 -0600 Subject: [PATCH 1506/1561] Bluetooth: btusb: Add MT7927 ID for MSI X870E Ace Max Add USB device ID 0489:e110 (Foxconn/Hon Hai) for the MediaTek MT7927 (Filogic 380) Bluetooth interface found on the MSI X870E Ace Max motherboard. The information in /sys/kernel/debug/usb/devices about the Bluetooth device is listed as the below. T: Bus=01 Lev=01 Prnt=01 Port=05 Cnt=04 Dev#= 5 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0489 ProdID=e110 Rev= 1.00 S: Manufacturer=MediaTek Inc. S: Product=Wireless_Device S: SerialNumber=000000000 C:* #Ifs= 3 Cfg#= 1 Atr=e0 MxPwr=100mA A: FirstIf#= 0 IfCount= 3 Cls=e0(wlcon) Sub=01 Prot=01 I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=125us E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb ... I:* If#= 2 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=8a(I) Atr=03(Int.) MxPS= 512 Ivl=125us E: Ad=0a(O) Atr=03(Int.) MxPS= 512 Ivl=125us Link: https://bugzilla.kernel.org/show_bug.cgi?id=221096 Link: https://github.com/openwrt/mt76/issues/927 Signed-off-by: Javier Tia Tested-by: Nitin Gurram Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btusb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 12c93ee5e6d8..cbf286c310b6 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -757,6 +757,8 @@ static const struct usb_device_id quirks_table[] = { BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0489, 0xe10f), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x0489, 0xe110), .driver_info = BTUSB_MEDIATEK | + BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0489, 0xe14e), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0489, 0xe14f), .driver_info = BTUSB_MEDIATEK | From adcbb348e9ad4cd3ff2ba17c14c8c183a51c59e9 Mon Sep 17 00:00:00 2001 From: Javier Tia Date: Mon, 30 Mar 2026 14:39:29 -0600 Subject: [PATCH 1507/1561] Bluetooth: btusb: Add MT7927 ID for TP-Link Archer TBE550E Add USB device ID 0489:e116 (Foxconn/Hon Hai) for the MediaTek MT7927 (Filogic 380) Bluetooth interface found on the TP-Link Archer TBE550E PCIe adapter. The information in /sys/kernel/debug/usb/devices about the Bluetooth device is listed as the below. T: Bus=01 Lev=01 Prnt=01 Port=05 Cnt=04 Dev#= 5 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0489 ProdID=e116 Rev= 1.00 S: Manufacturer=MediaTek Inc. S: Product=Wireless_Device S: SerialNumber=000000000 C:* #Ifs= 3 Cfg#= 1 Atr=e0 MxPwr=100mA A: FirstIf#= 0 IfCount= 3 Cls=e0(wlcon) Sub=01 Prot=01 I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=125us E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb ... I:* If#= 2 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=8a(I) Atr=03(Int.) MxPS= 512 Ivl=125us E: Ad=0a(O) Atr=03(Int.) MxPS= 512 Ivl=125us Link: https://bugzilla.kernel.org/show_bug.cgi?id=221096 Link: https://github.com/openwrt/mt76/issues/927 Signed-off-by: Javier Tia Tested-by: Thibaut FRANCOIS Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btusb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index cbf286c310b6..e43e744e44cf 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -759,6 +759,8 @@ static const struct usb_device_id quirks_table[] = { BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0489, 0xe110), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x0489, 0xe116), .driver_info = BTUSB_MEDIATEK | + BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0489, 0xe14e), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0489, 0xe14f), .driver_info = BTUSB_MEDIATEK | From 751b06a28b3fce0ec7a1d0df357e329dd58ad7c3 Mon Sep 17 00:00:00 2001 From: Javier Tia Date: Mon, 30 Mar 2026 14:39:30 -0600 Subject: [PATCH 1508/1561] Bluetooth: btusb: Add MT7927 ID for ASUS X870E / ProArt X870E-Creator Add USB device ID 13d3:3588 (IMC Networks/Azurewave) for the MediaTek MT7927 (Filogic 380) Bluetooth interface found on the ASUS ROG STRIX X870E-E GAMING WIFI and ASUS ProArt X870E-Creator WiFi motherboards. The information in /sys/kernel/debug/usb/devices about the Bluetooth device is listed as the below. T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 5 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=13d3 ProdID=3588 Rev= 1.00 S: Manufacturer=MediaTek Inc. S: Product=Wireless_Device S: SerialNumber=000000000 C:* #Ifs= 3 Cfg#= 1 Atr=e0 MxPwr=100mA A: FirstIf#= 0 IfCount= 3 Cls=e0(wlcon) Sub=01 Prot=01 I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=125us E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms I:* If#= 2 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=8a(I) Atr=03(Int.) MxPS= 512 Ivl=125us E: Ad=0a(O) Atr=03(Int.) MxPS= 512 Ivl=125us Link: https://bugzilla.kernel.org/show_bug.cgi?id=221096 Link: https://github.com/openwrt/mt76/issues/927 Signed-off-by: Javier Tia Tested-by: Jose Tiburcio Ribeiro Netto Tested-by: Ivan Lubnin Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btusb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index e43e744e44cf..63020c061fc0 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -761,6 +761,8 @@ static const struct usb_device_id quirks_table[] = { BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0489, 0xe116), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x13d3, 0x3588), .driver_info = BTUSB_MEDIATEK | + BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0489, 0xe14e), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0489, 0xe14f), .driver_info = BTUSB_MEDIATEK | From 81f971c6abec59240e2bcfc38756bda8172fa788 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 2 Apr 2026 16:11:15 +0200 Subject: [PATCH 1509/1561] Bluetooth: btmtk: hide unused btmtk_mt6639_devs[] array When USB support is disabled, the array is not referenced anywhere, causing a warning: drivers/bluetooth/btmtk.c:35:3: error: 'btmtk_mt6639_devs' defined but not used [-Werror=unused-const-variable=] 35 | } btmtk_mt6639_devs[] = { | ^~~~~~~~~~~~~~~~~ Move it into the #ifdef block. Fixes: 28b7c5a6db74 ("Bluetooth: btmtk: Add MT6639 (MT7927) Bluetooth support") Signed-off-by: Arnd Bergmann Reviewed-by: Paul Menzel Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btmtk.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/bluetooth/btmtk.c b/drivers/bluetooth/btmtk.c index 099188bf772e..6fb6ca274808 100644 --- a/drivers/bluetooth/btmtk.c +++ b/drivers/bluetooth/btmtk.c @@ -25,22 +25,6 @@ /* It is for mt79xx iso data transmission setting */ #define MTK_ISO_THRESHOLD 264 -/* Known MT6639 (MT7927) Bluetooth USB devices. - * Used to scope the zero-CHIPID workaround to real MT6639 hardware, - * since some boards return 0x0000 from the MMIO chip ID register. - */ -static const struct { - u16 vendor; - u16 product; -} btmtk_mt6639_devs[] = { - { 0x0489, 0xe13a }, /* ASUS ROG Crosshair X870E Hero */ - { 0x0489, 0xe0fa }, /* Lenovo Legion Pro 7 16ARX9 */ - { 0x0489, 0xe10f }, /* Gigabyte Z790 AORUS MASTER X */ - { 0x0489, 0xe110 }, /* MSI X870E Ace Max */ - { 0x0489, 0xe116 }, /* TP-Link Archer TBE550E */ - { 0x13d3, 0x3588 }, /* ASUS ROG STRIX X870E-E */ -}; - struct btmtk_patch_header { u8 datetime[16]; u8 platform[4]; @@ -483,6 +467,22 @@ int btmtk_process_coredump(struct hci_dev *hdev, struct sk_buff *skb) EXPORT_SYMBOL_GPL(btmtk_process_coredump); #if IS_ENABLED(CONFIG_BT_HCIBTUSB_MTK) +/* Known MT6639 (MT7927) Bluetooth USB devices. + * Used to scope the zero-CHIPID workaround to real MT6639 hardware, + * since some boards return 0x0000 from the MMIO chip ID register. + */ +static const struct { + u16 vendor; + u16 product; +} btmtk_mt6639_devs[] = { + { 0x0489, 0xe13a }, /* ASUS ROG Crosshair X870E Hero */ + { 0x0489, 0xe0fa }, /* Lenovo Legion Pro 7 16ARX9 */ + { 0x0489, 0xe10f }, /* Gigabyte Z790 AORUS MASTER X */ + { 0x0489, 0xe110 }, /* MSI X870E Ace Max */ + { 0x0489, 0xe116 }, /* TP-Link Archer TBE550E */ + { 0x13d3, 0x3588 }, /* ASUS ROG STRIX X870E-E */ +}; + static void btmtk_usb_wmt_recv(struct urb *urb) { struct hci_dev *hdev = urb->context; From 5e17010bfc7e6820a5004f1e06d08db886e3927e Mon Sep 17 00:00:00 2001 From: Kamiyama Chiaki Date: Wed, 1 Apr 2026 02:53:01 +0900 Subject: [PATCH 1510/1561] Bluetooth: btusb: MediaTek MT7922: Add VID 0489 & PID e11d Add VID 0489 & PID e11d for MediaTek MT7922 USB Bluetooth chip. Found in Dynabook GA/ZY (W6GAZY5RCL). The information in /sys/kernel/debug/usb/devices about the Bluetooth device is listed as the below. T: Bus=03 Lev=01 Prnt=01 Port=03 Cnt=02 Dev#= 3 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0489 ProdID=e11d Rev= 1.00 S: Manufacturer=MediaTek Inc. S: Product=Wireless_Device S: SerialNumber=000000000 Reviewed-by: Paul Menzel Signed-off-by: Kamiyama Chiaki Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btusb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 63020c061fc0..7f5fce93d984 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -707,6 +707,8 @@ static const struct usb_device_id quirks_table[] = { BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0489, 0xe102), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x0489, 0xe11d), .driver_info = BTUSB_MEDIATEK | + BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0489, 0xe152), .driver_info = BTUSB_MEDIATEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0489, 0xe153), .driver_info = BTUSB_MEDIATEK | From 1c0bc11cd445ba8235ac8ec87d5999b6769ed8b9 Mon Sep 17 00:00:00 2001 From: Stefano Radaelli Date: Thu, 2 Apr 2026 20:47:03 +0200 Subject: [PATCH 1511/1561] Bluetooth: hci_ll: Enable BROKEN_ENHANCED_SETUP_SYNC_CONN for WL183x TI WL183x controllers advertise support for the HCI Enhanced Setup Synchronous Connection command, but SCO setup fails when the enhanced path is used. The only working configuration is to fall back to the legacy HCI Setup Synchronous Connection (0x0028). This matches the scenario described in commit 05abad857277 ("Bluetooth: HCI: Add HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN quirk"). Enable HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN automatically for devices compatible with: - ti,wl1831-st - ti,wl1835-st - ti,wl1837-st Signed-off-by: Stefano Radaelli Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_ll.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/bluetooth/hci_ll.c b/drivers/bluetooth/hci_ll.c index 91c96ad12342..ab744001dafc 100644 --- a/drivers/bluetooth/hci_ll.c +++ b/drivers/bluetooth/hci_ll.c @@ -68,6 +68,7 @@ struct ll_device { struct gpio_desc *enable_gpio; struct clk *ext_clk; bdaddr_t bdaddr; + bool broken_enhanced_setup; }; struct ll_struct { @@ -658,6 +659,10 @@ static int ll_setup(struct hci_uart *hu) hci_set_quirk(hu->hdev, HCI_QUIRK_INVALID_BDADDR); } + if (lldev->broken_enhanced_setup) + hci_set_quirk(hu->hdev, + HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN); + /* Operational speed if any */ if (hu->oper_speed) speed = hu->oper_speed; @@ -712,6 +717,11 @@ static int hci_ti_probe(struct serdev_device *serdev) of_property_read_u32(serdev->dev.of_node, "max-speed", &max_speed); hci_uart_set_speeds(hu, 115200, max_speed); + if (of_device_is_compatible(serdev->dev.of_node, "ti,wl1831-st") || + of_device_is_compatible(serdev->dev.of_node, "ti,wl1835-st") || + of_device_is_compatible(serdev->dev.of_node, "ti,wl1837-st")) + lldev->broken_enhanced_setup = true; + /* optional BD address from nvram */ bdaddr_cell = nvmem_cell_get(&serdev->dev, "bd-address"); if (IS_ERR(bdaddr_cell)) { From 42776497cdbc9a665b384a6dcb85f0d4bd927eab Mon Sep 17 00:00:00 2001 From: Dudu Lu Date: Sun, 5 Apr 2026 23:47:41 +0800 Subject: [PATCH 1512/1561] Bluetooth: l2cap: Add missing chan lock in l2cap_ecred_reconf_rsp l2cap_ecred_reconf_rsp() calls l2cap_chan_del() without holding l2cap_chan_lock(). Every other l2cap_chan_del() caller in the file acquires the lock first. A remote BLE device can send a crafted L2CAP ECRED reconfiguration response to corrupt the channel list while another thread is iterating it. Add l2cap_chan_hold() and l2cap_chan_lock() before l2cap_chan_del(), and l2cap_chan_unlock() and l2cap_chan_put() after, matching the pattern used in l2cap_ecred_conn_rsp() and l2cap_conn_del(). Fixes: 15f02b910562 ("Bluetooth: L2CAP: Add initial code for Enhanced Credit Based Mode") Signed-off-by: Dudu Lu Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 5a4cd530ef33..77dec104a9c3 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -5473,7 +5473,13 @@ static inline int l2cap_ecred_reconf_rsp(struct l2cap_conn *conn, if (chan->ident != cmd->ident) continue; + l2cap_chan_hold(chan); + l2cap_chan_lock(chan); + l2cap_chan_del(chan, ECONNRESET); + + l2cap_chan_unlock(chan); + l2cap_chan_put(chan); } return 0; From 9e3d074bb21ddc35374d80bf69c3592cd3d057a2 Mon Sep 17 00:00:00 2001 From: Kiran K Date: Tue, 7 Apr 2026 15:32:40 +0530 Subject: [PATCH 1513/1561] Bluetooth: btintel_pcie: Align shared DMA memory to 128 bytes Align each descriptor/index/context region to 128 bytes before calculating the total DMA pool size. This ensures the memory layout shared with firmware meets the 128-byte alignment requirement. The DMA pool alignment is also set to 128 bytes to match the firmware expectation for all shared structures. Signed-off-by: Kiran K Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel_pcie.c | 94 ++++++++++++++++++-------------- drivers/bluetooth/btintel_pcie.h | 3 - 2 files changed, 53 insertions(+), 44 deletions(-) diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index 05f82bc3f0d7..8fb3ebe98146 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -37,6 +37,8 @@ #define POLL_INTERVAL_US 10 +#define BTINTEL_PCIE_DMA_ALIGN_128B 128 /* 128 byte aligned */ + /* Intel Bluetooth PCIe device id table */ static const struct pci_device_id btintel_pcie_table[] = { /* BlazarI, Wildcat Lake */ @@ -1751,27 +1753,6 @@ static int btintel_pcie_setup_rxq_bufs(struct btintel_pcie_data *data, return 0; } -static void btintel_pcie_setup_ia(struct btintel_pcie_data *data, - dma_addr_t p_addr, void *v_addr, - struct ia *ia) -{ - /* TR Head Index Array */ - ia->tr_hia_p_addr = p_addr; - ia->tr_hia = v_addr; - - /* TR Tail Index Array */ - ia->tr_tia_p_addr = p_addr + sizeof(u16) * BTINTEL_PCIE_NUM_QUEUES; - ia->tr_tia = v_addr + sizeof(u16) * BTINTEL_PCIE_NUM_QUEUES; - - /* CR Head index Array */ - ia->cr_hia_p_addr = p_addr + (sizeof(u16) * BTINTEL_PCIE_NUM_QUEUES * 2); - ia->cr_hia = v_addr + (sizeof(u16) * BTINTEL_PCIE_NUM_QUEUES * 2); - - /* CR Tail Index Array */ - ia->cr_tia_p_addr = p_addr + (sizeof(u16) * BTINTEL_PCIE_NUM_QUEUES * 3); - ia->cr_tia = v_addr + (sizeof(u16) * BTINTEL_PCIE_NUM_QUEUES * 3); -} - static void btintel_pcie_free(struct btintel_pcie_data *data) { btintel_pcie_free_rxq_bufs(data, &data->rxq); @@ -1789,13 +1770,16 @@ static int btintel_pcie_alloc(struct btintel_pcie_data *data) size_t total; dma_addr_t p_addr; void *v_addr; + size_t tfd_size, frbd_size, ctx_size, ci_size, urbd0_size, urbd1_size; /* Allocate the chunk of DMA memory for descriptors, index array, and * context information, instead of allocating individually. * The DMA memory for data buffer is allocated while setting up the * each queue. * - * Total size is sum of the following + * Total size is sum of the following and each of the individual sizes + * are aligned to 128 bytes before adding up. + * * + size of TFD * Number of descriptors in queue * + size of URBD0 * Number of descriptors in queue * + size of FRBD * Number of descriptors in queue @@ -1803,15 +1787,25 @@ static int btintel_pcie_alloc(struct btintel_pcie_data *data) * + size of index * Number of queues(2) * type of index array(4) * + size of context information */ - total = (sizeof(struct tfd) + sizeof(struct urbd0)) * BTINTEL_PCIE_TX_DESCS_COUNT; - total += (sizeof(struct frbd) + sizeof(struct urbd1)) * BTINTEL_PCIE_RX_DESCS_COUNT; + tfd_size = ALIGN(sizeof(struct tfd) * BTINTEL_PCIE_TX_DESCS_COUNT, + BTINTEL_PCIE_DMA_ALIGN_128B); + urbd0_size = ALIGN(sizeof(struct urbd0) * BTINTEL_PCIE_TX_DESCS_COUNT, + BTINTEL_PCIE_DMA_ALIGN_128B); - /* Add the sum of size of index array and size of ci struct */ - total += (sizeof(u16) * BTINTEL_PCIE_NUM_QUEUES * 4) + sizeof(struct ctx_info); + frbd_size = ALIGN(sizeof(struct frbd) * BTINTEL_PCIE_RX_DESCS_COUNT, + BTINTEL_PCIE_DMA_ALIGN_128B); + urbd1_size = ALIGN(sizeof(struct urbd1) * BTINTEL_PCIE_RX_DESCS_COUNT, + BTINTEL_PCIE_DMA_ALIGN_128B); + + ci_size = ALIGN(sizeof(u16) * BTINTEL_PCIE_NUM_QUEUES, + BTINTEL_PCIE_DMA_ALIGN_128B); + + ctx_size = ALIGN(sizeof(struct ctx_info), BTINTEL_PCIE_DMA_ALIGN_128B); + + total = tfd_size + urbd0_size + frbd_size + urbd1_size + ctx_size + ci_size * 4; - /* Allocate DMA Pool */ data->dma_pool = dma_pool_create(KBUILD_MODNAME, &data->pdev->dev, - total, BTINTEL_PCIE_DMA_POOL_ALIGNMENT, 0); + total, BTINTEL_PCIE_DMA_ALIGN_128B, 0); if (!data->dma_pool) { err = -ENOMEM; goto exit_error; @@ -1836,29 +1830,29 @@ static int btintel_pcie_alloc(struct btintel_pcie_data *data) data->txq.tfds_p_addr = p_addr; data->txq.tfds = v_addr; - p_addr += (sizeof(struct tfd) * BTINTEL_PCIE_TX_DESCS_COUNT); - v_addr += (sizeof(struct tfd) * BTINTEL_PCIE_TX_DESCS_COUNT); + p_addr += tfd_size; + v_addr += tfd_size; /* Setup urbd0 */ data->txq.urbd0s_p_addr = p_addr; data->txq.urbd0s = v_addr; - p_addr += (sizeof(struct urbd0) * BTINTEL_PCIE_TX_DESCS_COUNT); - v_addr += (sizeof(struct urbd0) * BTINTEL_PCIE_TX_DESCS_COUNT); + p_addr += urbd0_size; + v_addr += urbd0_size; /* Setup FRBD*/ data->rxq.frbds_p_addr = p_addr; data->rxq.frbds = v_addr; - p_addr += (sizeof(struct frbd) * BTINTEL_PCIE_RX_DESCS_COUNT); - v_addr += (sizeof(struct frbd) * BTINTEL_PCIE_RX_DESCS_COUNT); + p_addr += frbd_size; + v_addr += frbd_size; /* Setup urbd1 */ data->rxq.urbd1s_p_addr = p_addr; data->rxq.urbd1s = v_addr; - p_addr += (sizeof(struct urbd1) * BTINTEL_PCIE_RX_DESCS_COUNT); - v_addr += (sizeof(struct urbd1) * BTINTEL_PCIE_RX_DESCS_COUNT); + p_addr += urbd1_size; + v_addr += urbd1_size; /* Setup data buffers for txq */ err = btintel_pcie_setup_txq_bufs(data, &data->txq); @@ -1870,8 +1864,29 @@ static int btintel_pcie_alloc(struct btintel_pcie_data *data) if (err) goto exit_error_txq; - /* Setup Index Array */ - btintel_pcie_setup_ia(data, p_addr, v_addr, &data->ia); + /* TR Head Index Array */ + data->ia.tr_hia_p_addr = p_addr; + data->ia.tr_hia = v_addr; + p_addr += ci_size; + v_addr += ci_size; + + /* TR Tail Index Array */ + data->ia.tr_tia_p_addr = p_addr; + data->ia.tr_tia = v_addr; + p_addr += ci_size; + v_addr += ci_size; + + /* CR Head index Array */ + data->ia.cr_hia_p_addr = p_addr; + data->ia.cr_hia = v_addr; + p_addr += ci_size; + v_addr += ci_size; + + /* CR Tail Index Array */ + data->ia.cr_tia_p_addr = p_addr; + data->ia.cr_tia = v_addr; + p_addr += ci_size; + v_addr += ci_size; /* Setup data buffers for dbgc */ err = btintel_pcie_setup_dbgc(data); @@ -1879,9 +1894,6 @@ static int btintel_pcie_alloc(struct btintel_pcie_data *data) goto exit_error_txq; /* Setup Context Information */ - p_addr += sizeof(u16) * BTINTEL_PCIE_NUM_QUEUES * 4; - v_addr += sizeof(u16) * BTINTEL_PCIE_NUM_QUEUES * 4; - data->ci = v_addr; data->ci_p_addr = p_addr; diff --git a/drivers/bluetooth/btintel_pcie.h b/drivers/bluetooth/btintel_pcie.h index e3d941ffef4a..3c7bb708362d 100644 --- a/drivers/bluetooth/btintel_pcie.h +++ b/drivers/bluetooth/btintel_pcie.h @@ -178,9 +178,6 @@ enum { /* The size of DMA buffer for TX and RX in bytes */ #define BTINTEL_PCIE_BUFFER_SIZE 4096 -/* DMA allocation alignment */ -#define BTINTEL_PCIE_DMA_POOL_ALIGNMENT 256 - #define BTINTEL_PCIE_TX_WAIT_TIMEOUT_MS 500 /* Doorbell vector for TFD */ From 4e10a9ebbf081c16517cdd9366ac618bf38d7d0c Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 7 Apr 2026 17:13:45 +0200 Subject: [PATCH 1514/1561] Bluetooth: SCO: check for codecs->num_codecs == 1 before assigning to sco_pi(sk)->codec copy_struct_from_sockptr() fill 'buffer' in sco_sock_setsockopt() with zeros, so there's no real problem. But it actually looks strange to do this, without checking all of codecs->codecs[0] really comes from userspace: sco_pi(sk)->codec = codecs->codecs[0]; As only optlen < sizeof(struct bt_codecs) is checked and codecs->num_codecs is not checked against != 1, but only <= 1, and the space for the additional struct bt_codec is not checked. Note I don't understand bluetooth and I didn't do any runtime tests with this! I just found it when debugging a problem in copy_struct_from_sockptr(). I just added this to check the size is as expected: BUILD_BUG_ON(struct_size(codecs, codecs, 0) != 1); BUILD_BUG_ON(struct_size(codecs, codecs, 1) != 8); And made sure it still compiles using this: make CF=-D__CHECK_ENDIAN__ W=1ce C=1 net/bluetooth/sco.o Fixes: 3e643e4efa1e ("Bluetooth: Improve setsockopt() handling of malformed user input") Cc: Michal Luczaj Cc: Luiz Augusto von Dentz Cc: Luiz Augusto von Dentz Cc: Marcel Holtmann Cc: David Wei Cc: linux-bluetooth@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Stefan Metzmacher Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/sco.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index b84587811ef4..18826d4b9c0b 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -1045,7 +1045,8 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, codecs = (void *)buffer; - if (codecs->num_codecs > 1) { + if (codecs->num_codecs != 1 || + optlen < struct_size(codecs, codecs, codecs->num_codecs)) { hci_dev_put(hdev); err = -EINVAL; break; From a0cff16d0f6e0bcd4173583694b85df42912e644 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 7 Apr 2026 14:16:46 -0600 Subject: [PATCH 1515/1561] Bluetooth: hci.h: Avoid a couple -Wflex-array-member-not-at-end warnings -Wflex-array-member-not-at-end was introduced in GCC-14, and we are getting ready to enable it, globally. struct hci_std_codecs and struct hci_std_codecs_v2 are flexible structures, this is structures that contain a flexible-array member (__u8 codec[]; and struct hci_std_codec_v2 codec[];, correspondingly.) Since struct hci_rp_read_local_supported_codecs and struct hci_rp_read_local_supported_codecs_v2 are defined by hardware, we create the new struct hci_std_codecs_hdr and struct hci_std_codecs_v2_hdr types, and use them to replace the object types causing trouble in struct hci_rp_read_local_supported_codecs and struct hci_rp_read_local_supported_codecs_v2, namely struct hci_std_codecs std_codecs; and struct hci_std_codecs_v2_hdr std_codecs;. Also, once -fms-extensions is enabled, we can use transparent struct members in both struct hci_std_codecs and struct hci_std_codecs_v2_hdr. Notice that the newly created types does not contain the flex-array member `codec`, which is the object causing the -Wfamnae warnings. After these changes, the size of struct hci_rp_read_local_supported_codecs and struct hci_rp_read_local_supported_codecs_v2, along with their member's offsets remain the same, hence the memory layouts don't change: Before changes: struct hci_rp_read_local_supported_codecs { __u8 status; /* 0 1 */ struct hci_std_codecs std_codecs; /* 1 1 */ struct hci_vnd_codecs vnd_codecs; /* 2 1 */ /* size: 3, cachelines: 1, members: 3 */ /* last cacheline: 3 bytes */ } __attribute__((__packed__)); struct hci_rp_read_local_supported_codecs_v2 { __u8 status; /* 0 1 */ struct hci_std_codecs_v2 std_codecs; /* 1 1 */ struct hci_vnd_codecs_v2 vendor_codecs; /* 2 1 */ /* size: 3, cachelines: 1, members: 3 */ /* last cacheline: 3 bytes */ } __attribute__((__packed__)); After changes: struct hci_rp_read_local_supported_codecs { __u8 status; /* 0 1 */ struct hci_std_codecs_hdr std_codecs; /* 1 1 */ struct hci_vnd_codecs vnd_codecs; /* 2 1 */ /* size: 3, cachelines: 1, members: 3 */ /* last cacheline: 3 bytes */ } __attribute__((__packed__)); struct hci_rp_read_local_supported_codecs_v2 { __u8 status; /* 0 1 */ struct hci_std_codecs_v2_hdr std_codecs; /* 1 1 */ struct hci_vnd_codecs_v2 vendor_codecs; /* 2 1 */ /* size: 3, cachelines: 1, members: 3 */ /* last cacheline: 3 bytes */ } __attribute__((__packed__)); With these changes fix the following warnings: include/net/bluetooth/hci.h:1490:31: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] include/net/bluetooth/hci.h:1525:34: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] Signed-off-by: Gustavo A. R. Silva Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 89ad9470fa71..572b1c620c5d 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -1468,8 +1468,12 @@ struct hci_rp_read_data_block_size { } __packed; #define HCI_OP_READ_LOCAL_CODECS 0x100b -struct hci_std_codecs { +struct hci_std_codecs_hdr { __u8 num; +} __packed; + +struct hci_std_codecs { + struct hci_std_codecs_hdr; __u8 codec[]; } __packed; @@ -1487,7 +1491,7 @@ struct hci_vnd_codecs { struct hci_rp_read_local_supported_codecs { __u8 status; - struct hci_std_codecs std_codecs; + struct hci_std_codecs_hdr std_codecs; struct hci_vnd_codecs vnd_codecs; } __packed; @@ -1504,8 +1508,12 @@ struct hci_std_codec_v2 { __u8 transport; } __packed; -struct hci_std_codecs_v2 { +struct hci_std_codecs_v2_hdr { __u8 num; +} __packed; + +struct hci_std_codecs_v2 { + struct hci_std_codecs_v2_hdr; struct hci_std_codec_v2 codec[]; } __packed; @@ -1522,7 +1530,7 @@ struct hci_vnd_codecs_v2 { struct hci_rp_read_local_supported_codecs_v2 { __u8 status; - struct hci_std_codecs_v2 std_codecs; + struct hci_std_codecs_v2_hdr std_codecs; struct hci_vnd_codecs_v2 vendor_codecs; } __packed; From 85fa3512048793076eef658f66489112dcc91993 Mon Sep 17 00:00:00 2001 From: Shuvam Pandey Date: Thu, 9 Apr 2026 00:32:30 +0545 Subject: [PATCH 1516/1561] Bluetooth: hci_event: fix potential UAF in SSP passkey handlers hci_conn lookup and field access must be covered by hdev lock in hci_user_passkey_notify_evt() and hci_keypress_notify_evt(), otherwise the connection can be freed concurrently. Extend the hci_dev_lock critical section to cover all conn usage in both handlers. Keep the existing keypress notification behavior unchanged by routing the early exits through a common unlock path. Fixes: 92a25256f142 ("Bluetooth: mgmt: Implement support for passkey notification") Cc: stable@vger.kernel.org Signed-off-by: Shuvam Pandey Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 83248085dd4f..b2ee6b6a0f56 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -5495,9 +5495,11 @@ static void hci_user_passkey_notify_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, ""); + hci_dev_lock(hdev); + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); if (!conn) - return; + goto unlock; conn->passkey_notify = __le32_to_cpu(ev->passkey); conn->passkey_entered = 0; @@ -5506,6 +5508,9 @@ static void hci_user_passkey_notify_evt(struct hci_dev *hdev, void *data, mgmt_user_passkey_notify(hdev, &conn->dst, conn->type, conn->dst_type, conn->passkey_notify, conn->passkey_entered); + +unlock: + hci_dev_unlock(hdev); } static void hci_keypress_notify_evt(struct hci_dev *hdev, void *data, @@ -5516,14 +5521,16 @@ static void hci_keypress_notify_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, ""); + hci_dev_lock(hdev); + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); if (!conn) - return; + goto unlock; switch (ev->type) { case HCI_KEYPRESS_STARTED: conn->passkey_entered = 0; - return; + goto unlock; case HCI_KEYPRESS_ENTERED: conn->passkey_entered++; @@ -5538,13 +5545,16 @@ static void hci_keypress_notify_evt(struct hci_dev *hdev, void *data, break; case HCI_KEYPRESS_COMPLETED: - return; + goto unlock; } if (hci_dev_test_flag(hdev, HCI_MGMT)) mgmt_user_passkey_notify(hdev, &conn->dst, conn->type, conn->dst_type, conn->passkey_notify, conn->passkey_entered); + +unlock: + hci_dev_unlock(hdev); } static void hci_simple_pair_complete_evt(struct hci_dev *hdev, void *data, From 76388eae1da94f6c29886c7e49bce1d5abf88734 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Wed, 1 Apr 2026 18:46:24 +0200 Subject: [PATCH 1517/1561] Bluetooth: btintel_pcie: use strscpy to copy plain strings Use strscpy() instead of snprintf() to copy plain strings with no format specifiers. Signed-off-by: Thorsten Blum Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel_pcie.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index 8fb3ebe98146..2f59c0d6f9ec 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -351,7 +351,7 @@ static inline void btintel_pcie_dump_debug_registers(struct hci_dev *hdev) snprintf(buf, sizeof(buf), "txq: cr_tia: %u cr_hia: %u", cr_tia, cr_hia); skb_put_data(skb, buf, strlen(buf)); bt_dev_dbg(hdev, "%s", buf); - snprintf(buf, sizeof(buf), "--------------------------------"); + strscpy(buf, "--------------------------------"); bt_dev_dbg(hdev, "%s", buf); hci_recv_diag(hdev, skb); @@ -661,7 +661,7 @@ static int btintel_pcie_read_dram_buffers(struct btintel_pcie_data *data) else return -EINVAL; - snprintf(vendor, sizeof(vendor), "Vendor: Intel\n"); + strscpy(vendor, "Vendor: Intel\n"); snprintf(driver, sizeof(driver), "Driver: %s\n", data->dmp_hdr.driver_name); From c347ca17d62a32c25564fee0ca3a2a7bc2d5fd6f Mon Sep 17 00:00:00 2001 From: Shuai Zhang Date: Fri, 10 Apr 2026 17:54:43 +0800 Subject: [PATCH 1518/1561] Bluetooth: hci_qca: Fix missing wakeup during SSR memdump handling When a Bluetooth controller encounters a coredump, it triggers the Subsystem Restart (SSR) mechanism. The controller first reports the coredump data and, once the upload is complete, sends a hw_error event. The host relies on this event to proceed with subsequent recovery actions. If the host has not finished processing the coredump data when the hw_error event is received, it waits until either the processing is complete or the 8-second timeout expires before handling the event. The current implementation clears QCA_MEMDUMP_COLLECTION using clear_bit(), which does not wake up waiters sleeping in wait_on_bit_timeout(). As a result, the waiting thread may remain blocked until the timeout expires even if the coredump collection has already completed. Fix this by clearing QCA_MEMDUMP_COLLECTION with clear_and_wake_up_bit(), which also wakes up the waiting thread and allows the hw_error handling to proceed immediately. Test case: - Trigger a controller coredump using: hcitool cmd 0x3f 0c 26 - Tested on QCA6390. - Capture HCI logs using btmon. - Verify that the delay between receiving the hw_error event and initiating the power-off sequence is reduced compared to the timeout-based behavior. Reviewed-by: Bartosz Golaszewski Reviewed-by: Paul Menzel Signed-off-by: Shuai Zhang Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_qca.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 4512ff7cd0c0..cd1834246b47 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1108,7 +1108,7 @@ static void qca_controller_memdump(struct work_struct *work) qca->qca_memdump = NULL; qca->memdump_state = QCA_MEMDUMP_COLLECTED; cancel_delayed_work(&qca->ctrl_memdump_timeout); - clear_bit(QCA_MEMDUMP_COLLECTION, &qca->flags); + clear_and_wake_up_bit(QCA_MEMDUMP_COLLECTION, &qca->flags); clear_bit(QCA_IBS_DISABLED, &qca->flags); mutex_unlock(&qca->hci_memdump_lock); return; @@ -1186,7 +1186,7 @@ static void qca_controller_memdump(struct work_struct *work) kfree(qca->qca_memdump); qca->qca_memdump = NULL; qca->memdump_state = QCA_MEMDUMP_COLLECTED; - clear_bit(QCA_MEMDUMP_COLLECTION, &qca->flags); + clear_and_wake_up_bit(QCA_MEMDUMP_COLLECTION, &qca->flags); } mutex_unlock(&qca->hci_memdump_lock); From b025461303d87923abfaae6cc07ba8a83ddfd844 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 7 Apr 2026 17:14:38 -0700 Subject: [PATCH 1519/1561] tcp: update window_clamp when SO_RCVBUF is set Commit under Fixes moved recomputing the window clamp to tcp_measure_rcv_mss() (when scaling_ratio changes). I suspect it missed the fact that we don't recompute the clamp when rcvbuf is set. Until scaling_ratio changes we are stuck with the old window clamp which may be based on the small initial buffer. scaling_ratio may never change. Inspired by Eric's recent commit d1361840f8c5 ("tcp: fix SO_RCVLOWAT and RCVBUF autotuning") plumb the user action thru to TCP and have it update the clamp. A smaller fix would be to just have tcp_rcvbuf_grow() adjust the clamp even if SOCK_RCVBUF_LOCK is set. But IIUC this is what we were trying to get away from in the first place. Fixes: a2cbb1603943 ("tcp: Update window clamping condition") Signed-off-by: Jakub Kicinski Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260408001438.129165-1-kuba@kernel.org Signed-off-by: Paolo Abeni --- include/linux/net.h | 1 + include/net/tcp.h | 1 + net/core/sock.c | 9 +++++++++ net/ipv4/af_inet.c | 1 + net/ipv4/tcp.c | 5 +++++ net/ipv6/af_inet6.c | 1 + 6 files changed, 18 insertions(+) diff --git a/include/linux/net.h b/include/linux/net.h index a8e818de95b3..ca6a7bc5c9ae 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -223,6 +223,7 @@ struct proto_ops { int (*sendmsg_locked)(struct sock *sk, struct msghdr *msg, size_t size); int (*set_rcvlowat)(struct sock *sk, int val); + void (*set_rcvbuf)(struct sock *sk, int val); }; #define DECLARE_SOCKADDR(type, dst, src) \ diff --git a/include/net/tcp.h b/include/net/tcp.h index 0f09429ff4cb..dfa52ceefd23 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -516,6 +516,7 @@ void tcp_syn_ack_timeout(const struct request_sock *req); int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags); int tcp_set_rcvlowat(struct sock *sk, int val); +void tcp_set_rcvbuf(struct sock *sk, int val); int tcp_set_window_clamp(struct sock *sk, int val); static inline void diff --git a/net/core/sock.c b/net/core/sock.c index 367fd7bad4ac..b37b664b6eb9 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -966,6 +966,8 @@ EXPORT_SYMBOL(sock_set_keepalive); static void __sock_set_rcvbuf(struct sock *sk, int val) { + struct socket *sock = sk->sk_socket; + /* Ensure val * 2 fits into an int, to prevent max_t() from treating it * as a negative value. */ @@ -983,6 +985,13 @@ static void __sock_set_rcvbuf(struct sock *sk, int val) * we actually used in getsockopt is the most desirable behavior. */ WRITE_ONCE(sk->sk_rcvbuf, max_t(int, val * 2, SOCK_MIN_RCVBUF)); + + if (sock) { + const struct proto_ops *ops = READ_ONCE(sock->ops); + + if (ops->set_rcvbuf) + ops->set_rcvbuf(sk, sk->sk_rcvbuf); + } } void sock_set_rcvbuf(struct sock *sk, int val) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index f98e46ae3e30..0e62032e76b1 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1091,6 +1091,7 @@ const struct proto_ops inet_stream_ops = { .compat_ioctl = inet_compat_ioctl, #endif .set_rcvlowat = tcp_set_rcvlowat, + .set_rcvbuf = tcp_set_rcvbuf, }; EXPORT_SYMBOL(inet_stream_ops); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e57eaffc007a..1a494d18c5fd 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1858,6 +1858,11 @@ int tcp_set_rcvlowat(struct sock *sk, int val) return 0; } +void tcp_set_rcvbuf(struct sock *sk, int val) +{ + tcp_set_window_clamp(sk, tcp_win_from_space(sk, val)); +} + #ifdef CONFIG_MMU static const struct vm_operations_struct tcp_vm_ops = { }; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index ee341a8254bf..0a88b376141d 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -690,6 +690,7 @@ const struct proto_ops inet6_stream_ops = { .compat_ioctl = inet6_compat_ioctl, #endif .set_rcvlowat = tcp_set_rcvlowat, + .set_rcvbuf = tcp_set_rcvbuf, }; EXPORT_SYMBOL_GPL(inet6_stream_ops); From bc174d054986ac5767828e6fbb3371f3474fbbd8 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Thu, 9 Apr 2026 02:27:47 -0700 Subject: [PATCH 1520/1561] mlx4: correct error reporting in mlx4_master_process_vhcr() mlx4_master_process_vhcr() logs vhcr->errno on failures, but this field is never populated by the PF path. As a result, all failures are reported with errno 0 and err print in status case which is misleading. Use the actual return value (err) instead, translate it to FW status before logging, and report both values. Signed-off-by: Alok Tiwari Reviewed-by: Tariq Toukan Link: https://patch.msgid.link/20260409092754.508880-1-alok.a.tiwari@oracle.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index de0193d82ec1..bdaf152e6712 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1782,6 +1782,7 @@ static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave, } if (err) { + vhcr_cmd->status = mlx4_errno_to_status(err); if (!(dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)) { if (vhcr->op == MLX4_CMD_ALLOC_RES && (vhcr->in_modifier & 0xff) == RES_COUNTER && @@ -1791,9 +1792,8 @@ static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave, slave, err); else mlx4_warn(dev, "vhcr command:0x%x slave:%d failed with error:%d, status %d\n", - vhcr->op, slave, vhcr->errno, err); + vhcr->op, slave, err, vhcr_cmd->status); } - vhcr_cmd->status = mlx4_errno_to_status(err); goto out_status; } From 1b9707e6f1a9d5f9e1b91750f24743108b093e2b Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 10 Apr 2026 14:07:51 +0100 Subject: [PATCH 1521/1561] net: stmmac: enable RPS and RBU interrupts Enable receive process stopped and receive buffer unavailable interrupts, so that the statistic counters can be updated. Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1wBBaR-0000000GZHR-1dbM@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h index af6580332d49..43b036d4e95b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h @@ -99,6 +99,8 @@ static inline u32 dma_chanx_base_addr(const struct dwmac4_addrs *addrs, #define DMA_CHAN_INTR_ENA_NIE_4_10 BIT(15) #define DMA_CHAN_INTR_ENA_AIE_4_10 BIT(14) #define DMA_CHAN_INTR_ENA_FBE BIT(12) +#define DMA_CHAN_INTR_ENA_RPS BIT(8) +#define DMA_CHAN_INTR_ENA_RBU BIT(7) #define DMA_CHAN_INTR_ENA_RIE BIT(6) #define DMA_CHAN_INTR_ENA_TIE BIT(0) @@ -107,6 +109,8 @@ static inline u32 dma_chanx_base_addr(const struct dwmac4_addrs *addrs, DMA_CHAN_INTR_ENA_TIE) #define DMA_CHAN_INTR_ABNORMAL (DMA_CHAN_INTR_ENA_AIE | \ + DMA_CHAN_INTR_ENA_RPS | \ + DMA_CHAN_INTR_ENA_RBU | \ DMA_CHAN_INTR_ENA_FBE) /* DMA default interrupt mask for 4.00 */ #define DMA_CHAN_INTR_DEFAULT_MASK (DMA_CHAN_INTR_NORMAL | \ @@ -117,6 +121,8 @@ static inline u32 dma_chanx_base_addr(const struct dwmac4_addrs *addrs, DMA_CHAN_INTR_ENA_TIE) #define DMA_CHAN_INTR_ABNORMAL_4_10 (DMA_CHAN_INTR_ENA_AIE_4_10 | \ + DMA_CHAN_INTR_ENA_RPS | \ + DMA_CHAN_INTR_ENA_RBU | \ DMA_CHAN_INTR_ENA_FBE) /* DMA default interrupt mask for 4.10a */ #define DMA_CHAN_INTR_DEFAULT_MASK_4_10 (DMA_CHAN_INTR_NORMAL_4_10 | \ From e7a62edd34b1b4bc5f979988efc2f81c075733fd Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Fri, 10 Apr 2026 19:10:20 +0200 Subject: [PATCH 1522/1561] net: phy: qcom: at803x: Use the correct bit to disable extended next page As noted in the blamed commit, the AR8035 and other PHYs from this family advertise the Extended Next Page support by default, which may be understood by some partners as this PHY being multi-gig capable. The fix is to disable XNP advertising, which is done by setting bit 12 of the Auto-Negotiation Advertisement Register (MII_ADVERTISE). The blamed commit incorrectly uses MDIO_AN_CTRL1_XNP, which is bit 13 as per 802.3 : 45.2.7.1 AN control register (Register 7.0) BIT 12 in MII_ADVERTISE is wrapped by ADVERTISE_RESV, used by some drivers such as the aquantia one. 802.3 Clause 28 defines bit 12 as Extended Next Page ability, at least in recent versions of the standard. Let's add a define for it and use it in the at803x driver. Fixes: 3c51fa5d2afe ("net: phy: ar803x: disable extended next page bit") Signed-off-by: Maxime Chevallier Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20260410171021.1277138-1-maxime.chevallier@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/qcom/at803x.c | 2 +- include/uapi/linux/mii.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/qcom/at803x.c b/drivers/net/phy/qcom/at803x.c index 2995b08bac96..63726cf98cd4 100644 --- a/drivers/net/phy/qcom/at803x.c +++ b/drivers/net/phy/qcom/at803x.c @@ -524,7 +524,7 @@ static int at803x_config_init(struct phy_device *phydev) * behaviour but we still need to accommodate it. XNP is only needed * for 10Gbps support, so disable XNP. */ - return phy_modify(phydev, MII_ADVERTISE, MDIO_AN_CTRL1_XNP, 0); + return phy_modify(phydev, MII_ADVERTISE, ADVERTISE_XNP, 0); } static void at803x_link_change_notify(struct phy_device *phydev) diff --git a/include/uapi/linux/mii.h b/include/uapi/linux/mii.h index 39f7c44baf53..61d6edad4b94 100644 --- a/include/uapi/linux/mii.h +++ b/include/uapi/linux/mii.h @@ -82,7 +82,8 @@ #define ADVERTISE_100BASE4 0x0200 /* Try for 100mbps 4k packets */ #define ADVERTISE_PAUSE_CAP 0x0400 /* Try for pause */ #define ADVERTISE_PAUSE_ASYM 0x0800 /* Try for asymetric pause */ -#define ADVERTISE_RESV 0x1000 /* Unused... */ +#define ADVERTISE_XNP 0x1000 /* Extended Next Page */ +#define ADVERTISE_RESV ADVERTISE_XNP /* Used to be reserved */ #define ADVERTISE_RFAULT 0x2000 /* Say we can detect faults */ #define ADVERTISE_LPACK 0x4000 /* Ack link partners response */ #define ADVERTISE_NPAGE 0x8000 /* Next page bit */ From 67fab22a7adcec0279b9b057eb3dc669e32834f0 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 8 Apr 2026 03:30:29 -0700 Subject: [PATCH 1523/1561] net: add getsockopt_iter callback to proto_ops Add a new getsockopt_iter callback to struct proto_ops that uses sockopt_t, a type-safe wrapper around iov_iter. This provides a clean interface for socket option operations that works with both user and kernel buffers. The sockopt_t type encapsulates an iov_iter and an optlen field. The optlen field, although not suggested by Linus, serves as both input (buffer size) and output (returned data size), allowing callbacks to return random values independent of the bytes written via copy_to_iter(), so, keep it separated from iov_iter.count. This is preparatory work for removing the SOL_SOCKET level restriction from io_uring getsockopt operations. Keep in mind that both iter_out and iter_in always point to the same data at all times, and we just have two of them to make the callback implementation sane. Suggested-by: Linus Torvalds Signed-off-by: Breno Leitao Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20260408-getsockopt-v3-1-061bb9cb355d@debian.org Signed-off-by: Jakub Kicinski --- include/linux/net.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/include/linux/net.h b/include/linux/net.h index ca6a7bc5c9ae..f268f395ce47 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -23,9 +23,30 @@ #include #include #include +#include #include +/** + * struct sockopt - socket option value container + * @iter_in: iov_iter for reading optval with the content from the caller. + * Use copy_from_iter() given this iov direction is ITER_SOURCE + * @iter_out: iov_iter for protocols to update optval data to userspace + * Use _copy_to_iter() given iov direction is ITER_DEST + * @optlen: serves as both input (buffer size) and output (returned data size). + * + * Type-safe wrapper for socket option data that works with both + * user and kernel buffers. + * + * The optlen field allows callbacks to return a specific length value + * independent of the bytes written via copy_to_iter(). + */ +typedef struct sockopt { + struct iov_iter iter_in; + struct iov_iter iter_out; + int optlen; +} sockopt_t; + struct poll_table_struct; struct pipe_inode_info; struct inode; @@ -192,6 +213,8 @@ struct proto_ops { unsigned int optlen); int (*getsockopt)(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen); + int (*getsockopt_iter)(struct socket *sock, int level, + int optname, sockopt_t *opt); void (*show_fdinfo)(struct seq_file *m, struct socket *sock); int (*sendmsg) (struct socket *sock, struct msghdr *m, size_t total_len); From 5bd0dec150f56b6307d599132dcb7c01007bbecc Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 8 Apr 2026 03:30:30 -0700 Subject: [PATCH 1524/1561] net: call getsockopt_iter if available MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update do_sock_getsockopt() to use the new getsockopt_iter callback when available. Add do_sock_getsockopt_iter() helper that: 1. Reads optlen from user/kernel space 2. Initializes a sockopt_t with the appropriate iov_iter (kvec for kernel, ubuf for user buffers) and sets opt.optlen 3. Calls the protocol's getsockopt_iter callback 4. Writes opt.optlen back to user/kernel space The optlen is always written back, even on failure. Some protocols (e.g. CAN raw) return -ERANGE and set optlen to the required buffer size so userspace knows how much to allocate. The callback is responsible for setting opt.optlen to indicate the returned data size. Important to say that iov_out does not need to be copied back in do_sock_getsockopt(). When optval is not kernel (the userspace path), sockptr_to_sockopt() sets up opt->iter_out as a ITER_DEST ubuf iterator pointing directly at the userspace buffer (optval.user). So when getsockopt_iter implementations call copy_to_iter(..., &opt->iter_out), the data is written directly to userspace — no intermediate kernel buffer is involved. When optval.is_kernel is true (the in-kernel path, e.g. from io_uring), the kvec points at the already-provided kernel buffer (optval.kernel), so the data lands in the caller's buffer directly via the kvec-backed iterator. In both cases the iterator writes to the final destination in-place at protocol callback. There's nothing to copy back — only optlen needs to be written back. Signed-off-by: Breno Leitao Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20260408-getsockopt-v3-2-061bb9cb355d@debian.org Signed-off-by: Jakub Kicinski --- net/socket.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 51 insertions(+), 3 deletions(-) diff --git a/net/socket.c b/net/socket.c index ade2ff5845a0..a25e513cf0f4 100644 --- a/net/socket.c +++ b/net/socket.c @@ -77,6 +77,7 @@ #include #include #include +#include #include #include #include @@ -2349,11 +2350,45 @@ SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname, INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level, int optname)); +/* + * Initialize a sockopt_t from sockptr optval/optlen, setting up iov_iter + * for both input and output directions. + * It is important to remember that both iov points to the same data, but, + * .iter_in is read-only and .iter_out is write-only by the protocol callbacks + */ +static int sockptr_to_sockopt(sockopt_t *opt, sockptr_t optval, + sockptr_t optlen, struct kvec *kvec) +{ + int koptlen; + + if (copy_from_sockptr(&koptlen, optlen, sizeof(int))) + return -EFAULT; + + if (koptlen < 0) + return -EINVAL; + + if (optval.is_kernel) { + kvec->iov_base = optval.kernel; + kvec->iov_len = koptlen; + iov_iter_kvec(&opt->iter_out, ITER_DEST, kvec, 1, koptlen); + iov_iter_kvec(&opt->iter_in, ITER_SOURCE, kvec, 1, koptlen); + } else { + iov_iter_ubuf(&opt->iter_out, ITER_DEST, optval.user, koptlen); + iov_iter_ubuf(&opt->iter_in, ITER_SOURCE, optval.user, + koptlen); + } + opt->optlen = koptlen; + + return 0; +} + int do_sock_getsockopt(struct socket *sock, bool compat, int level, int optname, sockptr_t optval, sockptr_t optlen) { int max_optlen __maybe_unused = 0; const struct proto_ops *ops; + struct kvec kvec; + sockopt_t opt; int err; err = security_socket_getsockopt(sock, level, optname); @@ -2366,15 +2401,28 @@ int do_sock_getsockopt(struct socket *sock, bool compat, int level, ops = READ_ONCE(sock->ops); if (level == SOL_SOCKET) { err = sk_getsockopt(sock->sk, level, optname, optval, optlen); - } else if (unlikely(!ops->getsockopt)) { - err = -EOPNOTSUPP; - } else { + } else if (ops->getsockopt_iter) { + err = sockptr_to_sockopt(&opt, optval, optlen, &kvec); + if (err) + return err; + + err = ops->getsockopt_iter(sock, level, optname, &opt); + + /* Always write back optlen, even on failure. Some protocols + * (e.g. CAN raw) return -ERANGE and set optlen to the + * required buffer size so userspace can discover it. + */ + if (copy_to_sockptr(optlen, &opt.optlen, sizeof(int))) + return -EFAULT; + } else if (ops->getsockopt) { if (WARN_ONCE(optval.is_kernel || optlen.is_kernel, "Invalid argument type")) return -EOPNOTSUPP; err = ops->getsockopt(sock, level, optname, optval.user, optlen.user); + } else { + err = -EOPNOTSUPP; } if (!compat) From 9c99d62705692db7fc8b8921efa0db189e84e694 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 8 Apr 2026 03:30:31 -0700 Subject: [PATCH 1525/1561] af_packet: convert to getsockopt_iter Convert AF_PACKET's getsockopt implementation to use the new getsockopt_iter callback with sockopt_t. Key changes: - Replace (char __user *optval, int __user *optlen) with sockopt_t *opt - Use opt->optlen for buffer length (input) and returned size (output) - Use copy_to_iter() instead of put_user()/copy_to_user() - For PACKET_HDRLEN which reads from optval: use opt->iter_in with copy_from_iter() for the input read, then the common opt->iter_out copy_to_iter() epilogue handles the output Signed-off-by: Breno Leitao Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20260408-getsockopt-v3-3-061bb9cb355d@debian.org Signed-off-by: Jakub Kicinski --- net/packet/af_packet.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index bb2d88205e5a..1da78b6ad3d5 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -49,6 +49,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include @@ -4051,7 +4052,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, } static int packet_getsockopt(struct socket *sock, int level, int optname, - char __user *optval, int __user *optlen) + sockopt_t *opt) { int len; int val, lv = sizeof(val); @@ -4065,8 +4066,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, if (level != SOL_PACKET) return -ENOPROTOOPT; - if (get_user(len, optlen)) - return -EFAULT; + len = opt->optlen; if (len < 0) return -EINVAL; @@ -4115,7 +4115,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, len = sizeof(int); if (len < sizeof(int)) return -EINVAL; - if (copy_from_user(&val, optval, len)) + if (copy_from_iter(&val, len, &opt->iter_in) != len) return -EFAULT; switch (val) { case TPACKET_V1: @@ -4171,9 +4171,8 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, if (len > lv) len = lv; - if (put_user(len, optlen)) - return -EFAULT; - if (copy_to_user(optval, data, len)) + opt->optlen = len; + if (copy_to_iter(data, len, &opt->iter_out) != len) return -EFAULT; return 0; } @@ -4672,7 +4671,7 @@ static const struct proto_ops packet_ops = { .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = packet_setsockopt, - .getsockopt = packet_getsockopt, + .getsockopt_iter = packet_getsockopt, .sendmsg = packet_sendmsg, .recvmsg = packet_recvmsg, .mmap = packet_mmap, From 5b75e7d6769557fbee2ae46181deaff0c98ca795 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 8 Apr 2026 03:30:32 -0700 Subject: [PATCH 1526/1561] can: raw: convert to getsockopt_iter Convert CAN raw socket's getsockopt implementation to use the new getsockopt_iter callback with sockopt_t. Key changes: - Replace (char __user *optval, int __user *optlen) with sockopt_t *opt - Use opt->optlen for buffer length (input) and returned size (output) - Use copy_to_iter() instead of copy_to_user() - For CAN_RAW_FILTER and CAN_RAW_XL_VCID_OPTS: on -ERANGE, set opt->optlen to the required buffer size. The wrapper writes this back to userspace even on error, preserving the existing API that lets userspace discover the needed allocation size. Signed-off-by: Breno Leitao Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20260408-getsockopt-v3-4-061bb9cb355d@debian.org Signed-off-by: Jakub Kicinski --- net/can/raw.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/net/can/raw.c b/net/can/raw.c index 56c95c768778..a0c0f5e946d8 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -761,7 +761,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, } static int raw_getsockopt(struct socket *sock, int level, int optname, - char __user *optval, int __user *optlen) + sockopt_t *opt) { struct sock *sk = sock->sk; struct raw_sock *ro = raw_sk(sk); @@ -771,8 +771,7 @@ static int raw_getsockopt(struct socket *sock, int level, int optname, if (level != SOL_CAN_RAW) return -EINVAL; - if (get_user(len, optlen)) - return -EFAULT; + len = opt->optlen; if (len < 0) return -EINVAL; @@ -788,12 +787,12 @@ static int raw_getsockopt(struct socket *sock, int level, int optname, if (len < fsize) { /* return -ERANGE and needed space in optlen */ err = -ERANGE; - if (put_user(fsize, optlen)) - err = -EFAULT; + opt->optlen = fsize; } else { if (len > fsize) len = fsize; - if (copy_to_user(optval, ro->filter, len)) + if (copy_to_iter(ro->filter, len, + &opt->iter_out) != len) err = -EFAULT; } } else { @@ -802,7 +801,7 @@ static int raw_getsockopt(struct socket *sock, int level, int optname, release_sock(sk); if (!err) - err = put_user(len, optlen); + opt->optlen = len; return err; } case CAN_RAW_ERR_FILTER: @@ -846,16 +845,16 @@ static int raw_getsockopt(struct socket *sock, int level, int optname, if (len < sizeof(ro->raw_vcid_opts)) { /* return -ERANGE and needed space in optlen */ err = -ERANGE; - if (put_user(sizeof(ro->raw_vcid_opts), optlen)) - err = -EFAULT; + opt->optlen = sizeof(ro->raw_vcid_opts); } else { if (len > sizeof(ro->raw_vcid_opts)) len = sizeof(ro->raw_vcid_opts); - if (copy_to_user(optval, &ro->raw_vcid_opts, len)) + if (copy_to_iter(&ro->raw_vcid_opts, len, + &opt->iter_out) != len) err = -EFAULT; } if (!err) - err = put_user(len, optlen); + opt->optlen = len; return err; } case CAN_RAW_JOIN_FILTERS: @@ -869,9 +868,8 @@ static int raw_getsockopt(struct socket *sock, int level, int optname, return -ENOPROTOOPT; } - if (put_user(len, optlen)) - return -EFAULT; - if (copy_to_user(optval, val, len)) + opt->optlen = len; + if (copy_to_iter(val, len, &opt->iter_out) != len) return -EFAULT; return 0; } @@ -1078,7 +1076,7 @@ static const struct proto_ops raw_ops = { .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = raw_setsockopt, - .getsockopt = raw_getsockopt, + .getsockopt_iter = raw_getsockopt, .sendmsg = raw_sendmsg, .recvmsg = raw_recvmsg, .mmap = sock_no_mmap, From d5ee2ff98322337951c56398e79d51815acbf955 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 9 Apr 2026 23:04:12 +0530 Subject: [PATCH 1527/1561] net: qrtr: ns: Limit the maximum server registration per node Current code does no bound checking on the number of servers added per node. A malicious client can flood NEW_SERVER messages and exhaust memory. Fix this issue by limiting the maximum number of server registrations to 256 per node. If the NEW_SERVER message is received for an old port, then don't restrict it as it will get replaced. While at it, also rate limit the error messages in the failure path of qrtr_ns_worker(). Note that the limit of 256 is chosen based on the current platform requirements. If requirement changes in the future, this limit can be increased. Cc: stable@vger.kernel.org Fixes: 0c2204a4ad71 ("net: qrtr: Migrate nameservice to kernel from userspace") Reported-by: Yiming Qian Reviewed-by: Simon Horman Signed-off-by: Manivannan Sadhasivam Link: https://patch.msgid.link/20260409-qrtr-fix-v3-1-00a8a5ff2b51@oss.qualcomm.com Signed-off-by: Jakub Kicinski --- net/qrtr/ns.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c index 3203b2220860..63cb5861d87a 100644 --- a/net/qrtr/ns.c +++ b/net/qrtr/ns.c @@ -67,8 +67,14 @@ struct qrtr_server { struct qrtr_node { unsigned int id; struct xarray servers; + u32 server_count; }; +/* Max server limit is chosen based on the current platform requirements. If the + * requirement changes in the future, this value can be increased. + */ +#define QRTR_NS_MAX_SERVERS 256 + static struct qrtr_node *node_get(unsigned int node_id) { struct qrtr_node *node; @@ -229,6 +235,17 @@ static struct qrtr_server *server_add(unsigned int service, if (!service || !port) return NULL; + node = node_get(node_id); + if (!node) + return NULL; + + /* Make sure the new servers per port are capped at the maximum value */ + old = xa_load(&node->servers, port); + if (!old && node->server_count >= QRTR_NS_MAX_SERVERS) { + pr_err_ratelimited("QRTR client node %u exceeds max server limit!\n", node_id); + return NULL; + } + srv = kzalloc_obj(*srv); if (!srv) return NULL; @@ -238,10 +255,6 @@ static struct qrtr_server *server_add(unsigned int service, srv->node = node_id; srv->port = port; - node = node_get(node_id); - if (!node) - goto err; - /* Delete the old server on the same port */ old = xa_store(&node->servers, port, srv, GFP_KERNEL); if (old) { @@ -252,6 +265,8 @@ static struct qrtr_server *server_add(unsigned int service, } else { kfree(old); } + } else { + node->server_count++; } trace_qrtr_ns_server_add(srv->service, srv->instance, @@ -292,6 +307,7 @@ static int server_del(struct qrtr_node *node, unsigned int port, bool bcast) } kfree(srv); + node->server_count--; return 0; } @@ -670,7 +686,7 @@ static void qrtr_ns_worker(struct work_struct *work) } if (ret < 0) - pr_err("failed while handling packet from %d:%d", + pr_err_ratelimited("failed while handling packet from %d:%d", sq.sq_node, sq.sq_port); } From 5640227d9a21c6a8be249a10677b832e7f40dc55 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 9 Apr 2026 23:04:13 +0530 Subject: [PATCH 1528/1561] net: qrtr: ns: Limit the maximum number of lookups Current code does no bound checking on the number of lookups a client can perform. Though the code restricts the lookups to local clients, there is still a possibility of a malicious local client sending a flood of NEW_LOOKUP messages over the same socket. Fix this issue by limiting the maximum number of lookups to 64 globally. Since the nameserver allows only atmost one local observer, this global lookup count will ensure that the lookups stay within the limit. Note that, limit of 64 is chosen based on the current platform requirements. If requirement changes in the future, this limit can be increased. Cc: stable@vger.kernel.org Fixes: 0c2204a4ad71 ("net: qrtr: Migrate nameservice to kernel from userspace") Signed-off-by: Manivannan Sadhasivam Link: https://patch.msgid.link/20260409-qrtr-fix-v3-2-00a8a5ff2b51@oss.qualcomm.com Signed-off-by: Jakub Kicinski --- net/qrtr/ns.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c index 63cb5861d87a..5b08d4d4840a 100644 --- a/net/qrtr/ns.c +++ b/net/qrtr/ns.c @@ -22,6 +22,7 @@ static struct { struct socket *sock; struct sockaddr_qrtr bcast_sq; struct list_head lookups; + u32 lookup_count; struct workqueue_struct *workqueue; struct work_struct work; int local_node; @@ -70,10 +71,11 @@ struct qrtr_node { u32 server_count; }; -/* Max server limit is chosen based on the current platform requirements. If the - * requirement changes in the future, this value can be increased. +/* Max server, lookup limits are chosen based on the current platform requirements. + * If the requirement changes in the future, these values can be increased. */ #define QRTR_NS_MAX_SERVERS 256 +#define QRTR_NS_MAX_LOOKUPS 64 static struct qrtr_node *node_get(unsigned int node_id) { @@ -433,6 +435,7 @@ static int ctrl_cmd_del_client(struct sockaddr_qrtr *from, list_del(&lookup->li); kfree(lookup); + qrtr_ns.lookup_count--; } /* Remove the server belonging to this port but don't broadcast @@ -550,6 +553,11 @@ static int ctrl_cmd_new_lookup(struct sockaddr_qrtr *from, if (from->sq_node != qrtr_ns.local_node) return -EINVAL; + if (qrtr_ns.lookup_count >= QRTR_NS_MAX_LOOKUPS) { + pr_err_ratelimited("QRTR client node exceeds max lookup limit!\n"); + return -ENOSPC; + } + lookup = kzalloc_obj(*lookup); if (!lookup) return -ENOMEM; @@ -558,6 +566,7 @@ static int ctrl_cmd_new_lookup(struct sockaddr_qrtr *from, lookup->service = service; lookup->instance = instance; list_add_tail(&lookup->li, &qrtr_ns.lookups); + qrtr_ns.lookup_count++; memset(&filter, 0, sizeof(filter)); filter.service = service; @@ -598,6 +607,7 @@ static void ctrl_cmd_del_lookup(struct sockaddr_qrtr *from, list_del(&lookup->li); kfree(lookup); + qrtr_ns.lookup_count--; } } From 68efba36446a7774ea5b971257ade049272a07ac Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 9 Apr 2026 23:04:14 +0530 Subject: [PATCH 1529/1561] net: qrtr: ns: Free the node during ctrl_cmd_bye() A node sends the BYE packet when it is about to go down. So the nameserver should advertise the removal of the node to all remote and local observers and free the node finally. But currently, the nameserver doesn't free the node memory even after processing the BYE packet. This causes the node memory to leak. Hence, remove the node from Xarray list and free the node memory during both success and failure case of ctrl_cmd_bye(). Cc: stable@vger.kernel.org Fixes: 0c2204a4ad71 ("net: qrtr: Migrate nameservice to kernel from userspace") Signed-off-by: Manivannan Sadhasivam Link: https://patch.msgid.link/20260409-qrtr-fix-v3-3-00a8a5ff2b51@oss.qualcomm.com Signed-off-by: Jakub Kicinski --- net/qrtr/ns.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c index 5b08d4d4840a..1b9a90240a68 100644 --- a/net/qrtr/ns.c +++ b/net/qrtr/ns.c @@ -359,7 +359,7 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from) struct qrtr_node *node; unsigned long index; struct kvec iv; - int ret; + int ret = 0; iv.iov_base = &pkt; iv.iov_len = sizeof(pkt); @@ -374,8 +374,10 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from) /* Advertise the removal of this client to all local servers */ local_node = node_get(qrtr_ns.local_node); - if (!local_node) - return 0; + if (!local_node) { + ret = 0; + goto delete_node; + } memset(&pkt, 0, sizeof(pkt)); pkt.cmd = cpu_to_le32(QRTR_TYPE_BYE); @@ -392,10 +394,18 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from) ret = kernel_sendmsg(qrtr_ns.sock, &msg, &iv, 1, sizeof(pkt)); if (ret < 0 && ret != -ENODEV) { pr_err("failed to send bye cmd\n"); - return ret; + goto delete_node; } } - return 0; + + /* Ignore -ENODEV */ + ret = 0; + +delete_node: + xa_erase(&nodes, from->sq_node); + kfree(node); + + return ret; } static int ctrl_cmd_del_client(struct sockaddr_qrtr *from, From 27d5e84e810b0849d08b9aec68e48570461ce313 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 9 Apr 2026 23:04:15 +0530 Subject: [PATCH 1530/1561] net: qrtr: ns: Limit the total number of nodes Currently, the nameserver doesn't limit the number of nodes it handles. This can be an attack vector if a malicious client starts registering random nodes, leading to memory exhaustion. Hence, limit the maximum number of nodes to 64. Note that, limit of 64 is chosen based on the current platform requirements. If requirement changes in the future, this limit can be increased. Cc: stable@vger.kernel.org Fixes: 0c2204a4ad71 ("net: qrtr: Migrate nameservice to kernel from userspace") Signed-off-by: Manivannan Sadhasivam Link: https://patch.msgid.link/20260409-qrtr-fix-v3-4-00a8a5ff2b51@oss.qualcomm.com Signed-off-by: Jakub Kicinski --- net/qrtr/ns.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c index 1b9a90240a68..c0418764470b 100644 --- a/net/qrtr/ns.c +++ b/net/qrtr/ns.c @@ -71,12 +71,16 @@ struct qrtr_node { u32 server_count; }; -/* Max server, lookup limits are chosen based on the current platform requirements. - * If the requirement changes in the future, these values can be increased. +/* Max nodes, server, lookup limits are chosen based on the current platform + * requirements. If the requirement changes in the future, these values can be + * increased. */ +#define QRTR_NS_MAX_NODES 64 #define QRTR_NS_MAX_SERVERS 256 #define QRTR_NS_MAX_LOOKUPS 64 +static u8 node_count; + static struct qrtr_node *node_get(unsigned int node_id) { struct qrtr_node *node; @@ -85,6 +89,11 @@ static struct qrtr_node *node_get(unsigned int node_id) if (node) return node; + if (node_count >= QRTR_NS_MAX_NODES) { + pr_err_ratelimited("QRTR clients exceed max node limit!\n"); + return NULL; + } + /* If node didn't exist, allocate and insert it to the tree */ node = kzalloc_obj(*node); if (!node) @@ -98,6 +107,8 @@ static struct qrtr_node *node_get(unsigned int node_id) return NULL; } + node_count++; + return node; } @@ -404,6 +415,7 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from) delete_node: xa_erase(&nodes, from->sq_node); kfree(node); + node_count--; return ret; } From 7809fea20c9404bfcfa6112ec08d1fe1d3520beb Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 9 Apr 2026 23:04:16 +0530 Subject: [PATCH 1531/1561] net: qrtr: ns: Fix use-after-free in driver remove() In the remove callback, if a packet arrives after destroy_workqueue() is called, but before sock_release(), the qrtr_ns_data_ready() callback will try to queue the work, causing use-after-free issue. Fix this issue by saving the default 'sk_data_ready' callback during qrtr_ns_init() and use it to replace the qrtr_ns_data_ready() callback at the start of remove(). This ensures that even if a packet arrives after destroy_workqueue(), the work struct will not be dereferenced. Note that it is also required to ensure that the RX threads are completed before destroying the workqueue, because the threads could be using the qrtr_ns_data_ready() callback. Cc: stable@vger.kernel.org Fixes: 0c2204a4ad71 ("net: qrtr: Migrate nameservice to kernel from userspace") Signed-off-by: Manivannan Sadhasivam Link: https://patch.msgid.link/20260409-qrtr-fix-v3-5-00a8a5ff2b51@oss.qualcomm.com Signed-off-by: Jakub Kicinski --- net/qrtr/ns.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c index c0418764470b..b3f9bbcf9ab9 100644 --- a/net/qrtr/ns.c +++ b/net/qrtr/ns.c @@ -25,6 +25,7 @@ static struct { u32 lookup_count; struct workqueue_struct *workqueue; struct work_struct work; + void (*saved_data_ready)(struct sock *sk); int local_node; } qrtr_ns; @@ -757,6 +758,7 @@ int qrtr_ns_init(void) goto err_sock; } + qrtr_ns.saved_data_ready = qrtr_ns.sock->sk->sk_data_ready; qrtr_ns.sock->sk->sk_data_ready = qrtr_ns_data_ready; sq.sq_port = QRTR_PORT_CTRL; @@ -797,6 +799,10 @@ int qrtr_ns_init(void) return 0; err_wq: + write_lock_bh(&qrtr_ns.sock->sk->sk_callback_lock); + qrtr_ns.sock->sk->sk_data_ready = qrtr_ns.saved_data_ready; + write_unlock_bh(&qrtr_ns.sock->sk->sk_callback_lock); + destroy_workqueue(qrtr_ns.workqueue); err_sock: sock_release(qrtr_ns.sock); @@ -806,7 +812,12 @@ EXPORT_SYMBOL_GPL(qrtr_ns_init); void qrtr_ns_remove(void) { + write_lock_bh(&qrtr_ns.sock->sk->sk_callback_lock); + qrtr_ns.sock->sk->sk_data_ready = qrtr_ns.saved_data_ready; + write_unlock_bh(&qrtr_ns.sock->sk->sk_callback_lock); + cancel_work_sync(&qrtr_ns.work); + synchronize_net(); destroy_workqueue(qrtr_ns.workqueue); /* sock_release() expects the two references that were put during From 4e5bc3ff060e6a495117a164a1ce6df5cdf1454f Mon Sep 17 00:00:00 2001 From: David Carlier Date: Thu, 9 Apr 2026 19:40:08 +0100 Subject: [PATCH 1532/1561] octeon_ep_vf: introduce octep_vf_oq_next_idx() helper Introduce octep_vf_oq_next_idx() to consolidate the repeated ring index advance and wraparound pattern in __octep_vf_oq_process_rx(). No functional change intended. Signed-off-by: David Carlier Link: https://patch.msgid.link/20260409184009.930359-2-devnexen@gmail.com Signed-off-by: Jakub Kicinski --- .../ethernet/marvell/octeon_ep_vf/octep_vf_rx.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c index b579d5b545c4..7bd1b9b8d7f5 100644 --- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c +++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c @@ -352,6 +352,11 @@ static int octep_vf_oq_check_hw_for_pkts(struct octep_vf_device *oct, return new_pkts; } +static inline u32 octep_vf_oq_next_idx(struct octep_vf_oq *oq, u32 idx) +{ + return (idx + 1 == oq->max_count) ? 0 : idx + 1; +} + /** * __octep_vf_oq_process_rx() - Process hardware Rx queue and push to stack. * @@ -415,10 +420,8 @@ static int __octep_vf_oq_process_rx(struct octep_vf_device *oct, skb = napi_build_skb((void *)resp_hw, PAGE_SIZE); skb_reserve(skb, data_offset); skb_put(skb, buff_info->len); - read_idx++; desc_used++; - if (read_idx == oq->max_count) - read_idx = 0; + read_idx = octep_vf_oq_next_idx(oq, read_idx); } else { struct skb_shared_info *shinfo; u16 data_len; @@ -429,10 +432,8 @@ static int __octep_vf_oq_process_rx(struct octep_vf_device *oct, * subsequent fragments contains only data. */ skb_put(skb, oq->max_single_buffer_size); - read_idx++; desc_used++; - if (read_idx == oq->max_count) - read_idx = 0; + read_idx = octep_vf_oq_next_idx(oq, read_idx); shinfo = skb_shinfo(skb); data_len = buff_info->len - oq->max_single_buffer_size; @@ -454,10 +455,8 @@ static int __octep_vf_oq_process_rx(struct octep_vf_device *oct, buff_info->len, buff_info->len); buff_info->page = NULL; - read_idx++; desc_used++; - if (read_idx == oq->max_count) - read_idx = 0; + read_idx = octep_vf_oq_next_idx(oq, read_idx); } } From dd66b42854705e4e4ee7f14d260f86c578bed3e3 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Thu, 9 Apr 2026 19:40:09 +0100 Subject: [PATCH 1533/1561] octeon_ep_vf: add NULL check for napi_build_skb() napi_build_skb() can return NULL on allocation failure. In __octep_vf_oq_process_rx(), the result is used directly without a NULL check in both the single-buffer and multi-fragment paths, leading to a NULL pointer dereference. Add NULL checks after both napi_build_skb() calls, properly advancing descriptors and consuming remaining fragments on failure. Fixes: 1cd3b407977c ("octeon_ep_vf: add Tx/Rx processing and interrupt support") Cc: stable@vger.kernel.org Signed-off-by: David Carlier Link: https://patch.msgid.link/20260409184009.930359-3-devnexen@gmail.com Signed-off-by: Jakub Kicinski --- .../marvell/octeon_ep_vf/octep_vf_rx.c | 30 +++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c index 7bd1b9b8d7f5..d98247408242 100644 --- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c +++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c @@ -414,10 +414,15 @@ static int __octep_vf_oq_process_rx(struct octep_vf_device *oct, data_offset = OCTEP_VF_OQ_RESP_HW_SIZE; rx_ol_flags = 0; } - rx_bytes += buff_info->len; - if (buff_info->len <= oq->max_single_buffer_size) { skb = napi_build_skb((void *)resp_hw, PAGE_SIZE); + if (!skb) { + oq->stats->alloc_failures++; + desc_used++; + read_idx = octep_vf_oq_next_idx(oq, read_idx); + continue; + } + rx_bytes += buff_info->len; skb_reserve(skb, data_offset); skb_put(skb, buff_info->len); desc_used++; @@ -427,6 +432,27 @@ static int __octep_vf_oq_process_rx(struct octep_vf_device *oct, u16 data_len; skb = napi_build_skb((void *)resp_hw, PAGE_SIZE); + if (!skb) { + oq->stats->alloc_failures++; + desc_used++; + read_idx = octep_vf_oq_next_idx(oq, read_idx); + data_len = buff_info->len - oq->max_single_buffer_size; + while (data_len) { + dma_unmap_page(oq->dev, oq->desc_ring[read_idx].buffer_ptr, + PAGE_SIZE, DMA_FROM_DEVICE); + buff_info = (struct octep_vf_rx_buffer *) + &oq->buff_info[read_idx]; + buff_info->page = NULL; + if (data_len < oq->buffer_size) + data_len = 0; + else + data_len -= oq->buffer_size; + desc_used++; + read_idx = octep_vf_oq_next_idx(oq, read_idx); + } + continue; + } + rx_bytes += buff_info->len; skb_reserve(skb, data_offset); /* Head fragment includes response header(s); * subsequent fragments contains only data. From b80a95ccf1604a882bb153c45ccb4056e44c8edb Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Fri, 10 Apr 2026 11:59:36 -0400 Subject: [PATCH 1534/1561] udp: Force compute_score to always inline Back in 2024 I reported a 7-12% regression on an iperf3 UDP loopback thoughput test that we traced to the extra overhead of calling compute_score on two places, introduced by commit f0ea27e7bfe1 ("udp: re-score reuseport groups when connected sockets are present"). At the time, I pointed out the overhead was caused by the multiple calls, associated with cpu-specific mitigations, and merged commit 50aee97d1511 ("udp: Avoid call to compute_score on multiple sites") to jump back explicitly, to force the rescore call in a single place. Recently though, we got another regression report against a newer distro version, which a team colleague traced back to the same root-cause. Turns out that once we updated to gcc-13, the compiler got smart enough to unroll the loop, undoing my previous mitigation. Let's bite the bullet and __always_inline compute_score on both ipv4 and ipv6 to prevent gcc from de-optimizing it again in the future. These functions are only called in two places each, udpX_lib_lookup1 and udpX_lib_lookup2, so the extra size shouldn't be a problem and it is hot enough to be very visible in profilings. In fact, with gcc13, forcing the inline will prevent gcc from unrolling the fix from commit 50aee97d1511, so we don't end up increasing udpX_lib_lookup2 at all. I haven't recollected the results myself, as I don't have access to the machine at the moment. But the same colleague reported 4.67% inprovement with this patch in the loopback benchmark, solving the regression report within noise margins. Eric Dumazet reported no size change to vmlinux when built with clang. I report the same also with gcc-13: scripts/bloat-o-meter vmlinux vmlinux-inline add/remove: 0/2 grow/shrink: 4/0 up/down: 616/-416 (200) Function old new delta udp6_lib_lookup2 762 949 +187 __udp6_lib_lookup 810 975 +165 udp4_lib_lookup2 757 906 +149 __udp4_lib_lookup 871 986 +115 __pfx_compute_score 32 - -32 compute_score 384 - -384 Total: Before=35011784, After=35011984, chg +0.00% Fixes: 50aee97d1511 ("udp: Avoid call to compute_score on multiple sites") Reviewed-by: Eric Dumazet Acked-by: Willem de Bruijn Signed-off-by: Gabriel Krisman Bertazi Link: https://patch.msgid.link/20260410155936.654915-1-krisman@suse.de Signed-off-by: Jakub Kicinski --- net/ipv4/udp.c | 12 ++++++------ net/ipv6/udp.c | 13 +++++++------ 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 2fddc7b6b717..e9e2ce9522ef 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -358,10 +358,10 @@ static int udp_v4_get_port(struct sock *sk, unsigned short snum) return udp_lib_get_port(sk, snum, hash2_nulladdr); } -static int compute_score(struct sock *sk, const struct net *net, - __be32 saddr, __be16 sport, - __be32 daddr, unsigned short hnum, - int dif, int sdif) +static __always_inline int +compute_score(struct sock *sk, const struct net *net, + __be32 saddr, __be16 sport, __be32 daddr, + unsigned short hnum, int dif, int sdif) { int score; struct inet_sock *inet; @@ -500,8 +500,8 @@ rescore: continue; /* compute_score is too long of a function to be - * inlined, and calling it again here yields - * measurable overhead for some + * inlined twice here, and calling it uninlined + * here yields measurable overhead for some * workloads. Work around it by jumping * backwards to rescore 'result'. */ diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 3fac9cb47ae0..15e032194ecc 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -127,10 +127,11 @@ static void udp_v6_rehash(struct sock *sk) udp_lib_rehash(sk, new_hash, new_hash4); } -static int compute_score(struct sock *sk, const struct net *net, - const struct in6_addr *saddr, __be16 sport, - const struct in6_addr *daddr, unsigned short hnum, - int dif, int sdif) +static __always_inline int +compute_score(struct sock *sk, const struct net *net, + const struct in6_addr *saddr, __be16 sport, + const struct in6_addr *daddr, unsigned short hnum, + int dif, int sdif) { int bound_dev_if, score; struct inet_sock *inet; @@ -260,8 +261,8 @@ rescore: continue; /* compute_score is too long of a function to be - * inlined, and calling it again here yields - * measurable overhead for some + * inlined twice here, and calling it uninlined + * here yields measurable overhead for some * workloads. Work around it by jumping * backwards to rescore 'result'. */ From c058bbf05b1197c33df7204842665bd8bc70b3a8 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 10 Apr 2026 23:53:27 +0000 Subject: [PATCH 1535/1561] tcp: Don't set treq->req_usec_ts in cookie_tcp_reqsk_init(). Commit de5626b95e13 ("tcp: Factorise cookie-independent fields initialisation in cookie_v[46]_check().") miscategorised tcp_rsk(req)->req_usec_ts init to cookie_tcp_reqsk_init(), which is used by both BPF/non-BPF SYN cookie reqsk. Rather, it should have been moved to cookie_tcp_reqsk_alloc() by commit 8e7bab6b9652 ("tcp: Factorise cookie-dependent fields initialisation in cookie_v[46]_check()") so that only non-BPF SYN cookie sets tcp_rsk(req)->req_usec_ts to false. Let's move the initialisation to cookie_tcp_reqsk_alloc() to respect bpf_tcp_req_attrs.usec_ts_ok. Fixes: e472f88891ab ("bpf: tcp: Support arbitrary SYN Cookie.") Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20260410235328.1773449-1-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/syncookies.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index fc3affd9c801..b5f0a65c6786 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -286,7 +286,6 @@ static int cookie_tcp_reqsk_init(struct sock *sk, struct sk_buff *skb, treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = ntohl(th->ack_seq) - 1; treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield; - treq->req_usec_ts = false; #if IS_ENABLED(CONFIG_MPTCP) treq->is_mptcp = sk_is_mptcp(sk); @@ -349,6 +348,7 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, ireq->wscale_ok = tcp_opt->wscale_ok; ireq->ecn_ok = !!(tcp_opt->rcv_tsecr & TS_OPT_ECN); + treq->req_usec_ts = false; treq->ts_off = tsoff; return req; From 8b0c25528cb64f71a73b5c0d49cbbcb68540a4ce Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 11 Apr 2026 12:45:25 +0200 Subject: [PATCH 1536/1561] bnge: return after auxiliary_device_uninit() in error path When auxiliary_device_add() fails, the error block calls auxiliary_device_uninit() but does not return. The uninit drops the last reference and synchronously runs bnge_aux_dev_release(), which sets bd->auxr_dev = NULL and frees the underlying object. The subsequent bd->auxr_dev->net = bd->netdev then dereferences NULL, which is not a good thing to have happen when trying to clean up from an error. Add the missing return, as the auxiliary bus documentation states is a requirement (seems that LLM tools read documentation better than humans do...) Cc: Vikas Gupta Cc: Andrew Lunn Fixes: 8ac050ec3b1c ("bng_en: Add RoCE aux device support") Cc: stable Assisted-by: gregkh_clanker_t1000 Signed-off-by: Greg Kroah-Hartman Link: https://patch.msgid.link/2026041124-banshee-molecular-0f70@gregkh Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnge/bnge_auxr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_auxr.c b/drivers/net/ethernet/broadcom/bnge/bnge_auxr.c index b942076762ef..67e93e17d4d9 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge_auxr.c +++ b/drivers/net/ethernet/broadcom/bnge/bnge_auxr.c @@ -194,6 +194,7 @@ void bnge_rdma_aux_device_add(struct bnge_dev *bd) dev_warn(bd->dev, "Failed to add auxiliary device for ROCE\n"); auxiliary_device_uninit(aux_dev); bd->flags &= ~BNGE_EN_ROCE; + return; } bd->auxr_dev->net = bd->netdev; From 43a2deae3661d0d51f9d39244ceb0a1701ec0006 Mon Sep 17 00:00:00 2001 From: Aleksander Jan Bajkowski Date: Sat, 11 Apr 2026 12:51:45 +0200 Subject: [PATCH 1537/1561] net: phy: realtek: use LEDCR page number define on RTL8211F Replace the magic number with an existing define for the LEDCR register page number on the RTL8211F. Signed-off-by: Aleksander Jan Bajkowski Reviewed-by: Daniel Golle Link: https://patch.msgid.link/20260411105150.184577-1-olek2@wp.pl Signed-off-by: Jakub Kicinski --- drivers/net/phy/realtek/realtek_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/realtek/realtek_main.c b/drivers/net/phy/realtek/realtek_main.c index 9c26531abe4a..79c867ef64da 100644 --- a/drivers/net/phy/realtek/realtek_main.c +++ b/drivers/net/phy/realtek/realtek_main.c @@ -1012,7 +1012,7 @@ static int rtl8211f_led_hw_control_get(struct phy_device *phydev, u8 index, if (index >= RTL8211x_LED_COUNT) return -EINVAL; - val = phy_read_paged(phydev, 0xd04, RTL8211F_LEDCR); + val = phy_read_paged(phydev, RTL8211F_LEDCR_PAGE, RTL8211F_LEDCR); if (val < 0) return val; @@ -1074,7 +1074,8 @@ static int rtl8211f_led_hw_control_set(struct phy_device *phydev, u8 index, reg <<= RTL8211F_LEDCR_SHIFT * index; reg |= RTL8211F_LEDCR_MODE; /* Mode B */ - return phy_modify_paged(phydev, 0xd04, RTL8211F_LEDCR, mask, reg); + return phy_modify_paged(phydev, RTL8211F_LEDCR_PAGE, RTL8211F_LEDCR, + mask, reg); } static int rtl8211e_led_hw_control_get(struct phy_device *phydev, u8 index, From e6295d124644b14a12b55edf5d3e89cf86a4a2ce Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Sun, 12 Apr 2026 01:01:57 +0100 Subject: [PATCH 1538/1561] net: dsa: mxl862xx: add ethtool statistics support The MxL862xx firmware exposes per-port RMON counters through the RMON_PORT_GET command, covering standard IEEE 802.3 MAC statistics (unicast/multicast/broadcast packet and byte counts, collision counters, pause frames) as well as hardware-specific counters such as extended VLAN discard and MTU exceed events. Add the RMON counter firmware API structures and command definitions. Implement .get_strings, .get_sset_count, and .get_ethtool_stats for legacy ethtool -S support. Implement .get_eth_mac_stats, .get_eth_ctrl_stats, and .get_pause_stats for the standardized IEEE 802.3 statistics interface. Signed-off-by: Daniel Golle Link: https://patch.msgid.link/480be14d5ed51f3db7b1681b298044dbf8e87494.1775951347.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mxl862xx/mxl862xx-api.h | 142 +++++++++++++++++++ drivers/net/dsa/mxl862xx/mxl862xx-cmd.h | 3 + drivers/net/dsa/mxl862xx/mxl862xx.c | 173 ++++++++++++++++++++++++ 3 files changed, 318 insertions(+) diff --git a/drivers/net/dsa/mxl862xx/mxl862xx-api.h b/drivers/net/dsa/mxl862xx/mxl862xx-api.h index c902e90397e5..fb21ddc1bf1c 100644 --- a/drivers/net/dsa/mxl862xx/mxl862xx-api.h +++ b/drivers/net/dsa/mxl862xx/mxl862xx-api.h @@ -1224,4 +1224,146 @@ struct mxl862xx_sys_fw_image_version { __le32 iv_build_num; } __packed; +/** + * enum mxl862xx_port_type - Port Type + * @MXL862XX_LOGICAL_PORT: Logical Port + * @MXL862XX_PHYSICAL_PORT: Physical Port + * @MXL862XX_CTP_PORT: Connectivity Termination Port (CTP) + * @MXL862XX_BRIDGE_PORT: Bridge Port + */ +enum mxl862xx_port_type { + MXL862XX_LOGICAL_PORT = 0, + MXL862XX_PHYSICAL_PORT, + MXL862XX_CTP_PORT, + MXL862XX_BRIDGE_PORT, +}; + +/** + * enum mxl862xx_rmon_port_type - RMON counter table type + * @MXL862XX_RMON_CTP_PORT_RX: CTP RX counters + * @MXL862XX_RMON_CTP_PORT_TX: CTP TX counters + * @MXL862XX_RMON_BRIDGE_PORT_RX: Bridge port RX counters + * @MXL862XX_RMON_BRIDGE_PORT_TX: Bridge port TX counters + * @MXL862XX_RMON_CTP_PORT_PCE_BYPASS: CTP PCE bypass counters + * @MXL862XX_RMON_TFLOW_RX: TFLOW RX counters + * @MXL862XX_RMON_TFLOW_TX: TFLOW TX counters + * @MXL862XX_RMON_QMAP: QMAP counters + * @MXL862XX_RMON_METER: Meter counters + * @MXL862XX_RMON_PMAC: PMAC counters + */ +enum mxl862xx_rmon_port_type { + MXL862XX_RMON_CTP_PORT_RX = 0, + MXL862XX_RMON_CTP_PORT_TX, + MXL862XX_RMON_BRIDGE_PORT_RX, + MXL862XX_RMON_BRIDGE_PORT_TX, + MXL862XX_RMON_CTP_PORT_PCE_BYPASS, + MXL862XX_RMON_TFLOW_RX, + MXL862XX_RMON_TFLOW_TX, + MXL862XX_RMON_QMAP = 0x0e, + MXL862XX_RMON_METER = 0x19, + MXL862XX_RMON_PMAC = 0x1c, +}; + +/** + * struct mxl862xx_rmon_port_cnt - RMON counters for a port + * @port_type: Port type for counter retrieval (see &enum mxl862xx_port_type) + * @port_id: Ethernet port number (zero-based) + * @sub_if_id_group: Sub-interface ID group + * @pce_bypass: Separate CTP Tx counters when PCE is bypassed + * @rx_extended_vlan_discard_pkts: Discarded at extended VLAN operation + * @mtu_exceed_discard_pkts: Discarded due to MTU exceeded + * @tx_under_size_good_pkts: Tx undersize (<64) packet count + * @tx_oversize_good_pkts: Tx oversize (>1518) packet count + * @rx_good_pkts: Received good packet count + * @rx_unicast_pkts: Received unicast packet count + * @rx_broadcast_pkts: Received broadcast packet count + * @rx_multicast_pkts: Received multicast packet count + * @rx_fcserror_pkts: Received FCS error packet count + * @rx_under_size_good_pkts: Received undersize good packet count + * @rx_oversize_good_pkts: Received oversize good packet count + * @rx_under_size_error_pkts: Received undersize error packet count + * @rx_good_pause_pkts: Received good pause packet count + * @rx_oversize_error_pkts: Received oversize error packet count + * @rx_align_error_pkts: Received alignment error packet count + * @rx_filtered_pkts: Filtered packet count + * @rx64byte_pkts: Received 64-byte packet count + * @rx127byte_pkts: Received 65-127 byte packet count + * @rx255byte_pkts: Received 128-255 byte packet count + * @rx511byte_pkts: Received 256-511 byte packet count + * @rx1023byte_pkts: Received 512-1023 byte packet count + * @rx_max_byte_pkts: Received 1024-max byte packet count + * @tx_good_pkts: Transmitted good packet count + * @tx_unicast_pkts: Transmitted unicast packet count + * @tx_broadcast_pkts: Transmitted broadcast packet count + * @tx_multicast_pkts: Transmitted multicast packet count + * @tx_single_coll_count: Transmit single collision count + * @tx_mult_coll_count: Transmit multiple collision count + * @tx_late_coll_count: Transmit late collision count + * @tx_excess_coll_count: Transmit excessive collision count + * @tx_coll_count: Transmit collision count + * @tx_pause_count: Transmit pause packet count + * @tx64byte_pkts: Transmitted 64-byte packet count + * @tx127byte_pkts: Transmitted 65-127 byte packet count + * @tx255byte_pkts: Transmitted 128-255 byte packet count + * @tx511byte_pkts: Transmitted 256-511 byte packet count + * @tx1023byte_pkts: Transmitted 512-1023 byte packet count + * @tx_max_byte_pkts: Transmitted 1024-max byte packet count + * @tx_dropped_pkts: Transmit dropped packet count + * @tx_acm_dropped_pkts: Transmit ACM dropped packet count + * @rx_dropped_pkts: Received dropped packet count + * @rx_good_bytes: Received good byte count (64-bit) + * @rx_bad_bytes: Received bad byte count (64-bit) + * @tx_good_bytes: Transmitted good byte count (64-bit) + */ +struct mxl862xx_rmon_port_cnt { + __le32 port_type; /* enum mxl862xx_port_type */ + __le16 port_id; + __le16 sub_if_id_group; + u8 pce_bypass; + __le32 rx_extended_vlan_discard_pkts; + __le32 mtu_exceed_discard_pkts; + __le32 tx_under_size_good_pkts; + __le32 tx_oversize_good_pkts; + __le32 rx_good_pkts; + __le32 rx_unicast_pkts; + __le32 rx_broadcast_pkts; + __le32 rx_multicast_pkts; + __le32 rx_fcserror_pkts; + __le32 rx_under_size_good_pkts; + __le32 rx_oversize_good_pkts; + __le32 rx_under_size_error_pkts; + __le32 rx_good_pause_pkts; + __le32 rx_oversize_error_pkts; + __le32 rx_align_error_pkts; + __le32 rx_filtered_pkts; + __le32 rx64byte_pkts; + __le32 rx127byte_pkts; + __le32 rx255byte_pkts; + __le32 rx511byte_pkts; + __le32 rx1023byte_pkts; + __le32 rx_max_byte_pkts; + __le32 tx_good_pkts; + __le32 tx_unicast_pkts; + __le32 tx_broadcast_pkts; + __le32 tx_multicast_pkts; + __le32 tx_single_coll_count; + __le32 tx_mult_coll_count; + __le32 tx_late_coll_count; + __le32 tx_excess_coll_count; + __le32 tx_coll_count; + __le32 tx_pause_count; + __le32 tx64byte_pkts; + __le32 tx127byte_pkts; + __le32 tx255byte_pkts; + __le32 tx511byte_pkts; + __le32 tx1023byte_pkts; + __le32 tx_max_byte_pkts; + __le32 tx_dropped_pkts; + __le32 tx_acm_dropped_pkts; + __le32 rx_dropped_pkts; + __le64 rx_good_bytes; + __le64 rx_bad_bytes; + __le64 tx_good_bytes; +} __packed; + #endif /* __MXL862XX_API_H */ diff --git a/drivers/net/dsa/mxl862xx/mxl862xx-cmd.h b/drivers/net/dsa/mxl862xx/mxl862xx-cmd.h index 45df37cde40d..f1ea40aa7ea0 100644 --- a/drivers/net/dsa/mxl862xx/mxl862xx-cmd.h +++ b/drivers/net/dsa/mxl862xx/mxl862xx-cmd.h @@ -16,6 +16,7 @@ #define MXL862XX_BRDGPORT_MAGIC 0x400 #define MXL862XX_CTP_MAGIC 0x500 #define MXL862XX_QOS_MAGIC 0x600 +#define MXL862XX_RMON_MAGIC 0x700 #define MXL862XX_SWMAC_MAGIC 0xa00 #define MXL862XX_EXTVLAN_MAGIC 0xb00 #define MXL862XX_VLANFILTER_MAGIC 0xc00 @@ -43,6 +44,8 @@ #define MXL862XX_QOS_METERCFGSET (MXL862XX_QOS_MAGIC + 0x2) #define MXL862XX_QOS_METERALLOC (MXL862XX_QOS_MAGIC + 0x2a) +#define MXL862XX_RMON_PORT_GET (MXL862XX_RMON_MAGIC + 0x1) + #define MXL862XX_MAC_TABLEENTRYADD (MXL862XX_SWMAC_MAGIC + 0x2) #define MXL862XX_MAC_TABLEENTRYREAD (MXL862XX_SWMAC_MAGIC + 0x3) #define MXL862XX_MAC_TABLEENTRYQUERY (MXL862XX_SWMAC_MAGIC + 0x4) diff --git a/drivers/net/dsa/mxl862xx/mxl862xx.c b/drivers/net/dsa/mxl862xx/mxl862xx.c index fca9a3e36bb6..58bf7210c6d4 100644 --- a/drivers/net/dsa/mxl862xx/mxl862xx.c +++ b/drivers/net/dsa/mxl862xx/mxl862xx.c @@ -30,6 +30,38 @@ #define MXL862XX_API_READ_QUIET(dev, cmd, data) \ mxl862xx_api_wrap(dev, cmd, &(data), sizeof((data)), true, true) +struct mxl862xx_mib_desc { + unsigned int size; + unsigned int offset; + const char *name; +}; + +#define MIB_DESC(_size, _name, _element) \ +{ \ + .size = _size, \ + .name = _name, \ + .offset = offsetof(struct mxl862xx_rmon_port_cnt, _element) \ +} + +/* Hardware-specific counters not covered by any standardized stats callback. */ +static const struct mxl862xx_mib_desc mxl862xx_mib[] = { + MIB_DESC(1, "TxAcmDroppedPkts", tx_acm_dropped_pkts), + MIB_DESC(1, "RxFilteredPkts", rx_filtered_pkts), + MIB_DESC(1, "RxExtendedVlanDiscardPkts", rx_extended_vlan_discard_pkts), + MIB_DESC(1, "MtuExceedDiscardPkts", mtu_exceed_discard_pkts), + MIB_DESC(2, "RxBadBytes", rx_bad_bytes), +}; + +static const struct ethtool_rmon_hist_range mxl862xx_rmon_ranges[] = { + { 0, 64 }, + { 65, 127 }, + { 128, 255 }, + { 256, 511 }, + { 512, 1023 }, + { 1024, 10240 }, + {} +}; + #define MXL862XX_SDMA_PCTRLP(p) (0xbc0 + ((p) * 0x6)) #define MXL862XX_SDMA_PCTRL_EN BIT(0) @@ -1734,6 +1766,140 @@ static int mxl862xx_port_bridge_flags(struct dsa_switch *ds, int port, return 0; } +static void mxl862xx_get_strings(struct dsa_switch *ds, int port, + u32 stringset, u8 *data) +{ + int i; + + if (stringset != ETH_SS_STATS) + return; + + for (i = 0; i < ARRAY_SIZE(mxl862xx_mib); i++) + ethtool_puts(&data, mxl862xx_mib[i].name); +} + +static int mxl862xx_get_sset_count(struct dsa_switch *ds, int port, int sset) +{ + if (sset != ETH_SS_STATS) + return 0; + + return ARRAY_SIZE(mxl862xx_mib); +} + +static int mxl862xx_read_rmon(struct dsa_switch *ds, int port, + struct mxl862xx_rmon_port_cnt *cnt) +{ + memset(cnt, 0, sizeof(*cnt)); + cnt->port_type = cpu_to_le32(MXL862XX_CTP_PORT); + cnt->port_id = cpu_to_le16(port); + + return MXL862XX_API_READ(ds->priv, MXL862XX_RMON_PORT_GET, *cnt); +} + +static void mxl862xx_get_ethtool_stats(struct dsa_switch *ds, int port, + u64 *data) +{ + const struct mxl862xx_mib_desc *mib; + struct mxl862xx_rmon_port_cnt cnt; + int ret, i; + void *field; + + ret = mxl862xx_read_rmon(ds, port, &cnt); + if (ret) { + dev_err(ds->dev, "failed to read RMON stats on port %d\n", port); + return; + } + + for (i = 0; i < ARRAY_SIZE(mxl862xx_mib); i++) { + mib = &mxl862xx_mib[i]; + field = (u8 *)&cnt + mib->offset; + + if (mib->size == 1) + *data++ = le32_to_cpu(*(__le32 *)field); + else + *data++ = le64_to_cpu(*(__le64 *)field); + } +} + +static void mxl862xx_get_eth_mac_stats(struct dsa_switch *ds, int port, + struct ethtool_eth_mac_stats *mac_stats) +{ + struct mxl862xx_rmon_port_cnt cnt; + + if (mxl862xx_read_rmon(ds, port, &cnt)) + return; + + mac_stats->FramesTransmittedOK = le32_to_cpu(cnt.tx_good_pkts); + mac_stats->SingleCollisionFrames = le32_to_cpu(cnt.tx_single_coll_count); + mac_stats->MultipleCollisionFrames = le32_to_cpu(cnt.tx_mult_coll_count); + mac_stats->FramesReceivedOK = le32_to_cpu(cnt.rx_good_pkts); + mac_stats->FrameCheckSequenceErrors = le32_to_cpu(cnt.rx_fcserror_pkts); + mac_stats->AlignmentErrors = le32_to_cpu(cnt.rx_align_error_pkts); + mac_stats->OctetsTransmittedOK = le64_to_cpu(cnt.tx_good_bytes); + mac_stats->LateCollisions = le32_to_cpu(cnt.tx_late_coll_count); + mac_stats->FramesAbortedDueToXSColls = le32_to_cpu(cnt.tx_excess_coll_count); + mac_stats->OctetsReceivedOK = le64_to_cpu(cnt.rx_good_bytes); + mac_stats->MulticastFramesXmittedOK = le32_to_cpu(cnt.tx_multicast_pkts); + mac_stats->BroadcastFramesXmittedOK = le32_to_cpu(cnt.tx_broadcast_pkts); + mac_stats->MulticastFramesReceivedOK = le32_to_cpu(cnt.rx_multicast_pkts); + mac_stats->BroadcastFramesReceivedOK = le32_to_cpu(cnt.rx_broadcast_pkts); + mac_stats->FrameTooLongErrors = le32_to_cpu(cnt.rx_oversize_error_pkts); +} + +static void mxl862xx_get_eth_ctrl_stats(struct dsa_switch *ds, int port, + struct ethtool_eth_ctrl_stats *ctrl_stats) +{ + struct mxl862xx_rmon_port_cnt cnt; + + if (mxl862xx_read_rmon(ds, port, &cnt)) + return; + + ctrl_stats->MACControlFramesTransmitted = le32_to_cpu(cnt.tx_pause_count); + ctrl_stats->MACControlFramesReceived = le32_to_cpu(cnt.rx_good_pause_pkts); +} + +static void mxl862xx_get_pause_stats(struct dsa_switch *ds, int port, + struct ethtool_pause_stats *pause_stats) +{ + struct mxl862xx_rmon_port_cnt cnt; + + if (mxl862xx_read_rmon(ds, port, &cnt)) + return; + + pause_stats->tx_pause_frames = le32_to_cpu(cnt.tx_pause_count); + pause_stats->rx_pause_frames = le32_to_cpu(cnt.rx_good_pause_pkts); +} + +static void mxl862xx_get_rmon_stats(struct dsa_switch *ds, int port, + struct ethtool_rmon_stats *rmon_stats, + const struct ethtool_rmon_hist_range **ranges) +{ + struct mxl862xx_rmon_port_cnt cnt; + + if (mxl862xx_read_rmon(ds, port, &cnt)) + return; + + rmon_stats->undersize_pkts = le32_to_cpu(cnt.rx_under_size_good_pkts); + rmon_stats->oversize_pkts = le32_to_cpu(cnt.rx_oversize_good_pkts); + rmon_stats->fragments = le32_to_cpu(cnt.rx_under_size_error_pkts); + rmon_stats->jabbers = le32_to_cpu(cnt.rx_oversize_error_pkts); + + rmon_stats->hist[0] = le32_to_cpu(cnt.rx64byte_pkts); + rmon_stats->hist[1] = le32_to_cpu(cnt.rx127byte_pkts); + rmon_stats->hist[2] = le32_to_cpu(cnt.rx255byte_pkts); + rmon_stats->hist[3] = le32_to_cpu(cnt.rx511byte_pkts); + rmon_stats->hist[4] = le32_to_cpu(cnt.rx1023byte_pkts); + rmon_stats->hist[5] = le32_to_cpu(cnt.rx_max_byte_pkts); + + rmon_stats->hist_tx[0] = le32_to_cpu(cnt.tx64byte_pkts); + rmon_stats->hist_tx[1] = le32_to_cpu(cnt.tx127byte_pkts); + rmon_stats->hist_tx[2] = le32_to_cpu(cnt.tx255byte_pkts); + rmon_stats->hist_tx[3] = le32_to_cpu(cnt.tx511byte_pkts); + rmon_stats->hist_tx[4] = le32_to_cpu(cnt.tx1023byte_pkts); + rmon_stats->hist_tx[5] = le32_to_cpu(cnt.tx_max_byte_pkts); + + *ranges = mxl862xx_rmon_ranges; +} static const struct dsa_switch_ops mxl862xx_switch_ops = { .get_tag_protocol = mxl862xx_get_tag_protocol, .setup = mxl862xx_setup, @@ -1758,6 +1924,13 @@ static const struct dsa_switch_ops mxl862xx_switch_ops = { .port_vlan_filtering = mxl862xx_port_vlan_filtering, .port_vlan_add = mxl862xx_port_vlan_add, .port_vlan_del = mxl862xx_port_vlan_del, + .get_strings = mxl862xx_get_strings, + .get_sset_count = mxl862xx_get_sset_count, + .get_ethtool_stats = mxl862xx_get_ethtool_stats, + .get_eth_mac_stats = mxl862xx_get_eth_mac_stats, + .get_eth_ctrl_stats = mxl862xx_get_eth_ctrl_stats, + .get_pause_stats = mxl862xx_get_pause_stats, + .get_rmon_stats = mxl862xx_get_rmon_stats, }; static void mxl862xx_phylink_mac_config(struct phylink_config *config, From a21d33a5265f0b31d935a8b9b2b6faefb5185911 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Sun, 12 Apr 2026 01:02:05 +0100 Subject: [PATCH 1539/1561] net: dsa: mxl862xx: implement .get_stats64 Poll free-running firmware RMON counters every 2 seconds and accumulate deltas into 64-bit per-port statistics. 32-bit packet counters wrap in ~220s at 10 Gbps line rate with minimum-size frames; the 2s polling interval provides a comfortable margin. The .get_stats64 callback forces a fresh poll so that counters are always up to date when queried. Signed-off-by: Daniel Golle Link: https://patch.msgid.link/fa38548ba05866879e8912721edc91947ce4ff12.1775951347.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mxl862xx/mxl862xx-host.c | 8 +- drivers/net/dsa/mxl862xx/mxl862xx.c | 175 +++++++++++++++++++++++ drivers/net/dsa/mxl862xx/mxl862xx.h | 94 +++++++++++- 3 files changed, 270 insertions(+), 7 deletions(-) diff --git a/drivers/net/dsa/mxl862xx/mxl862xx-host.c b/drivers/net/dsa/mxl862xx/mxl862xx-host.c index cadbdb590cf4..d55f9dff6433 100644 --- a/drivers/net/dsa/mxl862xx/mxl862xx-host.c +++ b/drivers/net/dsa/mxl862xx/mxl862xx-host.c @@ -48,7 +48,7 @@ static void mxl862xx_crc_err_work_fn(struct work_struct *work) dev_close(dp->conduit); rtnl_unlock(); - clear_bit(0, &priv->crc_err); + clear_bit(MXL862XX_FLAG_CRC_ERR, &priv->flags); } /* Firmware CRC error codes (outside normal Zephyr errno range). */ @@ -247,7 +247,7 @@ static int mxl862xx_issue_cmd(struct mxl862xx_priv *priv, u16 cmd, u16 len) ret = mxl862xx_crc6_verify(ctrl_enc, len_enc, &fw_result); if (ret) { - if (!test_and_set_bit(0, &priv->crc_err)) + if (!test_and_set_bit(MXL862XX_FLAG_CRC_ERR, &priv->flags)) schedule_work(&priv->crc_err_work); return -EIO; } @@ -314,7 +314,7 @@ static int mxl862xx_send_cmd(struct mxl862xx_priv *priv, u16 cmd, u16 size, if (ret < 0) { if ((ret == MXL862XX_FW_CRC6_ERR || ret == MXL862XX_FW_CRC16_ERR) && - !test_and_set_bit(0, &priv->crc_err)) + !test_and_set_bit(MXL862XX_FLAG_CRC_ERR, &priv->flags)) schedule_work(&priv->crc_err_work); if (!quiet) dev_err(&priv->mdiodev->dev, @@ -458,7 +458,7 @@ int mxl862xx_api_wrap(struct mxl862xx_priv *priv, u16 cmd, void *_data, } if (crc16(0xffff, (const u8 *)data, size) != crc) { - if (!test_and_set_bit(0, &priv->crc_err)) + if (!test_and_set_bit(MXL862XX_FLAG_CRC_ERR, &priv->flags)) schedule_work(&priv->crc_err_work); ret = -EIO; goto out; diff --git a/drivers/net/dsa/mxl862xx/mxl862xx.c b/drivers/net/dsa/mxl862xx/mxl862xx.c index 58bf7210c6d4..b60482d93a85 100644 --- a/drivers/net/dsa/mxl862xx/mxl862xx.c +++ b/drivers/net/dsa/mxl862xx/mxl862xx.c @@ -30,6 +30,12 @@ #define MXL862XX_API_READ_QUIET(dev, cmd, data) \ mxl862xx_api_wrap(dev, cmd, &(data), sizeof((data)), true, true) +/* Polling interval for RMON counter accumulation. At 2.5 Gbps with + * minimum-size (64-byte) frames, a 32-bit packet counter wraps in ~880s. + * 2s gives a comfortable margin. + */ +#define MXL862XX_STATS_POLL_INTERVAL (2 * HZ) + struct mxl862xx_mib_desc { unsigned int size; unsigned int offset; @@ -677,6 +683,9 @@ static int mxl862xx_setup(struct dsa_switch *ds) if (ret) return ret; + schedule_delayed_work(&priv->stats_work, + MXL862XX_STATS_POLL_INTERVAL); + return mxl862xx_setup_mdio(ds); } @@ -1900,6 +1909,159 @@ static void mxl862xx_get_rmon_stats(struct dsa_switch *ds, int port, *ranges = mxl862xx_rmon_ranges; } + +/* Compute the delta between two 32-bit free-running counter snapshots, + * handling a single wrap-around correctly via unsigned subtraction. + */ +static u64 mxl862xx_delta32(u32 cur, u32 prev) +{ + return (u32)(cur - prev); +} + +/** + * mxl862xx_stats_poll - Read RMON counters and accumulate into 64-bit stats + * @ds: DSA switch + * @port: port index + * + * The firmware RMON counters are free-running 32-bit values (64-bit for + * byte counters). This function reads the hardware via MDIO (may sleep), + * computes deltas from the previous snapshot, and accumulates them into + * 64-bit per-port stats under a spinlock. + * + * Called only from the stats polling workqueue -- serialized by the + * single-threaded delayed_work, so no MDIO locking is needed here. + */ +static void mxl862xx_stats_poll(struct dsa_switch *ds, int port) +{ + struct mxl862xx_priv *priv = ds->priv; + struct mxl862xx_port_stats *s = &priv->ports[port].stats; + u32 rx_fcserr, rx_under, rx_over, rx_align, tx_drop; + u32 rx_drop, rx_evlan, mtu_exc, tx_acm; + struct mxl862xx_rmon_port_cnt cnt; + u64 rx_bytes, tx_bytes; + u32 rx_mcast, tx_coll; + u32 rx_pkts, tx_pkts; + + /* MDIO read -- may sleep, done outside the spinlock. */ + if (mxl862xx_read_rmon(ds, port, &cnt)) + return; + + rx_pkts = le32_to_cpu(cnt.rx_good_pkts); + tx_pkts = le32_to_cpu(cnt.tx_good_pkts); + rx_bytes = le64_to_cpu(cnt.rx_good_bytes); + tx_bytes = le64_to_cpu(cnt.tx_good_bytes); + rx_fcserr = le32_to_cpu(cnt.rx_fcserror_pkts); + rx_under = le32_to_cpu(cnt.rx_under_size_error_pkts); + rx_over = le32_to_cpu(cnt.rx_oversize_error_pkts); + rx_align = le32_to_cpu(cnt.rx_align_error_pkts); + tx_drop = le32_to_cpu(cnt.tx_dropped_pkts); + rx_drop = le32_to_cpu(cnt.rx_dropped_pkts); + rx_evlan = le32_to_cpu(cnt.rx_extended_vlan_discard_pkts); + mtu_exc = le32_to_cpu(cnt.mtu_exceed_discard_pkts); + tx_acm = le32_to_cpu(cnt.tx_acm_dropped_pkts); + rx_mcast = le32_to_cpu(cnt.rx_multicast_pkts); + tx_coll = le32_to_cpu(cnt.tx_coll_count); + + /* Accumulate deltas under spinlock -- .get_stats64 reads these. */ + spin_lock_bh(&priv->ports[port].stats_lock); + + s->rx_packets += mxl862xx_delta32(rx_pkts, s->prev_rx_good_pkts); + s->tx_packets += mxl862xx_delta32(tx_pkts, s->prev_tx_good_pkts); + s->rx_bytes += rx_bytes - s->prev_rx_good_bytes; + s->tx_bytes += tx_bytes - s->prev_tx_good_bytes; + + s->rx_errors += + mxl862xx_delta32(rx_fcserr, s->prev_rx_fcserror_pkts) + + mxl862xx_delta32(rx_under, s->prev_rx_under_size_error_pkts) + + mxl862xx_delta32(rx_over, s->prev_rx_oversize_error_pkts) + + mxl862xx_delta32(rx_align, s->prev_rx_align_error_pkts); + s->tx_errors += + mxl862xx_delta32(tx_drop, s->prev_tx_dropped_pkts); + + s->rx_dropped += + mxl862xx_delta32(rx_drop, s->prev_rx_dropped_pkts) + + mxl862xx_delta32(rx_evlan, s->prev_rx_evlan_discard_pkts) + + mxl862xx_delta32(mtu_exc, s->prev_mtu_exceed_discard_pkts); + s->tx_dropped += + mxl862xx_delta32(tx_drop, s->prev_tx_dropped_pkts) + + mxl862xx_delta32(tx_acm, s->prev_tx_acm_dropped_pkts); + + s->multicast += mxl862xx_delta32(rx_mcast, s->prev_rx_multicast_pkts); + s->collisions += mxl862xx_delta32(tx_coll, s->prev_tx_coll_count); + + s->rx_length_errors += + mxl862xx_delta32(rx_under, s->prev_rx_under_size_error_pkts) + + mxl862xx_delta32(rx_over, s->prev_rx_oversize_error_pkts); + s->rx_crc_errors += + mxl862xx_delta32(rx_fcserr, s->prev_rx_fcserror_pkts); + s->rx_frame_errors += + mxl862xx_delta32(rx_align, s->prev_rx_align_error_pkts); + + s->prev_rx_good_pkts = rx_pkts; + s->prev_tx_good_pkts = tx_pkts; + s->prev_rx_good_bytes = rx_bytes; + s->prev_tx_good_bytes = tx_bytes; + s->prev_rx_fcserror_pkts = rx_fcserr; + s->prev_rx_under_size_error_pkts = rx_under; + s->prev_rx_oversize_error_pkts = rx_over; + s->prev_rx_align_error_pkts = rx_align; + s->prev_tx_dropped_pkts = tx_drop; + s->prev_rx_dropped_pkts = rx_drop; + s->prev_rx_evlan_discard_pkts = rx_evlan; + s->prev_mtu_exceed_discard_pkts = mtu_exc; + s->prev_tx_acm_dropped_pkts = tx_acm; + s->prev_rx_multicast_pkts = rx_mcast; + s->prev_tx_coll_count = tx_coll; + + spin_unlock_bh(&priv->ports[port].stats_lock); +} + +static void mxl862xx_stats_work_fn(struct work_struct *work) +{ + struct mxl862xx_priv *priv = + container_of(work, struct mxl862xx_priv, stats_work.work); + struct dsa_switch *ds = priv->ds; + struct dsa_port *dp; + + dsa_switch_for_each_available_port(dp, ds) + mxl862xx_stats_poll(ds, dp->index); + + if (!test_bit(MXL862XX_FLAG_WORK_STOPPED, &priv->flags)) + schedule_delayed_work(&priv->stats_work, + MXL862XX_STATS_POLL_INTERVAL); +} + +static void mxl862xx_get_stats64(struct dsa_switch *ds, int port, + struct rtnl_link_stats64 *s) +{ + struct mxl862xx_priv *priv = ds->priv; + struct mxl862xx_port_stats *ps = &priv->ports[port].stats; + + spin_lock_bh(&priv->ports[port].stats_lock); + + s->rx_packets = ps->rx_packets; + s->tx_packets = ps->tx_packets; + s->rx_bytes = ps->rx_bytes; + s->tx_bytes = ps->tx_bytes; + s->rx_errors = ps->rx_errors; + s->tx_errors = ps->tx_errors; + s->rx_dropped = ps->rx_dropped; + s->tx_dropped = ps->tx_dropped; + s->multicast = ps->multicast; + s->collisions = ps->collisions; + s->rx_length_errors = ps->rx_length_errors; + s->rx_crc_errors = ps->rx_crc_errors; + s->rx_frame_errors = ps->rx_frame_errors; + + spin_unlock_bh(&priv->ports[port].stats_lock); + + /* Trigger a fresh poll so the next read sees up-to-date counters. + * No-op if the work is already pending, running, or teardown started. + */ + if (!test_bit(MXL862XX_FLAG_WORK_STOPPED, &priv->flags)) + schedule_delayed_work(&priv->stats_work, 0); +} + static const struct dsa_switch_ops mxl862xx_switch_ops = { .get_tag_protocol = mxl862xx_get_tag_protocol, .setup = mxl862xx_setup, @@ -1931,6 +2093,7 @@ static const struct dsa_switch_ops mxl862xx_switch_ops = { .get_eth_ctrl_stats = mxl862xx_get_eth_ctrl_stats, .get_pause_stats = mxl862xx_get_pause_stats, .get_rmon_stats = mxl862xx_get_rmon_stats, + .get_stats64 = mxl862xx_get_stats64, }; static void mxl862xx_phylink_mac_config(struct phylink_config *config, @@ -1992,16 +2155,22 @@ static int mxl862xx_probe(struct mdio_device *mdiodev) priv->ports[i].priv = priv; INIT_WORK(&priv->ports[i].host_flood_work, mxl862xx_host_flood_work_fn); + spin_lock_init(&priv->ports[i].stats_lock); } + INIT_DELAYED_WORK(&priv->stats_work, mxl862xx_stats_work_fn); + dev_set_drvdata(dev, ds); err = dsa_register_switch(ds); if (err) { + set_bit(MXL862XX_FLAG_WORK_STOPPED, &priv->flags); + cancel_delayed_work_sync(&priv->stats_work); mxl862xx_host_shutdown(priv); for (i = 0; i < MXL862XX_MAX_PORTS; i++) cancel_work_sync(&priv->ports[i].host_flood_work); } + return err; } @@ -2016,6 +2185,9 @@ static void mxl862xx_remove(struct mdio_device *mdiodev) priv = ds->priv; + set_bit(MXL862XX_FLAG_WORK_STOPPED, &priv->flags); + cancel_delayed_work_sync(&priv->stats_work); + dsa_unregister_switch(ds); mxl862xx_host_shutdown(priv); @@ -2042,6 +2214,9 @@ static void mxl862xx_shutdown(struct mdio_device *mdiodev) dsa_switch_shutdown(ds); + set_bit(MXL862XX_FLAG_WORK_STOPPED, &priv->flags); + cancel_delayed_work_sync(&priv->stats_work); + mxl862xx_host_shutdown(priv); for (i = 0; i < MXL862XX_MAX_PORTS; i++) diff --git a/drivers/net/dsa/mxl862xx/mxl862xx.h b/drivers/net/dsa/mxl862xx/mxl862xx.h index a010cf6b961a..80053ab40e4c 100644 --- a/drivers/net/dsa/mxl862xx/mxl862xx.h +++ b/drivers/net/dsa/mxl862xx/mxl862xx.h @@ -116,6 +116,79 @@ struct mxl862xx_evlan_block { u16 n_active; }; +/** + * struct mxl862xx_port_stats - 64-bit accumulated hardware port statistics + * @rx_packets: total received packets + * @tx_packets: total transmitted packets + * @rx_bytes: total received bytes + * @tx_bytes: total transmitted bytes + * @rx_errors: total receive errors + * @tx_errors: total transmit errors + * @rx_dropped: total received packets dropped + * @tx_dropped: total transmitted packets dropped + * @multicast: total received multicast packets + * @collisions: total transmit collisions + * @rx_length_errors: received length errors (undersize + oversize) + * @rx_crc_errors: received FCS errors + * @rx_frame_errors: received alignment errors + * @prev_rx_good_pkts: previous snapshot of rx good packet counter + * @prev_tx_good_pkts: previous snapshot of tx good packet counter + * @prev_rx_good_bytes: previous snapshot of rx good byte counter + * @prev_tx_good_bytes: previous snapshot of tx good byte counter + * @prev_rx_fcserror_pkts: previous snapshot of rx FCS error counter + * @prev_rx_under_size_error_pkts: previous snapshot of rx undersize + * error counter + * @prev_rx_oversize_error_pkts: previous snapshot of rx oversize + * error counter + * @prev_rx_align_error_pkts: previous snapshot of rx alignment + * error counter + * @prev_tx_dropped_pkts: previous snapshot of tx dropped counter + * @prev_rx_dropped_pkts: previous snapshot of rx dropped counter + * @prev_rx_evlan_discard_pkts: previous snapshot of extended VLAN + * discard counter + * @prev_mtu_exceed_discard_pkts: previous snapshot of MTU exceed + * discard counter + * @prev_tx_acm_dropped_pkts: previous snapshot of tx ACM dropped + * counter + * @prev_rx_multicast_pkts: previous snapshot of rx multicast counter + * @prev_tx_coll_count: previous snapshot of tx collision counter + * + * The firmware RMON counters are 32-bit free-running (64-bit for byte + * counters). This structure holds 64-bit accumulators alongside the + * previous raw snapshot so that deltas can be computed across polls, + * handling 32-bit wrap correctly via unsigned subtraction. + */ +struct mxl862xx_port_stats { + u64 rx_packets; + u64 tx_packets; + u64 rx_bytes; + u64 tx_bytes; + u64 rx_errors; + u64 tx_errors; + u64 rx_dropped; + u64 tx_dropped; + u64 multicast; + u64 collisions; + u64 rx_length_errors; + u64 rx_crc_errors; + u64 rx_frame_errors; + u32 prev_rx_good_pkts; + u32 prev_tx_good_pkts; + u64 prev_rx_good_bytes; + u64 prev_tx_good_bytes; + u32 prev_rx_fcserror_pkts; + u32 prev_rx_under_size_error_pkts; + u32 prev_rx_oversize_error_pkts; + u32 prev_rx_align_error_pkts; + u32 prev_tx_dropped_pkts; + u32 prev_rx_dropped_pkts; + u32 prev_rx_evlan_discard_pkts; + u32 prev_mtu_exceed_discard_pkts; + u32 prev_tx_acm_dropped_pkts; + u32 prev_rx_multicast_pkts; + u32 prev_tx_coll_count; +}; + /** * struct mxl862xx_port - per-port state tracked by the driver * @priv: back-pointer to switch private data; needed by @@ -145,6 +218,10 @@ struct mxl862xx_evlan_block { * The worker acquires rtnl_lock() to serialize with * DSA callbacks and checks @setup_done to avoid * acting on torn-down ports. + * @stats: 64-bit accumulated hardware statistics; updated + * periodically by the stats polling work + * @stats_lock: protects accumulator reads in .get_stats64 against + * concurrent updates from the polling work */ struct mxl862xx_port { struct mxl862xx_priv *priv; @@ -160,16 +237,24 @@ struct mxl862xx_port { bool host_flood_uc; bool host_flood_mc; struct work_struct host_flood_work; + struct mxl862xx_port_stats stats; + spinlock_t stats_lock; /* protects stats accumulators */ }; +/* Bit indices for struct mxl862xx_priv::flags */ +#define MXL862XX_FLAG_CRC_ERR 0 +#define MXL862XX_FLAG_WORK_STOPPED 1 + /** * struct mxl862xx_priv - driver private data for an MxL862xx switch * @ds: pointer to the DSA switch instance * @mdiodev: MDIO device used to communicate with the switch firmware * @crc_err_work: deferred work for shutting down all ports on MDIO CRC * errors - * @crc_err: set atomically before CRC-triggered shutdown, cleared - * after + * @flags: atomic status flags; %MXL862XX_FLAG_CRC_ERR is set + * before CRC-triggered shutdown and cleared after; + * %MXL862XX_FLAG_WORK_STOPPED is set before cancelling + * stats_work to prevent rescheduling during teardown * @drop_meter: index of the single shared zero-rate firmware meter * used to unconditionally drop traffic (used to block * flooding) @@ -181,18 +266,21 @@ struct mxl862xx_port { * @evlan_ingress_size: per-port ingress Extended VLAN block size * @evlan_egress_size: per-port egress Extended VLAN block size * @vf_block_size: per-port VLAN Filter block size + * @stats_work: periodic work item that polls RMON hardware counters + * and accumulates them into 64-bit per-port stats */ struct mxl862xx_priv { struct dsa_switch *ds; struct mdio_device *mdiodev; struct work_struct crc_err_work; - unsigned long crc_err; + unsigned long flags; u16 drop_meter; struct mxl862xx_port ports[MXL862XX_MAX_PORTS]; u16 bridges[MXL862XX_MAX_BRIDGES + 1]; u16 evlan_ingress_size; u16 evlan_egress_size; u16 vf_block_size; + struct delayed_work stats_work; }; #endif /* __MXL862XX_H */ From 360d745a5319f09849a94dee0974c8ead721e392 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sun, 12 Apr 2026 19:13:12 +0200 Subject: [PATCH 1540/1561] net: airoha: Rely on net_device pointer in airoha_dev_setup_tc_block signature Remove airoha_gdm_port dependency in airoha_dev_setup_tc_block routine signature and rely on net_device pointer instead. Please note this patch does not introduce any logical change and it is a preliminary patch to support multiple net_devices connected to the GDM3 or GDM4 ports via an external hw arbiter. Tested-by: Xuegang Lu Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260412-airoha-multi-serdes-preliminary-patch-v1-1-08d5b670ca8f@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 8e4b043af4bc..a98512e963b5 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -2687,7 +2687,7 @@ static int airoha_dev_setup_tc_block_cb(enum tc_setup_type type, } } -static int airoha_dev_setup_tc_block(struct airoha_gdm_port *port, +static int airoha_dev_setup_tc_block(struct net_device *dev, struct flow_block_offload *f) { flow_setup_cb_t *cb = airoha_dev_setup_tc_block_cb; @@ -2700,12 +2700,12 @@ static int airoha_dev_setup_tc_block(struct airoha_gdm_port *port, f->driver_block_list = &block_cb_list; switch (f->command) { case FLOW_BLOCK_BIND: - block_cb = flow_block_cb_lookup(f->block, cb, port->dev); + block_cb = flow_block_cb_lookup(f->block, cb, dev); if (block_cb) { flow_block_cb_incref(block_cb); return 0; } - block_cb = flow_block_cb_alloc(cb, port->dev, port->dev, NULL); + block_cb = flow_block_cb_alloc(cb, dev, dev, NULL); if (IS_ERR(block_cb)) return PTR_ERR(block_cb); @@ -2714,7 +2714,7 @@ static int airoha_dev_setup_tc_block(struct airoha_gdm_port *port, list_add_tail(&block_cb->driver_list, &block_cb_list); return 0; case FLOW_BLOCK_UNBIND: - block_cb = flow_block_cb_lookup(f->block, cb, port->dev); + block_cb = flow_block_cb_lookup(f->block, cb, dev); if (!block_cb) return -ENOENT; @@ -2813,7 +2813,7 @@ static int airoha_dev_tc_setup(struct net_device *dev, enum tc_setup_type type, return airoha_tc_setup_qdisc_htb(port, type_data); case TC_SETUP_BLOCK: case TC_SETUP_FT: - return airoha_dev_setup_tc_block(port, type_data); + return airoha_dev_setup_tc_block(dev, type_data); default: return -EOPNOTSUPP; } From 8baf4bf72ef94c955ef89d4644f1986603ee8320 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sun, 12 Apr 2026 19:13:13 +0200 Subject: [PATCH 1541/1561] net: airoha: Rely on net_device pointer in HTB callbacks Remove airoha_gdm_port dependency in HTB tc callback signatures and rely on net_device pointer instead. Please note this patch does not introduce any logical change and it is a preliminary patch in order to support multiple net_devices connected to the same GDM3 or GDM4 port via an external hw arbiter. Tested-by: Xuegang Lu Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260412-airoha-multi-serdes-preliminary-patch-v1-2-08d5b670ca8f@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 45 +++++++++++++----------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index a98512e963b5..c2b5fcffce82 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -2492,10 +2492,11 @@ static int airoha_qdma_set_trtcm_token_bucket(struct airoha_qdma *qdma, mode, val); } -static int airoha_qdma_set_tx_rate_limit(struct airoha_gdm_port *port, +static int airoha_qdma_set_tx_rate_limit(struct net_device *dev, int channel, u32 rate, u32 bucket_size) { + struct airoha_gdm_port *port = netdev_priv(dev); int i, err; for (i = 0; i <= TRTCM_PEAK_MODE; i++) { @@ -2515,21 +2516,20 @@ static int airoha_qdma_set_tx_rate_limit(struct airoha_gdm_port *port, return 0; } -static int airoha_tc_htb_alloc_leaf_queue(struct airoha_gdm_port *port, +static int airoha_tc_htb_alloc_leaf_queue(struct net_device *dev, struct tc_htb_qopt_offload *opt) { u32 channel = TC_H_MIN(opt->classid) % AIROHA_NUM_QOS_CHANNELS; u32 rate = div_u64(opt->rate, 1000) << 3; /* kbps */ - struct net_device *dev = port->dev; - int num_tx_queues = dev->real_num_tx_queues; - int err; + int err, num_tx_queues = dev->real_num_tx_queues; + struct airoha_gdm_port *port = netdev_priv(dev); if (opt->parent_classid != TC_HTB_CLASSID_ROOT) { NL_SET_ERR_MSG_MOD(opt->extack, "invalid parent classid"); return -EINVAL; } - err = airoha_qdma_set_tx_rate_limit(port, channel, rate, opt->quantum); + err = airoha_qdma_set_tx_rate_limit(dev, channel, rate, opt->quantum); if (err) { NL_SET_ERR_MSG_MOD(opt->extack, "failed configuring htb offload"); @@ -2541,7 +2541,7 @@ static int airoha_tc_htb_alloc_leaf_queue(struct airoha_gdm_port *port, err = netif_set_real_num_tx_queues(dev, num_tx_queues + 1); if (err) { - airoha_qdma_set_tx_rate_limit(port, channel, 0, opt->quantum); + airoha_qdma_set_tx_rate_limit(dev, channel, 0, opt->quantum); NL_SET_ERR_MSG_MOD(opt->extack, "failed setting real_num_tx_queues"); return err; @@ -2728,44 +2728,47 @@ static int airoha_dev_setup_tc_block(struct net_device *dev, } } -static void airoha_tc_remove_htb_queue(struct airoha_gdm_port *port, int queue) +static void airoha_tc_remove_htb_queue(struct net_device *dev, int queue) { - struct net_device *dev = port->dev; + struct airoha_gdm_port *port = netdev_priv(dev); netif_set_real_num_tx_queues(dev, dev->real_num_tx_queues - 1); - airoha_qdma_set_tx_rate_limit(port, queue + 1, 0, 0); + airoha_qdma_set_tx_rate_limit(dev, queue + 1, 0, 0); clear_bit(queue, port->qos_sq_bmap); } -static int airoha_tc_htb_delete_leaf_queue(struct airoha_gdm_port *port, +static int airoha_tc_htb_delete_leaf_queue(struct net_device *dev, struct tc_htb_qopt_offload *opt) { u32 channel = TC_H_MIN(opt->classid) % AIROHA_NUM_QOS_CHANNELS; + struct airoha_gdm_port *port = netdev_priv(dev); if (!test_bit(channel, port->qos_sq_bmap)) { NL_SET_ERR_MSG_MOD(opt->extack, "invalid queue id"); return -EINVAL; } - airoha_tc_remove_htb_queue(port, channel); + airoha_tc_remove_htb_queue(dev, channel); return 0; } -static int airoha_tc_htb_destroy(struct airoha_gdm_port *port) +static int airoha_tc_htb_destroy(struct net_device *dev) { + struct airoha_gdm_port *port = netdev_priv(dev); int q; for_each_set_bit(q, port->qos_sq_bmap, AIROHA_NUM_QOS_CHANNELS) - airoha_tc_remove_htb_queue(port, q); + airoha_tc_remove_htb_queue(dev, q); return 0; } -static int airoha_tc_get_htb_get_leaf_queue(struct airoha_gdm_port *port, +static int airoha_tc_get_htb_get_leaf_queue(struct net_device *dev, struct tc_htb_qopt_offload *opt) { u32 channel = TC_H_MIN(opt->classid) % AIROHA_NUM_QOS_CHANNELS; + struct airoha_gdm_port *port = netdev_priv(dev); if (!test_bit(channel, port->qos_sq_bmap)) { NL_SET_ERR_MSG_MOD(opt->extack, "invalid queue id"); @@ -2777,23 +2780,23 @@ static int airoha_tc_get_htb_get_leaf_queue(struct airoha_gdm_port *port, return 0; } -static int airoha_tc_setup_qdisc_htb(struct airoha_gdm_port *port, +static int airoha_tc_setup_qdisc_htb(struct net_device *dev, struct tc_htb_qopt_offload *opt) { switch (opt->command) { case TC_HTB_CREATE: break; case TC_HTB_DESTROY: - return airoha_tc_htb_destroy(port); + return airoha_tc_htb_destroy(dev); case TC_HTB_NODE_MODIFY: case TC_HTB_LEAF_ALLOC_QUEUE: - return airoha_tc_htb_alloc_leaf_queue(port, opt); + return airoha_tc_htb_alloc_leaf_queue(dev, opt); case TC_HTB_LEAF_DEL: case TC_HTB_LEAF_DEL_LAST: case TC_HTB_LEAF_DEL_LAST_FORCE: - return airoha_tc_htb_delete_leaf_queue(port, opt); + return airoha_tc_htb_delete_leaf_queue(dev, opt); case TC_HTB_LEAF_QUERY_QUEUE: - return airoha_tc_get_htb_get_leaf_queue(port, opt); + return airoha_tc_get_htb_get_leaf_queue(dev, opt); default: return -EOPNOTSUPP; } @@ -2810,7 +2813,7 @@ static int airoha_dev_tc_setup(struct net_device *dev, enum tc_setup_type type, case TC_SETUP_QDISC_ETS: return airoha_tc_setup_qdisc_ets(port, type_data); case TC_SETUP_QDISC_HTB: - return airoha_tc_setup_qdisc_htb(port, type_data); + return airoha_tc_setup_qdisc_htb(dev, type_data); case TC_SETUP_BLOCK: case TC_SETUP_FT: return airoha_dev_setup_tc_block(dev, type_data); From ae32f80018f0f0f4ebc7a0a70d4092d08a1545e8 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sun, 12 Apr 2026 19:13:14 +0200 Subject: [PATCH 1542/1561] net: airoha: Rely on net_device pointer in ETS callbacks Remove airoha_gdm_port dependency in ETS tc callback signatures and rely on net_device pointer instead. Please note this patch does not introduce any logical change and it is a preliminary patch in order to support multiple net_devices connected to the same GDM3 or GDM4 port via an external hw arbiter. Tested-by: Xuegang Lu Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260412-airoha-multi-serdes-preliminary-patch-v1-3-08d5b670ca8f@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 30 +++++++++++------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index c2b5fcffce82..d426dc173d99 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -2138,10 +2138,11 @@ airoha_ethtool_get_rmon_stats(struct net_device *dev, } while (u64_stats_fetch_retry(&port->stats.syncp, start)); } -static int airoha_qdma_set_chan_tx_sched(struct airoha_gdm_port *port, +static int airoha_qdma_set_chan_tx_sched(struct net_device *dev, int channel, enum tx_sched_mode mode, const u16 *weights, u8 n_weights) { + struct airoha_gdm_port *port = netdev_priv(dev); int i; for (i = 0; i < AIROHA_NUM_TX_RING; i++) @@ -2173,17 +2174,15 @@ static int airoha_qdma_set_chan_tx_sched(struct airoha_gdm_port *port, return 0; } -static int airoha_qdma_set_tx_prio_sched(struct airoha_gdm_port *port, - int channel) +static int airoha_qdma_set_tx_prio_sched(struct net_device *dev, int channel) { static const u16 w[AIROHA_NUM_QOS_QUEUES] = {}; - return airoha_qdma_set_chan_tx_sched(port, channel, TC_SCH_SP, w, + return airoha_qdma_set_chan_tx_sched(dev, channel, TC_SCH_SP, w, ARRAY_SIZE(w)); } -static int airoha_qdma_set_tx_ets_sched(struct airoha_gdm_port *port, - int channel, +static int airoha_qdma_set_tx_ets_sched(struct net_device *dev, int channel, struct tc_ets_qopt_offload *opt) { struct tc_ets_qopt_offload_replace_params *p = &opt->replace_params; @@ -2224,20 +2223,21 @@ static int airoha_qdma_set_tx_ets_sched(struct airoha_gdm_port *port, else if (nstrict < AIROHA_NUM_QOS_QUEUES - 1) mode = nstrict + 1; - return airoha_qdma_set_chan_tx_sched(port, channel, mode, w, + return airoha_qdma_set_chan_tx_sched(dev, channel, mode, w, ARRAY_SIZE(w)); } -static int airoha_qdma_get_tx_ets_stats(struct airoha_gdm_port *port, - int channel, +static int airoha_qdma_get_tx_ets_stats(struct net_device *dev, int channel, struct tc_ets_qopt_offload *opt) { + struct airoha_gdm_port *port = netdev_priv(dev); u64 cpu_tx_packets = airoha_qdma_rr(port->qdma, REG_CNTR_VAL(channel << 1)); u64 fwd_tx_packets = airoha_qdma_rr(port->qdma, REG_CNTR_VAL((channel << 1) + 1)); u64 tx_packets = (cpu_tx_packets - port->cpu_tx_packets) + (fwd_tx_packets - port->fwd_tx_packets); + _bstats_update(opt->stats.bstats, 0, tx_packets); port->cpu_tx_packets = cpu_tx_packets; @@ -2246,7 +2246,7 @@ static int airoha_qdma_get_tx_ets_stats(struct airoha_gdm_port *port, return 0; } -static int airoha_tc_setup_qdisc_ets(struct airoha_gdm_port *port, +static int airoha_tc_setup_qdisc_ets(struct net_device *dev, struct tc_ets_qopt_offload *opt) { int channel; @@ -2259,12 +2259,12 @@ static int airoha_tc_setup_qdisc_ets(struct airoha_gdm_port *port, switch (opt->command) { case TC_ETS_REPLACE: - return airoha_qdma_set_tx_ets_sched(port, channel, opt); + return airoha_qdma_set_tx_ets_sched(dev, channel, opt); case TC_ETS_DESTROY: /* PRIO is default qdisc scheduler */ - return airoha_qdma_set_tx_prio_sched(port, channel); + return airoha_qdma_set_tx_prio_sched(dev, channel); case TC_ETS_STATS: - return airoha_qdma_get_tx_ets_stats(port, channel, opt); + return airoha_qdma_get_tx_ets_stats(dev, channel, opt); default: return -EOPNOTSUPP; } @@ -2807,11 +2807,9 @@ static int airoha_tc_setup_qdisc_htb(struct net_device *dev, static int airoha_dev_tc_setup(struct net_device *dev, enum tc_setup_type type, void *type_data) { - struct airoha_gdm_port *port = netdev_priv(dev); - switch (type) { case TC_SETUP_QDISC_ETS: - return airoha_tc_setup_qdisc_ets(port, type_data); + return airoha_tc_setup_qdisc_ets(dev, type_data); case TC_SETUP_QDISC_HTB: return airoha_tc_setup_qdisc_htb(dev, type_data); case TC_SETUP_BLOCK: From bf6f95ae3b8b2638c0e1d6d802d50983ce5d0f45 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 12 Apr 2026 14:13:51 -0400 Subject: [PATCH 1543/1561] sctp: fix missing encap_port propagation for GSO fragments encap_port in SCTP_INPUT_CB(skb) is used by sctp_vtag_verify() for SCTP-over-UDP processing. In the GSO case, it is only set on the head skb, while fragment skbs leave it 0. This results in fragment skbs seeing encap_port == 0, breaking SCTP-over-UDP connections. Fix it by propagating encap_port from the head skb cb when initializing fragment skbs in sctp_inq_pop(). Fixes: 046c052b475e ("sctp: enable udp tunneling socks") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Link: https://patch.msgid.link/ea65ed61b3598d8b4940f0170b9aa1762307e6c3.1776017631.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski --- net/sctp/inqueue.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index f5a7d5a38755..a024c0843247 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -201,6 +201,7 @@ new_skb: cb->chunk = head_cb->chunk; cb->af = head_cb->af; + cb->encap_port = head_cb->encap_port; } } From 2cd7e6971fc2787408ceef17906ea152791448cf Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 12 Apr 2026 14:15:27 -0400 Subject: [PATCH 1544/1561] sctp: disable BH before calling udp_tunnel_xmit_skb() udp_tunnel_xmit_skb() / udp_tunnel6_xmit_skb() are expected to run with BH disabled. After commit 6f1a9140ecda ("add xmit recursion limit to tunnel xmit functions"), on the path: udp(6)_tunnel_xmit_skb() -> ip(6)tunnel_xmit() dev_xmit_recursion_inc()/dec() must stay balanced on the same CPU. Without local_bh_disable(), the context may move between CPUs, which can break the inc/dec pairing. This may lead to incorrect recursion level detection and cause packets to be dropped in ip(6)_tunnel_xmit() or __dev_queue_xmit(). Fix it by disabling BH around both IPv4 and IPv6 SCTP UDP xmit paths. In my testing, after enabling the SCTP over UDP: # ip net exec ha sysctl -w net.sctp.udp_port=9899 # ip net exec ha sysctl -w net.sctp.encap_port=9899 # ip net exec hb sysctl -w net.sctp.udp_port=9899 # ip net exec hb sysctl -w net.sctp.encap_port=9899 # ip net exec ha iperf3 -s - without this patch: # ip net exec hb iperf3 -c 192.168.0.1 --sctp [ 5] 0.00-10.00 sec 37.2 MBytes 31.2 Mbits/sec sender [ 5] 0.00-10.00 sec 37.1 MBytes 31.1 Mbits/sec receiver - with this patch: # ip net exec hb iperf3 -c 192.168.0.1 --sctp [ 5] 0.00-10.00 sec 3.14 GBytes 2.69 Gbits/sec sender [ 5] 0.00-10.00 sec 3.14 GBytes 2.69 Gbits/sec receiver Fixes: 6f1a9140ecda ("net: add xmit recursion limit to tunnel xmit functions") Fixes: 046c052b475e ("sctp: enable udp tunneling socks") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Link: https://patch.msgid.link/c874a8548221dcd56ff03c65ba75a74e6cf99119.1776017727.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski --- net/sctp/ipv6.c | 2 ++ net/sctp/protocol.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 53a5c027f8e3..cd15b695607e 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -261,9 +261,11 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *t) skb_set_inner_ipproto(skb, IPPROTO_SCTP); label = ip6_make_flowlabel(sock_net(sk), skb, fl6->flowlabel, true, fl6); + local_bh_disable(); udp_tunnel6_xmit_skb(dst, sk, skb, NULL, &fl6->saddr, &fl6->daddr, tclass, ip6_dst_hoplimit(dst), label, sctp_sk(sk)->udp_port, t->encap_port, false, 0); + local_bh_enable(); return 0; } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 828a59b8e7bf..5800e7ee7ea0 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1070,10 +1070,12 @@ static inline int sctp_v4_xmit(struct sk_buff *skb, struct sctp_transport *t) skb_reset_inner_mac_header(skb); skb_reset_inner_transport_header(skb); skb_set_inner_ipproto(skb, IPPROTO_SCTP); + local_bh_disable(); udp_tunnel_xmit_skb(dst_rtable(dst), sk, skb, fl4->saddr, fl4->daddr, dscp, ip4_dst_hoplimit(dst), df, sctp_sk(sk)->udp_port, t->encap_port, false, false, 0); + local_bh_enable(); return 0; } From 34e1a98ff2a87cf4b8de3ccebe9d45273f014aeb Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sun, 12 Apr 2026 11:56:25 +0200 Subject: [PATCH 1545/1561] net: airoha: Remove PCE_MC_EN_MASK bit in REG_FE_PCE_CFG configuration PCE_MC_EN_MASK bit in REG_FE_PCE_CFG configuration performed in airoha_fe_init() is used to duplicate multicast packets and send a copy to the CPU when the traffic is offloaded. This is necessary just if it is requested by the user. Disable multicast packets duplication by default. Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260412-airoha_fe_init_remove_mc_en_bit-v1-1-7b6a5a25a74d@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index d426dc173d99..2bd79da70934 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -458,9 +458,8 @@ static int airoha_fe_init(struct airoha_eth *eth) FIELD_PREP(PSE_IQ_RES2_P5_MASK, 0x40) | FIELD_PREP(PSE_IQ_RES2_P4_MASK, 0x34)); - /* enable FE copy engine for MC/KA/DPI */ - airoha_fe_wr(eth, REG_FE_PCE_CFG, - PCE_DPI_EN_MASK | PCE_KA_EN_MASK | PCE_MC_EN_MASK); + /* enable FE copy engine for KA/DPI */ + airoha_fe_wr(eth, REG_FE_PCE_CFG, PCE_DPI_EN_MASK | PCE_KA_EN_MASK); /* set vip queue selection to ring 1 */ airoha_fe_rmw(eth, REG_CDM_FWD_CFG(1), CDM_VIP_QSEL_MASK, FIELD_PREP(CDM_VIP_QSEL_MASK, 0x4)); From 1921f91298d1388a0bb9db8f83800c998b649cb3 Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Sat, 11 Apr 2026 08:55:19 +0800 Subject: [PATCH 1546/1561] net, bpf: fix null-ptr-deref in xdp_master_redirect() for down master syzkaller reported a kernel panic in bond_rr_gen_slave_id() reached via xdp_master_redirect(). Full decoded trace: https://syzkaller.appspot.com/bug?extid=80e046b8da2820b6ba73 bond_rr_gen_slave_id() dereferences bond->rr_tx_counter, a per-CPU counter that bonding only allocates in bond_open() when the mode is round-robin. If the bond device was never brought up, rr_tx_counter stays NULL. The XDP redirect path can still reach that code on a bond that was never opened: bpf_master_redirect_enabled_key is a global static key, so as soon as any bond device has native XDP attached, the XDP_TX -> xdp_master_redirect() interception is enabled for every slave system-wide. The path xdp_master_redirect() -> bond_xdp_get_xmit_slave() -> bond_xdp_xmit_roundrobin_slave_get() -> bond_rr_gen_slave_id() then runs against a bond that has no rr_tx_counter and crashes. Fix this in the generic xdp_master_redirect() by refusing to call into the master's ->ndo_xdp_get_xmit_slave() when the master device is not up. IFF_UP is only set after ->ndo_open() has successfully returned, so this reliably excludes masters whose XDP state has not been fully initialized. Drop the frame with XDP_ABORTED so the exception is visible via trace_xdp_exception() rather than silently falling through. This is not specific to bonding: any current or future master that defers XDP state allocation to ->ndo_open() is protected. Fixes: 879af96ffd72 ("net, core: Add support for XDP redirection to slave device") Reported-by: syzbot+80e046b8da2820b6ba73@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/698f84c6.a70a0220.2c38d7.00cc.GAE@google.com/T/ Suggested-by: Daniel Borkmann Acked-by: Daniel Borkmann Signed-off-by: Jiayuan Chen Link: https://patch.msgid.link/20260411005524.201200-2-jiayuan.chen@linux.dev Signed-off-by: Paolo Abeni --- net/core/filter.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/filter.c b/net/core/filter.c index 53ce06ed4a88..90ae4f314b6c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4395,6 +4395,8 @@ u32 xdp_master_redirect(struct xdp_buff *xdp) struct net_device *master, *slave; master = netdev_master_upper_dev_get_rcu(xdp->rxq->dev); + if (unlikely(!(master->flags & IFF_UP))) + return XDP_ABORTED; slave = master->netdev_ops->ndo_xdp_get_xmit_slave(master, xdp); if (slave && slave != xdp->rxq->dev) { /* The target device is different from the receiving device, so From 8dd1bdde38af8418889ba322a3663c401a60fe28 Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Sat, 11 Apr 2026 08:55:20 +0800 Subject: [PATCH 1547/1561] selftests/bpf: add test for xdp_master_redirect with bond not up Add a selftest that reproduces the null-ptr-deref in bond_rr_gen_slave_id() when XDP redirect targets a bond device in round-robin mode that was never brought up. The test verifies the fix by ensuring no crash occurs. Test setup: - bond0: active-backup mode, UP, with native XDP (enables bpf_master_redirect_enabled_key globally) - bond1: round-robin mode, never UP - veth1: slave of bond1, with generic XDP (XDP_TX) - BPF_PROG_TEST_RUN with live frames triggers the redirect path Signed-off-by: Jiayuan Chen Link: https://patch.msgid.link/20260411005524.201200-3-jiayuan.chen@linux.dev Signed-off-by: Paolo Abeni --- .../selftests/bpf/prog_tests/xdp_bonding.c | 96 ++++++++++++++++++- 1 file changed, 94 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c index e8ea26464349..c42488e445c2 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c @@ -191,13 +191,18 @@ fail: return -1; } -static void bonding_cleanup(struct skeletons *skeletons) +static void link_cleanup(struct skeletons *skeletons) { - restore_root_netns(); while (skeletons->nlinks) { skeletons->nlinks--; bpf_link__destroy(skeletons->links[skeletons->nlinks]); } +} + +static void bonding_cleanup(struct skeletons *skeletons) +{ + restore_root_netns(); + link_cleanup(skeletons); ASSERT_OK(system("ip link delete bond1"), "delete bond1"); ASSERT_OK(system("ip link delete veth1_1"), "delete veth1_1"); ASSERT_OK(system("ip link delete veth1_2"), "delete veth1_2"); @@ -493,6 +498,90 @@ out: system("ip link del bond_nest2"); } +/* + * Test that XDP redirect via xdp_master_redirect() does not crash when + * the bond master device is not up. When bond is in round-robin mode but + * never opened, rr_tx_counter is NULL. + */ +static void test_xdp_bonding_redirect_no_up(struct skeletons *skeletons) +{ + struct nstoken *nstoken = NULL; + int xdp_pass_fd; + int veth1_ifindex; + int err; + char pkt[ETH_HLEN + 1]; + struct xdp_md ctx_in = {}; + + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt, + .data_size_in = sizeof(pkt), + .ctx_in = &ctx_in, + .ctx_size_in = sizeof(ctx_in), + .flags = BPF_F_TEST_XDP_LIVE_FRAMES, + .repeat = 1, + .batch_size = 1, + ); + + /* We can't use bonding_setup() because bond will be active */ + SYS(out, "ip netns add ns_rr_no_up"); + nstoken = open_netns("ns_rr_no_up"); + if (!ASSERT_OK_PTR(nstoken, "open ns_rr_no_up")) + goto out; + + /* bond0: active-backup, UP with slave veth0. + * Attaching native XDP to bond0 enables bpf_master_redirect_enabled_key + * globally. + */ + SYS(out, "ip link add bond0 type bond mode active-backup"); + SYS(out, "ip link add veth0 type veth peer name veth0p"); + SYS(out, "ip link set veth0 master bond0"); + SYS(out, "ip link set bond0 up"); + SYS(out, "ip link set veth0p up"); + + /* bond1: round-robin, never UP -> rr_tx_counter stays NULL */ + SYS(out, "ip link add bond1 type bond mode balance-rr"); + SYS(out, "ip link add veth1 type veth peer name veth1p"); + SYS(out, "ip link set veth1 master bond1"); + + veth1_ifindex = if_nametoindex("veth1"); + if (!ASSERT_GT(veth1_ifindex, 0, "veth1_ifindex")) + goto out; + + /* Attach native XDP to bond0 -> enables global redirect key */ + if (xdp_attach(skeletons, skeletons->xdp_tx->progs.xdp_tx, "bond0")) + goto out; + + /* Attach generic XDP (XDP_TX) to veth1. + * When packets arrive at veth1 via netif_receive_skb, do_xdp_generic() + * runs this program. XDP_TX + bond slave triggers xdp_master_redirect(). + */ + err = bpf_xdp_attach(veth1_ifindex, + bpf_program__fd(skeletons->xdp_tx->progs.xdp_tx), + XDP_FLAGS_SKB_MODE, NULL); + if (!ASSERT_OK(err, "attach generic XDP to veth1")) + goto out; + + /* Run BPF_PROG_TEST_RUN with XDP_PASS live frames on veth1. + * XDP_PASS frames become SKBs with skb->dev = veth1, entering + * netif_receive_skb -> do_xdp_generic -> xdp_master_redirect. + * Without the fix, bond_rr_gen_slave_id() dereferences NULL + * rr_tx_counter and crashes. + */ + xdp_pass_fd = bpf_program__fd(skeletons->xdp_dummy->progs.xdp_dummy_prog); + + memset(pkt, 0, sizeof(pkt)); + ctx_in.data_end = sizeof(pkt); + ctx_in.ingress_ifindex = veth1_ifindex; + + err = bpf_prog_test_run_opts(xdp_pass_fd, &opts); + ASSERT_OK(err, "xdp_pass test_run should not crash"); + +out: + link_cleanup(skeletons); + close_netns(nstoken); + SYS_NOFAIL("ip netns del ns_rr_no_up"); +} + static void test_xdp_bonding_features(struct skeletons *skeletons) { LIBBPF_OPTS(bpf_xdp_query_opts, query_opts); @@ -738,6 +827,9 @@ void serial_test_xdp_bonding(void) if (test__start_subtest("xdp_bonding_redirect_multi")) test_xdp_bonding_redirect_multi(&skeletons); + if (test__start_subtest("xdp_bonding_redirect_no_up")) + test_xdp_bonding_redirect_no_up(&skeletons); + out: xdp_dummy__destroy(skeletons.xdp_dummy); xdp_tx__destroy(skeletons.xdp_tx); From 600dc40554dc5ad1e6f3af51f700228033f43ea7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 11 Apr 2026 13:01:35 +0200 Subject: [PATCH 1548/1561] net: usb: cdc-phonet: fix skb frags[] overflow in rx_complete() A malicious USB device claiming to be a CDC Phonet modem can overflow the skb_shared_info->frags[] array by sending an unbounded sequence of full-page bulk transfers. Drop the skb and increment the length error when the frag limit is reached. This matches the same fix that commit f0813bcd2d9d ("net: wwan: t7xx: fix potential skb->frags overflow in RX path") did for the t7xx driver. Cc: Andrew Lunn Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: stable Assisted-by: gregkh_clanker_t1000 Signed-off-by: Greg Kroah-Hartman Link: https://patch.msgid.link/2026041134-dreamboat-buddhism-d1ec@gregkh Fixes: 87cf65601e17 ("USB host CDC Phonet network interface driver") Signed-off-by: Paolo Abeni --- drivers/net/usb/cdc-phonet.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/cdc-phonet.c b/drivers/net/usb/cdc-phonet.c index ad5121e9cf5d..165650ecef64 100644 --- a/drivers/net/usb/cdc-phonet.c +++ b/drivers/net/usb/cdc-phonet.c @@ -157,11 +157,16 @@ static void rx_complete(struct urb *req) PAGE_SIZE); page = NULL; } - } else { + } else if (skb_shinfo(skb)->nr_frags < MAX_SKB_FRAGS) { skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, 0, req->actual_length, PAGE_SIZE); page = NULL; + } else { + dev_kfree_skb_any(skb); + pnd->rx_skb = NULL; + skb = NULL; + dev->stats.rx_length_errors++; } if (req->actual_length < PAGE_SIZE) pnd->rx_skb = NULL; /* Last fragment */ From fe72340daaf1af588be88056faf98965f39e6032 Mon Sep 17 00:00:00 2001 From: Luxiao Xu Date: Sat, 11 Apr 2026 23:10:10 +0800 Subject: [PATCH 1549/1561] net: strparser: fix skb_head leak in strp_abort_strp() When the stream parser is aborted, for example after a message assembly timeout, it can still hold a reference to a partially assembled message in strp->skb_head. That skb is not released in strp_abort_strp(), which leaks the partially assembled message and can be triggered repeatedly to exhaust memory. Fix this by freeing strp->skb_head and resetting the parser state in the abort path. Leave strp_stop() unchanged so final cleanup still happens in strp_done() after the work and timer have been synchronized. Fixes: 43a0c6751a32 ("strparser: Stream parser for messages") Cc: stable@kernel.org Reported-by: Yifan Wu Reported-by: Juefei Pu Co-developed-by: Yuan Tan Signed-off-by: Yuan Tan Suggested-by: Xin Liu Tested-by: Yuan Tan Signed-off-by: Luxiao Xu Signed-off-by: Ren Wei Link: https://patch.msgid.link/ade3857a9404999ce9a1c27ec523efc896072678.1775482694.git.rakukuip@gmail.com Signed-off-by: Paolo Abeni --- net/strparser/strparser.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index fe0e76fdd1f1..a23f4b4dfc67 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -45,6 +45,14 @@ static void strp_abort_strp(struct strparser *strp, int err) strp->stopped = 1; + if (strp->skb_head) { + kfree_skb(strp->skb_head); + strp->skb_head = NULL; + } + + strp->skb_nextp = NULL; + strp->need_bytes = 0; + if (strp->sk) { struct sock *sk = strp->sk; From f7cf8ece8cee3c1ee361991470cdb1eb65ab02e8 Mon Sep 17 00:00:00 2001 From: Zhengchuan Liang Date: Sat, 11 Apr 2026 23:10:26 +0800 Subject: [PATCH 1550/1561] net: caif: clear client service pointer on teardown `caif_connect()` can tear down an existing client after remote shutdown by calling `caif_disconnect_client()` followed by `caif_free_client()`. `caif_free_client()` releases the service layer referenced by `adap_layer->dn`, but leaves that pointer stale. When the socket is later destroyed, `caif_sock_destructor()` calls `caif_free_client()` again and dereferences the freed service pointer. Clear the client/service links before releasing the service object so repeated teardown becomes harmless. Fixes: 43e369210108 ("caif: Move refcount from service layer to sock and dev.") Cc: stable@kernel.org Reported-by: Yifan Wu Reported-by: Juefei Pu Co-developed-by: Yuan Tan Signed-off-by: Yuan Tan Suggested-by: Xin Liu Tested-by: Ren Wei Signed-off-by: Zhengchuan Liang Signed-off-by: Ren Wei Link: https://patch.msgid.link/9f3d37847c0037568aae698ca23cd47c6691acb0.1775897577.git.zcliangcn@gmail.com Signed-off-by: Paolo Abeni --- net/caif/cfsrvl.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c index 171fa32ada85..d687fd0b4ed3 100644 --- a/net/caif/cfsrvl.c +++ b/net/caif/cfsrvl.c @@ -191,10 +191,20 @@ bool cfsrvl_phyid_match(struct cflayer *layer, int phyid) void caif_free_client(struct cflayer *adap_layer) { + struct cflayer *serv_layer; struct cfsrvl *servl; - if (adap_layer == NULL || adap_layer->dn == NULL) + + if (!adap_layer) return; - servl = container_obj(adap_layer->dn); + + serv_layer = adap_layer->dn; + if (!serv_layer) + return; + + layer_set_dn(adap_layer, NULL); + layer_set_up(serv_layer, NULL); + + servl = container_obj(serv_layer); servl->release(&servl->layer); } EXPORT_SYMBOL(caif_free_client); From 1acdfbdb516b32165a8ecd1d5f8c68e4eac64637 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sun, 12 Apr 2026 09:57:29 +0200 Subject: [PATCH 1551/1561] net: airoha: Fix VIP configuration for AN7583 SoC EN7581 and AN7583 SoCs have different VIP definitions. Introduce get_vip_port callback in airoha_eth_soc_data struct in order to take into account EN7581 and AN7583 VIP register layout and definition differences. Introduce nbq parameter in airoha_gdm_port struct. At the moment nbq is set statically to value previously used in airhoha_set_gdm2_loopback routine and it will be read from device tree in subsequent patches. Fixes: e4e5ce823bdd ("net: airoha: Add AN7583 SoC support") Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20260412-airoha-7583-vip-fix-v1-1-c35e02b054bb@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/airoha/airoha_eth.c | 66 ++++++++++++++++++------ drivers/net/ethernet/airoha/airoha_eth.h | 2 + 2 files changed, 51 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 9e995094c32a..f484835af703 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -107,19 +107,7 @@ static int airoha_set_vip_for_gdm_port(struct airoha_gdm_port *port, struct airoha_eth *eth = port->qdma->eth; u32 vip_port; - switch (port->id) { - case AIROHA_GDM3_IDX: - /* FIXME: handle XSI_PCIE1_PORT */ - vip_port = XSI_PCIE0_VIP_PORT_MASK; - break; - case AIROHA_GDM4_IDX: - /* FIXME: handle XSI_USB_PORT */ - vip_port = XSI_ETH_VIP_PORT_MASK; - break; - default: - return 0; - } - + vip_port = eth->soc->ops.get_vip_port(port, port->nbq); if (enable) { airoha_fe_set(eth, REG_FE_VIP_PORT_EN, vip_port); airoha_fe_set(eth, REG_FE_IFC_PORT_EN, vip_port); @@ -1710,7 +1698,7 @@ static int airoha_dev_set_macaddr(struct net_device *dev, void *p) static int airhoha_set_gdm2_loopback(struct airoha_gdm_port *port) { struct airoha_eth *eth = port->qdma->eth; - u32 val, pse_port, chan, nbq; + u32 val, pse_port, chan; int src_port; /* Forward the traffic to the proper GDM port */ @@ -1740,9 +1728,7 @@ static int airhoha_set_gdm2_loopback(struct airoha_gdm_port *port) airoha_fe_clear(eth, REG_FE_VIP_PORT_EN, BIT(AIROHA_GDM2_IDX)); airoha_fe_clear(eth, REG_FE_IFC_PORT_EN, BIT(AIROHA_GDM2_IDX)); - /* XXX: handle XSI_USB_PORT and XSI_PCE1_PORT */ - nbq = port->id == AIROHA_GDM3_IDX && airoha_is_7581(eth) ? 4 : 0; - src_port = eth->soc->ops.get_src_port_id(port, nbq); + src_port = eth->soc->ops.get_src_port_id(port, port->nbq); if (src_port < 0) return src_port; @@ -2951,6 +2937,8 @@ static int airoha_alloc_gdm_port(struct airoha_eth *eth, port->qdma = qdma; port->dev = dev; port->id = id; + /* XXX: Read nbq from DTS */ + port->nbq = id == AIROHA_GDM3_IDX && airoha_is_7581(eth) ? 4 : 0; eth->ports[p] = port; return airoha_metadata_dst_alloc(port); @@ -3152,6 +3140,28 @@ static int airoha_en7581_get_src_port_id(struct airoha_gdm_port *port, int nbq) return -EINVAL; } +static u32 airoha_en7581_get_vip_port(struct airoha_gdm_port *port, int nbq) +{ + switch (port->id) { + case AIROHA_GDM3_IDX: + if (nbq == 4) + return XSI_PCIE0_VIP_PORT_MASK; + if (nbq == 5) + return XSI_PCIE1_VIP_PORT_MASK; + break; + case AIROHA_GDM4_IDX: + if (!nbq) + return XSI_ETH_VIP_PORT_MASK; + if (nbq == 1) + return XSI_USB_VIP_PORT_MASK; + break; + default: + break; + } + + return 0; +} + static const char * const an7583_xsi_rsts_names[] = { "xsi-mac", "hsi0-mac", @@ -3181,6 +3191,26 @@ static int airoha_an7583_get_src_port_id(struct airoha_gdm_port *port, int nbq) return -EINVAL; } +static u32 airoha_an7583_get_vip_port(struct airoha_gdm_port *port, int nbq) +{ + switch (port->id) { + case AIROHA_GDM3_IDX: + if (!nbq) + return XSI_ETH_VIP_PORT_MASK; + break; + case AIROHA_GDM4_IDX: + if (!nbq) + return XSI_PCIE0_VIP_PORT_MASK; + if (nbq == 1) + return XSI_USB_VIP_PORT_MASK; + break; + default: + break; + } + + return 0; +} + static const struct airoha_eth_soc_data en7581_soc_data = { .version = 0x7581, .xsi_rsts_names = en7581_xsi_rsts_names, @@ -3188,6 +3218,7 @@ static const struct airoha_eth_soc_data en7581_soc_data = { .num_ppe = 2, .ops = { .get_src_port_id = airoha_en7581_get_src_port_id, + .get_vip_port = airoha_en7581_get_vip_port, }, }; @@ -3198,6 +3229,7 @@ static const struct airoha_eth_soc_data an7583_soc_data = { .num_ppe = 1, .ops = { .get_src_port_id = airoha_an7583_get_src_port_id, + .get_vip_port = airoha_an7583_get_vip_port, }, }; diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h index a97903569335..8bcd809e6f53 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.h +++ b/drivers/net/ethernet/airoha/airoha_eth.h @@ -536,6 +536,7 @@ struct airoha_gdm_port { struct airoha_qdma *qdma; struct net_device *dev; int id; + int nbq; struct airoha_hw_stats stats; @@ -576,6 +577,7 @@ struct airoha_eth_soc_data { int num_ppe; struct { int (*get_src_port_id)(struct airoha_gdm_port *port, int nbq); + u32 (*get_vip_port)(struct airoha_gdm_port *port, int nbq); } ops; }; From b9d8b856689d2b968495d79fe653d87fcb8ad98c Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sun, 12 Apr 2026 10:43:26 +0200 Subject: [PATCH 1552/1561] net: airoha: Add missing PPE configurations in airoha_ppe_hw_init() Add the following PPE configuration in airoha_ppe_hw_init routine: - 6RD hw offloading is currently not supported by Netfilter flowtable. Disable explicitly PPE 6RD offloading in order to prevent PPE to learn 6RD flows and eventually interrupt the traffic. - Add missing PPE bind rate configuration for L3 and L2 traffic. PPE bind rate configuration specifies the pps threshold to move a PPE entry state from UNBIND to BIND. Without this configuration this value is random. - Set ageing thresholds to the values used in the vendor SDK in order to improve connection stability under load and avoid packet loss caused by fast aging. Fixes: 00a7678310fe3 ("net: airoha: Introduce flowtable offload support") Signed-off-by: Lorenzo Bianconi Reviewed-by: Simon Horman Link: https://patch.msgid.link/20260412-airoha_ppe_hw_init-missing-bits-v1-1-06ac670819e3@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/airoha/airoha_ppe.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c index c2c32b6833df..62cfffb4f0e5 100644 --- a/drivers/net/ethernet/airoha/airoha_ppe.c +++ b/drivers/net/ethernet/airoha/airoha_ppe.c @@ -111,13 +111,13 @@ static void airoha_ppe_hw_init(struct airoha_ppe *ppe) airoha_fe_rmw(eth, REG_PPE_BND_AGE0(i), PPE_BIND_AGE0_DELTA_NON_L4 | PPE_BIND_AGE0_DELTA_UDP, - FIELD_PREP(PPE_BIND_AGE0_DELTA_NON_L4, 1) | - FIELD_PREP(PPE_BIND_AGE0_DELTA_UDP, 12)); + FIELD_PREP(PPE_BIND_AGE0_DELTA_NON_L4, 60) | + FIELD_PREP(PPE_BIND_AGE0_DELTA_UDP, 60)); airoha_fe_rmw(eth, REG_PPE_BND_AGE1(i), PPE_BIND_AGE1_DELTA_TCP_FIN | PPE_BIND_AGE1_DELTA_TCP, FIELD_PREP(PPE_BIND_AGE1_DELTA_TCP_FIN, 1) | - FIELD_PREP(PPE_BIND_AGE1_DELTA_TCP, 7)); + FIELD_PREP(PPE_BIND_AGE1_DELTA_TCP, 60)); airoha_fe_rmw(eth, REG_PPE_TB_HASH_CFG(i), PPE_SRAM_TABLE_EN_MASK | @@ -145,7 +145,15 @@ static void airoha_ppe_hw_init(struct airoha_ppe *ppe) FIELD_PREP(PPE_DRAM_TB_NUM_ENTRY_MASK, dram_num_entries)); + airoha_fe_rmw(eth, REG_PPE_BIND_RATE(i), + PPE_BIND_RATE_L2B_BIND_MASK | + PPE_BIND_RATE_BIND_MASK, + FIELD_PREP(PPE_BIND_RATE_L2B_BIND_MASK, 0x1e) | + FIELD_PREP(PPE_BIND_RATE_BIND_MASK, 0x1e)); + airoha_fe_wr(eth, REG_PPE_HASH_SEED(i), PPE_HASH_SEED); + airoha_fe_clear(eth, REG_PPE_PPE_FLOW_CFG(i), + PPE_FLOW_CFG_IP6_6RD_MASK); for (p = 0; p < ARRAY_SIZE(eth->ports); p++) airoha_fe_rmw(eth, REG_PPE_MTU(i, p), From 4a6fe5fe60040c31c25767ca815a06fab35c1eb7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 14 Apr 2026 00:08:04 +0200 Subject: [PATCH 1553/1561] tools/ynl: Make YnlFamily closeable as a context manager YnlFamily opens an AF_NETLINK socket in __init__ but has no way to release it other than leaving it to the GC. YnlFamily holds a self reference cycle through SpecFamily's self.family = self in its super().__init__() call, so refcount GC cannot reclaim it and the socket stays open until the cyclic GC runs. If a test creates a guest netns, instantiates a YnlFamily inside it via NetNSEnter(), performs some test case work via Ynl, and then deletes the netns, then the 'ip netns del' only drops the mount binding and cleanup_net in the kernel never runs, so any subsequent test case assertions that objects got cleaned up would fail given this only gets triggered later via cyclic GC run. Add an explicit close() that closes the netlink socket and wire up the __enter__/__exit__ so callers can scope the instance deterministically via 'with YnlFamily(...) as ynl: ...'. Signed-off-by: Daniel Borkmann Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20260413220809.604592-2-daniel@iogearbox.net Signed-off-by: Jakub Kicinski --- tools/net/ynl/pyynl/lib/ynl.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/net/ynl/pyynl/lib/ynl.py b/tools/net/ynl/pyynl/lib/ynl.py index 9c078599cea0..f63c6f828735 100644 --- a/tools/net/ynl/pyynl/lib/ynl.py +++ b/tools/net/ynl/pyynl/lib/ynl.py @@ -731,6 +731,16 @@ class YnlFamily(SpecFamily): bound_f = functools.partial(self._op, op_name) setattr(self, op.ident_name, bound_f) + def close(self): + if self.sock is not None: + self.sock.close() + self.sock = None + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + self.close() def ntf_subscribe(self, mcast_name): mcast_id = self.nlproto.get_mcast_id(mcast_name, self.mcast_groups) From e254ffb9502c8b4c7f8712c34ae6590796825260 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 14 Apr 2026 00:08:05 +0200 Subject: [PATCH 1554/1561] selftests/net: Split netdevsim tests from HW tests in nk_qlease As pointed out in 3d2c3d2eea9a ("selftests: net: py: explicitly forbid multiple ksft_run() calls"), ksft_run() cannot be called multiple times. Move the netdevsim-based queue lease tests to selftests/net/ so that each file has exactly one ksft_run() call. The HW tests (io_uring ZC RX, queue attrs, XDP with MP, destroy) remain in selftests/drivers/net/hw/. Fixes: 65d657d80684 ("selftests/net: Add queue leasing tests with netkit") Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/netdev/20260409181950.7e099b6c@kernel.org Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20260413220809.604592-3-daniel@iogearbox.net Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/hw/nk_qlease.py | 1142 ---------------- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/nk_qlease.py | 1168 +++++++++++++++++ 3 files changed, 1169 insertions(+), 1142 deletions(-) create mode 100755 tools/testing/selftests/net/nk_qlease.py diff --git a/tools/testing/selftests/drivers/net/hw/nk_qlease.py b/tools/testing/selftests/drivers/net/hw/nk_qlease.py index 2bc5ffe96c7d..aa83dc321328 100755 --- a/tools/testing/selftests/drivers/net/hw/nk_qlease.py +++ b/tools/testing/selftests/drivers/net/hw/nk_qlease.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 -import errno import re import time import threading @@ -10,23 +9,17 @@ from lib.py import ( ksft_run, ksft_exit, ksft_eq, - ksft_ne, ksft_in, ksft_not_in, ksft_raises, ) from lib.py import ( NetDrvContEnv, - NetNS, NetNSEnter, EthtoolFamily, NetdevFamily, - RtnlFamily, - NetdevSimDev, ) from lib.py import ( - NlError, - Netlink, bkg, cmd, defer, @@ -46,1100 +39,6 @@ def set_flow_rule(cfg): return int(values) -def create_netkit(rxqueues): - all_links = ip("-d link show", json=True) - old_idxs = { - link["ifindex"] - for link in all_links - if link.get("linkinfo", {}).get("info_kind") == "netkit" - } - - rtnl = RtnlFamily() - rtnl.newlink( - { - "linkinfo": { - "kind": "netkit", - "data": { - "mode": "l2", - "policy": "forward", - "peer-policy": "forward", - }, - }, - "num-rx-queues": rxqueues, - }, - flags=[Netlink.NLM_F_CREATE, Netlink.NLM_F_EXCL], - ) - - all_links = ip("-d link show", json=True) - nk_links = [ - link - for link in all_links - if link.get("linkinfo", {}).get("info_kind") == "netkit" - and link["ifindex"] not in old_idxs - ] - nk_links.sort(key=lambda x: x["ifindex"]) - return ( - nk_links[1]["ifname"], - nk_links[1]["ifindex"], - nk_links[0]["ifname"], - nk_links[0]["ifindex"], - ) - - -def create_netkit_single(rxqueues): - rtnl = RtnlFamily() - rtnl.newlink( - { - "linkinfo": { - "kind": "netkit", - "data": { - "mode": "l2", - "pairing": "single", - }, - }, - "num-rx-queues": rxqueues, - }, - flags=[Netlink.NLM_F_CREATE, Netlink.NLM_F_EXCL], - ) - - all_links = ip("-d link show", json=True) - nk_links = [ - link - for link in all_links - if link.get("linkinfo", {}).get("info_kind") == "netkit" - and "UP" not in link.get("flags", []) - ] - return nk_links[0]["ifname"], nk_links[0]["ifindex"] - - -def test_remove_phys(netns) -> None: - nsimdev = NetdevSimDev(port_count=1, queue_count=2) - defer(nsimdev.remove) - nsim = nsimdev.nsims[0] - ip(f"link set dev {nsim.ifname} up") - - nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) - defer(cmd, f"ip link del dev {nk_host}", fail=False) - - ip(f"link set dev {nk_guest} netns {netns.name}") - ip(f"link set dev {nk_host} up") - ip(f"link set dev {nk_guest} up", ns=netns) - - src_queue = 1 - with NetNSEnter(str(netns)): - netdevnl = NetdevFamily() - result = netdevnl.queue_create( - { - "ifindex": nk_guest_idx, - "type": "rx", - "lease": { - "ifindex": nsim.ifindex, - "queue": {"id": src_queue, "type": "rx"}, - "netns-id": 0, - }, - } - ) - nk_queue_id = result["id"] - - netdevnl = NetdevFamily() - queue_info = netdevnl.queue_get( - {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} - ) - ksft_in("lease", queue_info) - ksft_eq(queue_info["lease"]["ifindex"], nk_guest_idx) - ksft_eq(queue_info["lease"]["queue"]["id"], nk_queue_id) - - nsimdev.remove() - time.sleep(0.1) - ret = cmd(f"ip link show dev {nk_host}", fail=False) - ksft_ne(ret.ret, 0) - - -def test_double_lease(netns) -> None: - nsimdev = NetdevSimDev(port_count=1, queue_count=2) - defer(nsimdev.remove) - nsim = nsimdev.nsims[0] - ip(f"link set dev {nsim.ifname} up") - - nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=3) - defer(cmd, f"ip link del dev {nk_host}") - - ip(f"link set dev {nk_guest} netns {netns.name}") - ip(f"link set dev {nk_host} up") - ip(f"link set dev {nk_guest} up", ns=netns) - - src_queue = 1 - with NetNSEnter(str(netns)): - netdevnl = NetdevFamily() - result = netdevnl.queue_create( - { - "ifindex": nk_guest_idx, - "type": "rx", - "lease": { - "ifindex": nsim.ifindex, - "queue": {"id": src_queue, "type": "rx"}, - "netns-id": 0, - }, - } - ) - ksft_eq(result["id"], 1) - - with ksft_raises(NlError) as e: - netdevnl.queue_create( - { - "ifindex": nk_guest_idx, - "type": "rx", - "lease": { - "ifindex": nsim.ifindex, - "queue": {"id": src_queue, "type": "rx"}, - "netns-id": 0, - }, - } - ) - ksft_eq(e.exception.nl_msg.error, -errno.EBUSY) - - -def test_virtual_lessor(netns) -> None: - nk_host_a, _, nk_guest_a, nk_guest_a_idx = create_netkit(rxqueues=2) - defer(cmd, f"ip link del dev {nk_host_a}") - ip(f"link set dev {nk_host_a} up") - ip(f"link set dev {nk_guest_a} up") - - nk_host_b, _, nk_guest_b, nk_guest_b_idx = create_netkit(rxqueues=2) - defer(cmd, f"ip link del dev {nk_host_b}") - - ip(f"link set dev {nk_guest_b} netns {netns.name}") - ip(f"link set dev {nk_host_b} up") - ip(f"link set dev {nk_guest_b} up", ns=netns) - - with NetNSEnter(str(netns)): - netdevnl = NetdevFamily() - with ksft_raises(NlError) as e: - netdevnl.queue_create( - { - "ifindex": nk_guest_b_idx, - "type": "rx", - "lease": { - "ifindex": nk_guest_a_idx, - "queue": {"id": 0, "type": "rx"}, - "netns-id": 0, - }, - } - ) - ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) - - -def test_phys_lessee(_netns) -> None: - nsimdev_a = NetdevSimDev(port_count=1, queue_count=2) - defer(nsimdev_a.remove) - nsim_a = nsimdev_a.nsims[0] - ip(f"link set dev {nsim_a.ifname} up") - - nsimdev_b = NetdevSimDev(port_count=1, queue_count=2) - defer(nsimdev_b.remove) - nsim_b = nsimdev_b.nsims[0] - ip(f"link set dev {nsim_b.ifname} up") - - netdevnl = NetdevFamily() - with ksft_raises(NlError) as e: - netdevnl.queue_create( - { - "ifindex": nsim_a.ifindex, - "type": "rx", - "lease": { - "ifindex": nsim_b.ifindex, - "queue": {"id": 0, "type": "rx"}, - }, - } - ) - ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) - - -def test_different_lessors(netns) -> None: - nsimdev_a = NetdevSimDev(port_count=1, queue_count=2) - defer(nsimdev_a.remove) - nsim_a = nsimdev_a.nsims[0] - ip(f"link set dev {nsim_a.ifname} up") - - nsimdev_b = NetdevSimDev(port_count=1, queue_count=2) - defer(nsimdev_b.remove) - nsim_b = nsimdev_b.nsims[0] - ip(f"link set dev {nsim_b.ifname} up") - - nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=3) - defer(cmd, f"ip link del dev {nk_host}", fail=False) - - ip(f"link set dev {nk_guest} netns {netns.name}") - ip(f"link set dev {nk_host} up") - ip(f"link set dev {nk_guest} up", ns=netns) - - with NetNSEnter(str(netns)): - netdevnl = NetdevFamily() - netdevnl.queue_create( - { - "ifindex": nk_guest_idx, - "type": "rx", - "lease": { - "ifindex": nsim_a.ifindex, - "queue": {"id": 1, "type": "rx"}, - "netns-id": 0, - }, - } - ) - - with ksft_raises(NlError) as e: - netdevnl.queue_create( - { - "ifindex": nk_guest_idx, - "type": "rx", - "lease": { - "ifindex": nsim_b.ifindex, - "queue": {"id": 1, "type": "rx"}, - "netns-id": 0, - }, - } - ) - ksft_eq(e.exception.nl_msg.error, -errno.EOPNOTSUPP) - - -def test_queue_out_of_range(netns) -> None: - nsimdev = NetdevSimDev(port_count=1, queue_count=2) - defer(nsimdev.remove) - nsim = nsimdev.nsims[0] - ip(f"link set dev {nsim.ifname} up") - - nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) - defer(cmd, f"ip link del dev {nk_host}", fail=False) - - ip(f"link set dev {nk_guest} netns {netns.name}") - ip(f"link set dev {nk_host} up") - ip(f"link set dev {nk_guest} up", ns=netns) - - with NetNSEnter(str(netns)): - netdevnl = NetdevFamily() - with ksft_raises(NlError) as e: - netdevnl.queue_create( - { - "ifindex": nk_guest_idx, - "type": "rx", - "lease": { - "ifindex": nsim.ifindex, - "queue": {"id": 2, "type": "rx"}, - "netns-id": 0, - }, - } - ) - ksft_eq(e.exception.nl_msg.error, -errno.ERANGE) - - -def test_resize_leased(netns) -> None: - nsimdev = NetdevSimDev(port_count=1, queue_count=2) - defer(nsimdev.remove) - nsim = nsimdev.nsims[0] - ip(f"link set dev {nsim.ifname} up") - - nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) - defer(cmd, f"ip link del dev {nk_host}", fail=False) - - ip(f"link set dev {nk_guest} netns {netns.name}") - ip(f"link set dev {nk_host} up") - ip(f"link set dev {nk_guest} up", ns=netns) - - with NetNSEnter(str(netns)): - netdevnl = NetdevFamily() - netdevnl.queue_create( - { - "ifindex": nk_guest_idx, - "type": "rx", - "lease": { - "ifindex": nsim.ifindex, - "queue": {"id": 1, "type": "rx"}, - "netns-id": 0, - }, - } - ) - - ethnl = EthtoolFamily() - with ksft_raises(NlError) as e: - ethnl.channels_set({"header": {"dev-index": nsim.ifindex}, "combined-count": 1}) - ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) - - -def test_self_lease(_netns) -> None: - nk_host, _, _, nk_guest_idx = create_netkit(rxqueues=2) - defer(cmd, f"ip link del dev {nk_host}", fail=False) - - netdevnl = NetdevFamily() - with ksft_raises(NlError) as e: - netdevnl.queue_create( - { - "ifindex": nk_guest_idx, - "type": "rx", - "lease": { - "ifindex": nk_guest_idx, - "queue": {"id": 0, "type": "rx"}, - }, - } - ) - ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) - - -def test_veth_queue_create(netns) -> None: - nsimdev = NetdevSimDev(port_count=1, queue_count=2) - defer(nsimdev.remove) - nsim = nsimdev.nsims[0] - ip(f"link set dev {nsim.ifname} up") - - ip("link add veth0 type veth peer name veth1") - defer(cmd, "ip link del dev veth0", fail=False) - - all_links = ip("-d link show", json=True) - veth_peer = [ - link - for link in all_links - if link.get("ifname") == "veth1" - ] - veth_peer_idx = veth_peer[0]["ifindex"] - - ip(f"link set dev veth1 netns {netns.name}") - ip("link set dev veth0 up") - ip("link set dev veth1 up", ns=netns) - - with NetNSEnter(str(netns)): - netdevnl = NetdevFamily() - with ksft_raises(NlError) as e: - netdevnl.queue_create( - { - "ifindex": veth_peer_idx, - "type": "rx", - "lease": { - "ifindex": nsim.ifindex, - "queue": {"id": 1, "type": "rx"}, - "netns-id": 0, - }, - } - ) - ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) - - -def test_create_tx_type(netns) -> None: - nsimdev = NetdevSimDev(port_count=1, queue_count=2) - defer(nsimdev.remove) - nsim = nsimdev.nsims[0] - ip(f"link set dev {nsim.ifname} up") - - nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) - defer(cmd, f"ip link del dev {nk_host}", fail=False) - - ip(f"link set dev {nk_guest} netns {netns.name}") - ip(f"link set dev {nk_host} up") - ip(f"link set dev {nk_guest} up", ns=netns) - - with NetNSEnter(str(netns)): - netdevnl = NetdevFamily() - with ksft_raises(NlError) as e: - netdevnl.queue_create( - { - "ifindex": nk_guest_idx, - "type": "tx", - "lease": { - "ifindex": nsim.ifindex, - "queue": {"id": 1, "type": "rx"}, - "netns-id": 0, - }, - } - ) - ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) - - -def test_create_primary(_netns) -> None: - nsimdev = NetdevSimDev(port_count=1, queue_count=2) - defer(nsimdev.remove) - nsim = nsimdev.nsims[0] - ip(f"link set dev {nsim.ifname} up") - - nk_host, nk_host_idx, _, _ = create_netkit(rxqueues=2) - defer(cmd, f"ip link del dev {nk_host}", fail=False) - - ip(f"link set dev {nk_host} up") - - netdevnl = NetdevFamily() - with ksft_raises(NlError) as e: - netdevnl.queue_create( - { - "ifindex": nk_host_idx, - "type": "rx", - "lease": { - "ifindex": nsim.ifindex, - "queue": {"id": 1, "type": "rx"}, - }, - } - ) - ksft_eq(e.exception.nl_msg.error, -errno.EOPNOTSUPP) - - -def test_create_limit(netns) -> None: - nsimdev = NetdevSimDev(port_count=1, queue_count=2) - defer(nsimdev.remove) - nsim = nsimdev.nsims[0] - ip(f"link set dev {nsim.ifname} up") - - nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=1) - defer(cmd, f"ip link del dev {nk_host}", fail=False) - - ip(f"link set dev {nk_guest} netns {netns.name}") - ip(f"link set dev {nk_host} up") - ip(f"link set dev {nk_guest} up", ns=netns) - - with NetNSEnter(str(netns)): - netdevnl = NetdevFamily() - with ksft_raises(NlError) as e: - netdevnl.queue_create( - { - "ifindex": nk_guest_idx, - "type": "rx", - "lease": { - "ifindex": nsim.ifindex, - "queue": {"id": 1, "type": "rx"}, - "netns-id": 0, - }, - } - ) - ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) - - -def test_link_flap_phys(netns) -> None: - nsimdev = NetdevSimDev(port_count=1, queue_count=2) - defer(nsimdev.remove) - nsim = nsimdev.nsims[0] - ip(f"link set dev {nsim.ifname} up") - - nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) - defer(cmd, f"ip link del dev {nk_host}") - - ip(f"link set dev {nk_guest} netns {netns.name}") - ip(f"link set dev {nk_host} up") - ip(f"link set dev {nk_guest} up", ns=netns) - - src_queue = 1 - with NetNSEnter(str(netns)): - netdevnl = NetdevFamily() - result = netdevnl.queue_create( - { - "ifindex": nk_guest_idx, - "type": "rx", - "lease": { - "ifindex": nsim.ifindex, - "queue": {"id": src_queue, "type": "rx"}, - "netns-id": 0, - }, - } - ) - nk_queue_id = result["id"] - - netdevnl = NetdevFamily() - queue_info = netdevnl.queue_get( - {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} - ) - ksft_in("lease", queue_info) - ksft_eq(queue_info["lease"]["queue"]["id"], nk_queue_id) - - # Link flap the physical device - ip(f"link set dev {nsim.ifname} down") - ip(f"link set dev {nsim.ifname} up") - - # Verify lease survives the flap - queue_info = netdevnl.queue_get( - {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} - ) - ksft_in("lease", queue_info) - ksft_eq(queue_info["lease"]["queue"]["id"], nk_queue_id) - - -def test_queue_get_virtual(netns) -> None: - nsimdev = NetdevSimDev(port_count=1, queue_count=2) - defer(nsimdev.remove) - nsim = nsimdev.nsims[0] - ip(f"link set dev {nsim.ifname} up") - - nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) - defer(cmd, f"ip link del dev {nk_host}") - - ip(f"link set dev {nk_guest} netns {netns.name}") - ip(f"link set dev {nk_host} up") - ip(f"link set dev {nk_guest} up", ns=netns) - - src_queue = 1 - with NetNSEnter(str(netns)): - netdevnl = NetdevFamily() - result = netdevnl.queue_create( - { - "ifindex": nk_guest_idx, - "type": "rx", - "lease": { - "ifindex": nsim.ifindex, - "queue": {"id": src_queue, "type": "rx"}, - "netns-id": 0, - }, - } - ) - nk_queue_id = result["id"] - - # queue-get on virtual device's leased queue should not show lease - # info (lease info is only shown from the physical device's side) - queue_info = netdevnl.queue_get( - {"ifindex": nk_guest_idx, "id": nk_queue_id, "type": "rx"} - ) - ksft_eq(queue_info["id"], nk_queue_id) - ksft_eq(queue_info["ifindex"], nk_guest_idx) - ksft_not_in("lease", queue_info) - - # Default queue (not leased) also has no lease info - queue_info = netdevnl.queue_get( - {"ifindex": nk_guest_idx, "id": 0, "type": "rx"} - ) - ksft_not_in("lease", queue_info) - - -def test_remove_virt_first(netns) -> None: - nsimdev = NetdevSimDev(port_count=1, queue_count=2) - defer(nsimdev.remove) - nsim = nsimdev.nsims[0] - ip(f"link set dev {nsim.ifname} up") - - nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) - - ip(f"link set dev {nk_guest} netns {netns.name}") - ip(f"link set dev {nk_host} up") - ip(f"link set dev {nk_guest} up", ns=netns) - - src_queue = 1 - with NetNSEnter(str(netns)): - netdevnl = NetdevFamily() - result = netdevnl.queue_create( - { - "ifindex": nk_guest_idx, - "type": "rx", - "lease": { - "ifindex": nsim.ifindex, - "queue": {"id": src_queue, "type": "rx"}, - "netns-id": 0, - }, - } - ) - ksft_eq(result["id"], 1) - - netdevnl = NetdevFamily() - queue_info = netdevnl.queue_get( - {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} - ) - ksft_in("lease", queue_info) - ksft_eq(queue_info["lease"]["queue"]["id"], result["id"]) - - # Delete netkit (virtual device removed first, physical stays) - cmd(f"ip link del dev {nk_host}") - - # Verify lease is cleaned up on physical device - queue_info = netdevnl.queue_get( - {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} - ) - ksft_not_in("lease", queue_info) - - -def test_multiple_leases(netns) -> None: - nsimdev = NetdevSimDev(port_count=1, queue_count=3) - defer(nsimdev.remove) - nsim = nsimdev.nsims[0] - ip(f"link set dev {nsim.ifname} up") - - nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=4) - defer(cmd, f"ip link del dev {nk_host}", fail=False) - - ip(f"link set dev {nk_guest} netns {netns.name}") - ip(f"link set dev {nk_host} up") - ip(f"link set dev {nk_guest} up", ns=netns) - - with NetNSEnter(str(netns)): - netdevnl = NetdevFamily() - r1 = netdevnl.queue_create( - { - "ifindex": nk_guest_idx, - "type": "rx", - "lease": { - "ifindex": nsim.ifindex, - "queue": {"id": 1, "type": "rx"}, - "netns-id": 0, - }, - } - ) - r2 = netdevnl.queue_create( - { - "ifindex": nk_guest_idx, - "type": "rx", - "lease": { - "ifindex": nsim.ifindex, - "queue": {"id": 2, "type": "rx"}, - "netns-id": 0, - }, - } - ) - - ksft_eq(r1["id"], 1) - ksft_eq(r2["id"], 2) - - # Verify both leases visible on physical device - netdevnl = NetdevFamily() - q1 = netdevnl.queue_get( - {"ifindex": nsim.ifindex, "id": 1, "type": "rx"} - ) - q2 = netdevnl.queue_get( - {"ifindex": nsim.ifindex, "id": 2, "type": "rx"} - ) - ksft_in("lease", q1) - ksft_in("lease", q2) - ksft_eq(q1["lease"]["ifindex"], nk_guest_idx) - ksft_eq(q2["lease"]["ifindex"], nk_guest_idx) - ksft_eq(q1["lease"]["queue"]["id"], r1["id"]) - ksft_eq(q2["lease"]["queue"]["id"], r2["id"]) - - -def test_lease_queue_tx_type(netns) -> None: - nsimdev = NetdevSimDev(port_count=1, queue_count=2) - defer(nsimdev.remove) - nsim = nsimdev.nsims[0] - ip(f"link set dev {nsim.ifname} up") - - nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) - defer(cmd, f"ip link del dev {nk_host}", fail=False) - - ip(f"link set dev {nk_guest} netns {netns.name}") - ip(f"link set dev {nk_host} up") - ip(f"link set dev {nk_guest} up", ns=netns) - - with NetNSEnter(str(netns)): - netdevnl = NetdevFamily() - with ksft_raises(NlError) as e: - netdevnl.queue_create( - { - "ifindex": nk_guest_idx, - "type": "rx", - "lease": { - "ifindex": nsim.ifindex, - "queue": {"id": 1, "type": "tx"}, - "netns-id": 0, - }, - } - ) - ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) - - -def test_invalid_netns(netns) -> None: - nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) - defer(cmd, f"ip link del dev {nk_host}", fail=False) - - ip(f"link set dev {nk_guest} netns {netns.name}") - ip(f"link set dev {nk_host} up") - ip(f"link set dev {nk_guest} up", ns=netns) - - with NetNSEnter(str(netns)): - netdevnl = NetdevFamily() - with ksft_raises(NlError) as e: - netdevnl.queue_create( - { - "ifindex": nk_guest_idx, - "type": "rx", - "lease": { - "ifindex": 1, - "queue": {"id": 0, "type": "rx"}, - "netns-id": 999, - }, - } - ) - ksft_eq(e.exception.nl_msg.error, -errno.ENONET) - - -def test_invalid_phys_ifindex(netns) -> None: - nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) - defer(cmd, f"ip link del dev {nk_host}", fail=False) - - ip(f"link set dev {nk_guest} netns {netns.name}") - ip(f"link set dev {nk_host} up") - ip(f"link set dev {nk_guest} up", ns=netns) - - with NetNSEnter(str(netns)): - netdevnl = NetdevFamily() - with ksft_raises(NlError) as e: - netdevnl.queue_create( - { - "ifindex": nk_guest_idx, - "type": "rx", - "lease": { - "ifindex": 99999, - "queue": {"id": 0, "type": "rx"}, - "netns-id": 0, - }, - } - ) - ksft_eq(e.exception.nl_msg.error, -errno.ENODEV) - - -def test_multi_netkit_remove_phys(netns) -> None: - nsimdev = NetdevSimDev(port_count=1, queue_count=3) - defer(nsimdev.remove) - nsim = nsimdev.nsims[0] - ip(f"link set dev {nsim.ifname} up") - - # Create two netkit pairs, each leasing a different physical queue - nk_host_a, _, nk_guest_a, nk_guest_a_idx = create_netkit(rxqueues=2) - defer(cmd, f"ip link del dev {nk_host_a}", fail=False) - - nk_host_b, _, nk_guest_b, nk_guest_b_idx = create_netkit(rxqueues=2) - defer(cmd, f"ip link del dev {nk_host_b}", fail=False) - - ip(f"link set dev {nk_guest_a} netns {netns.name}") - ip(f"link set dev {nk_host_a} up") - ip(f"link set dev {nk_guest_a} up", ns=netns) - - ip(f"link set dev {nk_guest_b} netns {netns.name}") - ip(f"link set dev {nk_host_b} up") - ip(f"link set dev {nk_guest_b} up", ns=netns) - - with NetNSEnter(str(netns)): - netdevnl = NetdevFamily() - netdevnl.queue_create( - { - "ifindex": nk_guest_a_idx, - "type": "rx", - "lease": { - "ifindex": nsim.ifindex, - "queue": {"id": 1, "type": "rx"}, - "netns-id": 0, - }, - } - ) - netdevnl.queue_create( - { - "ifindex": nk_guest_b_idx, - "type": "rx", - "lease": { - "ifindex": nsim.ifindex, - "queue": {"id": 2, "type": "rx"}, - "netns-id": 0, - }, - } - ) - - # Removing the physical device should take down both netkit pairs - nsimdev.remove() - time.sleep(0.1) - ret = cmd(f"ip link show dev {nk_host_a}", fail=False) - ksft_ne(ret.ret, 0) - ret = cmd(f"ip link show dev {nk_host_b}", fail=False) - ksft_ne(ret.ret, 0) - - -def test_single_remove_phys(_netns) -> None: - nsimdev = NetdevSimDev(port_count=1, queue_count=2) - defer(nsimdev.remove) - nsim = nsimdev.nsims[0] - ip(f"link set dev {nsim.ifname} up") - - nk_name, nk_idx = create_netkit_single(rxqueues=2) - defer(cmd, f"ip link del dev {nk_name}", fail=False) - - ip(f"link set dev {nk_name} up") - - netdevnl = NetdevFamily() - netdevnl.queue_create( - { - "ifindex": nk_idx, - "type": "rx", - "lease": { - "ifindex": nsim.ifindex, - "queue": {"id": 1, "type": "rx"}, - }, - } - ) - - # Removing the physical device should take down the single netkit device - nsimdev.remove() - time.sleep(0.1) - ret = cmd(f"ip link show dev {nk_name}", fail=False) - ksft_ne(ret.ret, 0) - - -def test_link_flap_virt(netns) -> None: - nsimdev = NetdevSimDev(port_count=1, queue_count=2) - defer(nsimdev.remove) - nsim = nsimdev.nsims[0] - ip(f"link set dev {nsim.ifname} up") - - nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) - defer(cmd, f"ip link del dev {nk_host}") - - ip(f"link set dev {nk_guest} netns {netns.name}") - ip(f"link set dev {nk_host} up") - ip(f"link set dev {nk_guest} up", ns=netns) - - src_queue = 1 - with NetNSEnter(str(netns)): - netdevnl = NetdevFamily() - result = netdevnl.queue_create( - { - "ifindex": nk_guest_idx, - "type": "rx", - "lease": { - "ifindex": nsim.ifindex, - "queue": {"id": src_queue, "type": "rx"}, - "netns-id": 0, - }, - } - ) - nk_queue_id = result["id"] - - netdevnl = NetdevFamily() - queue_info = netdevnl.queue_get( - {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} - ) - ksft_in("lease", queue_info) - ksft_eq(queue_info["lease"]["queue"]["id"], nk_queue_id) - - # Link flap the virtual (netkit) device - ip(f"link set dev {nk_guest} down", ns=netns) - ip(f"link set dev {nk_guest} up", ns=netns) - - # Verify lease survives the virtual device flap - queue_info = netdevnl.queue_get( - {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} - ) - ksft_in("lease", queue_info) - ksft_eq(queue_info["lease"]["queue"]["id"], nk_queue_id) - - -def test_phys_queue_no_lease(netns) -> None: - nsimdev = NetdevSimDev(port_count=1, queue_count=2) - defer(nsimdev.remove) - nsim = nsimdev.nsims[0] - ip(f"link set dev {nsim.ifname} up") - - nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) - defer(cmd, f"ip link del dev {nk_host}") - - ip(f"link set dev {nk_guest} netns {netns.name}") - ip(f"link set dev {nk_host} up") - ip(f"link set dev {nk_guest} up", ns=netns) - - with NetNSEnter(str(netns)): - netdevnl = NetdevFamily() - netdevnl.queue_create( - { - "ifindex": nk_guest_idx, - "type": "rx", - "lease": { - "ifindex": nsim.ifindex, - "queue": {"id": 1, "type": "rx"}, - "netns-id": 0, - }, - } - ) - - # Physical queue 0 (not leased) should have no lease info - netdevnl = NetdevFamily() - queue_info = netdevnl.queue_get( - {"ifindex": nsim.ifindex, "id": 0, "type": "rx"} - ) - ksft_not_in("lease", queue_info) - - # Physical queue 1 (leased) should have lease info - queue_info = netdevnl.queue_get( - {"ifindex": nsim.ifindex, "id": 1, "type": "rx"} - ) - ksft_in("lease", queue_info) - - -def test_same_ns_lease(_netns) -> None: - nsimdev = NetdevSimDev(port_count=1, queue_count=2) - defer(nsimdev.remove) - nsim = nsimdev.nsims[0] - ip(f"link set dev {nsim.ifname} up") - - nk_name, nk_idx = create_netkit_single(rxqueues=2) - defer(cmd, f"ip link del dev {nk_name}", fail=False) - - ip(f"link set dev {nk_name} up") - - netdevnl = NetdevFamily() - result = netdevnl.queue_create( - { - "ifindex": nk_idx, - "type": "rx", - "lease": { - "ifindex": nsim.ifindex, - "queue": {"id": 1, "type": "rx"}, - }, - } - ) - ksft_eq(result["id"], 1) - - # Same namespace: lease info should NOT have netns-id - queue_info = netdevnl.queue_get( - {"ifindex": nsim.ifindex, "id": 1, "type": "rx"} - ) - ksft_in("lease", queue_info) - ksft_eq(queue_info["lease"]["ifindex"], nk_idx) - ksft_eq(queue_info["lease"]["queue"]["id"], result["id"]) - ksft_not_in("netns-id", queue_info["lease"]) - - -def test_resize_after_unlease(netns) -> None: - nsimdev = NetdevSimDev(port_count=1, queue_count=2) - defer(nsimdev.remove) - nsim = nsimdev.nsims[0] - ip(f"link set dev {nsim.ifname} up") - - nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) - - ip(f"link set dev {nk_guest} netns {netns.name}") - ip(f"link set dev {nk_host} up") - ip(f"link set dev {nk_guest} up", ns=netns) - - with NetNSEnter(str(netns)): - netdevnl = NetdevFamily() - netdevnl.queue_create( - { - "ifindex": nk_guest_idx, - "type": "rx", - "lease": { - "ifindex": nsim.ifindex, - "queue": {"id": 1, "type": "rx"}, - "netns-id": 0, - }, - } - ) - - # Resize should fail while lease is active - ethnl = EthtoolFamily() - with ksft_raises(NlError) as e: - ethnl.channels_set({"header": {"dev-index": nsim.ifindex}, "combined-count": 1}) - ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) - - # Delete netkit, clearing the lease - cmd(f"ip link del dev {nk_host}") - - # Resize should now succeed - ethnl.channels_set({"header": {"dev-index": nsim.ifindex}, "combined-count": 1}) - - -def test_lease_queue_zero(netns) -> None: - nsimdev = NetdevSimDev(port_count=1, queue_count=2) - defer(nsimdev.remove) - nsim = nsimdev.nsims[0] - ip(f"link set dev {nsim.ifname} up") - - nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) - defer(cmd, f"ip link del dev {nk_host}", fail=False) - - ip(f"link set dev {nk_guest} netns {netns.name}") - ip(f"link set dev {nk_host} up") - ip(f"link set dev {nk_guest} up", ns=netns) - - with NetNSEnter(str(netns)): - netdevnl = NetdevFamily() - result = netdevnl.queue_create( - { - "ifindex": nk_guest_idx, - "type": "rx", - "lease": { - "ifindex": nsim.ifindex, - "queue": {"id": 0, "type": "rx"}, - "netns-id": 0, - }, - } - ) - ksft_eq(result["id"], 1) - - netdevnl = NetdevFamily() - queue_info = netdevnl.queue_get( - {"ifindex": nsim.ifindex, "id": 0, "type": "rx"} - ) - ksft_in("lease", queue_info) - ksft_eq(queue_info["lease"]["queue"]["id"], result["id"]) - - -def test_release_and_reuse(netns) -> None: - nsimdev = NetdevSimDev(port_count=1, queue_count=2) - defer(nsimdev.remove) - nsim = nsimdev.nsims[0] - ip(f"link set dev {nsim.ifname} up") - - src_queue = 1 - - # First lease - nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) - - ip(f"link set dev {nk_guest} netns {netns.name}") - ip(f"link set dev {nk_host} up") - ip(f"link set dev {nk_guest} up", ns=netns) - - with NetNSEnter(str(netns)): - netdevnl = NetdevFamily() - netdevnl.queue_create( - { - "ifindex": nk_guest_idx, - "type": "rx", - "lease": { - "ifindex": nsim.ifindex, - "queue": {"id": src_queue, "type": "rx"}, - "netns-id": 0, - }, - } - ) - - netdevnl = NetdevFamily() - queue_info = netdevnl.queue_get( - {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} - ) - ksft_in("lease", queue_info) - - # Delete netkit, freeing the lease - cmd(f"ip link del dev {nk_host}") - - queue_info = netdevnl.queue_get( - {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} - ) - ksft_not_in("lease", queue_info) - - # Re-create netkit and lease the same physical queue again - nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) - defer(cmd, f"ip link del dev {nk_host}", fail=False) - - ip(f"link set dev {nk_guest} netns {netns.name}") - ip(f"link set dev {nk_host} up") - ip(f"link set dev {nk_guest} up", ns=netns) - - with NetNSEnter(str(netns)): - netdevnl = NetdevFamily() - result = netdevnl.queue_create( - { - "ifindex": nk_guest_idx, - "type": "rx", - "lease": { - "ifindex": nsim.ifindex, - "queue": {"id": src_queue, "type": "rx"}, - "netns-id": 0, - }, - } - ) - ksft_eq(result["id"], 1) - - netdevnl = NetdevFamily() - queue_info = netdevnl.queue_get( - {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} - ) - ksft_in("lease", queue_info) - ksft_eq(queue_info["lease"]["queue"]["id"], result["id"]) - - def test_iou_zcrx(cfg) -> None: cfg.require_ipver("6") ethnl = EthtoolFamily() @@ -1324,47 +223,6 @@ def test_destroy(cfg) -> None: def main() -> None: - netns = NetNS() - cmd("ip netns attach init 1") - ip("netns set init 0", ns=netns) - ip("link set lo up", ns=netns) - - ksft_run( - [ - test_remove_phys, - test_double_lease, - test_virtual_lessor, - test_phys_lessee, - test_different_lessors, - test_queue_out_of_range, - test_resize_leased, - test_self_lease, - test_create_tx_type, - test_create_primary, - test_create_limit, - test_link_flap_phys, - test_queue_get_virtual, - test_remove_virt_first, - test_multiple_leases, - test_lease_queue_tx_type, - test_invalid_netns, - test_invalid_phys_ifindex, - test_multi_netkit_remove_phys, - test_single_remove_phys, - test_link_flap_virt, - test_phys_queue_no_lease, - test_same_ns_lease, - test_resize_after_unlease, - test_lease_queue_zero, - test_release_and_reuse, - test_veth_queue_create, - ], - args=(netns,), - ) - - cmd("ip netns del init", fail=False) - del netns - with NetDrvContEnv(__file__, rxqueues=2) as cfg: cfg.bin_local = path.abspath( path.dirname(__file__) + "/../../../drivers/net/hw/iou-zcrx" diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 231245a95879..a275ed584026 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -65,6 +65,7 @@ TEST_PROGS := \ netdevice.sh \ netns-name.sh \ netns-sysctl.sh \ + nk_qlease.py \ nl_netdev.py \ nl_nlctrl.py \ pmtu.sh \ diff --git a/tools/testing/selftests/net/nk_qlease.py b/tools/testing/selftests/net/nk_qlease.py new file mode 100755 index 000000000000..6ed4fb5e90f6 --- /dev/null +++ b/tools/testing/selftests/net/nk_qlease.py @@ -0,0 +1,1168 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import errno +import time +from lib.py import ( + ksft_run, + ksft_exit, + ksft_eq, + ksft_ne, + ksft_in, + ksft_not_in, + ksft_raises, +) +from lib.py import ( + NetNS, + NetNSEnter, + EthtoolFamily, + NetdevFamily, + RtnlFamily, + NetdevSimDev, +) +from lib.py import ( + NlError, + Netlink, + cmd, + defer, + ip, +) + +def create_netkit(rxqueues): + all_links = ip("-d link show", json=True) + old_idxs = { + link["ifindex"] + for link in all_links + if link.get("linkinfo", {}).get("info_kind") == "netkit" + } + + rtnl = RtnlFamily() + rtnl.newlink( + { + "linkinfo": { + "kind": "netkit", + "data": { + "mode": "l2", + "policy": "forward", + "peer-policy": "forward", + }, + }, + "num-rx-queues": rxqueues, + }, + flags=[Netlink.NLM_F_CREATE, Netlink.NLM_F_EXCL], + ) + + all_links = ip("-d link show", json=True) + nk_links = [ + link + for link in all_links + if link.get("linkinfo", {}).get("info_kind") == "netkit" + and link["ifindex"] not in old_idxs + ] + nk_links.sort(key=lambda x: x["ifindex"]) + return ( + nk_links[1]["ifname"], + nk_links[1]["ifindex"], + nk_links[0]["ifname"], + nk_links[0]["ifindex"], + ) + + +def create_netkit_single(rxqueues): + rtnl = RtnlFamily() + rtnl.newlink( + { + "linkinfo": { + "kind": "netkit", + "data": { + "mode": "l2", + "pairing": "single", + }, + }, + "num-rx-queues": rxqueues, + }, + flags=[Netlink.NLM_F_CREATE, Netlink.NLM_F_EXCL], + ) + + all_links = ip("-d link show", json=True) + nk_links = [ + link + for link in all_links + if link.get("linkinfo", {}).get("info_kind") == "netkit" + and "UP" not in link.get("flags", []) + ] + return nk_links[0]["ifname"], nk_links[0]["ifindex"] + +def test_remove_phys(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + src_queue = 1 + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + result = netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + nk_queue_id = result["id"] + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + ksft_eq(queue_info["lease"]["ifindex"], nk_guest_idx) + ksft_eq(queue_info["lease"]["queue"]["id"], nk_queue_id) + + nsimdev.remove() + time.sleep(0.1) + ret = cmd(f"ip link show dev {nk_host}", fail=False) + ksft_ne(ret.ret, 0) + + +def test_double_lease(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=3) + defer(cmd, f"ip link del dev {nk_host}") + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + src_queue = 1 + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + result = netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(result["id"], 1) + + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EBUSY) + + +def test_virtual_lessor(netns) -> None: + nk_host_a, _, nk_guest_a, nk_guest_a_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host_a}") + ip(f"link set dev {nk_host_a} up") + ip(f"link set dev {nk_guest_a} up") + + nk_host_b, _, nk_guest_b, nk_guest_b_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host_b}") + + ip(f"link set dev {nk_guest_b} netns {netns.name}") + ip(f"link set dev {nk_host_b} up") + ip(f"link set dev {nk_guest_b} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nk_guest_b_idx, + "type": "rx", + "lease": { + "ifindex": nk_guest_a_idx, + "queue": {"id": 0, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + + +def test_phys_lessee(_netns) -> None: + nsimdev_a = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev_a.remove) + nsim_a = nsimdev_a.nsims[0] + ip(f"link set dev {nsim_a.ifname} up") + + nsimdev_b = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev_b.remove) + nsim_b = nsimdev_b.nsims[0] + ip(f"link set dev {nsim_b.ifname} up") + + netdevnl = NetdevFamily() + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nsim_a.ifindex, + "type": "rx", + "lease": { + "ifindex": nsim_b.ifindex, + "queue": {"id": 0, "type": "rx"}, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + + +def test_different_lessors(netns) -> None: + nsimdev_a = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev_a.remove) + nsim_a = nsimdev_a.nsims[0] + ip(f"link set dev {nsim_a.ifname} up") + + nsimdev_b = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev_b.remove) + nsim_b = nsimdev_b.nsims[0] + ip(f"link set dev {nsim_b.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=3) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim_a.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim_b.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EOPNOTSUPP) + + +def test_queue_out_of_range(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 2, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.ERANGE) + + +def test_resize_leased(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + ethnl = EthtoolFamily() + with ksft_raises(NlError) as e: + ethnl.channels_set({"header": {"dev-index": nsim.ifindex}, "combined-count": 1}) + ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + + +def test_self_lease(_netns) -> None: + nk_host, _, _, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + netdevnl = NetdevFamily() + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nk_guest_idx, + "queue": {"id": 0, "type": "rx"}, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + + +def test_veth_queue_create(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + ip("link add veth0 type veth peer name veth1") + defer(cmd, "ip link del dev veth0", fail=False) + + all_links = ip("-d link show", json=True) + veth_peer = [ + link + for link in all_links + if link.get("ifname") == "veth1" + ] + veth_peer_idx = veth_peer[0]["ifindex"] + + ip(f"link set dev veth1 netns {netns.name}") + ip("link set dev veth0 up") + ip("link set dev veth1 up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": veth_peer_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + + +def test_create_tx_type(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "tx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + + +def test_create_primary(_netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, nk_host_idx, _, _ = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_host} up") + + netdevnl = NetdevFamily() + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nk_host_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EOPNOTSUPP) + + +def test_create_limit(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=1) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + + +def test_link_flap_phys(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}") + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + src_queue = 1 + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + result = netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + nk_queue_id = result["id"] + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + ksft_eq(queue_info["lease"]["queue"]["id"], nk_queue_id) + + # Link flap the physical device + ip(f"link set dev {nsim.ifname} down") + ip(f"link set dev {nsim.ifname} up") + + # Verify lease survives the flap + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + ksft_eq(queue_info["lease"]["queue"]["id"], nk_queue_id) + + +def test_queue_get_virtual(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}") + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + src_queue = 1 + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + result = netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + nk_queue_id = result["id"] + + # queue-get on virtual device's leased queue should not show lease + # info (lease info is only shown from the physical device's side) + queue_info = netdevnl.queue_get( + {"ifindex": nk_guest_idx, "id": nk_queue_id, "type": "rx"} + ) + ksft_eq(queue_info["id"], nk_queue_id) + ksft_eq(queue_info["ifindex"], nk_guest_idx) + ksft_not_in("lease", queue_info) + + # Default queue (not leased) also has no lease info + queue_info = netdevnl.queue_get( + {"ifindex": nk_guest_idx, "id": 0, "type": "rx"} + ) + ksft_not_in("lease", queue_info) + + +def test_remove_virt_first(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + src_queue = 1 + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + result = netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(result["id"], 1) + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + ksft_eq(queue_info["lease"]["queue"]["id"], result["id"]) + + # Delete netkit (virtual device removed first, physical stays) + cmd(f"ip link del dev {nk_host}") + + # Verify lease is cleaned up on physical device + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_not_in("lease", queue_info) + + +def test_multiple_leases(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=3) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=4) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + r1 = netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + r2 = netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 2, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + ksft_eq(r1["id"], 1) + ksft_eq(r2["id"], 2) + + # Verify both leases visible on physical device + netdevnl = NetdevFamily() + q1 = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 1, "type": "rx"} + ) + q2 = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 2, "type": "rx"} + ) + ksft_in("lease", q1) + ksft_in("lease", q2) + ksft_eq(q1["lease"]["ifindex"], nk_guest_idx) + ksft_eq(q2["lease"]["ifindex"], nk_guest_idx) + ksft_eq(q1["lease"]["queue"]["id"], r1["id"]) + ksft_eq(q2["lease"]["queue"]["id"], r2["id"]) + + +def test_lease_queue_tx_type(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "tx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + + +def test_invalid_netns(netns) -> None: + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": 1, + "queue": {"id": 0, "type": "rx"}, + "netns-id": 999, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.ENONET) + + +def test_invalid_phys_ifindex(netns) -> None: + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": 99999, + "queue": {"id": 0, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.ENODEV) + + +def test_multi_netkit_remove_phys(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=3) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + # Create two netkit pairs, each leasing a different physical queue + nk_host_a, _, nk_guest_a, nk_guest_a_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host_a}", fail=False) + + nk_host_b, _, nk_guest_b, nk_guest_b_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host_b}", fail=False) + + ip(f"link set dev {nk_guest_a} netns {netns.name}") + ip(f"link set dev {nk_host_a} up") + ip(f"link set dev {nk_guest_a} up", ns=netns) + + ip(f"link set dev {nk_guest_b} netns {netns.name}") + ip(f"link set dev {nk_host_b} up") + ip(f"link set dev {nk_guest_b} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + netdevnl.queue_create( + { + "ifindex": nk_guest_a_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + netdevnl.queue_create( + { + "ifindex": nk_guest_b_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 2, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + # Removing the physical device should take down both netkit pairs + nsimdev.remove() + time.sleep(0.1) + ret = cmd(f"ip link show dev {nk_host_a}", fail=False) + ksft_ne(ret.ret, 0) + ret = cmd(f"ip link show dev {nk_host_b}", fail=False) + ksft_ne(ret.ret, 0) + + +def test_single_remove_phys(_netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_name, nk_idx = create_netkit_single(rxqueues=2) + defer(cmd, f"ip link del dev {nk_name}", fail=False) + + ip(f"link set dev {nk_name} up") + + netdevnl = NetdevFamily() + netdevnl.queue_create( + { + "ifindex": nk_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + }, + } + ) + + # Removing the physical device should take down the single netkit device + nsimdev.remove() + time.sleep(0.1) + ret = cmd(f"ip link show dev {nk_name}", fail=False) + ksft_ne(ret.ret, 0) + + +def test_link_flap_virt(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}") + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + src_queue = 1 + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + result = netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + nk_queue_id = result["id"] + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + ksft_eq(queue_info["lease"]["queue"]["id"], nk_queue_id) + + # Link flap the virtual (netkit) device + ip(f"link set dev {nk_guest} down", ns=netns) + ip(f"link set dev {nk_guest} up", ns=netns) + + # Verify lease survives the virtual device flap + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + ksft_eq(queue_info["lease"]["queue"]["id"], nk_queue_id) + + +def test_phys_queue_no_lease(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}") + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + # Physical queue 0 (not leased) should have no lease info + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 0, "type": "rx"} + ) + ksft_not_in("lease", queue_info) + + # Physical queue 1 (leased) should have lease info + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 1, "type": "rx"} + ) + ksft_in("lease", queue_info) + + +def test_same_ns_lease(_netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_name, nk_idx = create_netkit_single(rxqueues=2) + defer(cmd, f"ip link del dev {nk_name}", fail=False) + + ip(f"link set dev {nk_name} up") + + netdevnl = NetdevFamily() + result = netdevnl.queue_create( + { + "ifindex": nk_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + }, + } + ) + ksft_eq(result["id"], 1) + + # Same namespace: lease info should NOT have netns-id + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 1, "type": "rx"} + ) + ksft_in("lease", queue_info) + ksft_eq(queue_info["lease"]["ifindex"], nk_idx) + ksft_eq(queue_info["lease"]["queue"]["id"], result["id"]) + ksft_not_in("netns-id", queue_info["lease"]) + + +def test_resize_after_unlease(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + # Resize should fail while lease is active + ethnl = EthtoolFamily() + with ksft_raises(NlError) as e: + ethnl.channels_set({"header": {"dev-index": nsim.ifindex}, "combined-count": 1}) + ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + + # Delete netkit, clearing the lease + cmd(f"ip link del dev {nk_host}") + + # Resize should now succeed + ethnl.channels_set({"header": {"dev-index": nsim.ifindex}, "combined-count": 1}) + + +def test_lease_queue_zero(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + result = netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 0, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(result["id"], 1) + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 0, "type": "rx"} + ) + ksft_in("lease", queue_info) + ksft_eq(queue_info["lease"]["queue"]["id"], result["id"]) + + +def test_release_and_reuse(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + src_queue = 1 + + # First lease + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + + # Delete netkit, freeing the lease + cmd(f"ip link del dev {nk_host}") + + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_not_in("lease", queue_info) + + # Re-create netkit and lease the same physical queue again + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)): + netdevnl = NetdevFamily() + result = netdevnl.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(result["id"], 1) + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + ksft_eq(queue_info["lease"]["queue"]["id"], result["id"]) + + +def main() -> None: + netns = NetNS() + cmd("ip netns attach init 1") + ip("netns set init 0", ns=netns) + ip("link set lo up", ns=netns) + + ksft_run( + [ + test_remove_phys, + test_double_lease, + test_virtual_lessor, + test_phys_lessee, + test_different_lessors, + test_queue_out_of_range, + test_resize_leased, + test_self_lease, + test_create_tx_type, + test_create_primary, + test_create_limit, + test_link_flap_phys, + test_queue_get_virtual, + test_remove_virt_first, + test_multiple_leases, + test_lease_queue_tx_type, + test_invalid_netns, + test_invalid_phys_ifindex, + test_multi_netkit_remove_phys, + test_single_remove_phys, + test_link_flap_virt, + test_phys_queue_no_lease, + test_same_ns_lease, + test_resize_after_unlease, + test_lease_queue_zero, + test_release_and_reuse, + test_veth_queue_create, + ], + args=(netns,), + ) + + cmd("ip netns del init", fail=False) + ksft_exit() + + +if __name__ == "__main__": + main() From 1e822171ba9bca7a5d2371bc10358340835bdad3 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 14 Apr 2026 00:08:06 +0200 Subject: [PATCH 1555/1561] selftests/net: Add additional test coverage in nk_qlease Add further netkit queue-lease coverage for netns lifecycle of the guest and physical halves, channel resize across active leases, single-device and multi-lessee scenarios, L3 mode operation, lease capacity exhaustion, and corner-cases of e.g. queue-create rejection paths. Also make the tests more robust by removing the time.sleep(0.1) after netns deletion and turn them into a wait loop. Full test run: # ./nk_qlease.py TAP version 13 1..45 ok 1 nk_qlease.test_remove_phys ok 2 nk_qlease.test_double_lease ok 3 nk_qlease.test_virtual_lessor ok 4 nk_qlease.test_phys_lessee ok 5 nk_qlease.test_different_lessors ok 6 nk_qlease.test_queue_out_of_range ok 7 nk_qlease.test_resize_leased ok 8 nk_qlease.test_self_lease ok 9 nk_qlease.test_create_tx_type ok 10 nk_qlease.test_create_primary ok 11 nk_qlease.test_create_limit ok 12 nk_qlease.test_link_flap_phys ok 13 nk_qlease.test_queue_get_virtual ok 14 nk_qlease.test_remove_virt_first ok 15 nk_qlease.test_multiple_leases ok 16 nk_qlease.test_lease_queue_tx_type ok 17 nk_qlease.test_invalid_netns ok 18 nk_qlease.test_invalid_phys_ifindex ok 19 nk_qlease.test_multi_netkit_remove_phys ok 20 nk_qlease.test_single_remove_phys ok 21 nk_qlease.test_link_flap_virt ok 22 nk_qlease.test_phys_queue_no_lease ok 23 nk_qlease.test_same_ns_lease ok 24 nk_qlease.test_resize_after_unlease ok 25 nk_qlease.test_lease_queue_zero ok 26 nk_qlease.test_release_and_reuse ok 27 nk_qlease.test_veth_queue_create ok 28 nk_qlease.test_two_netkits_same_queue ok 29 nk_qlease.test_l3_mode_lease ok 30 nk_qlease.test_single_double_lease ok 31 nk_qlease.test_single_different_lessors ok 32 nk_qlease.test_cross_ns_netns_id ok 33 nk_qlease.test_delete_guest_netns ok 34 nk_qlease.test_move_guest_netns ok 35 nk_qlease.test_resize_phys_no_reduction ok 36 nk_qlease.test_delete_one_netkit_of_two ok 37 nk_qlease.test_bind_rx_leased_phys_queue ok 38 nk_qlease.test_resize_phys_shrink_past_leased ok 39 nk_qlease.test_resize_virt_not_supported ok 40 nk_qlease.test_lease_devices_down ok 41 nk_qlease.test_lease_capacity_exhaustion ok 42 nk_qlease.test_resize_phys_up ok 43 nk_qlease.test_multi_ns_lease ok 44 nk_qlease.test_multi_ns_delete_one ok 45 nk_qlease.test_move_phys_netns # Totals: pass:45 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Daniel Borkmann Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20260413220809.604592-4-daniel@iogearbox.net Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/nk_qlease.py | 951 ++++++++++++++++++++++- 1 file changed, 946 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/net/nk_qlease.py b/tools/testing/selftests/net/nk_qlease.py index 6ed4fb5e90f6..a84a73ff4eda 100755 --- a/tools/testing/selftests/net/nk_qlease.py +++ b/tools/testing/selftests/net/nk_qlease.py @@ -28,7 +28,16 @@ from lib.py import ( ip, ) -def create_netkit(rxqueues): + +def wait_until(cond, timeout=2.0, interval=0.05): + deadline = time.monotonic() + timeout + while not cond(): + if time.monotonic() >= deadline: + return + time.sleep(interval) + + +def create_netkit(rxqueues, mode="l2"): all_links = ip("-d link show", json=True) old_idxs = { link["ifindex"] @@ -42,7 +51,7 @@ def create_netkit(rxqueues): "linkinfo": { "kind": "netkit", "data": { - "mode": "l2", + "mode": mode, "policy": "forward", "peer-policy": "forward", }, @@ -93,6 +102,7 @@ def create_netkit_single(rxqueues): ] return nk_links[0]["ifname"], nk_links[0]["ifindex"] + def test_remove_phys(netns) -> None: nsimdev = NetdevSimDev(port_count=1, queue_count=2) defer(nsimdev.remove) @@ -131,7 +141,7 @@ def test_remove_phys(netns) -> None: ksft_eq(queue_info["lease"]["queue"]["id"], nk_queue_id) nsimdev.remove() - time.sleep(0.1) + wait_until(lambda: cmd(f"ip link show dev {nk_host}", fail=False).ret != 0) ret = cmd(f"ip link show dev {nk_host}", fail=False) ksft_ne(ret.ret, 0) @@ -812,7 +822,8 @@ def test_multi_netkit_remove_phys(netns) -> None: # Removing the physical device should take down both netkit pairs nsimdev.remove() - time.sleep(0.1) + wait_until(lambda: cmd(f"ip link show dev {nk_host_a}", fail=False).ret != 0 + and cmd(f"ip link show dev {nk_host_b}", fail=False).ret != 0) ret = cmd(f"ip link show dev {nk_host_a}", fail=False) ksft_ne(ret.ret, 0) ret = cmd(f"ip link show dev {nk_host_b}", fail=False) @@ -844,7 +855,7 @@ def test_single_remove_phys(_netns) -> None: # Removing the physical device should take down the single netkit device nsimdev.remove() - time.sleep(0.1) + wait_until(lambda: cmd(f"ip link show dev {nk_name}", fail=False).ret != 0) ret = cmd(f"ip link show dev {nk_name}", fail=False) ksft_ne(ret.ret, 0) @@ -1121,6 +1132,918 @@ def test_release_and_reuse(netns) -> None: ksft_eq(queue_info["lease"]["queue"]["id"], result["id"]) +def test_two_netkits_same_queue(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host_a, _, nk_guest_a, nk_guest_a_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host_a}", fail=False) + + nk_host_b, _, nk_guest_b, nk_guest_b_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host_b}", fail=False) + + ip(f"link set dev {nk_guest_a} netns {netns.name}") + ip(f"link set dev {nk_host_a} up") + ip(f"link set dev {nk_guest_a} up", ns=netns) + + ip(f"link set dev {nk_guest_b} netns {netns.name}") + ip(f"link set dev {nk_host_b} up") + ip(f"link set dev {nk_guest_b} up", ns=netns) + + src_queue = 1 + with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns: + netdevnl_ns.queue_create( + { + "ifindex": nk_guest_a_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + with ksft_raises(NlError) as e: + netdevnl_ns.queue_create( + { + "ifindex": nk_guest_b_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EBUSY) + + +def test_l3_mode_lease(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2, mode="l3") + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + src_queue = 1 + with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns: + result = netdevnl_ns.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(result["id"], 1) + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + ksft_eq(queue_info["lease"]["ifindex"], nk_guest_idx) + ksft_eq(queue_info["lease"]["queue"]["id"], result["id"]) + + +def test_single_double_lease(_netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_name, nk_idx = create_netkit_single(rxqueues=3) + defer(cmd, f"ip link del dev {nk_name}", fail=False) + + ip(f"link set dev {nk_name} up") + + netdevnl = NetdevFamily() + result = netdevnl.queue_create( + { + "ifindex": nk_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + }, + } + ) + ksft_eq(result["id"], 1) + + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nk_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EBUSY) + + +def test_single_different_lessors(_netns) -> None: + nsimdev_a = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev_a.remove) + nsim_a = nsimdev_a.nsims[0] + ip(f"link set dev {nsim_a.ifname} up") + + nsimdev_b = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev_b.remove) + nsim_b = nsimdev_b.nsims[0] + ip(f"link set dev {nsim_b.ifname} up") + + nk_name, nk_idx = create_netkit_single(rxqueues=3) + defer(cmd, f"ip link del dev {nk_name}", fail=False) + + ip(f"link set dev {nk_name} up") + + netdevnl = NetdevFamily() + netdevnl.queue_create( + { + "ifindex": nk_idx, + "type": "rx", + "lease": { + "ifindex": nsim_a.ifindex, + "queue": {"id": 1, "type": "rx"}, + }, + } + ) + + with ksft_raises(NlError) as e: + netdevnl.queue_create( + { + "ifindex": nk_idx, + "type": "rx", + "lease": { + "ifindex": nsim_b.ifindex, + "queue": {"id": 1, "type": "rx"}, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EOPNOTSUPP) + + +def test_cross_ns_netns_id(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + src_queue = 1 + with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns: + netdevnl_ns.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + ksft_in("netns-id", queue_info["lease"]) + + +def test_delete_guest_netns(_netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + test_ns = NetNS() + ip("netns set init 0", ns=test_ns) + ip("link set lo up", ns=test_ns) + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {test_ns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=test_ns) + + src_queue = 1 + with NetNSEnter(str(test_ns)), NetdevFamily() as netdevnl_ns: + netdevnl_ns.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + + del test_ns + wait_until(lambda: "lease" not in netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"})) + + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_not_in("lease", queue_info) + + ret = cmd(f"ip link show dev {nk_host}", fail=False) + ksft_ne(ret.ret, 0) + + +def test_move_guest_netns(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + src_queue = 1 + with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns: + result = netdevnl_ns.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + nk_queue_id = result["id"] + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + ksft_eq(queue_info["lease"]["queue"]["id"], nk_queue_id) + + new_ns = NetNS() + defer(new_ns.__del__) + ip(f"link set dev {nk_guest} netns {new_ns.name}", ns=netns) + + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + ksft_eq(queue_info["lease"]["queue"]["id"], nk_queue_id) + + +def test_resize_phys_no_reduction(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns: + netdevnl_ns.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + ethnl = EthtoolFamily() + ethnl.channels_set( + {"header": {"dev-index": nsim.ifindex}, "combined-count": 2} + ) + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 1, "type": "rx"} + ) + ksft_in("lease", queue_info) + + +def test_delete_one_netkit_of_two(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=3) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host_a, _, nk_guest_a, nk_guest_a_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host_a}", fail=False) + + nk_host_b, _, nk_guest_b, nk_guest_b_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host_b}", fail=False) + + ip(f"link set dev {nk_guest_a} netns {netns.name}") + ip(f"link set dev {nk_host_a} up") + ip(f"link set dev {nk_guest_a} up", ns=netns) + + ip(f"link set dev {nk_guest_b} netns {netns.name}") + ip(f"link set dev {nk_host_b} up") + ip(f"link set dev {nk_guest_b} up", ns=netns) + + with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns: + netdevnl_ns.queue_create( + { + "ifindex": nk_guest_a_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + netdevnl_ns.queue_create( + { + "ifindex": nk_guest_b_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 2, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + netdevnl = NetdevFamily() + q1 = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 1, "type": "rx"} + ) + q2 = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 2, "type": "rx"} + ) + ksft_in("lease", q1) + ksft_in("lease", q2) + + cmd(f"ip link del dev {nk_host_a}") + + q1 = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 1, "type": "rx"} + ) + q2 = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 2, "type": "rx"} + ) + ksft_not_in("lease", q1) + ksft_in("lease", q2) + + +def test_bind_rx_leased_phys_queue(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns: + netdevnl_ns.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + netdevnl = NetdevFamily() + with ksft_raises(NlError) as e: + netdevnl.bind_rx( + { + "ifindex": nsim.ifindex, + "fd": 0, + "queues": [ + {"id": 0, "type": "rx"}, + {"id": 1, "type": "rx"}, + ], + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EOPNOTSUPP) + + +def test_resize_phys_shrink_past_leased(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=4) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns: + netdevnl_ns.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + ethnl = EthtoolFamily() + + # Shrink past the leased queue — only queue 3 removed, queue 1 untouched + ethnl.channels_set( + {"header": {"dev-index": nsim.ifindex}, "combined-count": 3} + ) + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 1, "type": "rx"} + ) + ksft_in("lease", queue_info) + + # Shrink further — queue 2 removed, queue 1 still untouched + ethnl.channels_set( + {"header": {"dev-index": nsim.ifindex}, "combined-count": 2} + ) + + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 1, "type": "rx"} + ) + ksft_in("lease", queue_info) + + # Shrink into the leased queue — queue 1 is busy, must fail + with ksft_raises(NlError) as e: + ethnl.channels_set( + {"header": {"dev-index": nsim.ifindex}, "combined-count": 1} + ) + ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + + +def test_resize_virt_not_supported(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, nk_host_idx, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns: + netdevnl_ns.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + # Channel resize on the netkit host must fail — not supported + ethnl = EthtoolFamily() + with ksft_raises(NlError) as e: + ethnl.channels_set( + {"header": {"dev-index": nk_host_idx}, "combined-count": 1} + ) + ksft_eq(e.exception.nl_msg.error, -errno.EOPNOTSUPP) + + # Lease must be intact + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 1, "type": "rx"} + ) + ksft_in("lease", queue_info) + + +def test_lease_devices_down(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + + # Create lease while both physical and virtual devices are down + src_queue = 1 + with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns: + result = netdevnl_ns.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(result["id"], 1) + + # Bring devices up before queue_get: netdevsim only instantiates NAPIs in + # ndo_open, and netdev-genl queue_get returns -ENOENT without a NAPI. + ip(f"link set dev {nsim.ifname} up") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + ksft_eq(queue_info["lease"]["queue"]["id"], result["id"]) + + +def test_lease_capacity_exhaustion(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=4) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + # rxqueues=3 means num_rx_queues=3, real_num_rx_queues starts at 1. + # Can create 2 leased queues (real goes 1->2->3) but not a 3rd (3->4 > 3). + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=3) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns: + r1 = netdevnl_ns.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(r1["id"], 1) + + r2 = netdevnl_ns.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 2, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(r2["id"], 2) + + # Third lease fails — netkit queue capacity exhausted + with ksft_raises(NlError) as e: + netdevnl_ns.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 3, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + + # Verify the two successful leases are intact + netdevnl = NetdevFamily() + q1 = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 1, "type": "rx"} + ) + q2 = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 2, "type": "rx"} + ) + ksft_in("lease", q1) + ksft_in("lease", q2) + + +def test_resize_phys_up(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=3) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + # Shrink nsim first so we have room to grow + ethnl = EthtoolFamily() + ethnl.channels_set( + {"header": {"dev-index": nsim.ifindex}, "combined-count": 2} + ) + + with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns: + netdevnl_ns.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + # Grow channels — should succeed since leased queue is not removed + ethnl.channels_set( + {"header": {"dev-index": nsim.ifindex}, "combined-count": 3} + ) + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 1, "type": "rx"} + ) + ksft_in("lease", queue_info) + + # New queue 2 should exist without a lease + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 2, "type": "rx"} + ) + ksft_not_in("lease", queue_info) + + +def test_multi_ns_lease(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=3) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + ns_b = NetNS() + defer(ns_b.__del__) + ip("netns set init 0", ns=ns_b) + ip("link set lo up", ns=ns_b) + + # First netkit pair, guest in netns + nk_host_a, _, nk_guest_a, nk_guest_a_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host_a}", fail=False) + ip(f"link set dev {nk_guest_a} netns {netns.name}") + ip(f"link set dev {nk_host_a} up") + ip(f"link set dev {nk_guest_a} up", ns=netns) + + # Second netkit pair, guest in ns_b + nk_host_b, _, nk_guest_b, nk_guest_b_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host_b}", fail=False) + ip(f"link set dev {nk_guest_b} netns {ns_b.name}") + ip(f"link set dev {nk_host_b} up") + ip(f"link set dev {nk_guest_b} up", ns=ns_b) + + # Lease from netns + with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns: + result = netdevnl_ns.queue_create( + { + "ifindex": nk_guest_a_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(result["id"], 1) + + # Lease from ns_b (different namespace, same physical device) + with NetNSEnter(str(ns_b)), NetdevFamily() as netdevnl_ns: + result = netdevnl_ns.queue_create( + { + "ifindex": nk_guest_b_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 2, "type": "rx"}, + "netns-id": 0, + }, + } + ) + ksft_eq(result["id"], 1) + + # Verify both leases from the physical side + netdevnl = NetdevFamily() + q1 = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 1, "type": "rx"} + ) + q2 = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 2, "type": "rx"} + ) + ksft_in("lease", q1) + ksft_in("lease", q2) + ksft_eq(q1["lease"]["ifindex"], nk_guest_a_idx) + ksft_eq(q2["lease"]["ifindex"], nk_guest_b_idx) + + +def test_multi_ns_delete_one(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=3) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + ns_b = NetNS() + ip("netns set init 0", ns=ns_b) + ip("link set lo up", ns=ns_b) + + # First netkit pair, guest in netns (ns_a) + nk_host_a, _, nk_guest_a, nk_guest_a_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host_a}", fail=False) + ip(f"link set dev {nk_guest_a} netns {netns.name}") + ip(f"link set dev {nk_host_a} up") + ip(f"link set dev {nk_guest_a} up", ns=netns) + + # Second netkit pair, guest in ns_b + nk_host_b, _, nk_guest_b, nk_guest_b_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host_b}", fail=False) + + ip(f"link set dev {nk_guest_b} netns {ns_b.name}") + ip(f"link set dev {nk_host_b} up") + ip(f"link set dev {nk_guest_b} up", ns=ns_b) + + with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns: + netdevnl_ns.queue_create( + { + "ifindex": nk_guest_a_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 1, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + with NetNSEnter(str(ns_b)), NetdevFamily() as netdevnl_ns: + netdevnl_ns.queue_create( + { + "ifindex": nk_guest_b_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": 2, "type": "rx"}, + "netns-id": 0, + }, + } + ) + + netdevnl = NetdevFamily() + q1 = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 1, "type": "rx"} + ) + q2 = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 2, "type": "rx"} + ) + ksft_in("lease", q1) + ksft_in("lease", q2) + + # Delete ns_b — destroys nk_guest_b, triggers unlease of queue 2 + del ns_b + wait_until(lambda: "lease" not in netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 2, "type": "rx"})) + + # ns_a's lease on queue 1 must survive + q1 = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 1, "type": "rx"} + ) + ksft_in("lease", q1) + ksft_eq(q1["lease"]["ifindex"], nk_guest_a_idx) + + # ns_b's lease on queue 2 must be gone + q2 = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": 2, "type": "rx"} + ) + ksft_not_in("lease", q2) + + # nk_host_b should be gone too (phys removal cascades to netkit pair) + ret = cmd(f"ip link show dev {nk_host_b}", fail=False) + ksft_ne(ret.ret, 0) + + +def test_move_phys_netns(netns) -> None: + nsimdev = NetdevSimDev(port_count=1, queue_count=2) + defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + ip(f"link set dev {nsim.ifname} up") + + nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2) + defer(cmd, f"ip link del dev {nk_host}", fail=False) + + ip(f"link set dev {nk_guest} netns {netns.name}") + ip(f"link set dev {nk_host} up") + ip(f"link set dev {nk_guest} up", ns=netns) + + src_queue = 1 + with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns: + nk_queue_id = netdevnl_ns.queue_create( + { + "ifindex": nk_guest_idx, + "type": "rx", + "lease": { + "ifindex": nsim.ifindex, + "queue": {"id": src_queue, "type": "rx"}, + "netns-id": 0, + }, + } + )["id"] + + netdevnl = NetdevFamily() + queue_info = netdevnl.queue_get( + {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + + # Move the physical device to a new namespace. Move it back to init_net + # on cleanup before the other defers fire (new_ns deletion, nsimdev.remove) + # so nsim lives in a stable namespace when they run. + new_ns = NetNS() + defer(new_ns.__del__) + ip(f"link set dev {nsim.ifname} netns {new_ns.name}") + defer(ip, f"link set dev {nsim.ifname} netns init", ns=new_ns) + + # Physical device is now in new_ns — find its ifindex there + all_links = ip("-d link show", json=True, ns=new_ns) + nsim_in_new = [lnk for lnk in all_links if lnk.get("ifname") == nsim.ifname] + new_ifindex = nsim_in_new[0]["ifindex"] + + # Moving a device across netns brings it admin-down; bring it back up so + # netdevsim re-creates the NAPI (netdev-genl queue_get needs it). + ip(f"link set dev {nsim.ifname} up", ns=new_ns) + + # Verify lease survived the namespace move + with NetNSEnter(str(new_ns)), NetdevFamily() as netdevnl_ns: + queue_info = netdevnl_ns.queue_get( + {"ifindex": new_ifindex, "id": src_queue, "type": "rx"} + ) + ksft_in("lease", queue_info) + ksft_eq(queue_info["lease"]["queue"]["id"], nk_queue_id) + + def main() -> None: netns = NetNS() cmd("ip netns attach init 1") @@ -1156,6 +2079,24 @@ def main() -> None: test_lease_queue_zero, test_release_and_reuse, test_veth_queue_create, + test_two_netkits_same_queue, + test_l3_mode_lease, + test_single_double_lease, + test_single_different_lessors, + test_cross_ns_netns_id, + test_delete_guest_netns, + test_move_guest_netns, + test_resize_phys_no_reduction, + test_delete_one_netkit_of_two, + test_bind_rx_leased_phys_queue, + test_resize_phys_shrink_past_leased, + test_resize_virt_not_supported, + test_lease_devices_down, + test_lease_capacity_exhaustion, + test_resize_phys_up, + test_multi_ns_lease, + test_multi_ns_delete_one, + test_move_phys_netns, ], args=(netns,), ) From bc28831d7a09f7058cdca4658d81e5faf635bed7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 14 Apr 2026 09:52:49 +0200 Subject: [PATCH 1556/1561] MAINTAINERS: Add netkit selftest files The following selftest files are related to netkit and should have netkit folks in Cc for review: - tools/testing/selftests/bpf/prog_tests/tc_netkit.c - tools/testing/selftests/drivers/net/hw/nk_qlease.py - tools/testing/selftests/net/nk_qlease.py Signed-off-by: Daniel Borkmann Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20260414075249.611608-1-daniel@iogearbox.net Signed-off-by: Jakub Kicinski --- MAINTAINERS | 3 +++ 1 file changed, 3 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 65902b97f5df..fa1bdb1db73e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4901,6 +4901,9 @@ L: netdev@vger.kernel.org S: Supported F: drivers/net/netkit.c F: include/net/netkit.h +F: tools/testing/selftests/bpf/prog_tests/tc_netkit.c +F: tools/testing/selftests/drivers/net/hw/nk_qlease.py +F: tools/testing/selftests/net/nk_qlease.py BPF [NETWORKING] (struct_ops, reuseport) M: Martin KaFai Lau From e5549aecdd24c43ef9d05c376343a621f0e04277 Mon Sep 17 00:00:00 2001 From: Fushuai Wang Date: Tue, 14 Apr 2026 17:39:41 +0200 Subject: [PATCH 1557/1561] wireguard: allowedips: Use kfree_rcu() instead of call_rcu() Replace call_rcu() + kmem_cache_free() with kfree_rcu() to simplify the code and reduce function size. Signed-off-by: Fushuai Wang Reviewed-by: Simon Horman Signed-off-by: Jason A. Donenfeld Link: https://patch.msgid.link/20260414153944.2742252-2-Jason@zx2c4.com Signed-off-by: Jakub Kicinski --- drivers/net/wireguard/allowedips.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireguard/allowedips.c b/drivers/net/wireguard/allowedips.c index 09f7fcd7da78..5ece9acad64d 100644 --- a/drivers/net/wireguard/allowedips.c +++ b/drivers/net/wireguard/allowedips.c @@ -48,11 +48,6 @@ static void push_rcu(struct allowedips_node **stack, } } -static void node_free_rcu(struct rcu_head *rcu) -{ - kmem_cache_free(node_cache, container_of(rcu, struct allowedips_node, rcu)); -} - static void root_free_rcu(struct rcu_head *rcu) { struct allowedips_node *node, *stack[MAX_ALLOWEDIPS_DEPTH] = { @@ -271,13 +266,13 @@ static void remove_node(struct allowedips_node *node, struct mutex *lock) if (free_parent) child = rcu_dereference_protected(parent->bit[!(node->parent_bit_packed & 1)], lockdep_is_held(lock)); - call_rcu(&node->rcu, node_free_rcu); + kfree_rcu(node, rcu); if (!free_parent) return; if (child) child->parent_bit_packed = parent->parent_bit_packed; *(struct allowedips_node **)(parent->parent_bit_packed & ~3UL) = child; - call_rcu(&parent->rcu, node_free_rcu); + kfree_rcu(parent, rcu); } static int remove(struct allowedips_node __rcu **trie, u8 bits, const u8 *key, From 121f416756d63f6e130f4b9c49676c2b132c48b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Asbj=C3=B8rn=20Sloth=20T=C3=B8nnesen?= Date: Tue, 14 Apr 2026 17:39:42 +0200 Subject: [PATCH 1558/1561] tools: ynl: add sample for wireguard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a sample application for WireGuard, using the generated C library. The main benefit of this is to exercise the generated library, which might be useful for future self-tests. Example: $ make -C tools/net/ynl/lib $ make -C tools/net/ynl/generated $ make -C tools/net/ynl/tests wireguard $ ./tools/net/ynl/tests/wireguard usage: ./tools/net/ynl/tests/wireguard $ sudo ./tools/net/ynl/tests/wireguard wg-test Interface 3: wg-test Peer 6adfb183a4a2c94a2f92dab5ade762a4788[...]: Data: rx: 42 / tx: 42 bytes Allowed IPs: 0.0.0.0/0 ::/0 Signed-off-by: Asbjørn Sloth Tønnesen Signed-off-by: Jason A. Donenfeld Link: https://patch.msgid.link/20260414153944.2742252-3-Jason@zx2c4.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/tests/.gitignore | 1 + tools/net/ynl/tests/wireguard.c | 106 ++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+) create mode 100644 tools/net/ynl/tests/wireguard.c diff --git a/tools/net/ynl/tests/.gitignore b/tools/net/ynl/tests/.gitignore index 045385df42a4..a7832ebfdbbc 100644 --- a/tools/net/ynl/tests/.gitignore +++ b/tools/net/ynl/tests/.gitignore @@ -7,3 +7,4 @@ rt-link rt-route tc tc-filter-add +wireguard diff --git a/tools/net/ynl/tests/wireguard.c b/tools/net/ynl/tests/wireguard.c new file mode 100644 index 000000000000..df601e742c28 --- /dev/null +++ b/tools/net/ynl/tests/wireguard.c @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include + +#include "wireguard-user.h" + +static void print_allowed_ip(const struct wireguard_wgallowedip *aip) +{ + char addr_out[INET6_ADDRSTRLEN]; + + if (!inet_ntop(aip->family, aip->ipaddr, addr_out, sizeof(addr_out))) { + addr_out[0] = '?'; + addr_out[1] = '\0'; + } + printf("\t\t\t%s/%u\n", addr_out, aip->cidr_mask); +} + +/* Only printing public key in this demo. For better key formatting, + * use the constant-time implementation as found in wireguard-tools. + */ +static void print_peer_header(const struct wireguard_wgpeer *peer) +{ + unsigned int len = peer->_len.public_key; + uint8_t *key = peer->public_key; + unsigned int i; + + if (len != 32) + return; + printf("\tPeer "); + for (i = 0; i < len; i++) + printf("%02x", key[i]); + printf(":\n"); +} + +static void print_peer(const struct wireguard_wgpeer *peer) +{ + unsigned int i; + + print_peer_header(peer); + printf("\t\tData: rx: %llu / tx: %llu bytes\n", + peer->rx_bytes, peer->tx_bytes); + printf("\t\tAllowed IPs:\n"); + for (i = 0; i < peer->_count.allowedips; i++) + print_allowed_ip(&peer->allowedips[i]); +} + +static void build_request(struct wireguard_get_device_req *req, char *arg) +{ + char *endptr; + int ifindex; + + ifindex = strtol(arg, &endptr, 0); + if (endptr != arg + strlen(arg) || errno != 0) + ifindex = 0; + if (ifindex > 0) + wireguard_get_device_req_set_ifindex(req, ifindex); + else + wireguard_get_device_req_set_ifname(req, arg); +} + +int main(int argc, char **argv) +{ + struct wireguard_get_device_list *devs; + struct wireguard_get_device_req *req; + struct ynl_error yerr; + struct ynl_sock *ys; + + if (argc < 2) { + fprintf(stderr, "usage: %s \n", argv[0]); + return 1; + } + + ys = ynl_sock_create(&ynl_wireguard_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return 2; + } + + req = wireguard_get_device_req_alloc(); + build_request(req, argv[1]); + + devs = wireguard_get_device_dump(ys, req); + if (!devs) { + fprintf(stderr, "YNL (%d): %s\n", ys->err.code, ys->err.msg); + wireguard_get_device_req_free(req); + ynl_sock_destroy(ys); + return 3; + } + + ynl_dump_foreach(devs, d) { + unsigned int i; + + printf("Interface %d: %s\n", d->ifindex, d->ifname); + for (i = 0; i < d->_count.peers; i++) + print_peer(&d->peers[i]); + } + + wireguard_get_device_list_free(devs); + wireguard_get_device_req_free(req); + ynl_sock_destroy(ys); + + return 0; +} From f364db381c9d38c96de3148ac584f859c550fad5 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 14 Apr 2026 17:39:43 +0200 Subject: [PATCH 1559/1561] wireguard: allowedips: remove redundant space Not a contentful commit, but amusingly found when porting ba3d7b93 to Windows. Signed-off-by: Jason A. Donenfeld Link: https://patch.msgid.link/20260414153944.2742252-4-Jason@zx2c4.com Signed-off-by: Jakub Kicinski --- drivers/net/wireguard/selftest/allowedips.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireguard/selftest/allowedips.c b/drivers/net/wireguard/selftest/allowedips.c index 2da3008c3a01..3e857e6fb627 100644 --- a/drivers/net/wireguard/selftest/allowedips.c +++ b/drivers/net/wireguard/selftest/allowedips.c @@ -623,7 +623,7 @@ bool __init wg_allowedips_selftest(void) test_boolean(!remove(6, b, 0x24446801, 0x40e40800, 0xdeaebeef, 0xdefbeef, 128)); test(6, a, 0x24446801, 0x40e40800, 0xdeaebeef, 0xdefbeef); /* invalid CIDR should have no effect and return -EINVAL */ - test_boolean(remove(6, a, 0x24446801, 0x40e40800, 0xdeaebeef, 0xdefbeef, 129) == -EINVAL); + test_boolean(remove(6, a, 0x24446801, 0x40e40800, 0xdeaebeef, 0xdefbeef, 129) == -EINVAL); test(6, a, 0x24446801, 0x40e40800, 0xdeaebeef, 0xdefbeef); remove(6, a, 0x24446801, 0x40e40800, 0xdeaebeef, 0xdefbeef, 128); test_negative(6, a, 0x24446801, 0x40e40800, 0xdeaebeef, 0xdefbeef); From 60a25ef8dacb3566b1a8c4de00572a498e2a3bf9 Mon Sep 17 00:00:00 2001 From: Shardul Bankar Date: Tue, 14 Apr 2026 17:39:44 +0200 Subject: [PATCH 1560/1561] wireguard: device: use exit_rtnl callback instead of manual rtnl_lock in pre_exit wg_netns_pre_exit() manually acquires rtnl_lock() inside the pernet .pre_exit callback. This causes a hung task when another thread holds rtnl_mutex - the cleanup_net workqueue (or the setup_net failure rollback path) blocks indefinitely in wg_netns_pre_exit() waiting to acquire the lock. Convert to .exit_rtnl, introduced in commit 7a60d91c690b ("net: Add ->exit_rtnl() hook to struct pernet_operations."), where the framework already holds RTNL and batches all callbacks under a single rtnl_lock()/rtnl_unlock() pair, eliminating the contention window. The rcu_assign_pointer(wg->creating_net, NULL) is safe to move from .pre_exit to .exit_rtnl (which runs after synchronize_rcu()) because all RCU readers of creating_net either use maybe_get_net() - which returns NULL for a dying namespace with zero refcount - or access net->user_ns which remains valid throughout the entire ops_undo_list sequence. Reported-by: syzbot+f2fbf7478a35a94c8b7c@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?id=cb64c22a492202ca929e18262fdb8cb89e635c70 Signed-off-by: Shardul Bankar [ Jason: added __net_exit and __read_mostly annotations that were missing. ] Fixes: 900575aa33a3 ("wireguard: device: avoid circular netns references") Cc: stable@vger.kernel.org Signed-off-by: Jason A. Donenfeld Link: https://patch.msgid.link/20260414153944.2742252-5-Jason@zx2c4.com Signed-off-by: Jakub Kicinski --- drivers/net/wireguard/device.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c index 46a71ec36af8..67b07ee2d660 100644 --- a/drivers/net/wireguard/device.c +++ b/drivers/net/wireguard/device.c @@ -411,12 +411,11 @@ static struct rtnl_link_ops link_ops __read_mostly = { .newlink = wg_newlink, }; -static void wg_netns_pre_exit(struct net *net) +static void __net_exit wg_netns_exit_rtnl(struct net *net, struct list_head *dev_kill_list) { struct wg_device *wg; struct wg_peer *peer; - rtnl_lock(); list_for_each_entry(wg, &device_list, device_list) { if (rcu_access_pointer(wg->creating_net) == net) { pr_debug("%s: Creating namespace exiting\n", wg->dev->name); @@ -429,11 +428,10 @@ static void wg_netns_pre_exit(struct net *net) mutex_unlock(&wg->device_update_lock); } } - rtnl_unlock(); } -static struct pernet_operations pernet_ops = { - .pre_exit = wg_netns_pre_exit +static struct pernet_operations pernet_ops __read_mostly = { + .exit_rtnl = wg_netns_exit_rtnl }; int __init wg_device_init(void) From 6bb6bafa88b4edfea59d931c8d85b73dd7a662ae Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Tue, 14 Apr 2026 17:09:47 +0200 Subject: [PATCH 1561/1561] net: pse-pd: fix kernel-doc function name for pse_control_find_by_id() The kernel-doc comment header incorrectly referenced the function name pse_control_find_net_by_id() instead of the actual function name pse_control_find_by_id(). Correct the function name in the documentation to match the implementation. Signed-off-by: Kory Maincent Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20260414150948.744618-1-kory.maincent@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/pse-pd/pse_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/pse-pd/pse_core.c b/drivers/net/pse-pd/pse_core.c index 3beaaaeec9e1..f6b94ac7a68a 100644 --- a/drivers/net/pse-pd/pse_core.c +++ b/drivers/net/pse-pd/pse_core.c @@ -234,7 +234,7 @@ out: } /** - * pse_control_find_net_by_id - Find net attached to the pse control id + * pse_control_find_by_id - Find pse_control from an id * @pcdev: a pointer to the PSE * @id: index of the PSE control *